def ocr(crop_image, config, left, top, lang):
    if config:
        # image_to_data's second positional parameter is lang, so the config
        # string must be passed as a keyword
        temp_df = pytesseract.image_to_data(crop_image,
                                            config=config,
                                            lang=LANG_MAPPING[lang][0],
                                            output_type=Output.DATAFRAME)
    else:
        temp_df = pytesseract.image_to_data(crop_image,
                                            lang=LANG_MAPPING[lang][0],
                                            output_type=Output.DATAFRAME)
    temp_df = temp_df[temp_df.text.notnull()]
    text = ""
    coord = []

    for index, row in temp_df.iterrows():
        word_coord = {}
        temp_text = str(row["text"])
        temp_conf = row["conf"]
        text = text + " " + str(temp_text)
        word_coord['text'] = str(temp_text)
        word_coord['conf'] = temp_conf
        word_coord['text_left'] = int(row["left"] + left)
        word_coord['text_top'] = int(row["top"] + top)
        word_coord['text_width'] = int(row["width"])
        word_coord['text_height'] = int(row["height"])
        coord.append(word_coord)
    return coord, text
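The ocr helper above, like several examples below, leans on a module-level LANG_MAPPING table that maps a language key to a list of Tesseract language codes; the table itself is never shown on this page. A minimal sketch of that assumption, with a hypothetical Hindi entry and input file:

from PIL import Image

# Hypothetical mapping: index 0 is the primary Tesseract code; Example #4
# below also reads index 2 as a Latin-script fallback.
LANG_MAPPING = {
    "hi": ["hin", "hin+eng", "eng"],
}

page = Image.open("page.jpg")  # hypothetical input file
crop = page.crop((100, 50, 400, 90))
coords, full_text = ocr(crop, "--psm 7", left=100, top=50, lang="hi")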
Example #2
def find_target(boxes, image, label):
    """Return the first box whose OCR'd label matches the given label."""
    global j
    for box in boxes:
        roi = image[box[1]:box[1] + box[3], box[0]:box[0] + box[2]]
        if DEBUG:
            cv2.imshow('orig ' + str(j), roi)

        start = time.time()

        # preprocess region of interest
        img_preprocessed = image_processing.process_image(roi)

        # find tokens for OCR; tokens are in the format x1, y1, x2, y2
        tokens = image_processing.find_tokens(img_preprocessed)
        if TRACE:
            print("#tokens: ", len(tokens))
        tokens = filter_tokens(tokens, img_preprocessed)
        if TRACE:
            print("#filtered: ", len(tokens))

        end = time.time()
        print("time taken for tokens including preprocessing: ", end - start)

        if TRACE:
            for t in tokens:
                cv2.rectangle(roi, (t[0], t[1]), (t[2], t[3]), (255, 200, 50), 2)
                cv2.imshow('filtered token ' + str(j), roi)
                j = j + 1

        text = ""
        for (x1, y1, x2, y2) in tokens:
            tesseract_img = roi[y1:y2, x1:x2]
            tesseract_img = image_processing.resize_to_width(tesseract_img, 300, False)
            tesseract_img = cv2.cvtColor(tesseract_img, cv2.COLOR_BGR2GRAY)
            ret, tesseract_img = cv2.threshold(tesseract_img, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

            start = time.time()
            data = pytesseract.image_to_data(tesseract_img,
                                             config="--psm 10 --oem 0 -c tessedit_char_whitelist=01234567890E",
                                             output_type=Output.DICT)  # DICT
            end = time.time()
            print("tesseract time taken for one token: ", end - start)

            if TRACE:
                print("data['conf']: ", data['conf'])

            # append the character only if its confidence exceeds 60
            if data['conf'][-1] > 60:
                text = text + data['text'][-1]

        if DEBUG:
            print("button label: ", text)

        # return first box with searched label
        if label == text or (label == '0' and text == 'E'):
            return box

    return None
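Both find_target above and recognize_text_in_boxes (Example #7) call a filter_tokens helper that this page never defines. A minimal sketch, assuming tokens are (x1, y1, x2, y2) boxes and filtering merely drops boxes too small to hold a glyph:

def filter_tokens(tokens, img, min_w=4, min_h=8):
    # Hypothetical implementation: keep boxes that are at least min_w x min_h
    # pixels and lie fully inside the image.
    h, w = img.shape[:2]
    return [(x1, y1, x2, y2) for (x1, y1, x2, y2) in tokens
            if x2 - x1 >= min_w and y2 - y1 >= min_h
            and 0 <= x1 < x2 <= w and 0 <= y1 < y2 <= h]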
Example #3
def ocr(crop_image, configs, left, top, language):
    if configs:
        temp_df = pytesseract.image_to_data(crop_image,
                                            config='--psm 7',
                                            lang=language,
                                            output_type=Output.DATAFRAME)
    else:
        temp_df = pytesseract.image_to_data(crop_image,
                                            lang=language,
                                            output_type=Output.DATAFRAME)
    temp_df = temp_df[temp_df.text.notnull()]
    text = ""
    coord = []
    for index, row in temp_df.iterrows():
        temp_dict = {}
        vert = []
        temp_dict['identifier'] = str(uuid.uuid4())
        vert.append({'x': int(row["left"] + left), 'y': int(row["top"] + top)})
        vert.append({
            'x': int(row["left"] + left) + int(row["width"]),
            'y': int(row["top"] + top)
        })
        vert.append({
            'x': int(row["left"] + left) + int(row["width"]),
            'y': int(row["top"] + top) + int(row["height"])
        })
        vert.append({
            'x': int(row["left"] + left),
            'y': int(row["top"] + top) + int(row["height"])
        })

        temp_dict['text'] = process_text(row['text'])
        temp_dict['conf'] = row["conf"]
        temp_dict['boundingBox'] = {}
        temp_dict['boundingBox']["vertices"] = vert
        if text == '':
            text = temp_dict['text']
        else:
            text = text + " " + temp_dict['text']
        coord.append(temp_dict)
    return coord, text
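For reference, each entry this variant appends to coord has the shape sketched below; the values are placeholders, and process_text is another helper these examples assume to exist.

# Illustrative shape of one coord entry returned by ocr() above:
example_entry = {
    "identifier": "a uuid4 string",
    "text": "word",
    "conf": 92.0,
    "boundingBox": {"vertices": [
        {"x": 10, "y": 5}, {"x": 60, "y": 5},
        {"x": 60, "y": 25}, {"x": 10, "y": 25},
    ]},
}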
Example #4
def low_conf_ocr(lang, left, top, width, height, image):
    crop_image = image.crop((left - 5, top - 7, left + width + 5, top + height + 7))
    # debug: dumps every crop to a machine-specific path
    crop_image.save("/home/naresh/check/" + str(uuid.uuid4()) + '.jpg')
    temp_df_eng = pytesseract.image_to_data(crop_image, config='--psm 6',
                                            lang=LANG_MAPPING[lang][2],
                                            output_type=Output.DATAFRAME)
    temp_df_hin = pytesseract.image_to_data(crop_image, config='--psm 6',
                                            lang=LANG_MAPPING[lang][0],
                                            output_type=Output.DATAFRAME)
    # keep whichever language model is more confident on this crop
    eng_index = temp_df_eng['conf'].argmax()
    hin_index = temp_df_hin['conf'].argmax()
    eng_conf = temp_df_eng['conf'][eng_index]
    hin_conf = temp_df_hin['conf'][hin_index]
    if eng_conf >= hin_conf:
        conf = eng_conf
        text = temp_df_eng['text'][eng_index]
    else:
        conf = hin_conf
        text = temp_df_hin['text'][hin_index]
    return text, conf
Example #5
def home():
    prop = {}
    res = {}
    if request.method == 'POST':
        f = request.files['file']
        key = request.form.get('var')
        img = Image.open(f.stream)
        config = r'--oem 3 --psm 6'  # works well
        if key == '1':  # Aadhaar rear side: pincode, state, town and address
            img_rear = get_img1(img)
            d = pytesseract.image_to_data(img_rear,
                                          lang='eng',
                                          output_type=Output.DICT,
                                          config=config)
            res = get_result1(d)
            return jsonify(res)
        elif key == '2':  # presumably Aadhaar front side
            name_front = get_name_img(img)
            img_front = get_img2(img)
            d = pytesseract.image_to_data(img_front,
                                          lang='eng',
                                          output_type=Output.DICT,
                                          config=config)
            res = {}
            res = get_result2(d)
            res['name'] = get_name2(d)
            return jsonify(res)
        elif key == '3':  # PAN card OCR
            res = panocr(img)
            return jsonify(res)
        elif key == '4':  # voter ID rear
            data = Voter_IDREAR(img)
            return jsonify(data)
        elif key == '5':  # voter ID front
            data = Voter_IDFRONT(img)
            return jsonify(data)

    return render_template('index.html')
Example #6
    def __process_image_file(self, c: sqlite3.Cursor, path, dir_id, doc_path,
                             page):

        if c.execute("select id from images where path = ?",
                     (path, )).fetchone() is not None:
            self.__add_message("Skipping already indexed file {}.".format(
                path.replace(self.path + "/", "")))
            return

        img = cv2.imread(path)
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        blur = cv2.GaussianBlur(img_gray, (9, 9), 0)
        img = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                    cv2.THRESH_BINARY, 11, 2)
        if self.tesseract_exe:
            pytesseract.tesseract_cmd = self.tesseract_exe
        d = pytesseract.image_to_data(img, output_type=Output.DICT)

        c.execute("insert into 'images' (path, directory_id) values (?, ?)",
                  (path, dir_id))
        image_id = c.lastrowid
        for j in range(len(d["text"])):
            if not d['text'][j].strip():
                continue
            c.execute(
                "insert into 'texts' (text, left, top, width, height, image_id) values (?, ?, ?, ?, ?, ?)",
                (d['text'][j], d['left'][j], d['top'][j], d['width'][j],
                 d['height'][j], image_id))

        if doc_path is not None and page is not None:
            doc_id = c.execute("select id from documents where path = ?",
                               (doc_path, )).fetchone()
            if doc_id is None:
                c.execute(
                    "insert into documents (path, directory_id) values (?, ?)",
                    (doc_path, dir_id))
                doc_id = c.lastrowid
            else:
                doc_id = doc_id[0]
            c.execute(
                "update images set document_id = ?, doc_page = ? where id = ?",
                (doc_id, page, image_id))
Example #7
def recognize_text_in_boxes(boxes, image):
    boxes_ocr = []
    for box in boxes:
        roi = image[box[1]:box[1] + box[3], box[0]:box[0] + box[2]]

        # preprocess region of interest
        img_preprocessed = image_processing.process_image(roi)

        # find tokens for OCR
        tokens = image_processing.find_tokens(img_preprocessed)
        tokens = filter_tokens(tokens, img_preprocessed)
        print("# tokens for box ", box, ": ", len(tokens))
        text = ""
        i = 0
        for (x1, y1, x2, y2) in tokens:
            tesseract_img = roi[y1:y2, x1:x2]
            tesseract_img = image_processing.resize_to_width(tesseract_img, 300, False)
            tesseract_img = cv2.cvtColor(tesseract_img, cv2.COLOR_BGR2GRAY)
            ret, tesseract_img = cv2.threshold(tesseract_img, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
            if TRACE:
                cv2.imshow('tesser img' + str(i), tesseract_img)
                i = i + 1

            data = pytesseract.image_to_data(tesseract_img,
                                             config="--psm 10 --oem 0 -c tessedit_char_whitelist=01234567890Ee",
                                             output_type=Output.DICT)  # DICT


            token_text = data['text']
            # keep the character when Tesseract reports positive confidence
            if data['conf'][-1] > 0:
                text = text + data['text'][-1]

            if TRACE:
                print("token_text: ", token_text)
                print("data['conf']: ", data['conf'])

        print("text read from box: ", text)
        boxes_ocr.append((box, text))

    return boxes_ocr
Example #8
    def scan_data(self):

        dict_data = pytesseract.image_to_data(Image.open(self.filename),
                                              output_type=Output.DICT)
        json_array = []

        for i in range(len(dict_data['text'])):

            # keep only non-empty strings
            if len(dict_data['text'][i]) != 0:
                json_line = {
                    'text': dict_data['text'][i],
                    'left': dict_data['left'][i],
                    'top': dict_data['top'][i],
                    'width': dict_data['width'][i],
                    'height': dict_data['height'][i]
                }
                json_array.append(json_line)

        return json_array
Example #9
def current_words_location(pid, words, output_dir='out'):
    """
    获取当前页面上文字的位置
    """
    # 1. 获取到手机截图
    photo_name = phone.get_page_photo(pid, output_dir)
    if photo_name is None:
        return None
    # 2. 对截图进行识别
    data = pytesseract.image_to_data(Image.open(
        os.path.join(output_dir, photo_name)),
                                     output_type=Output.DICT,
                                     lang='chi_sim')
    # 3. 截图信息对比
    for i in range(0, len(data['text'])):
        if data['text'][i].__eq__(words[0]):
            is_found = True
            for j, word in enumerate(words):
                # 保证数组不越界
                if i + j >= len(data['text']):
                    return None
                if not word.__eq__(data['text'][i + j]):
                    is_found = False
                    break
            if is_found:
                # 4. 返回处理结果
                os.remove(os.path.join(output_dir, photo_name))
                if data['width'][i] == 0 or data['height'][i] == 0:
                    return None
                return {
                    'x': data['left'][i],
                    'y': data['top'][i],
                    'w': data['width'][i],
                    'h': data['height'][i]
                }
    os.remove(os.path.join(output_dir, photo_name))
    return None
Example #10
import cv2
from pytesseract import pytesseract
from pytesseract import Output

pytesseract.tesseract_cmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe"

img = cv2.imread("ur_mom.png")

image_data = pytesseract.image_to_data(img, output_type=Output.DICT)

print(image_data['left'])

# Printing each word
# for word in image_data['text']:
# 	print(word)

for i, word in enumerate(image_data['text']):
    if word != '':
        x, y, w, h = image_data['left'][i], image_data['top'][i], image_data['width'][i], image_data['height'][i]
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 3)
        cv2.putText(img, word, (x, y - 16), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 255), 2)

cv2.imshow("window", img)
cv2.waitKey(0)
Example #11
def img_to_series(img_name):
    data = pytesseract.image_to_data(Image.open(img_name).convert("L"))
    with open("data.tsv", "w") as save_file:
        save_file.write(data)
    return pd.read_csv("data.tsv", sep="\t")
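Because no output_type is passed, image_to_data returns Tesseract's raw TSV output as a single string, which is why it can be written straight to data.tsv. The temp file can be skipped by parsing in memory; a sketch:

import io

import pandas as pd
import pytesseract
from PIL import Image


def img_to_frame(img_name):
    # same idea as img_to_series above, but parse the TSV without a temp file
    data = pytesseract.image_to_data(Image.open(img_name).convert("L"))
    return pd.read_csv(io.StringIO(data), sep="\t")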
Example #12
def detect_text_data(cropped_img):
    text_data = pytesseract.image_to_data(cropped_img,
                                          config=TESSDATA_CONFIG,
                                          output_type=Output.DICT)

    return text_data
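detect_text_data expects a module-level TESSDATA_CONFIG string that is not shown here; any ordinary Tesseract config works, for example:

# Hypothetical value; the original constant is not part of this example.
TESSDATA_CONFIG = r'--oem 3 --psm 6'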
Example #13
def MAWB(data, img):
    # look for an air waybill number in the OCR text as-is first
    x = re.findall(r'\d{3}[-\s]\d{4}[\s]\d{4}[\s]', data)
    y = re.findall(r'[:]\d{11}', data)
    z = re.findall(r'\d{3}-\d{8}', data)

    boxes = pt.image_to_data(img, output_type=Output.DICT)
    out = pt.image_to_data(img)

    if len(x) != 0:
        return (x, boxes, out, data, img)
    elif len(y) != 0:
        return (y, boxes, out, data, img)
    elif len(z) != 0:
        return (z, boxes, out, data, img)

    # nothing found: rotate the image and retry OCR at each orientation
    for angle in (90, 180, 270, 360):
        rotated = ndimage.rotate(img, angle)
        text = pt.image_to_string(rotated, config='--psm 6 --oem 1')
        x = re.findall(r'\d{3}-\d{8}', text)
        if len(x) != 0:
            boxes = pt.image_to_data(rotated, output_type=Output.DICT)
            out = pt.image_to_data(rotated)
            return (x, boxes, out, text, rotated)
    return None
Example #14
def data_output(temp):
    # relies on a module-level custom_config string; see the sketch below
    result = pytesseract.image_to_data(temp, output_type=Output.DICT, config=custom_config)
    return result
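A plausible definition of that custom_config, mirroring the flags used elsewhere on this page:

# Hypothetical value; the original is not shown.
custom_config = r'--oem 3 --psm 11'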
Example #15
import cv2
from pytesseract import pytesseract
from pytesseract import Output

pytesseract.tesseract_cmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe"

foto = cv2.imread("nowaf.png")

foto_data = pytesseract.image_to_data(foto, output_type=Output.DICT)

for i, word in enumerate(foto_data['text']):
    if word != "":
        x, y, w, h = foto_data['left'][i], foto_data['top'][i], foto_data[
            'width'][i], foto_data['height'][i]
        cv2.rectangle(foto, (x, y), (x + w, y + h), (0, 255, 0), 3)
        cv2.putText(foto, word, (x, y - 16), cv2.FONT_HERSHEY_COMPLEX, 1,
                    (0, 0, 255), 2)

cv2.imshow("hasil", foto)
cv2.waitKey(0)
Example #16
def get_axes_texts(img, axis_entities):
    """The function for getting the texts in the axis"""
    try:
        data = []
        # No axes in the image
        if img is None or axis_entities is None:
            return data
        (img_height, img_width) = img.shape[:2]
        for axis_id, axis_entity in enumerate(axis_entities):
            axis_bbox = axis_entity.get("bbox")
            axis_direction = axis_entity.get("direction")
            axis_score = axis_entity.get("score")
            new_axis_bbox = axis_bbox
            if axis_bbox:
                axis_x = axis_bbox.get("x")
                axis_y = axis_bbox.get("y")
                axis_width = axis_bbox.get("width")
                axis_height = axis_bbox.get("height")
                # a truthiness test would wrongly reject axes at x == 0 or y == 0
                if axis_x is not None and axis_y is not None and axis_width and axis_height:
                    if axis_x >= 0 and axis_y >= 0 and axis_width > 0 and axis_height > 0:
                        # Step 0: extend the axis image to avoid mistakes
                        if axis_direction == 0:
                            axis_x_extra = max(round(axis_width * 0.02), 6)
                            if axis_x >= axis_x_extra:
                                axis_x = axis_x - axis_x_extra
                                axis_width = axis_width + axis_x_extra
                            if axis_x + axis_width + axis_x_extra < img_width:
                                axis_width = axis_width + axis_x_extra
                        elif axis_direction == 90:
                            axis_y_extra = max(round(axis_height * 0.02), 6)
                            if axis_y >= axis_y_extra:
                                axis_y = axis_y - axis_y_extra
                                axis_height = axis_height + axis_y_extra
                            if axis_y + axis_height + axis_y_extra < img_height:
                                axis_height = axis_height + axis_y_extra
                        new_axis_bbox = {
                            "x": axis_x,
                            "y": axis_y,
                            "width": axis_width,
                            "height": axis_height
                        }
                        # Step 1: crop the axis image
                        axis_img = img[axis_y:(axis_y + axis_height),
                                       axis_x:(axis_x + axis_width)].copy()
                        # Step 2: enhance the contrast
                        axis_img_enhanced = contrast_enhance(axis_img)
                        # Step 3: partition the image
                        line_img, tick_img, title_img, ticks_bbox = partition_axis(axis_img_enhanced, \
                            axis_id, axis_direction)
                        tick_texts = None
                        title_texts = None
                        if tick_img is not None and isinstance(
                                tick_img, np.ndarray):
                            tick_img_pil = CV2PIL(tick_img)
                            if TESTING["axis"]["sign"]:
                                tick_img_pil.save(TESTING['dir'] + '/axis_' + str(axis_id) + \
                                '_test_tick.png')
                            tick_texts = pt.image_to_data(tick_img_pil,
                                                          config='--psm 6')
                            tick_texts = understand_data(tick_texts)
                        if title_img is not None and isinstance(
                                title_img, np.ndarray):
                            title_img_pil = CV2PIL(title_img)
                            if TESTING["axis"]["sign"]:
                                title_img_pil.save(TESTING['dir'] + '/axis_' + str(axis_id) + \
                                    '_test_title.png')
                            title_texts = pt.image_to_string(title_img_pil,
                                                             config='--psm 6')
                        if tick_texts is not None:
                            img_shape = {
                                "width": img_width,
                                "height": img_height
                            }
                            formatted_axis = get_format_axis(tick_texts, title_texts,\
                                ticks_bbox, img_shape,\
                                new_axis_bbox, axis_direction, axis_score)
                            data.append(formatted_axis)
    except Exception as e:
        print(repr(e))
        traceback.print_exc()
    return data
Example #17
def read_text(image_path):
    print(pytesseract.image_to_data(Image.open(image_path)))
Example #18
def extract_text_from_image(filepath, desired_width, desired_height, df, lang):
    image = Image.open(filepath)
    h_ratio = image.size[1] / desired_height
    w_ratio = image.size[0] / desired_width
    word_coord_lis = []
    text_list = []

    for index, row in df.iterrows():
        left = row['text_left'] * w_ratio
        top = row['text_top'] * h_ratio
        right = (row['text_left'] + row['text_width']) * w_ratio
        bottom = (row['text_top'] + row['text_height']) * h_ratio
        coord = []
        crop_image = image.crop((left - 5, top - 5, right + 5, bottom + 5))
        if row['text_height'] > 2 * row['font_size']:
            temp_df = pytesseract.image_to_data(crop_image,
                                                lang=LANG_MAPPING[lang] +
                                                "+eng",
                                                output_type=Output.DATAFRAME)
            temp_df = temp_df[temp_df.text.notnull()]

            text = ""
            for index2, row1 in temp_df.iterrows():
                word_coord = {}
                text = text + " " + str(row1["text"])
                word_coord['text'] = str(row1["text"])
                word_coord['conf'] = row1["conf"]
                word_coord['text_left'] = int(row1["left"] + left)
                word_coord['text_top'] = int(row1["top"] + top)
                word_coord['text_width'] = int(row1["width"])
                word_coord['text_height'] = int(row1["height"])
                coord.append(word_coord)

            word_coord_lis.append(coord)
            text_list.append(text)
        else:
            temp_df = pytesseract.image_to_data(crop_image,
                                                config='--psm 7',
                                                lang=LANG_MAPPING[lang] +
                                                "+eng",
                                                output_type=Output.DATAFRAME)
            temp_df = temp_df[temp_df.text.notnull()]
            text = ""

            for index2, row2 in temp_df.iterrows():
                word_coord = {}
                text = text + " " + str(row2["text"])
                word_coord['text'] = str(row2["text"])
                word_coord['conf'] = row2["conf"]
                word_coord['text_left'] = int(row2["left"] + left)
                word_coord['text_top'] = int(row2["top"] + top)
                word_coord['text_width'] = int(row2["width"])
                word_coord['text_height'] = int(row2["height"])
                coord.append(word_coord)

            word_coord_lis.append(coord)
            text_list.append(text)

    df['word_coords'] = word_coord_lis
    df['text'] = text_list
    return df
Example #19
def extract_text_from_image(filepath, desired_width, desired_height, df, lang):
    image   = Image.open(filepath)
    h_ratio = image.size[1]/desired_height
    w_ratio = image.size[0]/desired_width
    word_coord_lis = []
    text_list = []

    for index, row in df.iterrows():
        left   = row['text_left']*w_ratio
        top    = row['text_top']*h_ratio
        right  = (row['text_left'] + row['text_width'])*w_ratio
        bottom = (row['text_top'] + row['text_height'])*h_ratio
        coord  = []
        crop_image = image.crop((left-CROP_CONFIG[lang]['left'], top-CROP_CONFIG[lang]['top'], right+CROP_CONFIG[lang]['right'], bottom+CROP_CONFIG[lang]['bottom']))
        #crop_image.save(str(index) + '.jpg')
        if row['text_height']>2*row['font_size']:
            temp_df = pytesseract.image_to_data(crop_image, lang= LANG_MAPPING[lang][0],output_type=Output.DATAFRAME)

            temp_df = temp_df[temp_df.text.notnull()]

            text = ""
            for index2, row1 in temp_df.iterrows():
                word_coord = {}
                temp_text  = str(row1["text"])
                temp_conf  = row1["conf"]
                # optionally re-OCR low-confidence words with low_conf_ocr
                # (Example #4):
                # if temp_conf < 30:
                #     temp_text, temp_conf = low_conf_ocr(lang, int(row1["left"]+left), int(row1["top"]+top), int(row1["width"]), int(row1["height"]), image)
                text = text + " " + str(temp_text)
                word_coord['text']          = str(temp_text)
                word_coord['conf']          = temp_conf
                word_coord['text_left']     = int(row1["left"]+left)
                word_coord['text_top']      = int(row1["top"]+top)
                word_coord['text_width']    = int(row1["width"])
                word_coord['text_height']   = int(row1["height"])
                coord.append(word_coord)

            word_coord_lis.append(coord)
            text_list.append(text)
        else:
            temp_df = pytesseract.image_to_data(crop_image,config='--psm 7', lang=LANG_MAPPING[lang][0],output_type=Output.DATAFRAME)
            temp_df = temp_df[temp_df.text.notnull()]
            text = ""

            for index2, row2 in temp_df.iterrows():
                word_coord = {}
                temp_text  = str(row2["text"])
                temp_conf  = row2["conf"]
                # optionally re-OCR low-confidence words with low_conf_ocr
                # (Example #4):
                # if temp_conf < 30:
                #     temp_text, temp_conf = low_conf_ocr(lang, int(row2["left"]+left), int(row2["top"]+top), int(row2["width"]), int(row2["height"]), image)
                text = text + " " + str(temp_text)
                word_coord['text']          = str(temp_text)
                word_coord['conf']          = temp_conf
                word_coord['text_left']     = int(row2["left"]+left)
                word_coord['text_top']      = int(row2["top"]+top)
                word_coord['text_width']    = int(row2["width"])
                word_coord['text_height']   = int(row2["height"])
                coord.append(word_coord)

            word_coord_lis.append(coord)
            text_list.append(text)


    df['word_coords'] = word_coord_lis
    df['text']        = text_list
    return df
Example #20
point_table = point_in_table(table)

# places_intersection_points(point_table)  # shows the intersection points

# show_cell_numbers()  # shows the cell numbers

# Display each row by its number
xs = 0
for i in point_table:
    xs += 1
    print(xs)
    cv2.imwrite('out' + os.sep + 'cropped.png', show_row(point_table, xs))

    img = cv2.cvtColor(show_row(point_table, xs), cv2.COLOR_BGR2RGB)
    data = pytesseract.image_to_data(img, lang='eng', config=r'--oem 3 --psm 11')

    # Iterate over the recognized text entries
    for i, el in enumerate(data.splitlines()):
        if i == 0:
            continue
        el = el.split()
        try:
            # Draw the label onto the image
            x, y, w, h = int(el[6]), int(el[7]), int(el[8]), int(el[9])
            # cv2.rectangle(img, (x, y), (w + x, h + y), (0, 0, 255), 1)
            print(x)
            print(y)
            cv2.putText(img, el[11], (x + 15, y + 7), cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255), 1)
        except IndexError:
            print("Операция была пропущена")
Example #21
def processURL():
    # get path from url
    url = request.args.get('url')
    str1 = url.split('files/')
    str2 = str1[0] + 'files%2F'
    str3 = str1[1].split('?alt')
    url1 = str2 + str3[0]

    # return a json object with image info from the 'url'
    r = requests.get(url1).json()
    token = r['downloadTokens']

    # complete path for the image in firebase storage
    path = url1 + '?alt=media&token=' + token

    IMAGE_SIZE = 1800
    BINARY_THRESHOLD = 180

    # custom configurations
    # custom_psm_config = r'--psm 4'
    language = 'eng'
    custom_config = r'--oem 1 --psm 6 -c preserve_interword_spaces=1'

    # return {"path": path}
    #
    # find column headers
    testNameColumn = ['description', 'test', 'test name']
    unitColumn = ['unit', 'units']
    rangeColumn = ['ref. range', 'reference value', 'reference range']
    resultColumn = ['result']
    flagColumn = ['flag']

    count = 0
    testName = ''
    unit = ''
    refRange = ''
    result = ''
    flag = ''

    x_name, x_flag, x_unit, x_range, x_result = 0, 0, 0, 0, 0
    y_name, y_flag, y_unit, y_range, y_result = 0, 0, 0, 0, 0

    testNameTableData = ''
    unitTableData = ''
    rangeTableData = ''
    resultTableData = ''
    flagTableData = ''

    # fetch the image from Firebase Storage
    response = requests.get(path)
    image_bytes = BytesIO(response.content)

    img = Image.open(image_bytes)
    img.show()

    # rescale image
    def set_image_dpi(file_path):
        im = Image.open(file_path)
        length_x, width_y = im.size
        factor = max(1, int(IMAGE_SIZE / length_x))
        size = factor * length_x, factor * width_y
        im_resized = im.resize(size, Image.ANTIALIAS)  # Image.LANCZOS in Pillow >= 10
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.jpg')
        temp_filename = temp_file.name
        im_resized.save(temp_filename, dpi=(300, 300))
        return temp_filename

    def image_smoothening(img):
        ret1, th1 = cv2.threshold(img, BINARY_THRESHOLD, 255,
                                  cv2.THRESH_BINARY)
        ret2, th2 = cv2.threshold(th1, 0, 255,
                                  cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        blur = cv2.GaussianBlur(th2, (1, 1), 0)
        ret3, th3 = cv2.threshold(blur, 0, 255,
                                  cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        return th3


    def remove_noise_and_smooth(file_path):
        # load the image in grayscale mode using 0
        img = cv2.imread(file_path, 0)
        filtered = cv2.adaptiveThreshold(img.astype(np.uint8), 255,
                                         cv2.ADAPTIVE_THRESH_MEAN_C,
                                         cv2.THRESH_BINARY, 41, 3)
        kernel = np.ones((1, 1), np.uint8)
        opening = cv2.morphologyEx(filtered, cv2.MORPH_OPEN, kernel)
        closing = cv2.morphologyEx(opening, cv2.MORPH_CLOSE, kernel)
        img = image_smoothening(img)
        or_image = cv2.bitwise_or(img, closing)
        return or_image

    def process_image_for_ocr(file_path):
        temp_filename = set_image_dpi(file_path)
        im_new = remove_noise_and_smooth(temp_filename)
        return im_new

    # note: the preprocessing helpers above are defined but never applied here
    # temp = process_image_for_ocr(path)

    # extract the raw text from the image
    text = pytesseract.image_to_string(img,
                                       lang=language,
                                       config=custom_config)
    print("final text", text)
    # return text

    for line in text.splitlines():
        for item in testNameColumn:
            nameExists = line.lower().find(item)
            if nameExists != -1:
                testName = item
                count = count + 1

        for item in unitColumn:
            unitExists = line.lower().find(item)
            if unitExists != -1:
                unit = item
                count = count + 1

        for item in rangeColumn:
            rangeExists = line.lower().find(item)
            if rangeExists != -1:
                refRange = item
                count = count + 1

        for item in resultColumn:
            resultExists = line.lower().find(item)
            if resultExists != -1:
                result = item
                count = count + 1

        for item in flagColumn:
            flagExists = line.lower().find(item)
            if flagExists != -1:
                flag = item
                count = count + 1

        if count == 4:
            print(count, " Column Headers")
            break

    # coordinates
    gray = cv2.cvtColor(np.float32(img), cv2.COLOR_BGR2GRAY)
    # Returns result containing box boundaries, confidences
    dict = pytesseract.image_to_data(gray,
                                     output_type=Output.DICT,
                                     config=custom_config)
    # image_to_data returns parallel lists, e.g. {'level': [...], 'text': [...], ...}
    n_boxes = len(dict['level'])

    # range => starting from 0 to n_boxes
    for i in range(n_boxes):
        (x, y, w, h) = (dict['left'][i], dict['top'][i], dict['width'][i],
                        dict['height'][i])
        # starting point = (x,y)
        # ending point = (x + w, y + h)
        # color = (0, 0, 255)
        # thickness
        # image = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2)

        if len(dict['text'][i].lower()) >= 3:

            if testName.lower().find(
                    dict['text'][i].lower()) != -1 and (dict['text'][i] != ''):
                if testName.startswith('test'):
                    if dict['text'][i].lower() == 'test':
                        x_name = x
                        y_name = y
            if flag.lower().find(
                    dict['text'][i].lower()) != -1 and (dict['text'][i] != ''):
                x_flag = x
                y_flag = y

            if unit.lower().find(
                    dict['text'][i].lower()) != -1 and (dict['text'][i] != ''):
                x_unit, y_unit = x, y

            if refRange.lower().find(
                    dict['text'][i].lower()) != -1 and (dict['text'][i] != ''):
                if refRange.startswith('ref'):
                    if dict['text'][i].lower() == 'reference':
                        x_range = x
                        y_range = y

            if result.lower().find(
                    dict['text'][i].lower()) != -1 and (dict['text'][i] != ''):
                x_result = x
                y_result = y

            if x_name == x or x_name == x + 1 or x_name == x - 1:
                testNameTableData = dict['text'][i]

            if x_result == x or x_result == x + 1 or x_result == x - 1:
                resultTableData = dict['text'][i]

            if x_flag == x or x_flag == x + 1 or x_flag == x - 1:
                flagTableData = dict['text'][i]

            if x_range == x or x_range == x + 1 or x_range == x - 1:
                rangeTableData = dict['text'][i]

            # get each column's x coordinate and find values starting with
            # that x coordinate; data for the table
            table = [[
                testNameTableData, resultTableData, flagTableData,
                unitTableData, rangeTableData
            ]]

    # array
    array = {
        testName.upper(): testNameTableData,
        result.upper(): resultTableData,
        flag.upper(): flagTableData,
        unit.upper(): unitTableData,
        refRange.upper(): rangeTableData
    }
    print(array)

    return {"array": array}
Example #22
# (assumes img was loaded earlier with cv2.imread)
# converting to its binary form; cv2.threshold returns a (retval, image) tuple
ret, bw_img = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)

cv2.imwrite("Binary.png", bw_img)

dst = cv2.fastNlMeansDenoising(bw_img, None, 1, 7, 21)
cv2.imwrite('denoised.png', dst)
plt.subplot(122), plt.imshow(dst)
plt.show()
plt.imsave('newimg.png', np.array(dst), cmap=cm.gray)

# configuring parameters for tesseract
custom_config = r'--oem 3 --psm 6'
# now feeding image to tesseract
details = pytesseract.image_to_data('newimg.png',
                                    output_type=Output.DICT,
                                    config=custom_config,
                                    lang='eng')
print(details.keys())
total_boxes = len(details['text'])
# reload the preprocessed image in colour so the boxes can be drawn in green
threshold_img = cv2.imread('newimg.png')
for sequence_number in range(total_boxes):
    if int(details['conf'][sequence_number]) > 30:
        (x, y, w, h) = (details['left'][sequence_number],
                        details['top'][sequence_number],
                        details['width'][sequence_number],
                        details['height'][sequence_number])
        threshold_img = cv2.rectangle(threshold_img, (x, y), (x + w, y + h),
                                      (0, 255, 0), 2)
# display image
cv2.imshow("captured_txt", threshold_img)
# Maintain output window until user presses a key
cv2.waitKey(0)