def ocr(crop_image, config, left, top):
    # NOTE: `lang` is expected to be defined at module level. image_to_data's
    # second positional argument is `lang`, so `config` must be passed by
    # keyword (passing it positionally raises TypeError: multiple values for 'lang').
    if config:
        temp_df = pytesseract.image_to_data(crop_image, config=config,
                                            lang=LANG_MAPPING[lang][0],
                                            output_type=Output.DATAFRAME)
    else:
        temp_df = pytesseract.image_to_data(crop_image,
                                            lang=LANG_MAPPING[lang][0],
                                            output_type=Output.DATAFRAME)
    temp_df = temp_df[temp_df.text.notnull()]
    text = ""
    coord = []
    for index, row in temp_df.iterrows():
        word_coord = {}
        temp_text = str(row["text"])
        temp_conf = row["conf"]
        text = text + " " + str(temp_text)
        word_coord['text'] = str(temp_text)
        word_coord['conf'] = temp_conf
        word_coord['text_left'] = int(row["left"] + left)
        word_coord['text_top'] = int(row["top"] + top)
        word_coord['text_width'] = int(row["width"])
        word_coord['text_height'] = int(row["height"])
        coord.append(word_coord)
    return coord, text
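# A minimal usage sketch for ocr() above; LANG_MAPPING, lang, and sample.png are
# illustrative assumptions, not part of the original module.
from PIL import Image

LANG_MAPPING = {'hi': ['hin']}   # hypothetical: language key -> traineddata names
lang = 'hi'                      # the module-level variable ocr() reads
page = Image.open('sample.png')  # hypothetical input page
word_boxes, line_text = ocr(page.crop((100, 50, 400, 90)), '--psm 7', 100, 50)
print(line_text)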
def find_target(boxes, image, label):
    """Detect the box with the given label."""
    global j  # must be declared before any use of j inside this function
    for box in boxes:
        roi = image[box[1]:box[1] + box[3], box[0]:box[0] + box[2]]
        if DEBUG:
            cv2.imshow('orig ' + str(j), roi)
        start = time.time()
        # preprocess region of interest
        img_preprocessed = image_processing.process_image(roi)
        # find tokens for OCR; tokens are in the format (x1, y1, x2, y2)
        tokens = image_processing.find_tokens(img_preprocessed)
        if TRACE:
            print("#tokens: ", len(tokens))
        tokens = filter_tokens(tokens, img_preprocessed)
        if TRACE:
            print("#filtered: ", len(tokens))
        end = time.time()
        print("time taken for tokens including preprocessing: ", end - start)
        if TRACE:
            for t in tokens:
                cv2.rectangle(roi, (t[0], t[1]), (t[2], t[3]), (255, 200, 50), 2)
            cv2.imshow('filtered token ' + str(j), roi)
            j = j + 1
        text = ""
        for (x1, y1, x2, y2) in tokens:
            tesseract_img = roi[y1:y2, x1:x2]
            tesseract_img = image_processing.resize_to_width(tesseract_img, 300, False)
            tesseract_img = cv2.cvtColor(tesseract_img, cv2.COLOR_BGR2GRAY)
            ret, tesseract_img = cv2.threshold(tesseract_img, 0, 255,
                                               cv2.THRESH_BINARY | cv2.THRESH_OTSU)
            start = time.time()
            data = pytesseract.image_to_data(
                tesseract_img,
                config="--psm 10 --oem 0 -c tessedit_char_whitelist=0123456789E",
                output_type=Output.DICT)
            end = time.time()
            print("tesseract time taken for one token: ", end - start)
            if TRACE:
                print("data['conf']: ", data['conf'])
            # keep the character only if its confidence is above 60
            if data['conf'][len(data['conf']) - 1] > 60:
                text = text + data['text'][len(data['conf']) - 1]
        if DEBUG:
            print("button label: ", text)
        # return the first box with the searched label
        if label == text or (label == '0' and text == 'E'):
            return box
    return None
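# Hypothetical call of find_target(): candidate_boxes would come from an earlier
# button-detection step as (x, y, w, h) tuples; the image name and values are
# illustrative assumptions.
screenshot = cv2.imread('calculator.png')
candidate_boxes = [(10, 20, 60, 40), (80, 20, 60, 40)]
hit = find_target(candidate_boxes, screenshot, '7')
print("box for label 7:", hit)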
def ocr(crop_image, configs, left, top, language):
    if configs:
        temp_df = pytesseract.image_to_data(crop_image,
                                            lang=language,
                                            config='--psm 7',
                                            output_type=Output.DATAFRAME)
    else:
        temp_df = pytesseract.image_to_data(crop_image,
                                            lang=language,
                                            output_type=Output.DATAFRAME)
    temp_df = temp_df[temp_df.text.notnull()]
    text = ""
    coord = []
    for index, row in temp_df.iterrows():
        temp_dict = {}
        vert = []
        temp_dict['identifier'] = str(uuid.uuid4())
        x_left = int(row["left"] + left)
        y_top = int(row["top"] + top)
        width = int(row["width"])
        height = int(row["height"])
        # vertices in clockwise order: top-left, top-right, bottom-right, bottom-left
        vert.append({'x': x_left, 'y': y_top})
        vert.append({'x': x_left + width, 'y': y_top})
        vert.append({'x': x_left + width, 'y': y_top + height})
        vert.append({'x': x_left, 'y': y_top + height})
        temp_dict['text'] = process_text(row['text'])
        temp_dict['conf'] = row["conf"]
        temp_dict['boundingBox'] = {}
        temp_dict['boundingBox']["vertices"] = vert
        if text == '':
            text = temp_dict['text']
        else:
            text = text + " " + temp_dict['text']
        coord.append(temp_dict)
    return coord, text
def low_conf_ocr(lang, left, top, width, height, image):
    crop_image = image.crop((left - 5, top - 7, left + width + 5, top + height + 7))
    # debug dump of the crop; the hardcoded path only exists on the author's machine
    # crop_image.save("/home/naresh/check/" + str(uuid.uuid4()) + '.jpg')
    temp_df_eng = pytesseract.image_to_data(crop_image, config='--psm 6',
                                            lang=LANG_MAPPING[lang][2],
                                            output_type=Output.DATAFRAME)
    temp_df_hin = pytesseract.image_to_data(crop_image, config='--psm 6',
                                            lang=LANG_MAPPING[lang][0],
                                            output_type=Output.DATAFRAME)
    eng_index = temp_df_eng['conf'].argmax()
    hin_index = temp_df_hin['conf'].argmax()
    eng_conf = temp_df_eng['conf'][eng_index]
    hin_conf = temp_df_hin['conf'][hin_index]
    # keep whichever language model is more confident about its best word
    if eng_conf >= hin_conf:
        conf = eng_conf
        text = temp_df_eng['text'][eng_index]
    else:
        conf = hin_conf
        text = temp_df_hin['text'][hin_index]
    return text, conf
def home():
    prop = {}
    res = {}
    if request.method == 'POST':
        f = request.files['file']
        key = request.form.get('var')
        img = Image.open(f.stream)
        config = r'--oem 3 --psm 6'  # works well
        if key == '1':  # Aadhaar rear side: pincode, state, town and address
            img_rear = get_img1(img)
            d = pytesseract.image_to_data(img_rear, lang='eng',
                                          output_type=Output.DICT, config=config)
            res = get_result1(d)
            return jsonify(res)
        elif key == '2':  # Aadhaar front side
            name_front = get_name_img(img)
            img_front = get_img2(img)
            d = pytesseract.image_to_data(img_front, lang='eng',
                                          output_type=Output.DICT, config=config)
            res = get_result2(d)
            res['name'] = get_name2(d)
            return jsonify(res)
        elif key == '3':  # PAN card OCR
            res = panocr(img)
            return jsonify(res)
        elif key == '4':  # voter ID rear
            data = Voter_IDREAR(img)
            return jsonify(data)
        elif key == '5':  # voter ID front
            data = Voter_IDFRONT(img)
            return jsonify(data)
    return render_template('index.html')
def __process_image_file(self, c: sqlite3.Cursor, path, dir_id, doc_path, page):
    if c.execute("select id from images where path = ?",
                 (path, )).fetchone() is not None:
        self.__add_message("Skipping already indexed file {}.".format(
            path.replace(self.path + "/", "")))
        return
    # binarize the image before OCR: grayscale, blur, adaptive threshold
    img = cv2.imread(path)
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(img_gray, (9, 9), 0)
    img = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                cv2.THRESH_BINARY, 11, 2)
    if self.tesseract_exe:
        pytesseract.tesseract_cmd = self.tesseract_exe
    d = pytesseract.image_to_data(img, output_type=Output.DICT)
    c.execute("insert into 'images' (path, directory_id) values (?, ?)",
              (path, dir_id))
    image_id = c.lastrowid
    for j in range(len(d["text"])):
        if not d['text'][j].strip():
            continue
        c.execute(
            "insert into 'texts' (text, left, top, width, height, image_id) values (?, ?, ?, ?, ?, ?)",
            (d['text'][j], d['left'][j], d['top'][j], d['width'][j],
             d['height'][j], image_id))
    if doc_path is not None and page is not None:
        doc_id = c.execute("select id from documents where path = ?",
                           (doc_path, )).fetchone()
        if doc_id is None:
            c.execute(
                "insert into documents (path, directory_id) values (?, ?)",
                (doc_path, dir_id))
            doc_id = c.lastrowid
        else:
            doc_id = doc_id[0]
        c.execute(
            "update images set document_id = ?, doc_page = ? where id = ?",
            (doc_id, page, image_id))
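# A minimal schema sketch matching the queries in __process_image_file above.
# Column sets are inferred from the INSERT/UPDATE statements, so treat this as an
# assumption about the project's actual schema rather than a copy of it.
import sqlite3

conn = sqlite3.connect('index.db')
conn.executescript("""
    create table if not exists images (
        id integer primary key,
        path text,
        directory_id integer,
        document_id integer,
        doc_page integer
    );
    create table if not exists texts (
        id integer primary key,
        text text,
        "left" integer, top integer, width integer, height integer,
        image_id integer
    );
    create table if not exists documents (
        id integer primary key,
        path text,
        directory_id integer
    );
""")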
def recognize_text_in_boxes(boxes, image):
    boxes_ocr = []
    for box in boxes:
        roi = image[box[1]:box[1] + box[3], box[0]:box[0] + box[2]]
        # preprocess region of interest
        img_preprocessed = image_processing.process_image(roi)
        # find tokens for OCR
        tokens = image_processing.find_tokens(img_preprocessed)
        tokens = filter_tokens(tokens, img_preprocessed)
        print("# tokens for box ", box, ": ", len(tokens))
        text = ""
        i = 0
        for (x1, y1, x2, y2) in tokens:
            tesseract_img = roi[y1:y2, x1:x2]
            tesseract_img = image_processing.resize_to_width(tesseract_img, 300, False)
            tesseract_img = cv2.cvtColor(tesseract_img, cv2.COLOR_BGR2GRAY)
            ret, tesseract_img = cv2.threshold(tesseract_img, 0, 255,
                                               cv2.THRESH_BINARY | cv2.THRESH_OTSU)
            if TRACE:
                cv2.imshow('tesser img' + str(i), tesseract_img)
                i = i + 1
            data = pytesseract.image_to_data(
                tesseract_img,
                config="--psm 10 --oem 0 -c tessedit_char_whitelist=0123456789Ee",
                output_type=Output.DICT)
            token_text = data['text']
            if data['conf'][len(data['conf']) - 1] > 0:
                text = text + data['text'][len(data['conf']) - 1]
            if TRACE:
                print("token_text: ", token_text)
                print("data['conf']: ", data['conf'])
        print("text read from box: ", text)
        boxes_ocr.append((box, text))
    return boxes_ocr
def scan_data(self):
    dict_data = pytesseract.image_to_data(Image.open(self.filename),
                                          output_type=Output.DICT)
    json_array = []
    for i in range(len(dict_data['text'])):
        # skip empty text entries
        if len(dict_data['text'][i]) != 0:
            json_line = {
                'text': dict_data['text'][i],
                'left': dict_data['left'][i],
                'top': dict_data['top'][i],
                'width': dict_data['width'][i],
                'height': dict_data['height'][i]
            }
            json_array.append(json_line)
    return json_array
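# Hypothetical caller for scan_data(): dump the word boxes to a JSON file.
# `scanner` stands in for an instance of the surrounding class, whose name is
# not shown in this snippet.
import json

words = scanner.scan_data()
with open('words.json', 'w') as fh:
    json.dump(words, fh, indent=2)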
def current_words_location(pid, words, output_dir='out'):
    """Get the location of the given words on the current page."""
    # 1. take a screenshot of the phone
    photo_name = phone.get_page_photo(pid, output_dir)
    if photo_name is None:
        return None
    # 2. run OCR on the screenshot
    data = pytesseract.image_to_data(
        Image.open(os.path.join(output_dir, photo_name)),
        output_type=Output.DICT,
        lang='chi_sim')
    # 3. compare the OCR output with the target words
    for i in range(0, len(data['text'])):
        if data['text'][i] == words[0]:
            is_found = True
            for j, word in enumerate(words):
                # guard against running past the end of the array
                if i + j >= len(data['text']):
                    return None
                if word != data['text'][i + j]:
                    is_found = False
                    break
            if is_found:
                # 4. return the result
                os.remove(os.path.join(output_dir, photo_name))
                if data['width'][i] == 0 or data['height'][i] == 0:
                    return None
                return {
                    'x': data['left'][i],
                    'y': data['top'][i],
                    'w': data['width'][i],
                    'h': data['height'][i]
                }
    os.remove(os.path.join(output_dir, photo_name))
    return None
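# The matching logic above looks for the target phrase as a run of consecutive
# recognized words. A self-contained illustration of that idea (simplified: the
# original aborts entirely when the phrase would run past the end):
def find_phrase(texts, words):
    for i in range(len(texts) - len(words) + 1):
        if texts[i:i + len(words)] == list(words):
            return i
    return None

assert find_phrase(['打开', '设置', '页面'], ['设置', '页面']) == 1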
import cv2
from pytesseract import pytesseract
from pytesseract import Output

pytesseract.tesseract_cmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe"

img = cv2.imread("ur_mom.png")
image_data = pytesseract.image_to_data(img, output_type=Output.DICT)
print(image_data['left'])

# draw a box and label for every non-empty word
for i, word in enumerate(image_data['text']):
    if word != '':
        x, y, w, h = (image_data['left'][i], image_data['top'][i],
                      image_data['width'][i], image_data['height'][i])
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 3)
        cv2.putText(img, word, (x, y - 16), cv2.FONT_HERSHEY_COMPLEX, 1,
                    (0, 0, 255), 2)

cv2.imshow("window", img)
cv2.waitKey(0)
def img_to_series(img_name):
    # image_to_data returns a TSV string by default; round-trip it through a file
    data = pytesseract.image_to_data(Image.open(img_name, "r").convert("L"))
    with open("data.tsv", "w") as save_file:
        save_file.write(data)
    return pd.read_csv("data.tsv", sep="\t")
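# An equivalent sketch that parses the TSV in memory instead of round-tripping
# through data.tsv; quoting=csv.QUOTE_NONE mirrors what pytesseract's own
# DATAFRAME output type uses, since OCR text may contain stray quote characters.
import csv
import io

def img_to_series_in_memory(img_name):
    data = pytesseract.image_to_data(Image.open(img_name, "r").convert("L"))
    return pd.read_csv(io.StringIO(data), sep="\t", quoting=csv.QUOTE_NONE)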
def detect_text_data(cropped_img):
    text_data = pytesseract.image_to_data(cropped_img,
                                          config=TESSDATA_CONFIG,
                                          output_type=Output.DICT)
    return text_data
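# Hypothetical caller: keep only words tesseract is reasonably confident about.
# `cropped` and the 60-point cutoff are illustrative; TESSDATA_CONFIG is whatever
# config string the surrounding module defines.
data = detect_text_data(cropped)
confident_words = [
    word for word, conf in zip(data['text'], data['conf'])
    if word.strip() and float(conf) > 60
]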
def MAWB(data, img):
    # try the three MAWB number patterns on the upright text first
    x = re.findall(r'\d{3}[-\s]\d{4}[\s]\d{4}[\s]', data)
    y = re.findall(r'[:]\d{11}', data)
    z = re.findall(r'\d{3}-\d{8}', data)
    boxes = pt.image_to_data(img, output_type=Output.DICT)
    out = pt.image_to_data(img)
    if len(x) != 0:
        return (x, boxes, out, data, img)
    elif len(y) != 0:
        return (y, boxes, out, data, img)
    elif len(z) != 0:
        return (z, boxes, out, data, img)
    # nothing found: re-OCR at each rotation until the pattern matches
    # (the original tried 90, 180, 270, then 360, so 360 is kept even
    # though it is a no-op rotation; it also returned boxes from the
    # unrotated image in the 90-degree branch, which is fixed here)
    for angle in (90, 180, 270, 360):
        rotated = ndimage.rotate(img, angle)
        text = pt.image_to_string(rotated, config='--psm 6 --oem 1')
        x = re.findall(r'\d{3}-\d{8}', text)
        if len(x) != 0:
            boxes = pt.image_to_data(rotated, output_type=Output.DICT)
            out = pt.image_to_data(rotated)
            return (x, boxes, out, text, rotated)
    return None
def data_output(temp):
    # avoid shadowing the built-in dict
    data = pytesseract.image_to_data(temp, output_type=Output.DICT,
                                     config=custom_config)
    return data
import cv2
from pytesseract import pytesseract
from pytesseract import Output

pytesseract.tesseract_cmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe"

foto = cv2.imread("nowaf.png")
foto_data = pytesseract.image_to_data(foto, output_type=Output.DICT)

# draw a box and label for every non-empty word
for i, word in enumerate(foto_data['text']):
    if word != "":
        x, y, w, h = (foto_data['left'][i], foto_data['top'][i],
                      foto_data['width'][i], foto_data['height'][i])
        cv2.rectangle(foto, (x, y), (x + w, y + h), (0, 255, 0), 3)
        cv2.putText(foto, word, (x, y - 16), cv2.FONT_HERSHEY_COMPLEX, 1,
                    (0, 0, 255), 2)

cv2.imshow("hasil", foto)
cv2.waitKey(0)
def get_axes_texts(img, axis_entities):
    """The function for getting the texts in the axis"""
    try:
        data = []
        # no axes in the image
        if img is None or axis_entities is None:
            return data
        (img_height, img_width) = img.shape[:2]
        for axis_id, axis_entity in enumerate(axis_entities):
            axis_bbox = axis_entity.get("bbox")
            axis_direction = axis_entity.get("direction")
            axis_score = axis_entity.get("score")
            new_axis_bbox = axis_bbox
            if axis_bbox:
                axis_x = axis_bbox.get("x")
                axis_y = axis_bbox.get("y")
                axis_width = axis_bbox.get("width")
                axis_height = axis_bbox.get("height")
                # `is not None` rather than truthiness, so an axis flush with
                # the image border (x == 0 or y == 0) is not skipped
                if axis_x is not None and axis_y is not None \
                        and axis_width and axis_height:
                    if axis_x >= 0 and axis_y >= 0 and axis_width > 0 and axis_height > 0:
                        # Step 0: extend the axis image to avoid mistakes
                        if axis_direction == 0:
                            axis_x_extra = max(round(axis_width * 0.02), 6)
                            if axis_x >= axis_x_extra:
                                axis_x = axis_x - axis_x_extra
                                axis_width = axis_width + axis_x_extra
                            if axis_x + axis_width + axis_x_extra < img_width:
                                axis_width = axis_width + axis_x_extra
                        elif axis_direction == 90:
                            axis_y_extra = max(round(axis_height * 0.02), 6)
                            if axis_y >= axis_y_extra:
                                axis_y = axis_y - axis_y_extra
                                axis_height = axis_height + axis_y_extra
                            if axis_y + axis_height + axis_y_extra < img_height:
                                axis_height = axis_height + axis_y_extra
                        new_axis_bbox = {
                            "x": axis_x,
                            "y": axis_y,
                            "width": axis_width,
                            "height": axis_height
                        }
                        # Step 1: crop the axis image
                        axis_img = img[axis_y:(axis_y + axis_height),
                                       axis_x:(axis_x + axis_width)].copy()
                        # Step 2: enhance the contrast
                        axis_img_enhanced = contrast_enhance(axis_img)
                        # Step 3: partition the image
                        line_img, tick_img, title_img, ticks_bbox = partition_axis(
                            axis_img_enhanced, axis_id, axis_direction)
                        tick_texts = None
                        title_texts = None
                        if tick_img is not None and isinstance(tick_img, np.ndarray):
                            tick_img_pil = CV2PIL(tick_img)
                            if TESTING["axis"]["sign"]:
                                tick_img_pil.save(TESTING['dir'] + '/axis_' +
                                                  str(axis_id) + '_test_tick.png')
                            tick_texts = pt.image_to_data(tick_img_pil,
                                                          config='--psm 6')
                            tick_texts = understand_data(tick_texts)
                        # fixed copy-paste bug: the original tested tick_img here
                        if title_img is not None and isinstance(title_img, np.ndarray):
                            title_img_pil = CV2PIL(title_img)
                            if TESTING["axis"]["sign"]:
                                title_img_pil.save(TESTING['dir'] + '/axis_' +
                                                   str(axis_id) + '_test_title.png')
                            title_texts = pt.image_to_string(title_img_pil,
                                                             config='--psm 6')
                        if tick_texts is not None:
                            img_shape = {"width": img_width, "height": img_height}
                            formated_axis = get_format_axis(
                                tick_texts, title_texts, ticks_bbox, img_shape,
                                new_axis_bbox, axis_direction, axis_score)
                            data.append(formated_axis)
    except Exception as e:
        print(repr(e))
        traceback.print_exc()
    return data
def read_text(image_path):
    print(pytesseract.image_to_data(Image.open(image_path)))
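# read_text() above prints the raw TSV string; for structured access, a DataFrame
# can be requested instead. A sketch, assuming pandas is installed (pytesseract
# needs it for Output.DATAFRAME).
from pytesseract import Output

def read_text_df(image_path):
    df = pytesseract.image_to_data(Image.open(image_path),
                                   output_type=Output.DATAFRAME)
    # drop rows that carry only layout structure, with no recognized text
    return df[df.text.notnull()]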
def extract_text_from_image(filepath, desired_width, desired_height, df, lang):
    image = Image.open(filepath)
    # scale factors between the resized page the boxes refer to and the original
    h_ratio = image.size[1] / desired_height
    w_ratio = image.size[0] / desired_width
    word_coord_lis = []
    text_list = []
    for index, row in df.iterrows():
        left = row['text_left'] * w_ratio
        top = row['text_top'] * h_ratio
        right = (row['text_left'] + row['text_width']) * w_ratio
        bottom = (row['text_top'] + row['text_height']) * h_ratio
        coord = []
        crop_image = image.crop((left - 5, top - 5, right + 5, bottom + 5))
        if row['text_height'] > 2 * row['font_size']:
            # multi-line region: let tesseract segment it itself
            temp_df = pytesseract.image_to_data(crop_image,
                                                lang=LANG_MAPPING[lang] + "+eng",
                                                output_type=Output.DATAFRAME)
        else:
            # single text line: --psm 7 treats the crop as one line
            temp_df = pytesseract.image_to_data(crop_image, config='--psm 7',
                                                lang=LANG_MAPPING[lang] + "+eng",
                                                output_type=Output.DATAFRAME)
        temp_df = temp_df[temp_df.text.notnull()]
        text = ""
        for index2, row1 in temp_df.iterrows():
            word_coord = {}
            text = text + " " + str(row1["text"])
            word_coord['text'] = str(row1["text"])
            word_coord['conf'] = row1["conf"]
            word_coord['text_left'] = int(row1["left"] + left)
            word_coord['text_top'] = int(row1["top"] + top)
            word_coord['text_width'] = int(row1["width"])
            word_coord['text_height'] = int(row1["height"])
            coord.append(word_coord)
        word_coord_lis.append(coord)
        text_list.append(text)
    df['word_coords'] = word_coord_lis
    df['text'] = text_list
    return df
def extract_text_from_image(filepath, desired_width, desired_height, df, lang):
    image = Image.open(filepath)
    h_ratio = image.size[1] / desired_height
    w_ratio = image.size[0] / desired_width
    word_coord_lis = []
    text_list = []
    for index, row in df.iterrows():
        left = row['text_left'] * w_ratio
        top = row['text_top'] * h_ratio
        right = (row['text_left'] + row['text_width']) * w_ratio
        bottom = (row['text_top'] + row['text_height']) * h_ratio
        coord = []
        # per-language crop padding
        crop_image = image.crop((left - CROP_CONFIG[lang]['left'],
                                 top - CROP_CONFIG[lang]['top'],
                                 right + CROP_CONFIG[lang]['right'],
                                 bottom + CROP_CONFIG[lang]['bottom']))
        if row['text_height'] > 2 * row['font_size']:
            # multi-line region: let tesseract segment it itself
            temp_df = pytesseract.image_to_data(crop_image,
                                                lang=LANG_MAPPING[lang][0],
                                                output_type=Output.DATAFRAME)
        else:
            # single text line: --psm 7 treats the crop as one line
            temp_df = pytesseract.image_to_data(crop_image, config='--psm 7',
                                                lang=LANG_MAPPING[lang][0],
                                                output_type=Output.DATAFRAME)
        temp_df = temp_df[temp_df.text.notnull()]
        text = ""
        for index2, row1 in temp_df.iterrows():
            word_coord = {}
            temp_text = str(row1["text"])
            temp_conf = row1["conf"]
            text = text + " " + str(temp_text)
            word_coord['text'] = str(temp_text)
            word_coord['conf'] = temp_conf
            word_coord['text_left'] = int(row1["left"] + left)
            word_coord['text_top'] = int(row1["top"] + top)
            word_coord['text_width'] = int(row1["width"])
            word_coord['text_height'] = int(row1["height"])
            coord.append(word_coord)
        word_coord_lis.append(coord)
        text_list.append(text)
    df['word_coords'] = word_coord_lis
    df['text'] = text_list
    return df
point_table = point_in_table(table)
# places_intersection_points(point_table)  # shows the intersection points
# show_cell_numbers()  # shows the cell numbers

# render each row by its number
xs = 0
for row in point_table:
    xs += 1
    print(xs)
    cv2.imwrite('out' + os.sep + 'cropped.png', show_row(point_table, xs))
    img = cv2.cvtColor(show_row(point_table, xs), cv2.COLOR_BGR2RGB)
    data = pytesseract.image_to_data(img, lang='eng', config=r'--oem 3 --psm 11')
    # walk the TSV output line by line, skipping the header
    for i, el in enumerate(data.splitlines()):
        if i == 0:
            continue
        el = el.split()
        try:
            # draw the label on the image
            x, y, w, h = int(el[6]), int(el[7]), int(el[8]), int(el[9])
            # cv2.rectangle(img, (x, y), (w + x, h + y), (0, 0, 255), 1)
            print(x)
            print(y)
            cv2.putText(img, el[11], (x + 15, y + 7), cv2.FONT_HERSHEY_COMPLEX,
                        0.5, (0, 0, 255), 1)
        except IndexError:
            print("Operation was skipped")
def processURL():
    # get path from url
    url = request.args.get('url')
    str1 = url.split('files/')
    str2 = str1[0] + 'files%2F'
    str3 = str1[1].split('?alt')
    url1 = str2 + str3[0]

    # fetch the download token from Firebase and build the full media path
    r = requests.get(url1).json()
    token = r['downloadTokens']
    path = url1 + '?alt=media&token=' + token

    IMAGE_SIZE = 1800
    BINARY_THRESHOLD = 180

    # custom configurations
    language = 'eng'
    custom_config = r'--oem 1 --psm 6 -c preserve_interword_spaces=1'

    # column header aliases to look for in the report
    testNameColumn = ['description', 'test', 'test name']
    unitColumn = ['unit', 'units']
    rangeColumn = ['ref. range', 'reference value', 'reference range']
    resultColumn = ['result']
    flagColumn = ['flag']

    count = 0
    testName = ''
    unit = ''
    refRange = ''
    result = ''
    flag = ''
    x_name, x_flag, x_unit, x_range, x_result = 0, 0, 0, 0, 0
    y_name, y_flag, y_unit, y_range, y_result = 0, 0, 0, 0, 0
    testNameTableData = ''
    unitTableData = ''
    rangeTableData = ''
    resultTableData = ''
    flagTableData = ''

    # load the image from Firebase storage
    response = requests.get(path)
    image_bytes = BytesIO(response.content)
    img = Image.open(image_bytes)
    img.show()

    # rescale image
    def set_image_dpi(file_path):
        im = Image.open(file_path)
        length_x, width_y = im.size
        factor = max(1, int(IMAGE_SIZE / length_x))
        size = factor * length_x, factor * width_y
        im_resized = im.resize(size, Image.ANTIALIAS)
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.jpg')
        temp_filename = temp_file.name
        im_resized.save(temp_filename, dpi=(300, 300))
        return temp_filename

    def image_smoothening(img):
        ret1, th1 = cv2.threshold(img, BINARY_THRESHOLD, 255, cv2.THRESH_BINARY)
        ret2, th2 = cv2.threshold(th1, 0, 255,
                                  cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        blur = cv2.GaussianBlur(th2, (1, 1), 0)
        ret3, th3 = cv2.threshold(blur, 0, 255,
                                  cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        return th3

    def remove_noise_and_smooth(file_path):
        # load the image in grayscale mode using 0
        img = cv2.imread(file_path, 0)
        filtered = cv2.adaptiveThreshold(img.astype(np.uint8), 255,
                                         cv2.ADAPTIVE_THRESH_MEAN_C,
                                         cv2.THRESH_BINARY, 41, 3)
        kernel = np.ones((1, 1), np.uint8)
        opening = cv2.morphologyEx(filtered, cv2.MORPH_OPEN, kernel)
        closing = cv2.morphologyEx(opening, cv2.MORPH_CLOSE, kernel)
        img = image_smoothening(img)
        or_image = cv2.bitwise_or(img, closing)
        return or_image

    def process_image_for_ocr(file_path):
        temp_filename = set_image_dpi(file_path)
        im_new = remove_noise_and_smooth(temp_filename)
        return im_new

    # extract the full text of the report
    text = pytesseract.image_to_string(img, lang=language, config=custom_config)
    print("final text", text)

    # scan each line for the known column header aliases
    for line in text.splitlines():
        for item in testNameColumn:
            if line.lower().find(item) != -1:
                testName = item
                count = count + 1
        for item in unitColumn:
            if line.lower().find(item) != -1:
                unit = item
                count = count + 1
        for item in rangeColumn:
            if line.lower().find(item) != -1:
                refRange = item
                count = count + 1
        for item in resultColumn:
            if line.lower().find(item) != -1:
                result = item
                count = count + 1
        for item in flagColumn:
            if line.lower().find(item) != -1:
                flag = item
                count = count + 1
        if count == 4:
            print(count, " Column Headers")
            break

    # box boundaries and confidences for every recognized word
    # (renamed from `dict` to avoid shadowing the built-in)
    gray = cv2.cvtColor(np.float32(img), cv2.COLOR_BGR2GRAY)
    word_data = pytesseract.image_to_data(gray, output_type=Output.DICT,
                                          config=custom_config)
    n_boxes = len(word_data['level'])
    for i in range(n_boxes):
        (x, y, w, h) = (word_data['left'][i], word_data['top'][i],
                        word_data['width'][i], word_data['height'][i])
        if len(word_data['text'][i].lower()) >= 3:
            word = word_data['text'][i]
            # remember the x/y position of each recognized column header
            if testName.lower().find(word.lower()) != -1 and word != '':
                if testName.startswith('test'):
                    if word.lower() == 'test':
                        x_name = x
                        y_name = y
            if flag.lower().find(word.lower()) != -1 and word != '':
                x_flag = x
                y_flag = y
            if unit.lower().find(word.lower()) != -1 and word != '':
                x_unit, y_unit = x, y
            if refRange.lower().find(word.lower()) != -1 and word != '':
                if refRange.startswith('ref'):
                    if word.lower() == 'reference':
                        x_range = x
                        y_range = y
            if result.lower().find(word.lower()) != -1 and word != '':
                x_result = x
                y_result = y
            # words roughly aligned with a header column belong to that column
            if x_name == x or x_name == x + 1 or x_name == x - 1:
                testNameTableData = word
            if x_result == x or x_result == x + 1 or x_result == x - 1:
                resultTableData = word
            if x_flag == x or x_flag == x + 1 or x_flag == x - 1:
                flagTableData = word
            if x_range == x or x_range == x + 1 or x_range == x - 1:
                rangeTableData = word

    # data for the table
    table = [[
        testNameTableData, resultTableData, flagTableData, unitTableData,
        rangeTableData
    ]]
    array = {
        testName.upper(): testNameTableData,
        result.upper(): resultTableData,
        flag.upper(): flagTableData,
        unit.upper(): unitTableData,
        refRange.upper(): rangeTableData
    }
    print(array)
    return {"array": array}
# converting to its binary form (cv2.threshold returns a (retval, image) pair)
ret, bw_img = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
cv2.imwrite("Binary.png", bw_img)

dst = cv2.fastNlMeansDenoising(bw_img, None, 1, 7, 21)
cv2.imwrite('denoised.png', dst)
plt.subplot(122), plt.imshow(dst)
plt.show()
plt.imsave('newimg.png', np.array(dst), cmap=cm.gray)

# configuring parameters for tesseract
custom_config = r'--oem 3 --psm 6'

# now feeding image to tesseract
details = pytesseract.image_to_data('newimg.png', output_type=Output.DICT,
                                    config=custom_config, lang='eng')
print(details.keys())

# threshold_img was undefined in this fragment (it presumably came from an
# earlier step); re-reading the processed image lets the snippet run standalone
threshold_img = cv2.imread('newimg.png')

# draw a box around every word recognized with confidence above 30
total_boxes = len(details['text'])
for sequence_number in range(total_boxes):
    if int(details['conf'][sequence_number]) > 30:
        (x, y, w, h) = (details['left'][sequence_number],
                        details['top'][sequence_number],
                        details['width'][sequence_number],
                        details['height'][sequence_number])
        threshold_img = cv2.rectangle(threshold_img, (x, y), (x + w, y + h),
                                      (0, 255, 0), 2)

# display image
cv2.imshow("captured_txt", threshold_img)
# keep the output window open until the user presses a key
cv2.waitKey(0)