def get_texts(xml, found, img_path, task_id):
    """Extract labelled text fields from an image using a Pascal-VOC style XML template.

    Parameters:
        xml: path to a VOC-style annotation file; objects whose name contains
            'extract' define the regions of interest.
        found: the warped/aligned page image as a numpy array (indexable as
            found[y0:y1, x0:x1]).
        img_path: original image path; used to build the trademark save path.
        task_id: task identifier; used as the trademark image file name.

    Returns:
        defaultdict(str) mapping field name (annotation name with '-extract'
        stripped) to recognised text, or to a saved image path for the
        trademark field.
    """
    dom = ET.parse(xml)
    obj = dom.findall("./object")
    dic = {}
    texts = defaultdict(str)
    # Encode the full page once; sent along with every detection request.
    ori_img = np.array(found)
    _, ori = cv2.imencode('.jpg', ori_img)
    ori = base64.b64encode(ori.tostring())  # NOTE(review): .tostring() is deprecated in numpy; .tobytes() is the modern spelling — confirm numpy version
    # Collect bounding boxes for every '...-extract' object in the template.
    for ob in obj:
        # NOTE(review): Element.getchildren() was removed in Python 3.9 — confirm runtime version
        name = str(ob.getchildren()[0].text)
        if 'extract' in name:
            bnd_box = ob.findall("bndbox")[0]
            x_min = bnd_box.findall("xmin")[0].text
            y_min = bnd_box.findall("ymin")[0].text
            x_max = bnd_box.findall("xmax")[0].text
            y_max = bnd_box.findall("ymax")[0].text
            dic[name] = [int(x_min), int(y_min), int(x_max), int(y_max)]
    _, img = cv2.imencode('.jpg', found)
    img = base64.b64encode(img.tostring())
    # NOTE(review): scale_w / scale_h are not defined in this function — unless
    # they exist as module globals this raises NameError; confirm.
    data = {'img': img, 'scale_w': scale_w, 'scale_h': scale_h, 'ori_img': ori}
    # Text-detection service: expected to return text-line regions.
    # NOTE(review): `images` is a requests.Response here and is iterated below;
    # iterating a Response yields raw byte chunks — `.json()` was probably
    # intended, as in the sibling functions. Confirm against the service API.
    images = requests.post('http://172.30.20.154:32021/text_predict', data=data)
    for key, value in dic.items():
        # value = [x_min, y_min, x_max, y_max] for this template field.
        if key == '商标-extract':
            # Trademark field: crop, resize and save the image instead of OCR.
            save_path = '/' + '/'.join(img_path.split('/')[1:5]) + '/trademark/'
            if not os.path.exists(save_path):
                os.makedirs(save_path)
            save_path = save_path + str(task_id) + '.jpg'
            Image.fromarray(found[value[1]:value[3], value[0]:value[2]]).resize(
                (500, 300)).save(save_path)
            texts[key.replace('-extract', '')] = save_path
            continue
        try:
            # Keep only detected lines vertically inside this field
            # (i[0] is an 8-value box: [x0,y0,x1,y1,x2,y2,x3,y3]).
            new_images = []
            for i in images:
                if i[0][1] > value[1] and i[0][7] < value[3]:
                    new_images.append(i)
            # Clip each surviving line horizontally to the field box,
            # re-cropping from `found` where the line overhangs either edge.
            # NOTE(review): i[0] is mutated in place here, which also mutates
            # the shared detection result — confirm this is intentional.
            new = []
            for i in new_images:
                if i[0][0] > value[0] and i[0][6] < value[2]:
                    # Fully inside the field: keep as-is.
                    new.append(i)
                elif i[0][0] < value[0] and (value[0] < i[0][6] < value[2]):
                    # Overhangs the left edge: clip left to the field boundary.
                    i[0][0] = value[0]
                    new.append(
                        [i[0], found[i[0][1]:i[0][7], value[0]:i[0][6]]])
                elif (value[2] > i[0][0] > value[0]) and i[0][6] > value[2]:
                    # Overhangs the right edge: clip right to the field boundary.
                    i[0][6] = value[2]
                    new.append(
                        [i[0], found[i[0][1]:i[0][7], i[0][0]:value[2]]])
                elif i[0][0] < value[0] and i[0][6] > value[2]:
                    # Spans the whole field: clip both sides.
                    i[0][0] = value[0]
                    i[0][6] = value[2]
                    new.append(
                        [i[0], found[i[0][1]:i[0][7], value[0]:value[2]]])
            if new:
                # Recognise each clipped line crop via the recognition service,
                # then assemble the field text with single_ocr.
                image_positions = []
                for j in new:
                    if j[1].any():  # skip empty crops
                        _, img = cv2.imencode('.jpg', j[1])
                        img = base64.b64encode(img.tostring())
                        data = {'img': img}
                        # content = [char_candidates, confidence_lists]
                        content = requests.post(
                            'http://172.30.20.154:32020/predict',
                            data=data).json()[:2]
                        # Accept the top-1 char when confidence > 0.9 and mark
                        # its confidence slot with [-1] for removal below.
                        for indexi, i in enumerate(content[1]):
                            if i[0] > 0.9:
                                content[0][indexi] = content[0][indexi][0]
                                content[1][indexi] = [-1]
                        # Strip all [-1] markers (remove() raises when none left).
                        while 1:
                            try:
                                content[1].remove([-1])
                            except:
                                break
                        # calculate(): resolves remaining low-confidence chars
                        # (presumably by edit distance — see the class variant).
                        content = calculate(content)
                        image_positions.append([j[0], content])
                texts[key.replace('-extract', '')] = single_ocr(image_positions).replace(
                    '\n', '')
            else:
                # No line detections inside the field: fall back to recognising
                # the whole page image directly.
                _, img = cv2.imencode('.jpg', found)
                img = base64.b64encode(img.tostring())
                data = {'img': img}
                content = requests.post('http://172.30.20.154:32020/predict',
                                        data=data).json()[:2]
                for indexi, i in enumerate(content[1]):
                    if i[0] > 0.9:
                        content[0][indexi] = content[0][indexi][0]
                        content[1][indexi] = [-1]
                while 1:
                    try:
                        content[1].remove([-1])
                    except:
                        break
                content = calculate(content)
                texts[key.replace('-extract', '')] = content
        except Exception as e:
            # Best-effort per field: log and move on to the next one.
            print(e)
            continue
    # Strip a leading label that OCR often captures inside the company-name field.
    if '企业名称' in texts.keys():
        texts['企业名称'] = texts['企业名称'].replace('企业名称', '')
    return texts
def generate_table(cell, src):
    """Reconstruct a table's cell grid and per-cell text from detection output.

    Parameters:
        cell: packed table-detection result; unpacked positionally as
            (pos, cols, rows, col_point, row_point, tables, table_shape) where
            pos = [x, y, w, h] of the table in `src`, col_point/row_point are
            grid-line coordinates and tables is a list of detected cell boxes.
        src: full page image (numpy array) the table is cropped from.

    Returns:
        (new_table, rows, cols, pos) where new_table is a list of
        [{row/col begin/end dict}, [cell_text, cell_w, cell_h]] entries.
    """
    pos, cols, rows, col_point, row_point, tables, table_shape = cell[0][
        1], cell[1], cell[2], cell[3], cell[4], cell[5], cell[6]
    col_point = sorted(col_point)
    row_point = sorted(row_point)
    # Drop the tallest box (presumably the outer table frame) then order cells
    # roughly top-left to bottom-right. TODO confirm the [:-1] assumption.
    tables = sorted(tables, key=lambda i: i[1][3])[:-1]
    tables = sorted(tables, key=lambda i: i[1][0] + i[1][1])
    # Positions of every single character inside the table.
    table_im = src[pos[1]:pos[1] + pos[3], pos[0]:pos[0] + pos[2]]
    table_line_regions = text_predict(table_im, 1, 1, table_im)
    torch.cuda.empty_cache()
    word_list = []
    for region_index, region in enumerate(table_line_regions):
        # region[0] is an 8-value box [x0,y0,x1,y1,x2,y2,x3,y3]; region[1] the crop.
        region_y = [region[0][1], region[0][5]]
        region_x = [region[0][0], region[0][2]]
        content = predict(Image.fromarray(region[1]).convert('L'))
        torch.cuda.empty_cache()
        # Repack as (char_candidates, confidences, char_x_positions).
        content = (content[0][0], content[0][1], content[1])
        # Accept top-1 chars with confidence > 0.9; mark their slot with [-1].
        for indexi, cont in enumerate(content[1]):
            if cont[0] > 0.9:
                content[0][indexi] = content[0][indexi][0]
                content[1][indexi] = [-1]
        # Remove all [-1] markers (remove() raises once none remain).
        while 1:
            try:
                content[1].remove([-1])
            except:
                break
        x = content[2]
        content = calculate(content)
        # Record each character's absolute 8-point box within the table crop.
        for index, word in enumerate(content):
            word_list.append([[
                x[index][0] + region_x[0], region_y[0],
                x[index][1] + region_x[0], region_y[0],
                x[index][0] + region_x[0], region_y[1],
                x[index][1] + region_x[0], region_y[1]
            ], word])
    # (Removed commented-out debug code that rendered the grid intersections.)
    # Map every detected cell box onto grid indices: col/row begin is the last
    # grid line the cell's left/top edge clears; col/row end the first line its
    # right/bottom edge stays inside of (half the gap as tolerance, 50px at the
    # grid edges).
    for i in tables:
        d = {'col_begin': 0, 'col_end': 0, 'row_begin': 0, 'row_end': 0}
        for index, value in enumerate(col_point):
            if index == 0:
                d_range = 50
            else:
                d_range = (col_point[index] - col_point[index - 1]) / 2
            if i[1][0] > col_point[index] - d_range:
                d['col_begin'] = index
        for index, value in enumerate(col_point):
            if index == len(col_point) - 1:
                d_range = 50
            else:
                d_range = (col_point[index + 1] - col_point[index]) / 2
            if i[1][0] + i[1][2] < col_point[index] + d_range:
                d['col_end'] = index
                break
        for index, value in enumerate(row_point):
            if index == 0:
                d_range = 50
            else:
                d_range = (row_point[index] - row_point[index - 1]) / 2
            if i[1][1] > row_point[index] - d_range:
                d['row_begin'] = index
        for index, value in enumerate(row_point):
            if index == len(row_point) - 1:
                d_range = 50
            else:
                d_range = (row_point[index + 1] - row_point[index]) / 2
            if i[1][1] + i[1][3] < row_point[index] + d_range:
                d['row_end'] = index
                break
        # Guarantee every cell spans at least one column and one row.
        if d['col_begin'] >= d['col_end']:
            d['col_end'] = d['col_begin'] + 1
        if d['row_begin'] >= d['row_end']:
            d['row_end'] = d['row_begin'] + 1
        i.append(d)
    # Build the full 1x1 grid of cells from the grid-line coordinates; the
    # trailing 0 is a "covered by a detected cell" flag.
    cell_list = []
    for row_p in range(len(row_point) - 1):
        for col_p in range(len(col_point) - 1):
            roi = table_im[int(row_point[row_p]):int(row_point[row_p + 1]),
                           int(col_point[col_p]):int(col_point[col_p + 1])]
            cell_list.append([
                roi,
                [
                    int(col_point[col_p]),
                    int(row_point[row_p]),
                    int(col_point[col_p + 1] - col_point[col_p]),
                    int(row_point[row_p + 1] - int(row_point[row_p]))
                ],
                {
                    'col_begin': col_p,
                    'col_end': col_p + 1,
                    'row_begin': row_p,
                    'row_end': row_p + 1
                },
                0
            ])
    # Check which grid cells are covered by a detected cell.
    # NOTE(review): the inner comparison uses col_begin/col_end of the whole
    # detected span rather than the loop's col/row — so only exact 1x1 matches
    # ever flip the flag; confirm whether col/row were intended here.
    for i in tables:
        col_begin, col_end, row_begin, row_end = \
            i[-1]['col_begin'], i[-1]['col_end'], i[-1]['row_begin'], i[-1]['row_end']
        for col in range(col_begin, col_end):
            for row in range(row_begin, row_end):
                for cell in cell_list:
                    if cell[2]['col_begin'] == col_begin and cell[2]['col_end'] == col_end and\
                            cell[2]['row_begin'] == row_begin and cell[2]['row_end'] == row_end:
                        cell[-1] = 1
    # Any grid cell not covered by a detection is appended as its own cell.
    for i in cell_list:
        if i[-1] == 0:
            print('not detect cell', i[1:])
            tables.append(i[:-1])
    # (Removed a large commented-out visualization/debug block that re-ran
    # detection on the table crop and drew rectangles onto a saved image.)
    # Deduplicate cells that map to the same grid span, keeping the larger box.
    tables_cell = {}
    for cell in tables:
        tmp = f"{cell[2]['row_begin']}_{cell[2]['row_end']}_{cell[2]['col_begin']}_{cell[2]['col_end']}"
        if tmp not in tables_cell.keys():
            tables_cell[tmp] = cell[:-1]
        else:
            if tables_cell[tmp][1][2] * tables_cell[tmp][1][3] < cell[1][
                    2] * cell[1][3]:
                tables_cell[tmp] = cell[:-1]
    # Rebuild the table list from the dedup map, parsing the span back out of
    # the "rowb_rowe_colb_cole" key.
    tables = [[
        v[0], v[1], {
            'row_begin': int(k.split('_')[0]),
            'row_end': int(k.split('_')[1]),
            'col_begin': int(k.split('_')[2]),
            'col_end': int(k.split('_')[3])
        }
    ] for k, v in tables_cell.items()]
    save_table = table_im.copy()
    # Assemble each cell's text from the characters whose centre falls inside it.
    for index_i, i in enumerate(tables):
        print('cell location: ', i[-1])
        cell_region = [i[1][0], i[1][1], i[1][0] + i[1][2], i[1][1] + i[1][3]]
        # Debug overlay: cell index + rectangle drawn onto save_table.
        cv2.putText(save_table, str(index_i),
                    (cell_region[0] + 2, cell_region[1] + 2),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 255), 1)
        cv2.rectangle(save_table, (cell_region[0], cell_region[1]),
                      (cell_region[2], cell_region[3]), (255, 0, 0), 1)
        word_str = []
        for word in word_list:
            word_center_point = ((word[0][0] + word[0][2]) / 2,
                                 (word[0][1] + word[0][5]) / 2)
            if cell_region[0] < word_center_point[0] < cell_region[2] and cell_region[1] < word_center_point[1] < \
                    cell_region[3]:
                word_str.append(word)
        # Sort top-to-bottom, then group characters into lines by identical y.
        word_str = sorted(word_str, key=lambda x: x[0][1])
        word_lines = []
        word_temp = []
        for index, word in enumerate(word_str):
            if len(word_temp) == 0:
                word_temp.append(word)
                if len(word_str) == 1:
                    word_lines.append(word_temp)
                continue
            if word[0][1] == word_temp[-1][0][1]:
                word_temp.append(word)
            else:
                # New line starts: flush the previous one sorted left-to-right.
                word_temp = sorted(word_temp, key=lambda x: x[0][0])
                word_lines.append(word_temp)
                word_temp = [word]
            if index == len(word_str) - 1:
                if len(word_temp) != 0:
                    word_lines.append(word_temp)
        # Concatenate all lines into the cell's text.
        word_str = ''
        for line in word_lines:
            for word in line:
                word_str += word[1]
        i.append([word_str, i[1][2], i[1][3]])
    Image.fromarray(save_table).save('able1.jpg')
    # (Removed a commented-out alternative OCR path over cell crops.)
    tables = sorted(tables, key=lambda x: x[2]['row_begin'])
    new_table = []
    for i in tables:
        new_table.append([i[2], i[3]])
    return new_table, rows, cols, pos
def get_text(request):
    """HTTP handler: OCR a document page and extract structured fields.

    Reads form fields (img_path, par, task_id, FT, page) from `request`,
    renders the page (PDF at 3x zoom via PyMuPDF, or opens an image),
    runs remote text detection + recognition, extracts fields via the
    FT-specific extractor, and returns a Sanic-style JSON response:
    {'result', 'message', 'taskid', 'fields', 'FT'}.
    """
    print('dddddddddddddddddddddddd')
    img_path = request.form.get('img_path')
    par = request.form.get('par')
    task_id = request.form.get('task_id')
    FT = request.form.get('FT')
    page = request.form.get('page')
    print(img_path)
    try:
        if img_path.lower().endswith('.pdf'):
            # Render the requested PDF page at 3x zoom, then deskew.
            pdf = fitz.open(img_path)
            page_num = pdf[int(page) - 1]
            trans = fitz.Matrix(3, 3).preRotate(0)
            pm = page_num.getPixmap(matrix=trans, alpha=False)
            ori_img = fourier_demo(Image.frombytes("RGB", [pm.width, pm.height], pm.samples), 'FT001')
        else:
            ori_img = fourier_demo(Image.open(img_path).convert('RGB'), 'FT001')
        # select(): returns the field-extractor for this document type (FT code).
        ft = select(FT[:11] + '001')
        print('FT:', FT[:11] + '001')
        input_img = ori_img.copy()
        ori_w, ori_h = ori_img.size
        # Encode the full-resolution page once for the detection service.
        ori_img = np.array(ori_img)
        _, ori = cv2.imencode('.jpg', ori_img)
        ori = base64.b64encode(ori.tostring())  # NOTE(review): .tostring() deprecated; .tobytes() preferred
        import time
        start = time.time()
        # Retry detection with progressively smaller thumbnails until the
        # service returns a non-empty result.
        while_i = 0
        images = []
        while 1:
            input_img.thumbnail((2000 - while_i * 100, 2000 - while_i * 100),
                                Image.ANTIALIAS)
            scale_w, scale_h = input_img.size
            # Scale factors back to original resolution.
            scale_w, scale_h = ori_w / scale_w, ori_h / scale_h
            print('原图大小:', ori_w, ori_h, '缩放比例:', scale_w, scale_h)
            img = np.array(input_img)
            _, img = cv2.imencode('.jpg', img)
            img = base64.b64encode(img.tostring())
            data = {'img': img, 'scale_w': scale_w, 'scale_h': scale_h, 'ori_img': ori}
            images_json = requests.post(DETECT_URL, data=data)
            torch.cuda.empty_cache()
            while_i += 1
            if images_json.json() != '':
                # Each item: [8-point box, base64-encoded crop image].
                for i in images_json.json():
                    image = base64.b64decode(i[1])
                    image = np.fromstring(image, np.uint8)  # NOTE(review): np.fromstring deprecated; np.frombuffer preferred
                    image = cv2.imdecode(image, cv2.IMREAD_COLOR)
                    images.append([i[0], image])
                break
        print(111111111111111111111111111, time.time() - start)
        start = time.time()
        # Recognise every detected text-line crop via the recognition service.
        image_positions = []
        for index, j in enumerate(images):
            try:
                _, img = cv2.imencode('.jpg', j[1])
                img = base64.b64encode(img.tostring())
                data = {'img': img}
                # content = [char_candidates, confidence_lists]
                content = requests.post(RECOGNISE_URL, data=data).json()[:2]
                # Accept the top-1 char when confidence > 0.9; mark slot [-1].
                for indexi, i in enumerate(content[1]):
                    if i[0] > 0.9:
                        content[0][indexi] = content[0][indexi][0]
                        content[1][indexi] = [-1]
                # Strip all [-1] markers (remove() raises once none remain).
                while 1:
                    try:
                        content[1].remove([-1])
                    except:
                        break
                content = calculate(content)
                image_positions.append([j[0], content.replace('“', '').replace('‘', '')])
            except Exception as e:
                print('h w', e)
                continue
        print(222222222222222222222222222, time.time() - start)
        # Assemble recognised lines into full-page text, then extract fields.
        text = single_ocr(image_positions)
        print(text)
        texts = ft.extract_info(img_path, page, FT[:11] + '001', text)
        print(texts)
        # Template-warp fallback path is disabled; found_texts stays empty.
        found_texts = ''
        print('==================================================================')
        print(texts, found_texts)
        torch.cuda.empty_cache()
        # Qualification-certificate: refine the FT code from the extracted version.
        if FT[:11] == 'FT001003110':
            FT = FT[:8] + texts.get('version')
        # Fall back to a date parsed from the file name when 发证日期 (issue date) is empty.
        try:
            if texts.get('发证日期') == '' or not texts.get('发证日期'):
                import re
                date_path = re.search('([0-9]{4}[-/年][0-9]{1,2}[-/月][0-9]{1,2}日?)',
                                      os.path.split(img_path)[1])
                if date_path:
                    texts['发证日期'] = date_path.groups()[0]
        except:
            pass
        # Choose the response source: extractor output, warp fallback, or merge.
        if texts == 'FT999' and found_texts:
            return response.json(
                {'result': 'true', 'message': '请求成功', 'taskid': task_id, 'fields': found_texts, 'FT': FT})
        if texts != 'FT999' and found_texts == '':
            return response.json({'result': 'true', 'message': '请求成功', 'taskid': task_id, 'fields': texts, 'FT': FT})
        if found_texts:
            # Fill any empty extractor fields from the warp-template results.
            for key, value in texts.items():
                try:
                    if value == '':
                        texts[key] = found_texts[key]
                except:
                    continue
        # Fail when all but one field came back empty.
        blank = 0
        for key, value in texts.items():
            if value == '':
                blank += 1
        if blank == len(texts) - 1:
            return response.json(
                {'result': 'false', 'message': '请求失败', 'taskid': task_id, 'fields': {}, 'FT': 'FT999999999'})
        else:
            return response.json({'result': 'true', 'message': '请求成功', 'taskid': task_id, 'fields': texts, 'FT': FT})
    except Exception as e:
        # Top-level boundary: any failure yields the generic error response.
        print(e)
        return response.json(
            {'result': 'false', 'message': '请求失败', 'taskid': task_id, 'fields': {}, 'FT': 'FT999999999'})
def generate_table(cell, ori_img):
    """Service-based variant of the table reconstructor.

    Same contract as generate_table(cell, src) but detection/recognition are
    performed through the DETECT_URL / RECOGNISE_URL HTTP services instead of
    local models, and `cell` carries six elements (no table_shape).

    Returns:
        (new_table, rows, cols, pos) — new_table is a list of
        [{row/col begin/end dict}, [cell_text, cell_w, cell_h]] entries.
    """
    pos, cols, rows, col_point, row_point, tables = cell[0][1], cell[1], cell[2], cell[3], cell[4], cell[5]
    print(11111111111111, pos)
    # Crop the table region (pos = [x, y, w, h]) out of the page image.
    table_im = ori_img[pos[1]:pos[1] + pos[3], pos[0]:pos[0] + pos[2]]
    table_line_regions = []
    _, img = cv2.imencode('.jpg', table_im)
    img = base64.b64encode(img.tostring())  # NOTE(review): .tostring() deprecated; .tobytes() preferred
    # No rescaling here; the crop itself is also passed as 'ori_img'.
    data = {'img': img, 'scale_w': 1, 'scale_h': 1, 'ori_img': img}
    images_json = requests.post(DETECT_URL, data=data)
    if images_json.json() != '':
        # Each item: [8-point box, base64-encoded crop image].
        for i in images_json.json():
            image = base64.b64decode(i[1])
            image = np.fromstring(image, np.uint8)  # NOTE(review): deprecated; np.frombuffer preferred
            image = cv2.imdecode(image, cv2.IMREAD_COLOR)
            table_line_regions.append([i[0], image])
    torch.cuda.empty_cache()
    # Recognise every detected line and record per-character absolute boxes.
    word_list = []
    for region in table_line_regions:
        region_y = [region[0][1], region[0][5]]
        region_x = [region[0][0], region[0][2]]
        _, img = cv2.imencode('.jpg', region[1])
        img = base64.b64encode(img.tostring())
        data = {'img': img}
        # Service returns [char_candidates, confidences, char_x_positions].
        contents = requests.post(RECOGNISE_URL, data=data).json()
        content, x = contents[:2], contents[2]
        # Accept top-1 chars with confidence > 0.9; mark their slot with [-1].
        for indexi, cont in enumerate(content[1]):
            if cont[0] > 0.9:
                content[0][indexi] = content[0][indexi][0]
                content[1][indexi] = [-1]
        # Strip all [-1] markers (remove() raises once none remain).
        while 1:
            try:
                content[1].remove([-1])
            except:
                break
        content = calculate(content)
        for index, word in enumerate(content):
            word_list.append(
                [[x[index][0] + region_x[0], region_y[0],
                  x[index][1] + region_x[0], region_y[0],
                  x[index][0] + region_x[0], region_y[1],
                  x[index][1] + region_x[0], region_y[1]], word])
    col_point = sorted(col_point)
    row_point = sorted(row_point)
    # Drop the tallest box (presumably the table frame), then order cells
    # roughly top-left to bottom-right. TODO confirm the [:-1] assumption.
    tables = sorted(tables, key=lambda i: i[1][3])[:-1]
    tables = sorted(tables, key=lambda i: i[1][0] + i[1][1])
    # Map each cell box onto grid indices, with half the gap between adjacent
    # grid lines as tolerance (50px at the grid edges).
    for i in tables:
        d = {'col_begin': 0, 'col_end': 0, 'row_begin': 0, 'row_end': 0}
        for index, value in enumerate(col_point):
            if index == 0:
                d_range = 50
            else:
                d_range = (col_point[index] - col_point[index - 1]) / 2
            if i[1][0] > col_point[index] - d_range:
                d['col_begin'] = index
        for index, value in enumerate(col_point):
            if index == len(col_point) - 1:
                d_range = 50
            else:
                d_range = (col_point[index + 1] - col_point[index]) / 2
            if i[1][0] + i[1][2] < col_point[index] + d_range:
                d['col_end'] = index
                break
        for index, value in enumerate(row_point):
            if index == 0:
                d_range = 50
            else:
                d_range = (row_point[index] - row_point[index - 1]) / 2
            if i[1][1] > row_point[index] - d_range:
                d['row_begin'] = index
        for index, value in enumerate(row_point):
            if index == len(row_point) - 1:
                d_range = 50
            else:
                d_range = (row_point[index + 1] - row_point[index]) / 2
            if i[1][1] + i[1][3] < row_point[index] + d_range:
                d['row_end'] = index
                break
        i.append(d)
    # (Removed a large commented-out per-cell OCR fallback block.)
    # Assemble each cell's text from characters whose centre lies inside it.
    for i in tables:
        cell_region = [i[1][0], i[1][1], i[1][0] + i[1][2], i[1][1] + i[1][3]]
        word_str = []
        for word in word_list:
            word_center_point = ((word[0][0] + word[0][2]) / 2,
                                 (word[0][1] + word[0][5]) / 2)
            if cell_region[0] < word_center_point[0] < cell_region[2] and cell_region[1] < word_center_point[1] < \
                    cell_region[3]:
                word_str.append(word)
        # Sort top-to-bottom, then group characters into lines by identical y.
        word_str = sorted(word_str, key=lambda x: x[0][1])
        word_lines = []
        word_temp = []
        for index, word in enumerate(word_str):
            if len(word_temp) == 0:
                word_temp.append(word)
                continue
            if word[0][1] == word_temp[-1][0][1]:
                word_temp.append(word)
            else:
                # New line starts: flush the previous one sorted left-to-right.
                word_temp = sorted(word_temp, key=lambda x: x[0][0])
                word_lines.append(word_temp)
                word_temp = [word]
            if index == len(word_str) - 1:
                if len(word_temp) != 0:
                    word_lines.append(word_temp)
        # Concatenate all lines into the cell text.
        word_str = ''
        for line in word_lines:
            for word in line:
                word_str += word[1]
        i.append([word_str, i[1][2], i[1][3]])
    new_table = []
    for i in tables:
        new_table.append([i[2], i[3]])
    return new_table, rows, cols, pos
def generate_table(self, cell):
    """Class-method variant of the table reconstructor using local models.

    Uses text_detection / text_recognition model objects directly and a
    nearest-grid-line helper instead of the tolerance-window loops of the
    free-function variants. Reads self.image (page array) and self.file
    (used for the debug overlay file name).

    Returns:
        (new_table, rows, cols, pos) — new_table is a list of
        [{row/col begin/end dict}, [cell_text, cell_w, cell_h]] entries.
    """

    def closest_index(points: list, target):
        # Return the index of the grid line in `points` closest to `target`.
        # NOTE(review): only compares against the *previous* point
        # ((a<b and index>0) or index==0 by precedence), so it relies on
        # `points` being sorted; raises UnboundLocalError on empty input —
        # confirm callers always pass a non-empty sorted list.
        for index, value in enumerate(points):
            if abs(value - target) < abs(points[index - 1] - target
                                         ) and index > 0 or index == 0:
                closest = index
        return closest

    pos, cols, rows, col_point, row_point, tables = cell[0][1], cell[
        1], cell[2], cell[3], cell[4], cell[5]
    # Crop the table region (pos = [x, y, w, h]) out of the page image.
    table_im = self.image[pos[1]:pos[1] + pos[3], pos[0]:pos[0] + pos[2]]
    table_copy = table_im.copy()
    text_rects = text_detection.predict(table_im, 1, 1, table_copy)
    # char_info collects [8-point absolute box, char] for every character.
    char_info = []
    for text_rect in text_rects:
        region_y = [text_rect[0][1], text_rect[0][5]
                    ]  # text_rectangle_ymin, text_rectangle_ymax
        region_x = [text_rect[0][0], text_rect[0][2]
                    ]  # text_rectangle_xmin, text_rectangle_xmax
        char_list, prob_list, char_positions = text_recognition.predict(
            Image.fromarray(text_rect[1]).convert('L'))
        for index, top5_confidence in enumerate(prob_list):
            if top5_confidence[0] > 0.5:
                char_list[index] = char_list[index][
                    0]  # get the top-1 char recognition result if confidence > 50%
                prob_list[index] = [
                    -1
                ]  # then set the confirmed char confidence to -1
        # Drop the confirmed entries, leaving only low-confidence slots.
        prob_list = list(filter(lambda x: x[0] != -1, prob_list))
        content = [char_list, prob_list, char_positions]
        content = calculate(
            content
        )  # replace low-confidence char recognition results by edit distance
        # Record each character's absolute box inside the table crop.
        for index, char in enumerate(content):
            char_left, char_right = char_positions[index]
            char_info.append([[
                char_left + region_x[0], region_y[0],
                char_right + region_x[0], region_y[0],
                char_left + region_x[0], region_y[1],
                char_right + region_x[0], region_y[1]
            ], char])
    # Debug overlay: draw every detected text rectangle and save the result.
    for text_rect in text_rects:
        xmin, ymin = text_rect[0][0], text_rect[0][1]
        xmax, ymax = text_rect[0][6], text_rect[0][7]
        cv2.rectangle(table_copy, (xmin, ymin), (xmax, ymax), (0, 255, 0), 1)
    save_result(table_copy,
                char_info,
                color=(255, 0, 0),
                thickness=2,
                save_dir='char_position',
                file=self.file)
    col_point = sorted(col_point)
    row_point = sorted(row_point)
    for table in tables:
        # Snap each cell box edge to the nearest grid line to get its span.
        cell_dict = {
            'col_begin': 0,
            'col_end': 0,
            'row_begin': 0,
            'row_end': 0
        }
        cell_x, cell_y, cell_w, cell_h = table[1]
        cell_dict['col_begin'] = closest_index(col_point, cell_x)
        cell_dict['col_end'] = closest_index(col_point, cell_x + cell_w)
        cell_dict['row_begin'] = closest_index(row_point, cell_y)
        cell_dict['row_end'] = closest_index(row_point, cell_y + cell_h)
        table.append(cell_dict)
        # Gather the characters whose centre falls inside this cell.
        cell_char = []
        for info in char_info:
            char_xmin, char_xmax, char_ymin, char_ymax = info[0][0], info[
                0][2], info[0][3], info[0][5]
            char_xcenter, char_ycenter = np.mean(
                [[char_xmin, char_ymin], [char_xmax, char_ymax]], axis=0)
            if cell_x < char_xcenter < cell_x + cell_w and cell_y < char_ycenter < cell_y + cell_h:
                cell_char.append(info)
        # Sort top-to-bottom, then group characters into lines by identical y.
        cell_char = sorted(cell_char, key=lambda x: x[0][1])
        print('cell_char:', cell_char)
        cell_text = []
        text_temp = []
        if len(cell_char) == 1:
            cell_text = [cell_char]
        else:
            for index, char in enumerate(cell_char):
                if len(text_temp) == 0:
                    text_temp.append(char)
                    continue
                if char[0][1] == text_temp[-1][0][1]:
                    text_temp.append(char)
                else:
                    # New line starts: flush the previous one left-to-right.
                    text_temp = sorted(text_temp, key=lambda x: x[0][0])
                    cell_text.append(text_temp)
                    text_temp = [char]
                if index == len(cell_char) - 1:
                    if len(text_temp) != 0:
                        cell_text.append(text_temp)
        # Flatten the grouped lines into the final cell string.
        cell_text = "".join(
            [char[1] for line in cell_text for char in line])
        print('cell_text:', cell_text)
        table.append([cell_text, table[1][2], table[1][3]])
    new_table = []
    for table in tables:
        new_table.append([table[2], table[3]])
    return new_table, rows, cols, pos
def single_ocr(document, img_name, start_page, new_url):
    """OCR a full page image and append its layout to a docx document.

    Deskews the page, detects text lines via DETECT_URL (retrying with
    smaller thumbnails until a non-empty result), recognises each line via
    RECOGNISE_URL, groups lines by vertical position, and hands the result
    to unetParse(...).save2docx().

    Parameters:
        document: docx document object being built.
        img_name: PIL image of the page (mutated by thumbnail()).
        start_page: page index (currently unused in this body).
        new_url: unused in this body — kept for interface compatibility.

    Returns:
        (document, position) where position is a flat list of
        [line_box, line_text] pairs.
    """
    start2 = time.time()
    # Correct page skew before everything else.
    img_name = skew_detect.get_rotated_img(img_name)
    ori_img = np.array(img_name)
    _, ori = cv2.imencode('.jpg', ori_img)
    ori = base64.b64encode(ori.tostring())  # NOTE(review): .tostring() deprecated; .tobytes() preferred
    ori_w, ori_h = img_name.size
    print('旋转时间:', time.time() - start2)
    # Retry detection with progressively smaller thumbnails until the
    # service returns a non-empty result.
    while_i = 0
    start = time.time()
    images = []
    while 1:
        img_name.thumbnail((1500 - while_i * 100, 1500 - while_i * 100),
                           Image.ANTIALIAS)
        scale_w, scale_h = img_name.size
        # Scale factors back to original resolution.
        scale_w, scale_h = ori_w / scale_w, ori_h / scale_h
        print('原图大小:', ori_w, ori_h, '缩放比例:', scale_w, scale_h)
        img = np.array(img_name)
        _, img = cv2.imencode('.jpg', img)
        img = base64.b64encode(img.tostring())
        data = {'img': img, 'scale_w': scale_w, 'scale_h': scale_h, 'ori_img': ori}
        images_json = requests.post(DETECT_URL, data=data)
        torch.cuda.empty_cache()
        while_i += 1
        if images_json.json() != '':
            # Each item: [8-point box, base64-encoded crop image].
            for i in images_json.json():
                image = base64.b64decode(i[1])
                image = np.fromstring(image, np.uint8)  # NOTE(review): deprecated; np.frombuffer preferred
                image = cv2.imdecode(image, cv2.IMREAD_COLOR)
                images.append([i[0], image])
            break
    print('ctpn time: ', time.time() - start)
    # Recognise each detected text-line crop.
    results = []
    start = time.time()
    for index, j in enumerate(images):
        try:
            _, img = cv2.imencode('.jpg', j[1])
            img = base64.b64encode(img.tostring())
            data = {'img': img}
            # content = [char_candidates, confidence_lists]
            content = requests.post(RECOGNISE_URL, data=data).json()[:2]
            # Accept the top-1 char when confidence > 0.9; mark slot [-1].
            for indexi, i in enumerate(content[1]):
                if i[0] > 0.9:
                    content[0][indexi] = content[0][indexi][0]
                    content[1][indexi] = [-1]
            # Strip all [-1] markers (remove() raises once none remain).
            while 1:
                try:
                    content[1].remove([-1])
                except:
                    break
            content = calculate(content)
            results.append([j[0], content.replace('“', '').replace('‘', '')])
        except Exception as e:
            print('h w', e)
            continue
    print('识别时间', time.time() - start)
    print(results)
    start = time.time()
    torch.cuda.empty_cache()
    # Sort lines top-to-bottom, then split into visual rows: a new row starts
    # when the next line's top differs from the current row anchor by more
    # than 4/5 of the anchor line's height.
    results = sorted(results, key=lambda i: i[0][1])
    new_results = results
    line_images = []
    cut_index = 0
    curr_index = 0
    for index, i in enumerate(new_results):
        if index == len(new_results) - 1:
            # Flush the tail: remaining run plus the final element.
            if cut_index < index:
                line_images.append(new_results[cut_index:index])
                line_images.append(new_results[index:])
            else:
                line_images.append(new_results[index:])
            break
        if abs(new_results[index + 1][0][1] - new_results[curr_index][0][1]) > (
                new_results[curr_index][0][7] - new_results[curr_index][0][1]) * 4 / 5:
            line_images.append(new_results[cut_index: index + 1])
            cut_index = index + 1
            curr_index = index + 1
    # Order the segments within each row left-to-right.
    for index, i in enumerate(line_images):
        line_images[index] = sorted(i, key=lambda a: a[0][0])
    # Join segments per row; insert a space where the horizontal gap exceeds
    # three average character widths of the current segment.
    texts = []
    position = []
    for i in line_images:
        text = ''
        for index, j in enumerate(i):
            try:
                position.append([j[0], j[1]])
                if index == len(i) - 1:
                    text += j[1]
                elif abs(i[index + 1][0][0] - i[index][0][6]) > 3 * (
                        abs(i[index][0][6] - i[index][0][0]) / len(i[index][1])):
                    text += j[1] + ' '
                else:
                    text += j[1]
            except:
                # e.g. zero-length segment text (division by zero) — skip it.
                continue
        texts.append([[i[0][0], i[-1][0]], text])
    print(img_name.size)
    # Layout analysis + docx serialization.
    document = unetParse(ori_img, texts, document).save2docx()
    print('版式表格时间:', time.time() - start)
    return document, position
def get_text(request):
    """Handle one field-extraction request: OCR the page and extract fields.

    *request* is a dict-like payload with keys: img_path (image or PDF path),
    par, task_id, FT (document-type code), page (1-based page number for PDFs).

    Returns a JSON-serialisable dict with keys result/message/taskid/fields/FT;
    any unhandled error yields the generic failure payload with FT999999999.
    """
    img_path = request.get('img_path')
    par = request.get('par')
    task_id = request.get('task_id')
    FT = request.get('FT')
    page = request.get('page')
    print(img_path)
    try:
        if img_path.lower().endswith('.pdf'):
            # Render the requested PDF page at 3x zoom, then deskew.
            pdf = fitz.open(img_path)
            page_num = pdf[int(page) - 1]
            trans = fitz.Matrix(3, 3).preRotate(0)
            pm = page_num.getPixmap(matrix=trans, alpha=False)
            ori_img = fourier_demo(
                Image.frombytes("RGB", [pm.width, pm.height], pm.samples),
                'FT001')
        else:
            ori_img = fourier_demo(
                Image.open(img_path).convert('RGB'), 'FT001')
        # Pick the extractor for this document type (suffix forced to '001').
        f = select(FT[:11] + '001')
        print('FT:', FT[:11] + '001')
        ori_w, ori_h = ori_img.size
        input_img = ori_img.copy()
        input_img.thumbnail((2000, 2000), Image.ANTIALIAS)
        scale_w, scale_h = input_img.size
        # Factors mapping thumbnail coordinates back to the original image.
        scale_w, scale_h = ori_w / scale_w, ori_h / scale_h
        input_img = input_img.convert('RGB')
        # Persist the working image for later training/review.
        data_image = str(
            os.path.splitext(img_path)[0].split('/')[-1]) + '_' + str(page)
        data_image = '/home/ddwork/wce_data/ori_images/{}_{}.jpg'.format(
            data_image, task_id)
        input_img.save(data_image)
        input_img = np.array(input_img)
        import time
        start = time.time()
        print("text_predict zhiqian")
        images = text_predict(input_img, scale_w, scale_h, ori_img)
        print("text_predict zhihou")
        torch.cuda.empty_cache()
        print(111111111111111111111111111, "SAD", time.time() - start, 'HAPPY')
        start = time.time()
        image_positions = []
        for j in images:
            try:
                print("predict front!!!!!!!!!!!!!!")
                content = predict(Image.fromarray(j[1]).convert('L'))
                print("predict back!!!!!!!!!!!!!!")
                # Keep the top candidate for confident characters.
                # NOTE(review): popping from content[1] while enumerating it
                # skips the element following each pop — likely unintended;
                # other blocks in this file use a [-1]-sentinel pass instead.
                for index, i in enumerate(content[1]):
                    if i[0] > 0.9:
                        content[0][index] = content[0][index][0]
                        content[1].pop(index)
                content = calculate(content)
                image_positions.append(
                    [j[0], content.replace('“', '').replace('‘', '')])
            except Exception as e:
                print(e)
                continue
        # Record the request and raw OCR output for later training.
        data_json = WCE.create(field_id=int(task_id),
                               par=str(par),
                               image_path=data_image,
                               FT=FT,
                               file_type=FT[:11],
                               image_positions=str(image_positions),
                               edited=False,
                               trained=False)
        data_json.save()
        print(222222222222222222222222222, time.time() - start)
        # NOTE(review): the `single_ocr` definitions visible in this file take
        # four arguments; this one-argument call would raise a TypeError that
        # the outer `except` turns into the generic failure response — confirm
        # which `single_ocr` overload is expected to be in scope here.
        text = single_ocr(image_positions)
        print(text)
        texts = f.extract_info(img_path, page, FT[:11] + '001', text)
        print(texts)
        # Template-warp extraction is currently disabled; found_texts stays
        # empty, so the branches below that require it are effectively dead.
        found_texts = ''
        print(
            '=================================================================='
        )
        print(texts, found_texts)
        torch.cuda.empty_cache()
        # Qualification-certificate documents: refine FT from extracted version.
        if FT[:11] == 'FT001003110':
            FT = FT[:8] + texts.get('version')
        # Fall back to a date parsed from the file name when the issue-date
        # field came back empty.
        try:
            if texts.get('发证日期') == '' or not texts.get('发证日期'):
                import re
                date_path = re.search(
                    '([0-9]{4}[-/年][0-9]{1,2}[-/月][0-9]{1,2}日?)',
                    os.path.split(img_path)[1])
                if date_path:
                    texts['发证日期'] = date_path.groups()[0]
        except:
            pass
        if texts == 'FT999' and found_texts:
            return {
                'result': 'true',
                'message': '请求成功',
                'taskid': task_id,
                'fields': found_texts,
                'FT': FT
            }
        if texts != 'FT999' and found_texts == '':
            return {
                'result': 'true',
                'message': '请求成功',
                'taskid': task_id,
                'fields': texts,
                'FT': FT
            }
        # Backfill empty fields from the template-warp result when available.
        if found_texts:
            for key, value in texts.items():
                try:
                    if value == '':
                        texts[key] = found_texts[key]
                except:
                    continue
        # Treat the extraction as failed when all but one field are empty.
        blank = 0
        for key, value in texts.items():
            if value == '':
                blank += 1
        if blank == len(texts) - 1:
            return {
                'result': 'false',
                'message': '请求失败',
                'taskid': task_id,
                'fields': {},
                'FT': 'FT999999999'
            }
        else:
            return {
                'result': 'true',
                'message': '请求成功',
                'taskid': task_id,
                'fields': texts,
                'FT': FT
            }
    except Exception as e:
        print(e)
        return {
            'result': 'false',
            'message': '请求失败',
            'taskid': task_id,
            'fields': {},
            'FT': 'FT999999999'
        }
def fast_ocr(request):
    """OCR a user-selected rectangle of an image/PDF page and return its text.

    Reads from the form: img_path, position ("x0,y0,x1,y1" in viewer
    coordinates), rotate (degrees), pageNum, and for PDFs imageW/imageH (the
    viewer's image size used to rescale the selection onto the rendered page).

    Table structures detected inside the crop are serialised into the
    '###start...end###' marker format and merged into the layout; the final
    text is returned as a JSON array with a single string via response.text.

    Fix: the y-origin clamp previously tested position[0] (copy-paste bug), so
    a negative y was only clamped when x happened to be positive.
    """
    ori_time = time.time()
    ori_start_time = datetime.now()
    print('start...')
    img_path = request.form.get('img_path')
    print(img_path)
    print(request.form.get('position'))
    position = '[' + request.form.get('position') + ']'
    rotate = int(request.form.get('rotate'))
    page = request.form.get('pageNum', 1)
    print(page)
    # SECURITY(review): eval() on request-supplied text allows arbitrary code
    # execution; ast.literal_eval would parse the same "x0,y0,x1,y1" payload
    # safely — flagged for a follow-up since callers may rely on eval quirks.
    position = eval(position)
    if img_path.lower().endswith('pdf'):
        # Viewer-side page size; the selection is scaled from it to the 3x render.
        image_w = int(request.form.get('imageW'))
        image_h = int(request.form.get('imageH'))
        pdf = fitz.open(img_path)
        page = pdf[int(page) - 1]
        trans = fitz.Matrix(3, 3).preRotate(0)
        pm = page.getPixmap(matrix=trans, alpha=False)
        img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples)
        img = np.array(img)
    else:
        img = np.array(Image.open(img_path).convert('RGB'))
        # For plain images the viewer size equals the image size (no scaling).
        image_h, image_w = img.shape[:-1]
    print('原图:', img.shape)
    print('rotate', rotate)
    img_h, img_w, c = img.shape
    # Clamp the selection origin to the image; negative coords become 0.
    position[0] = position[0] if position[0] > 0 else 0
    position[1] = position[1] if position[1] > 0 else 0  # was: position[0] > 0
    # Map the viewer-space selection into rendered-page pixels and crop.
    ori_img = img[int(position[1] * img_h / image_h):int(position[3] * img_h /
                                                         image_h),
                  int(position[0] * img_w / image_w):int(position[2] * img_w /
                                                         image_w)]
    ori_img = rotate_img(Image.fromarray(ori_img), rotate).convert('L')
    crop_img = np.array(ori_img.convert('RGB'))
    table_infos = []
    # Skip table detection when the selection is a thin wide strip.
    # NOTE(review): the second conjunct compares crop_img's width to
    # np.array(ori_img)'s width, which are the same image, so the ratio is
    # always 1 and this branch never triggers — probably meant the full page
    # width; confirm before changing.
    start_table_time = time.time()
    if crop_img.shape[1] / crop_img.shape[0] > 3 and crop_img.shape[
            1] / np.array(ori_img).shape[1] < 0.3:
        print('判断不走表格!')
        pass
    else:
        try:
            tables = extract_table(crop_img)
            texts_table = []
            if tables:
                if tables != 'not table':
                    for table in tables:
                        table_time = time.time()
                        texts_table.append(
                            ['table', generate_table(table, crop_img)])
                        print('generate_table time is ',
                              time.time() - table_time)
                    for table in texts_table:
                        cell_info = []
                        # table[1] is (cells, rows, cols, pos); build a
                        # rows x cols grid of cell texts.
                        table_info = [['' for row in range(table[1][2])]
                                      for col in range(table[1][1])]
                        for tb in table[1][0]:
                            d = tb[0]
                            for row in range(d['row_begin'], d['row_end']):
                                for col in range(d['col_begin'], d['col_end']):
                                    try:
                                        table_info[row][col] += tb[1][0]
                                        if d not in cell_info:
                                            cell_info.append(d)
                                    except:
                                        print('cell error')
                        print(f'###start{str(table_info)}end###')
                        x0, y0, x1, y1 = table[-1][-1][0], table[-1][-1][1], table[-1][-1][0]+table[-1][-1][2], \
                            table[-1][-1][1]+table[-1][-1][3]
                        # Keep only merged cells (span > 1 in either axis),
                        # converted to inclusive [begin, end] index pairs.
                        new_cell_info = []
                        for cell in cell_info:
                            if cell['row_end'] - cell['row_begin'] == 1 and cell[
                                    'col_end'] - cell['col_begin'] == 1:
                                continue
                            new_cell_info.append(
                                [[cell['row_begin'], cell['col_begin']],
                                 [cell['row_end'] - 1, cell['col_end'] - 1]])
                        cell_info = new_cell_info
                        table_infos.append([[
                            [x0, y0, x1, y1], [x0, y0, x1, y1]
                        ], f'###start{str(table_info)}******{str(cell_info)}end###'
                                            ])
        except Exception as ex:
            print('table error', ex)
    print('table detect time is ', time.time() - start_table_time)
    crop_img = Image.fromarray(crop_img)
    while_i = 0
    st_time = time.time()
    # Single-pass detection on a <=1500px thumbnail (loop kept for parity with
    # the retry structure used elsewhere; it always breaks after one pass).
    while 1:
        crop_img.thumbnail((1500 - while_i * 100, 1500 - while_i * 100),
                           Image.ANTIALIAS)
        crop_img = np.array(crop_img)
        crop_area = text_predict(crop_img, 1, 1, crop_img)
        torch.cuda.empty_cache()
        break
    print('ctpn time: ', time.time() - st_time, ' counts: ', len(crop_area))
    new_results = []
    for index, j in enumerate(crop_area):
        try:
            content, _ = predict(Image.fromarray(j[1]).convert('L'))
            # Keep the top candidate for confident characters, mark their
            # probability entries with a [-1] sentinel, then strip sentinels.
            for indexi, i in enumerate(content[1]):
                if i[0] > 0.9:
                    content[0][indexi] = content[0][indexi][0]
                    content[1][indexi] = [-1]
            while 1:
                try:
                    content[1].remove([-1])
                except:
                    break
            print(content)
            content = calculate(content)
            print(content)
            new_results.append([j[0], content])
        except Exception as ex:
            print(ex)
            continue
    document = ''
    # Sort regions top-to-bottom, then group them into physical lines.
    new_results = sorted(new_results, key=lambda i: i[0][1])
    line_images = []
    cut_index = 0
    curr_index = 0
    print(2222222222, len(new_results))
    for index, i in enumerate(new_results):
        try:
            if index == len(new_results) - 1:
                if cut_index < index:
                    line_images.append(new_results[cut_index:])
                else:
                    line_images.append(new_results[index:])
                break
            if abs(new_results[index + 1][0][1] - new_results[curr_index][0][1]
                   ) < (new_results[curr_index][0][7] -
                        new_results[curr_index][0][1]) * 3 / 4:
                # Same line vertically — but still cut when the next region
                # horizontally overlaps an existing one by more than half its
                # width (two columns stacked at the same height).
                for result in new_results[cut_index:index + 1]:
                    if count_area(new_results[index + 1],
                                  result) > (result[0][6] - result[0][0]) / 2:
                        line_images.append(new_results[cut_index:index + 1])
                        cut_index = index + 1
                        curr_index = index + 1
                continue
            else:
                line_images.append(new_results[cut_index:index + 1])
                cut_index = index + 1
                curr_index = index + 1
        except:
            continue
    for index, i in enumerate(line_images):
        line_images[index] = sorted(i, key=lambda a: a[0][0] + a[0][1])
    texts = []
    for i in line_images:
        text = ''
        for index, j in enumerate(i):
            try:
                if index == len(i) - 1:
                    text += j[1]
                # Insert a space for gaps wider than ~3 average char widths.
                elif abs(i[index + 1][0][6] - i[index][0][6]) > 3 * (abs(
                        i[index][0][6] - i[index][0][0]) / len(i[index][1])):
                    text += j[1] + ' '
                else:
                    text += j[1]
            except:
                continue
        texts.append([[i[0][0], i[-1][0]], text])
    crop_w = crop_img.shape[1]
    document = layout(texts, crop_w, table_infos)
    if document == '':
        # Fallback: recognise the whole crop as a single line (best effort).
        # NOTE(review): ori_img is a PIL image here, so Image.fromarray raises
        # and this fallback silently does nothing — confirm np.array(ori_img)
        # was intended before changing behaviour.
        try:
            content, _ = predict(Image.fromarray(ori_img).convert('L'))
            for indexi, i in enumerate(content[1]):
                if i[0] > 0.9:
                    content[0][indexi] = content[0][indexi][0]
                    content[1].pop(indexi)
            document = calculate(content)
        except:
            pass
    print('ddddddddddddddd', document)
    if document == ([], []):
        document = ''
    ori_end_time = datetime.now()
    ori_return = json.dumps([document])
    print('ori_time;', time.time() - ori_time, '\n', 'ori_start_time:',
          ori_start_time, '\n', 'ori_end_time:', ori_end_time)
    return response.text(ori_return)
def generate_table(cell, ori_img):
    """Recognise the text of every cell of one detected table (HTTP variant).

    *cell* is the detector's table description:
    cell[0][1] -> pos (x, y, w, h of the table in *ori_img*), cell[1] -> cols,
    cell[2] -> rows, cell[3]/cell[4] -> column/row grid-line coordinates,
    cell[5] -> per-cell entries whose [1] is the cell's (x, y, w, h).

    Returns (new_table, rows, cols, pos) where new_table is a list of
    [span_dict, [text, w, h]] per cell.

    NOTE(review): a one-argument `generate_table(cell)` is defined later in
    this file; being defined later it shadows this one, so two-argument call
    sites raise TypeError (swallowed by their callers' except blocks) — the
    duplicate names need to be resolved.
    """
    pos, cols, rows, col_point, row_point, tables = cell[0][1], cell[1], cell[
        2], cell[3], cell[4], cell[5]
    # Crop the table area; all region coordinates below are table-relative.
    table_im = ori_img[pos[1]:pos[1] + pos[3], pos[0]:pos[0] + pos[2]]
    _, d = cv2.imencode('.jpg', table_im)
    d = base64.b64encode(d)
    d = d.decode()
    data = {'img': d, 'scale_w': 1, 'scale_h': 1, 'ori_img': d}
    # Remote detection: each result is [quad_box, base64 JPEG crop].
    table_line_regions = requests.post(DETECT_URL, data=data).json()
    table_line_regions = [[
        xx[0],
        cv2.imdecode(np.fromstring(base64.b64decode(xx[1]), np.uint8),
                     cv2.IMREAD_COLOR)
    ] for xx in table_line_regions]
    word_list = []
    for region in table_line_regions:
        region_y = [region[0][1], region[0][5]]
        region_x = [region[0][0], region[0][2]]
        _, tmp = cv2.imencode('.jpg', region[1])
        tmp = base64.b64encode(tmp)
        tmp = tmp.decode()
        data = {'img': tmp}
        # content[0]: candidates, content[1]: probabilities,
        # content[2]: per-character x extents within the region.
        content = requests.post(RECOGNISE_URL, data=data).json()
        # Keep the top candidate for confident characters (sentinel + strip).
        for indexi, cont in enumerate(content[1]):
            if cont[0] > 0.9:
                content[0][indexi] = content[0][indexi][0]
                content[1][indexi] = [-1]
        while 1:
            try:
                content[1].remove([-1])
            except:
                break
        x = content[2]
        content = calculate(content)
        # Emit one quad box per recognised character, shifted back into
        # table coordinates.
        for index, word in enumerate(content):
            word_list.append([[
                x[index][0] + region_x[0], region_y[0],
                x[index][1] + region_x[0], region_y[0],
                x[index][0] + region_x[0], region_y[1],
                x[index][1] + region_x[0], region_y[1]
            ], word])
    # Debug overlay: green line regions, red character boxes.
    for region in table_line_regions:
        cv2.rectangle(table_im, (region[0][0], region[0][1]),
                      (region[0][6], region[0][7]), (0, 255, 0), 1)
    for i in word_list:
        cv2.rectangle(table_im, (i[0][0], i[0][1]), (i[0][6], i[0][7]),
                      (255, 0, 0), 1)
    Image.fromarray(table_im).save('single_word.jpg')
    col_point = sorted(col_point)
    row_point = sorted(row_point)
    # Drop the tallest entry (the outer table frame), then order cells
    # roughly top-left to bottom-right.
    tables = sorted(tables, key=lambda i: i[1][3])[:-1]
    tables = sorted(tables, key=lambda i: i[1][0] + i[1][1])
    # Assign each cell its row/column span by snapping its rectangle to the
    # nearest grid lines (half the gap to the neighbouring line, 50px at the
    # outer edges, as tolerance).
    for i in tables:
        d = {'col_begin': 0, 'col_end': 0, 'row_begin': 0, 'row_end': 0}
        for index, value in enumerate(col_point):
            if index == 0:
                d_range = 50
            else:
                d_range = (col_point[index] - col_point[index - 1]) / 2
            if i[1][0] > col_point[index] - d_range:
                d['col_begin'] = index
        for index, value in enumerate(col_point):
            if index == len(col_point) - 1:
                d_range = 50
            else:
                d_range = (col_point[index + 1] - col_point[index]) / 2
            if i[1][0] + i[1][2] < col_point[index] + d_range:
                d['col_end'] = index
                break
        for index, value in enumerate(row_point):
            if index == 0:
                d_range = 50
            else:
                d_range = (row_point[index] - row_point[index - 1]) / 2
            if i[1][1] > row_point[index] - d_range:
                d['row_begin'] = index
        for index, value in enumerate(row_point):
            if index == len(row_point) - 1:
                d_range = 50
            else:
                d_range = (row_point[index + 1] - row_point[index]) / 2
            if i[1][1] + i[1][3] < row_point[index] + d_range:
                d['row_end'] = index
                break
        i.append(d)
    # Gather the characters whose centre falls inside each cell, order them
    # into lines, and concatenate the cell text.
    for i in tables:
        cv2.rectangle(table_im, (i[1][0], i[1][1]),
                      (i[1][0] + i[1][2], i[1][1] + i[1][3]), (255, 0, 0), 1)
        cell_region = [i[1][0], i[1][1], i[1][0] + i[1][2], i[1][1] + i[1][3]]
        word_str = []
        for word in word_list:
            word_center_point = ((word[0][0] + word[0][2]) / 2,
                                 (word[0][1] + word[0][5]) / 2)
            if cell_region[0] < word_center_point[0] < cell_region[2] and cell_region[1] < word_center_point[1] < \
                    cell_region[3]:
                word_str.append(word)
        word_str = sorted(word_str, key=lambda x: x[0][1])
        word_lines = []
        word_temp = []
        # Split the vertically-sorted characters into lines wherever the top
        # y coordinate changes; each finished line is re-sorted left-to-right.
        for index, word in enumerate(word_str):
            if len(word_temp) == 0:
                word_temp.append(word)
                continue
            if word[0][1] == word_temp[-1][0][1]:
                word_temp.append(word)
            else:
                word_temp = sorted(word_temp, key=lambda x: x[0][0])
                word_lines.append(word_temp)
                word_temp = [word]
            if index == len(word_str) - 1:
                if len(word_temp) != 0:
                    word_lines.append(word_temp)
        word_str = ''
        for line in word_lines:
            for word in line:
                word_str += word[1]
        i.append([word_str, i[1][2], i[1][3]])
    Image.fromarray(table_im).save('single_word.jpg')
    new_table = []
    for i in tables:
        new_table.append([i[2], i[3]])
    return new_table, rows, cols, pos
def generate_table(cell):
    """Recognise the text of every cell of one detected table (local variant).

    Unlike the two-argument version above, each entry of cell[5] carries its
    own cell crop in [0]; detection/recognition run locally via text_predict
    and predict instead of the HTTP services.

    Returns (new_table, rows, cols, pos) with new_table a list of
    [span_dict, [text, w, h]] per cell.

    NOTE(review): this definition shares its name with the two-argument
    `generate_table(cell, ori_img)` defined earlier; whichever is defined last
    shadows the other, and this one-argument form cannot serve the
    two-argument call sites in this file — the duplication needs resolving.
    """
    pos, cols, rows, col_point, row_point, tables = cell[0][1], cell[1], cell[
        2], cell[3], cell[4], cell[5]
    col_point = sorted(col_point)
    row_point = sorted(row_point)
    # Drop the tallest entry (the outer table frame), then order cells
    # roughly top-left to bottom-right.
    tables = sorted(tables, key=lambda i: i[1][3])[:-1]
    tables = sorted(tables, key=lambda i: i[1][0] + i[1][1])
    # Assign each cell its row/column span by snapping its rectangle to the
    # nearest grid lines (half the neighbouring gap, 50px at the outer edges,
    # as tolerance).
    for i in tables:
        d = {'col_begin': 0, 'col_end': 0, 'row_begin': 0, 'row_end': 0}
        for index, value in enumerate(col_point):
            if index == 0:
                d_range = 50
            else:
                d_range = (col_point[index] - col_point[index - 1]) / 2
            if i[1][0] > col_point[index] - d_range:
                d['col_begin'] = index
        for index, value in enumerate(col_point):
            if index == len(col_point) - 1:
                d_range = 50
            else:
                d_range = (col_point[index + 1] - col_point[index]) / 2
            if i[1][0] + i[1][2] < col_point[index] + d_range:
                d['col_end'] = index
                break
        for index, value in enumerate(row_point):
            if index == 0:
                d_range = 50
            else:
                d_range = (row_point[index] - row_point[index - 1]) / 2
            if i[1][1] > row_point[index] - d_range:
                d['row_begin'] = index
        for index, value in enumerate(row_point):
            if index == len(row_point) - 1:
                d_range = 50
            else:
                d_range = (row_point[index + 1] - row_point[index]) / 2
            if i[1][1] + i[1][3] < row_point[index] + d_range:
                d['row_end'] = index
                break
        i.append(d)
    # OCR each cell crop and append [text, w, h] to its entry.
    for index, i in enumerate(tables):
        texts = ''
        try:
            i[0] = Image.fromarray(i[0])
            new_i = i[0].copy()
            ori_w, ori_h = i[0].size
            new_i.thumbnail((1500, 1500), Image.ANTIALIAS)
            scale_w, scale_h = new_i.size
            # Factors mapping thumbnail coordinates back to the cell crop.
            scale_w, scale_h = ori_w / scale_w, ori_h / scale_h
            new_i = np.array(new_i.convert('RGB'))
            # Cells smaller than 16px in either dimension cannot be detected;
            # record them with empty text.
            if new_i.shape[1] > 16 and new_i.shape[0] > 16:
                images = text_predict(new_i, scale_w, scale_h, np.array(i[0]))
                torch.cuda.empty_cache()
            else:
                i.append([texts, i[1][2], i[1][3]])
                continue
            if images:
                # Recognise each detected line top-to-bottom.
                for image in sorted(images, key=lambda ii: ii[0][1]):
                    content = predict(Image.fromarray(image[1]).convert('L'))
                    # ori_content / prob_content are computed but unused
                    # (kept from an earlier debugging pass).
                    ori_content = [i[0] for i in content[0]]
                    prob_content = [[i, j]
                                    for i, j in zip(content[0], content[1])]
                    # Keep the top candidate for confident characters
                    # (sentinel + strip pattern used throughout this file).
                    for indexi, cont in enumerate(content[1]):
                        if cont[0] > 0.9:
                            content[0][indexi] = content[0][indexi][0]
                            content[1][indexi] = [-1]
                    while 1:
                        try:
                            content[1].remove([-1])
                        except:
                            break
                    content = calculate(content)
                    texts += content
            elif new_i.any() and new_i.shape[0] < new_i.shape[1] * 1.5:
                # No lines detected: try recognising the whole (landscape-ish)
                # cell as a single line.
                try:
                    content = predict(Image.fromarray(new_i).convert('L'))
                    ori_content = [i[0] for i in content[0]]
                    prob_content = [[i, j]
                                    for i, j in zip(content[0], content[1])]
                    for indexi, cont in enumerate(content[1]):
                        if cont[0] > 0.9:
                            content[0][indexi] = content[0][indexi][0]
                            content[1][indexi] = [-1]
                    while 1:
                        try:
                            content[1].remove([-1])
                        except:
                            break
                    content = calculate(content)
                    texts += content
                except Exception as ex:
                    print('small_image_warning', ex)
            i.append([texts, i[1][2], i[1][3]])
        except Exception as e:
            print('table_text warning', e)
    new_table = []
    for i in tables:
        new_table.append([i[2], i[3]])
    return new_table, rows, cols, pos
def single_ocr(document, img_name, start_page, new_url):
    """Run full-page OCR on one image and append the result to *document*.

    Local variant: detection and recognition run in-process via text_predict /
    predict (no HTTP services). Detected tables are interleaved with the text
    lines by vertical position, paragraphs are re-ordered by sort_paragraph,
    and the result is written with save2docx.

    Returns (document, position): position is a list of [box, text] pairs for
    every recognised region.

    NOTE(review): this shadows the earlier `single_ocr` of the same name;
    confirm which definition callers are supposed to get.
    """
    # Deskew first; all later coordinates refer to the rotated image.
    img_name = skew_detect.get_rotated_img(img_name)
    ori_img = np.array(img_name)
    ori_w, ori_h = img_name.size
    img_name.thumbnail((1500, 1500), Image.ANTIALIAS)
    scale_w, scale_h = img_name.size
    # Factors mapping thumbnail coordinates back to the original image.
    scale_w, scale_h = ori_w / scale_w, ori_h / scale_h
    print('原图大小:', ori_w, ori_h, '缩放比例:', scale_w, scale_h)
    img = np.array(img_name)
    start = time.time()
    images = text_predict(img, scale_w, scale_h, ori_img)
    torch.cuda.empty_cache()
    print('ctpn time: ', time.time() - start)
    # Probe for tables; any failure degrades gracefully to "no tables".
    try:
        tables = extract_table(ori_img)
        if tables == 'not table':
            has_table = False
        else:
            has_table = True
    except:
        has_table = False
    print(2222222222222222222222222, has_table)
    results = []
    start = time.time()
    for index, j in enumerate(images):
        try:
            if has_table:
                # Only recognise regions that fall inside no table's vertical
                # band — table contents are recognised by generate_table.
                count = 0
                for table in tables:
                    if table[0][1][1] + table[0][1][3] > j[0][1] > table[0][1][
                            1]:
                        continue
                    else:
                        count += 1
                if count == len(tables):
                    content = predict(Image.fromarray(j[1]).convert('L'))
                    # ori_content / prob_content are computed but unused
                    # (left over from a debugging pass).
                    ori_content = [i[0] for i in content[0]]
                    prob_content = [[i, j]
                                    for i, j in zip(content[0], content[1])]
                    # Keep the top candidate for confident characters
                    # (sentinel + strip pattern used throughout this file).
                    for indexi, i in enumerate(content[1]):
                        if i[0] > 0.9:
                            content[0][indexi] = content[0][indexi][0]
                            content[1][indexi] = [-1]
                    while 1:
                        try:
                            content[1].remove([-1])
                        except:
                            break
                    content = calculate(content)
                    results.append(
                        [j[0], content.replace('“', '').replace('‘', '')])
            else:
                content = predict(Image.fromarray(j[1]).convert('L'))
                ori_content = [i[0] for i in content[0]]
                prob_content = [[i, j]
                                for i, j in zip(content[0], content[1])]
                for indexi, i in enumerate(content[1]):
                    if i[0] > 0.9:
                        content[0][indexi] = content[0][indexi][0]
                        content[1][indexi] = [-1]
                while 1:
                    try:
                        content[1].remove([-1])
                    except:
                        break
                content = calculate(content)
                results.append(
                    [j[0], content.replace('“', '').replace('‘', '')])
        except Exception as e:
            print(e)
            continue
    torch.cuda.empty_cache()
    print(33333333333333333, time.time() - start)
    # Sort regions top-to-bottom, then group them into physical lines: cut a
    # new line when the next region's top differs from the line anchor by
    # more than 4/5 of the anchor's height.
    results = sorted(results, key=lambda i: i[0][1])
    new_results = results
    line_images = []
    cut_index = 0
    curr_index = 0
    for index, i in enumerate(new_results):
        if index == len(new_results) - 1:
            if cut_index < index:
                line_images.append(new_results[cut_index:index])
                line_images.append(new_results[index:])
            else:
                line_images.append(new_results[index:])
            break
        if abs(new_results[index + 1][0][1] - new_results[curr_index][0][1]
               ) > (new_results[curr_index][0][7] -
                    new_results[curr_index][0][1]) * 4 / 5:
            line_images.append(new_results[cut_index:index + 1])
            cut_index = index + 1
            curr_index = index + 1
    # Order regions left-to-right within each line.
    for index, i in enumerate(line_images):
        line_images[index] = sorted(i, key=lambda a: a[0][0])
    texts = []
    position = []
    for i in line_images:
        text = ''
        for index, j in enumerate(i):
            try:
                position.append([j[0], j[1]])
                if index == len(i) - 1:
                    text += j[1]
                # Insert a space for gaps wider than ~3 average char widths.
                elif abs(i[index + 1][0][0] - i[index][0][6]) > 3 * (abs(
                        i[index][0][6] - i[index][0][0]) / len(i[index][1])):
                    text += j[1] + ' '
                else:
                    text += j[1]
            except:
                continue
        texts.append([[i[0][0], i[-1][0]], text])
    print(img_name.size)
    if has_table:
        # Splice each recognised table into the text stream at the position
        # matching its vertical location.
        for table in tables:
            table_index = 0
            for index, i in enumerate(texts):
                if i[0] == 'table':
                    if table[0][1][1] > i[1][3][1]:
                        table_index = index + 1
                elif table[0][1][1] > i[0][0][1]:
                    table_index = index + 1
            try:
                # NOTE(review): this two-argument call matches the earlier
                # `generate_table(cell, ori_img)`, but the later one-argument
                # definition shadows it, so this raises TypeError and the
                # table is silently dropped — confirm and resolve the
                # duplicate definitions.
                texts.insert(table_index,
                             ['table', generate_table(table, ori_img)])
            except Exception as e:
                print(e)
                continue
    try:
        texts = sort_paragraph(Image.fromarray(ori_img), texts)
    except Exception as e:
        print(e)
        # Paragraph sorting failed: return the document unchanged.
        return document, position
    document = save2docx(document, texts, Image.fromarray(ori_img), start_page)
    return document, position