Example #1
def get_texts(xml, found, img_path, task_id):
    dom = ET.parse(xml)
    obj = dom.findall("./object")
    dic = {}
    texts = defaultdict(str)
    ori_img = np.array(found)
    _, ori = cv2.imencode('.jpg', ori_img)
    ori = base64.b64encode(ori.tobytes())  # ndarray.tostring() is removed in modern NumPy
    for ob in obj:
        name = str(list(ob)[0].text)  # Element.getchildren() was removed in Python 3.9
        if 'extract' in name:
            bnd_box = ob.findall("bndbox")[0]
            x_min = bnd_box.findall("xmin")[0].text
            y_min = bnd_box.findall("ymin")[0].text
            x_max = bnd_box.findall("xmax")[0].text
            y_max = bnd_box.findall("ymax")[0].text
            dic[name] = [int(x_min), int(y_min), int(x_max), int(y_max)]
    _, img = cv2.imencode('.jpg', found)
    img = base64.b64encode(img.tobytes())
    # NOTE: scale_w and scale_h are not defined in this snippet; the original
    # relies on them being set at module level.
    data = {'img': img, 'scale_w': scale_w, 'scale_h': scale_h, 'ori_img': ori}
    # decode the detector's [position, base64-crop] pairs back into ndarrays,
    # mirroring the handling in the other examples
    images = []
    resp = requests.post('http://172.30.20.154:32021/text_predict', data=data)
    if resp.json() != '':
        for r in resp.json():
            crop = cv2.imdecode(np.frombuffer(base64.b64decode(r[1]), np.uint8),
                                cv2.IMREAD_COLOR)
            images.append([r[0], crop])
    for key, value in dic.items():
        if key == '商标-extract':
            save_path = '/' + '/'.join(
                img_path.split('/')[1:5]) + '/trademark/'
            if not os.path.exists(save_path):
                os.makedirs(save_path)
            save_path = save_path + str(task_id) + '.jpg'
            Image.fromarray(found[value[1]:value[3],
                                  value[0]:value[2]]).resize(
                                      (500, 300)).save(save_path)
            texts[key.replace('-extract', '')] = save_path
            continue
        try:
            # if images:
            #     image_positions = [[i[0], rec_txt(i[1])]
            #                        for i
            #                        in
            #                        images]
            new_images = []
            for i in images:
                if i[0][1] > value[1] and i[0][7] < value[3]:
                    new_images.append(i)
            new = []
            for i in new_images:
                if i[0][0] > value[0] and i[0][6] < value[2]:
                    new.append(i)
                elif i[0][0] < value[0] and (value[0] < i[0][6] < value[2]):
                    i[0][0] = value[0]
                    new.append(
                        [i[0], found[i[0][1]:i[0][7], value[0]:i[0][6]]])
                elif (value[2] > i[0][0] > value[0]) and i[0][6] > value[2]:
                    i[0][6] = value[2]
                    new.append(
                        [i[0], found[i[0][1]:i[0][7], i[0][0]:value[2]]])
                elif i[0][0] < value[0] and i[0][6] > value[2]:
                    i[0][0] = value[0]
                    i[0][6] = value[2]
                    new.append(
                        [i[0], found[i[0][1]:i[0][7], value[0]:value[2]]])
            if new:
                image_positions = []
                for j in new:
                    if j[1].any():
                        _, img = cv2.imencode('.jpg', j[1])
                        img = base64.b64encode(img.tobytes())
                        data = {'img': img}
                        content = requests.post(
                            'http://172.30.20.154:32020/predict',
                            data=data).json()[:2]
                        for indexi, i in enumerate(content[1]):
                            if i[0] > 0.9:
                                content[0][indexi] = content[0][indexi][0]
                                content[1][indexi] = [-1]
                        content[1] = [p for p in content[1] if p != [-1]]
                        content = calculate(content)
                        image_positions.append([j[0], content])
                texts[key.replace('-extract',
                                  '')] = single_ocr(image_positions).replace(
                                      '\n', '')
            else:
                _, img = cv2.imencode('.jpg', found)
                img = base64.b64encode(img.tobytes())
                data = {'img': img}
                content = requests.post('http://172.30.20.154:32020/predict',
                                        data=data).json()[:2]
                for indexi, i in enumerate(content[1]):
                    if i[0] > 0.9:
                        content[0][indexi] = content[0][indexi][0]
                        content[1][indexi] = [-1]
                content[1] = [p for p in content[1] if p != [-1]]
                content = calculate(content)
                texts[key.replace('-extract', '')] = content
        except Exception as e:
            print(e)
            continue
    if '企业名称' in texts:
        texts['企业名称'] = texts['企业名称'].replace('企业名称', '')
    return texts
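
Every example in this set repeats the same post-processing after recognition: commit the top-1 character wherever confidence exceeds 0.9, mark the resolved entries, filter them out of the probability list, and hand what is left to calculate() for edit-distance correction. A reusable helper might look like this (a sketch; calculate() and the content layout are as used in the snippets, the helper name is ours):

def filter_confident_chars(content, threshold=0.9):
    """Commit high-confidence characters, then drop their probability entries.

    content[0]: per-position candidate characters (top-k lists)
    content[1]: per-position confidence lists, aligned with content[0]
    """
    chars, probs = content[0], content[1]
    for idx, prob in enumerate(probs):
        if prob[0] > threshold:
            chars[idx] = chars[idx][0]   # commit the top-1 candidate
            probs[idx] = [-1]            # mark as resolved
    content[1] = [p for p in probs if p != [-1]]
    return content

# usage inside the recognition loops above:
#     content = calculate(filter_confident_chars(content))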
Example #2
def generate_table(cell, src):
    # import pickle
    pos, cols, rows, col_point, row_point, tables, table_shape = (
        cell[0][1], cell[1], cell[2], cell[3], cell[4], cell[5], cell[6])
    col_point = sorted(col_point)
    row_point = sorted(row_point)
    tables = sorted(tables, key=lambda i: i[1][3])[:-1]
    tables = sorted(tables, key=lambda i: i[1][0] + i[1][1])

    # positions of every single character inside the table
    table_im = src[pos[1]:pos[1] + pos[3], pos[0]:pos[0] + pos[2]]
    table_line_regions = text_predict(table_im, 1, 1, table_im)
    torch.cuda.empty_cache()

    word_list = []
    # print('table_line_length', len(table_line_regions))
    for region_index, region in enumerate(table_line_regions):
        region_y = [region[0][1], region[0][5]]
        region_x = [region[0][0], region[0][2]]
        # Image.fromarray(region[1]).save(f'1/{region_index}.jpg')
        content = predict(Image.fromarray(region[1]).convert('L'))

        torch.cuda.empty_cache()
        content = (content[0][0], content[0][1], content[1])
        for indexi, cont in enumerate(content[1]):
            if cont[0] > 0.9:
                content[0][indexi] = content[0][indexi][0]
                content[1][indexi] = [-1]
        content[1] = [p for p in content[1] if p != [-1]]
        x = content[2]
        content = calculate(content)

        for index, word in enumerate(content):
            word_list.append([[
                x[index][0] + region_x[0], region_y[0],
                x[index][1] + region_x[0], region_y[0],
                x[index][0] + region_x[0], region_y[1],
                x[index][1] + region_x[0], region_y[1]
            ], word])

    # # save the table row/column intersection coordinates (debugging)
    # show_im = np.ones(table_shape, np.uint8)
    # import itertools
    # for x, y in itertools.product([int(i) for i in col_point], [int(i) for i in row_point]):
    #     cv2.circle(show_im, (x, y), 1, (255, 255, 255), 1)
    # Image.fromarray(show_im).save('show_im.jpg')

    for i in tables:
        d = {'col_begin': 0, 'col_end': 0, 'row_begin': 0, 'row_end': 0}
        for index, value in enumerate(col_point):
            if index == 0:
                d_range = 50
            else:
                d_range = (col_point[index] - col_point[index - 1]) / 2
            if i[1][0] > col_point[index] - d_range:
                d['col_begin'] = index
        for index, value in enumerate(col_point):
            if index == len(col_point) - 1:
                d_range = 50
            else:
                d_range = (col_point[index + 1] - col_point[index]) / 2
            if i[1][0] + i[1][2] < col_point[index] + d_range:
                d['col_end'] = index
                break
        for index, value in enumerate(row_point):
            if index == 0:
                d_range = 50
            else:
                d_range = (row_point[index] - row_point[index - 1]) / 2
            if i[1][1] > row_point[index] - d_range:
                d['row_begin'] = index
        for index, value in enumerate(row_point):
            if index == len(row_point) - 1:
                d_range = 50
            else:
                d_range = (row_point[index + 1] - row_point[index]) / 2
            if i[1][1] + i[1][3] < row_point[index] + d_range:
                d['row_end'] = index
                break
        if d['col_begin'] >= d['col_end']:
            d['col_end'] = d['col_begin'] + 1
        if d['row_begin'] >= d['row_end']:
            d['row_end'] = d['row_begin'] + 1
        # print('123'*3, d)
        i.append(d)

    # print(pos[0], pos[1], pos[2], pos[3])
    # table_im = src[pos[1]:pos[1]+pos[3], pos[0]:pos[0]+pos[2]]
    # Image.fromarray(table_im).show()
    # images = text_predict(table_im, 1, 1, table_im)

    cell_list = []
    for row_p in range(len(row_point) - 1):
        for col_p in range(len(col_point) - 1):
            roi = table_im[int(row_point[row_p]):int(row_point[row_p + 1]),
                           int(col_point[col_p]):int(col_point[col_p + 1])]
            cell_list.append([
                roi,
                [
                    int(col_point[col_p]),
                    int(row_point[row_p]),
                    int(col_point[col_p + 1] - col_point[col_p]),
                    int(row_point[row_p + 1] - row_point[row_p])
                ], {
                    'col_begin': col_p,
                    'col_end': col_p + 1,
                    'row_begin': row_p,
                    'row_end': row_p + 1
                }, 0
            ])

    # check whether each grid cell was covered by a detected cell
    for i in tables:
        col_begin, col_end, row_begin, row_end = \
            i[-1]['col_begin'], i[-1]['col_end'], i[-1]['row_begin'], i[-1]['row_end']
        # mark grid cells whose span exactly matches a detected cell
        # (the original also looped over the covered rows/columns, but the
        # loop variables were never used in the comparison)
        for cell in cell_list:
            if cell[2]['col_begin'] == col_begin and cell[2]['col_end'] == col_end and \
                    cell[2]['row_begin'] == row_begin and cell[2]['row_end'] == row_end:
                cell[-1] = 1
    # fall back to the unit grid cell wherever nothing was detected
    for i in cell_list:
        if i[-1] == 0:
            print('not detect cell', i[1:])
            tables.append(i[:-1])

    # images = text_predict(table_im)

    # # cell positions
    # # for cell in tables:
    # #     print(cell[1:])
    # # save the table image (debugging)
    # save_table = table_im.copy()
    # # for word in word_list:
    # #     word = word[0]
    # #     cv2.rectangle(save_table, (word[0], word[1]), (word[6], word[7]), (255, 0, 0), 1)
    # for i in table_line_regions:
    #     print(123456, i[0])
    #     cv2.rectangle(save_table, (i[0][0] - 1, i[0][1] - 1), (i[0][6] + 1, i[0][7] + 1), (255, 0, 0), 1)
    # # import random
    # # for i in tables:
    # #     cv2.rectangle(save_table, (i[1][0], i[1][1]), (i[1][0]+i[1][2], i[1][1]+i[1][3]), (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)), 1)
    # from config_url import DETECT_URL
    # import requests, base64
    # _, img = cv2.imencode('.jpg', table_im)
    # img = base64.b64encode(img.tostring())
    # # data = {'img': img, 'scale_w': scale_w, 'scale_h': scale_h, 'ori_img': ori}
    # data = {'img': img, 'scale_w': 1, 'scale_h': 1, 'ori_img': img}
    # crop_area_json = requests.post(DETECT_URL, data=data)
    # crop_area = []
    # # while_i += 1
    # if crop_area_json.json() != '':
    #     for i in crop_area_json.json():
    #         image = base64.b64decode(i[1])
    #         image = np.fromstring(image, np.uint8)
    #         image = cv2.imdecode(image, cv2.IMREAD_COLOR)
    #         crop_area.append([i[0], image])
    # for te in crop_area:
    #     print(2221111, te[0])
    #     t = te[0]
    #     cv2.rectangle(save_table, (t[0], t[1]), (t[6], t[7]), (0, 0, 255), 1)
    # Image.fromarray(save_table).save('able1.jpg')
    # Image.fromarray(table_im).save('able3.jpg')

    # drop wrongly detected table cells, keeping the larger duplicate
    tables_cell = {}
    for cell in tables:
        tmp = f"{cell[2]['row_begin']}_{cell[2]['row_end']}_{cell[2]['col_begin']}_{cell[2]['col_end']}"
        if tmp not in tables_cell.keys():
            tables_cell[tmp] = cell[:-1]
        else:
            if tables_cell[tmp][1][2] * tables_cell[tmp][1][3] < cell[1][
                    2] * cell[1][3]:
                tables_cell[tmp] = cell[:-1]
    # for cell in tables_cell:
    #     print(111, cell[1:])
    tables = [[
        v[0], v[1], {
            'row_begin': int(k.split('_')[0]),
            'row_end': int(k.split('_')[1]),
            'col_begin': int(k.split('_')[2]),
            'col_end': int(k.split('_')[3])
        }
    ] for k, v in tables_cell.items()]

    save_table = table_im.copy()
    for index_i, i in enumerate(tables):
        print('cell location: ', i[-1])
        cell_region = [i[1][0], i[1][1], i[1][0] + i[1][2], i[1][1] + i[1][3]]
        cv2.putText(save_table, str(index_i),
                    (cell_region[0] + 2, cell_region[1] + 2),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 255), 1)
        cv2.rectangle(save_table, (cell_region[0], cell_region[1]),
                      (cell_region[2], cell_region[3]), (255, 0, 0), 1)
        word_str = []

        for word in word_list:
            word_center_point = ((word[0][0] + word[0][2]) / 2,
                                 (word[0][1] + word[0][5]) / 2)
            if cell_region[0] < word_center_point[0] < cell_region[2] and cell_region[1] < word_center_point[1] < \
                    cell_region[3]:
                word_str.append(word)
        # if i[2]['row_begin'] == 3 and i[2]['row_end'] == 4 and i[2]['col_begin'] == 0 and i[2]['col_end'] == 1:
        #     print(cell_region)
        #     print(word_str)
        word_str = sorted(word_str, key=lambda x: x[0][1])
        # print('word_str', word_str)
        # print('table', i[2])
        # print(i[2], word_str)
        word_lines = []
        word_temp = []
        for index, word in enumerate(word_str):
            if len(word_temp) == 0:
                word_temp.append(word)
                if len(word_str) == 1:
                    word_lines.append(word_temp)
                continue
            if word[0][1] == word_temp[-1][0][1]:
                word_temp.append(word)
            else:
                word_temp = sorted(word_temp, key=lambda x: x[0][0])
                # print(1111, word_temp)
                word_lines.append(word_temp)
                word_temp = [word]
            if index == len(word_str) - 1:
                if len(word_temp) != 0:
                    # print(2222, word_temp)
                    word_lines.append(word_temp)
        word_str = ''
        # new_word_lines = []
        # for line in word_lines:
        #     if line in new_word_lines:
        #         print(1111111)
        #         continue
        #     new_word_lines.append(line)
        # word_lines = new_word_lines.copy()
        for line in word_lines:
            # print('line', line)
            for word in line:
                word_str += word[1]
        i.append([word_str, i[1][2], i[1][3]])
    Image.fromarray(save_table).save('able1.jpg')
    # for cell in tables:
    #     # print('*'*5, cell[1:])
    #     cell_w, cell_h = cell[1][2], cell[1][3]
    #     cell_ims, text = [], ''
    #     for image in images:
    #         image_im, cell_im = image[0], cell[1]
    #         if image_im[0] > cell_im[0]+cell_im[2]:
    #             continue
    #         if image_im[1] > cell_im[1]+cell_im[3]:
    #             continue
    #         if image_im[6] < cell_im[0]:
    #             continue
    #         if image_im[7] < cell_im[1]:
    #             continue
    #         x0, y0, x1, y1 = max(image_im[0], cell_im[0]), max(image_im[1], cell_im[1]), \
    #                          min(image_im[6], cell_im[0]+cell_im[2]), min(image_im[7], cell_im[1]+cell_im[3])
    #         cell_ims.append([x0, y0, x1, y1])
    #     for i in cell_ims:
    #         try:
    #             cell_im = table_im[i[1]:i[3], i[0]:i[2]]
    #             content = predict(Image.fromarray(cell_im).convert('L'))
    #             for indexi, i in enumerate(content[1]):
    #                 if i[0] > 0.9:
    #                     content[0][indexi] = content[0][indexi][0]
    #                     content[1][indexi] = [-1]
    #             while 1:
    #                 try:
    #                     content[1].remove([-1])
    #                 except:
    #                     break
    #             content = calculate(content)
    #             # Image.fromarray(j[1]).save('found/{}.jpg'.format(''.join(img_path.split('/'))))
    #             torch.cuda.empty_cache()
    #             text += content
    #         except Exception as ex:
    #             print('ocr error', ex)
    #             continue
    #     cell.append([text, cell_w, cell_h])
    #     print('cell text:', text)

    tables = sorted(tables, key=lambda x: x[2]['row_begin'])

    new_table = []
    for i in tables:
        new_table.append([i[2], i[3]])

    return new_table, rows, cols, pos
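
The four nearly identical loops that fill d above implement one rule: a cell edge snaps to a grid line when it lies within half the gap to the neighbouring line (50 px at the outer borders), and every cell spans at least one grid gap. Factored out, the mapping reads as follows (a sketch under the same conventions; points is the sorted col_point or row_point list, and snap_span is our name):

def snap_span(points, lo, hi, border_margin=50):
    """Map a cell edge pair (lo, hi) onto begin/end grid-line indices."""
    begin = end = 0
    for index in range(len(points)):
        margin = border_margin if index == 0 else (points[index] - points[index - 1]) / 2
        if lo > points[index] - margin:
            begin = index           # last line the left/top edge clears
    for index in range(len(points)):
        margin = border_margin if index == len(points) - 1 else (points[index + 1] - points[index]) / 2
        if hi < points[index] + margin:
            end = index             # first line past the right/bottom edge
            break
    return begin, max(end, begin + 1)

# snap_span(col_point, x, x + w) reproduces d['col_begin'], d['col_end'] above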
Example #3
def get_text(request):
    print('get_text start')
    img_path = request.form.get('img_path')
    par = request.form.get('par')
    task_id = request.form.get('task_id')
    FT = request.form.get('FT')
    page = request.form.get('page')
    print(img_path)
    try:
        if img_path.lower().endswith('.pdf'):
            pdf = fitz.open(img_path)
            page_num = pdf[int(page) - 1]
            trans = fitz.Matrix(3, 3).preRotate(0)
            pm = page_num.getPixmap(matrix=trans, alpha=False)
            ori_img = fourier_demo(Image.frombytes("RGB", [pm.width, pm.height], pm.samples), 'FT001')
        else:
            ori_img = fourier_demo(Image.open(img_path).convert('RGB'), 'FT001')
        ft = select(FT[:11] + '001')
        print('FT:', FT[:11] + '001')

        # input_img = input_img.resize((2000, 2000), Image.ANTIALIAS)
        input_img = ori_img.copy()
        ori_w, ori_h = ori_img.size
        # data_image = str(os.path.splitext(img_path)[0].split('/')[-1]) + '_' + str(page)
        # data_image = '/home/ddwork/wce_data/ori_images/{}_{}.jpg'.format(data_image, task_id)
        # input_img.save(data_image)
        # input_img = np.array(input_img)
        ori_img = np.array(ori_img)
        _, ori = cv2.imencode('.jpg', ori_img)
        ori = base64.b64encode(ori.tobytes())
        # input_img = seal_eliminate(input_img)
        import time
        start = time.time()
        while_i = 0
        images = []
        # shrink the image a little more on each retry until detection succeeds
        while True:
            input_img.thumbnail((2000 - while_i * 100, 2000 - while_i * 100), Image.ANTIALIAS)
            # img_name = img_name.resize((1500, 1500), Image.ANTIALIAS)
            # img_name = img_name.convert('RGB')
            scale_w, scale_h = input_img.size
            # print(scale_w, scale_h)
            scale_w, scale_h = ori_w / scale_w, ori_h / scale_h
            print('original size:', ori_w, ori_h, 'scale:', scale_w, scale_h)
            img = np.array(input_img)
            # B_channel, G_channel, R_channel = cv2.split(img)
            # cv2.imwrite('test.png', R_channel)
            # img = cv2.cvtColor(R_channel, cv2.COLOR_GRAY2BGR)
            _, img = cv2.imencode('.jpg', img)
            img = base64.b64encode(img.tobytes())
            data = {'img': img, 'scale_w': scale_w, 'scale_h': scale_h, 'ori_img': ori}
            images_json = requests.post(DETECT_URL, data=data)
            # images = text_predict(img, scale_w, scale_h, ori_img)
            torch.cuda.empty_cache()
            while_i += 1
            if images_json.json() != '':
                for i in images_json.json():
                    image = base64.b64decode(i[1])
                    image = np.frombuffer(image, np.uint8)  # np.fromstring is removed
                    image = cv2.imdecode(image, cv2.IMREAD_COLOR)
                    images.append([i[0], image])
                break
        print('detect time:', time.time() - start)
        start = time.time()
        # image_positions = [[i[0].tolist(), rec_txt(i[1]).replace('“', '').replace('"', '')] for i
        #                    in
        #                    images]
        image_positions = []
        for index, j in enumerate(images):
            # if j[1].any() and j[1].shape[0] < j[1].shape[1] * 1.5:
            try:

                _, img = cv2.imencode('.jpg', j[1])
                img = base64.b64encode(img.tobytes())
                data = {'img': img}
                content = requests.post(RECOGNISE_URL, data=data).json()[:2]
                # ori_content = [i[0] for i in content[0]]
                # prob_content = [[i, j] for i, j in zip(content[0], content[1])]
                for indexi, i in enumerate(content[1]):
                    if i[0] > 0.9:
                        content[0][indexi] = content[0][indexi][0]
                        content[1][indexi] = [-1]
                content[1] = [p for p in content[1] if p != [-1]]
                content = calculate(content)

                image_positions.append([j[0], content.replace('“', '').replace('‘', '')])
            except Exception as e:
                print('h w', e)
                continue
        # torch.cuda.empty_cache()
        # data_json[task_id] = [par, data_image, FT, image_positions]
        # data_json = WCE.create(field_id=int(task_id), par=str(par), image_path=data_image, FT=FT, file_type=FT[:11],
        #                        image_positions=str(image_positions), edited=False, trained=False)
        # data_json.save()
        print('recognise time:', time.time() - start)
        text = single_ocr(image_positions)
        print(text)
        # with open(img_path + '.txt', 'w', encoding='utf-8') as fd:
        #     fd.write(text)
        texts = ft.extract_info(img_path, page, FT[:11] + '001', text)
        print(texts)
        # try:
        #     found = get_warp(input_img, image_positions, FT)
        #     print('ssssssssssssssssssssssss')
        #     found_texts = get_texts('warp_templates/{}/template.xml'.format(FT), found, img_path, task_id)
        # except Exception as e:
        found_texts = ''
        # print(e)
        print('==================================================================')
        print(texts, found_texts)
        torch.cuda.empty_cache()
        # qualification certificate: switch FT to the detected version
        if FT[:11] == 'FT001003110':
            FT = FT[:8] + texts.get('version')
        # fall back to a date parsed from the file name
        try:
            if not texts.get('发证日期'):
                import re
                date_path = re.search('([0-9]{4}[-/年][0-9]{1,2}[-/月][0-9]{1,2}日?)', os.path.split(img_path)[1])
                if date_path:
                    texts['发证日期'] = date_path.groups()[0]
        except Exception:
            pass

        if texts == 'FT999' and found_texts:
            return response.json(
                {'result': 'true', 'message': '请求成功', 'taskid': task_id, 'fields': found_texts, 'FT': FT})
        if texts != 'FT999' and found_texts == '':
            return response.json({'result': 'true', 'message': '请求成功', 'taskid': task_id, 'fields': texts, 'FT': FT})
        if found_texts:
            for key, value in texts.items():
                try:
                    if value == '':
                        texts[key] = found_texts[key]
                except:
                    continue
        blank = 0
        for key, value in texts.items():
            if value == '':
                blank += 1
        if blank == len(texts) - 1:
            return response.json(
                {'result': 'false', 'message': '请求失败', 'taskid': task_id, 'fields': {}, 'FT': 'FT999999999'})
        else:
            return response.json({'result': 'true', 'message': '请求成功', 'taskid': task_id, 'fields': texts, 'FT': FT})
    except Exception as e:
        print(e)
        return response.json(
            {'result': 'false', 'message': '请求失败', 'taskid': task_id, 'fields': {}, 'FT': 'FT999999999'})
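
Several of these functions repeat the same HTTP round-trip: JPEG-encode an ndarray, base64 it, POST it to the detection service, then decode the returned [position, base64-crop] pairs. Collected into helpers it would look roughly like this (a sketch; DETECT_URL and the response shape are taken from the snippets above, the helper names are ours):

import base64

import cv2
import numpy as np
import requests

def encode_jpeg_b64(img):
    """ndarray -> base64-encoded JPEG, the payload format the services expect."""
    _, buf = cv2.imencode('.jpg', img)
    return base64.b64encode(buf.tobytes())

def detect_text_regions(url, img, scale_w=1, scale_h=1, ori_img=None):
    """POST an image to the detector and decode its [position, crop] pairs."""
    ori = encode_jpeg_b64(ori_img if ori_img is not None else img)
    data = {'img': encode_jpeg_b64(img), 'scale_w': scale_w,
            'scale_h': scale_h, 'ori_img': ori}
    regions = []
    payload = requests.post(url, data=data).json()
    if payload != '':
        for pos, crop_b64 in payload:
            crop = cv2.imdecode(
                np.frombuffer(base64.b64decode(crop_b64), np.uint8),
                cv2.IMREAD_COLOR)
            regions.append([pos, crop])
    return regions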
Example #4
def generate_table(cell, ori_img):
    # import pickle
    # pickle.dump(cell, open('table.pkl', 'wb'))
    pos, cols, rows, col_point, row_point, tables = cell[0][1], cell[1], cell[2], cell[3], cell[4], cell[5]
    print('table pos:', pos)
    table_im = ori_img[pos[1]:pos[1] + pos[3], pos[0]:pos[0] + pos[2]]
    # table_line_regions = text_predict(table_im, 1, 1, table_im)
    table_line_regions = []
    _, img = cv2.imencode('.jpg', table_im)
    img = base64.b64encode(img.tobytes())
    data = {'img': img, 'scale_w': 1, 'scale_h': 1, 'ori_img': img}
    images_json = requests.post(DETECT_URL, data=data)
    if images_json.json() != '':
        for i in images_json.json():
            image = base64.b64decode(i[1])
            image = np.frombuffer(image, np.uint8)
            image = cv2.imdecode(image, cv2.IMREAD_COLOR)
            table_line_regions.append([i[0], image])

    torch.cuda.empty_cache()
    word_list = []
    for region in table_line_regions:
        region_y = [region[0][1], region[0][5]]
        region_x = [region[0][0], region[0][2]]
        # content = predict(Image.fromarray(region[1]).convert('L'))
        # torch.cuda.empty_cache()
        # content = (content[0][0], content[0][1], content[1])
        _, img = cv2.imencode('.jpg', region[1])
        img = base64.b64encode(img.tobytes())
        data = {'img': img}
        contents = requests.post(RECOGNISE_URL, data=data).json()
        content, x = contents[:2], contents[2]
        for indexi, cont in enumerate(content[1]):
            if cont[0] > 0.9:
                content[0][indexi] = content[0][indexi][0]
                content[1][indexi] = [-1]
        content[1] = [p for p in content[1] if p != [-1]]
        # x = content[2]
        content = calculate(content)
        for index, word in enumerate(content):
            word_list.append(
                [[x[index][0] + region_x[0], region_y[0], x[index][1] + region_x[0], region_y[0], x[index][0]
                  + region_x[0], region_y[1], x[index][1] + region_x[0], region_y[1]], word])
    # print(word_list)
    # for region in table_line_regions:
    #     cv2.rectangle(table_im, (region[0][0], region[0][1]), (region[0][6], region[0][7]), (0, 255, 0), 1)
    # for i in word_list:
    #     cv2.rectangle(table_im, (i[0][0], i[0][1]), (i[0][6], i[0][7]), (255, 0, 0), 1)
    #
    # Image.fromarray(table_im).save('single_word.jpg')

    col_point = sorted(col_point)
    row_point = sorted(row_point)
    tables = sorted(tables, key=lambda i: i[1][3])[:-1]
    tables = sorted(tables, key=lambda i: i[1][0] + i[1][1])

    for i in tables:
        d = {'col_begin': 0, 'col_end': 0, 'row_begin': 0, 'row_end': 0}
        for index, value in enumerate(col_point):
            if index == 0:
                d_range = 50
            else:
                d_range = (col_point[index] - col_point[index - 1]) / 2
            if i[1][0] > col_point[index] - d_range:
                # print(33333333333, i[1], index)
                d['col_begin'] = index
        for index, value in enumerate(col_point):
            if index == len(col_point) - 1:
                d_range = 50
            else:
                d_range = (col_point[index + 1] - col_point[index]) / 2
            if i[1][0] + i[1][2] < col_point[index] + d_range:
                d['col_end'] = index
                break
        for index, value in enumerate(row_point):
            if index == 0:
                d_range = 50
            else:
                d_range = (row_point[index] - row_point[index - 1]) / 2
            if i[1][1] > row_point[index] - d_range:
                d['row_begin'] = index
        for index, value in enumerate(row_point):
            if index == len(row_point) - 1:
                d_range = 50
            else:
                d_range = (row_point[index + 1] - row_point[index]) / 2
            if i[1][1] + i[1][3] < row_point[index] + d_range:
                d['row_end'] = index
                break
        i.append(d)

    # for index, i in enumerate(tables):
    #     texts = ''
    #     try:
    #         print(11111111, i[1:])
    #         i[0] = Image.fromarray(i[0])
    #         new_i = i[0].copy()
    #         ori_w, ori_h = i[0].size
    #         new_i.thumbnail((1500, 1500), Image.ANTIALIAS)
    #         scale_w, scale_h = new_i.size
    #         scale_w, scale_h = ori_w / scale_w, ori_h / scale_h
    #         new_i = np.array(new_i.convert('RGB'))
    #         # print(type(new_i))
    #         # Image.fromarray(new_i).save('core.jpg')
    #         if new_i.shape[1] > 16 and new_i.shape[0] > 16:
    #             images = text_predict(new_i, scale_w, scale_h, np.array(i[0]))
    #             torch.cuda.empty_cache()
    #             # images = text_predict(new_i)
    #         else:
    #             i.append([texts, i[1][2], i[1][3]])
    #             continue
    #         # torch.cuda.empty_cache()
    #         if images:
    #             for image in sorted(images, key=lambda ii: ii[0][1]):
    #                 content = predict(Image.fromarray(image[1]).convert('L'))
    #                 for indexi, cont in enumerate(content[1]):
    #                     if cont[0] > 0.9:
    #                         content[0][indexi] = content[0][indexi][0]
    #                         content[1][indexi] = [-1]
    #                 while 1:
    #                     try:
    #                         content[1].remove([-1])
    #                     except:
    #                         break
    #                 content = calculate(content)
    #                 # print('content', content)
    #                 texts += content
    #
    #         elif new_i.any() and new_i.shape[0] < new_i.shape[1] * 1.5:
    #             try:
    #                 content = predict(Image.fromarray(new_i).convert('L'))
    #                 for indexi, cont in enumerate(content[1]):
    #                     if cont[0] > 0.9:
    #                         content[0][indexi] = content[0][indexi][0]
    #                         content[1][indexi] = [-1]
    #                 while 1:
    #                     try:
    #                         content[1].remove([-1])
    #                     except:
    #                         break
    #                 content = calculate(content)
    #                 texts += content
    #             except Exception as ex:
    #                 print('small_image_warning', ex)
    #         i.append([texts, i[1][2], i[1][3]])
    #         # print('54321')
    #     except Exception as e:
    #         print('table_text warning', e)

    for i in tables:
        cell_region = [i[1][0], i[1][1], i[1][0] + i[1][2], i[1][1] + i[1][3]]
        word_str = []
        for word in word_list:
            word_center_point = ((word[0][0] + word[0][2]) / 2, (word[0][1] + word[0][5]) / 2)
            if cell_region[0] < word_center_point[0] < cell_region[2] and cell_region[1] < word_center_point[1] < \
                    cell_region[3]:
                word_str.append(word)
        word_str = sorted(word_str, key=lambda x: x[0][1])
        word_lines = []
        word_temp = []
        for index, word in enumerate(word_str):
            if len(word_temp) == 0:
                word_temp.append(word)
                continue
            if word[0][1] == word_temp[-1][0][1]:
                word_temp.append(word)
            else:
                word_temp = sorted(word_temp, key=lambda x: x[0][0])
                word_lines.append(word_temp)
                word_temp = [word]
            if index == len(word_str) - 1:
                if len(word_temp) != 0:
                    word_lines.append(word_temp)
        word_str = ''
        for line in word_lines:
            for word in line:
                word_str += word[1]
        i.append([word_str, i[1][2], i[1][3]])

    new_table = []
    for i in tables:
        new_table.append([i[2], i[3]])

    return new_table, rows, cols, pos
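
Examples #2, #4 and #5 all assign a recognised character to a table cell when its centre falls inside the cell rectangle, then rebuild reading order by grouping characters that share a top y coordinate into lines and sorting each line by x. As a standalone function over the same [8-point box, char] pairs stored in word_list (a sketch; the function name is ours):

def cell_text(word_list, cell_region):
    """Join the characters whose centres fall inside cell_region (x0, y0, x1, y1)."""
    x0, y0, x1, y1 = cell_region
    inside = [w for w in word_list
              if x0 < (w[0][0] + w[0][2]) / 2 < x1
              and y0 < (w[0][1] + w[0][5]) / 2 < y1]
    lines = {}
    for w in inside:                     # group by the box's top y coordinate
        lines.setdefault(w[0][1], []).append(w)
    return ''.join(w[1]
                   for top in sorted(lines)
                   for w in sorted(lines[top], key=lambda w: w[0][0]))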
Example #5
    def generate_table(self, cell):
        def closest_index(points: list, target):
            # index of the grid line nearest to target; the original relied on
            # a fragile `and`/`or` precedence chain to the same effect
            return min(range(len(points)), key=lambda i: abs(points[i] - target))

        pos, cols, rows, col_point, row_point, tables = cell[0][1], cell[
            1], cell[2], cell[3], cell[4], cell[5]

        table_im = self.image[pos[1]:pos[1] + pos[3], pos[0]:pos[0] + pos[2]]
        table_copy = table_im.copy()

        text_rects = text_detection.predict(table_im, 1, 1, table_copy)

        char_info = []
        for text_rect in text_rects:
            region_y = [text_rect[0][1], text_rect[0][5]]  # text rectangle ymin, ymax
            region_x = [text_rect[0][0], text_rect[0][2]]  # text rectangle xmin, xmax

            char_list, prob_list, char_positions = text_recognition.predict(
                Image.fromarray(text_rect[1]).convert('L'))

            for index, top5_confidence in enumerate(prob_list):
                if top5_confidence[0] > 0.5:
                    # keep the top-1 char when confidence > 50%
                    char_list[index] = char_list[index][0]
                    # mark the confirmed char so it is filtered out below
                    prob_list[index] = [-1]

            prob_list = list(filter(lambda x: x[0] != -1, prob_list))

            content = [char_list, prob_list, char_positions]
            content = calculate(
                content
            )  # replace low confidence char recognition result by edit distance
            for index, char in enumerate(content):
                char_left, char_right = char_positions[index]
                char_info.append([[
                    char_left + region_x[0], region_y[0],
                    char_right + region_x[0], region_y[0],
                    char_left + region_x[0], region_y[1],
                    char_right + region_x[0], region_y[1]
                ], char])

        for text_rect in text_rects:
            xmin, ymin = text_rect[0][0], text_rect[0][1]
            xmax, ymax = text_rect[0][6], text_rect[0][7]
            cv2.rectangle(table_copy, (xmin, ymin), (xmax, ymax), (0, 255, 0),
                          1)
        save_result(table_copy,
                    char_info,
                    color=(255, 0, 0),
                    thickness=2,
                    save_dir='char_position',
                    file=self.file)

        col_point = sorted(col_point)
        row_point = sorted(row_point)

        for table in tables:
            cell_dict = {
                'col_begin': 0,
                'col_end': 0,
                'row_begin': 0,
                'row_end': 0
            }
            cell_x, cell_y, cell_w, cell_h = table[1]
            cell_dict['col_begin'] = closest_index(col_point, cell_x)
            cell_dict['col_end'] = closest_index(col_point, cell_x + cell_w)
            cell_dict['row_begin'] = closest_index(row_point, cell_y)
            cell_dict['row_end'] = closest_index(row_point, cell_y + cell_h)
            table.append(cell_dict)

            cell_char = []
            for info in char_info:
                char_xmin, char_xmax, char_ymin, char_ymax = info[0][0], info[
                    0][2], info[0][3], info[0][5]
                char_xcenter, char_ycenter = np.mean(
                    [[char_xmin, char_ymin], [char_xmax, char_ymax]], axis=0)

                if cell_x < char_xcenter < cell_x + cell_w and cell_y < char_ycenter < cell_y + cell_h:
                    cell_char.append(info)

            cell_char = sorted(cell_char, key=lambda x: x[0][1])
            print('cell_char:', cell_char)
            cell_text = []
            text_temp = []
            if len(cell_char) == 1:
                cell_text = [cell_char]
            else:
                for index, char in enumerate(cell_char):
                    if len(text_temp) == 0:
                        text_temp.append(char)
                        continue
                    if char[0][1] == text_temp[-1][0][1]:
                        text_temp.append(char)
                    else:
                        text_temp = sorted(text_temp, key=lambda x: x[0][0])
                        cell_text.append(text_temp)
                        text_temp = [char]
                    if index == len(cell_char) - 1:
                        if len(text_temp) != 0:
                            cell_text.append(text_temp)

            cell_text = "".join(
                [char[1] for line in cell_text for char in line])
            print('cell_text:', cell_text)
            table.append([cell_text, table[1][2], table[1][3]])

        new_table = []
        for table in tables:
            new_table.append([table[2], table[3]])
        return new_table, rows, cols, pos
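
Unlike the half-gap rule of the earlier examples, this version snaps each cell edge to the nearest grid line outright, which is easy to sanity-check in isolation (the values below are made up for illustration):

col_point = [10, 120, 260, 400]

def closest(points, target):
    return min(range(len(points)), key=lambda i: abs(points[i] - target))

assert closest(col_point, 118) == 1   # an edge near the second grid line
assert closest(col_point, 395) == 3   # an edge near the last grid line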
Example #6
def single_ocr(document, img_name, start_page, new_url):
    start2 = time.time()
    img_name = skew_detect.get_rotated_img(img_name)

    ori_img = np.array(img_name)
    _, ori = cv2.imencode('.jpg', ori_img)
    ori = base64.b64encode(ori.tobytes())

    ori_w, ori_h = img_name.size
    print('rotation time:', time.time() - start2)
    while_i = 0
    start = time.time()
    images = []
    # shrink the image a little more on each retry until detection succeeds
    while True:
        img_name.thumbnail((1500 - while_i * 100, 1500 - while_i * 100), Image.ANTIALIAS)
        # img_name = img_name.resize((1500, 1500), Image.ANTIALIAS)
        # img_name = img_name.convert('RGB')
        scale_w, scale_h = img_name.size
        # print(scale_w, scale_h)
        scale_w, scale_h = ori_w / scale_w, ori_h / scale_h
        print('original size:', ori_w, ori_h, 'scale:', scale_w, scale_h)
        img = np.array(img_name)
        # B_channel, G_channel, R_channel = cv2.split(img)
        # cv2.imwrite('test.png', R_channel)
        # img = cv2.cvtColor(R_channel, cv2.COLOR_GRAY2BGR)
        _, img = cv2.imencode('.jpg', img)
        img = base64.b64encode(img.tobytes())
        data = {'img': img, 'scale_w': scale_w, 'scale_h': scale_h, 'ori_img': ori}
        images_json = requests.post(DETECT_URL, data=data)
        # images = text_predict(img, scale_w, scale_h, ori_img)
        torch.cuda.empty_cache()
        while_i += 1
        if images_json.json() != '':
            for i in images_json.json():
                image = base64.b64decode(i[1])
                image = np.frombuffer(image, np.uint8)
                image = cv2.imdecode(image, cv2.IMREAD_COLOR)
                images.append([i[0], image])
            break
    print('ctpn time: ', time.time() - start)

    results = []
    start = time.time()
    # print(1111111, images)
    for index, j in enumerate(images):
        # if j[1].any() and j[1].shape[0] < j[1].shape[1] * 1.5:
        try:

            _, img = cv2.imencode('.jpg', j[1])
            img = base64.b64encode(img.tobytes())
            data = {'img': img}
            content = requests.post(RECOGNISE_URL, data=data).json()[:2]
            # ori_content = [i[0] for i in content[0]]
            # prob_content = [[i, j] for i, j in zip(content[0], content[1])]
            for indexi, i in enumerate(content[1]):
                if i[0] > 0.9:
                    content[0][indexi] = content[0][indexi][0]
                    content[1][indexi] = [-1]
            content[1] = [p for p in content[1] if p != [-1]]
            content = calculate(content)

            results.append([j[0], content.replace('“', '').replace('‘', '')])
        except Exception as e:
            print('h w', e)
            continue
    print('recognise time:', time.time() - start)
    print(results)
    start = time.time()
    torch.cuda.empty_cache()
    results = sorted(results, key=lambda i: i[0][1])
    new_results = results
    line_images = []
    cut_index = 0
    curr_index = 0
    for index, i in enumerate(new_results):
        if index == len(new_results) - 1:
            if cut_index < index:
                line_images.append(new_results[cut_index:index])
                line_images.append(new_results[index:])
            else:
                line_images.append(new_results[index:])
            break
        # if abs(new_results[index + 1][0][1] - new_results[index][0][1]) > (
        #         new_results[index][0][7] - new_results[index][0][1]) * 4 / 5:
        #     line_images.append(new_results[cut_index: index + 1])
        #     cut_index = index + 1
        if abs(new_results[index + 1][0][1] - new_results[curr_index][0][1]) > (
                new_results[curr_index][0][7] - new_results[curr_index][0][1]) * 4 / 5:
            line_images.append(new_results[cut_index: index + 1])
            cut_index = index + 1
            curr_index = index + 1
    for index, i in enumerate(line_images):
        line_images[index] = sorted(i, key=lambda a: a[0][0])
    texts = []
    position = []
    for i in line_images:
        text = ''
        for index, j in enumerate(i):
            try:
                position.append([j[0], j[1]])
                if index == len(i) - 1:
                    text += j[1]
                elif abs(i[index + 1][0][0] - i[index][0][6]) > 3 * (
                        abs(i[index][0][6] - i[index][0][0]) / len(i[index][1])):
                    text += j[1] + ' '
                else:
                    text += j[1]
            except:
                continue
        texts.append([[i[0][0], i[-1][0]], text])
    print(img_name.size)

    document = unetParse(ori_img, texts, document).save2docx()
    # try:
    #     texts = sort_paragraph(Image.fromarray(ori_img), texts)
    # except Exception as e:
    #     print(e)
    #     return document, position
    # document = save2docx(document, texts, Image.fromarray(ori_img), start_page)
    print('layout/table time:', time.time() - start)
    return document, position
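
The space-insertion rule inside the line loop above deserves a note: two fragments on the same line are joined with a space only when the horizontal gap between them exceeds three average character widths of the left fragment. As a predicate over the 8-value boxes used throughout (a sketch; needs_space is our name, and the max() guards against empty text):

def needs_space(left_box, left_text, right_box):
    """True when the gap between two same-line fragments reads as a space."""
    avg_char_w = abs(left_box[6] - left_box[0]) / max(len(left_text), 1)
    gap = abs(right_box[0] - left_box[6])
    return gap > 3 * avg_char_w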
Example #7
def get_text(request):
    img_path = request.get('img_path')
    par = request.get('par')
    task_id = request.get('task_id')
    FT = request.get('FT')
    page = request.get('page')
    print(img_path)
    try:
        if img_path.lower().endswith('.pdf'):
            pdf = fitz.open(img_path)
            page_num = pdf[int(page) - 1]
            trans = fitz.Matrix(3, 3).preRotate(0)
            pm = page_num.getPixmap(matrix=trans, alpha=False)
            ori_img = fourier_demo(
                Image.frombytes("RGB", [pm.width, pm.height], pm.samples),
                'FT001')
        else:
            ori_img = fourier_demo(
                Image.open(img_path).convert('RGB'), 'FT001')
        f = select(FT[:11] + '001')
        print('FT:', FT[:11] + '001')

        # input_img = input_img.resize((2000, 2000), Image.ANTIALIAS)
        ori_w, ori_h = ori_img.size
        input_img = ori_img.copy()
        input_img.thumbnail((2000, 2000), Image.ANTIALIAS)
        # input_img = input_img.resize((2000, 2000), Image.ANTIALIAS)
        scale_w, scale_h = input_img.size
        scale_w, scale_h = ori_w / scale_w, ori_h / scale_h
        input_img = input_img.convert('RGB')
        data_image = str(
            os.path.splitext(img_path)[0].split('/')[-1]) + '_' + str(page)
        data_image = '/home/ddwork/wce_data/ori_images/{}_{}.jpg'.format(
            data_image, task_id)
        input_img.save(data_image)
        input_img = np.array(input_img)
        # input_img = seal_eliminate(input_img)
        import time
        start = time.time()
        print("text_predict zhiqian")
        images = text_predict(input_img, scale_w, scale_h, ori_img)
        print("text_predict zhihou")
        torch.cuda.empty_cache()

        print(111111111111111111111111111, "SAD", time.time() - start, 'HAPPY')
        start = time.time()
        # image_positions = [[i[0].tolist(), rec_txt(i[1]).replace('“', '').replace('"', '')] for i
        #                    in
        #                    images]
        image_positions = []
        for j in images:
            try:
                print("predict front!!!!!!!!!!!!!!")
                content = predict(Image.fromarray(j[1]).convert('L'))
                print("predict back!!!!!!!!!!!!!!")
                # mark-and-filter as in the other examples; the original popped
                # from content[1] while iterating, which skips entries
                for index, i in enumerate(content[1]):
                    if i[0] > 0.9:
                        content[0][index] = content[0][index][0]
                        content[1][index] = [-1]
                content[1] = [p for p in content[1] if p != [-1]]
                # if i[0] < 0.9:
                #     img = Image.fromarray(j[1]).convert('L')
                #     width, height = img.size[0], img.size[1]
                #     scale = height * 1.0 / 32
                #     width = int(width / scale)
                #
                #     img = img.resize([width, 32], Image.ANTIALIAS)
                #     img = np.array(img)
                #     new_img = img[:, (content[2][index] - 1) * 8:(content[2][index] + 2) * 8]
                #     word, prob = attention(new_img)
                #     if prob > 0.9:
                #         content[0][index] = word[0]
                #         content[1].pop(index)
                # else:
                #     content[0][index] = content[0][index][0]
                #     content[1].pop(index)
                content = calculate(content)
                image_positions.append(
                    [j[0], content.replace('“', '').replace('‘', '')])
            except Exception as e:
                print(e)
                continue
        # torch.cuda.empty_cache()
        # data_json[task_id] = [par, data_image, FT, image_positions]
        data_json = WCE.create(field_id=int(task_id),
                               par=str(par),
                               image_path=data_image,
                               FT=FT,
                               file_type=FT[:11],
                               image_positions=str(image_positions),
                               edited=False,
                               trained=False)
        data_json.save()
        print('recognise time:', time.time() - start)
        text = single_ocr(image_positions)
        print(text)
        # with open(img_path + '.txt', 'w', encoding='utf-8') as fd:
        #     fd.write(text)
        texts = f.extract_info(img_path, page, FT[:11] + '001', text)
        print(texts)
        # try:
        #     found = get_warp(input_img, image_positions, FT)
        #     found_texts = get_texts('warp_templates/{}/template.xml'.format(FT), found, img_path, task_id)
        # except Exception as e:
        #     print(e)
        found_texts = ''
        print(
            '=================================================================='
        )
        print(texts, found_texts)
        torch.cuda.empty_cache()
        # qualification certificate: switch FT to the detected version
        if FT[:11] == 'FT001003110':
            FT = FT[:8] + texts.get('version')
        # fall back to a date parsed from the file name
        try:
            if not texts.get('发证日期'):
                import re
                date_path = re.search(
                    '([0-9]{4}[-/年][0-9]{1,2}[-/月][0-9]{1,2}日?)',
                    os.path.split(img_path)[1])
                if date_path:
                    texts['发证日期'] = date_path.groups()[0]
        except Exception:
            pass

        if texts == 'FT999' and found_texts:
            return {
                'result': 'true',
                'message': '请求成功',
                'taskid': task_id,
                'fields': found_texts,
                'FT': FT
            }
        if texts != 'FT999' and found_texts == '':
            return {
                'result': 'true',
                'message': '请求成功',
                'taskid': task_id,
                'fields': texts,
                'FT': FT
            }
        if found_texts:
            for key, value in texts.items():
                try:
                    if value == '':
                        texts[key] = found_texts[key]
                except:
                    continue
        blank = 0
        for key, value in texts.items():
            if value == '':
                blank += 1
        if blank == len(texts) - 1:
            return {
                'result': 'false',
                'message': '请求失败',
                'taskid': task_id,
                'fields': {},
                'FT': 'FT999999999'
            }
        else:
            return {
                'result': 'true',
                'message': '请求成功',
                'taskid': task_id,
                'fields': texts,
                'FT': FT
            }
    except Exception as e:
        print(e)
        return {
            'result': 'false',
            'message': '请求失败',
            'taskid': task_id,
            'fields': {},
            'FT': 'FT999999999'
        }
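
The date fallback in Examples #3 and #7 pulls a date out of the file name with a single regex that accepts both ASCII and CJK separators. A quick check of what it matches (the file names are invented for illustration):

import re

pattern = '([0-9]{4}[-/年][0-9]{1,2}[-/月][0-9]{1,2}日?)'
assert re.search(pattern, 'licence_2019-07-01.jpg').groups()[0] == '2019-07-01'
assert re.search(pattern, '证书2019年7月1日.jpg').groups()[0] == '2019年7月1日'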
Example #8
def fast_ocr(request):
    ori_time = time.time()
    ori_start_time = datetime.now()
    print('start...')
    img_path = request.form.get('img_path')
    print(img_path)
    print(request.form.get('position'))
    position = '[' + request.form.get('position') + ']'
    rotate = int(request.form.get('rotate'))

    page = request.form.get('pageNum', 1)

    # FT = request.form.get('FT', None)
    # file_type = request.form.get('file_type', None)
    # par_code = request.form.get('par_code', None)
    # project_id = request.form.get('project_id', None)
    #
    # with open('/home/ddwork/projects/compound_log/project_infos/fast_ocr.log', 'a', encoding='utf-8') as f:
    #     f.write(str(FT) + '\t' + str(file_type) + '\t' + str(par_code) + '\t' + str(project_id) + '\t' + str(img_path) + '\t' + str(page) + '\n')
    print(page)
    import ast
    position = ast.literal_eval(position)  # safer than eval() on request input
    if img_path.lower().endswith('pdf'):
        image_w = int(request.form.get('imageW'))
        image_h = int(request.form.get('imageH'))
        pdf = fitz.open(img_path)
        page = pdf[int(page) - 1]
        trans = fitz.Matrix(3, 3).preRotate(0)
        pm = page.getPixmap(matrix=trans, alpha=False)
        img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples)
        img = np.array(img)
    else:
        img = np.array(Image.open(img_path).convert('RGB'))
        image_h, image_w = img.shape[:-1]
    print('original image:', img.shape)
    # crop_img = img[position[1]:position[3], position[0]:position[2]]
    # crop_img = np.array(crop_img)
    # crop_img = rotate_img(Image.fromarray(crop_img), rotate).convert('L')
    # ori_img = np.array(crop_img)

    # img = rotate_img(Image.fromarray(img), rotate).convert('L')
    # img = Image.fromarray(img)
    print('rotate', rotate)

    # Image.fromarray(img).save('11111111.jpg')

    img_h, img_w, c = img.shape
    position[0] = position[0] if position[0] > 0 else 0
    position[1] = position[1] if position[1] > 0 else 0  # fixed: the original re-checked position[0]
    ori_img = img[int(position[1] * img_h / image_h):int(position[3] * img_h /
                                                         image_h),
                  int(position[0] * img_w / image_w):int(position[2] * img_w /
                                                         image_w)]
    ori_img = rotate_img(Image.fromarray(ori_img), rotate).convert('L')
    # ori_w, ori_h = ori_img.size
    crop_img = np.array(ori_img.convert('RGB'))
    # Image.fromarray(crop_img).save('11111111.jpg')

    table_infos = []

    # skip table extraction when the selected region is much wider than tall
    # TODO this heuristic may be unreliable
    # print(111111111111111111, image.shape[0], image.shape[1])
    start_table_time = time.time()
    if crop_img.shape[1] / crop_img.shape[0] > 3 and \
            crop_img.shape[1] / np.array(ori_img).shape[1] < 0.3:
        print('skipping table extraction')
    else:
        try:
            # check whether the region contains a table
            # from config_url import TABLE_URL
            # import base64, requests
            # retval, buffer = cv2.imencode('.jpg', crop_img)
            # pic_str = base64.b64encode(buffer)
            # pic_str = pic_str.decode()
            # r = requests.post(TABLE_URL, data={"img": pic_str})
            # img_byte = base64.b64decode(r.content.decode("utf-8"))
            # img_np_arr = np.fromstring(img_byte, np.uint8)
            # src = cv2.imdecode(img_np_arr, cv2.IMREAD_COLOR)
            tables = extract_table(crop_img)
            texts_table = []
            if tables:
                if tables != 'not table':
                    for table in tables:
                        table_time = time.time()
                        texts_table.append(
                            ['table', generate_table(table, crop_img)])
                        print('generate_table time is ',
                              time.time() - table_time)

                for table in texts_table:
                    cell_info = []
                    # rows x cols grid (the original comprehension names were swapped)
                    table_info = [['' for _ in range(table[1][2])]
                                  for _ in range(table[1][1])]
                    for tb in table[1][0]:
                        d = tb[0]
                        for row in range(d['row_begin'], d['row_end']):
                            for col in range(d['col_begin'], d['col_end']):
                                try:
                                    table_info[row][col] += tb[1][0]
                                    if d not in cell_info:
                                        cell_info.append(d)
                                except:
                                    print('cell error')
                    print(f'###start{str(table_info)}end###')
                    x0, y0, x1, y1 = table[-1][-1][0], table[-1][-1][1], table[-1][-1][0]+table[-1][-1][2], \
                                     table[-1][-1][1]+table[-1][-1][3]

                    new_cell_info = []
                    for cell in cell_info:
                        if cell['row_end'] - cell['row_begin'] == 1 and cell[
                                'col_end'] - cell['col_begin'] == 1:
                            continue
                        new_cell_info.append(
                            [[cell['row_begin'], cell['col_begin']],
                             [cell['row_end'] - 1, cell['col_end'] - 1]])
                    cell_info = new_cell_info

                    table_infos.append([[
                        [x0, y0, x1, y1], [x0, y0, x1, y1]
                    ], f'###start{str(table_info)}******{str(cell_info)}end###'
                                        ])
                    # return response.text(f'###start{str(table_info)}end###')
        except Exception as ex:
            print('table error', ex)
    print('table detect time is ', time.time() - start_table_time)
    # crop_img = cv2.copyMakeBorder(crop_img, int(image_h / 2), int(image_h / 2), int(image_w / 2), int(image_w / 2),
    #                            cv2.BORDER_REPLICATE)
    # short_size = 640
    # h, w = crop_img.shape[:2]
    # short_edge = min(h, w)
    # if short_edge < short_size:
    #     # ensure the short edge >= input size
    #     scale = short_size / short_edge
    #     if scale > 1:
    #         crop_img = cv2.resize(crop_img, dsize=None, fx=scale, fy=scale)
    # ori_img = np.array(ori_img)
    # _, ori = cv2.imencode('.jpg', ori_img)
    # ori = base64.b64encode(ori.tostring())
    crop_img = Image.fromarray(crop_img)
    while_i = 0
    st_time = time.time()
    # crop_area = []
    # single pass here; the shrink-and-retry loop used elsewhere is disabled
    while True:
        crop_img.thumbnail((1500 - while_i * 100, 1500 - while_i * 100),
                           Image.ANTIALIAS)
        # crop_img = crop_img.resize((1500, 1500))
        # scale_w, scale_h = crop_img.size
        # scale_w, scale_h = ori_w / scale_w, ori_h / scale_h
        # crop_img = crop_img.resize((1000, 1000))
        # crop_img.save('111.jpg')
        crop_img = np.array(crop_img)
        # _, img = cv2.imencode('.jpg', crop_img)
        # img = base64.b64encode(img.tostring())
        # data = {'img': img, 'scale_w': 1, 'scale_h': 1, 'ori_img': img}
        # crop_area_json = requests.post(DETECT_URL, data=data)
        # while_i += 1
        # if crop_area_json.json() != '':
        #     for i in crop_area_json.json():
        #         image = base64.b64decode(i[1])
        #         image = np.fromstring(image, np.uint8)
        #         image = cv2.imdecode(image, cv2.IMREAD_COLOR)
        #         crop_area.append([i[0], image])
        #     break

        crop_area = text_predict(crop_img, 1, 1, crop_img)
        torch.cuda.empty_cache()
        break
    print('ctpn time: ', time.time() - st_time, ' counts: ', len(crop_area))
    new_results = []
    for index, j in enumerate(crop_area):
        try:
            # Recognise each detected line crop in grayscale.
            content, _ = predict(Image.fromarray(j[1]).convert('L'))
            # Commit the top candidate wherever it scores above 0.9 and drop
            # its alternatives; calculate() resolves the remaining ones.
            for indexi, i in enumerate(content[1]):
                if i[0] > 0.9:
                    content[0][indexi] = content[0][indexi][0]
                    content[1][indexi] = [-1]
            while [-1] in content[1]:
                content[1].remove([-1])
            content = calculate(content)
            print(content)
            new_results.append([j[0], content])
        except Exception as ex:
            print(ex)
            continue
    document = ''
    # Sort the recognised boxes top-to-bottom, then group them into lines.
    new_results = sorted(new_results, key=lambda i: i[0][1])
    line_images = []
    cut_index = 0
    curr_index = 0
    print('text line count:', len(new_results))
    for index, i in enumerate(new_results):
        try:
            if index == len(new_results) - 1:
                line_images.append(new_results[cut_index:])
                break
            # Boxes whose top edges differ by less than 3/4 of the anchor box
            # height stay on the same line, unless the next box overlaps a
            # grouped box by more than half of that box's width.
            if abs(new_results[index + 1][0][1] - new_results[curr_index][0][1]
                   ) < (new_results[curr_index][0][7] -
                        new_results[curr_index][0][1]) * 3 / 4:
                for result in new_results[cut_index:index + 1]:
                    if count_area(new_results[index + 1],
                                  result) > (result[0][6] - result[0][0]) / 2:
                        line_images.append(new_results[cut_index:index + 1])
                        cut_index = index + 1
                        curr_index = index + 1
                continue
            else:
                line_images.append(new_results[cut_index:index + 1])
                cut_index = index + 1
                curr_index = index + 1
        except Exception:
            continue

    # Order each line roughly left-to-right, then join the fragments,
    # inserting a space when the horizontal jump to the next fragment
    # exceeds about three average character widths.
    for index, i in enumerate(line_images):
        line_images[index] = sorted(i, key=lambda a: a[0][0] + a[0][1])
    texts = []
    for i in line_images:
        text = ''
        for index, j in enumerate(i):
            try:
                if index == len(i) - 1:
                    text += j[1]
                elif abs(i[index + 1][0][6] - i[index][0][6]) > 3 * (abs(
                        i[index][0][6] - i[index][0][0]) / len(i[index][1])):
                    text += j[1] + ' '
                else:
                    text += j[1]
            except Exception:
                continue
        texts.append([[i[0][0], i[-1][0]], text])

    crop_w = crop_img.shape[1]
    document = layout(texts, crop_w, table_infos)
    if document == '':
        # Fall back to recognising the whole page image at once.
        try:
            content, _ = predict(Image.fromarray(ori_img).convert('L'))
            for indexi, i in enumerate(content[1]):
                if i[0] > 0.9:
                    content[0][indexi] = content[0][indexi][0]
                    content[1][indexi] = [-1]
            while [-1] in content[1]:
                content[1].remove([-1])
            document = calculate(content)
        except Exception:
            pass
    print('document:', document)
    if document == ([], []):
        document = ''
    ori_end_time = datetime.now()
    ori_return = json.dumps([document])
    print('ori_time:',
          time.time() - ori_time, '\n', 'ori_start_time:', ori_start_time,
          '\n', 'ori_end_time:', ori_end_time)
    return response.text(ori_return)
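
A note on the recurring post-processing idiom: every example here commits a character's top candidate once its probability clears 0.9 and leaves only the uncertain positions for calculate() to resolve. Below is a minimal, self-contained sketch of that filter without the in-place [-1] markers; keep_confident and its argument layout are illustrative assumptions, not part of the source.

def keep_confident(cands, scores, threshold=0.9):
    """Commit the best candidate wherever its probability clears threshold.

    cands  -- per-position candidate strings, best first
    scores -- per-position probability lists, aligned with cands
    Returns (chars, pending): resolved characters plus the score lists
    that a later disambiguation pass still has to decide.
    """
    chars, pending = [], []
    for cand, probs in zip(cands, scores):
        if probs[0] > threshold:
            chars.append(cand[0])      # confident: keep only the top candidate
        else:
            chars.append(cand)         # uncertain: keep every candidate
            pending.append(probs)
    return chars, pending

# keep_confident([['日', '目'], ['本']], [[0.55, 0.30], [0.98]])
# -> ([['日', '目'], '本'], [[0.55, 0.30]])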
Exemple #9
0
def generate_table(cell, ori_img):
    # Unpack the detected table: bounding box, grid size, grid lines, cells.
    pos, cols, rows = cell[0][1], cell[1], cell[2]
    col_point, row_point, tables = cell[3], cell[4], cell[5]
    # Crop the table region (pos is [x, y, w, h]) out of the original image.
    table_im = ori_img[pos[1]:pos[1] + pos[3], pos[0]:pos[0] + pos[2]]

    # Send the cropped table to the detection service and decode the
    # returned line crops.
    _, d = cv2.imencode('.jpg', table_im)
    d = base64.b64encode(d).decode()
    data = {'img': d, 'scale_w': 1, 'scale_h': 1, 'ori_img': d}
    table_line_regions = requests.post(DETECT_URL, data=data).json()
    table_line_regions = [[
        xx[0],
        cv2.imdecode(np.frombuffer(base64.b64decode(xx[1]), np.uint8),
                     cv2.IMREAD_COLOR)
    ] for xx in table_line_regions]

    word_list = []
    for region in table_line_regions:
        region_y = [region[0][1], region[0][5]]
        region_x = [region[0][0], region[0][2]]

        _, tmp = cv2.imencode('.jpg', region[1])
        tmp = base64.b64encode(tmp).decode()
        data = {'img': tmp}
        content = requests.post(RECOGNISE_URL, data=data).json()
        # Commit confident candidates (prob > 0.9); leave the rest for
        # calculate() below.
        for indexi, cont in enumerate(content[1]):
            if cont[0] > 0.9:
                content[0][indexi] = content[0][indexi][0]
                content[1][indexi] = [-1]
        while [-1] in content[1]:
            content[1].remove([-1])
        # content[2] holds per-character x spans relative to the line crop;
        # map each recognised character back to absolute table coordinates.
        x = content[2]
        content = calculate(content)
        for index, word in enumerate(content):
            word_list.append([[
                x[index][0] + region_x[0], region_y[0],
                x[index][1] + region_x[0], region_y[0],
                x[index][0] + region_x[0], region_y[1],
                x[index][1] + region_x[0], region_y[1]
            ], word])
    # Debug visualisation: line boxes in green, single characters in red.
    for region in table_line_regions:
        cv2.rectangle(table_im, (region[0][0], region[0][1]),
                      (region[0][6], region[0][7]), (0, 255, 0), 1)
    for i in word_list:
        cv2.rectangle(table_im, (i[0][0], i[0][1]), (i[0][6], i[0][7]),
                      (255, 0, 0), 1)
    Image.fromarray(table_im).save('single_word.jpg')

    col_point = sorted(col_point)
    row_point = sorted(row_point)
    # Drop the outer frame (the tallest "cell"), then order cells roughly
    # top-left to bottom-right.
    tables = sorted(tables, key=lambda i: i[1][3])[:-1]
    tables = sorted(tables, key=lambda i: i[1][0] + i[1][1])

    for i in tables:
        # Snap each cell edge to the nearest grid line; the tolerance is half
        # the gap to the neighbouring line (a fixed 50 px at the borders).
        d = {'col_begin': 0, 'col_end': 0, 'row_begin': 0, 'row_end': 0}
        for index, value in enumerate(col_point):
            if index == 0:
                d_range = 50
            else:
                d_range = (col_point[index] - col_point[index - 1]) / 2
            if i[1][0] > col_point[index] - d_range:
                d['col_begin'] = index
        for index, value in enumerate(col_point):
            if index == len(col_point) - 1:
                d_range = 50
            else:
                d_range = (col_point[index + 1] - col_point[index]) / 2
            if i[1][0] + i[1][2] < col_point[index] + d_range:
                d['col_end'] = index
                break
        for index, value in enumerate(row_point):
            if index == 0:
                d_range = 50
            else:
                d_range = (row_point[index] - row_point[index - 1]) / 2
            if i[1][1] > row_point[index] - d_range:
                d['row_begin'] = index
        for index, value in enumerate(row_point):
            if index == len(row_point) - 1:
                d_range = 50
            else:
                d_range = (row_point[index + 1] - row_point[index]) / 2
            if i[1][1] + i[1][3] < row_point[index] + d_range:
                d['row_end'] = index
                break
        i.append(d)

    for i in tables:
        cv2.rectangle(table_im, (i[1][0], i[1][1]),
                      (i[1][0] + i[1][2], i[1][1] + i[1][3]), (255, 0, 0), 1)
        cell_region = [i[1][0], i[1][1], i[1][0] + i[1][2], i[1][1] + i[1][3]]
        # Collect the words whose centre point falls inside this cell.
        cell_words = []
        for word in word_list:
            word_center = ((word[0][0] + word[0][2]) / 2,
                           (word[0][1] + word[0][5]) / 2)
            if (cell_region[0] < word_center[0] < cell_region[2]
                    and cell_region[1] < word_center[1] < cell_region[3]):
                cell_words.append(word)
        cell_words = sorted(cell_words, key=lambda x: x[0][1])
        # Group words sharing a top edge into lines, each sorted left-to-right.
        word_lines = []
        word_temp = []
        for word in cell_words:
            if word_temp and word[0][1] != word_temp[-1][0][1]:
                word_lines.append(sorted(word_temp, key=lambda x: x[0][0]))
                word_temp = []
            word_temp.append(word)
        if word_temp:
            word_lines.append(sorted(word_temp, key=lambda x: x[0][0]))
        # Concatenate the cell text line by line.
        word_str = ''
        for line in word_lines:
            for word in line:
                word_str += word[1]
        i.append([word_str, i[1][2], i[1][3]])
    Image.fromarray(table_im).save('single_word.jpg')
    # Each entry pairs the cell's grid span with [text, width, height].
    new_table = []
    for i in tables:
        new_table.append([i[2], i[3]])

    return new_table, rows, cols, pos
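
Both generate_table variants snap cell edges to the grid with the same rule: an edge is assigned to the nearest ruling line, with half the distance to the neighbouring line as tolerance (a fixed 50 px at the outer borders). A compact sketch of that rule under those assumptions follows; snap_span is an illustrative name, not from the source.

def snap_span(lo, hi, points, border_tol=50):
    """Map a cell edge pair (lo, hi) to begin/end indices on sorted grid lines."""
    begin = end = 0
    for idx, p in enumerate(points):
        tol = border_tol if idx == 0 else (p - points[idx - 1]) / 2
        if lo > p - tol:               # edge lies at or beyond this line
            begin = idx
    for idx, p in enumerate(points):
        tol = border_tol if idx == len(points) - 1 else (points[idx + 1] - p) / 2
        if hi < p + tol:               # edge stops before the next line
            end = idx
            break
    return begin, end

# A cell spanning x = 110..390 on column lines [100, 200, 300, 400]
# gives snap_span(110, 390, [100, 200, 300, 400]) == (0, 3),
# i.e. col_begin = 0 and col_end = 3 in the dictionaries built above.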
Exemple #10
0
def generate_table(cell):
    # Unpack the detected table: bounding box, grid size, grid lines, cells.
    pos, cols, rows = cell[0][1], cell[1], cell[2]
    col_point, row_point, tables = cell[3], cell[4], cell[5]

    col_point = sorted(col_point)
    row_point = sorted(row_point)
    # Drop the outer frame (the tallest "cell"), then order cells roughly
    # top-left to bottom-right.
    tables = sorted(tables, key=lambda i: i[1][3])[:-1]
    tables = sorted(tables, key=lambda i: i[1][0] + i[1][1])
    for i in tables:
        # Snap each cell edge to the nearest grid line; the tolerance is half
        # the gap to the neighbouring line (a fixed 50 px at the borders).
        d = {'col_begin': 0, 'col_end': 0, 'row_begin': 0, 'row_end': 0}
        for index, value in enumerate(col_point):
            if index == 0:
                d_range = 50
            else:
                d_range = (col_point[index] - col_point[index - 1]) / 2
            if i[1][0] > col_point[index] - d_range:
                d['col_begin'] = index
        for index, value in enumerate(col_point):
            if index == len(col_point) - 1:
                d_range = 50
            else:
                d_range = (col_point[index + 1] - col_point[index]) / 2
            if i[1][0] + i[1][2] < col_point[index] + d_range:
                d['col_end'] = index
                break
        for index, value in enumerate(row_point):
            if index == 0:
                d_range = 50
            else:
                d_range = (row_point[index] - row_point[index - 1]) / 2
            if i[1][1] > row_point[index] - d_range:
                d['row_begin'] = index
        for index, value in enumerate(row_point):
            if index == len(row_point) - 1:
                d_range = 50
            else:
                d_range = (row_point[index + 1] - row_point[index]) / 2
            if i[1][1] + i[1][3] < row_point[index] + d_range:
                d['row_end'] = index
                break
        i.append(d)

    for index, i in enumerate(tables):
        texts = ''
        try:
            i[0] = Image.fromarray(i[0])
            new_i = i[0].copy()
            ori_w, ori_h = i[0].size
            # Shrink the cell image and keep the factors that map detected
            # boxes back to the original resolution.
            new_i.thumbnail((1500, 1500), Image.ANTIALIAS)
            scale_w, scale_h = new_i.size
            scale_w, scale_h = ori_w / scale_w, ori_h / scale_h
            new_i = np.array(new_i.convert('RGB'))
            if new_i.shape[1] > 16 and new_i.shape[0] > 16:
                images = text_predict(new_i, scale_w, scale_h, np.array(i[0]))
                torch.cuda.empty_cache()
            else:
                # Too small to detect anything; keep an empty cell.
                i.append([texts, i[1][2], i[1][3]])
                continue
            if images:
                # Recognise each detected line, top-to-bottom.
                for image in sorted(images, key=lambda ii: ii[0][1]):
                    content = predict(Image.fromarray(image[1]).convert('L'))
                    for indexi, cont in enumerate(content[1]):
                        if cont[0] > 0.9:
                            content[0][indexi] = content[0][indexi][0]
                            content[1][indexi] = [-1]
                    while [-1] in content[1]:
                        content[1].remove([-1])
                    texts += calculate(content)

            elif new_i.any() and new_i.shape[0] < new_i.shape[1] * 1.5:
                # No lines detected: try recognising the whole cell, as long
                # as it is not too tall relative to its width.
                try:
                    content = predict(Image.fromarray(new_i).convert('L'))
                    for indexi, cont in enumerate(content[1]):
                        if cont[0] > 0.9:
                            content[0][indexi] = content[0][indexi][0]
                            content[1][indexi] = [-1]
                    while [-1] in content[1]:
                        content[1].remove([-1])
                    texts += calculate(content)
                except Exception as ex:
                    print('small_image_warning', ex)
            # Attach the recognised text plus the cell's width and height.
            i.append([texts, i[1][2], i[1][3]])
        except Exception as e:
            print('table_text warning', e)

    # Each entry pairs the cell's grid span with [text, width, height].
    new_table = []
    for i in tables:
        new_table.append([i[2], i[3]])

    return new_table, rows, cols, pos
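
A bookkeeping detail shared by these examples: images are shrunk in place with Pillow's thumbnail(), and the detector receives the ratio between the original and shrunk sizes so boxes can be mapped back to full resolution. A minimal sketch of that pattern (Pillow only; shrink_with_scale is an illustrative helper, not from the source):

from PIL import Image

def shrink_with_scale(img, limit=1500):
    """Shrink so the longest side is <= limit, keeping the aspect ratio.

    Returns the shrunk copy plus the factors that map coordinates found
    on the shrunk image back onto the original.
    """
    ori_w, ori_h = img.size
    small = img.copy()
    small.thumbnail((limit, limit))    # in-place, never enlarges
    new_w, new_h = small.size
    return small, ori_w / new_w, ori_h / new_h

# small, scale_w, scale_h = shrink_with_scale(Image.open('page.jpg'))
# A box corner (x, y) detected on `small` sits at (x * scale_w, y * scale_h)
# in the original image.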
Exemple #11
0
def single_ocr(document, img_name, start_page, new_url):
    # Deskew the page first, then keep a full-resolution copy.
    img_name = skew_detect.get_rotated_img(img_name)

    ori_img = np.array(img_name)
    ori_w, ori_h = img_name.size
    img_name.thumbnail((1500, 1500), Image.ANTIALIAS)
    scale_w, scale_h = img_name.size
    scale_w, scale_h = ori_w / scale_w, ori_h / scale_h
    print('original size:', ori_w, ori_h, 'scale factors:', scale_w, scale_h)
    img = np.array(img_name)
    start = time.time()
    images = text_predict(img, scale_w, scale_h, ori_img)
    torch.cuda.empty_cache()
    print('ctpn time: ', time.time() - start)
    try:
        tables = extract_table(ori_img)
        has_table = tables != 'not table'
    except Exception:
        has_table = False
    print('has_table:', has_table)
    results = []
    start = time.time()
    for index, j in enumerate(images):
        try:
            if has_table:
                # Skip lines that fall inside any detected table; those are
                # recognised by generate_table below.
                if any(table[0][1][1] < j[0][1] < table[0][1][1] +
                       table[0][1][3] for table in tables):
                    continue
            content = predict(Image.fromarray(j[1]).convert('L'))
            # Commit confident candidates (prob > 0.9); calculate() resolves
            # the remaining ambiguous positions.
            for indexi, i in enumerate(content[1]):
                if i[0] > 0.9:
                    content[0][indexi] = content[0][indexi][0]
                    content[1][indexi] = [-1]
            while [-1] in content[1]:
                content[1].remove([-1])
            content = calculate(content)
            results.append(
                [j[0], content.replace('“', '').replace('‘', '')])
        except Exception as e:
            print(e)
            continue
    torch.cuda.empty_cache()
    print('recognition time:', time.time() - start)
    # Sort boxes top-to-bottom, then group them into lines.
    new_results = sorted(results, key=lambda i: i[0][1])
    line_images = []
    cut_index = 0
    curr_index = 0
    for index, i in enumerate(new_results):
        if index == len(new_results) - 1:
            if cut_index < index:
                line_images.append(new_results[cut_index:index])
            line_images.append(new_results[index:])
            break
        # Start a new line when the next box's top edge drops by more than
        # 4/5 of the current anchor box's height.
        if abs(new_results[index + 1][0][1] - new_results[curr_index][0][1]
               ) > (new_results[curr_index][0][7] -
                    new_results[curr_index][0][1]) * 4 / 5:
            line_images.append(new_results[cut_index:index + 1])
            cut_index = index + 1
            curr_index = index + 1
    # Sort each line left-to-right, then join fragments, inserting a space
    # when the gap to the next fragment exceeds about three character widths.
    for index, i in enumerate(line_images):
        line_images[index] = sorted(i, key=lambda a: a[0][0])
    texts = []
    position = []
    for i in line_images:
        text = ''
        for index, j in enumerate(i):
            try:
                position.append([j[0], j[1]])
                if index == len(i) - 1:
                    text += j[1]
                elif abs(i[index + 1][0][0] - i[index][0][6]) > 3 * (abs(
                        i[index][0][6] - i[index][0][0]) / len(i[index][1])):
                    text += j[1] + ' '
                else:
                    text += j[1]
            except Exception:
                continue
        texts.append([[i[0][0], i[-1][0]], text])
    print('thumbnail size:', img_name.size)
    if has_table:
        # Insert each table into the text flow at its vertical position.
        for table in tables:
            table_index = 0
            for index, i in enumerate(texts):
                if i[0] == 'table':
                    if table[0][1][1] > i[1][3][1]:
                        table_index = index + 1
                elif table[0][1][1] > i[0][0][1]:
                    table_index = index + 1
            try:
                texts.insert(table_index,
                             ['table', generate_table(table, ori_img)])
            except Exception as e:
                print(e)
                continue
    try:
        texts = sort_paragraph(Image.fromarray(ori_img), texts)
    except Exception as e:
        print(e)
        return document, position
    document = save2docx(document, texts, Image.fromarray(ori_img), start_page)
    return document, position
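
Finally, all three entry points group detected boxes into lines with the same heuristic: sort by top edge and start a new line when the next box's top edge drops by more than a fixed fraction of the current anchor's height. A stripped-down sketch of that grouping, using the 8-value box layout [x0, y0, x1, y0, x0, y1, x1, y1] seen above (group_lines and ratio are illustrative, not from the source):

def group_lines(boxes, ratio=0.8):
    """Group 8-coordinate boxes into text lines.

    Boxes are sorted by top edge (index 1); a new line starts when the next
    box's top edge is more than ratio * anchor height below the line anchor.
    """
    boxes = sorted(boxes, key=lambda b: b[1])
    lines, current = [], []
    for box in boxes:
        if current:
            anchor = current[0]
            height = anchor[7] - anchor[1]     # y1 - y0 of the anchor box
            if abs(box[1] - anchor[1]) > height * ratio:
                lines.append(sorted(current, key=lambda b: b[0]))
                current = []
        current.append(box)
    if current:
        lines.append(sorted(current, key=lambda b: b[0]))
    return lines

# Two boxes on one baseline plus one further down yield two lines:
# group_lines([[0, 0, 50, 0, 0, 20, 50, 20],
#              [60, 2, 110, 2, 60, 22, 110, 22],
#              [0, 40, 50, 40, 0, 60, 50, 60]])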