Example #1
def method2():
    filename = tkinter.filedialog.askopenfilename()
    start = time.perf_counter()
    if filename != '':
        la = LayoutAnalyzerEx()
        area_analyzer = TextAreaAnalyzerEx()
        pro = ImagePreprocessor()
        img_path = filename
        # read the image as grayscale (np.fromfile + imdecode copes with
        # non-ASCII paths on Windows) and binarize it
        old_im = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), 0)
        im = cv2.adaptiveThreshold(old_im, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 11, 2)
        old_im = pro.reshape(im)
        height, width = old_im.shape
        # preprocess: rotate the image and detect tables; the intermediate
        # results are round-tripped through disk
        rot_img = pro.rotateImg(old_im)
        cv2.imwrite('c:/ml_dir/mask/thresh.png', rot_img)
        rot_img = cv2.imread('c:/ml_dir/mask/thresh.png')
        pro_img = pro.detectTable(rot_img, rot_img)
        cv2.imwrite('c:/ml_dir/mask/nomask.png', pro_img)
        rot_img = cv2.imread('c:/ml_dir/mask/nomask.png')

        pro = ImagePreprocessor()
        img_path = 'C:/ml_dir/mask/nomask.png'
        old_im = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), 0)
        im = cv2.adaptiveThreshold(old_im, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 11, 2)
        old_im = pro.reshape(im)
        height, width = old_im.shape
        root_area, all_areas = la.get_page_layout(im)
        window = tk.Tk()
        window.title('The OCR Results')
        window.geometry('%dx%d' % (width, height))
        result = ''
        for i in range(len(all_areas)):
            text, porp = area_analyzer.get_sub_area_text(
                all_areas[i].get_sub_image())
            result = result + text + '\n'
            l = tk.Label(
                window,
                text=text,
            )
            l.place(x=all_areas[i].tl_x,
                    y=all_areas[i].tl_y,
                    width=all_areas[i].width,
                    height=all_areas[i].height)
        end = time.perf_counter()
        print('Running time: %.4f seconds' % (end - start))
        lb.config(text='Elapsed time: %.2f s' % (end - start))
        cv2.destroyAllWindows()
    else:
        lb.config(text='No file was selected')
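method2 above (and method1 in the next example) is a Tkinter button callback; the status label lb and the project classes it uses are module-level objects that the snippet does not show. Below is a minimal sketch of the assumed surrounding module; the import module names and the widget layout are assumptions for illustration, not code from the original project.

# Hypothetical module-level setup assumed by method1/method2 (a sketch only).
import time
import tkinter as tk
import tkinter.filedialog

import cv2
import numpy as np

# assumed module names for the project classes used inside the handlers
from layout_analyzer import LayoutAnalyzerEx
from text_area_analyzer import TextAreaAnalyzerEx
from image_preprocessor import ImagePreprocessor

root = tk.Tk()
root.title('OCR demo')
lb = tk.Label(root, text='Please choose an image file')  # status label updated by the handlers
lb.pack()
tk.Button(root, text='Run OCR', command=method2).pack()  # method2 as defined above
root.mainloop()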
Example #2
def method1():
    filename = tkinter.filedialog.askopenfilename()
    start = time.perf_counter()
    if filename != '':
        la = LayoutAnalyzerEx()
        area_analyzer = TextAreaAnalyzerEx()
        img_path = filename
        old_im = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), 0)
        im = cv2.adaptiveThreshold(old_im, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 11, 2)
        height, width = old_im.shape
        root_area, all_areas = la.get_page_layout(im)
        window = tk.Tk()
        window.title('The OCR Results')
        window.geometry('%dx%d' % (width, height))
        result = ''
        for i in range(len(all_areas)):
            text, porp = area_analyzer.get_sub_area_text(
                all_areas[i].get_sub_image())
            result = result + text + '\n'
            l = tk.Label(
                window,
                text=text,
            )
            l.place(x=all_areas[i].tl_x,
                    y=all_areas[i].tl_y,
                    width=all_areas[i].width,
                    height=all_areas[i].height)
        end = time.perf_counter()
        print('Running time: %.4f seconds' % (end - start))
        # write the concatenated OCR result to disk (utf-8 so non-ASCII text survives)
        with open('C:/ml_dir/result.txt', 'w', encoding='utf-8') as f:
            f.write(result)
        lb.config(text='Elapsed time: %.2f s' % (end - start))
        cv2.destroyAllWindows()

    else:
        lb.config(text='No file was selected')
Example #3
    # the result is a list of triples: [word, type, probability]
    def get_result_from_json_string(self, json_str):
        obj = json.loads(json_str)
        #obj = demjson.decode(json_str)
        results = []
        for res in obj:
            results.append([res['word'], res['type'], float(res['prop'])])          
        # sort by probability in descending order
        print("Results:", results, type(results))
        results.sort(key=lambda x: x[2], reverse=True)
        return results
        
 
if __name__ == '__main__':
    # test the recognition results
    la = LayoutAnalyzerEx()
    #img_path = 'D:\\ml_dir\\test\\027.tif'
    img_path = 'C:/ml_dir/test/1.png'
    old_im = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), 1)

    gray = cv2.cvtColor(old_im, cv2.COLOR_BGR2GRAY)
    im = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU|cv2.THRESH_BINARY)[1]

    cv2.namedWindow('src_img', 0)
    cv2.imshow('src_img', old_im)

    root_area, all_areas = la.get_page_layout(im)

    area_analyzer = TextAreaAnalyzerEx()
    #text, porp = area_analyzer.get_sub_area_text(all_areas[0].get_sub_image())
    #print(text, str(porp))
    cv2.waitKey()  # keep the preview window open
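For reference, the get_result_from_json_string helper at the top of this example parses a JSON array of objects with 'word', 'type' and 'prop' fields and returns [word, type, probability] triples sorted by descending probability. A small usage sketch with made-up input, assuming the method belongs to TextAreaAnalyzerEx (the enclosing class is not shown in the snippet):

import json

analyzer = TextAreaAnalyzerEx()  # assumption: the method lives on this class
sample = json.dumps([
    {'word': '学', 'type': 'cn_char', 'prop': '0.65'},
    {'word': ',', 'type': 'punctuation', 'prop': '0.91'},
])
triples = analyzer.get_result_from_json_string(sample)
# triples == [[',', 'punctuation', 0.91], ['学', 'cn_char', 0.65]]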
Example #4
    def __init__(self):
        self.layout_analysis = LayoutAnalyzerEx()
        self.text_area_analyzer = TextAreaAnalyzerEx()
        pass
Example #5
class PageAnalysis(object):
    def __init__(self):
        self.layout_analysis = LayoutAnalyzerEx()
        self.text_area_analyzer = TextAreaAnalyzerEx()
        pass

    def get_page_text(self, page_img_file):
        #read file
        original_image = cv2.imdecode(
            np.fromfile(page_img_file, dtype=np.uint8), 1)
        gray = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)
        im = cv2.threshold(gray, 0, 255,
                           cv2.THRESH_OTSU | cv2.THRESH_BINARY)[1]
        t1 = time.perf_counter()
        root_area, all_areas = self.layout_analysis.get_page_layout(im.copy())
        t2 = time.perf_counter()
        print('time for layout analysis: ', t2 - t1)

        # display the detected layout
        aa = im.copy()
        for area in all_areas:
            cv2.rectangle(aa, (area.tl_x, area.tl_y),
                          (area.tl_x + area.width, area.tl_y + area.height),
                          [0, 0, 0], 1)
        cv2.imshow('x', aa)
        cv2.waitKey()

        text, prop = self.get_area_text_ex1(root_area, original_image)
        t3 = time.perf_counter()
        print('time for text analysis: ', t3 - t2)

        return text, prop

    def get_area_text(self, text_area, original_image):
        text = ''
        prop = 0  # default when the layout matches none of the branches below
        if text_area.layout == LAYOUT_UNKNOWN:
            text, prop = self.text_area_analyzer.get_text(
                text_area, original_image)
        elif text_area.layout == LAYOUT_HORIZONTAL:
            for i in range(len(text_area.sub_areas)):
                ret_text, prop = self.get_area_text(text_area.sub_areas[i],
                                                    original_image)
                if ret_text == '' or ret_text is None:
                    continue
                text = text + ' ' + ret_text  # + '('+str(prop)+')'
        elif text_area.layout == LAYOUT_VERTICAL:
            for i in range(len(text_area.sub_areas)):
                ret_text, prop = self.get_area_text(text_area.sub_areas[i],
                                                    original_image)
                if ret_text == '' or ret_text is None:
                    continue
                text = text + '\r\n' + ret_text  # + '('+str(prop)+')'
        return text, prop

    # Get the text of an area (extended implementation 1).
    # Main processing strategy:
    # 1. For an isolated area:
    # 1.1 if the area has no left/right neighboring areas, recognize it directly;
    # 1.2 if neighboring areas exist:
    # 1.2.1 for a non-edge area whose left and right neighbors form similar lines,
    #       analyze with reference to the line height:
    #       (1) if the area is likely a punctuation mark or a single character,
    #           recognize it against the neighboring line height;
    #       (2) otherwise, recognize it directly;
    # 1.2.2 for an edge area, analyze with reference to the neighbor's line height:
    #       (1) if the area is likely a punctuation mark or a single character,
    #           recognize it both at its actual height and at the neighbor's line
    #           height, and keep the more probable result;
    #       (2) otherwise, recognize it directly.
    # 2. For horizontally distributed areas, build the adjacency relations and
    #    recognize the sub-areas one by one.
    # 3. For vertically distributed areas:
    # 3.1 if the conditions for combined recognition are met, compare the
    #     probabilities of combined and separate recognition and keep the higher;
    # 3.2 otherwise, recognize each sub-area directly.
    def get_area_text_ex1(self, text_area, original_image):
        text = ''
        total_prop = 0
        if text_area.layout == LAYOUT_HORIZONTAL:
            # 2. horizontally distributed areas: build adjacency relations and
            # recognize the sub-areas one by one
            for i in range(len(text_area.sub_areas)):
                # establish the left/right neighbor relations
                if i == 0 and len(text_area.sub_areas) > 1:
                    text_area.sub_areas[i].set_neighbors_area(
                        None, text_area.sub_areas[i + 1])
                if i == len(text_area.sub_areas) - 1 and len(
                        text_area.sub_areas) > 1:
                    text_area.sub_areas[i].set_neighbors_area(
                        text_area.sub_areas[i - 1], None)
                if i > 0 and i < len(text_area.sub_areas) - 1:
                    text_area.sub_areas[i].set_neighbors_area(
                        text_area.sub_areas[i - 1], text_area.sub_areas[i + 1])
                ret_text, prop = self.get_area_text_ex1(
                    text_area.sub_areas[i], original_image)
                if ret_text == '' or ret_text is None:
                    continue
                total_prop = total_prop + prop
                if i == 0:
                    text = text + ret_text
                else:
                    text = text + ' ' + ret_text
            total_prop = total_prop / len(text_area.sub_areas)
            return text, total_prop
        elif text_area.layout == LAYOUT_VERTICAL:
            # 3. vertically distributed areas
            cmb_text = ''
            cmb_prop = 0
            if len(text_area.sub_areas) <= 3 and len(text_area.sub_areas) > 1:
                # 3.1 combined recognition may apply: compare combined and
                # separate recognition and keep the more probable result
                # (all sub-areas are expected to be leaf areas)
                all_leaf_areas = True
                tl_point = []
                br_point = []
                for idx in range(len(text_area.sub_areas)):
                    if len(text_area.sub_areas[idx].sub_areas) != 0:
                        #all_leaf_areas = False
                        #break
                        pass
                    if idx == 0:
                        tl_point = [
                            text_area.sub_areas[idx].tl_x,
                            text_area.sub_areas[idx].tl_y
                        ]
                        br_point = [
                            text_area.sub_areas[idx].get_br_point()[0],
                            text_area.sub_areas[idx].get_br_point()[1]
                        ]
                    else:
                        sub = text_area.sub_areas[idx]
                        sub_br = sub.get_br_point()
                        if tl_point[0] > sub.tl_x:
                            tl_point[0] = sub.tl_x
                        if tl_point[1] > sub.tl_y:
                            tl_point[1] = sub.tl_y
                        if br_point[0] < sub_br[0]:
                            br_point[0] = sub_br[0]
                        if br_point[1] < sub_br[1]:
                            br_point[1] = sub_br[1]
                if all_leaf_areas and ((br_point[0] - tl_point[0] + 1) * 1.0 /
                                       (br_point[1] - tl_point[1] + 1)) <= 1.2:
                    # possibly a single character or symbol: try combined
                    # recognition in case it scores higher than split recognition
                    new_area = TextArea(text_area.sub_areas[0].image,
                                        tl_point[0], tl_point[1],
                                        br_point[1] - tl_point[1] + 1,
                                        br_point[0] - tl_point[0] + 1,
                                        text_area.sub_areas[0].parent_area)
                    new_area.set_neighbors_area(text_area.left_area,
                                                text_area.right_area)
                    '''
                    cv2.imshow('ss', new_area.get_sub_image())
                    cv2.waitKey()
                    '''
                    new_area.layout = LAYOUT_UNKNOWN
                    cmb_text, cmb_prop = self.get_area_text_ex1(
                        new_area, original_image)
            # 3.2 recognize each sub-area separately
            split_text = ''
            for i in range(len(text_area.sub_areas)):
                ret_text, prop = self.get_area_text_ex1(
                    text_area.sub_areas[i], original_image)
                if ret_text == '' or ret_text is None:
                    continue
                total_prop = total_prop + prop
                if i == 0:
                    split_text = split_text + ret_text
                else:
                    split_text = split_text + '\r\n' + ret_text
            total_prop = total_prop / len(text_area.sub_areas)
            # decide between the combined result and the split-recognition result
            if cmb_prop < total_prop:
                text = split_text
            else:
                text = cmb_text
                total_prop = cmb_prop
            return text, total_prop
        elif text_area.layout == LAYOUT_UNKNOWN:
            # 1. isolated area
            # 1.1 no left/right neighbors: recognize directly
            if text_area.left_area is None and text_area.right_area is None:
                text, total_prop = self.text_area_analyzer.get_text(
                    text_area, original_image)
                return text, total_prop
            # 1.2 neighboring areas exist
            # 1.2.1 non-edge area whose left and right neighbors form similar
            # lines: analyze with reference to the line height
            if text_area.left_area is not None and text_area.right_area is not None:
                lr_avg_h = (text_area.left_area.height +
                            text_area.right_area.height) / 2.0
                # (1) if the area is likely a punctuation mark or a single
                # character, recognize it against the neighboring line height
                # (requires the area to be shorter than the neighbors' average);
                # the neighbors must have similar heights and be roughly level
                # (height difference < 20% of the average height, center-y
                # difference < 10% of the average height)
                if ((abs(text_area.left_area.height -
                         text_area.right_area.height) / lr_avg_h) < 0.2 and
                    (abs(text_area.left_area.get_center_point()[1] -
                         text_area.right_area.get_center_point()[1]) / lr_avg_h
                     < 0.1) and text_area.height < lr_avg_h):
                    if ((text_area.width * 1.0 / text_area.height) < 1.2
                            and text_area.height < lr_avg_h * 0.4) or (
                                text_area.width / lr_avg_h < 1.2):
                        # treated as a punctuation mark or single character:
                        # reset the top-left y coordinate and the height
                        new_tl_y = (text_area.left_area.tl_y +
                                    text_area.right_area.tl_y) / 2
                        text_area.set_area_params(text_area.tl_x, new_tl_y,
                                                  lr_avg_h, text_area.width)
                # recognize directly
                text, total_prop = self.text_area_analyzer.get_text(
                    text_area, original_image)
            # 1.2.2 edge area: analyze with reference to the neighbor's line height
            # (1) if the area is likely a punctuation mark or a single character,
            #     recognize it both at its actual height and at the neighbor's
            #     line height, and keep the more probable result
            # (2) otherwise, recognize it directly
            else:
                nb_height = 0
                nb_area = None
                ext_text = ''
                ext_prop = 0
                if text_area.left_area is not None:
                    nb_height = text_area.left_area.height
                    nb_area = text_area.left_area
                else:
                    nb_height = text_area.right_area.height
                    nb_area = text_area.right_area
                # the area is clearly shorter than its neighbor: also try
                # recognizing it with an adjusted height
                if text_area.height < nb_height * 0.7 and (
                    ((text_area.width * 1.0 / text_area.height) < 1.2
                     and text_area.height < nb_height * 0.4) or
                    (text_area.width / nb_height < 1.2)):
                    new_tl_y = nb_area.tl_y
                    new_area = TextArea(text_area.image, text_area.tl_x,
                                        new_tl_y, nb_height, text_area.width,
                                        text_area.parent_area)
                    ext_text, ext_prop = self.text_area_analyzer.get_text(
                        new_area, original_image)
                # result of direct recognition
                text, total_prop = self.text_area_analyzer.get_text(
                    text_area, original_image)
                if total_prop < ext_prop:
                    text = ext_text
                    total_prop = ext_prop
            return text, total_prop
        else:
            return None, 0
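A minimal usage sketch for the PageAnalysis class above; the image path is illustrative, and get_page_text blocks on the cv2.imshow/cv2.waitKey call that previews the detected layout:

if __name__ == '__main__':
    pa = PageAnalysis()
    # illustrative path; any page image readable by OpenCV will do
    text, prop = pa.get_page_text('C:/ml_dir/test/1.png')
    print(text)
    print('average probability:', prop)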