コード例 #1
0
class TextProposalConnector:
    """
        Connect text proposals into text lines
    """
    def __init__(self):
        # Produces the proposal graph whose connected sub-graphs become lines.
        self.graph_builder = TextProposalGraphBuilder()

    def group_text_proposals(self, text_proposals, scores, im_size):
        """Return the groups of proposal indices found by the graph builder.

        All three arguments are forwarded unchanged to ``build_graph``.
        """
        graph = self.graph_builder.build_graph(text_proposals, scores, im_size)
        return graph.sub_graphs_connected()

    def fit_y(self, X, Y, x1, x2):
        """Fit a straight line through (X, Y) and evaluate it at x1 and x2.

        X must be non-empty; an empty X fails on the ``X[0]`` lookup.

        Returns:
            The pair ``(y(x1), y(x2))``. When every entry of X is identical
            no slope can be fitted and ``(Y[0], Y[0])`` is returned.
        """
        # if X only include one point, the function will get line y=Y[0]
        if np.all(X == X[0]):
            return Y[0], Y[0]
        line = np.poly1d(np.polyfit(X, Y, 1))
        return line(x1), line(x2)

    def _fit_center_line(self, X, Y):
        """Return (slope, intercept) of the least-squares line through (X, Y).

        When all X values coincide (e.g. a single proposal) the slope is
        undetermined; np.polyfit would only warn about a rank-deficient fit
        and hand back an arbitrary tilted line, so a horizontal line through
        the mean of Y is returned instead.
        """
        if np.all(X == X[0]):
            return 0.0, np.mean(Y)
        slope, intercept = np.polyfit(X, Y, 1)
        return slope, intercept

    def get_text_lines(self, text_proposals, scores, im_size):
        """Merge grouped proposals into one row per text line.

        Args:
            text_proposals: 2-D array whose columns 0..3 are read as
                x1, y1, x2, y2 of each proposal.
            scores: array of per-proposal scores, indexed by index lists.
            im_size: forwarded to the graph builder.

        Returns:
            float32 array of shape (num_lines, 8) with columns:
            x_min, top y, x_max, bottom y, mean score, slope and intercept
            of the line through the proposal centers, and mean proposal
            height + 2.5.
        """
        # tp=text proposal
        tp_groups = self.group_text_proposals(text_proposals, scores, im_size)
        text_lines = np.zeros((len(tp_groups), 8), np.float32)

        for index, tp_indices in enumerate(tp_groups):
            members = list(tp_indices)
            boxes = text_proposals[members]

            # Line through the centers of all proposals in the group.
            center_x = (boxes[:, 0] + boxes[:, 2]) / 2
            center_y = (boxes[:, 1] + boxes[:, 3]) / 2
            slope, intercept = self._fit_center_line(center_x, center_y)

            x0 = np.min(boxes[:, 0])
            x1 = np.max(boxes[:, 2])

            # Half the width of the first proposal: the edge lines are
            # evaluated this far inside the horizontal extent.
            offset = (boxes[0, 2] - boxes[0, 0]) * 0.5

            lt_y, rt_y = self.fit_y(boxes[:, 0], boxes[:, 1],
                                    x0 + offset, x1 - offset)
            lb_y, rb_y = self.fit_y(boxes[:, 0], boxes[:, 3],
                                    x0 + offset, x1 - offset)

            # the score of a text line is the average score of the scores
            # of all text proposals contained in the text line
            score = scores[members].sum() / float(len(tp_indices))

            text_lines[index, 0] = x0
            text_lines[index, 1] = min(lt_y, rt_y)
            text_lines[index, 2] = x1
            text_lines[index, 3] = max(lb_y, rb_y)
            text_lines[index, 4] = score
            text_lines[index, 5] = slope
            text_lines[index, 6] = intercept
            height = np.mean(boxes[:, 3] - boxes[:, 1])
            text_lines[index, 7] = height + 2.5

        # NOTE: rows are returned unclipped (no clip_boxes call is applied).
        return text_lines
コード例 #2
0
ファイル: text_proposal_connector.py プロジェクト: ttyhu/ocr
class TextProposalConnector:
    """Connect text proposals into text lines given as four corner points."""

    def __init__(self):
        # Produces the proposal graph whose connected sub-graphs become lines.
        self.graph_builder = TextProposalGraphBuilder()

    def group_text_proposals(self, text_proposals, scores, im_size):
        """Return the groups of proposal indices found by the graph builder."""
        graph = self.graph_builder.build_graph(text_proposals, scores, im_size)
        return graph.sub_graphs_connected()

    def fit_y(self, X, Y, x1, x2):
        """Fit a straight line through (X, Y) and evaluate it at x1 and x2.

        X must be non-empty; an empty X fails on the ``X[0]`` lookup.

        Returns:
            The pair ``(y(x1), y(x2))``. When every entry of X is identical
            no slope can be fitted and ``(Y[0], Y[0])`` is returned.
        """
        # if X only include one point, the function will get line y=Y[0]
        if np.all(X == X[0]):
            return Y[0], Y[0]
        line = np.poly1d(np.polyfit(X, Y, 1))
        return line(x1), line(x2)

    def get_text_lines(self, text_proposals, scores, im_size):
        """Merge grouped proposals into axis-aligned text rectangles.

        Args:
            text_proposals: 2-D array whose columns 0..3 are read as
                x1, y1, x2, y2 of each proposal.
            scores: array of per-proposal scores, indexed by index lists.
            im_size: forwarded to the graph builder and to ``clip_boxes``.

        Returns:
            float64 array of shape (num_lines, 9): the corner points
            (xmin, ymin), (xmax, ymin), (xmin, ymax), (xmax, ymax) followed
            by the mean score of the line.
        """
        # tp=text proposal
        tp_groups = self.group_text_proposals(text_proposals, scores, im_size)
        text_lines = np.zeros((len(tp_groups), 5), np.float32)

        for index, tp_indices in enumerate(tp_groups):
            members = list(tp_indices)
            boxes = text_proposals[members]

            x0 = np.min(boxes[:, 0])
            x1 = np.max(boxes[:, 2])

            # Half the width of the first proposal: the edge lines are
            # evaluated this far inside the horizontal extent.
            offset = (boxes[0, 2] - boxes[0, 0]) * 0.5

            lt_y, rt_y = self.fit_y(boxes[:, 0], boxes[:, 1],
                                    x0 + offset, x1 - offset)
            lb_y, rb_y = self.fit_y(boxes[:, 0], boxes[:, 3],
                                    x0 + offset, x1 - offset)

            # the score of a text line is the average score of the scores
            # of all text proposals contained in the text line
            score = scores[members].sum() / float(len(tp_indices))

            text_lines[index, 0] = x0
            text_lines[index, 1] = min(lt_y, rt_y)
            text_lines[index, 2] = x1
            text_lines[index, 3] = max(lb_y, rb_y)
            text_lines[index, 4] = score

        text_lines = clip_boxes(text_lines, im_size)

        # np.float was only an alias of the builtin float (float64) and has
        # been removed from NumPy, so the dtype is spelled out explicitly.
        text_recs = np.zeros((len(text_lines), 9), np.float64)
        for index, line in enumerate(text_lines):
            xmin, ymin, xmax, ymax = line[0], line[1], line[2], line[3]
            text_recs[index, 0] = xmin
            text_recs[index, 1] = ymin
            text_recs[index, 2] = xmax
            text_recs[index, 3] = ymin
            text_recs[index, 4] = xmin
            text_recs[index, 5] = ymax
            text_recs[index, 6] = xmax
            text_recs[index, 7] = ymax
            text_recs[index, 8] = line[4]

        return text_recs
コード例 #3
0
class TextProposalConnector:
    """
        Connect text proposals into text lines
    """
    def __init__(self):
        # Produces the proposal graph whose connected sub-graphs become lines.
        self.graph_builder = TextProposalGraphBuilder()

    def group_text_proposals(self, text_proposals, scores, im_size):
        """Return the groups of proposal indices found by the graph builder.

        All three arguments are forwarded unchanged to ``build_graph``.
        """
        graph = self.graph_builder.build_graph(text_proposals, scores, im_size)
        return graph.sub_graphs_connected()

    def fit_y(self, X, Y, x1, x2):
        """Evaluate the least-squares line through (X, Y) at x1 and x2.

        X must be non-empty; an empty X fails on the ``X[0]`` lookup.

        Returns:
            ``(y(x1), y(x2))``, or ``(Y[0], Y[0])`` when all entries of X
            are identical and no slope can be fitted.
        """
        # if X only include one point, the function will get line y=Y[0]
        if np.all(X == X[0]):
            return Y[0], Y[0]
        fitted = np.poly1d(np.polyfit(X, Y, 1))
        return fitted(x1), fitted(x2)

    def _fit_center_line(self, X, Y):
        """Return (slope, intercept) of the least-squares line through (X, Y).

        With identical X values (e.g. a single proposal) the slope is
        undetermined; np.polyfit would only warn about a rank-deficient fit
        and return an arbitrary tilted line, so a horizontal line through
        the mean of Y is used instead.
        """
        if np.all(X == X[0]):
            return 0.0, np.mean(Y)
        slope, intercept = np.polyfit(X, Y, 1)
        return slope, intercept

    def get_text_lines(self, text_proposals, scores, im_size):
        """Merge grouped proposals into one row per text line.

        Args:
            text_proposals: 2-D array whose columns 0..3 are read as
                x1, y1, x2, y2 of each proposal.
            scores: array of per-proposal scores, indexed by index lists.
            im_size: forwarded to the graph builder.

        Returns:
            float32 array of shape (num_lines, 8) with columns:
            x_min, top y, x_max, bottom y, mean score, slope and intercept
            of the line through the proposal centers, and mean proposal
            height + 2.5.
        """
        # tp=text proposal
        tp_groups = self.group_text_proposals(text_proposals, scores, im_size)
        text_lines = np.zeros((len(tp_groups), 8), np.float32)

        for index, tp_indices in enumerate(tp_groups):
            members = list(tp_indices)
            boxes = text_proposals[members]
            left, top, right, bottom = (boxes[:, 0], boxes[:, 1],
                                        boxes[:, 2], boxes[:, 3])

            # Line through the centers of all proposals in the group.
            slope, intercept = self._fit_center_line((left + right) / 2,
                                                     (top + bottom) / 2)

            x0 = np.min(left)
            x1 = np.max(right)

            # Half the width of the first proposal: the edge lines are
            # evaluated this far inside the horizontal extent.
            offset = (boxes[0, 2] - boxes[0, 0]) * 0.5

            lt_y, rt_y = self.fit_y(left, top, x0 + offset, x1 - offset)
            lb_y, rb_y = self.fit_y(left, bottom, x0 + offset, x1 - offset)

            # the score of a text line is the average score of the scores
            # of all text proposals contained in the text line
            score = scores[members].sum() / float(len(tp_indices))

            text_lines[index, 0] = x0
            text_lines[index, 1] = min(lt_y, rt_y)
            text_lines[index, 2] = x1
            text_lines[index, 3] = max(lb_y, rb_y)
            text_lines[index, 4] = score
            text_lines[index, 5] = slope
            text_lines[index, 6] = intercept
            height = np.mean(bottom - top)
            text_lines[index, 7] = height + 2.5

        # NOTE: rows are returned unclipped (no clip_boxes call is applied).
        return text_lines
コード例 #4
0
class TextProposalConnector:
    """
        Connect text proposals into text lines
    """
    def __init__(self):
        # Produces the proposal graph whose connected sub-graphs become lines.
        self.graph_builder = TextProposalGraphBuilder()

    def group_text_proposals(self, text_proposals, scores, im_size):
        """Return the groups of proposal indices found by the graph builder."""
        graph = self.graph_builder.build_graph(text_proposals, scores, im_size)
        return graph.sub_graphs_connected()

    def fit_y(self, X, Y, x1, x2):
        """Fit a straight line through (X, Y) and evaluate it at x1 and x2.

        X must be non-empty; an empty X fails on the ``X[0]`` lookup.

        Returns:
            The pair ``(y(x1), y(x2))``. When every entry of X is identical
            no slope can be fitted and ``(Y[0], Y[0])`` is returned.
        """
        # if X only include one point, the function will get line y=Y[0]
        if np.all(X == X[0]):
            return Y[0], Y[0]
        line = np.poly1d(np.polyfit(X, Y, 1))
        return line(x1), line(x2)

    # Least-squares variant (the method name keeps its existing spelling).
    def get_text_lines_leastaq(self, text_proposals, scores, im_size):
        """Build text-line quadrilaterals from two ``leastsq`` line fits.

        One line is fitted to the (column 0, column 1) corner of every
        proposal in a group and one to the (column 2, column 3) corner; both
        are evaluated at the group's minimum and maximum x.

        ``leastsq`` and ``error`` are defined outside this class; presumably
        ``scipy.optimize.leastsq`` and a residual function of the form
        ``error(params, x, y)`` -- TODO confirm.

        Returns:
            float32 array of shape (num_lines, 9) with columns
            x0, y(top, x0), x1, y(top, x1), x0, y(bottom, x0), x1,
            y(bottom, x1), mean score.
        """
        tp_groups = self.group_text_proposals(text_proposals, scores, im_size)
        text_lines = np.zeros((len(tp_groups), 9), np.float32)
        for index, tp_indices in enumerate(tp_groups):
            members = list(tp_indices)
            boxes = text_proposals[members]
            x0 = np.min(boxes[:, 0])
            x1 = np.max(boxes[:, 2])
            # NOTE(review): the x extent is reused as the optimiser's starting
            # guess for (k, b) -- confirm this is intended.
            p0 = [x0, x1]
            top_fit = leastsq(error, p0,
                              args=(np.array(boxes[:, 0]),
                                    np.array(boxes[:, 1])))
            bottom_fit = leastsq(error, p0,
                                 args=(np.array(boxes[:, 2]),
                                       np.array(boxes[:, 3])))
            # Element 0 of each result holds the fitted (slope, intercept).
            k1, b1 = top_fit[0]
            k2, b2 = bottom_fit[0]
            score = scores[members].sum() / float(len(tp_indices))
            text_lines[index, 0] = x0
            text_lines[index, 1] = k1 * x0 + b1
            text_lines[index, 2] = x1
            text_lines[index, 3] = k1 * x1 + b1
            text_lines[index, 4] = x0
            text_lines[index, 5] = k2 * x0 + b2
            text_lines[index, 6] = x1
            text_lines[index, 7] = k2 * x1 + b2
            text_lines[index, 8] = score
        return text_lines

    def get_text_lines(self, text_proposals, scores, im_size):
        """Merge grouped proposals into axis-aligned text-line boxes.

        Args:
            text_proposals: 2-D array whose columns 0..3 are read as
                x1, y1, x2, y2 of each proposal.
            scores: array of per-proposal scores, indexed by index lists.
            im_size: forwarded to the graph builder and to ``clip_boxes``.

        Returns:
            The result of ``clip_boxes`` applied to a float32 array of shape
            (num_lines, 5) with columns x_min, top y, x_max, bottom y,
            mean score.
        """
        # tp=text proposal
        tp_groups = self.group_text_proposals(text_proposals, scores, im_size)
        text_lines = np.zeros((len(tp_groups), 5), np.float32)

        for index, tp_indices in enumerate(tp_groups):
            members = list(tp_indices)
            boxes = text_proposals[members]

            x0 = np.min(boxes[:, 0])
            x1 = np.max(boxes[:, 2])

            # Half the width of the first proposal: the edge lines are
            # evaluated this far inside the horizontal extent.
            offset = (boxes[0, 2] - boxes[0, 0]) * 0.5

            lt_y, rt_y = self.fit_y(boxes[:, 0], boxes[:, 1],
                                    x0 + offset, x1 - offset)
            lb_y, rb_y = self.fit_y(boxes[:, 0], boxes[:, 3],
                                    x0 + offset, x1 - offset)

            # the score of a text line is the average score of the scores
            # of all text proposals contained in the text line
            score = scores[members].sum() / float(len(tp_indices))

            text_lines[index, 0] = x0
            text_lines[index, 1] = min(lt_y, rt_y)
            text_lines[index, 2] = x1
            text_lines[index, 3] = max(lb_y, rb_y)
            text_lines[index, 4] = score

        text_lines = clip_boxes(text_lines, im_size)

        return text_lines
コード例 #5
0
class TextProposalConnectorOriented:
    """
        Connect text proposals into text lines
    """
    def __init__(self):
        # Produces the proposal graph whose connected sub-graphs become lines.
        self.graph_builder = TextProposalGraphBuilder()

    def group_text_proposals(self, text_proposals, scores, im_size):
        """Return the groups of proposal indices found by the graph builder."""
        graph = self.graph_builder.build_graph(text_proposals, scores, im_size)
        return graph.sub_graphs_connected()

    def fit_y(self, X, Y, x1, x2):
        """Fit a straight line through (X, Y) and evaluate it at x1 and x2.

        X must be non-empty; an empty X fails on the ``X[0]`` lookup.

        Returns:
            The pair ``(y(x1), y(x2))``. When every entry of X is identical
            no slope can be fitted and ``(Y[0], Y[0])`` is returned.
        """
        # if X only include one point, the function will get line y=Y[0]
        if np.all(X == X[0]):
            return Y[0], Y[0]
        line = np.poly1d(np.polyfit(X, Y, 1))
        return line(x1), line(x2)

    def _fit_center_line(self, X, Y):
        """Return (slope, intercept) of the least-squares line through (X, Y).

        With identical X values (e.g. a single proposal) the slope is
        undetermined; np.polyfit would only warn about a rank-deficient fit
        and return an arbitrary tilted line, so a horizontal line through
        the mean of Y is used instead.
        """
        if np.all(X == X[0]):
            return 0.0, np.mean(Y)
        slope, intercept = np.polyfit(X, Y, 1)
        return slope, intercept

    def get_text_lines(self, text_proposals, scores, im_size):
        """Merge grouped proposals into oriented text-line quadrilaterals.

        Args:
            text_proposals: 2-D array of boxes whose columns 0..3 are read
                as x1, y1, x2, y2.
            scores: array of per-proposal scores, indexed by index lists.
            im_size: forwarded to the graph builder.

        Returns:
            float64 array of shape (num_lines, 9): the corner points in the
            order top-left, top-right, bottom-left, bottom-right
            (x then y for each), followed by the mean score of the line.
        """
        # tp=text proposal
        # Build the graph first to learn which small boxes form each line.
        tp_groups = self.group_text_proposals(text_proposals, scores,
                                              im_size)

        text_lines = np.zeros((len(tp_groups), 8), np.float32)

        for index, tp_indices in enumerate(tp_groups):
            members = list(tp_indices)
            boxes = text_proposals[members]  # every small box of this line

            # Center of each small box, then a least-squares line through
            # those centers.
            center_x = (boxes[:, 0] + boxes[:, 2]) / 2
            center_y = (boxes[:, 1] + boxes[:, 3]) / 2
            slope, intercept = self._fit_center_line(center_x, center_y)

            x0 = np.min(boxes[:, 0])  # smallest x of the text line
            x1 = np.max(boxes[:, 2])  # largest x of the text line

            # Half the width of the first small box.
            offset = (boxes[0, 2] - boxes[0, 0]) * 0.5

            # Fit a line through the top-left corners of all boxes and read
            # its y near the left and right ends of the text line.
            lt_y, rt_y = self.fit_y(boxes[:, 0], boxes[:, 1],
                                    x0 + offset, x1 - offset)
            # Same for the bottom-left corners.
            lb_y, rb_y = self.fit_y(boxes[:, 0], boxes[:, 3],
                                    x0 + offset, x1 - offset)

            # The line score is the mean score of its small boxes.
            score = scores[members].sum() / float(len(tp_indices))

            text_lines[index, 0] = x0
            text_lines[index, 1] = min(lt_y, rt_y)  # smaller y of top edge
            text_lines[index, 2] = x1
            text_lines[index, 3] = max(lb_y, rb_y)  # larger y of bottom edge
            text_lines[index, 4] = score
            text_lines[index, 5] = slope  # k, b of the center line
            text_lines[index, 6] = intercept
            height = np.mean(boxes[:, 3] - boxes[:, 1])  # mean box height
            text_lines[index, 7] = height + 2.5

        # np.float was only an alias of the builtin float (float64) and has
        # been removed from NumPy, so the dtype is spelled out explicitly.
        text_recs = np.zeros((len(text_lines), 9), np.float64)
        for index, line in enumerate(text_lines):
            # Shift the center line up and down by half the height to get
            # the intercepts of the top and bottom edges.
            b1 = line[6] - line[7] / 2
            b2 = line[6] + line[7] / 2
            x1 = line[0]
            y1 = line[5] * line[0] + b1  # top-left
            x2 = line[2]
            y2 = line[5] * line[2] + b1  # top-right
            x3 = line[0]
            y3 = line[5] * line[0] + b2  # bottom-left
            x4 = line[2]
            y4 = line[5] * line[2] + b2  # bottom-right
            disX = x2 - x1
            disY = y2 - y1
            width = np.sqrt(disX * disX + disY * disY)  # text-line width

            fTmp0 = y3 - y1  # text-line height
            fTmp1 = fTmp0 * disY / width
            # Compensation applied to two opposite corners, chosen by the
            # sign of the slope.
            x = np.fabs(fTmp1 * disX / width)
            y = np.fabs(fTmp1 * disY / width)
            if line[5] < 0:
                x1 -= x
                y1 += y
                x4 += x
                y4 -= y
            else:
                x2 += x
                y2 += y
                x3 -= x
                y3 -= y
            text_recs[index, 0] = x1
            text_recs[index, 1] = y1
            text_recs[index, 2] = x2
            text_recs[index, 3] = y2
            text_recs[index, 4] = x3
            text_recs[index, 5] = y3
            text_recs[index, 6] = x4
            text_recs[index, 7] = y4
            text_recs[index, 8] = line[4]

        return text_recs
コード例 #6
0
class TextProposalConnector:
    """
        Connect text proposals into text lines
    """
    def __init__(self):
        # Produces the graph whose connected sub-graphs become text lines.
        self.graph_builder = TextProposalGraphBuilder()

    def group_text_proposals(self, char_boxes, char_labels, im_size):
        """Return the groups of box indices found by the graph builder."""
        graph = self.graph_builder.build_graph(char_boxes, char_labels,
                                               im_size)
        return graph.sub_graphs_connected()

    def fit_y(self, X, Y, x1, x2):
        """Fit a straight line through (X, Y) and evaluate it at x1 and x2.

        X must be non-empty; an empty X fails on the ``X[0]`` lookup.

        Returns:
            The pair ``(y(x1), y(x2))``. When every entry of X is identical
            no slope can be fitted and ``(Y[0], Y[0])`` is returned.
        """
        # if X only include one point, the function will get line y=Y[0]
        if np.all(X == X[0]):
            return Y[0], Y[0]
        line = np.poly1d(np.polyfit(X, Y, 1))
        return line(x1), line(x2)

    def get_text_lines(self, char_boxes, char_labels, im_size):
        """Merge grouped character boxes into labelled text-line boxes.

        Args:
            char_boxes: 2-D array whose columns 0..3 are read as
                x1, y1, x2, y2 of each box.
            char_labels: per-box labels, concatenated per group with ``+=``
                onto a string.
            im_size: forwarded to the graph builder and to ``clip_boxes``.

        Returns:
            A pair ``(text_lines, text_lines_label)``: the result of
            ``clip_boxes`` applied to a float32 (num_lines, 4) array of
            x0, y0, x1, y1, and a list with one concatenated label per line.
            A line whose angle exceeds ``cfg.MAX_ANGLE`` degrees is reported
            on stdout and stored as all zeros.
        """
        # tp=text proposal
        # the subgraph of graph, type is 'list of list '
        tp_groups = self.group_text_proposals(char_boxes, char_labels, im_size)
        text_lines = np.zeros((len(tp_groups), 4), np.float32)

        text_lines_label = [''] * len(text_lines)
        for index, tp_indices in enumerate(tp_groups):
            text_line_boxes = char_boxes[list(tp_indices)]

            x0 = np.min(text_line_boxes[:, 0])
            x1 = np.max(text_line_boxes[:, 2])

            # Half the width of the first box in the group.
            offset = (text_line_boxes[0, 2] - text_line_boxes[0, 0]) * 0.5

            # rectify the text proposals to a line using polyfit, then
            # enlarge
            lt_y, rt_y = self.fit_y(text_line_boxes[:, 0],
                                    text_line_boxes[:, 1],
                                    x0 + offset, x1 - offset)
            lb_y, rb_y = self.fit_y(text_line_boxes[:, 0],
                                    text_line_boxes[:, 3],
                                    x0 + offset, x1 - offset)

            y0 = min(lt_y, rt_y)
            y1 = max(lb_y, rb_y)

            last_box = text_line_boxes[-1]
            run = last_box[2] - x0
            if np.arctan((last_box[1] - y0) / run) > pi * cfg.MAX_ANGLE / 180:
                # NOTE(review): the check above uses the last box's column 1
                # while the reported angle uses column 3 -- confirm which
                # one is intended.
                # A single pre-formatted argument prints the same text under
                # both the Python 2 print statement and the Python 3 function.
                print('current angle is %s' %
                      (np.arctan((last_box[3] - y0) / run) / pi * 180,))
                x0, y0, x1, y1 = 0, 0, 0, 0
            text_lines[index, 0] = x0
            text_lines[index, 1] = y0
            text_lines[index, 2] = x1
            text_lines[index, 3] = y1

            for tp_indice in tp_indices:
                text_lines_label[index] += char_labels[tp_indice]

        text_lines = clip_boxes(text_lines, im_size)

        return text_lines, text_lines_label
class TextProposalConnector:
    """
        Connect text proposals into text lines
    """
    def __init__(self):
        # Produces the proposal graph whose connected sub-graphs become lines.
        self.graph_builder = TextProposalGraphBuilder()

    def group_text_proposals(self, text_proposals, scores, im_size):
        """Return the groups of proposal indices found by the graph builder."""
        graph = self.graph_builder.build_graph(text_proposals, scores, im_size)
        return graph.sub_graphs_connected()

    def fit_y(self, X, Y, x1, x2):
        """Fit a straight line through (X, Y) and evaluate it at x1 and x2.

        X must be non-empty; an empty X fails on the ``X[0]`` lookup.

        Returns:
            The pair ``(y(x1), y(x2))``. When every entry of X is identical
            no slope can be fitted and ``(Y[0], Y[0])`` is returned.
        """
        # if X only include one point, the function will get line y=Y[0]
        if np.all(X == X[0]):
            return Y[0], Y[0]
        line = np.poly1d(np.polyfit(X, Y, 1))
        return line(x1), line(x2)

    def get_text_lines(self, text_proposals, scores, im_size, sides):
        """Merge grouped proposals into text-line boxes with refined sides.

        Args:
            text_proposals: 2-D array whose columns 0..3 are read as
                x1, y1, x2, y2 of each proposal.
            scores: array of per-proposal scores, indexed by index lists.
            im_size: forwarded to the graph builder and to ``clip_boxes``.
            sides: per-proposal side offsets, indexed like ``scores``; the
                values of the extreme proposals are scaled by
                ``cfg.TEXT_PROPOSALS_WIDTH`` and added to their center x.

        Returns:
            The result of ``clip_boxes`` applied to a float32 array of shape
            (num_lines, 5) with columns x0, top y, x1, bottom y, mean score,
            where x0 and x1 are truncated to integers.
        """
        # tp=text proposal
        tp_groups = self.group_text_proposals(text_proposals, scores, im_size)
        text_lines = np.zeros((len(tp_groups), 5), np.float32)

        for index, tp_indices in enumerate(tp_groups):
            members = list(tp_indices)
            text_line_boxes = text_proposals[members]
            line_side = sides[members]

            left_edges = text_line_boxes[:, 0]
            right_edges = text_line_boxes[:, 2]
            x0 = np.min(left_edges)
            x1 = np.max(right_edges)

            # Position (within the group) of the last proposal that touches
            # each horizontal extreme.
            x0_index = np.flatnonzero(left_edges == x0)[-1]
            x1_index = np.flatnonzero(right_edges == x1)[-1]

            # Refined side = side offset * proposal width + proposal center.
            x0 = line_side[x0_index] * cfg.TEXT_PROPOSALS_WIDTH + (
                text_line_boxes[x0_index][0] +
                text_line_boxes[x0_index][2]) / 2.0
            x1 = line_side[x1_index] * cfg.TEXT_PROPOSALS_WIDTH + (
                text_line_boxes[x1_index][0] +
                text_line_boxes[x1_index][2]) / 2.0

            x0 = int(x0)
            x1 = int(x1)

            # Half the width of the first proposal in the group.
            offset = (text_line_boxes[0, 2] - text_line_boxes[0, 0]) * 0.5

            lt_y, rt_y = self.fit_y(left_edges, text_line_boxes[:, 1],
                                    x0 + offset, x1 - offset)
            lb_y, rb_y = self.fit_y(left_edges, text_line_boxes[:, 3],
                                    x0 + offset, x1 - offset)

            # the score of a text line is the average score of the scores
            # of all text proposals contained in the text line
            score = scores[members].sum() / float(len(tp_indices))

            text_lines[index, 0] = x0
            text_lines[index, 1] = min(lt_y, rt_y)
            text_lines[index, 2] = x1
            text_lines[index, 3] = max(lb_y, rb_y)
            text_lines[index, 4] = score

        text_lines = clip_boxes(text_lines, im_size)

        return text_lines
コード例 #8
0
class TextProposalConnector:
    """
        Connect text proposals into text lines
    """
    def __init__(self):
        # Produces the proposal graph whose connected sub-graphs become lines.
        self.graph_builder = TextProposalGraphBuilder()

    def group_text_proposals(self, text_proposals, scores, im_size):
        """Return the groups of proposal indices found by the graph builder."""
        graph = self.graph_builder.build_graph(text_proposals, scores, im_size)
        return graph.sub_graphs_connected()

    def fit_y(self, X, Y, x1, x2):
        """Fit a straight line through (X, Y) and evaluate it at x1 and x2.

        X must be non-empty; an empty X fails on the ``X[0]`` lookup.

        Returns:
            The pair ``(y(x1), y(x2))``. When every entry of X is identical
            no slope can be fitted and ``(Y[0], Y[0])`` is returned.
        """
        # if X only include one point, the function will get line y=Y[0]
        if np.all(X == X[0]):
            return Y[0], Y[0]
        line = np.poly1d(np.polyfit(X, Y, 1))
        return line(x1), line(x2)

    def get_text_lines(self, text_proposals, scores, xsides, im_size):
        """Merge grouped proposals into text-line boxes with refined sides.

        Args:
            text_proposals: 2-D array whose columns 0..3 are read as
                x1, y1, x2, y2 of each proposal.
            scores: array of per-proposal scores, indexed by index lists.
            xsides: per-proposal side offsets; ``xsides[i][0]`` is read for
                the first and the last proposal of every group.
            im_size: forwarded to the graph builder and to ``clip_boxes``.

        Returns:
            The result of ``clip_boxes`` applied to a float32 array of shape
            (num_lines, 5) with columns refined x0, top y, refined x1,
            bottom y, mean score.
        """
        tp_groups = self.group_text_proposals(text_proposals, scores, im_size)
        text_lines = np.zeros((len(tp_groups), 5), np.float32)

        for index, tp_indices in enumerate(tp_groups):
            members = list(tp_indices)
            text_line_boxes = text_proposals[members]
            # The first/last index of the group is used as the left/right
            # end -- assumes groups are ordered left to right; TODO confirm
            # against the graph builder.
            first, last = tp_indices[0], tp_indices[-1]
            left_anchor = text_proposals[first]
            right_anchor = text_proposals[last]

            # Anchor width (inclusive of both edges) and center x.
            w_a_left = left_anchor[2] - left_anchor[0] + 1.
            w_a_right = right_anchor[2] - right_anchor[0] + 1.
            ctr_a_left = left_anchor[0] + w_a_left * 0.5
            ctr_a_right = right_anchor[0] + w_a_right * 0.5

            # Refined side = side offset * anchor width + anchor center.
            x0_refined = xsides[first][0] * w_a_left + ctr_a_left
            x1_refined = xsides[last][0] * w_a_right + ctr_a_right

            # Half the width of the first proposal in the group.
            offset = (text_line_boxes[0, 2] - text_line_boxes[0, 0]) * 0.5
            lt_y, rt_y = self.fit_y(text_line_boxes[:, 0],
                                    text_line_boxes[:, 1],
                                    x0_refined + offset, x1_refined - offset)
            lb_y, rb_y = self.fit_y(text_line_boxes[:, 0],
                                    text_line_boxes[:, 3],
                                    x0_refined + offset, x1_refined - offset)

            # the score of a text line is the average score of the scores
            # of all text proposals contained in the text line
            score = scores[members].sum() / float(len(tp_indices))

            text_lines[index, 0] = x0_refined
            text_lines[index, 1] = min(lt_y, rt_y)
            text_lines[index, 2] = x1_refined
            text_lines[index, 3] = max(lb_y, rb_y)
            text_lines[index, 4] = score

        text_lines = clip_boxes(text_lines, im_size)

        return text_lines