Esempio n. 1
0
def crop_words(img, boxes, height, width=None, grayscale=True):
    """Cut word patches out of an image by rectifying their polygons.

    Every polygon is mapped with a perspective transform onto an upright
    patch of `height` pixels; the patch width follows the aspect ratio of
    the polygon's rotated box as returned by `polygon_to_rbox`.

    # Arguments
        img: image array whose first two axes are rows and columns. With
            `grayscale=True` it is passed through `cv2.COLOR_BGR2GRAY`,
            so it presumably is a 3-channel BGR image.
        boxes: sequence of polygons. Each entry is reshaped to (-1, 2)
            xy vertices, which are multiplied by the image width and
            height (presumably normalized coordinates). The vertices are
            mapped, in order, to the top-left, top-right, bottom-right
            and bottom-left corner of the patch.
        height: height of the returned patches in pixels.
        width: if given, every patch is cut off or padded with ones on
            the right to exactly this width.
        grayscale: if True, patches are converted to a single channel
            and min-max normalized to the range [0, 255].

    # Return
        words: list of float32 arrays of shape (height, patch_width,
            channels), one per box that was not skipped.
        vac: indices of the boxes that were skipped because
            `polygon_to_rbox` returned a result of length 1.

    # Note
        make sure that the vertices of all boxes are inside the image
    """

    # the image size does not change between boxes
    img_h, img_w = img.shape[:2]
    patch_h = int(height)

    words = []
    # indices of skipped boxes (added by Soyoung)
    vac = []
    for idx, raw_box in enumerate(boxes):
        # polygon case
        polygon = np.reshape(raw_box, (-1, 2))
        rbox = polygon_to_rbox(polygon)
        if len(rbox) == 1:
            vac.append(idx)
            continue

        # aspect ratio of the rotated box in pixel units
        aspect = (rbox[2] * img_w) / (rbox[3] * img_h)
        # a very narrow box could round to zero, which is not a usable
        # output size, so keep at least one column
        patch_w = max(1, int(round(height * aspect)))

        src = np.asarray(polygon * [img_w, img_h], np.float32)
        dst = np.array(
            [[0, 0], [patch_w, 0], [patch_w, patch_h], [0, patch_h]],
            dtype=np.float32)
        M = cv2.getPerspectiveTransform(src, dst)

        word = cv2.warpPerspective(img,
                                   M, (patch_w, patch_h),
                                   flags=cv2.INTER_CUBIC)

        if grayscale:
            word = cv2.cvtColor(word, cv2.COLOR_BGR2GRAY)
            word = cv2.normalize(word,
                                 word,
                                 alpha=0,
                                 beta=255,
                                 norm_type=cv2.NORM_MINMAX)
            # restore the channel axis dropped by the conversion
            word = word[:, :, None]

        word = word.astype(np.float32)

        if width is not None:
            clipped = word[:, :width, :]
            # explicit dtype: the default of np.ones would turn the
            # float32 patch into float64
            word = np.ones((patch_h, width, clipped.shape[2]),
                           dtype=np.float32)
            word[:, :clipped.shape[1], :] = clipped

        words.append(word)
    return words, vac
Esempio n. 2
0
def crop_words(img, boxes, height, width=None, grayscale=True):
    """Cut word patches of a fixed height out of an image.

    Axis-aligned boxes are cropped and resized; polygons are rectified
    with a perspective transform. In both cases the patch width follows
    the aspect ratio of the word region.

    # Arguments
        img: image array whose first two axes are rows and columns. With
            `grayscale=True` it is passed through `cv2.COLOR_BGR2GRAY`,
            so it presumably is a 3-channel BGR image.
        boxes: array with one row per word. Rows with 4 values are read
            as xmin, ymin, xmax, ymax; any other row is reshaped to
            (-1, 2) polygon vertices that are mapped, in order, to the
            top-left, top-right, bottom-right and bottom-left corner of
            the patch. All values are multiplied by the image width and
            height (presumably normalized coordinates).
        height: height of the returned patches in pixels.
        width: if given, every patch is cut off or padded with zeros on
            the right to exactly this width.
        grayscale: if True, patches are converted to a single channel
            and min-max normalized to the range [0, 255].

    # Return
        List of float32 arrays of shape (height, patch_width, channels),
        one per box.

    # Note
        make sure that the vertices of all boxes are inside the image
    """

    # the image size does not change between boxes
    img_h, img_w = img.shape[:2]
    patch_h = int(height)

    words = []
    for raw_box in boxes:
        if boxes.shape[1] == 4:
            # box case
            box = np.round(raw_box * [img_w, img_h, img_w, img_h]).astype(
                np.int32)
            xmin, ymin, xmax, ymax = box
            aspect = (xmax - xmin) / (ymax - ymin)
            # a very narrow box could round to zero, which is not a
            # usable output size, so keep at least one column
            patch_w = max(1, int(round(height * aspect)))

            word = img[ymin:ymax, xmin:xmax, :]
            word = cv2.resize(word, (patch_w, patch_h),
                              interpolation=cv2.INTER_CUBIC)
        else:
            # polygon case
            polygon = np.reshape(raw_box, (-1, 2))
            rbox = polygon_to_rbox(polygon)
            # aspect ratio of the rotated box in pixel units
            aspect = (rbox[2] * img_w) / (rbox[3] * img_h)
            # same lower bound as in the box case
            patch_w = max(1, int(round(height * aspect)))

            src = np.asarray(polygon * [img_w, img_h], np.float32)
            dst = np.array(
                [[0, 0], [patch_w, 0], [patch_w, patch_h], [0, patch_h]],
                dtype=np.float32)
            M = cv2.getPerspectiveTransform(src, dst)

            word = cv2.warpPerspective(img,
                                       M, (patch_w, patch_h),
                                       flags=cv2.INTER_CUBIC)

        if grayscale:
            word = cv2.cvtColor(word, cv2.COLOR_BGR2GRAY)
            word = cv2.normalize(word,
                                 word,
                                 alpha=0,
                                 beta=255,
                                 norm_type=cv2.NORM_MINMAX)
            # restore the channel axis dropped by the conversion
            word = word[:, :, None]

        word = word.astype(np.float32)

        if width is not None:
            clipped = word[:, :width, :]
            # explicit dtype: the default of np.zeros would turn the
            # float32 patch into float64
            word = np.zeros((patch_h, width, clipped.shape[2]),
                            dtype=np.float32)
            word[:, :clipped.shape[1], :] = clipped

        words.append(word)
    return words
Esempio n. 3
0
    def encode(self, gt_data, debug=False):
        """Encode ground truth polygons to segments and links for local classification and regression.

        Besides returning the encoding, the method stores `gt_rboxes` and
        `gt_polygons` on `self` and `match_indices`, `segment_labels`,
        `segment_offsets`, `inter_layer_links_labels` and
        `cross_layer_links_labels` on every prior map.

        # Arguments
            gt_data: shape (boxes, 4 xy + classes); the xy values are
                multiplied by the image width and height.
            debug: if True, the local geometry of the first positive
                prior of each prior map is plotted; the offsets of the
                remaining priors of that map are then left at zero.

        # Return
            shape (priors, 2 segment_labels + 5 segment_offsets + 2*8 inter_layer_links_labels + 2*4 cross_layer_links_labels)
        """

        rboxes = []
        polygons = []
        for word in gt_data:
            xy = np.reshape(word[:8], (-1, 2))
            xy = np.copy(xy) * (self.image_w, self.image_h)
            polygons.append(xy)
            rboxes.append(polygon_to_rbox(xy))
        rboxes = self.gt_rboxes = np.array(rboxes)
        self.gt_polygons = np.array(polygons)

        # compute segments
        for i, m in enumerate(self.prior_maps):
            num_priors = len(m.priors)
            # assumes m.priors_xy has one (x, y) row per prior
            priors_xy = np.asarray(m.priors_xy)

            # assign gt to priors
            a_l = m.minmax_size[0]
            match_indices = np.full(num_priors, -1, dtype=np.int32)
            min_lhs_eq_11 = np.full(num_priors, 1e6, dtype=np.float32)
            for j in range(len(rboxes)):
                _, _, w, h, theta = rboxes[j]
                c = rboxes[j, :2]
                # constraint on ratio between box size and word height, equation (11)
                lhs_eq_11 = max(a_l / h, h / a_l)
                if lhs_eq_11 <= 1.5:
                    R = rot_matrix(theta)
                    # distance of all prior centers to the box center,
                    # measured along the axes of the rotated box
                    d = np.abs(np.dot(priors_xy - c, R.T))
                    # prior center lies inside the gt rbox and the lhs of
                    # equation (11) is minimal for the prior; the float32
                    # minima are upcast so that the comparison is done in
                    # float64 regardless of NumPy's scalar promotion rules
                    better = ((d[:, 0] < w / 2.) & (d[:, 1] < h / 2.) &
                              (lhs_eq_11 < min_lhs_eq_11.astype(np.float64)))
                    min_lhs_eq_11[better] = lhs_eq_11
                    match_indices[better] = j
            m.match_indices = match_indices

            segment_mask = match_indices != -1

            # segment labels: column 0 is negative, column 1 is positive
            m.segment_labels = np.empty((num_priors, 2), dtype=np.int8)
            m.segment_labels[:, 0] = np.logical_not(segment_mask)
            m.segment_labels[:, 1] = segment_mask

            # compute offsets only for assigned boxes
            m.segment_offsets = np.zeros((num_priors, 5))
            pos_segment_idxs = np.nonzero(segment_mask)[0]
            for j in pos_segment_idxs:
                rbox = rboxes[match_indices[j]]
                _, _, w, h, theta = rbox
                R = rot_matrix(theta)
                prior_w, prior_h = m.priors_wh[j]

                # step 2 figure 5, rotate word anticlockwise around the center of prior
                d = rbox[:2] - priors_xy[j]
                poly_loc = rbox_to_polygon(list(d) + [w, h, theta])
                poly_loc_rot = np.dot(poly_loc, R.T)

                # step 3 figure 5, crop word to left and right of prior
                poly_loc_cropped = np.copy(poly_loc_rot)
                poly_loc_cropped[:, 0] = np.clip(poly_loc_cropped[:, 0],
                                                 -prior_w / 2., prior_w / 2.)

                # step 4 figure 5, rotate cropped word box clockwise
                poly_loc_rot_back = np.dot(poly_loc_cropped, R)
                rbox_loc_rot_back = polygon_to_rbox(poly_loc_rot_back)

                # encode, solve (3) to (7) to get local offsets
                offset = np.concatenate([
                    rbox_loc_rot_back[:2] / a_l,
                    np.log(rbox_loc_rot_back[2:4] / a_l),
                    rbox_loc_rot_back[4:5],
                ])
                offset[:4] /= m.priors[j, -4:]  # variances
                m.segment_offsets[j] = offset

                # for debugging local geometry
                if debug:
                    prior_poly_loc = np.array(
                        [[-prior_w, +prior_h], [+prior_w, +prior_h],
                         [+prior_w, -prior_h], [-prior_w, -prior_h]]) / 2.
                    plt.figure(figsize=[10] * 2)
                    ax = plt.gca()
                    outlines = [
                        (prior_poly_loc, 'r'),
                        (poly_loc, 'b'),
                        (poly_loc_rot, 'k'),
                        (poly_loc_cropped, 'c'),
                        (poly_loc_rot_back, 'y'),
                    ]
                    for outline, color in outlines:
                        ax.add_patch(
                            plt.Polygon(outline,
                                        fill=False,
                                        edgecolor=color,
                                        linewidth=1))
                    lim = 50
                    plt.xlim(-lim, lim)
                    plt.ylim(-lim, lim)
                    plt.grid()
                    plt.show()
                    # only the first positive prior is inspected
                    break

            # compute link labels
            m.inter_layer_links_labels = np.zeros((num_priors, 16),
                                                  dtype=np.int8)
            m.cross_layer_links_labels = np.zeros((num_priors, 8),
                                                  dtype=np.int8)
            # the first map has no preceding map to link to
            previous_map = self.prior_maps[i - 1] if i > 0 else None
            # we only have to check neighbors if we are positive
            for idx in pos_segment_idxs:
                neighbor_idxs = m.inter_layer_neighbors_idxs[idx]
                for n, neighbor_idx in enumerate(neighbor_idxs):
                    # the validity test has to come first, the index of an
                    # invalid neighbor must not be used
                    if (m.inter_layer_neighbors_valid[idx, n] and
                            match_indices[idx] == match_indices[neighbor_idx]):
                        # since we are positive and match to the same word, neighbor has to be positive
                        m.inter_layer_links_labels[idx, n * 2 + 1] = 1

                if previous_map is not None:
                    neighbor_idxs = m.cross_layer_neighbors_idxs[idx]
                    for n, neighbor_idx in enumerate(neighbor_idxs):
                        # cross layer neighbors are always valid
                        if match_indices[idx] == previous_map.match_indices[
                                neighbor_idx]:
                            m.cross_layer_links_labels[idx, n * 2 + 1] = 1

            # even columns hold the negative label, i.e. the complement of
            # the positive label in the following odd column
            m.inter_layer_links_labels[:, ::2] = np.logical_not(
                m.inter_layer_links_labels[:, 1::2])
            m.cross_layer_links_labels[:, ::2] = np.logical_not(
                m.cross_layer_links_labels[:, 1::2])

        # collect encoded ground truth
        maps = self.prior_maps
        segment_labels = np.concatenate([m.segment_labels for m in maps])
        segment_offsets = np.concatenate([m.segment_offsets for m in maps])
        inter_layer_links_labels = np.concatenate(
            [m.inter_layer_links_labels for m in maps])
        cross_layer_links_labels = np.concatenate(
            [m.cross_layer_links_labels for m in maps])
        return np.concatenate([
            segment_labels, segment_offsets, inter_layer_links_labels,
            cross_layer_links_labels
        ],
                              axis=1)