Ejemplo n.º 1
0
 def plot_gt(self):
     """Draw the stored ground truth on the current matplotlib axes.

     Every entry of ``self.gt_polygons`` is outlined in yellow and every
     entry of ``self.gt_rboxes`` is converted with ``rbox_to_polygon`` and
     outlined in blue. The first two columns of ``self.gt_rboxes``
     (presumably the box centers, verify against ``polygon_to_rbox``) are
     marked with green dots.
     """
     ax = plt.gca()
     # ground truth polygons
     for p in self.gt_polygons:
         ax.add_patch(plt.Polygon(p, fill=False, edgecolor='y', linewidth=4))
     # ground truth rboxes
     rboxes = self.gt_rboxes
     for rbox in rboxes:
         box = rbox_to_polygon(rbox)
         ax.add_patch(plt.Polygon(box, fill=False, edgecolor='b', linewidth=2))
     # An empty ground truth can be a 1-d array, which cannot be indexed by
     # column, so the markers are only drawn when there is at least one box.
     if len(rboxes) > 0:
         plt.plot(rboxes[:,0], rboxes[:,1], 'go',  markersize=4)
Ejemplo n.º 2
0
 def plot_results(self, results=None, show_labels=False, color='r'):
     """Draw the combined bounding boxes on the current matplotlib axes.

     # Arguments
         results: iterable of result rows, the first five values of a row
             are passed to rbox_to_polygon and the sixth one is used as
             label. Falls back to self.results if None.
         show_labels: if True, the sixth value of each row is printed with
             two decimals next to the box.
         color: matplotlib color of the box edge and the label background.
     """
     rows = self.results if results is None else results
     axes = plt.gca()
     for row in rows:
         rbox = row[:5]
         # TODO: fix this (the order of the polygon points is reversed)
         corners = np.flip(rbox_to_polygon(rbox), axis=0)
         outline = plt.Polygon(corners, fill=False, edgecolor=color,
                               linewidth=2)
         axes.add_patch(outline)
         if not show_labels:
             continue
         label_text = '%.2f' % (row[5],)
         # the text is rotated with the box, the angle is presumably given
         # in radians and converted to degrees here
         text_rotation = rbox[4]/np.pi*180
         label_style = {'facecolor':color, 'alpha':0.5}
         plt.text(corners[0,0], corners[0,1], label_text,
                  rotation=text_rotation, bbox=label_style)
Ejemplo n.º 3
0
def plot_rbox(box, color='r', linewidth=1):
    """Draw the outline of a rotated box on the current matplotlib axes.

    The box is converted with rbox_to_polygon and added as an unfilled
    polygon patch with the given edge color and line width.
    """
    corners = rbox_to_polygon(box)
    axes = plt.gca()
    outline = plt.Polygon(corners,
                          fill=False,
                          edgecolor=color,
                          linewidth=linewidth)
    axes.add_patch(outline)
Ejemplo n.º 4
0
    def encode(self, gt_data, debug=False):
        """Encode ground truth polygons to segments and links for local classification and regression.

        The ground truth is stored in self.gt_rboxes and self.gt_polygons.
        On each prior map the attributes match_indices, segment_labels,
        segment_offsets, inter_layer_links_labels and
        cross_layer_links_labels are set.

        # Arguments
            gt_data: shape (boxes, 4 xy + classes), the xy values are
                multiplied by (self.image_w, self.image_h).
            debug: if True, the local geometry of the first positive prior
                of each map is plotted and the remaining segment offsets of
                that map stay zero.

        # Return
            shape (priors, 2 segment_labels + 5 segment_offsets + 2*8 inter_layer_links_labels + 2*4 cross_layer_links_labels)
        """

        rboxes = []
        polygons = []
        for word in gt_data:
            xy = np.reshape(word[:8], (-1, 2)) * (self.image_w, self.image_h)
            polygons.append(xy)
            rboxes.append(polygon_to_rbox(xy))
        # the reshape keeps an empty ground truth two-dimensional, shape (0, 5)
        rboxes = self.gt_rboxes = np.array(rboxes).reshape(-1, 5)
        self.gt_polygons = np.array(polygons)

        # compute segments
        for i, m in enumerate(self.prior_maps):
            num_priors = len(m.priors)
            priors_xy = np.asarray(m.priors_xy).reshape(-1, 2)[:num_priors]

            # assign gt to priors
            a_l = m.minmax_size[0]
            match_indices = np.full(num_priors, -1, dtype=np.int32)
            # float64 keeps the stored ratio identical to the compared one,
            # the first matching box wins if two boxes have the same ratio
            min_lhs_eq_11 = np.full(num_priors, 1e6, dtype=np.float64)
            for j, gt_rbox in enumerate(rboxes):
                c = gt_rbox[:2]
                w, h, theta = gt_rbox[2:5]
                # constraint on ratio between box size and word height, equation (11)
                lhs_eq_11 = max(a_l / h, h / a_l)
                if lhs_eq_11 <= 1.5:
                    R = rot_matrix(theta)
                    # prior centers in the coordinate frame of the gt rbox,
                    # computed for all priors at once
                    d = np.abs(np.dot(priors_xy - c, R.T))
                    # center of prior is in gt rbox and lhs of equation (11)
                    # is minimal for the prior
                    is_better = ((d[:, 0] < w / 2.) & (d[:, 1] < h / 2.) &
                                 (lhs_eq_11 < min_lhs_eq_11))
                    min_lhs_eq_11[is_better] = lhs_eq_11
                    match_indices[is_better] = j
            m.match_indices = match_indices

            segment_mask = match_indices != -1

            # segment labels, column 0 is negative and column 1 is positive
            m.segment_labels = np.empty((num_priors, 2), dtype=np.int8)
            m.segment_labels[:, 0] = np.logical_not(segment_mask)
            m.segment_labels[:, 1] = segment_mask

            # compute offsets only for assigned boxes
            m.segment_offsets = np.zeros((num_priors, 5))
            pos_segment_idxs = np.nonzero(segment_mask)[0]
            for j in pos_segment_idxs:
                rbox = rboxes[match_indices[j]]
                w, h, theta = rbox[2:5]
                R = rot_matrix(theta)
                prior_w, prior_h = m.priors_wh[j]

                # step 2 figure 5, rotate word anticlockwise around the center of prior
                d = rbox[:2] - m.priors_xy[j]
                poly_loc = rbox_to_polygon(list(d) + [w, h, theta])
                poly_loc_rot = np.dot(poly_loc, R.T)

                # step 3 figure 5, crop word to left and right of prior
                poly_loc_croped = np.copy(poly_loc_rot)
                poly_loc_croped[:, 0] = np.clip(poly_loc_croped[:, 0],
                                                -prior_w / 2., prior_w / 2.)

                # step 4 figure 5, rotate cropped word box clockwise
                poly_loc_rot_back = np.dot(poly_loc_croped, R)
                rbox_loc_rot_back = polygon_to_rbox(poly_loc_rot_back)

                # encode, solve (3) to (7) to get local offsets
                offset = np.array(
                    list(rbox_loc_rot_back[:2] / a_l) +
                    list(np.log(rbox_loc_rot_back[2:4] / a_l)) +
                    [rbox_loc_rot_back[4]])
                offset[:4] /= m.priors[j, -4:]  # variances
                m.segment_offsets[j] = offset

                # for debugging local geometry
                if debug:
                    prior_poly_loc = np.array(
                        [[-prior_w, +prior_h], [+prior_w, +prior_h],
                         [+prior_w, -prior_h], [-prior_w, -prior_h]]) / 2.
                    plt.figure(figsize=[10] * 2)
                    ax = plt.gca()
                    # red: prior, blue: word, black: rotated word,
                    # cyan: cropped word, yellow: cropped word rotated back
                    debug_polygons = [(prior_poly_loc, 'r'),
                                      (poly_loc, 'b'),
                                      (poly_loc_rot, 'k'),
                                      (poly_loc_croped, 'c'),
                                      (poly_loc_rot_back, 'y')]
                    for debug_polygon, edgecolor in debug_polygons:
                        ax.add_patch(
                            plt.Polygon(debug_polygon,
                                        fill=False,
                                        edgecolor=edgecolor,
                                        linewidth=1))
                    lim = 50
                    plt.xlim(-lim, lim)
                    plt.ylim(-lim, lim)
                    plt.grid()
                    plt.show()
                    # only the first positive prior of the map is shown
                    break

            # compute link labels
            m.inter_layer_links_labels = np.zeros((num_priors, 16),
                                                  dtype=np.int8)
            m.cross_layer_links_labels = np.zeros((num_priors, 8),
                                                  dtype=np.int8)
            if i > 0:
                previous_map = self.prior_maps[i - 1]
            # we only have to check neighbors if we are positive
            for idx in pos_segment_idxs:
                # The loop is kept instead of a vectorized lookup because
                # the index of an invalid neighbor must not be used.
                neighbor_idxs = m.inter_layer_neighbors_idxs[idx]
                for n, neighbor_idx in enumerate(neighbor_idxs):
                    # valid neighbors
                    if m.inter_layer_neighbors_valid[idx, n]:
                        # neighbor matched to the same word
                        if match_indices[idx] == match_indices[neighbor_idx]:
                            # since we are positive and match to the same word, neighbor has to be positive
                            m.inter_layer_links_labels[idx, n * 2 + 1] = 1

                if i > 0:
                    neighbor_idxs = m.cross_layer_neighbors_idxs[idx]
                    for n, neighbor_idx in enumerate(neighbor_idxs):
                        # cross layer neighbors are always valid
                        if match_indices[idx] == previous_map.match_indices[
                                neighbor_idx]:
                            m.cross_layer_links_labels[idx, n * 2 + 1] = 1

            # even columns are the negative labels, the complement of the
            # positive labels in the odd columns
            m.inter_layer_links_labels[:, ::2] = np.logical_not(
                m.inter_layer_links_labels[:, 1::2])
            m.cross_layer_links_labels[:, ::2] = np.logical_not(
                m.cross_layer_links_labels[:, 1::2])

        # collect encoded ground truth
        maps = self.prior_maps
        segment_labels = np.concatenate([m.segment_labels for m in maps])
        segment_offsets = np.concatenate([m.segment_offsets for m in maps])
        inter_layer_links_labels = np.concatenate(
            [m.inter_layer_links_labels for m in maps])
        cross_layer_links_labels = np.concatenate(
            [m.cross_layer_links_labels for m in maps])
        return np.concatenate([
            segment_labels, segment_offsets, inter_layer_links_labels,
            cross_layer_links_labels
        ],
                              axis=1)