def _parallel_line_through(line, point):
    """Return the line parallel to ``line`` that passes through ``point``.

    Lines are ``[a, b, c]`` coefficient triples satisfying
    ``a*x + b*y + c = 0``.  ``line[1] == 0`` is treated as a vertical line,
    for which the parallel through ``point`` is simply ``x = point[0]``.
    """
    if line[1] == 0:
        return [1, 0, -point[0]]
    return [line[0], -1, point[1] - line[0] * point[0]]


def generate_rbox(im_size, polys, tags, min_text_size=10):
    """Build the score map, geometry map and training mask for one image.

    Args:
        im_size: ``(h, w)`` of the maps to generate.
        polys: iterable of quadrilaterals, each convertible to a 4x2 array
            of ``(x, y)`` vertices.
        tags: iterable parallel to ``polys``; a truthy tag zeroes that
            polygon's area in the training mask.
        min_text_size: polygons whose shorter side is below this length are
            zeroed in the training mask.  Defaults to 10, the value that was
            previously hard-coded.

    Returns:
        ``(score_map, geo_map, training_mask)`` where

        * ``score_map`` is ``(h, w)`` uint8, 1 inside every shrunk polygon;
        * ``geo_map`` is ``(h, w, 5)`` float32, filled by
          ``gen_geo_map.gen_geo_map`` for the pixels of each shrunk polygon;
        * ``training_mask`` is ``(h, w)`` uint8, 1 everywhere except inside
          polygons that are too small or tagged.
    """
    h, w = im_size
    score_map = np.zeros((h, w), dtype=np.uint8)
    geo_map = np.zeros((h, w, 5), dtype=np.float32)
    # Mask used during training to ignore some hard areas.
    training_mask = np.ones((h, w), dtype=np.uint8)
    # Scratch mask holding only the polygon currently being processed.  A
    # shared uint8 mask labelled with ``poly_idx + 1`` cannot represent labels
    # above 255, so polygons past the 255th would silently get no geometry.
    poly_mask = np.zeros((h, w), dtype=np.uint8)

    for poly, tag in zip(polys, tags):
        poly = np.array(poly)
        tag = np.array(tag)

        # Per-vertex reference length: the shorter of the two adjacent sides.
        r = [
            min(np.linalg.norm(poly[i] - poly[(i + 1) % 4]),
                np.linalg.norm(poly[i] - poly[(i - 1) % 4]))
            for i in range(4)
        ]

        # Score map: 1 inside the shrunk polygon.
        shrinked_poly = shrink_poly(poly.copy(), r).astype(np.int32)[np.newaxis, :, :]
        cv2.fillPoly(score_map, shrinked_poly, 1)
        poly_mask.fill(0)
        cv2.fillPoly(poly_mask, shrinked_poly, 1)

        # If the polygon is too small, or tagged, ignore it during training.
        poly_h = min(np.linalg.norm(poly[0] - poly[3]),
                     np.linalg.norm(poly[1] - poly[2]))
        poly_w = min(np.linalg.norm(poly[0] - poly[1]),
                     np.linalg.norm(poly[2] - poly[3]))
        full_poly = poly.astype(np.int32)[np.newaxis, :, :]
        if min(poly_h, poly_w) < min_text_size:
            cv2.fillPoly(training_mask, full_poly, 0)
        if tag:
            cv2.fillPoly(training_mask, full_poly, 0)

        # (row, col) coordinates of every pixel of the shrunk polygon.
        xy_in_poly = np.argwhere(poly_mask == 1)

        # Generate two candidate parallelograms for every edge of the quad.
        fitted_parallelograms = []
        for i in range(4):
            p0, p1, p2, p3 = (poly[(i + k) % 4] for k in range(4))
            # fit_line([x1, x2], [y1, y2]) returns the line through two points.
            edge = fit_line([p0[0], p1[0]], [p0[1], p1[1]])
            backward_edge = fit_line([p0[0], p3[0]], [p0[1], p3[1]])
            forward_edge = fit_line([p1[0], p2[0]], [p1[1], p2[1]])

            # The side opposite to ``edge`` passes through whichever of
            # p2 / p3 lies farther from ``edge``.
            if point_dist_to_line(p0, p1, p2) > point_dist_to_line(p0, p1, p3):
                edge_opposite = _parallel_line_through(edge, p2)
            else:
                edge_opposite = _parallel_line_through(edge, p3)

            # Candidate 1: keep the forward edge (p1 -> p2) and move its
            # opposite side out to p0 or p3.
            new_p2 = line_cross_point(forward_edge, edge_opposite)
            if point_dist_to_line(p1, new_p2, p0) > point_dist_to_line(p1, new_p2, p3):
                forward_opposite = _parallel_line_through(forward_edge, p0)
            else:
                forward_opposite = _parallel_line_through(forward_edge, p3)
            new_p0 = line_cross_point(forward_opposite, edge)
            new_p3 = line_cross_point(forward_opposite, edge_opposite)
            fitted_parallelograms.append([new_p0, p1, new_p2, new_p3, new_p0])

            # Candidate 2: keep the backward edge (p0 -> p3) and move its
            # opposite side out to p1 or p2.
            new_p3 = line_cross_point(backward_edge, edge_opposite)
            if point_dist_to_line(p0, p3, p1) > point_dist_to_line(p0, p3, p2):
                backward_opposite = _parallel_line_through(backward_edge, p1)
            else:
                backward_opposite = _parallel_line_through(backward_edge, p2)
            new_p1 = line_cross_point(backward_opposite, edge)
            new_p2 = line_cross_point(backward_opposite, edge_opposite)
            fitted_parallelograms.append([p0, new_p1, new_p2, new_p3, p0])

        # Keep the candidate with the smallest area (the closing vertex that
        # was appended for Polygon is dropped again).
        areas = [Polygon(t).area for t in fitted_parallelograms]
        parallelogram = np.array(
            fitted_parallelograms[np.argmin(areas)][:-1], dtype=np.float32)

        # Rotate the vertex order so the vertex with the smallest x + y is first.
        min_coord_idx = np.argmin(np.sum(parallelogram, axis=1))
        parallelogram = parallelogram[
            [(min_coord_idx + k) % 4 for k in range(4)]]

        rectangle = rectangle_from_parallelogram(parallelogram)
        rectangle, rotate_angle = sort_rectangle(rectangle)

        # Presumably the compiled equivalent of the former per-pixel Python
        # loop (distances to the top/right/bottom/left rectangle edges in
        # channels 0-3 and the angle in channel 4) -- confirm against
        # gen_geo_map.  Its return value is unused, so it is expected to write
        # into geo_map in place.
        gen_geo_map.gen_geo_map(geo_map, xy_in_poly, rectangle, rotate_angle)

    return score_map, geo_map, training_mask
def _parallel_line_through(line, point):
    """Return the line parallel to ``line`` that passes through ``point``.

    Lines are ``[a, b, c]`` coefficient triples satisfying
    ``a*x + b*y + c = 0``.  ``line[1] == 0`` is treated as a vertical line,
    for which the parallel through ``point`` is simply ``x = point[0]``.
    """
    if line[1] == 0:
        return [1, 0, -point[0]]
    return [line[0], -1, point[1] - line[0] * point[0]]


def generate_rbox(im_size, polys, tags):
    """Build the score map, geometry map and training mask for one image.

    The minimum text size is read from the module-level
    ``FLAGS.min_text_size``.

    Args:
        im_size: ``(h, w)`` of the maps to generate.
        polys: iterable of quadrilaterals, each a 4x2 array of ``(x, y)``
            vertices.
        tags: iterable parallel to ``polys``; a truthy tag zeroes that
            polygon's area in the training mask.

    Returns:
        ``(score_map, geo_map, training_mask)`` where

        * ``score_map`` is ``(h, w)`` uint8, 1 inside every shrunk polygon;
        * ``geo_map`` is ``(h, w, 5)`` float32, filled by
          ``gen_geo_map.gen_geo_map`` for the pixels of each shrunk polygon;
        * ``training_mask`` is ``(h, w)`` uint8, 1 everywhere except inside
          polygons that are too small or tagged.
    """
    h, w = im_size
    score_map = np.zeros((h, w), dtype=np.uint8)
    geo_map = np.zeros((h, w, 5), dtype=np.float32)
    # Mask used during training to ignore some hard areas.
    training_mask = np.ones((h, w), dtype=np.uint8)
    # Scratch mask holding only the polygon currently being processed.  A
    # shared uint8 mask labelled with ``poly_idx + 1`` cannot represent labels
    # above 255, so polygons past the 255th would silently get no geometry.
    poly_mask = np.zeros((h, w), dtype=np.uint8)

    for poly, tag in zip(polys, tags):
        # Per-vertex reference length: the shorter of the two adjacent sides.
        r = [
            min(np.linalg.norm(poly[i] - poly[(i + 1) % 4]),
                np.linalg.norm(poly[i] - poly[(i - 1) % 4]))
            for i in range(4)
        ]

        # Score map: 1 inside the shrunk polygon.
        shrinked_poly = shrink_poly(poly.copy(), r).astype(np.int32)[np.newaxis, :, :]
        cv2.fillPoly(score_map, shrinked_poly, 1)
        poly_mask.fill(0)
        cv2.fillPoly(poly_mask, shrinked_poly, 1)

        # If the polygon is too small, or tagged, ignore it during training.
        poly_h = min(np.linalg.norm(poly[0] - poly[3]),
                     np.linalg.norm(poly[1] - poly[2]))
        poly_w = min(np.linalg.norm(poly[0] - poly[1]),
                     np.linalg.norm(poly[2] - poly[3]))
        full_poly = poly.astype(np.int32)[np.newaxis, :, :]
        if min(poly_h, poly_w) < FLAGS.min_text_size:
            cv2.fillPoly(training_mask, full_poly, 0)
        if tag:
            cv2.fillPoly(training_mask, full_poly, 0)

        # (row, col) coordinates of every pixel of the shrunk polygon.
        xy_in_poly = np.argwhere(poly_mask == 1)

        # Generate two candidate parallelograms for every edge of the quad.
        fitted_parallelograms = []
        for i in range(4):
            p0, p1, p2, p3 = (poly[(i + k) % 4] for k in range(4))
            edge = fit_line([p0[0], p1[0]], [p0[1], p1[1]])
            backward_edge = fit_line([p0[0], p3[0]], [p0[1], p3[1]])
            forward_edge = fit_line([p1[0], p2[0]], [p1[1], p2[1]])

            # The side opposite to ``edge`` passes through whichever of
            # p2 / p3 lies farther from ``edge``.
            if point_dist_to_line(p0, p1, p2) > point_dist_to_line(p0, p1, p3):
                edge_opposite = _parallel_line_through(edge, p2)
            else:
                edge_opposite = _parallel_line_through(edge, p3)

            # Candidate 1: keep the forward edge (p1 -> p2) and move its
            # opposite side out to p0 or p3.
            new_p2 = line_cross_point(forward_edge, edge_opposite)
            if point_dist_to_line(p1, new_p2, p0) > point_dist_to_line(p1, new_p2, p3):
                forward_opposite = _parallel_line_through(forward_edge, p0)
            else:
                forward_opposite = _parallel_line_through(forward_edge, p3)
            new_p0 = line_cross_point(forward_opposite, edge)
            new_p3 = line_cross_point(forward_opposite, edge_opposite)
            fitted_parallelograms.append([new_p0, p1, new_p2, new_p3, new_p0])

            # Candidate 2: keep the backward edge (p0 -> p3) and move its
            # opposite side out to p1 or p2.
            new_p3 = line_cross_point(backward_edge, edge_opposite)
            if point_dist_to_line(p0, p3, p1) > point_dist_to_line(p0, p3, p2):
                backward_opposite = _parallel_line_through(backward_edge, p1)
            else:
                backward_opposite = _parallel_line_through(backward_edge, p2)
            new_p1 = line_cross_point(backward_opposite, edge)
            new_p2 = line_cross_point(backward_opposite, edge_opposite)
            fitted_parallelograms.append([p0, new_p1, new_p2, new_p3, p0])

        # Keep the candidate with the smallest area (the closing vertex that
        # was appended for Polygon is dropped again).
        areas = [Polygon(t).area for t in fitted_parallelograms]
        parallelogram = np.array(
            fitted_parallelograms[np.argmin(areas)][:-1], dtype=np.float32)

        # Rotate the vertex order so the vertex with the smallest x + y is first.
        min_coord_idx = np.argmin(np.sum(parallelogram, axis=1))
        parallelogram = parallelogram[
            [(min_coord_idx + k) % 4 for k in range(4)]]

        rectangle = rectangle_from_parallelogram(parallelogram)
        rectangle, rotate_angle = sort_rectangle(rectangle)

        # The return value is unused, so gen_geo_map is expected to write the
        # geometry for these pixels into geo_map in place.
        gen_geo_map.gen_geo_map(geo_map, xy_in_poly, rectangle, rotate_angle)

    return score_map, geo_map, training_mask
def _parallel_line_through(line, point):
    """Return the line parallel to ``line`` that passes through ``point``.

    Lines are ``[a, b, c]`` coefficient triples satisfying
    ``a*x + b*y + c = 0``.  ``line[1] == 0`` is treated as a vertical line,
    for which the parallel through ``point`` is simply ``x = point[0]``.
    """
    if line[1] == 0:
        return [1, 0, -point[0]]
    return [line[0], -1, point[1] - line[0] * point[0]]


def generate_rbox(im_size, polys, tags, min_text_size=10):
    """Build the score map, geometry map and training mask for one image.

    Args:
        im_size: ``(h, w)`` of the maps to generate.
        polys: iterable of quadrilaterals, each a 4x2 array of ``(x, y)``
            vertices.
        tags: iterable parallel to ``polys``; a truthy tag zeroes that
            polygon's area in the training mask.
        min_text_size: polygons whose shorter side is below this length are
            zeroed in the training mask.

    Returns:
        ``(score_map, geo_map, training_mask)`` where

        * ``score_map`` is ``(h, w)`` uint8, 1 inside every shrunk polygon;
        * ``geo_map`` is ``(h, w, 5)`` float32, filled by
          ``gen_geo_map.gen_geo_map`` for the pixels of each shrunk polygon;
        * ``training_mask`` is ``(h, w)`` uint8, 1 everywhere except inside
          polygons that are too small or tagged.
    """
    h, w = im_size
    score_map = np.zeros((h, w), dtype=np.uint8)
    geo_map = np.zeros((h, w, 5), dtype=np.float32)
    # Mask used during training to ignore some hard areas.
    training_mask = np.ones((h, w), dtype=np.uint8)
    # Scratch mask holding only the polygon currently being processed.  A
    # shared uint8 mask labelled with ``poly_idx + 1`` cannot represent labels
    # above 255, so polygons past the 255th would silently get no geometry.
    poly_mask = np.zeros((h, w), dtype=np.uint8)

    for poly, tag in zip(polys, tags):
        # Per-vertex reference length: the shorter of the two adjacent sides.
        r = [
            min(np.linalg.norm(poly[i] - poly[(i + 1) % 4]),
                np.linalg.norm(poly[i] - poly[(i - 1) % 4]))
            for i in range(4)
        ]

        # Score map: 1 inside the shrunk polygon.
        shrinked_poly = shrink_poly(poly.copy(), r).astype(np.int32)[np.newaxis, :, :]
        cv2.fillPoly(score_map, shrinked_poly, 1)
        poly_mask.fill(0)
        cv2.fillPoly(poly_mask, shrinked_poly, 1)

        # If the polygon is too small, or tagged, ignore it during training.
        poly_h = min(np.linalg.norm(poly[0] - poly[3]),
                     np.linalg.norm(poly[1] - poly[2]))
        poly_w = min(np.linalg.norm(poly[0] - poly[1]),
                     np.linalg.norm(poly[2] - poly[3]))
        full_poly = poly.astype(np.int32)[np.newaxis, :, :]
        if min(poly_h, poly_w) < min_text_size:
            cv2.fillPoly(training_mask, full_poly, 0)
        if tag:
            cv2.fillPoly(training_mask, full_poly, 0)

        # (row, col) coordinates of every pixel of the shrunk polygon.
        xy_in_poly = np.argwhere(poly_mask == 1)

        # Generate two candidate parallelograms for every edge of the quad.
        fitted_parallelograms = []
        for i in range(4):
            p0, p1, p2, p3 = (poly[(i + k) % 4] for k in range(4))
            edge = fit_line([p0[0], p1[0]], [p0[1], p1[1]])
            backward_edge = fit_line([p0[0], p3[0]], [p0[1], p3[1]])
            forward_edge = fit_line([p1[0], p2[0]], [p1[1], p2[1]])

            # The side opposite to ``edge`` passes through whichever of
            # p2 / p3 lies farther from ``edge``.
            if point_dist_to_line(p0, p1, p2) > point_dist_to_line(p0, p1, p3):
                edge_opposite = _parallel_line_through(edge, p2)
            else:
                edge_opposite = _parallel_line_through(edge, p3)

            # Candidate 1: keep the forward edge (p1 -> p2) and move its
            # opposite side out to p0 or p3.
            new_p2 = line_cross_point(forward_edge, edge_opposite)
            if point_dist_to_line(p1, new_p2, p0) > point_dist_to_line(p1, new_p2, p3):
                forward_opposite = _parallel_line_through(forward_edge, p0)
            else:
                forward_opposite = _parallel_line_through(forward_edge, p3)
            new_p0 = line_cross_point(forward_opposite, edge)
            new_p3 = line_cross_point(forward_opposite, edge_opposite)
            fitted_parallelograms.append([new_p0, p1, new_p2, new_p3, new_p0])

            # Candidate 2: keep the backward edge (p0 -> p3) and move its
            # opposite side out to p1 or p2.
            new_p3 = line_cross_point(backward_edge, edge_opposite)
            if point_dist_to_line(p0, p3, p1) > point_dist_to_line(p0, p3, p2):
                backward_opposite = _parallel_line_through(backward_edge, p1)
            else:
                backward_opposite = _parallel_line_through(backward_edge, p2)
            new_p1 = line_cross_point(backward_opposite, edge)
            new_p2 = line_cross_point(backward_opposite, edge_opposite)
            fitted_parallelograms.append([p0, new_p1, new_p2, new_p3, p0])

        # Keep the candidate with the smallest area (the closing vertex that
        # was appended for Polygon is dropped again).
        areas = [Polygon(t).area for t in fitted_parallelograms]
        parallelogram = np.array(
            fitted_parallelograms[np.argmin(areas)][:-1], dtype=np.float32)

        # Rotate the vertex order so the vertex with the smallest x + y is first.
        min_coord_idx = np.argmin(np.sum(parallelogram, axis=1))
        parallelogram = parallelogram[
            [(min_coord_idx + k) % 4 for k in range(4)]]

        rectangle = rectangle_from_parallelogram(parallelogram)
        rectangle, rotate_angle = sort_rectangle(rectangle)

        # Preprocessing written in Cython for speed.  It replaces a per-pixel
        # Python loop that stored the distances to the top/right/bottom/left
        # rectangle edges in channels 0-3 and the angle in channel 4
        # (presumably unchanged in the compiled version -- confirm against
        # gen_geo_map).
        gen_geo_map.gen_geo_map(geo_map, xy_in_poly, rectangle, rotate_angle)

    return score_map, geo_map, training_mask
def _parallel_line_through(line, point):
    """Return the line parallel to ``line`` that passes through ``point``.

    Lines are ``[a, b, c]`` coefficient triples satisfying
    ``a*x + b*y + c = 0``.  ``line[1] == 0`` is treated as a vertical line,
    for which the parallel through ``point`` is simply ``x = point[0]``.
    """
    if line[1] == 0:
        return [1, 0, -point[0]]
    return [line[0], -1, point[1] - line[0] * point[0]]


def generate_rbox(FLAGS, im_size, polys, tags):
    """Build the score map, geometry map and two training masks for one image.

    Args:
        FLAGS: configuration object; ``FLAGS.min_text_size`` is read here and
            the object is forwarded to ``line_cross_point`` and
            ``rectangle_from_parallelogram``.
        im_size: ``(h, w)`` of the maps to generate.
        polys: iterable of quadrilaterals, each a 4x2 array of ``(x, y)``
            vertices.
        tags: iterable parallel to ``polys``; a truthy tag zeroes that
            polygon's area in the small-text training mask.

    Returns:
        ``(score_map, geo_map, overly_small_text_region_training_mask,
        text_region_boundary_training_mask)`` where

        * ``score_map`` is ``(h, w)`` uint8, 1 inside every shrunk polygon;
        * ``geo_map`` is ``(h, w, 5)`` float32, filled by
          ``gen_geo_map.gen_geo_map`` for the pixels of each shrunk polygon;
        * ``overly_small_text_region_training_mask`` is ``(h, w)`` uint8,
          1 everywhere except inside polygons that are too small or tagged;
        * ``text_region_boundary_training_mask`` is ``(h, w)`` uint8, 0 on
          pixels that lie inside an original polygon but outside every
          shrunk polygon, 1 elsewhere.
    """
    h, w = im_size
    orig_poly_mask = np.zeros((h, w), dtype=np.uint8)
    score_map = np.zeros((h, w), dtype=np.uint8)
    geo_map = np.zeros((h, w, 5), dtype=np.float32)
    # Mask used during training to ignore some hard areas.
    overly_small_text_region_training_mask = np.ones((h, w), dtype=np.uint8)
    # Scratch mask holding only the polygon currently being processed.  A
    # shared uint8 mask labelled with ``poly_idx + 1`` cannot represent labels
    # above 255, so polygons past the 255th would silently get no geometry.
    shrinked_poly_mask = np.zeros((h, w), dtype=np.uint8)

    for poly, tag in zip(polys, tags):
        # Per-vertex reference length: the shorter of the two adjacent sides.
        r = [
            min(np.linalg.norm(poly[i] - poly[(i + 1) % 4]),
                np.linalg.norm(poly[i] - poly[(i - 1) % 4]))
            for i in range(4)
        ]

        # Score map: 1 inside the shrunk polygon.
        shrinked_poly = shrink_poly(poly.copy(), r).astype(np.int32)[np.newaxis, :, :]
        full_poly = poly.astype(np.int32)[np.newaxis, :, :]
        cv2.fillPoly(score_map, shrinked_poly, 1)
        shrinked_poly_mask.fill(0)
        cv2.fillPoly(shrinked_poly_mask, shrinked_poly, 1)
        cv2.fillPoly(orig_poly_mask, full_poly, 1)

        # If the polygon is too small, or tagged, ignore it during training.
        poly_h = min(np.linalg.norm(poly[0] - poly[3]),
                     np.linalg.norm(poly[1] - poly[2]))
        poly_w = min(np.linalg.norm(poly[0] - poly[1]),
                     np.linalg.norm(poly[2] - poly[3]))
        if min(poly_h, poly_w) < FLAGS.min_text_size:
            cv2.fillPoly(overly_small_text_region_training_mask, full_poly, 0)
        if tag:
            cv2.fillPoly(overly_small_text_region_training_mask, full_poly, 0)

        # (row, col) coordinates of every pixel of the shrunk polygon.
        xy_in_poly = np.argwhere(shrinked_poly_mask == 1)

        # Generate two candidate parallelograms for every edge of the quad.
        fitted_parallelograms = []
        for i in range(4):
            p0, p1, p2, p3 = (poly[(i + k) % 4] for k in range(4))
            edge = fit_line([p0[0], p1[0]], [p0[1], p1[1]])
            backward_edge = fit_line([p0[0], p3[0]], [p0[1], p3[1]])
            forward_edge = fit_line([p1[0], p2[0]], [p1[1], p2[1]])

            # The side opposite to ``edge`` passes through whichever of
            # p2 / p3 lies farther from ``edge``.
            if point_dist_to_line(p0, p1, p2) > point_dist_to_line(p0, p1, p3):
                edge_opposite = _parallel_line_through(edge, p2)
            else:
                edge_opposite = _parallel_line_through(edge, p3)

            # Candidate 1: keep the forward edge (p1 -> p2) and move its
            # opposite side out to p0 or p3.
            new_p2 = line_cross_point(FLAGS, forward_edge, edge_opposite)
            if point_dist_to_line(p1, new_p2, p0) > point_dist_to_line(p1, new_p2, p3):
                forward_opposite = _parallel_line_through(forward_edge, p0)
            else:
                forward_opposite = _parallel_line_through(forward_edge, p3)
            new_p0 = line_cross_point(FLAGS, forward_opposite, edge)
            new_p3 = line_cross_point(FLAGS, forward_opposite, edge_opposite)
            fitted_parallelograms.append([new_p0, p1, new_p2, new_p3, new_p0])

            # Candidate 2: keep the backward edge (p0 -> p3) and move its
            # opposite side out to p1 or p2.
            new_p3 = line_cross_point(FLAGS, backward_edge, edge_opposite)
            if point_dist_to_line(p0, p3, p1) > point_dist_to_line(p0, p3, p2):
                backward_opposite = _parallel_line_through(backward_edge, p1)
            else:
                backward_opposite = _parallel_line_through(backward_edge, p2)
            new_p1 = line_cross_point(FLAGS, backward_opposite, edge)
            new_p2 = line_cross_point(FLAGS, backward_opposite, edge_opposite)
            fitted_parallelograms.append([p0, new_p1, new_p2, new_p3, p0])

        # Keep the candidate with the smallest area (the closing vertex that
        # was appended for Polygon is dropped again).
        areas = [Polygon(t).area for t in fitted_parallelograms]
        parallelogram = np.array(
            fitted_parallelograms[np.argmin(areas)][:-1], dtype=np.float32)

        # Rotate the vertex order so the vertex with the smallest x + y is first.
        min_coord_idx = np.argmin(np.sum(parallelogram, axis=1))
        parallelogram = parallelogram[
            [(min_coord_idx + k) % 4 for k in range(4)]]

        rectangle = rectangle_from_parallelogram(FLAGS, parallelogram)
        rectangle, rotate_angle = sort_rectangle(rectangle)

        # Replaces a per-pixel Python loop that stored the distances to the
        # top/right/bottom/left rectangle edges in channels 0-3 and the angle
        # in channel 4 (presumably unchanged in the compiled version --
        # confirm against gen_geo_map).
        gen_geo_map.gen_geo_map(geo_map, xy_in_poly, rectangle, rotate_angle)

    # score_map is exactly the union of all shrunk polygons.  The boundary
    # band is computed with boolean logic rather than uint8 subtraction: the
    # old ``1 - (orig - shrunk)`` wrapped around to 2 on any pixel where the
    # shrunk mask was set but the original mask was not.
    boundary = (orig_poly_mask == 1) & (score_map == 0)
    text_region_boundary_training_mask = (~boundary).astype(np.uint8)

    return (score_map, geo_map, overly_small_text_region_training_mask,
            text_region_boundary_training_mask)