def extend(line, by=6, size_decay=0.9, confidence_decay=0.825):
    """Append ``by`` extrapolated steps to the end of ``line.steps``.

    Every new step is projected from the current last step: its base point
    lies one upper-height ahead along the heading, and its upper and lower
    points sit at ``heading - 90`` and ``heading + 90`` with the previous
    heights scaled by ``size_decay``.

    Args:
        line: Object with a mutable ``steps`` list. It is modified in place
            and is also passed to ``TrainingStep`` for each new step.
        by: Number of steps to append.
        size_decay: Factor applied to the previous upper and lower heights.
        confidence_decay: Ratio between consecutive ``stop_confidence``
            values. The last appended step gets 1 and each earlier one is
            one further multiplication by this factor.

    Raises:
        IndexError: If ``line.steps`` is empty and ``by`` is positive.
    """
    # Built by repeated multiplication, newest value in front, so the ramp
    # ends at exactly 1.
    ramp = [1]
    for _ in range(by - 1):
        ramp.insert(0, ramp[0] * confidence_decay)

    for _, stop_confidence in zip(range(by), ramp):
        tail = line.steps[-1]
        if len(line.steps) >= 2:
            # Heading follows the last two base points.
            heading = angle_between_points(line.steps[-2].base_point,
                                           tail.base_point)
        else:
            # Single step: derive the heading from its base->upper angle.
            heading = angle_between_points(tail.base_point,
                                           tail.upper_point) - 90

        upper_height = tail.calculate_upper_height()
        lower_height = tail.calculate_lower_height()

        base = get_new_point(tail.base_point, heading, upper_height)
        upper = get_new_point(base, heading - 90.0, upper_height * size_decay)
        lower = get_new_point(base, heading + 90.0, lower_height * size_decay)

        new_step = TrainingStep(line, {
            "base_point": [base.x, base.y],
            "lower_point": [lower.x, lower.y],
            "upper_point": [upper.x, upper.y],
        })
        new_step.stop_confidence = stop_confidence
        line.steps.append(new_step)
def intersections(self):
    """Gather the sections of every segment that touches the line polygon.

    Segments from ``self.segments`` are kept when their ``segment``
    intersects ``self.line.polygon.get()``. Leading segments without a
    first section are dropped. Each remaining segment is asked for its
    intersections and the second value it returns is passed to the next
    segment as ``offset``. Sections of the last segment that has a last
    section are then cut off at that section's distance. Finally a
    ``VirtualSegmentSection`` with confidence 0.0 is appended, shifted
    from the last collected section by two thirds of its height.

    Returns:
        list: The collected sections; empty when nothing was found.
    """
    # Only segments whose geometry meets the line polygon take part.
    candidates = [
        candidate for candidate in self.segments
        if candidate.segment.intersects(self.line.polygon.get())
    ]

    # Skip leading segments that have no first section.
    first_kept = 0
    while (first_kept < len(candidates)
           and candidates[first_kept].get_first_section() is None):
        first_kept += 1
    candidates = candidates[first_kept:]

    collected = []
    carry = 0  # value handed from one segment to the next as its offset
    for position, candidate in enumerate(candidates):
        try:
            found, carry = candidate.intersections(
                offset=carry, start_of_line=(position == 0))
            collected += found
        except NoStartOfLine:
            print("[Segment #" + str(position) + "] [Does not have a start of line]")

    # Walking backwards, the first segment with a last section decides
    # where the collected sections end.
    for candidate in reversed(candidates):
        closing = candidate.get_last_section()
        if closing is None:
            continue
        collected = [
            found for found in collected
            if found.segment != candidate or found.distance < closing.distance
        ]
        break

    if collected:
        anchor = collected[-1]
        shift_angle = 90.0 + angle_between_points(anchor.p1, anchor.p2)
        shift_factor = 2 / 3
        shifted_p1 = get_new_point(anchor.p1, shift_angle,
                                   shift_factor * anchor.height())
        shifted_p2 = get_new_point(anchor.p2, shift_angle,
                                   shift_factor * anchor.height())
        collected.append(
            VirtualSegmentSection(shifted_p1, shifted_p2, anchor.confidence))
        # The appended virtual section always ends up with zero confidence.
        collected[-1].confidence = 0.0

    return collected
def normalize_points(points):
    """Re-square the upper and lower points of every step but the last.

    For each consecutive pair the angle from the current base point to the
    next one is stored under ``"angle"``. The upper and lower points are
    then re-projected from the base point at ``angle - 90`` and
    ``angle + 90``, keeping their original distances to the base point.
    The final entry is left untouched.

    Args:
        points: Sequence of dicts with ``"base_point"``, ``"upper_point"``
            and ``"lower_point"`` entries; modified in place.

    Returns:
        The same ``points`` object.
    """
    for current, following in zip(points, points[1:]):
        base = current["base_point"]
        heading = angle_between_points(base, following["base_point"])
        current["angle"] = heading

        # Measure both heights before either point is overwritten.
        upper_height = base.distance(current["upper_point"])
        lower_height = base.distance(current["lower_point"])

        current["upper_point"] = get_new_point(base, heading - 90, upper_height)
        current["lower_point"] = get_new_point(base, heading + 90, lower_height)
    return points
def shift_touching_upper_points(ground_truth):
    """Enforce a lower height of at least 2 on every ground-truth step.

    Each step is read as ``[upper, base, lower]`` with an ``x``/``y`` pair
    of scalar tensors per point. When the lower point is closer than 2 to
    the base point, it is moved to distance 2 from the base, directly
    opposite the upper point, and written back into ``ground_truth``.

    The replacement scalars are created on the device of the element they
    overwrite rather than through a hard-coded ``.cuda()``, so the function
    also works on CPU-only hosts. Steps that already satisfy the minimum
    are left untouched.

    Args:
        ground_truth: Indexable collection of steps; modified in place.

    Returns:
        The same ``ground_truth`` object.
    """
    minimum_lower_height = 2
    for index, step in enumerate(ground_truth):
        upper_as_point = Point(step[0][0].item(), step[0][1].item())
        base_as_point = Point(step[1][0].item(), step[1][1].item())
        lower_as_point = Point(step[2][0].item(), step[2][1].item())

        if base_as_point.distance(lower_as_point) >= minimum_lower_height:
            continue

        # Push the lower point away from the base, opposite to the upper point.
        step_angle = angle_between_points(base_as_point, upper_as_point)
        lower_as_point = get_new_point(base_as_point, step_angle - 180,
                                       minimum_lower_height)

        # Stay on whatever device the existing coordinates live on.
        device = step[2][0].device
        ground_truth[index][2][0] = torch.tensor(lower_as_point.x, device=device)
        ground_truth[index][2][1] = torch.tensor(lower_as_point.y, device=device)
    return ground_truth
def starting_window(self, parameters):
    """Build the first ``ViewingWindow`` from the two points in ``self.sol``.

    The window angle is the angle from ``self.sol[1]`` to ``self.sol[0]``
    plus 90, its size is the distance between the two points, and its
    origin is ``self.sol[1]`` moved by that distance in the direction
    opposite to the window angle.

    Args:
        parameters: Passed through unchanged to ``ViewingWindow``.

    Returns:
        ViewingWindow: The window built on ``self.image``.
    """
    # presumably sol[0] is the upper and sol[1] the base point of the
    # start of line -- TODO confirm against the caller
    first, second = self.sol[0], self.sol[1]
    window_angle = angle_between_points(second, first) + 90
    window_size = first.distance(second)
    # Step back by one window size so the window starts before the line.
    window_origin = get_new_point(second, window_angle - 180, window_size)
    return ViewingWindow(parameters, self.image, window_origin,
                         window_size, window_angle)
def absolute(self, point):
    """Convert a window-relative point into absolute coordinates.

    The point's distance from ``(0, 0)`` is multiplied by ``self.ratio``
    and its angle from ``(0, 0)`` is increased by ``self.angle``; the
    result is projected from ``self.origin``.

    Args:
        point: Point expressed relative to ``Point(0, 0)``.

    Returns:
        The point produced by ``get_new_point`` from ``self.origin``.
    """
    window_zero = Point(0, 0)
    scaled_distance = window_zero.distance(point) * self.ratio
    rotated_angle = angle_between_points(window_zero, point) + self.angle
    return get_new_point(self.origin, rotated_angle, scaled_distance)
def __init__(self, parameters, image, origin, size, angle):
    """Store the window geometry and derive its focus point and ratio.

    Args:
        parameters: Object providing ``patch_ratio`` and ``patch_size``.
        image: Stored as ``self.source_image``.
        origin: Point the window is anchored at.
        size: Window size; scaled by ``patch_ratio`` for focus and ratio.
        angle: Direction in which the focus point is projected from
            ``origin``.
    """
    self.parameters = parameters
    self.source_image = image
    self.origin = origin
    self.size = size
    self.angle = angle

    # patch_ratio * size is shared by both derived values below.
    scaled_size = parameters.patch_ratio * size
    # Focus lies half of the scaled size away from the origin.
    self.focus = get_new_point(origin, angle, scaled_size / 2)
    self.ratio = scaled_size / parameters.patch_size
def section(self):
    """Sample the baseline between consecutive steps at unit spacing.

    For every pair of neighbouring steps the straight span between their
    base points is walked in increments of 1. At each sample an angle is
    interpolated with ``self.angleLerp``: over the first half of the span
    it goes from the angle the previous span ended on to this step's
    ``angle - 90``, and over the second half from there to the midpoint
    between this step's and the next step's ``angle - 90``. Each sample
    yields a point 80 along that angle, the baseline point, and a point
    20 in the opposite direction.

    Returns:
        list: ``[upper_point, baseline_point, lower_point]`` triples.

    Raises:
        IndexError: If ``self.steps`` is empty.
    """
    samples = []
    # Angle the previous span ended on; seeded from the first step.
    exit_angle = self.steps[0].angle - 90

    for current, following in zip(self.steps[:-1], self.steps[1:]):
        entry_angle = exit_angle
        mid_angle = current.angle - 90
        exit_angle = self.angleLerp(mid_angle, following.angle - 90, 0.5)

        heading = angle_between_points(current.base_point,
                                       following.base_point)
        span = current.base_point.distance(following.base_point)

        walked = 0
        while walked < span:
            fraction = walked / span
            if fraction < 0.5:
                tilt = self.angleLerp(entry_angle, mid_angle, fraction * 2)
            else:
                second_half = (fraction - 0.5) * 2
                assert 0 <= second_half <= 1
                tilt = self.angleLerp(mid_angle, exit_angle, second_half)

            on_baseline = get_new_point(current.base_point, heading, walked)
            above = get_new_point(on_baseline, tilt, 80)
            below = get_new_point(on_baseline, tilt - 180, 20)
            samples.append([above, on_baseline, below])
            walked += 1

    return samples
def __init__(self, segment, distance=0):
    """Create the section of ``segment`` located ``distance`` along it.

    A scan line of ``MAXIMUM_SCAN_HEIGHT`` on either side of the
    interpolated point, perpendicular to ``segment.bearing``, is
    intersected with the polygon of the segment's line. The first two
    coordinates of that intersection become ``p1`` and ``p2``.

    When the intersection cannot be computed or does not provide two
    coordinates, ``intersection_line``, ``p1`` and ``p2`` are all set to
    ``None``. The section is therefore never left half-initialised, with
    ``p1`` set and ``p2`` missing.

    Args:
        segment: Segment providing ``length``, ``bearing``,
            ``interpolate`` and ``baseline.line.polygon``.
        distance: Position along the segment; clamped to ``segment.length``
            for the interpolation only (``self.distance`` keeps the raw value).
    """
    self.distance = distance
    self.segment = segment
    self.confidence = 1.0

    interpolation_distance = min(self.distance, self.segment.length)
    self.point = self.segment.interpolate(interpolation_distance)

    scan_p1 = get_new_point(self.point, self.segment.bearing + 90,
                            self.MAXIMUM_SCAN_HEIGHT)
    scan_p2 = get_new_point(self.point, self.segment.bearing - 90,
                            self.MAXIMUM_SCAN_HEIGHT)
    scan_line = LineString([scan_p1, scan_p2])

    try:
        intersection_line = scan_line.intersection(
            self.segment.baseline.line.polygon.get())
        coords = intersection_line.coords
        p1 = Point(coords[0][0], coords[0][1])
        p2 = Point(coords[1][0], coords[1][1])
    except Exception:
        # Deliberately best-effort: any geometry failure means "no usable
        # section here". Reset every derived attribute so that callers see
        # a consistent state.
        self.intersection_line = None
        self.p1 = None
        self.p2 = None
    else:
        # Commit all three together, only once every one of them is known.
        self.intersection_line = intersection_line
        self.p1 = p1
        self.p2 = p2
def extend_backwards(line):
    """Prepend one extrapolated step to ``line.steps``.

    The new base point lies one upper-height behind the first step,
    opposite to the direction from the first to the second base point.
    Its upper and lower points keep the first step's heights at
    ``direction - 90`` and ``direction + 90``. The new step gets a
    ``stop_confidence`` of 0.0.

    Does nothing when the line has fewer than two steps.

    Args:
        line: Object with a ``steps`` list; its ``steps`` attribute is
            replaced by a new list starting with the added step.
    """
    if len(line.steps) < 2:
        return

    head = line.steps[0]
    heading = angle_between_points(head.base_point, line.steps[1].base_point)
    upper_height = head.calculate_upper_height()
    lower_height = head.calculate_lower_height()

    # Step backwards along the line by one upper height.
    base = get_new_point(head.base_point, heading - 180, upper_height)
    upper = get_new_point(base, heading - 90.0, upper_height)
    lower = get_new_point(base, heading + 90.0, lower_height)

    new_step = TrainingStep(line, {
        "base_point": [base.x, base.y],
        "lower_point": [lower.x, lower.y],
        "upper_point": [upper.x, upper.y],
    })
    new_step.stop_confidence = 0.0
    line.steps = [new_step] + line.steps
def draw_viewing_window(self, step, ratio, line_color=(1, 0, 1, 1), fill_color=(1, 0, 1, 0.15), line_width=3):
    """Draw the viewing window that belongs to ``step``.

    The window focus is projected from the step's base point along
    ``step.angle`` by half of ``ratio`` times the step's upper height.
    The outline comes from ``viewing_window_points`` and is drawn with
    ``self.draw_area``.

    Args:
        step: Step providing ``base_point``, ``angle`` and
            ``calculate_upper_height()``.
        ratio: Scale factor applied to the upper height.
        line_color: Passed to ``draw_area`` as ``line_color``.
        fill_color: Passed to ``draw_area`` as ``fill_color``.
        line_width: Passed to ``draw_area`` as ``line_width``.
    """
    focus_offset = ratio * step.calculate_upper_height() / 2
    window_focus = get_new_point(step.base_point, step.angle, focus_offset)
    outline = viewing_window_points(
        window_focus,
        step.calculate_upper_height(),
        step.angle,
        ratio=ratio,
    )
    self.draw_area(
        outline,
        fill_color=fill_color,
        line_color=line_color,
        line_width=line_width,
    )
def upper_concat(self, img):
    """Rectify the area above the baseline into one horizontal strip.

    Steps come from ``self.valid_steps(img)``. Steps whose upper height
    is below 16 get their upper point re-projected at distance 16. For
    each pair of neighbouring steps, the quadrilateral spanned by their
    upper and base points is warped onto a rectangle whose bottom edge is
    the baseline. The warped pieces are joined left to right with
    ``hconcat_resize_min``.

    The strip height is measured after the minimum height has been
    enforced. It is therefore never smaller than any step's height, so no
    destination row is negative.

    Args:
        img: Image passed to ``self.valid_steps`` and to
            ``cv2.warpPerspective``.

    Returns:
        The concatenated strip, or ``None`` when there are fewer than two
        valid steps.
    """
    steps = self.valid_steps(img)
    if len(steps) == 0:
        # Nothing to rectify; same result as the single-step case.
        return None

    minimum_height = 16
    for step in steps:
        if step.calculate_upper_height() < minimum_height:
            step.upper_point = get_new_point(step.base_point,
                                             step.angle - 90, minimum_height)

    # Must follow the padding above, otherwise short lines get a strip
    # lower than their own (padded) steps.
    box_height = max(step.calculate_upper_height() for step in steps)

    upper_image = None
    for step, next_step in zip(steps, steps[1:]):
        width = step.base_point.distance(next_step.base_point)
        left_upper_height = step.calculate_upper_height()
        right_upper_height = next_step.calculate_upper_height()

        # Source trapezoid: upper-left, upper-right, base-right, base-left.
        upper_src = np.array(
            [[step.upper_point.x, step.upper_point.y],
             [next_step.upper_point.x, next_step.upper_point.y],
             [next_step.base_point.x, next_step.base_point.y],
             [step.base_point.x, step.base_point.y]])
        # Destination rectangle, bottom-aligned on the baseline.
        upper_dst = np.array([[0, box_height - left_upper_height],
                              [width, box_height - right_upper_height],
                              [width, box_height],
                              [0.0, box_height]])

        upper_perspective, _ = cv2.findHomography(upper_src, upper_dst)
        # dsize is (width, height).
        upper_out = cv2.warpPerspective(
            img, upper_perspective, (int(width), int(box_height)))

        upper_image = upper_out if upper_image is None else hconcat_resize_min(
            [upper_image, upper_out])

    return upper_image
def enforce_minimum_height(line, minimum_height=16):
    """Drop crowded steps and raise short ones to ``minimum_height``.

    Every step except the last is visited in order. If the next step's
    base point is closer than half of ``minimum_height``, that next step
    is marked for removal and skipped when its turn comes. A visited step
    whose upper height is below ``minimum_height`` gets a new upper point
    at distance ``minimum_height`` from its base, at ``angle - 90``.

    Args:
        line: Object with a ``steps`` list; ``line.steps`` is replaced by
            a list without the removed steps.
        minimum_height: Minimum upper height. Half of it is the minimum
            spacing between consecutive base points.
    """
    dropped = []
    # Pairing each step with its successor leaves the last step unvisited.
    for current, following in zip(line.steps, line.steps[1:]):
        if current in dropped:
            continue

        gap = current.base_point.distance(following.base_point)
        if gap < minimum_height / 2:
            dropped.append(following)

        if current.calculate_upper_height() < minimum_height:
            current.upper_point = get_new_point(
                current.base_point, current.angle - 90, minimum_height)

    line.steps = [kept for kept in line.steps if kept not in dropped]
def relative(self, point):
    """Convert an absolute point into window-relative coordinates.

    The point's distance from ``self.origin`` is multiplied by
    ``1 / self.ratio`` and its angle from ``self.origin`` is reduced by
    ``self.angle``; the result is projected from ``Point(0, 0)``.

    Args:
        point: Point in absolute coordinates.

    Returns:
        The point produced by ``get_new_point`` from ``Point(0, 0)``.
    """
    shrink = 1 / self.ratio
    local_distance = self.origin.distance(point) * shrink
    local_angle = angle_between_points(self.origin, point) - self.angle
    return get_new_point(Point(0, 0), local_angle, local_distance)
def to_steps(data, pairs, visualize=True):
    """Cut every annotated line of every page into steps and save them.

    For each page, the page image is loaded as a cairo surface. Each line
    is then walked along its baseline: at every position a perpendicular
    is intersected with the line's hull to obtain an upper and a lower
    point, and the walk advances by the measured height (at least 20).
    The steps of a page are written with ``save_to_json`` to
    ``<pair.base>/json/<index>.json``. With ``visualize`` the annotated
    surface is also written to ``<pair.base>/stepped/<index>.png``.

    Drawing calls for the walk are issued on the surface regardless of
    ``visualize``; the flag only controls the component shading and the
    PNG output.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source; the spots where more than one layout is
    plausible are marked individually -- confirm against version control.

    Args:
        data: Mapping iterated as page key -> mapping of line key -> dict
            with ``"baseline"``, ``"hull"`` and ``"text"`` entries.
        pairs: Sequence indexed by the position of the page in ``data``;
            each entry provides ``index``, ``img``, ``base`` and
            ``get_components()``.
        visualize: Whether to shade components and write the PNG.

    Returns:
        dict: ``{"images": [...]}`` with one entry per page holding
        ``"index"``, ``"filename"`` and ``"lines"``.
    """
    result = {
        "images": []
    }
    # Pages and pairs are matched purely by iteration position.
    for i, page_index in enumerate(data):
        pair = pairs[i]
        print("Stepping pair #" + str(pair.index))
        image_data = {
            "index": pair.index,
            "filename": pair.img,
            "lines": []
        }
        image = cairo.ImageSurface.create_from_png(pair.img)
        context = cairo.Context(image)
        if visualize:
            # Shade every component rectangle.
            # NOTE(review): fill per rectangle vs. once after the loop is
            # not recoverable from the collapsed source -- confirm.
            for component in pair.get_components():
                context.rectangle(component["x"], component["y"],
                                  component["width"], component["height"], )
                context.set_source_rgba(0, 0, 1, 0.1)
                context.fill()
        for line_index in data[page_index]:
            line = data[page_index][line_index]
            baseline = line["baseline"]
            hull = line["hull"]
            line_data = {
                "text": line["text"],
                "steps": []
            }
            line_slope = slope(baseline)
            start_point = baseline[0]
            distance_walked = 0
            # Only the first two baseline points define the walking length.
            total_distance = distance(baseline[0], baseline[1])
            height_threshold = 20
            context.set_operator(cairo.OPERATOR_MULTIPLY)
            context.set_line_width(5)
            upper_points = []
            lower_points = []
            baseline_points = []
            while distance_walked < total_distance:
                intersecting_line = perpendicular(start_point, baseline)
                intersection = intersecting_line.intersection(hull)
                upper_point = None
                lower_point = None
                # The bounds of the intersection supply the two end points.
                if isinstance(intersection, MultiPoint) and len(intersection.bounds) == 4:
                    upper_point = [intersection.bounds[0], intersection.bounds[1]]
                    lower_point = [intersection.bounds[2], intersection.bounds[3]]
                elif isinstance(intersection, LineString) and len(intersection.bounds) == 4:
                    upper_point = [intersection.bounds[0], intersection.bounds[1]]
                    lower_point = [intersection.bounds[2], intersection.bounds[3]]
                elif isinstance(intersection, Point):
                    # A single touching point has no height: nudge forward
                    # by 4 and retry. distance_walked is not advanced here.
                    print("Intersection was point, moving forward")
                    start_point = walk(start_point, line_slope, 4)
                    continue
                else:
                    if distance_walked == 0:
                        # Nothing found yet: nudge the start forward by 4.
                        start_point = walk(start_point, line_slope, 4)
                    else:
                        # Lost the hull after at least one step: end the line.
                        print("No intersection, skipping line " + str(line_index) + " of " + str(
                            pair.index) + " after walking" + str(distance_walked))
                        distance_walked = total_distance
                    # NOTE(review): this `continue` may instead belong only
                    # to the inner else branch -- confirm.
                    continue
                if upper_point is not None and lower_point is not None:
                    upper_points.append(upper_point)
                    lower_points.append(lower_point)
                    # Where the upper-lower scan crosses the baseline.
                    baseline_intersection = LineString(
                        [Point(upper_point[0], upper_point[1]), Point(lower_point[0], lower_point[1])]) \
                        .intersection(LineString(to_points(baseline)))
                    baseline_point = None
                    if isinstance(baseline_intersection, Point) and len(baseline_intersection.bounds) > 1:
                        baseline_point = [baseline_intersection.bounds[0], baseline_intersection.bounds[1]]
                    else:
                        # No single crossing point: fall back to the lower point.
                        baseline_point = lower_point
                    baseline_points.append(baseline_point)
                    height = distance(upper_point, baseline_point)
                    if height < height_threshold and distance_walked == 0:
                        # The first point doesn't have a height
                        if distance_walked == 0:
                            angle = angle_between_points(to_points(baseline)[0], to_points(baseline)[1])
                            new_upper_point = get_new_point(to_points(baseline)[0], angle - 90, height_threshold)
                            # NOTE(review): upper_point is rebound after the
                            # original was appended to upper_points, so the
                            # adjusted point only affects the stroke below,
                            # not the stored step.
                            upper_point = [new_upper_point.x, new_upper_point.y]
                    if height < height_threshold:
                        height = height_threshold
                    # Draw the scan line for this step.
                    context.set_source_rgba(1, 0, 1, 1)
                    context.move_to(upper_point[0], upper_point[1])
                    context.line_to(lower_point[0], lower_point[1])
                    context.stroke()
                    # Draw the stretch walked along the baseline and advance.
                    context.set_source_rgba(0, 0, 1, 0.1)
                    context.move_to(start_point[0], start_point[1])
                    start_point = walk(start_point, line_slope, height)
                    distance_walked += height
                    context.line_to(start_point[0], start_point[1])
                    context.stroke()
                else:
                    distance_walked = total_distance
            # Outline the baseline, upper and lower polylines.
            for pc in [baseline_points, upper_points, lower_points]:
                if len(pc) == 0:
                    continue
                context.set_source_rgba(1, 0, 1, 0.3)
                context.move_to(pc[0][0], pc[0][1])
                for bp in pc:
                    context.line_to(bp[0], bp[1])
                context.stroke()
            # `i` is reused here; the outer loop rebinds it on its next
            # iteration before reading it again.
            for i in range(len(baseline_points)):
                line_data["steps"].append({
                    "upper_point": upper_points[i],
                    "lower_point": lower_points[i],
                    "base_point": baseline_points[i],
                })
            line_data["index"] = line_index
            image_data["lines"].append(line_data)
        result["images"].append(image_data)
        save_path = os.path.join(pair.base, "json", str(image_data["index"]) + ".json")
        save_to_json(image_data, save_path)
        if visualize:
            visualization_path = os.path.join(pair.base, "stepped", str(pair.index) + ".png")
            create_folders(visualization_path)
            image.write_to_png(visualization_path)
    return result