Beispiel #1
0
 def folder(self, folder_name):
     """Return the PNG path for this item inside ``folder_name``.

     The file name is ``str(self.index)`` followed by ``.png``.  The path is
     handed to ``create_folders`` before being returned (presumably so the
     parent directory exists -- the helper is defined elsewhere).
     """
     png_name = "".join((str(self.index), ".png"))
     target = os.path.join(folder_name, png_name)
     create_folders(target)
     return target
Beispiel #2
0
# NOTE(review): none of the options below pass `type=`, so the numeric/bool
# defaults are only used when a flag is omitted. A value supplied on the
# command line is stored as a plain string (argparse's default), e.g.
# `--patch_size 32` yields "32", not 32 -- confirm downstream code copes.
parser.add_argument("--tsa_size", default=5)
parser.add_argument("--patch_ratio", default=5)
parser.add_argument("--patch_size", default=64)
parser.add_argument("--min_height", default=8)

# Training techniques
parser.add_argument("--name", default="test-mean")
# Hyphenated flags are exposed with underscores: args.reset_threshold.
parser.add_argument("--reset-threshold", default=32)
parser.add_argument("--max_steps", default=6)
# NOTE(review): without `type=`/`action=`, any value given for --random-sol
# is a non-empty string and therefore truthy (even "False").
parser.add_argument("--random-sol", default=True)

parser.add_argument("--output", default="scripts/new/snapshots/lol")
args = parser.parse_args()

# Persist the effective arguments as <output>/<name>/args.json.
# create_folders is defined elsewhere; presumably it creates the parent
# directories of the given file path -- TODO confirm.
args_filename = os.path.join(args.output, args.name, 'args.json')
create_folders(args_filename)
with open(args_filename, 'w') as fp:
    json.dump(args.__dict__, fp, indent=4)

# `args.dataset` is not declared in the visible lines; it is presumably added
# to the parser earlier in the file.
print("[Training Line-Outliner] Model: SFRS | Dataset: ", args.dataset)

# Data root: $DATA_FOLDER when set to a non-empty value, otherwise "data".
data_folder = os.getenv("DATA_FOLDER") if os.getenv("DATA_FOLDER") else "data"

# Layout used below: <data_folder>/sfrs/<dataset>/pages/...
target_folder = os.path.join(data_folder, "sfrs", args.dataset)
pages_folder = os.path.join(target_folder, "pages")
char_set_path = os.path.join(pages_folder, "character_set.json")

# Build the training dataset from the file list in pages/training.json,
# with augmentation switched on.
training_set_list_path = os.path.join(pages_folder, "training.json")
training_set_list = load_file_list_direct(training_set_list_path)
train_dataset = LolDataset(training_set_list, augmentation=True)
train_dataloader = DataLoader(train_dataset,
            # index, base_folder, img_filename, xml_filename
            pair = ImageXmlPair(page_index, database_original_folder, img_path,
                                xml_path)
            pairs.append(pair)

        pairs = pairs[0:2]

        # For each pair
        # Create a folder with its name
        # Extract lines
        # Create JSON with line information
        # Add to dataset json data
        for pair in pairs:
            folder_path = os.path.join(target_folder, "pages", "data",
                                       pair.index)
            create_folders(os.path.join(folder_path, "something.txt"))
            image_json_path = os.path.join(target_folder, "pages", "data",
                                           pair.index, pair.index + ".json")
            image_json = []
            pair.set_height_threshold(0.1)
            image_data = run_transformation_approach(pair, alpha=0.0025)
            steps = to_steps({pair.index: image_data}, [pair])
            image_steps = steps["images"][0]
            dataset_json_data.append(
                [image_json_path, image_steps["filename"]])
            image = TrainingImage(image_steps)

            for line in image.lines:
                LineAugmentation.normalize(line)
                LineAugmentation.extend_backwards(line)
                LineAugmentation.extend(line,
Beispiel #4
0
def to_steps(data, pairs, visualize=True):
    """Walk along every text line of every page and record outline steps.

    For each line the function starts at ``baseline[0]`` and repeatedly
    intersects a perpendicular through the current position with the line's
    hull.  The bounding box of that intersection supplies an upper and a
    lower point, and the crossing with the baseline supplies the base point.
    The position is then advanced along the baseline slope by the measured
    height (at least ``height_threshold``).

    Args:
        data: Mapping ``page_index -> {line_index -> line}`` where each line
            is a dict with the keys ``"baseline"``, ``"hull"`` and ``"text"``.
        pairs: Sequence of page objects, positionally aligned with the
            iteration order of ``data``.  Each must provide ``index``,
            ``img``, ``base`` and ``get_components()``.
        visualize: When true, component boxes are shaded and the annotated
            page is written to ``<pair.base>/stepped/<pair.index>.png``.

    Returns:
        ``{"images": [...]}`` with one entry per page of the form
        ``{"index", "filename", "lines"}``; every line is
        ``{"text", "steps", "index"}`` and every step is
        ``{"upper_point", "lower_point", "base_point"}``.

    Side effects:
        Prints progress messages and stores each page's entry through
        ``save_to_json`` at ``<pair.base>/json/<index>.json``.
    """
    # Minimum step length; also the height used for a too-flat first step.
    height_threshold = 20
    # Distance used to nudge the position forward when no usable
    # intersection with the hull is found.
    nudge = 4

    result = {"images": []}

    for page_number, page_index in enumerate(data):
        pair = pairs[page_number]

        print("Stepping pair #" + str(pair.index))

        image_data = {"index": pair.index, "filename": pair.img, "lines": []}

        image = cairo.ImageSurface.create_from_png(pair.img)
        context = cairo.Context(image)

        if visualize:
            for component in pair.get_components():
                context.rectangle(
                    component["x"],
                    component["y"],
                    component["width"],
                    component["height"],
                )
                context.set_source_rgba(0, 0, 1, 0.1)
                context.fill()

        for line_index in data[page_index]:
            line = data[page_index][line_index]
            baseline = line["baseline"]
            hull = line["hull"]
            line_data = {"text": line["text"], "steps": []}

            line_slope = slope(baseline)
            start_point = baseline[0]
            distance_walked = 0
            total_distance = distance(baseline[0], baseline[1])
            # Distance covered by nudges alone.  Nudging does not advance
            # distance_walked, so without this bound a hull that the
            # perpendicular never crosses would make the loop spin forever.
            skipped_distance = 0

            context.set_operator(cairo.OPERATOR_MULTIPLY)
            context.set_line_width(5)
            upper_points = []
            lower_points = []
            baseline_points = []

            while distance_walked < total_distance:
                intersecting_line = perpendicular(start_point, baseline)
                intersection = intersecting_line.intersection(hull)

                crosses_hull = (
                    isinstance(intersection, (MultiPoint, LineString))
                    and len(intersection.bounds) == 4)

                if not crosses_hull:
                    if isinstance(intersection, Point):
                        print("Intersection was point, moving forward")
                    elif distance_walked != 0:
                        # The hull ended after at least one recorded step.
                        print("No intersection, skipping line " +
                              str(line_index) + " of " + str(pair.index) +
                              " after walking" + str(distance_walked))
                        break

                    start_point = walk(start_point, line_slope, nudge)
                    skipped_distance += nudge
                    if skipped_distance >= total_distance:
                        # Nudged a full baseline length without a usable
                        # crossing: give up on this line instead of looping.
                        print("No intersection found, giving up on line " +
                              str(line_index) + " of " + str(pair.index) +
                              " after skipping " + str(skipped_distance))
                        break
                    continue

                min_x, min_y, max_x, max_y = intersection.bounds
                upper_point = [min_x, min_y]
                lower_point = [max_x, max_y]
                upper_points.append(upper_point)
                lower_points.append(lower_point)

                baseline_intersection = LineString([
                    Point(upper_point[0], upper_point[1]),
                    Point(lower_point[0], lower_point[1]),
                ]).intersection(LineString(to_points(baseline)))

                if (isinstance(baseline_intersection, Point)
                        and len(baseline_intersection.bounds) > 1):
                    baseline_point = [
                        baseline_intersection.bounds[0],
                        baseline_intersection.bounds[1],
                    ]
                else:
                    # No single crossing with the baseline: fall back to the
                    # lower point.
                    baseline_point = lower_point
                baseline_points.append(baseline_point)

                height = distance(upper_point, baseline_point)

                if height < height_threshold:
                    if distance_walked == 0:
                        # The first point doesn't have a height: synthesise
                        # an upper point height_threshold away from the
                        # baseline start.
                        # NOTE(review): the recorded step was appended above,
                        # so this replacement only affects the stroke drawn
                        # below -- confirm that is intended.
                        baseline_ends = to_points(baseline)
                        angle = angle_between_points(baseline_ends[0],
                                                     baseline_ends[1])
                        new_upper_point = get_new_point(
                            baseline_ends[0], angle - 90, height_threshold)
                        upper_point = [new_upper_point.x, new_upper_point.y]
                    height = height_threshold

                # Stroke from the upper to the lower point of this step.
                context.set_source_rgba(1, 0, 1, 1)
                context.move_to(upper_point[0], upper_point[1])
                context.line_to(lower_point[0], lower_point[1])
                context.stroke()

                # Advance along the baseline and draw the walked segment.
                context.set_source_rgba(0, 0, 1, 0.1)
                context.move_to(start_point[0], start_point[1])
                start_point = walk(start_point, line_slope, height)
                distance_walked += height
                context.line_to(start_point[0], start_point[1])
                context.stroke()

            # Connect the recorded points of each kind with a polyline.
            for polyline in (baseline_points, upper_points, lower_points):
                if not polyline:
                    continue
                context.set_source_rgba(1, 0, 1, 0.3)
                context.move_to(polyline[0][0], polyline[0][1])
                for vertex in polyline:
                    context.line_to(vertex[0], vertex[1])
                context.stroke()

            # The three lists are appended to together, so they are always
            # of equal length.
            for upper, lower, base in zip(upper_points, lower_points,
                                          baseline_points):
                line_data["steps"].append({
                    "upper_point": upper,
                    "lower_point": lower,
                    "base_point": base,
                })

            line_data["index"] = line_index
            image_data["lines"].append(line_data)

        result["images"].append(image_data)
        save_path = os.path.join(pair.base, "json",
                                 str(image_data["index"]) + ".json")
        save_to_json(image_data, save_path)
        if visualize:
            visualization_path = os.path.join(pair.base, "stepped",
                                              str(pair.index) + ".png")
            create_folders(visualization_path)
            image.write_to_png(visualization_path)

    return result
Beispiel #5
0
def paint_model_run(model_path, dataloader, destination="screenshots/run.png"):
    """Run the line outliner on one page and paint the result next to the
    ground truth.

    The first sample yielded by ``dataloader`` selects the page: a
    ``Painter`` is created for its ``img_path`` and every later sample with
    a different ``img_path`` is ignored.  For each remaining sample the
    model is run from the first ground-truth step and both the ground-truth
    and predicted outlines are drawn.

    Step layout as used here: row 0 is the upper point, row 1 the baseline
    point, row 2 the lower point, and ``step[3][0]`` of a predicted step is
    read as its confidence.

    Args:
        model_path: Passed to ``LineOutlinerTsa(path=...)``.
        dataloader: Iterable of batches; only element 0 of each batch is
            used and must provide ``"img_path"``, ``"img"`` and ``"steps"``.
        destination: File the painted page is saved to.

    Raises:
        ValueError: If ``dataloader`` yields nothing, so there is no page to
            paint (previously this surfaced as an ``AttributeError`` on
            ``None``).
    """
    dtype = torch.cuda.FloatTensor

    def row_points(steps, row):
        """Turn row ``row`` of every step into a shapely ``Point``."""
        return [
            Point(step[row][0].item(), step[row][1].item()) for step in steps
        ]

    img_path = None
    painter = None

    lol = LineOutlinerTsa(path=model_path)
    lol.cuda()

    for batch in dataloader:
        x = batch[0]

        if img_path is None:
            painter = Painter(path=x["img_path"])
            img_path = x["img_path"]

        # Only lines belonging to the first page are painted.
        if x["img_path"] != img_path:
            continue

        img = x['img'].type(dtype)[None, ...]
        ground_truth = x["steps"]
        sol = ground_truth[0]

        predicted_steps, _, _ = lol(img,
                                    sol,
                                    ground_truth,
                                    max_steps=30,
                                    disturb_sol=False)

        ground_truth_upper_steps = row_points(ground_truth, 0)
        ground_truth_baseline_steps = row_points(ground_truth, 1)
        ground_truth_lower_steps = row_points(ground_truth, 2)

        upper_steps = row_points(predicted_steps, 0)
        baseline_steps = row_points(predicted_steps, 1)
        lower_steps = row_points(predicted_steps, 2)
        confidences = [step[3][0].item() for step in predicted_steps]

        # Ground truth: one cross stroke per step, then the three outlines.
        for upper, base, lower in zip(ground_truth_upper_steps,
                                      ground_truth_baseline_steps,
                                      ground_truth_lower_steps):
            painter.draw_line([upper, base, lower],
                              color=(0, 0, 0, 1),
                              line_width=2)

        for outline in (ground_truth_upper_steps,
                        ground_truth_baseline_steps,
                        ground_truth_lower_steps):
            painter.draw_line(outline, line_width=4, color=(0, 0, 0, 0.5))

        # Prediction: one cross stroke per step, then the three outlines.
        for upper, base, lower in zip(upper_steps, baseline_steps,
                                      lower_steps):
            painter.draw_line([upper, base, lower],
                              color=(0, 0, 1, 1),
                              line_width=2)

        painter.draw_line(baseline_steps, line_width=4, color=(0, 0, 1, 1))
        painter.draw_line(upper_steps, line_width=4, color=(1, 0, 1, 1))
        painter.draw_line(lower_steps, line_width=4, color=(1, 0, 1, 1))
        for step in baseline_steps:
            painter.draw_point(step, radius=6)

        # The original drew the same outlines a second time at width 3;
        # kept so the rendered image does not change.
        painter.draw_line(baseline_steps, color=(0, 0, 1, 1), line_width=3)
        painter.draw_line(upper_steps, color=(1, 0, 1, 1), line_width=3)
        painter.draw_line(lower_steps, color=(1, 0, 1, 1), line_width=3)

        # Highlight the start-of-line step taken from the ground truth.
        sol_upper = Point(sol[0][0].item(), sol[0][1].item())
        sol_lower = Point(sol[1][0].item(), sol[1][1].item())

        painter.draw_line([sol_lower, sol_upper],
                          color=(0, 1, 0, 1),
                          line_width=5)
        painter.draw_point(sol_lower, color=(0, 1, 0, 1), radius=6)
        painter.draw_point(sol_upper, color=(0, 1, 0, 1), radius=6)

        # Shade the area between consecutive predicted steps, using the
        # confidence of the first step of each pair as the fill opacity.
        for i in range(len(upper_steps) - 1):
            opacity = confidences[i]

            painter.draw_area([
                upper_steps[i], upper_steps[i + 1], baseline_steps[i + 1],
                lower_steps[i + 1], lower_steps[i], baseline_steps[i]
            ],
                              fill_color=(1, 0, 0, opacity))

            painter.draw_line([baseline_steps[i], upper_steps[i]],
                              line_width=2,
                              color=(0, 0, 0, 0.1))
            painter.draw_line([baseline_steps[i], lower_steps[i]],
                              line_width=2,
                              color=(0, 0, 0, 0.1))

    if painter is None:
        raise ValueError("dataloader yielded no samples; nothing to paint")

    create_folders(destination)
    painter.save(destination)
Beispiel #6
0
def run_transformation_approach(pair, alpha=0.004, visualization_path=None):
    """Build a hull around the components of every text line of a page.

    The corner points of all components are grouped by the line index
    reported by ``pointsOf``.  ``alpha_shape`` is run on each group and the
    resulting polygon's exterior is stored together with the line's baseline
    and ground-truth text from ``pair.get_transformation()``.

    Args:
        pair: Page object providing ``index``, ``img``, ``get_components()``
            and ``get_transformation()``.
        alpha: Forwarded to ``alpha_shape`` as ``alpha``.
        visualization_path: Optional directory.  When given, the annotated
            page is written to ``<visualization_path>/<pair.index>.png``.

    Returns:
        Dict keyed by line index; each value has the keys ``"index"``,
        ``"hull"``, ``"baseline"`` and ``"text"``.  Lines whose hull is a
        ``MultiPolygon`` are left out.
    """
    print("Transforming pair #" + str(pair.index))

    image = cairo.ImageSurface.create_from_png(pair.img)
    context = cairo.Context(image)

    # Shade every component's bounding box.
    for component in pair.get_components():
        context.rectangle(
            component["x"],
            component["y"],
            component["width"],
            component["height"],
        )
        context.set_source_rgba(0, 0, 1, 0.1)
        context.fill()

    # Collect the four corners of every component, grouped by line index.
    line_components = {}
    for component in pair.get_components():
        data = pointsOf(component)
        line_components.setdefault(data["index"], []).extend([
            data["top_left"],
            data["top_right"],
            data["bottom_right"],
            data["bottom_left"],
        ])

    context.set_operator(cairo.OPERATOR_MULTIPLY)
    context.set_line_width(5)
    context.set_source_rgba(1, 0, 0, 1)

    transformation = pair.get_transformation()
    lines = transformation["lines"]

    # Draw every baseline from its first to its second point.
    for line in lines:
        start, end = line["baseline"][0], line["baseline"][1]
        context.move_to(start[0], start[1])
        context.line_to(end[0], end[1])
        context.stroke()

    used_data = {}
    for line_index, points in line_components.items():
        coords = [Point(p[0], p[1]) for p in points]
        # The second return value (named edge_points originally) is unused.
        concave_hull, _ = alpha_shape(coords, alpha=alpha)

        # A hull split into several polygons has no single exterior.
        if isinstance(concave_hull, MultiPolygon):
            continue

        exterior = concave_hull.exterior
        outline = exterior.coords

        context.set_operator(cairo.OPERATOR_MULTIPLY)
        context.set_line_width(3)
        context.set_source_rgba(0, 1, 0.3, 1)
        context.move_to(outline[0][0], outline[0][1])
        for point in outline:
            context.line_to(point[0], point[1])
        context.stroke()

        used_data[line_index] = {
            "index": line_index,
            "hull": exterior,
            "baseline": lines[line_index]["baseline"],
            "text": lines[line_index]["gt"],
        }

    if visualization_path is not None:
        visualization_path = os.path.join(visualization_path,
                                          str(pair.index) + ".png")
        create_folders(visualization_path)
        image.write_to_png(visualization_path)
    return used_data
Beispiel #7
0
        folder = os.path.join("screenshots")

        np_img = img[0].clone().detach().cpu().numpy().transpose()
        np_img = (np_img + 1) * 128.0

        cv2.imwrite(os.path.join(folder, "full.png"), np_img)

        for step_index, step in enumerate(ground_truth):
            patch_name = os.path.join(folder, str(step_index) + ".png")

            initial_step = step
            current_height = torch.dist(initial_step[1], initial_step[0]).cuda()
            current_scale = 64 / (current_height * 5).cuda()
            current_angle = initial_step[3][0].cuda()

            current_base = torch.stack([initial_step[1][0], initial_step[1][1]]).cuda()
            patch_parameters = torch.stack([current_base[0],  # x
                                            current_base[1],  # y
                                            torch.deg2rad(current_angle),
                                            # torch.remainder(current_angle, torch.tensor(360)),  # angle
                                            current_height]).unsqueeze(0)
            patch_parameters = patch_parameters.cuda()
            patch = extract_tensor_patch(img, patch_parameters, size=64)
            patch = patch[0].clone().detach().cpu().numpy().transpose()

            patch = (patch + 1) * 128.0
            create_folders(patch_name)
            cv2.imwrite(patch_name, patch)

        sys.exit(0)
Beispiel #8
0
 def save(self, path="test.png"):
     """Write this object's surface to ``path`` as a PNG file.

     ``create_folders`` is called on ``path`` first (presumably to create
     missing parent directories -- the helper is defined elsewhere).
     """
     create_folders(path)
     write_png = self.surface.write_to_png
     write_png(path)
Beispiel #9
0
    painter.draw_line(upper_line, color=(1, 0, 1, 1), line_width=3)
    painter.draw_line(lower_line, color=(1, 0, 1, 1), line_width=3)

    # for intersection in lower_intersections:
    #     painter.draw_area([Point(p[0].item(), p[1].item()) for p in intersection], fill_color=(0, 1, 0, 0.5),
    #                       line_width=1, line_color=(1, 1, 0, 1))
    #

    #
    # ### DRAWS TSA
    # vertical_concats = []
    # for tsa_line in input:
    #     for tsa_image in tsa_line:
    #         horizontal_concats = []
    #         for tsa_section in tsa_image:
    #             img_np = tsa_section.clone().detach().cpu().numpy().transpose()
    #             img_np = (img_np + 1) * 128
    #             horizontal_concats.append(img_np)
    #             horizontal_concats.append(np.zeros((64, 2, 3), dtype=np.float32))
    #         vertical_concats.append(cv2.hconcat(horizontal_concats))
    #         vertical_concats.append(np.zeros((2, (args.tsa_size * 2) + (64 * args.tsa_size), 3), dtype=np.float32))
    # s_path = os.path.join("screenshots", "tsa", str(counter) + ".png")
    # cv2.imwrite(s_path, cv2.vconcat(vertical_concats))
    # create_folders(s_path)
    counter += 1
    break

destination = os.path.join("screenshots", "tsa", "full.png")
create_folders(destination)
painter.save(destination)