예제 #1
0
    def test_possible_pairs(self):
        # Loads the pairs stored in test_pair_2.json and test_pair_3.json and
        # adds both to one schedule; the test passes when neither add_pair
        # call raises.
        schedule = Schedule()

        # Read every fixture before touching the schedule.
        raw_pairs = []
        for fixture in ("res/test_pair_2.json", "res/test_pair_3.json"):
            with open(fixture, "r", encoding="utf-8") as stream:
                raw_pairs.append(json.load(stream))

        for raw_pair in raw_pairs:
            schedule.add_pair(StudentPair.from_json(raw_pair))
예제 #2
0
    def test_impossible_pairs(self):
        # After the pair from test_pair_1.json is in the schedule, adding the
        # pair from test_pair_2.json must raise AlongTwoPairsException.
        schedule = Schedule()

        # Read every fixture before touching the schedule.
        raw_pairs = []
        for fixture in ("res/test_pair_1.json", "res/test_pair_2.json"):
            with open(fixture, "r", encoding="utf-8") as stream:
                raw_pairs.append(json.load(stream))
        accepted_raw, rejected_raw = raw_pairs

        schedule.add_pair(StudentPair.from_json(accepted_raw))

        # The pair is built outside the assertion so that only add_pair itself
        # is expected to raise.
        rejected_pair = StudentPair.from_json(rejected_raw)
        with self.assertRaises(AlongTwoPairsException):
            schedule.add_pair(rejected_pair)
예제 #3
0
    def test_impossible_intersect(self):
        # With the pairs from test_pair_4.json and test_pair_5.json already in
        # the schedule, adding the pair from test_pair_6.json must raise
        # AlongTwoPairsException.
        schedule = Schedule()

        # Read every fixture before touching the schedule.
        raw_pairs = []
        for fixture in ("res/test_pair_4.json",
                        "res/test_pair_5.json",
                        "res/test_pair_6.json"):
            with open(fixture, "r", encoding="utf-8") as stream:
                raw_pairs.append(json.load(stream))
        *accepted_raw, rejected_raw = raw_pairs

        for raw_pair in accepted_raw:
            schedule.add_pair(StudentPair.from_json(raw_pair))

        # The pair is built outside the assertion so that only add_pair itself
        # is expected to raise.
        rejected_pair = StudentPair.from_json(rejected_raw)
        with self.assertRaises(AlongTwoPairsException):
            schedule.add_pair(rejected_pair)
def import_from_pdf(process_id, manager: ImportManager) -> None:
    """Worker loop that turns queued PDF timetables into schedule files.

    File paths are taken from ``manager.queue`` until the queue is empty or
    ``manager.flags["stop"]`` is set. For each file the first PDF page is
    rendered to an image, candidate table cells are located as contours, the
    text of every cell is read with Tesseract, and the text is parsed into
    pairs that are added to a ``Schedule``. The schedule is saved next to the
    source file with a ``.json`` suffix; ``-weekly.pdf`` / ``-full.pdf``
    exports are written when ``manager.weekly`` / ``manager.full`` are set.

    Progress (0-100) and a status text are published through
    ``manager.progress_value_list`` and ``manager.progress_text_list`` at
    index ``process_id``.

    Any exception raised while handling a file is printed together with its
    traceback, after which the loop re-checks the queue.

    Args:
        process_id: Index of this worker's slot in the manager's progress
            lists.
        manager: Shared import settings, work queue, flags and progress
            lists.
    """
    tesseract = TesseractWrapper(tesseract_path=manager.tesseract_path)
    while not manager.queue.empty():
        try:
            # NOTE(review): presumably several workers share this queue; if
            # another one drains it between empty() and get(), the timeout
            # error ends up in the generic handler below - confirm.
            file_path = manager.queue.get(True, 1)
            file = QFileInfo(file_path)

            # convert from pdf to PIL image (only the first page is used)
            img_pdf = pdf2image.convert_from_path(
                file.absoluteFilePath(),
                dpi=manager.dpi,
                poppler_path=manager.poppler_path)
            img_pdf = img_pdf[0].convert('RGB')

            # convert to NumPy array and flip the channel order (RGB -> BGR)
            img = np.array(img_pdf)
            img = img[:, :, ::-1].copy()

            # set mask
            grey = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
            thresh = cv.threshold(grey, 127, 255, 0)[1]

            if manager.flags["stop"]:
                break

            # found contours
            contours = cv.findContours(thresh, cv.RETR_TREE,
                                       cv.CHAIN_APPROX_SIMPLE)[0]

            height, width = img.shape[:2]
            # max and min area of rect, relative to the page size
            max_area = height * width / 20
            min_area = max_area / 40

            # (left x, right x, text) of every cell that is not a time header
            cells = []
            # time slot index -> (left x, right x) of its header cell
            time_cells = dict()

            contours_number = len(contours)
            number = 0

            for k, contour in enumerate(contours, 1):
                rect = cv.minAreaRect(contour)
                area = int(rect[1][0] * rect[1][1])
                if min_area < area < max_area:
                    if manager.flags["stop"]:
                        break

                    x, y, w, h = cv.boundingRect(contour)
                    crop_img = img[int(y):int(y + h), int(x):int(x + w)]

                    txt = tesseract.to_string(crop_img)

                    found = False
                    for i in range(8):
                        # A time header has to contain both the start and
                        # the end time of the slot. The end time used to be
                        # tested only for truthiness, which is always true.
                        if (TimePair.time_starts()[i] in txt
                                and TimePair.time_ends()[i] in txt):
                            time_cells[i] = (x, x + w)
                            found = True
                            break

                    if not found:
                        # collapse all whitespace runs to single spaces
                        cells.append((x, x + w, " ".join(txt.split())))

                    # draw debug rect with number
                    if manager.debug_image:
                        box = cv.boxPoints(rect)
                        # np.int0 was an alias of np.intp and no longer
                        # exists in NumPy 2.0
                        box = np.intp(box)
                        blue_color = (255, 0, 0)
                        center = (int(rect[0][0]), int(rect[0][1]))

                        cv.drawContours(img, [box], 0, blue_color, 2)
                        cv.putText(img, str(number),
                                   (center[0] - 100, center[1] - 40),
                                   cv.FONT_HERSHEY_SIMPLEX, 3, blue_color, 12)

                    number += 1
                    # cell recognition covers the first 70% of the progress
                    process = int(k / contours_number * 70)
                    manager.progress_value_list[process_id] = process
                    manager.progress_text_list[process_id] = "{} {}%".format(
                        file.baseName(), process)
            if manager.debug_image:
                # [0:-4] drops the last four characters of the path
                # (presumably the ".pdf" extension)
                cv.imwrite(file_path[0:-4] + "-debug.jpg", img)

            if manager.flags["stop"]:
                break

            schedule = Schedule()
            cells_number = len(cells)

            for k, cell in enumerate(cells):
                if manager.flags["stop"]:
                    break

                start_x, end_x, text = cell
                # the header of time slot 0 is the reference column
                first_start_time, first_end_time = time_cells[0]

                # skip cells whose right edge is closer to the left edge of
                # the first time column than their own left edge is
                if not abs(end_x - first_start_time) < abs(start_x -
                                                           first_start_time):
                    # keep only the chunks that end with "]", one per line
                    text = "\n".join(re.findall(r".*?\]", text))
                    # re-parse until the text (possibly corrected through
                    # confuse_loop) is accepted by parse_pair
                    while True:
                        try:
                            pairs = parse_pair(manager, text)
                            break
                        except InvalidDatePair as ex:
                            text = confuse_loop(
                                process_id, manager,
                                ConfuseSituationException(
                                    file.absoluteFilePath(),
                                    text,
                                    confuse=str(ex)))
                        except ConfuseSituationException as ex:
                            ex.filename = file.absoluteFilePath(
                            )[0:-4] + "-debug.jpg"
                            ex.cell = k
                            ex.context = text
                            if ex.maybe_answer == "":
                                ex.maybe_answer = text

                            text = confuse_loop(process_id, manager, ex)

                    if len(pairs) != 0:
                        # pick the time slots whose header edges are nearest
                        # to the left and right edges of the cell; slot 0 is
                        # the initial candidate for both
                        diff_start = abs(start_x - first_start_time)
                        diff_end = abs(end_x - first_end_time)
                        start, end = 0, 0

                        for slot, (start_time,
                                   end_time) in time_cells.items():
                            diff = abs(start_x - start_time)
                            if diff < diff_start:
                                diff_start = diff
                                start = slot

                            diff = abs(end_x - end_time)
                            if diff < diff_end:
                                diff_end = diff
                                end = slot

                        for pair in pairs:
                            pair["time"].set_time(
                                TimePair.time_starts()[start],
                                TimePair.time_ends()[end])
                            schedule.add_pair(pair)

                    # parsing covers the remaining 30% of the progress
                    process = int(70 + k / cells_number * 30)
                    manager.progress_value_list[process_id] = process
                    manager.progress_text_list[process_id] = "{} {}%".format(
                        file.baseName(), process)

            schedule.save(file.absoluteFilePath()[0:-4] + ".json")
            print(file.absoluteFilePath()[0:-4] + ".json")

            if manager.flags["stop"]:
                break

            if manager.weekly:
                export_weeks_to_pdf(
                    schedule, file.baseName(), True,
                    file.absoluteFilePath()[0:-4] + "-weekly.pdf",
                    manager.font_name, manager.font_path, manager.encoding,
                    manager.start, manager.end, manager.color_a,
                    manager.color_b)

            if manager.full:
                export_full_to_pdf(schedule, file.baseName(),
                                   file.absoluteFilePath()[0:-4] + "-full.pdf",
                                   manager.font_name, manager.font_path,
                                   manager.encoding)

        except Exception as ex:
            # a failure on one file must not stop the worker
            print("Exception, process:", process_id, "is:", ex)
            traceback.print_exc()

    manager.progress_value_list[process_id] = 100
    manager.progress_text_list[process_id] = "Work complete"