def test_possible_pairs(self):
    """
    Add the pairs stored in test_pair_2.json and test_pair_3.json to a
    fresh schedule. The test passes when neither add_pair call raises.
    """
    schedule = Schedule()

    # read both fixtures first, in the same order the pairs are added below
    raw_pairs = []
    for fixture in ("res/test_pair_2.json", "res/test_pair_3.json"):
        with open(fixture, "r", encoding="utf-8") as stream:
            raw_pairs.append(json.load(stream))

    for raw_pair in raw_pairs:
        schedule.add_pair(StudentPair.from_json(raw_pair))
def test_impossible_pairs(self):
    """
    Add the pair from test_pair_1.json, then check that adding the pair
    from test_pair_2.json raises AlongTwoPairsException.
    """
    schedule = Schedule()

    with open("res/test_pair_1.json", "r", encoding="utf-8") as first_file:
        first_raw = json.load(first_file)
    with open("res/test_pair_2.json", "r", encoding="utf-8") as second_file:
        second_raw = json.load(second_file)

    schedule.add_pair(StudentPair.from_json(first_raw))

    # the second pair is built outside the guarded block, so only
    # add_pair itself is expected to raise
    rejected_pair = StudentPair.from_json(second_raw)
    with self.assertRaises(AlongTwoPairsException):
        schedule.add_pair(rejected_pair)
def test_impossible_intersect(self):
    """
    Add the pairs from test_pair_4.json and test_pair_5.json, then check
    that adding the pair from test_pair_6.json raises
    AlongTwoPairsException.
    """
    schedule = Schedule()

    # load the three fixtures in order: two accepted pairs, one rejected
    fixtures = (
        "res/test_pair_4.json",
        "res/test_pair_5.json",
        "res/test_pair_6.json",
    )
    raw_pairs = []
    for fixture in fixtures:
        with open(fixture, "r", encoding="utf-8") as stream:
            raw_pairs.append(json.load(stream))

    first_raw, second_raw, rejected_raw = raw_pairs
    schedule.add_pair(StudentPair.from_json(first_raw))
    schedule.add_pair(StudentPair.from_json(second_raw))

    # the third pair is built outside the guarded block, so only
    # add_pair itself is expected to raise
    rejected_pair = StudentPair.from_json(rejected_raw)
    with self.assertRaises(AlongTwoPairsException):
        schedule.add_pair(rejected_pair)
def import_from_pdf(process_id, manager: ImportManager) -> None:
    """
    Worker routine that turns queued PDF files into schedule files.

    File paths are taken from ``manager.queue`` until it is empty. For every
    file the first rendered page is converted to an image, rectangular cells
    are located with contour detection and read with Tesseract, the text of
    each cell is parsed into pairs and the resulting schedule is saved next
    to the source file as ``.json``. Depending on ``manager.weekly`` and
    ``manager.full`` the schedule is also exported to PDF.

    Any exception raised while handling a file is printed together with its
    traceback and the worker goes on with the next queue item. Setting
    ``manager.flags["stop"]`` makes the worker leave the queue loop at the
    next check point.

    :param process_id: index of this worker inside
        ``manager.progress_value_list`` and ``manager.progress_text_list``
    :param manager: shared import settings, work queue, stop flag and
        progress lists
    """
    tesseract = TesseractWrapper(tesseract_path=manager.tesseract_path)

    # the time labels are looked up once instead of once per recognised cell
    time_starts = TimePair.time_starts()
    time_ends = TimePair.time_ends()

    while not manager.queue.empty():
        try:
            file_path = manager.queue.get(True, 1)
            file = QFileInfo(file_path)
            # drops the last four characters of the path
            # (presumably the ".pdf" extension - verify against caller)
            base_path = file.absoluteFilePath()[0:-4]

            # convert from pdf to PIL image, only the first page is used
            img_pdf = pdf2image.convert_from_path(
                file.absoluteFilePath(),
                dpi=manager.dpi,
                poppler_path=manager.poppler_path)
            img_pdf = img_pdf[0].convert('RGB')

            # convert to NumPy array and reverse the channel order
            # (RGB -> BGR), the copy makes the array contiguous again
            img = np.array(img_pdf)
            img = img[:, :, ::-1].copy()

            # set mask
            grey = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
            thresh = cv.threshold(grey, 127, 255, 0)[1]

            if manager.flags["stop"]:
                break

            # found contours
            contours = cv.findContours(thresh, cv.RETR_TREE,
                                       cv.CHAIN_APPROX_SIMPLE)[0]

            height, width = img.shape[:2]
            # max and min area of rect
            max_area = height * width / 20
            min_area = max_area / 40

            # (left x, right x, text) of every ordinary cell
            cells = []
            # time slot index -> (left x, right x) of its header cell
            time_cells = {}

            contours_number = len(contours)
            debug_number = 0
            for k, contour in enumerate(contours, 1):
                rect = cv.minAreaRect(contour)
                area = int(rect[1][0] * rect[1][1])
                if min_area < area < max_area:
                    if manager.flags["stop"]:
                        break

                    x, y, w, h = cv.boundingRect(contour)
                    crop_img = img[int(y):int(y + h), int(x):int(x + w)]

                    txt = tesseract.to_string(crop_img)

                    # a time header must contain both the start and the end
                    # label of the same slot; the original only tested the
                    # truthiness of the end label, so every cell containing
                    # a start time was taken for a header
                    for i in range(8):
                        if time_starts[i] in txt and time_ends[i] in txt:
                            time_cells[i] = (x, x + w)
                            break
                    else:
                        # collapse all whitespace runs to single spaces
                        cells.append((x, x + w, " ".join(txt.split())))

                    # draw debug rect with number
                    if manager.debug_image:
                        box = cv.boxPoints(rect)
                        # np.int0 was removed in NumPy 2.0, np.intp is the
                        # type it used to alias
                        box = box.astype(np.intp)
                        blue_color = (255, 0, 0)
                        center = (int(rect[0][0]), int(rect[0][1]))
                        cv.drawContours(img, [box], 0, blue_color, 2)
                        cv.putText(img, str(debug_number),
                                   (center[0] - 100, center[1] - 40),
                                   cv.FONT_HERSHEY_SIMPLEX, 3, blue_color, 12)
                        debug_number += 1

                # recognition accounts for the first 70% of the progress
                process = int(k / contours_number * 70)
                manager.progress_value_list[process_id] = process
                manager.progress_text_list[process_id] = "{} {}%".format(
                    file.baseName(), process)

            if manager.debug_image:
                cv.imwrite(file_path[0:-4] + "-debug.jpg", img)

            if manager.flags["stop"]:
                break

            schedule = Schedule()

            cells_number = len(cells)
            for k, cell in enumerate(cells):
                if manager.flags["stop"]:
                    break

                start_x, end_x, text = cell
                # horizontal extent of the header cell of time slot 0
                first_left, first_right = time_cells[0]
                # skip cells whose right edge is closer to the left edge of
                # the first time column than their own left edge is
                # (presumably cells located left of the timetable - confirm)
                if not abs(end_x - first_left) < abs(start_x - first_left):
                    text = "\n".join(re.findall(r".*?\]", text))
                    # keep asking for a corrected text until it parses
                    while True:
                        try:
                            pairs = parse_pair(manager, text)
                            break
                        except InvalidDatePair as ex:
                            text = confuse_loop(
                                process_id, manager,
                                ConfuseSituationException(
                                    file.absoluteFilePath(),
                                    text,
                                    confuse=str(ex)))
                        except ConfuseSituationException as ex:
                            ex.filename = base_path + "-debug.jpg"
                            ex.cell = k
                            ex.context = text
                            if ex.maybe_answer == "":
                                ex.maybe_answer = text

                            text = confuse_loop(process_id, manager, ex)

                    if len(pairs) != 0:
                        # pick the time slots whose header edges are the
                        # closest to the left and right edge of the cell
                        diff_start = abs(start_x - first_left)
                        diff_end = abs(end_x - first_right)
                        start, end = 0, 0
                        for slot, (slot_left, slot_right) in time_cells.items():
                            diff = abs(start_x - slot_left)
                            if diff < diff_start:
                                diff_start = diff
                                start = slot

                            diff = abs(end_x - slot_right)
                            if diff < diff_end:
                                diff_end = diff
                                end = slot

                        for pair in pairs:
                            pair["time"].set_time(time_starts[start],
                                                  time_ends[end])
                            schedule.add_pair(pair)

                # parsing accounts for the remaining 30% of the progress
                process = int(70 + k / cells_number * 30)
                manager.progress_value_list[process_id] = process
                manager.progress_text_list[process_id] = "{} {}%".format(
                    file.baseName(), process)

            schedule.save(base_path + ".json")
            print(base_path + ".json")

            if manager.flags["stop"]:
                break

            if manager.weekly:
                export_weeks_to_pdf(
                    schedule, file.baseName(), True,
                    base_path + "-weekly.pdf",
                    manager.font_name, manager.font_path, manager.encoding,
                    manager.start, manager.end,
                    manager.color_a, manager.color_b)

            if manager.full:
                export_full_to_pdf(
                    schedule, file.baseName(),
                    base_path + "-full.pdf",
                    manager.font_name, manager.font_path, manager.encoding)

        except Exception as ex:
            # worker boundary: report the failure and go on with the next file
            print("Exception, process:", process_id, "is:", ex)
            traceback.print_exc()

    manager.progress_value_list[process_id] = 100
    manager.progress_text_list[process_id] = "Work complete"