def ocr_for_all_image():
    """Run OCR over the images listed for ``../static`` and save the text.

    For every path returned by ``get_file_from_folder("../static")`` the image
    is passed to ``ocr`` and the result is handed to ``save_file``, targeting
    ``../Data/ocr_text/``. The output name is the part of the image's base
    name before its first dot, followed by ``.txt``.
    """
    source_dir = "../static"
    for image_path in get_file_from_folder(source_dir):
        # Everything before the first dot is used as the stem.
        stem = basename(image_path).split('.')[0]
        recognized = ocr(image_path)
        save_file(name=stem + ".txt", text=recognized, path="../Data/ocr_text/")
def create_empty_file():
    """Create empty ``.txt`` files for every image listed for ``../static``.

    For each path returned by ``get_file_from_folder("../static")`` two files
    named after the part of the image's base name before its first dot are
    opened in ``"w"`` mode: first under ``../Data/correct_text/``, then under
    ``../Data/ocr_text/``. Because of ``"w"`` mode, an already existing file
    is truncated to zero length.

    Raises:
        OSError: if one of the target files cannot be opened for writing.
    """
    path = "../static"
    for image in get_file_from_folder(path):
        file_name = basename(image).split('.')[0]
        # One handle at a time, each closed by its context manager, so a
        # failure on the second file cannot leave the first handle open.
        for folder in ("correct_text", "ocr_text"):
            with open(f"../Data/{folder}/{file_name}.txt", "w"):
                pass
def get_problems():
    """Gather the stored texts for every image and return them as JSON.

    For each path returned by ``get_file_from_folder('./static')`` the
    matching ``.txt`` files under ``./Data`` are read: the OCR and corrected
    texts as plain strings, the NLP outputs (``mahad`` and ``mahrous``) and
    the solve file as parsed JSON. Entries are keyed by the part of the
    image's base name before its first dot.

    Returns:
        The value of ``jsonify(result)``, where ``result`` maps each image
        stem to a dict with the keys ``correct_text``, ``ocr_text``,
        ``image_path``, ``image_name``, ``nlp_mahad_ocr_text``,
        ``nlp_mahad_correct_text``, ``nlp_mahrous_ocr_text``,
        ``nlp_mahrous_correct_text`` and ``solve``.

    Raises:
        OSError: if any of the expected files cannot be opened.
        json.JSONDecodeError: if an NLP or solve file is not valid JSON.
    """
    def _read_text(path):
        # The context manager closes the handle even when read() raises.
        with open(path, "r", encoding="utf8") as handle:
            return handle.read()

    def _read_json(path):
        return json.loads(_read_text(path))

    result = {}
    for image in get_file_from_folder('./static'):
        image_name = basename(image)
        image_name_without_addition = image_name.split('.')[0]
        text_file = f'{image_name_without_addition}.txt'

        # Files are read in a fixed order: plain texts first, then the
        # mahad / mahrous NLP outputs, then the solve file.
        ocr_text = _read_text(f'./Data/ocr_text/{text_file}')
        correct_text = _read_text(f'./Data/correct_text/{text_file}')
        nlp_mahad_ocr_text = _read_json(f'./Data/nlp_ocr_text/mahad/{text_file}')
        nlp_mahad_correct_text = _read_json(f'./Data/nlp_correct_text/mahad/{text_file}')
        nlp_mahrous_ocr_text = _read_json(f'./Data/nlp_ocr_text/mahrous/{text_file}')
        nlp_mahrous_correct_text = _read_json(f'./Data/nlp_correct_text/mahrous/{text_file}')
        solve = _read_json(f'./Data/solve/{text_file}')

        result[image_name_without_addition] = {
            "correct_text": correct_text,
            "ocr_text": ocr_text,
            "image_path": f'http://localhost:3001/static/{image_name}',
            "image_name": image_name,
            "nlp_mahad_ocr_text": nlp_mahad_ocr_text,
            "nlp_mahad_correct_text": nlp_mahad_correct_text,
            "nlp_mahrous_ocr_text": nlp_mahrous_ocr_text,
            "nlp_mahrous_correct_text": nlp_mahrous_correct_text,
            "solve": solve,
        }
    return jsonify(result)
import json
from ntpath import basename

from HELP.file_tool import get_file_from_folder
from SOLVE.solveTree import SolveTree


def main(data):
    """Parse every item of *data* with ``SolveTree`` and collect the results.

    Args:
        data: iterable of items, each passed unchanged to ``SolveTree``.

    Returns:
        A list holding the ``parser()`` result of each item's tree, in the
        same order as *data*.
    """
    # Each tree is parsed exactly once and that result is kept.
    # NOTE(review): assumes SolveTree.parser() returns the same value on
    # repeated calls - confirm against SolveTree.
    return [SolveTree(item).parser() for item in data]


if __name__ == '__main__':
    # Solve every NLP result file that carries a "data" entry and write the
    # outcome under ../Data/solve/ using the same file name.
    files = get_file_from_folder("../Data/nlp_correct_text/mahrous")
    for path in files:
        file_name = basename(path)
        # The context manager closes the input even if reading fails.
        with open(path, "r", encoding="utf8") as source_file:
            data = json.loads(source_file.read())
        if "data" in data:
            solve = main(data['data'])
            with open(f'../Data/solve/{file_name}', 'w', encoding="utf8") as json_file:
                json.dump(solve, json_file)
data = [{ "type": "نص السوال" if i == 0 else f"{i} الطلب", "line": array[i], "keyword": array_arabic[i], "symbols": array_symbols[i] } for i in range(len(array))] data = link_keyword_with_symbols(data) data = [handle_special_cases(item) for item in data] problem_type = problem_classification(data) return {"data": data, "problem_type": problem_type} if __name__ == '__main__': ocr_files = get_file_from_folder("../Data/ocr_text") correct_files = get_file_from_folder("../Data/correct_text") for path in ocr_files: file_name = basename(path) ocr_file = open(path, "r", encoding="utf8") ocr_text = ocr_file.read() ocr_file.close() ocr_json = main(ocr_text) with open(f'../Data/nlp_ocr_text/mahrous/{file_name}', 'w', encoding="utf8") as json_file: json.dump(ocr_json, json_file) for path in correct_files: file_name = basename(path) correct_file = open(path, "r", encoding="utf8") correct_text = correct_file.read()