def generate_hash_tables(self, file_path: str, excel_filepath: str, sheet_name: str = None, header: bool = True) -> None:
    """
    Build the cell -> qnode table from a wikified CSV file and store it in self.other.

    Every CSV record is read positionally as ``column, row, value, qnode``:

    1. Records with both a column and a row index map that cell ``(col, row)`` to the qnode.
    2. Records with a non-empty value map that (stripped) value to the qnode.
    3. Every explicitly mapped cell also teaches its sheet value to the value table,
       unless that value is already known.
    4. Finally every cell of the sheet whose value is in the value table is mapped
       to that value's qnode (this overrides an explicit cell mapping from step 1).

    The result is passed through ``self.serialize_cell_to_qnode`` and stored under
    ``self.other["qnodes"]``; its keys are stored under ``self.other["region"]``.

    :param file_path: path of the wikified CSV file (read as UTF-8)
    :param excel_filepath: path of the data file opened with pyexcel
    :param sheet_name: sheet to read; passed straight to ``pyexcel.get_sheet``
    :param header: when truthy, the first CSV record is skipped
    :return: None
    """
    cell_to_qnode = dict()
    value_to_qnode = dict()
    with open(file_path, encoding='utf-8') as file:
        csv_reader = csv.reader(file, delimiter=',')
        if header:
            # Discard the header record without rebinding the parameter.
            next(csv_reader, None)
        for record in csv_reader:
            # Blank or truncated lines cannot carry a qnode; skip them instead of crashing.
            if len(record) < 4:
                continue
            col_text, row_text, qnode = record[0], record[1], record[3]
            if not check_if_empty(col_text) and not check_if_empty(row_text):
                cell_to_qnode[(int(col_text), int(row_text))] = qnode
            # csv never yields None, so test for emptiness: registering "" as a
            # value would tag every blank cell of the sheet in the sweep below.
            value = str(record[2]).strip()
            if not check_if_empty(value):
                value_to_qnode[value] = qnode

    sheet = pyexcel.get_sheet(sheet_name=sheet_name, file_name=excel_filepath)

    # Learn value -> qnode from explicitly mapped cells; CSV-provided values win.
    for (col, row), qnode in cell_to_qnode.items():
        try:
            cell_value = str(sheet[row, col]).strip()
        except IndexError:
            # The CSV may reference a cell outside the sheet; ignore it.
            continue
        if not check_if_empty(cell_value) and cell_value not in value_to_qnode:
            value_to_qnode[cell_value] = qnode

    # Propagate known values to every cell of the sheet that holds them.
    row_count = len(sheet)
    column_count = len(sheet[0]) if row_count else 0
    for row in range(row_count):
        for col in range(column_count):
            try:
                cell_value = str(sheet[row, col]).strip()
            except IndexError:
                continue
            qnode = value_to_qnode.get(cell_value)
            if qnode:
                cell_to_qnode[(col, row)] = qnode

    cell_to_qnode = self.serialize_cell_to_qnode(cell_to_qnode)
    self.other["qnodes"] = cell_to_qnode
    self.other["region"] = list(cell_to_qnode.keys())
def wikify_region(region: str, excel_filepath: str, sheet_name: str = None) -> dict:
    """
    Wikify a cell range of a sheet and return a cell-index -> qnode mapping.

    The range is parsed with ``parse_cell_range``, exported through
    ``create_temporary_csv_file`` and sent to ``call_wikifiy_service``. The
    returned map is then filtered against the sheet: empty cells are left out,
    and non-empty cells the service did not resolve are mapped to "".

    :param region: cell range expression understood by ``parse_cell_range``
    :param excel_filepath: path of the data file opened with pyexcel
    :param sheet_name: sheet to read; passed straight to ``pyexcel.get_sheet``
    :return: dict keyed by the value of ``get_actual_cell_index((col, row))``
    """
    cell_range = parse_cell_range(region)
    file_path = create_temporary_csv_file(cell_range, excel_filepath, sheet_name)
    first_col, first_row = cell_range[0][0], cell_range[0][1]
    cell_qnode_map = call_wikifiy_service(file_path, first_col, first_row)

    response = dict()
    sheet = pyexcel.get_sheet(sheet_name=sheet_name, file_name=excel_filepath)
    last_col = cell_range[1][0]
    for col in range(first_col, last_col + 1):
        last_row = cell_range[1][1]
        for row in range(first_row, last_row + 1):
            try:
                cell_index = get_actual_cell_index((col, row))
                if check_if_empty(sheet[row, col]):
                    continue
                response[cell_index] = cell_qnode_map[cell_index] if cell_index in cell_qnode_map else ""
            except (IndexError, KeyError):
                # Best effort: cells that cannot be read or indexed are skipped.
                pass
    return response
def upload_yaml():
    """
    Store the YAML text posted by a user and return the highlighted region as JSON.

    Reads ``id`` from the request form and ``yaml`` from the request values,
    resets the user's YAML state, then:

    * if the YAML text is empty, returns ``{"error": ...}``;
    * otherwise writes the text to ``<UPLOAD_FOLDER>/<id>.yaml``, loads region and
      template from it via ``load_yaml_data``, records both on the user's YAML
      configuration and returns ``{"region": highlight_region(...)}``.

    :return: JSON string (indent=3) holding either an ``error`` or a ``region`` key
    """
    user_id = request.form["id"]
    yaml_data = request.values["yaml"]
    # NOTE(review): this creates "uploads" while the file below is written under
    # app.config['UPLOAD_FOLDER'] - confirm both refer to the same directory.
    os.makedirs("uploads", exist_ok=True)

    user = app.config['users'].get_user(user_id)
    user.reset('yaml')
    yaml_configuration = user.get_yaml_data()
    excel_data_filepath = user.get_excel_data().get_file_location()

    if check_if_empty(yaml_data):
        return json.dumps({'error': "YAML file is either empty or not valid"}, indent=3)

    sheet_name = user.get_excel_data().get_sheet_name()
    yaml_path = str(Path(app.config['UPLOAD_FOLDER']) / user_id) + ".yaml"
    with open(yaml_path, "w") as yaml_file:
        yaml_file.write(yaml_data)
    yaml_configuration.set_file_location(yaml_path)

    region, parsed_template = load_yaml_data(yaml_path)
    yaml_configuration.set_region(region)
    yaml_configuration.set_template(parsed_template)

    item_table = user.get_wikifier_output_data().get_item_table()
    # Read the template back from the configuration rather than reusing the parsed one.
    template = yaml_configuration.get_template()
    highlighted = highlight_region(item_table, excel_data_filepath, sheet_name, region, template)
    return json.dumps({'region': highlighted}, indent=3)
def remove_empty_and_invalid_cells(region: Region) -> None:
    """
    Punch a hole in the region for every cell that ``check_if_empty`` rejects.

    Scans the columns strictly between ``bindings["$left"]`` and
    ``bindings["$right"]`` and the rows strictly between ``bindings["$top"]``
    and ``bindings["$bottom"]`` of ``bindings['excel_sheet']``. Each cell whose
    string form is reported empty is removed with ``region.add_hole(row, col, col)``.

    :param region: region to remove cells from; modified in place
    :return: None
    """
    column_span = range(bindings["$left"] + 1, bindings["$right"])
    for col in column_span:
        row_span = range(bindings["$top"] + 1, bindings["$bottom"])
        for row in row_span:
            cell_text = str(bindings['excel_sheet'][row, col])
            if not check_if_empty(cell_text):
                continue
            region.add_hole(row, col, col)
def excel_uploader(user: UserData, sheet_name: str):
    """
    Load sheet data for a user, either from the already stored file or from a new upload.

    * If ``sheet_name`` is given and the user already has a file location, that
      file is converted with ``excel_to_json`` for the requested sheet and the
      sheet name is recorded on the user's excel data.
    * Otherwise the upload in ``request.files['file']`` is validated, saved as
      ``<UPLOAD_FOLDER>/<user id>_excel_file.<ext>``, converted with
      ``excel_to_json``, and both file location and sheet name are recorded.
      When no sheet name was given, the first entry of ``data['sheetNames']``
      is used, or None if there is none.

    :param user: user whose excel data is read and updated
    :param sheet_name: sheet to load; falsy means "use the first sheet of the upload"
    :return: the ``excel_to_json`` result on success, otherwise a dict whose
             ``error`` key holds the reason
    """
    user_data = user.get_excel_data()
    data = {"error": ""}

    # Switching sheets on a file that was uploaded earlier: no new upload needed.
    if sheet_name and not check_if_empty(user_data.get_file_location()):
        file_path = user_data.get_file_location()
        data = excel_to_json(file_path, sheet_name)
        user_data.set_sheet_name(sheet_name)
        return data

    if 'file' not in request.files:
        data["error"] = 'No file part'
        return data

    upload = request.files['file']
    if upload.filename == '':
        # Return right away so this message is not replaced by the
        # "file type not supported" error below.
        data["error"] = 'No file selected for uploading'
        return data

    if not (upload and allowed_file(upload.filename)):
        data["error"] = 'This file type is currently not supported'
        return data

    filename = secure_filename(upload.filename)
    filename = user.get_user_id() + "_excel_file." + get_file_extension(filename)
    file_path = str(Path(app.config['UPLOAD_FOLDER']) / filename)
    upload.save(file_path)
    data = excel_to_json(file_path, sheet_name)
    if not sheet_name:
        try:
            sheet_name = data['sheetNames'][0]
        except (KeyError, IndexError):
            # No sheet list, or an empty one: leave the sheet unset.
            sheet_name = None
    user_data.set_file_location(file_path)
    user_data.set_sheet_name(sheet_name)
    return data