def upload_wikifier_output():
    """
    This function uploads the wikifier output and rebuilds the item table for the
    project's current data file and sheet.

    The user id is read from the session and the project id from the ``pid`` form
    field. When the project has a current data file, the item table is built from
    ``wf/other.csv`` in the project's upload directory, the region file is
    rewritten and the region-file mapping is stored in the project configuration.

    :return: JSON string with the region qnodes (only when a current data file
        exists) and an ``error`` entry; a redirect to ``index`` when the session
        holds no ``uid``
    """
    if 'uid' not in session:
        # A view must return a response; behave like upload_data_file() does.
        return redirect(url_for('index'))

    user_id = session['uid']
    project_id = request.form['pid']
    response = dict()
    error = wikified_output_uploader(user_id, project_id)
    project = Project(get_project_config_path(user_id, project_id))
    file_name, sheet_name = project.get_current_file_and_sheet()
    if file_name:
        region_map, region_file_name = get_region_mapping(user_id, project_id, project, file_name, sheet_name)
        item_table = ItemTable(region_map)
        upload_dir = Path.cwd() / "config" / "uploads" / user_id / project_id
        wikifier_output_filepath = str(upload_dir / "wf" / "other.csv")
        data_filepath = str(upload_dir / "df" / file_name)
        build_item_table(item_table, wikifier_output_filepath, data_filepath, sheet_name)
        response.update(item_table.get_region_qnodes())
        update_wikifier_region_file(user_id, project_id, region_file_name, response)
        # Store the mapping so the project configuration points at this region file.
        project_meta = {"wikifierRegionMapping": {file_name: {sheet_name: region_file_name}}}
        project.update_project_config(project_meta)
    # 'error' is added after the region file is written, so it is not stored in it.
    response['error'] = error
    return json.dumps(response, indent=3)
def upload_excel():
    """
    This function uploads the data file for the user named in the ``id`` form field.

    The user's YAML state is always reset. After the upload, the item table is
    rebuilt and its region qnodes are merged into the response when both a data
    file location and a wikifier output location are known.

    :return: JSON string of the uploader response, extended with the region qnodes
        when the item table was rebuilt
    """
    form = request.form
    user_id = form["id"]
    is_new_upload = form["is_new_upload"] == "True"
    user = app.config['users'].get_user(user_id)
    sheet_name = form.get("sheet_name")

    user.reset('yaml')
    # NOTE(review): nesting of the three reset calls below was reconstructed from a
    # flattened source line - confirm which of them belong to the new-upload case.
    if is_new_upload:
        user.reset('excel')
        user.get_wikifier_output_data().reset()
    user.get_wikifier_output_data().reset_item_table()

    os.makedirs("uploads", exist_ok=True)
    response = excel_uploader(user, sheet_name)
    excel_data_filepath = user.get_excel_data().get_file_location()
    wikifier_output_filepath = user.get_wikifier_output_data().get_file_location()

    if not (excel_data_filepath and wikifier_output_filepath):
        return json.dumps(response, indent=3)

    item_table = user.get_wikifier_output_data().get_item_table()
    if not item_table:
        item_table = ItemTable()
        user.get_wikifier_output_data().set_item_table(item_table)
    build_item_table(item_table, wikifier_output_filepath, excel_data_filepath, sheet_name)
    response.update(item_table.get_region_qnodes())
    return json.dumps(response, indent=3)
def get_project_files():
    """
    This function fetches the last session of the last opened files in a project
    when that project is reopened later.

    Without a ``uid`` in the session the default (all ``None``) response is returned.

    :return: JSON string with ``tableData``, ``yamlData``, ``wikifierData`` and
        ``settings``
    """
    response = {
        "tableData": None,
        "yamlData": None,
        "wikifierData": None,
        "settings": {"endpoint": None},
    }
    if 'uid' not in session:
        return json.dumps(response)

    user_id = session["uid"]
    project_id = request.form['pid']
    project = Project(get_project_config_path(user_id, project_id))
    data_file_id, sheet_name = project.get_current_file_and_sheet()

    # Table data: only available when the project has a current data file.
    if data_file_id:
        extension = get_file_extension(data_file_id)
        table = dict()
        response["tableData"] = table
        table["isCSV"] = extension.lower() == "csv"
        table["filename"] = project.get_file_name_by_id(data_file_id)
        data_file_path = str(Path(app.config['UPLOAD_FOLDER']) / user_id / project_id / "df" / data_file_id)
        table.update(excel_to_json(data_file_path, sheet_name, True))
        # Checked after the update on purpose: the merged data may carry its own flag.
        if table["isCSV"]:
            table["currSheetName"] = None
            table["sheetNames"] = None

    # Wikifier data: fall back to an empty item table when no region file is known.
    region_file = project.get_wikifier_region_filename()
    if region_file:
        item_table = ItemTable(deserialize_wikifier_config(user_id, project_id, region_file))
        response["wikifierData"] = item_table.get_region_qnodes()
    else:
        item_table = ItemTable()

    # YAML data: file content always, highlighted regions only with a data file.
    yaml_file_id = project.get_yaml_file_id(data_file_id, sheet_name)
    if yaml_file_id:
        yaml_dir = Path.cwd() / "config" / "uploads" / user_id / project_id / "yf"
        yaml_data = dict()
        response["yamlData"] = yaml_data
        yaml_data["yamlFileContent"] = read_file(str(yaml_dir / (yaml_file_id + ".yaml")))
        if data_file_id:
            pickle_path = str(yaml_dir / (yaml_file_id + ".pickle"))
            data_file_path = str(Path(app.config['UPLOAD_FOLDER']) / user_id / project_id / "df" / data_file_id)
            yaml_config = load_yaml_config(pickle_path)
            template = yaml_config.get_template()
            region = yaml_config.get_region()
            yaml_data['yamlRegions'] = highlight_region(item_table, data_file_path, sheet_name, region, template)

    response["settings"]["endpoint"] = project.get_sparql_endpoint()
    return json.dumps(response)
def build_item_table(item_table: ItemTable, wikifier_output_filepath: str, excel_data_filepath: str, sheet_name: str) -> ItemTable:
    """
    This function builds the ItemTable using the wikified output file uploaded by the user.
    Nothing is generated when no data file path is given.

    :param item_table: table whose ``generate_hash_tables`` is called
    :param wikifier_output_filepath: path of the wikified output file
    :param excel_data_filepath: path of the data file; a falsy value skips the build
    :param sheet_name: sheet name forwarded to ``generate_hash_tables``
    :return: the same ``item_table`` object that was passed in
    """
    if not excel_data_filepath:
        return item_table
    item_table.generate_hash_tables(wikifier_output_filepath, excel_data_filepath, sheet_name)
    return item_table
def wikifier(item_table: ItemTable, region: str, excel_filepath: str, sheet_name: str) -> dict:
    """
    This function processes the calls to the wikifier service and adds the output
    to the ItemTable object.

    :param item_table: table to extend; a falsy value is replaced by a new ItemTable
    :param region: region passed to ``wikify_region`` and registered on the table
    :param excel_filepath: path of the data file passed to ``wikify_region``
    :param sheet_name: sheet name passed to ``wikify_region``
    :return: the region qnodes of the table after the region was added
    """
    table = item_table if item_table else ItemTable()
    qnodes_by_cell = wikify_region(region, excel_filepath, sheet_name)
    table.add_region(region, qnodes_by_cell)
    return table.get_region_qnodes()
def downloader():
    """
    This function initiates the download for the project's current data file and sheet.

    Reads the ``type`` and ``pid`` form fields, loads the pickled YAML
    configuration mapped to the current file/sheet and hands everything to
    ``generate_download_file``.

    :return: JSON string of the ``generate_download_file`` response
    """
    user_id = session["uid"]
    filetype = request.form["type"]
    project_id = request.form["pid"]

    project = Project(get_project_config_path(user_id, project_id))
    data_file_name, sheet_name = project.get_current_file_and_sheet()

    upload_dir = Path.cwd() / "config" / "uploads" / user_id / project_id
    data_file_path = str(upload_dir / "df" / data_file_name)

    yaml_file_id = project.get_yaml_file_id(data_file_name, sheet_name)
    yaml_config = load_yaml_config(str(upload_dir / "yf" / (yaml_file_id + ".pickle")))
    template = yaml_config.get_template()
    region = yaml_config.get_region()

    # Only the region map is needed here; the region file name is not used.
    region_map, _ = get_region_mapping(user_id, project_id, project)
    item_table = ItemTable(region_map)
    sparql_endpoint = project.get_sparql_endpoint()

    result = generate_download_file(user_id, item_table, data_file_path, sheet_name, region, template, filetype,
                                    sparql_endpoint)
    return json.dumps(result, indent=3)
def get_cell_statement():
    """
    This function returns the statement of a particular cell.

    The cell is given by the ``col`` and ``row`` form fields, the project by ``pid``.

    :return: JSON string of the ``resolve_cell`` result, or
        ``{"error": "YAML file not found"}`` when no YAML file is mapped to the
        current data file and sheet
    """
    user_id = session["uid"]
    project_id = request.form["pid"]
    column = get_excel_column_index(request.form["col"])
    row = get_excel_row_index(request.form["row"])

    project = Project(get_project_config_path(user_id, project_id))
    data_file_name, sheet_name = project.get_current_file_and_sheet()
    yaml_file_id = project.get_yaml_file_id(data_file_name, sheet_name)
    if not yaml_file_id:
        return json.dumps({"error": "YAML file not found"})

    upload_dir = Path.cwd() / "config" / "uploads" / user_id / project_id
    yaml_config = load_yaml_config(str(upload_dir / "yf" / (yaml_file_id + ".pickle")))
    data_file_path = str(upload_dir / "df" / data_file_name)
    template = yaml_config.get_template()
    region = yaml_config.get_region()

    # Only the region map is needed here; the region file name is not used.
    region_map, _ = get_region_mapping(user_id, project_id, project)
    item_table = ItemTable(region_map)
    sparql_endpoint = project.get_sparql_endpoint()
    statement = resolve_cell(item_table, data_file_path, sheet_name, region, template, column, row, sparql_endpoint)
    return json.dumps(statement)
def upload_yaml():
    """
    This function processes the YAML sent in the ``yaml`` form field.

    A valid YAML string is written to the project's ``yf`` directory and registered
    on the project for the current data file and sheet. When a current data file
    exists, the YAML is additionally loaded, its configuration is pickled, the
    highlighted regions are added to the response and the YAML mapping is stored
    in the project configuration.

    :return: JSON string with an ``error`` entry and, unless the YAML string was
        rejected, a ``yamlRegions`` entry (``None`` when no data file is uploaded)
    """
    user_id = session['uid']
    project_id = request.form['pid']
    yaml_data = request.form["yaml"]
    project_meta = dict()
    project_config_path = get_project_config_path(user_id, project_id)
    project = Project(project_config_path)
    data_file_name, sheet_name = project.get_current_file_and_sheet()
    yaml_configuration = YAMLFile()
    response = {'error': None}

    if check_if_string_is_invalid(yaml_data):
        response['error'] = "YAML file is either empty or not valid"
        return json.dumps(response, indent=3)

    yaml_file_id = project.get_yaml_file_id(data_file_name, sheet_name)
    if not yaml_file_id:
        yaml_file_id = generate_id()
    upload_dir = Path.cwd() / "config" / "uploads" / user_id / project_id
    yaml_file_path = str(upload_dir / "yf" / (yaml_file_id + ".yaml"))
    yaml_config_file_path = str(upload_dir / "yf" / (yaml_file_id + ".pickle"))

    # newline='' writes the submitted text without translating its line endings.
    with open(yaml_file_path, "w", newline='') as f:
        f.write(yaml_data)
    yaml_configuration.set_file_location(yaml_file_path)
    project.add_yaml_file(data_file_name, sheet_name, yaml_file_id)

    if data_file_name:
        # Build the data file path only here: joining a path with a missing file
        # name raises TypeError and would hide the error message below.
        data_file_path = str(upload_dir / "df" / data_file_name)
        wikifier_config_file_name = project.get_or_create_wikifier_region_filename(data_file_name, sheet_name)
        wikifier_config = deserialize_wikifier_config(user_id, project_id, wikifier_config_file_name)
        item_table = ItemTable(wikifier_config)
        region, template, created_by = load_yaml_data(yaml_file_path, item_table, data_file_path, sheet_name)
        yaml_configuration.set_region(region)
        yaml_configuration.set_template(template)
        yaml_configuration.set_created_by(created_by)
        save_yaml_config(yaml_config_file_path, yaml_configuration)
        template = yaml_configuration.get_template()
        response['yamlRegions'] = highlight_region(item_table, data_file_path, sheet_name, region, template)
        project_meta["yamlMapping"] = {data_file_name: {sheet_name: yaml_file_id}}
        project.update_project_config(project_meta)
    else:
        response['yamlRegions'] = None
        response['error'] = "Upload data file before applying YAML."
    return json.dumps(response, indent=3)
def run_t2wml(data_file_path: str, wikified_output_path: str, t2wml_spec: str, output_directory: str,
              sheet_name: str = None,
              sparql_endpoint: str = "http://dsbox02.isi.edu:8888/bigdata/namespace/wdq/sparql"):
    """
    Run the conversion for one data file and write ``results.ttl`` and
    ``changes.tsv`` into a result directory.

    The result directory is ``output_directory/<data file name without extension>``
    for CSV files, with an additional ``<sheet name>`` level for ``xls``/``xlsx``
    files. For any other extension the files are written to the current directory.
    Failures are logged and end the run; nothing is raised to the caller for them.

    :param data_file_path: path of the data file
    :param wikified_output_path: path of the wikified output used to build the item table
    :param t2wml_spec: path of the YAML file handed to ``load_yaml_data``
    :param output_directory: directory under which the result directory is created
    :param sheet_name: sheet to process; for ``xls``/``xlsx`` files the first sheet
        is used when this is not given
    :param sparql_endpoint: endpoint passed on to ``generate_download_file``
    :return: None
    """
    try:
        yaml_configuration = YAMLFile()
        yaml_configuration.set_file_location(t2wml_spec)
        region, template = load_yaml_data(t2wml_spec)
        yaml_configuration.set_region(region)
        yaml_configuration.set_template(template)
    except Exception:
        # Keep the traceback in the log; KeyboardInterrupt/SystemExit pass through.
        logging.exception("Invalid YAML File")
        return

    try:
        item_table = ItemTable()
        build_item_table(item_table, wikified_output_path, data_file_path, sheet_name)
    except Exception:
        logging.exception("Invalid Wikfied Output File")
        return

    filetype = "ttl"
    response = generate_download_file(None, item_table, data_file_path, sheet_name, region, template, filetype,
                                      sparql_endpoint)

    file_name = Path(data_file_path).name
    # rpartition yields an empty separator when the name holds no "." at all,
    # which is the only reliable way to notice a missing extension here.
    result_directory, separator, file_extension = file_name.rpartition(".")
    if not separator:
        logging.error("Data file has no extension")
        return
    file_extension = file_extension.lower()

    output_path = Path()
    if file_extension == "csv":
        output_path = Path(output_directory) / result_directory
    elif file_extension in ("xls", "xlsx"):
        if not sheet_name:
            sheet_name = get_first_sheet_name(data_file_path)
        output_path = Path(output_directory) / result_directory / sheet_name

    output_path.mkdir(parents=True, exist_ok=True)

    with open(str(output_path / "results.ttl"), "w") as fp:
        fp.write(response["data"])

    with open(str(output_path / "changes.tsv"), "w") as fp:
        serialize_change_record(fp)
def upload_wikified_output():
    """
    This function uploads the wikifier output for the user named in the ``id``
    form field.

    The user's wikifier output state is reset before the upload. When both a data
    file location and a wikifier output location are known afterwards, the item
    table is (re)built and its region qnodes replace the uploader response.

    :return: JSON string of the region qnodes when the item table was built,
        otherwise of the uploader response
    """
    user_id = request.form["id"]
    os.makedirs("uploads", exist_ok=True)
    user = app.config['users'].get_user(user_id)
    user.reset('wikifier_output')
    response = wikified_output_uploader(user)
    excel_data_filepath = user.get_excel_data().get_file_location()
    sheet_name = user.get_excel_data().get_sheet_name()
    wikifier_output_filepath = user.get_wikifier_output_data().get_file_location()
    # Both files are required to build the table, the same check upload_excel() makes.
    if excel_data_filepath and wikifier_output_filepath:
        item_table = user.get_wikifier_output_data().get_item_table()
        if not item_table:
            item_table = ItemTable()
            user.get_wikifier_output_data().set_item_table(item_table)
        build_item_table(item_table, wikifier_output_filepath, excel_data_filepath, sheet_name)
        response = item_table.get_region_qnodes()
    return json.dumps(response, indent=3)
def wikify_region():
    """
    This function performs three tasks, selected by the ``action`` form field:
    ``add_region`` calls the wikifier service to wikify a region,
    ``delete_region`` deletes a region's wikification result and
    ``update_qnode`` updates the wikification result.

    For ``update_qnode`` the ``apply_to`` form field selects the scope: ``0`` for
    the single cell, ``1`` for all cells within the region, ``2`` for all cells
    in all regions.

    :return: JSON string of the region qnodes with an ``error`` entry;
        ``error`` is "No excel file to wikify" when ``add_region`` is requested
        without a current data file
    """
    user_id = session["uid"]
    project_id = request.form["pid"]
    action = request.form["action"]
    region = request.form["region"]
    project_config_path = get_project_config_path(user_id, project_id)
    project = Project(project_config_path)
    data_file_name, sheet_name = project.get_current_file_and_sheet()

    # Without a data file there is no path; joining with a missing name would raise.
    data_file_path = None
    if data_file_name:
        data_file_path = str(Path.cwd() / "config" / "uploads" / user_id / project_id / "df" / data_file_name)

    region_map, region_file_name = get_region_mapping(user_id, project_id, project)
    item_table = ItemTable(region_map)
    data = dict()

    if action == "add_region":
        if not data_file_path:
            data['error'] = "No excel file to wikify"
        else:
            data = wikifier(item_table, region, data_file_path, sheet_name)
            wikifier_region_file_name = project.get_or_create_wikifier_region_filename()
            update_wikifier_region_file(user_id, project_id, wikifier_region_file_name, data)
    elif action == "delete_region":
        item_table.delete_region(region)
        data = item_table.get_region_qnodes()
        wikifier_region_file_name = project.get_or_create_wikifier_region_filename()
        update_wikifier_region_file(user_id, project_id, wikifier_region_file_name, data)
    elif action == "update_qnode":
        cell = request.form["cell"]
        qnode = request.form["qnode"]
        apply_to = int(request.form["apply_to"])
        if apply_to == 0:
            item_table.update_cell(region, cell, qnode)
        elif apply_to == 1:
            item_table.update_all_cells_within_region(region, cell, qnode, data_file_path, sheet_name)
        elif apply_to == 2:
            item_table.update_all_cells_in_all_region(cell, qnode, data_file_path, sheet_name)
        data = item_table.get_region_qnodes()
        wikifier_region_file_name = project.get_or_create_wikifier_region_filename()
        update_wikifier_region_file(user_id, project_id, wikifier_region_file_name, data)

    if 'error' not in data:
        data['error'] = None

    # Record the mapping only for a real data file, so no entry is keyed by None.
    if data_file_name:
        project_meta = {"wikifierRegionMapping": {data_file_name: {sheet_name: region_file_name}}}
        project.update_project_config(project_meta)
    return json.dumps(data, indent=3)
def change_sheet():
    """
    This route is used when a user switches a sheet in an excel data file.

    The new sheet (``sheet_name`` form field) of the project's current data file
    is loaded, the item table is rebuilt for it when ``wf/other.csv`` exists in
    the project's upload directory, the YAML mapped to that sheet (if any) is
    returned, and the project configuration is updated.

    :return: JSON string with ``tableData``, ``wikifierData``, ``yamlData`` and
        ``error``; a redirect to ``index`` when the session holds no ``uid``
    """
    if 'uid' not in session:
        # A view must return a response; behave like upload_data_file() does.
        return redirect(url_for('index'))

    response = {
        "tableData": dict(),
        "wikifierData": dict(),
        "yamlData": dict(),
        "error": None
    }
    project_meta = dict()
    user_id = session['uid']
    new_sheet_name = request.form['sheet_name']
    project_id = request.form['pid']
    project_config_path = get_project_config_path(user_id, project_id)
    project = Project(project_config_path)
    # The previously selected sheet is not needed here.
    data_file_id, _ = project.get_current_file_and_sheet()
    data_file_path = str(Path.cwd() / "config" / "uploads" / user_id / project_id / "df" / data_file_id)
    data = excel_to_json(data_file_path, new_sheet_name)

    table_data = response["tableData"]
    table_data["filename"] = project.get_file_name_by_id(data_file_id)
    table_data["isCSV"] = False  # because CSVs don't have sheets
    table_data["sheetNames"] = data["sheetNames"]
    table_data["currSheetName"] = data["currSheetName"]
    table_data["sheetData"] = data["sheetData"]
    project_meta["currentSheetName"] = data["currSheetName"]

    add_excel_file_to_bindings(data_file_path, new_sheet_name)

    region_map, region_file_name = get_region_mapping(user_id, project_id, project, data_file_id, new_sheet_name)
    item_table = ItemTable(region_map)
    wikifier_output_filepath = str(Path.cwd() / "config" / "uploads" / user_id / project_id / "wf" / "other.csv")
    if Path(wikifier_output_filepath).exists():
        build_item_table(item_table, wikifier_output_filepath, data_file_path, new_sheet_name)
    region_qnodes = item_table.get_region_qnodes()
    response["wikifierData"] = region_qnodes
    project_meta["wikifierRegionMapping"] = dict()
    project_meta["wikifierRegionMapping"][data_file_id] = dict()
    project_meta["wikifierRegionMapping"][data_file_id][new_sheet_name] = region_file_name
    update_wikifier_region_file(user_id, project_id, region_file_name, region_qnodes)

    yaml_file_id = project.get_yaml_file_id(data_file_id, new_sheet_name)
    if yaml_file_id:
        response["yamlData"] = dict()
        yaml_file_name = yaml_file_id + ".yaml"
        yaml_file_path = str(Path.cwd() / "config" / "uploads" / user_id / project_id / "yf" / yaml_file_name)
        response["yamlData"]["yamlFileContent"] = read_file(yaml_file_path)
        if data_file_id:
            yaml_config_file_name = yaml_file_id + ".pickle"
            yaml_config_file_path = str(
                Path.cwd() / "config" / "uploads" / user_id / project_id / "yf" / yaml_config_file_name)
            # From here on the data file is addressed through the configured upload folder.
            data_file_path = str(Path(app.config['UPLOAD_FOLDER']) / user_id / project_id / "df" / data_file_id)
            yaml_config = load_yaml_config(yaml_config_file_path)
            template = yaml_config.get_template()
            region = yaml_config.get_region()
            response["yamlData"]['yamlRegions'] = highlight_region(item_table, data_file_path, new_sheet_name,
                                                                   region, template)
            project_meta["yamlMapping"] = dict()
            project_meta["yamlMapping"][data_file_id] = dict()
            project_meta["yamlMapping"][data_file_id][data["currSheetName"]] = yaml_file_id
    else:
        response["yamlData"] = None

    project.update_project_config(project_meta)
    return json.dumps(response, indent=3)
def upload_data_file():
    """
    This function uploads the data file for the project named in the ``pid`` form field.

    On a successful upload the table data is returned, the item table is rebuilt
    when ``wf/other.csv`` exists in the project's upload directory, the YAML
    mapped to the file and sheet (if any) is returned, and the project
    configuration is updated. For CSV files the data file id is used as the
    sheet name.

    :return: JSON string with ``tableData``, ``wikifierData``, ``yamlData`` and
        ``error``; a redirect to ``index`` when the session holds no ``uid``
    """
    if 'uid' not in session:
        return redirect(url_for('index'))

    response = {
        "tableData": dict(),
        "wikifierData": dict(),
        "yamlData": dict(),
        "error": None
    }
    project_meta = dict()
    user_id = session['uid']
    project_id = request.form['pid']
    data = data_file_uploader(user_id, project_id)
    if data["error"]:
        response["error"] = data["error"]
        return json.dumps(response, indent=3)

    table_data = response["tableData"]
    curr_data_file_id = data["currentDataFile"]
    project_meta["currentDataFile"] = curr_data_file_id
    curr_data_file_name = data["dataFileMapping"][curr_data_file_id]
    project_meta["dataFileMapping"] = data["dataFileMapping"]
    project_meta["mdate"] = int(time() * 1000)  # milliseconds
    table_data["filename"] = curr_data_file_name
    table_data["isCSV"] = data["isCSV"]
    if not table_data["isCSV"]:
        table_data["sheetNames"] = data["sheetNames"]
        table_data["currSheetName"] = data["currSheetName"]
        project_meta["currentSheetName"] = data["currSheetName"]
    else:
        table_data["sheetNames"] = None
        table_data["currSheetName"] = None
        # CSV files have no sheets; the file id stands in as the sheet name.
        project_meta["currentSheetName"] = curr_data_file_id
    table_data["sheetData"] = data["sheetData"]

    project_config_path = get_project_config_path(user_id, project_id)
    project = Project(project_config_path)
    data_file_name = curr_data_file_id
    sheet_name = project_meta["currentSheetName"]
    region_map, region_file_name = get_region_mapping(user_id, project_id, project, data_file_name, sheet_name)
    item_table = ItemTable(region_map)
    wikifier_output_filepath = str(Path.cwd() / "config" / "uploads" / user_id / project_id / "wf" / "other.csv")
    data_file_path = str(Path.cwd() / "config" / "uploads" / user_id / project_id / "df" / data_file_name)

    add_excel_file_to_bindings(data_file_path, sheet_name)

    if Path(wikifier_output_filepath).exists():
        build_item_table(item_table, wikifier_output_filepath, data_file_path, sheet_name)
    region_qnodes = item_table.get_region_qnodes()
    response["wikifierData"] = region_qnodes
    project_meta["wikifierRegionMapping"] = dict()
    project_meta["wikifierRegionMapping"][data_file_name] = dict()
    project_meta["wikifierRegionMapping"][data_file_name][sheet_name] = region_file_name
    update_wikifier_region_file(user_id, project_id, region_file_name, region_qnodes)

    yaml_file_id = project.get_yaml_file_id(data_file_name, sheet_name)
    if yaml_file_id:
        response["yamlData"] = dict()
        yaml_file_name = yaml_file_id + ".yaml"
        yaml_file_path = str(Path.cwd() / "config" / "uploads" / user_id / project_id / "yf" / yaml_file_name)
        response["yamlData"]["yamlFileContent"] = read_file(yaml_file_path)
        if data_file_name:
            yaml_config_file_name = yaml_file_id + ".pickle"
            yaml_config_file_path = str(
                Path.cwd() / "config" / "uploads" / user_id / project_id / "yf" / yaml_config_file_name)
            # From here on the data file is addressed through the configured upload folder.
            data_file_path = str(Path(app.config['UPLOAD_FOLDER']) / user_id / project_id / "df" / data_file_name)
            yaml_config = load_yaml_config(yaml_config_file_path)
            template = yaml_config.get_template()
            region = yaml_config.get_region()
            response["yamlData"]['yamlRegions'] = highlight_region(item_table, data_file_path, sheet_name,
                                                                   region, template)
            # Key the mapping by the sheet name the YAML was looked up with, so it
            # also matches for CSV files, whose sheet name is the file id.
            project_meta["yamlMapping"] = dict()
            project_meta["yamlMapping"][data_file_name] = dict()
            project_meta["yamlMapping"][data_file_name][sheet_name] = yaml_file_id
    else:
        response["yamlData"] = None

    project.update_project_config(project_meta)
    return json.dumps(response, indent=3)
def wikify_region():
    """
    This function performs three tasks, selected by the ``action`` form field:
    ``add_region`` calls the wikifier service to wikify a region,
    ``delete_region`` deletes a region's wikification result and
    ``update_qnode`` updates the wikification result.

    For ``update_qnode`` the ``apply_to`` form field selects the scope: ``0`` for
    the single cell, ``1`` for all cells within the region, ``2`` for all cells
    in all regions.

    :return: JSON string of the region qnodes; the string "No excel file to wikify"
        when ``add_region`` is requested without a data file; an empty string for
        any other action
    """
    form = request.form
    user_id = form["id"]
    action = form["action"]
    region = form["region"]
    user = app.config['users'].get_user(user_id)
    data = ""

    if action == "add_region":
        excel_filepath = user.get_excel_data().get_file_location()
        sheet_name = user.get_excel_data().get_sheet_name()
        item_table = user.get_wikifier_output_data().get_item_table()
        # The table is created and stored even when there turns out to be no file.
        if not item_table:
            item_table = ItemTable()
            user.get_wikifier_output_data().set_item_table(item_table)
        if excel_filepath:
            data = wikifier(item_table, region, excel_filepath, sheet_name)
        else:
            data = "No excel file to wikify"

    elif action == "delete_region":
        item_table = user.get_wikifier_output_data().get_item_table()
        item_table.delete_region(region)
        data = item_table.get_region_qnodes()

    elif action == "update_qnode":
        cell = form["cell"]
        qnode = form["qnode"]
        apply_to = int(form["apply_to"])
        item_table = user.get_wikifier_output_data().get_item_table()
        if apply_to == 0:
            item_table.update_cell(region, cell, qnode)
        elif apply_to in (1, 2):
            # Both wider scopes need the data file and sheet to find matching cells.
            excel_filepath = user.get_excel_data().get_file_location()
            sheet_name = user.get_excel_data().get_sheet_name()
            if apply_to == 1:
                item_table.update_all_cells_within_region(region, cell, qnode, excel_filepath, sheet_name)
            else:
                item_table.update_all_cells_in_all_region(cell, qnode, excel_filepath, sheet_name)
        data = item_table.get_region_qnodes()

    return json.dumps(data, indent=3)