def upload_wikifier_output():
    """
    Upload the wikifier output of the current project and rebuild its item table.

    The user id is read from the session and the project id from the ``pid``
    form field. When the project has a current data file, the item table is
    built from the stored wikifier output, the region qnodes are written to the
    wikifier region file and the region mapping is saved in the project
    configuration.

    :return: JSON string holding the region qnodes (only when a data file is
             selected) plus an ``error`` entry with the uploader's result, or a
             redirect to ``index`` when no user id is in the session
    """
    if 'uid' not in session:
        # Same fallback as upload_data_file; without it the view returns None.
        return redirect(url_for('index'))

    response = dict()
    user_id = session['uid']
    project_id = request.form['pid']
    error = wikified_output_uploader(user_id, project_id)
    project_config_path = get_project_config_path(user_id, project_id)
    project = Project(project_config_path)
    file_name, sheet_name = project.get_current_file_and_sheet()

    if file_name:
        upload_root = Path.cwd() / "config" / "uploads" / user_id / project_id
        region_map, region_file_name = get_region_mapping(user_id, project_id, project, file_name, sheet_name)
        item_table = ItemTable(region_map)
        wikifier_output_filepath = str(upload_root / "wf" / "other.csv")
        data_filepath = str(upload_root / "df" / file_name)
        build_item_table(item_table, wikifier_output_filepath, data_filepath, sheet_name)
        response.update(item_table.get_region_qnodes())
        # Written before 'error' is added, so the region file only gets the qnodes.
        update_wikifier_region_file(user_id, project_id, region_file_name, response)

        # The mapping used to be assembled and then dropped; store it the way
        # change_sheet and upload_data_file do.
        project_meta = {"wikifierRegionMapping": {file_name: {sheet_name: region_file_name}}}
        project.update_project_config(project_meta)

    response['error'] = error
    return json.dumps(response, indent=3)
def upload_excel():
    """
    Handle the upload of a data file and refresh the wikifier item table for it.

    :return: JSON string of the uploader's response, extended with the region
             qnodes when both a data file and a wikifier output file are known
    """
    user_id = request.form["id"]
    is_new_upload = request.form["is_new_upload"] == "True"
    user = app.config['users'].get_user(user_id)
    sheet_name = request.form.get("sheet_name")

    user.reset('yaml')
    if is_new_upload:
        user.reset('excel')
    # NOTE(review): the source had lost its indentation here; the two wikifier
    # resets are taken to run on every request, not only for new uploads - confirm.
    user.get_wikifier_output_data().reset()
    user.get_wikifier_output_data().reset_item_table()

    os.makedirs("uploads", exist_ok=True)
    response = excel_uploader(user, sheet_name)

    data_path = user.get_excel_data().get_file_location()
    wikified_path = user.get_wikifier_output_data().get_file_location()
    if data_path and wikified_path:
        table = user.get_wikifier_output_data().get_item_table()
        if not table:
            # No item table yet: create one and attach it to the user's wikifier data.
            table = ItemTable()
            user.get_wikifier_output_data().set_item_table(table)
        build_item_table(table, wikified_path, data_path, sheet_name)
        response.update(table.get_region_qnodes())

    return json.dumps(response, indent=3)
def run_t2wml(
        data_file_path: str,
        wikified_output_path: str,
        t2wml_spec: str,
        output_directory: str,
        sheet_name: str = None,
        sparql_endpoint: str = "http://dsbox02.isi.edu:8888/bigdata/namespace/wdq/sparql"):
    """
    Run a T2WML spec against a data file and write the results to disk.

    Writes ``results.ttl`` and ``changes.tsv`` into a directory named after the
    data file (without its extension) under ``output_directory``; for
    ``xls``/``xlsx`` files a sub-directory named after the sheet is added.
    Failures are logged and end the run early without raising.

    :param data_file_path: path of the data file (csv, xls or xlsx)
    :param wikified_output_path: path of the wikifier output file
    :param t2wml_spec: path of the T2WML YAML file
    :param output_directory: directory under which the result directory is created
    :param sheet_name: sheet to process; for xls/xlsx the first sheet's name is
                       used for the output directory when this is empty
    :param sparql_endpoint: SPARQL endpoint passed to ``generate_download_file``
    :return: None
    """
    try:
        yaml_configuration = YAMLFile()
        yaml_configuration.set_file_location(t2wml_spec)
        region, template = load_yaml_data(t2wml_spec)
        yaml_configuration.set_region(region)
        yaml_configuration.set_template(template)
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit and hid the cause; the traceback is now logged.
        logging.error("Invalid YAML File", exc_info=True)
        return

    try:
        item_table = ItemTable()
        build_item_table(item_table, wikified_output_path, data_file_path, sheet_name)
    except Exception:
        logging.error("Invalid Wikfied Output File", exc_info=True)
        return

    # Split on the last dot. The old ``split(".")[-1]`` could never raise, so
    # the "no extension" error below was unreachable and such files were
    # written straight into the current working directory.
    file_name = Path(data_file_path).name
    result_directory, dot, file_extension = file_name.rpartition(".")
    if not dot:
        logging.error("Data file has no extension")
        return

    filetype = "ttl"
    response = generate_download_file(None, item_table, data_file_path, sheet_name, region, template, filetype,
                                      sparql_endpoint)

    # Any extension other than csv/xls/xlsx keeps the previous fallback: the
    # current working directory.
    output_path = Path()
    if file_extension == "csv":
        output_path = Path(output_directory) / result_directory
    elif file_extension in ("xls", "xlsx"):
        if not sheet_name:
            sheet_name = get_first_sheet_name(data_file_path)
        output_path = Path(output_directory) / result_directory / sheet_name

    output_path.mkdir(parents=True, exist_ok=True)
    with open(str(output_path / "results.ttl"), "w") as fp:
        fp.write(response["data"])
    with open(str(output_path / "changes.tsv"), "w") as fp:
        serialize_change_record(fp)
def upload_wikified_output():
    """
    Upload the wikifier output and build the user's item table from it.

    :return: JSON string of the region qnodes when both the data file and the
             wikifier output file are known, otherwise of the uploader's response
    """
    user_id = request.form["id"]
    os.makedirs("uploads", exist_ok=True)
    user = app.config['users'].get_user(user_id)
    user.reset('wikifier_output')
    response = wikified_output_uploader(user)

    excel_data_filepath = user.get_excel_data().get_file_location()
    sheet_name = user.get_excel_data().get_sheet_name()
    wikifier_output_filepath = user.get_wikifier_output_data().get_file_location()

    # The guard used to test the data file path twice and never the wikifier
    # path, so build_item_table could be called without a wikifier output file.
    if excel_data_filepath and wikifier_output_filepath:
        item_table = user.get_wikifier_output_data().get_item_table()
        if not item_table:
            item_table = ItemTable()
            user.get_wikifier_output_data().set_item_table(item_table)
        build_item_table(item_table, wikifier_output_filepath, excel_data_filepath, sheet_name)
        # NOTE(review): this replaces the uploader's response instead of
        # extending it, unlike upload_excel - confirm that is intended.
        response = item_table.get_region_qnodes()

    return json.dumps(response, indent=3)
def change_sheet():
    """
    This route is used when a user switches a sheet in an excel data file.

    Reads the user id from the session and the ``sheet_name`` and ``pid`` form
    fields, loads the requested sheet of the project's current data file,
    rebuilds the item table for that sheet and saves the new sheet name, the
    wikifier region mapping and (when a YAML file exists for the sheet) the
    YAML mapping in the project configuration.

    :return: JSON string with ``tableData``, ``wikifierData``, ``yamlData``
             (``None`` when the sheet has no YAML file) and ``error``, or a
             redirect to ``index`` when no user id is in the session
    """
    if 'uid' not in session:
        # Same fallback as upload_data_file; without it the view returns None.
        return redirect(url_for('index'))

    response = {
        "tableData": dict(),
        "wikifierData": dict(),
        "yamlData": dict(),
        "error": None
    }
    project_meta = dict()
    user_id = session['uid']
    new_sheet_name = request.form['sheet_name']
    project_id = request.form['pid']
    project_config_path = get_project_config_path(user_id, project_id)
    project = Project(project_config_path)
    # Only the file id is needed; the previously selected sheet is being replaced.
    data_file_id, _ = project.get_current_file_and_sheet()
    upload_root = Path.cwd() / "config" / "uploads" / user_id / project_id
    data_file_path = str(upload_root / "df" / data_file_id)
    data = excel_to_json(data_file_path, new_sheet_name)

    table_data = response["tableData"]
    table_data["filename"] = project.get_file_name_by_id(data_file_id)
    table_data["isCSV"] = False  # because CSVs don't have sheets
    table_data["sheetNames"] = data["sheetNames"]
    table_data["currSheetName"] = data["currSheetName"]
    table_data["sheetData"] = data["sheetData"]
    project_meta["currentSheetName"] = data["currSheetName"]

    add_excel_file_to_bindings(data_file_path, new_sheet_name)

    region_map, region_file_name = get_region_mapping(user_id, project_id, project, data_file_id, new_sheet_name)
    item_table = ItemTable(region_map)
    wikifier_output_filepath = str(upload_root / "wf" / "other.csv")
    if Path(wikifier_output_filepath).exists():
        build_item_table(item_table, wikifier_output_filepath, data_file_path, new_sheet_name)
    # The region qnodes are reported and stored whether or not a wikifier
    # output file exists.
    region_qnodes = item_table.get_region_qnodes()
    response["wikifierData"] = region_qnodes
    project_meta["wikifierRegionMapping"] = {data_file_id: {new_sheet_name: region_file_name}}
    update_wikifier_region_file(user_id, project_id, region_file_name, region_qnodes)

    yaml_file_id = project.get_yaml_file_id(data_file_id, new_sheet_name)
    if yaml_file_id:
        response["yamlData"] = dict()
        yaml_file_name = yaml_file_id + ".yaml"
        yaml_file_path = str(upload_root / "yf" / yaml_file_name)
        response["yamlData"]["yamlFileContent"] = read_file(yaml_file_path)
        if data_file_id:
            yaml_config_file_name = yaml_file_id + ".pickle"
            yaml_config_file_path = str(upload_root / "yf" / yaml_config_file_name)
            # The highlighting step resolves the data file through UPLOAD_FOLDER
            # rather than the working directory, as in the original code.
            data_file_path = str(Path(app.config['UPLOAD_FOLDER']) / user_id / project_id / "df" / data_file_id)
            yaml_config = load_yaml_config(yaml_config_file_path)
            template = yaml_config.get_template()
            region = yaml_config.get_region()
            response["yamlData"]['yamlRegions'] = highlight_region(item_table, data_file_path, new_sheet_name,
                                                                   region, template)
            project_meta["yamlMapping"] = {data_file_id: {data["currSheetName"]: yaml_file_id}}
    else:
        response["yamlData"] = None

    project.update_project_config(project_meta)
    return json.dumps(response, indent=3)
def upload_data_file():
    """
    This function uploads the data file

    Reads the user id from the session and the project id from the ``pid``
    form field, stores the uploaded file through ``data_file_uploader`` and,
    when that succeeds, rebuilds the item table for the file and saves the
    current file/sheet, the wikifier region mapping and (when a YAML file
    exists for the sheet) the YAML mapping in the project configuration.

    :return: JSON string with ``tableData``, ``wikifierData``, ``yamlData``
             (``None`` when the sheet has no YAML file) and ``error``, or a
             redirect to ``index`` when no user id is in the session
    """
    if 'uid' not in session:
        return redirect(url_for('index'))

    response = {
        "tableData": dict(),
        "wikifierData": dict(),
        "yamlData": dict(),
        "error": None
    }
    project_meta = dict()
    user_id = session['uid']
    project_id = request.form['pid']
    data = data_file_uploader(user_id, project_id)
    if data["error"]:
        # Nothing was stored, so only the uploader's error is reported.
        response["error"] = data["error"]
        return json.dumps(response, indent=3)

    table_data = response["tableData"]
    data_file_id = data["currentDataFile"]
    project_meta["currentDataFile"] = data_file_id
    curr_data_file_name = data["dataFileMapping"][data_file_id]
    project_meta["dataFileMapping"] = data["dataFileMapping"]
    project_meta["mdate"] = int(time() * 1000)
    table_data["filename"] = curr_data_file_name
    table_data["isCSV"] = data["isCSV"]
    if not table_data["isCSV"]:
        table_data["sheetNames"] = data["sheetNames"]
        table_data["currSheetName"] = data["currSheetName"]
        project_meta["currentSheetName"] = data["currSheetName"]
    else:
        # A CSV has no sheets; the data file id stands in as its sheet name.
        table_data["sheetNames"] = None
        table_data["currSheetName"] = None
        project_meta["currentSheetName"] = data_file_id
    table_data["sheetData"] = data["sheetData"]

    project_config_path = get_project_config_path(user_id, project_id)
    project = Project(project_config_path)
    sheet_name = project_meta["currentSheetName"]
    upload_root = Path.cwd() / "config" / "uploads" / user_id / project_id

    region_map, region_file_name = get_region_mapping(user_id, project_id, project, data_file_id, sheet_name)
    item_table = ItemTable(region_map)
    wikifier_output_filepath = str(upload_root / "wf" / "other.csv")
    data_file_path = str(upload_root / "df" / data_file_id)

    add_excel_file_to_bindings(data_file_path, sheet_name)

    if Path(wikifier_output_filepath).exists():
        build_item_table(item_table, wikifier_output_filepath, data_file_path, sheet_name)
    # The region qnodes are reported and stored whether or not a wikifier
    # output file exists.
    region_qnodes = item_table.get_region_qnodes()
    response["wikifierData"] = region_qnodes
    project_meta["wikifierRegionMapping"] = {data_file_id: {sheet_name: region_file_name}}
    update_wikifier_region_file(user_id, project_id, region_file_name, region_qnodes)

    yaml_file_id = project.get_yaml_file_id(data_file_id, sheet_name)
    if yaml_file_id:
        response["yamlData"] = dict()
        yaml_file_name = yaml_file_id + ".yaml"
        yaml_file_path = str(upload_root / "yf" / yaml_file_name)
        response["yamlData"]["yamlFileContent"] = read_file(yaml_file_path)
        if data_file_id:
            yaml_config_file_name = yaml_file_id + ".pickle"
            yaml_config_file_path = str(upload_root / "yf" / yaml_config_file_name)
            # The highlighting step resolves the data file through UPLOAD_FOLDER
            # rather than the working directory, as in the original code.
            data_file_path = str(Path(app.config['UPLOAD_FOLDER']) / user_id / project_id / "df" / data_file_id)
            yaml_config = load_yaml_config(yaml_config_file_path)
            template = yaml_config.get_template()
            region = yaml_config.get_region()
            response["yamlData"]['yamlRegions'] = highlight_region(item_table, data_file_path, sheet_name,
                                                                   region, template)
            # Write the mapping back under the key it was looked up with. The
            # old key, data["currSheetName"], is the same value for excel files
            # but not for CSVs, where sheet_name is the data file id.
            project_meta["yamlMapping"] = {data_file_id: {sheet_name: yaml_file_id}}
    else:
        response["yamlData"] = None

    project.update_project_config(project_meta)
    return json.dumps(response, indent=3)