Ejemplo n.º 1
0
def upload_wikifier_output():
	"""
	Handle an upload of the wikifier output for a project.

	Reads the user id from the session and the project id from the ``pid``
	form field, hands the upload to ``wikified_output_uploader`` and, when
	the project reports a current data file, rebuilds the item table from
	the uploaded wikifier file and writes the resulting region qnodes to the
	wikifier region file.

	:return: JSON string containing the region qnodes (only when a current
		data file exists) plus an ``error`` entry holding the uploader's
		result, or a redirect to ``index`` when no user is logged in
	"""
	# Without this guard an unauthenticated request fell off the end of the
	# function and returned None, which is not a valid view response.
	if 'uid' not in session:
		return redirect(url_for('index'))

	response = dict()
	user_id = session['uid']
	project_id = request.form['pid']
	project_meta = dict()
	error = wikified_output_uploader(user_id, project_id)
	project_config_path = get_project_config_path(user_id, project_id)
	project = Project(project_config_path)
	file_name, sheet_name = project.get_current_file_and_sheet()
	if file_name:
		region_map, region_file_name = get_region_mapping(user_id, project_id, project, file_name, sheet_name)
		item_table = ItemTable(region_map)
		uploads_dir = Path.cwd() / "config" / "uploads" / user_id / project_id
		wikifier_output_filepath = str(uploads_dir / "wf" / "other.csv")
		data_filepath = str(uploads_dir / "df" / file_name)
		build_item_table(item_table, wikifier_output_filepath, data_filepath, sheet_name)
		response.update(item_table.get_region_qnodes())
		update_wikifier_region_file(user_id, project_id, region_file_name, response)
		# NOTE(review): project_meta is filled in here but never handed to
		# project.update_project_config() in this function - confirm whether
		# the region mapping is meant to be persisted.
		project_meta["wikifierRegionMapping"] = {file_name: {sheet_name: region_file_name}}
	# The error entry is added after the region file is written, so it is
	# part of the HTTP response only.
	response['error'] = error
	return json.dumps(response, indent=3)
Ejemplo n.º 2
0
def upload_excel():
	"""
	Handle an upload of the excel data file.

	Form fields read: ``id`` (user id), ``is_new_upload`` (the literal string
	``"True"`` marks a new upload) and the optional ``sheet_name``.

	:return: JSON string of the uploader response, extended with the region
		qnodes when both the data file and the wikifier output file have a
		known location
	"""
	user_id = request.form["id"]
	fresh_upload = request.form["is_new_upload"] == "True"
	user = app.config['users'].get_user(user_id)
	sheet_name = request.form.get("sheet_name")

	# The YAML state is always discarded; excel and wikifier state only for
	# a new upload. The item table is reset in either case.
	user.reset('yaml')
	if fresh_upload:
		user.reset('excel')
		user.get_wikifier_output_data().reset()
	user.get_wikifier_output_data().reset_item_table()

	os.makedirs("uploads", exist_ok=True)
	response = excel_uploader(user, sheet_name)
	data_path = user.get_excel_data().get_file_location()
	wikifier_path = user.get_wikifier_output_data().get_file_location()

	# Nothing to wikify unless both files are available.
	if not (data_path and wikifier_path):
		return json.dumps(response, indent=3)

	item_table = user.get_wikifier_output_data().get_item_table()
	if not item_table:
		item_table = ItemTable()
		user.get_wikifier_output_data().set_item_table(item_table)
	build_item_table(item_table, wikifier_path, data_path, sheet_name)
	response.update(item_table.get_region_qnodes())
	return json.dumps(response, indent=3)
Ejemplo n.º 3
0
def run_t2wml(
    data_file_path: str,
    wikified_output_path: str,
    t2wml_spec: str,
    output_directory: str,
    sheet_name: str = None,
    sparql_endpoint: str = "http://dsbox02.isi.edu:8888/bigdata/namespace/wdq/sparql"):
    """
    Run the T2WML pipeline for one data file and write the results to disk.

    Loads the YAML spec, builds the item table from the wikified output,
    generates the ``ttl`` download and writes ``results.ttl`` and
    ``changes.tsv`` into the output location.

    Output location, derived from the data file's name:
      * ``csv``          -> ``<output_directory>/<file stem>``
      * ``xls``/``xlsx`` -> ``<output_directory>/<file stem>/<sheet name>``
        (the first sheet is used when ``sheet_name`` is not given)
      * anything else    -> the current working directory

    :param data_file_path: path of the data file (csv/xls/xlsx)
    :param wikified_output_path: path of the wikifier output file
    :param t2wml_spec: path of the T2WML YAML specification
    :param output_directory: directory under which results are written
    :param sheet_name: sheet to process; optional
    :param sparql_endpoint: SPARQL endpoint passed to the generator
    :return: None. Failures are logged and the function returns early.
    """
    # `except Exception` instead of a bare `except` so KeyboardInterrupt and
    # SystemExit are not swallowed; logging.exception keeps the traceback.
    try:
        yaml_configuration = YAMLFile()
        yaml_configuration.set_file_location(t2wml_spec)
        region, template = load_yaml_data(t2wml_spec)
        yaml_configuration.set_region(region)
        yaml_configuration.set_template(template)
    except Exception:
        logging.exception("Invalid YAML File")
        return

    try:
        item_table = ItemTable()
        build_item_table(item_table, wikified_output_path, data_file_path,
                         sheet_name)
    except Exception:
        logging.exception("Invalid Wikfied Output File")
        return

    # The previous `split(".")[-1]` inside try/except could never raise, so
    # the "no extension" error was unreachable. Path.suffix makes the check
    # real, and it runs before the expensive generation step.
    data_file = Path(data_file_path)
    file_extension = data_file.suffix[1:]
    if not file_extension:
        logging.error("Data file has no extension")
        return
    result_directory = data_file.stem

    filetype = "ttl"
    response = generate_download_file(None, item_table, data_file_path,
                                      sheet_name, region, template, filetype,
                                      sparql_endpoint)

    # Unrecognised extensions keep the original fallback: the current
    # working directory.
    output_path = Path()
    if file_extension == "csv":
        output_path = Path(output_directory) / result_directory
    elif file_extension in ("xls", "xlsx"):
        if not sheet_name:
            sheet_name = get_first_sheet_name(data_file_path)
        output_path = Path(output_directory) / result_directory / sheet_name

    output_path.mkdir(parents=True, exist_ok=True)

    with open(str(output_path / "results.ttl"), "w") as fp:
        fp.write(response["data"])

    with open(str(output_path / "changes.tsv"), "w") as fp:
        serialize_change_record(fp)
Ejemplo n.º 4
0
def upload_wikified_output():
	"""
	Handle an upload of the wikifier output.

	Reads the user id from the ``id`` form field, resets the user's wikifier
	output state, stores the upload via ``wikified_output_uploader`` and,
	when both the data file and the wikifier output file have a known
	location, rebuilds the item table.

	:return: JSON string of the region qnodes when the item table was
		rebuilt, otherwise of the uploader's response
	"""
	user_id = request.form["id"]
	os.makedirs("uploads", exist_ok=True)
	user = app.config['users'].get_user(user_id)
	user.reset('wikifier_output')
	response = wikified_output_uploader(user)
	excel_data_filepath = user.get_excel_data().get_file_location()
	sheet_name = user.get_excel_data().get_sheet_name()
	wikifier_output_filepath = user.get_wikifier_output_data().get_file_location()
	# The condition used to test excel_data_filepath twice, so the wikifier
	# path was never checked before being passed to build_item_table.
	if excel_data_filepath and wikifier_output_filepath:
		item_table = user.get_wikifier_output_data().get_item_table()
		if not item_table:
			item_table = ItemTable()
			user.get_wikifier_output_data().set_item_table(item_table)
		build_item_table(item_table, wikifier_output_filepath, excel_data_filepath, sheet_name)
		# The uploader response is replaced (not merged) by the qnodes here.
		response = item_table.get_region_qnodes()
	return json.dumps(response, indent=3)
Ejemplo n.º 5
0
def change_sheet():
	"""
	This route is used when a user switches a sheet in an excel data file.

	Reads the user id from the session and ``sheet_name`` / ``pid`` from the
	form, loads the requested sheet, rebuilds the wikifier item table for it,
	attaches the YAML data mapped to that sheet (if any) and stores the
	updated metadata in the project config.

	:return: JSON string with ``tableData``, ``wikifierData``, ``yamlData``
		and ``error`` entries, or a redirect to ``index`` when no user is
		logged in
	"""
	# Without this guard an unauthenticated request fell off the end of the
	# function and returned None, which is not a valid view response.
	if 'uid' not in session:
		return redirect(url_for('index'))

	response = {
		"tableData": dict(),
		"wikifierData": dict(),
		"yamlData": dict(),
		"error": None
	}
	project_meta = dict()
	user_id = session['uid']
	new_sheet_name = request.form['sheet_name']
	project_id = request.form['pid']
	project_config_path = get_project_config_path(user_id, project_id)
	project = Project(project_config_path)
	# Only the file id is needed; the previously selected sheet is replaced.
	data_file_id, _ = project.get_current_file_and_sheet()
	data_file_path = str(Path.cwd() / "config" / "uploads" / user_id / project_id / "df" / data_file_id)
	data = excel_to_json(data_file_path, new_sheet_name)
	table_data = response["tableData"]
	table_data["filename"] = project.get_file_name_by_id(data_file_id)
	table_data["isCSV"] = False  # because CSVs don't have sheets
	table_data["sheetNames"] = data["sheetNames"]
	table_data["currSheetName"] = data["currSheetName"]
	table_data["sheetData"] = data["sheetData"]
	project_meta["currentSheetName"] = data["currSheetName"]

	add_excel_file_to_bindings(data_file_path, new_sheet_name)

	region_map, region_file_name = get_region_mapping(user_id, project_id, project, data_file_id, new_sheet_name)
	item_table = ItemTable(region_map)
	wikifier_output_filepath = str(Path.cwd() / "config" / "uploads" / user_id / project_id / "wf" / "other.csv")
	# The item table is only populated when a wikifier file exists.
	if Path(wikifier_output_filepath).exists():
		build_item_table(item_table, wikifier_output_filepath, data_file_path, new_sheet_name)
	region_qnodes = item_table.get_region_qnodes()
	response["wikifierData"] = region_qnodes
	project_meta["wikifierRegionMapping"] = dict()
	project_meta["wikifierRegionMapping"][data_file_id] = dict()
	project_meta["wikifierRegionMapping"][data_file_id][new_sheet_name] = region_file_name
	update_wikifier_region_file(user_id, project_id, region_file_name, region_qnodes)

	yaml_file_id = project.get_yaml_file_id(data_file_id, new_sheet_name)
	if yaml_file_id:
		response["yamlData"] = dict()
		yaml_file_name = yaml_file_id + ".yaml"
		yaml_file_path = str(Path.cwd() / "config" / "uploads" / user_id / project_id / "yf" / yaml_file_name)
		response["yamlData"]["yamlFileContent"] = read_file(yaml_file_path)
		if data_file_id:
			yaml_config_file_name = yaml_file_id + ".pickle"
			yaml_config_file_path = str(
				Path.cwd() / "config" / "uploads" / user_id / project_id / "yf" / yaml_config_file_name)
			# From here on the data file is addressed through UPLOAD_FOLDER
			# rather than the cwd-based path used above.
			data_file_path = str(Path(app.config['UPLOAD_FOLDER']) / user_id / project_id / "df" / data_file_id)

			# NOTE(review): presumably this unpickles a server-generated
			# file - confirm it can never contain user-supplied bytes.
			yaml_config = load_yaml_config(yaml_config_file_path)
			template = yaml_config.get_template()
			region = yaml_config.get_region()
			response["yamlData"]['yamlRegions'] = highlight_region(item_table, data_file_path, new_sheet_name, region, template)
			project_meta["yamlMapping"] = dict()
			project_meta["yamlMapping"][data_file_id] = dict()
			project_meta["yamlMapping"][data_file_id][data["currSheetName"]] = yaml_file_id
	else:
		response["yamlData"] = None

	project.update_project_config(project_meta)
	return json.dumps(response, indent=3)
Ejemplo n.º 6
0
def upload_data_file():
	"""
	Handle an upload of the data file for a project.

	Reads the user id from the session and the project id from the ``pid``
	form field, stores the upload via ``data_file_uploader``, rebuilds the
	wikifier item table for the new file, attaches the YAML data mapped to
	it (if any) and stores the updated metadata in the project config.

	:return: JSON string with ``tableData``, ``wikifierData``, ``yamlData``
		and ``error`` entries, or a redirect to ``index`` when no user is
		logged in. When the uploader reports an error, only ``error`` is
		filled in and nothing else is processed.
	"""
	if 'uid' not in session:
		return redirect(url_for('index'))

	response = {
		"tableData": dict(),
		"wikifierData": dict(),
		"yamlData": dict(),
		"error": None
	}
	project_meta = dict()
	user_id = session['uid']
	project_id = request.form['pid']
	data = data_file_uploader(user_id, project_id)
	if data["error"]:
		# Previously execution continued past this branch and crashed on
		# the unassigned curr_data_file_id / currentSheetName; report the
		# upload error to the client instead.
		response["error"] = data["error"]
		return json.dumps(response, indent=3)

	table_data = response["tableData"]
	curr_data_file_id = data["currentDataFile"]
	project_meta["currentDataFile"] = curr_data_file_id
	curr_data_file_name = data["dataFileMapping"][curr_data_file_id]
	project_meta["dataFileMapping"] = data["dataFileMapping"]
	project_meta["mdate"] = int(time() * 1000)  # milliseconds since the epoch
	table_data["filename"] = curr_data_file_name
	table_data["isCSV"] = data["isCSV"]
	if not table_data["isCSV"]:
		table_data["sheetNames"] = data["sheetNames"]
		table_data["currSheetName"] = data["currSheetName"]
		project_meta["currentSheetName"] = data["currSheetName"]
	else:
		# For a CSV the file id is stored in place of a sheet name.
		table_data["sheetNames"] = None
		table_data["currSheetName"] = None
		project_meta["currentSheetName"] = curr_data_file_id
	table_data["sheetData"] = data["sheetData"]

	project_config_path = get_project_config_path(user_id, project_id)
	project = Project(project_config_path)

	data_file_name = curr_data_file_id
	sheet_name = project_meta["currentSheetName"]
	region_map, region_file_name = get_region_mapping(user_id, project_id, project, data_file_name, sheet_name)
	item_table = ItemTable(region_map)
	wikifier_output_filepath = str(Path.cwd() / "config" / "uploads" / user_id / project_id / "wf" / "other.csv")
	data_file_path = str(Path.cwd() / "config" / "uploads" / user_id / project_id / "df" / data_file_name)

	add_excel_file_to_bindings(data_file_path, sheet_name)

	# The item table is only populated when a wikifier file exists.
	if Path(wikifier_output_filepath).exists():
		build_item_table(item_table, wikifier_output_filepath, data_file_path, sheet_name)
	region_qnodes = item_table.get_region_qnodes()
	response["wikifierData"] = region_qnodes
	project_meta["wikifierRegionMapping"] = dict()
	project_meta["wikifierRegionMapping"][data_file_name] = dict()
	project_meta["wikifierRegionMapping"][data_file_name][sheet_name] = region_file_name
	update_wikifier_region_file(user_id, project_id, region_file_name, region_qnodes)

	yaml_file_id = project.get_yaml_file_id(data_file_name, sheet_name)
	if yaml_file_id:
		response["yamlData"] = dict()
		yaml_file_name = yaml_file_id + ".yaml"
		yaml_file_path = str(Path.cwd() / "config" / "uploads" / user_id / project_id / "yf" / yaml_file_name)
		response["yamlData"]["yamlFileContent"] = read_file(yaml_file_path)
		if data_file_name:
			yaml_config_file_name = yaml_file_id + ".pickle"
			yaml_config_file_path = str(
				Path.cwd() / "config" / "uploads" / user_id / project_id / "yf" / yaml_config_file_name)
			# From here on the data file is addressed through UPLOAD_FOLDER
			# rather than the cwd-based path used above.
			data_file_path = str(Path(app.config['UPLOAD_FOLDER']) / user_id / project_id / "df" / data_file_name)

			yaml_config = load_yaml_config(yaml_config_file_path)
			template = yaml_config.get_template()
			region = yaml_config.get_region()
			response["yamlData"]['yamlRegions'] = highlight_region(item_table, data_file_path, sheet_name, region, template)
			project_meta["yamlMapping"] = dict()
			project_meta["yamlMapping"][data_file_name] = dict()
			# NOTE(review): the lookup above uses sheet_name, while this
			# key uses data["currSheetName"]; for CSV uploads the two may
			# differ - confirm which key is intended.
			project_meta["yamlMapping"][data_file_name][data["currSheetName"]] = yaml_file_id
	else:
		response["yamlData"] = None

	project.update_project_config(project_meta)
	return json.dumps(response, indent=3)