Ejemplo n.º 1
0
    def generate_hash_tables(self,
                             file_path: str,
                             excel_filepath: str,
                             sheet_name: str = None,
                             header: bool = True) -> None:
        """
        Build the cell -> qnode table from a wikified output file and store it in self.other.

        Every data row of the CSV is read as (column, row, value, qnode). A row with
        both coordinates filled in maps that cell to the qnode; a row with a non-empty
        value maps that value to the qnode. The spreadsheet is then used to
        (1) learn values for the explicitly listed cells and (2) assign a qnode to
        every cell whose text matches a known value.

        Results are written to self.other["qnodes"] (serialized table) and
        self.other["region"] (list of the table's keys).

        :param file_path: path of the wikifier output CSV (read as UTF-8)
        :param excel_filepath: path of the spreadsheet the CSV refers to
        :param sheet_name: sheet to load from the spreadsheet
        :param header: when True the first CSV line is skipped as a header
        :return: None
        """
        cell_to_qnode = dict()
        value_to_qnode = dict()
        with open(file_path, encoding='utf-8') as file:
            csv_reader = csv.reader(file, delimiter=',')
            if header:
                # Drop the header line; the default keeps an empty file from raising.
                next(csv_reader, None)
            for row in csv_reader:
                # csv.reader yields [] for blank lines; rows without a qnode
                # column carry nothing usable, so skip them instead of crashing.
                if len(row) < 4:
                    continue
                qnode = row[3]
                if not check_if_empty(row[0]) and not check_if_empty(row[1]):
                    cell_to_qnode[(int(row[0]), int(row[1]))] = qnode
                # csv fields are always strings (never None). An empty value must
                # not be registered, otherwise every blank cell of the sheet would
                # be matched against it in the final pass below.
                value = row[2].strip()
                if not check_if_empty(value):
                    value_to_qnode[value] = qnode

        sheet = pyexcel.get_sheet(sheet_name=sheet_name,
                                  file_name=excel_filepath)

        # Learn the text of each explicitly wikified cell; values that came
        # directly from the CSV take precedence over ones discovered here.
        for cell, qnode in cell_to_qnode.items():
            try:
                # keys are (column, row) while the sheet is indexed [row, column]
                cell_value = str(sheet[cell[1], cell[0]]).strip()
            except IndexError:
                # the CSV references a cell outside the sheet; ignore it
                continue
            if not check_if_empty(cell_value) and cell_value not in value_to_qnode:
                value_to_qnode[cell_value] = qnode

        # Propagate known values to every cell of the sheet holding the same text.
        for row in range(len(sheet)):
            for col in range(len(sheet[0])):
                try:
                    cell_value = str(sheet[row, col]).strip()
                except IndexError:
                    continue
                matched_qnode = value_to_qnode.get(cell_value)
                if matched_qnode:
                    cell_to_qnode[(col, row)] = matched_qnode

        cell_to_qnode = self.serialize_cell_to_qnode(cell_to_qnode)
        self.other["qnodes"] = cell_to_qnode
        self.other["region"] = list(cell_to_qnode.keys())
Ejemplo n.º 2
0
def wikify_region(region: str,
                  excel_filepath: str,
                  sheet_name: str = None) -> dict:
    """
    Wikify a cell range of a spreadsheet.

    The range string is parsed, the range is written to a temporary csv file and
    the wikifier service is called on that file to obtain a cell -> qnode map.
    The result keeps only the non-empty cells of the sheet; a non-empty cell the
    service returned nothing for is mapped to an empty string.

    :param region: cell range, in the format accepted by parse_cell_range
    :param excel_filepath: path of the spreadsheet
    :param sheet_name: sheet to read
    :return: dict from cell index to qnode (or "" when no qnode was found)
    """
    cell_range = parse_cell_range(region)
    csv_path = create_temporary_csv_file(cell_range, excel_filepath,
                                         sheet_name)
    qnodes_by_cell = call_wikifiy_service(csv_path, cell_range[0][0],
                                          cell_range[0][1])
    wikified = {}
    sheet = pyexcel.get_sheet(sheet_name=sheet_name, file_name=excel_filepath)

    first_col = cell_range[0][0]
    last_col = cell_range[1][0]
    for col in range(first_col, last_col + 1):
        for row in range(cell_range[0][1], cell_range[1][1] + 1):
            try:
                cell_index = get_actual_cell_index((col, row))
                if check_if_empty(sheet[row, col]):
                    continue
                if cell_index in qnodes_by_cell:
                    qnode = qnodes_by_cell[cell_index]
                else:
                    qnode = ""
                wikified[cell_index] = qnode
            except (IndexError, KeyError):
                # cells that cannot be looked up are left out of the result
                pass
    return wikified
Ejemplo n.º 3
0
def upload_yaml():
    """
    Process a YAML upload for a user.

    Reads the user id from request.form["id"] and the YAML text from
    request.values["yaml"]. The user's yaml state is reset; if the text is
    non-empty it is written to "<UPLOAD_FOLDER>/<user id>.yaml", loaded into a
    region and template, and the highlighted region is computed.

    :return: JSON string holding either an 'error' message or the 'region' data
    """
    user_id = request.form["id"]
    yaml_text = request.values["yaml"]

    # NOTE(review): this creates the literal "uploads" directory while the file
    # below is written under app.config['UPLOAD_FOLDER'] - confirm they match.
    os.makedirs("uploads", exist_ok=True)
    user = app.config['users'].get_user(user_id)
    user.reset('yaml')
    yaml_config = user.get_yaml_data()
    excel_path = user.get_excel_data().get_file_location()
    result = {}

    if check_if_empty(yaml_text):
        result['error'] = "YAML file is either empty or not valid"
        return json.dumps(result, indent=3)

    sheet_name = user.get_excel_data().get_sheet_name()
    target = Path(app.config['UPLOAD_FOLDER']) / user_id
    yaml_path = str(target) + ".yaml"
    with open(yaml_path, "w") as yaml_file:
        yaml_file.write(yaml_text)
        yaml_config.set_file_location(yaml_path)

    region, template = load_yaml_data(yaml_path)
    yaml_config.set_region(region)
    yaml_config.set_template(template)

    item_table = user.get_wikifier_output_data().get_item_table()
    # the template is read back from the configuration before highlighting
    template = yaml_config.get_template()
    result['region'] = highlight_region(item_table, excel_path, sheet_name, region, template)
    return json.dumps(result, indent=3)
Ejemplo n.º 4
0
def remove_empty_and_invalid_cells(region: Region) -> None:
    """
    Mark every empty cell strictly inside the bound rectangle as a hole in the region.

    The rectangle is taken from the module-level ``bindings`` ("$left", "$right",
    "$top", "$bottom"); the boundary columns and rows themselves are not visited.
    A cell counts as empty when check_if_empty accepts its string form.

    :param region: region that receives a hole for each empty cell
    :return: None
    """
    first_col = bindings["$left"] + 1
    end_col = bindings["$right"]
    for column in range(first_col, end_col):
        for line in range(bindings["$top"] + 1, bindings["$bottom"]):
            cell_text = str(bindings['excel_sheet'][line, column])
            if not check_if_empty(cell_text):
                continue
            region.add_hole(line, column, column)
Ejemplo n.º 5
0
def excel_uploader(user: UserData, sheet_name: str):
    """
    Load spreadsheet data for a user, from the stored file or from a new upload.

    When sheet_name is given and the user already has a stored file location,
    that file is converted again for the requested sheet. Otherwise the file is
    taken from request.files['file'], saved in the upload folder under
    "<user id>_excel_file.<extension>" and converted; the file location and
    sheet name are then recorded on the user's excel data.

    :param user: user whose excel data record is read and updated
    :param sheet_name: sheet to load; when empty on a new upload, the first
        entry of data['sheetNames'] is used if there is one, else None
    :return: the result of excel_to_json on success, otherwise a dict whose
        "error" key describes why nothing was loaded
    """
    user_data = user.get_excel_data()
    data = {"error": ""}

    # Sheet switch on an already uploaded file: no new upload is expected.
    if sheet_name and not check_if_empty(user_data.get_file_location()):
        file_path = user_data.get_file_location()
        data = excel_to_json(file_path, sheet_name)
        user_data.set_sheet_name(sheet_name)
        return data

    if 'file' not in request.files:
        data["error"] = 'No file part'
        return data

    uploaded = request.files['file']
    if uploaded.filename == '':
        # Must be part of the same if/elif chain as the type check below;
        # otherwise this message is overwritten by the "not supported" one.
        data["error"] = 'No file selected for uploading'
    elif uploaded and allowed_file(uploaded.filename):
        filename = secure_filename(uploaded.filename)
        filename = user.get_user_id() + "_excel_file." + get_file_extension(filename)
        file_path = str(Path(app.config['UPLOAD_FOLDER']) / filename)
        uploaded.save(file_path)
        data = excel_to_json(file_path, sheet_name)
        if not sheet_name:
            try:
                sheet_name = data['sheetNames'][0]
            except (KeyError, IndexError):
                # no sheet list, or an empty one: leave the sheet unset
                sheet_name = None
        user_data.set_file_location(file_path)
        user_data.set_sheet_name(sheet_name)
    else:
        data["error"] = 'This file type is currently not supported'
    return data