def evaluate_template(template: dict) -> dict:
    """
    Build a resolved copy of the template in which every T2WML expression
    has been replaced by the value it evaluates to under the shared
    ``bindings``.

    Plain (non-expression) values are copied through unchanged. When the
    'item' entry is an expression, the index of the cell it resolved to is
    stored under 'cell'. Each qualifier is resolved the same way; every
    expression inside a qualifier overwrites that qualifier's 'cell' entry.
    :param template: statement template; the 'qualifier' key, if present,
                     holds a sequence of qualifier mappings
    :return: new dictionary holding the resolved statement
    """
    response = {}
    for key, value in template.items():
        if key != 'qualifier':
            if not isinstance(value, (ItemExpression, ValueExpression,
                                      BooleanEquation)):
                response[key] = value
                continue
            col, row, response[key] = value.evaluate_and_get_cell(bindings)
            if key == "item":
                response['cell'] = get_actual_cell_index((col, row))
            continue

        resolved_qualifiers = []
        response[key] = resolved_qualifiers
        for qualifier in value:
            resolved = {}
            for field, content in qualifier.items():
                if isinstance(content, (ItemExpression, ValueExpression,
                                        BooleanEquation)):
                    col, row, resolved[field] = content.evaluate_and_get_cell(
                        bindings)
                    resolved['cell'] = get_actual_cell_index((col, row))
                else:
                    resolved[field] = content
            resolved_qualifiers.append(resolved)
    return response
def generate_download_file(user_id: str, item_table: ItemTable,
                           excel_data_filepath: str, sheet_name: str,
                           region_specification: dict, template: dict,
                           filetype: str, sparql_endpoint: str) -> str:
    """
    Evaluate the template for every cell of the region and serialise the
    resulting statements in the requested file format.

    The region is walked from its head cell by following each cell's
    ``next`` link; the current position is kept in the shared ``bindings``
    under "$col" and "$row".
    :param user_id: forwarded to generate_triples for 'ttl' output
    :param item_table: forwarded to update_bindings
    :param excel_data_filepath: forwarded to update_bindings
    :param sheet_name: forwarded to update_bindings
    :param region_specification: must contain the 'region_object' to walk
    :param template: statement template evaluated once per cell
    :param filetype: 'json' or 'ttl'
    :param sparql_endpoint: forwarded to generate_triples for 'ttl' output
    :return: JSON text for 'json'; the result of generate_triples (or the
             text of the exception it raised) for 'ttl'; None for any other
             filetype
    """
    update_bindings(item_table, region_specification, excel_data_filepath,
                    sheet_name)
    region = region_specification['region_object']
    statements = []
    # Per-cell failures are gathered here but are not part of the output.
    failures = []

    head = region.get_head()
    bindings["$col"] = head[0]
    bindings["$row"] = head[1]

    while True:
        if region.sheet.get((bindings["$col"], bindings["$row"]),
                            None) is None:
            break

        try:
            statement = evaluate_template(template)
            cell = get_actual_cell_index((bindings["$col"], bindings["$row"]))
            statements.append({'cell': cell, 'statement': statement})
        except Exception as e:
            cell = get_actual_cell_index((bindings["$col"], bindings["$row"]))
            failures.append({'cell': cell, 'error': str(e)})

        # Move on to the next cell; (None, None) ends the walk.
        successor = region.sheet[(bindings["$col"], bindings["$row"])].next
        if successor is None:
            successor = (None, None)
        bindings["$col"], bindings["$row"] = successor

    if filetype == 'json':
        return json.dumps(statements, indent=3)
    if filetype == 'ttl':
        try:
            return generate_triples(user_id, statements, sparql_endpoint,
                                    filetype)
        except Exception as e:
            return str(e)
def wikify_region(region: str, excel_filepath: str, sheet_name: str = None) -> dict:
    """
    Wikify a rectangular cell range of a sheet.

    The range is parsed, exported to a temporary csv file and sent to the
    wikifier service. The returned cell-to-qnode map is then filtered
    against the sheet: cells whose content check_if_string_is_invalid
    rejects are left out, and accepted cells without a qnode map to "".
    :param region: cell range specification understood by parse_cell_range
    :param excel_filepath: path of the spreadsheet file
    :param sheet_name: sheet to read; passed through to pyexcel
    :return: dictionary from cell index to qnode (or "" when none was found)
    """
    cell_range = parse_cell_range(region)
    top_left = cell_range[0]
    bottom_right = cell_range[1]

    csv_path = create_temporary_csv_file(cell_range, excel_filepath,
                                         sheet_name)
    cell_qnode_map = call_wikifiy_service(csv_path, top_left[0], top_left[1])

    sheet = pyexcel.get_sheet(sheet_name=sheet_name, file_name=excel_filepath)
    response = {}
    for col in range(top_left[0], bottom_right[0] + 1):
        for row in range(top_left[1], bottom_right[1] + 1):
            try:
                cell_index = get_actual_cell_index((col, row))
                if check_if_string_is_invalid(sheet[row, col]):
                    continue
                if cell_index in cell_qnode_map:
                    response[cell_index] = cell_qnode_map[cell_index]
                else:
                    response[cell_index] = ""
            except (IndexError, KeyError):
                # Cells that cannot be addressed are skipped.
                pass
    return response
def call_wikifiy_service(csv_filepath: str, col_offset: int, row_offset: int) -> dict:
    """
    Upload a csv file to the wikifier service and build a cell to qnode
    dictionary from its response, e.g.
    cell to qnode dictionary = { 'A4': 'Q383', 'B5': 'Q6892' }

    Each line of the response's 'data' list is split on ','; the first two
    fields are parsed as integers and shifted by the given offsets to form
    the cell position, and the third field is stored as that cell's qnode.
    A response whose status code is not 200 yields an empty dictionary.
    :param csv_filepath: path of the csv file to upload
    :param col_offset: added to the first field of every returned line
    :param row_offset: added to the second field of every returned line
    :return: dictionary from cell index to qnode
    """
    cell_qnode_map = dict()
    # The upload happens inside the ``with`` block so the file handle is
    # always closed, even when the request raises.
    with open(csv_filepath, 'r') as csv_file:
        files = {
            'file': ('', csv_file),
            'format': (None, 'ISWC'),
            'type': (None, 'text/csv'),
            'header': (None, 'False')
        }
        response = requests.post(
            'https://dsbox02.isi.edu:8888/wikifier/wikify', files=files)

    if response.status_code == 200:
        data = json.loads(response.content.decode("utf-8"))['data']
        for line in data:
            fields = line.split(',')
            cell_index = get_actual_cell_index(
                (int(fields[0]) + col_offset, int(fields[1]) + row_offset))
            cell_qnode_map[cell_index] = fields[2]
    return cell_qnode_map
def get_item(self, column: int, row: int) -> Union[str, Exception]:
    """
    Return the qnode recorded for the cell at (column, row).

    Entries in ``self.region_qnodes['qnodes']`` take precedence over
    entries in ``self.other['qnodes']``. A missing or falsy entry (for
    example an empty string) counts as "no qnode" in both tables.
    :param column: column of the cell
    :param row: row of the cell
    :return: qnode of the cell
    :raises Exception: when neither table holds a qnode for the cell
    """
    cell_index = get_actual_cell_index((column, row))
    # ``or`` keeps the lookup lazy: self.other is consulted only when the
    # region table has nothing usable for this cell.
    qnode = (self.region_qnodes['qnodes'].get(cell_index, None)
             or self.other["qnodes"].get(cell_index, None))
    if qnode:
        return qnode
    # Build a single message string; passing two arguments to Exception
    # made str(e) render as a tuple.
    raise Exception('No QNode Exists for the cell: {}'.format(cell_index))
def serialize_cell_to_qnode(self, cell_to_qnode: dict) -> dict:
    """
    Return a copy of the cell_to_qnode dictionary whose keys have been
    converted with get_actual_cell_index; the values are kept as they are.
    :param cell_to_qnode: dictionary keyed by cell
    :return: dictionary keyed by the converted cell index
    """
    return {
        get_actual_cell_index(position): qnode
        for position, qnode in cell_to_qnode.items()
    }
def evaluate_template(template: dict) -> dict:
    """
    Build a resolved copy of the template in which every T2WML expression
    has been replaced by the value it evaluates to under the shared
    ``bindings``.

    Plain (non-expression) values are copied through unchanged. When the
    'item' entry is an expression, the index of the cell it resolved to is
    stored under 'cell'. Qualifiers are resolved the same way. In addition,
    a qualifier whose property is "P585" and which carries a 'format' has
    its 'value' parsed with parse_datetime_string, and its 'precision' is
    either derived from the parse result or translated to an integer when
    the qualifier already specifies one.
    :param template: statement template; the 'qualifier' key, if present,
                     holds a sequence of qualifier mappings
    :return: new dictionary holding the resolved statement
    :raises Exception: whatever expression evaluation or date parsing raises
    """
    response = dict()
    for key, value in template.items():
        if key == 'qualifier':
            response[key] = []
            for qualifier in value:
                temp_dict = dict()
                for k, v in qualifier.items():
                    if isinstance(v, (ItemExpression, ValueExpression,
                                      BooleanEquation)):
                        col, row, temp_dict[k] = v.evaluate_and_get_cell(
                            bindings)
                        temp_dict['cell'] = get_actual_cell_index((col, row))
                    else:
                        temp_dict[k] = v

                # NOTE(review): "P585" is hard-coded here; presumably the
                # Wikidata "point in time" property - confirm.
                if temp_dict.get("property") == "P585" and "format" in temp_dict:
                    datetime_string, precision = parse_datetime_string(
                        temp_dict["value"],
                        additional_formats=[temp_dict["format"]])
                    if "precision" in temp_dict:
                        temp_dict["precision"] = translate_precision_to_integer(
                            temp_dict["precision"])
                    else:
                        temp_dict["precision"] = int(str(precision.value))
                    temp_dict["value"] = datetime_string
                response[key].append(temp_dict)
        elif isinstance(value, (ItemExpression, ValueExpression,
                                BooleanEquation)):
            col, row, response[key] = value.evaluate_and_get_cell(bindings)
            if key == "item":
                response['cell'] = get_actual_cell_index((col, row))
        else:
            response[key] = value
    return response
def _add_expression_cell(expression, cells: set) -> None:
    """
    Add the index of the cell that ``expression`` points at to ``cells``.

    Item/value/boolean expressions without variables, and column/row
    expressions, are located with get_cell. An item/value/boolean
    expression with exactly one variable is located by binding that
    variable to 0, 1, 2, ... in the shared ``bindings`` until the
    expression evaluates to a truthy value; the temporary binding is
    removed afterwards. Expressions with more than one variable, and
    objects of any other type, add nothing. An AttributeError raised along
    the way is ignored.
    """
    try:
        if isinstance(expression,
                      (ItemExpression, ValueExpression, BooleanEquation)):
            if expression.variables:
                variables = list(expression.variables)
                if len(variables) == 1:
                    variable = variables[0]
                    bindings[variable] = 0
                    while not expression.evaluate(bindings):
                        bindings[variable] += 1
                    col, row, _ = expression.evaluate_and_get_cell(bindings)
                    cells.add(get_actual_cell_index((col, row)))
                    del bindings[variable]
            else:
                cells.add(get_actual_cell_index(get_cell(expression)))
        elif isinstance(expression, (ColumnExpression, RowExpression)):
            cells.add(get_actual_cell_index(get_cell(expression)))
    except AttributeError:
        pass


def highlight_region(item_table: ItemTable, excel_data_filepath: str,
                     sheet_name: str, region_specification: dict,
                     template: dict) -> dict:
    """
    Walk the region cell by cell and build the lists of data cells, item
    cells and qualifier cells that belong to it.

    The walk starts at the region's head and follows each cell's ``next``
    link; the current position is kept in the shared ``bindings`` under
    "$col" and "$row". An exception raised while processing a cell is
    recorded under 'error' for that cell and the walk continues.
    :param item_table: forwarded to update_bindings
    :param excel_data_filepath: forwarded to update_bindings
    :param sheet_name: forwarded to update_bindings
    :param region_specification: must contain the 'region_object' to walk
    :param template: statement template; its 'item' and 'qualifier' entries
                     (both optional) decide which cells are collected
    :return: dictionary with the lists 'dataRegion', 'item' and
             'qualifierRegion' plus an 'error' dictionary keyed by cell index
    """
    update_bindings(item_table, region_specification, excel_data_filepath,
                    sheet_name)
    region = region_specification['region_object']
    head = region.get_head()
    data = {
        "dataRegion": set(),
        "item": set(),
        "qualifierRegion": set(),
        'error': dict()
    }
    bindings["$col"] = head[0]
    bindings["$row"] = head[1]
    item = template.get('item')
    qualifiers = template.get('qualifier')

    while region.sheet.get((bindings["$col"], bindings["$row"]),
                           None) is not None:
        try:
            data["dataRegion"].add(
                get_actual_cell_index((bindings["$col"], bindings["$row"])))

            if item:
                _add_expression_cell(item, data["item"])

            if qualifiers:
                # Collected separately so that a failure part-way through
                # the qualifiers leaves the overall set untouched.
                qualifier_cells = set()
                for qualifier in qualifiers:
                    _add_expression_cell(qualifier["value"], qualifier_cells)
                data["qualifierRegion"] |= qualifier_cells
        except Exception as e:
            data['error'][get_actual_cell_index(
                (bindings["$col"], bindings["$row"]))] = str(e)

        # Move on to the next cell; (None, None) ends the walk.
        successor = region.sheet[(bindings["$col"], bindings["$row"])].next
        if successor is not None:
            bindings["$col"], bindings["$row"] = successor
        else:
            bindings["$col"], bindings["$row"] = None, None

    data['dataRegion'] = list(data['dataRegion'])
    data['item'] = list(data['item'])
    data['qualifierRegion'] = list(data['qualifierRegion'])
    return data
def _resolve_template_expression(expression, is_boolean: bool):
    """
    Evaluate ``expression`` under the shared ``bindings``.

    An expression without variables is evaluated directly. An expression
    with exactly one variable is evaluated by binding that variable to
    0, 1, 2, ... until it yields a truthy result (``evaluate`` for boolean
    equations, the third element of ``evaluate_and_get_cell`` otherwise);
    the temporary binding is removed afterwards.
    :return: the result of evaluate_and_get_cell, unpacked by the caller as
             (col, row, value), or None when the expression has more than
             one variable and is therefore left unresolved
    """
    if not expression.variables:
        return expression.evaluate_and_get_cell(bindings)

    variables = list(expression.variables)
    if len(variables) != 1:
        return None

    variable = variables[0]
    bindings[variable] = 0
    if is_boolean:
        while not expression.evaluate(bindings):
            bindings[variable] += 1
    else:
        while not expression.evaluate_and_get_cell(bindings)[2]:
            bindings[variable] += 1
    result = expression.evaluate_and_get_cell(bindings)
    del bindings[variable]
    return result


def _normalize_time_statement(statement: dict) -> None:
    """
    Rewrite the 'value' and 'precision' of a time statement in place.

    Nothing happens unless the statement carries a 'format'. Otherwise
    'value' is parsed with parse_datetime_string using that format, and
    'precision' is derived from the parse result, or translated to an
    integer when the statement already specifies one.
    """
    if "format" not in statement:
        return
    datetime_string, precision = parse_datetime_string(
        statement["value"], additional_formats=[statement["format"]])
    if "precision" in statement:
        statement["precision"] = translate_precision_to_integer(
            statement["precision"])
    else:
        statement["precision"] = int(str(precision.value))
    statement["value"] = datetime_string


def evaluate_template(template: dict, sparql_endpoint: str) -> dict:
    """
    Build a resolved copy of the template in which every T2WML expression
    has been replaced by the value it evaluates to under the shared
    ``bindings``.

    Plain (non-expression) values are copied through unchanged. Inside a
    qualifier, the result of any expression is stored under 'value' and the
    cell it resolved to under 'cell'. At the top level the result is stored
    under the expression's own key; 'cell' is recorded for the 'item' entry
    and for every boolean equation. Statements and qualifiers whose
    property type is "Time" additionally get their date value normalised.
    An expression with more than one variable is left unresolved.
    :param template: statement template; must contain 'property', and the
                     'qualifier' key, if present, holds a sequence of
                     qualifier mappings
    :param sparql_endpoint: forwarded to get_property_type
    :return: new dictionary holding the resolved statement
    :raises KeyError: when the template has no 'property' entry
    """
    response = dict()
    for key, value in template.items():
        if key == 'qualifier':
            response[key] = []
            for qualifier in value:
                temp_dict = dict()
                for k, v in qualifier.items():
                    if isinstance(v, (ItemExpression, ValueExpression)):
                        resolved = _resolve_template_expression(v, False)
                    elif isinstance(v, BooleanEquation):
                        resolved = _resolve_template_expression(v, True)
                    else:
                        temp_dict[k] = v
                        continue
                    if resolved is not None:
                        col, row, temp_dict['value'] = resolved
                        temp_dict['cell'] = get_actual_cell_index((col, row))

                if "property" in temp_dict and get_property_type(
                        temp_dict["property"], sparql_endpoint) == "Time":
                    _normalize_time_statement(temp_dict)
                response[key].append(temp_dict)
        elif isinstance(value, (ItemExpression, ValueExpression)):
            resolved = _resolve_template_expression(value, False)
            # Guarding on the result keeps the 'cell' lookup from running
            # with col/row that were never set for this entry.
            if resolved is not None:
                col, row, response[key] = resolved
                if key == "item":
                    response['cell'] = get_actual_cell_index((col, row))
        elif isinstance(value, BooleanEquation):
            resolved = _resolve_template_expression(value, True)
            if resolved is not None:
                col, row, response[key] = resolved
                response['cell'] = get_actual_cell_index((col, row))
        else:
            response[key] = value

    if get_property_type(response["property"], sparql_endpoint) == "Time":
        _normalize_time_statement(response)
    return response
def highlight_region(item_table: ItemTable, excel_data_filepath: str,
                     sheet_name: str, region_specification: dict,
                     template: dict) -> dict:
    """
    Walk the region cell by cell, build the lists of data cells, item cells
    and qualifier cells, and add a hole to the region for every cell that
    a skip rule excludes.

    The walk starts at the region's head (after
    remove_empty_and_invalid_cells has been applied to the region) and
    follows each cell's ``next`` link; the current position is kept in the
    shared ``bindings`` under "$col" and "$row". An exception raised while
    processing a cell is recorded under 'error' for that cell and the walk
    continues. Holes are added only after the walk has finished.
    :param item_table: forwarded to update_bindings
    :param excel_data_filepath: forwarded to update_bindings
    :param sheet_name: forwarded to update_bindings
    :param region_specification: must contain 'region_object' as well as the
                                 'skip_row', 'skip_column' and 'skip_cell'
                                 rule lists (each may be empty or None)
    :param template: statement template; its 'item' and 'qualifier' entries
                     (both optional) decide which cells are collected
    :return: dictionary with the lists 'data_region', 'item' and
             'qualifier_region' plus an 'error' dictionary keyed by cell
             index
    """
    update_bindings(item_table, region_specification, excel_data_filepath,
                    sheet_name)
    region = region_specification['region_object']
    remove_empty_and_invalid_cells(region)
    head = region.get_head()
    data = {
        "data_region": set(),
        "item": set(),
        "qualifier_region": set(),
        'error': dict()
    }
    bindings["$col"] = head[0]
    bindings["$row"] = head[1]
    holes = []
    item = template.get('item')
    qualifiers = template.get('qualifier')

    while region.sheet.get((bindings["$col"], bindings["$row"]),
                           None) is not None:
        try:
            # All three rule lists are consulted for every cell; within a
            # list, evaluation stops at the first rule that matches.
            row_be_skipped = any(
                rule.evaluate(bindings)
                for rule in region_specification['skip_row'] or ())
            column_be_skipped = any(
                rule.evaluate(bindings)
                for rule in region_specification['skip_column'] or ())
            cell_be_skipped = any(
                rule.evaluate(bindings)
                for rule in region_specification['skip_cell'] or ())

            if row_be_skipped or column_be_skipped or cell_be_skipped:
                holes.append((bindings["$row"], bindings["$col"]))
            else:
                data["data_region"].add(get_actual_cell_index(
                    (bindings["$col"], bindings["$row"])))

                if item and isinstance(
                        item, (ItemExpression, ValueExpression,
                               BooleanEquation, ColumnExpression,
                               RowExpression)):
                    try:
                        data["item"].add(
                            get_actual_cell_index(get_cell(item)))
                    except AttributeError:
                        pass

                if qualifiers:
                    # Collected separately so that a failure part-way
                    # through the qualifiers leaves the overall set
                    # untouched.
                    qualifier_cells = set()
                    for qualifier in qualifiers:
                        if isinstance(
                                qualifier["value"],
                                (ItemExpression, ValueExpression,
                                 BooleanEquation, ColumnExpression,
                                 RowExpression)):
                            try:
                                qualifier_cells.add(get_actual_cell_index(
                                    get_cell(qualifier["value"])))
                            except AttributeError:
                                pass
                    data["qualifier_region"] |= qualifier_cells
        except Exception as e:
            data['error'][get_actual_cell_index(
                (bindings["$col"], bindings["$row"]))] = str(e)

        # Move on to the next cell; (None, None) ends the walk.
        successor = region.sheet[(bindings["$col"], bindings["$row"])].next
        if successor is not None:
            bindings["$col"], bindings["$row"] = successor
        else:
            bindings["$col"], bindings["$row"] = None, None

    data['data_region'] = list(data['data_region'])
    data['item'] = list(data['item'])
    data['qualifier_region'] = list(data['qualifier_region'])

    # Holes were recorded as (row, col); the column is passed twice.
    for hole_row, hole_col in holes:
        region.add_hole(hole_row, hole_col, hole_col)
    return data