Beispiel #1
0
def evaluate_template(template: dict) -> dict:
    """
    Resolve a statement template against the module-level ``bindings``.

    Any value that is an ``ItemExpression``, ``ValueExpression`` or
    ``BooleanEquation`` is replaced by the third element returned from its
    ``evaluate_and_get_cell(bindings)`` call; every other value is copied
    through unchanged. The entries of the ``'qualifier'`` list are resolved
    the same way, one output dict per qualifier.

    A ``'cell'`` entry (the label produced by ``get_actual_cell_index``) is
    recorded for every evaluated qualifier expression, and at the top level
    only for the ``'item'`` key.

    :param template: mapping of statement keys to literals or expressions
    :return: a new dict holding the resolved statement
    """
    resolved = {}
    for key, value in template.items():
        if key == 'qualifier':
            resolved[key] = []
            for qualifier in value:
                entry = {}
                for name, part in qualifier.items():
                    if isinstance(
                            part,
                            (ItemExpression, ValueExpression,
                             BooleanEquation)):
                        col, row, entry[name] = part.evaluate_and_get_cell(
                            bindings)
                        entry['cell'] = get_actual_cell_index((col, row))
                    else:
                        entry[name] = part
                resolved[key].append(entry)
        elif isinstance(value,
                        (ItemExpression, ValueExpression, BooleanEquation)):
            col, row, resolved[key] = value.evaluate_and_get_cell(bindings)
            # Only the item expression contributes the statement-level cell.
            if key == "item":
                resolved['cell'] = get_actual_cell_index((col, row))
        else:
            resolved[key] = value
    return resolved
Beispiel #2
0
def generate_download_file(user_id: str, item_table: ItemTable,
                           excel_data_filepath: str, sheet_name: str,
                           region_specification: dict, template: dict,
                           filetype: str, sparql_endpoint: str) -> str:
    """
    Evaluate the template for every cell of the region and serialize the
    resulting statements in the requested file type.

    The region is walked from ``region.get_head()`` by following each sheet
    entry's ``next`` link, with the current position kept in the
    module-level ``bindings`` under "$col" / "$row". Cells whose evaluation
    raises are collected separately and do not appear in the output.

    :param user_id: forwarded to ``generate_triples`` for 'ttl' output
    :param item_table: forwarded to ``update_bindings``
    :param excel_data_filepath: forwarded to ``update_bindings``
    :param sheet_name: forwarded to ``update_bindings``
    :param region_specification: must contain the 'region_object' key
    :param template: statement template given to ``evaluate_template``
    :param filetype: 'json' or 'ttl'
    :param sparql_endpoint: forwarded to ``generate_triples``
    :return: JSON text for 'json'; for 'ttl' the result of
        ``generate_triples`` or, if that raises, the exception text;
        ``None`` for any other filetype
    """
    update_bindings(item_table, region_specification, excel_data_filepath,
                    sheet_name)
    region = region_specification['region_object']
    statements = []
    # Collected per-cell failures; nothing in this function reads them back.
    failures = []
    start = region.get_head()
    bindings["$col"] = start[0]
    bindings["$row"] = start[1]

    while True:
        if region.sheet.get((bindings["$col"], bindings["$row"]),
                            None) is None:
            break
        try:
            statement = evaluate_template(template)
            cell = get_actual_cell_index((bindings["$col"], bindings["$row"]))
            statements.append({'cell': cell, 'statement': statement})
        except Exception as exc:
            cell = get_actual_cell_index((bindings["$col"], bindings["$row"]))
            failures.append({'cell': cell, 'error': str(exc)})

        # Advance to the linked successor; (None, None) ends the walk because
        # the sheet lookup at the top of the loop then yields None.
        successor = region.sheet[(bindings["$col"], bindings["$row"])].next
        if successor is None:
            bindings["$col"], bindings["$row"] = None, None
        else:
            bindings["$col"], bindings["$row"] = successor

    if filetype == 'json':
        return json.dumps(statements, indent=3)
    if filetype == 'ttl':
        try:
            return generate_triples(user_id, statements, sparql_endpoint,
                                    filetype)
        except Exception as exc:
            return str(exc)
    return None
Beispiel #3
0
def wikify_region(region: str,
                  excel_filepath: str,
                  sheet_name: str = None) -> dict:
    """
    Wikify a cell range of a spreadsheet and return a cell-to-qnode mapping.

    The range is parsed, written to a temporary csv file and sent to the
    wikifier service. The result then gets one entry per cell of the range
    whose sheet content is not rejected by ``check_if_string_is_invalid``:
    the qnode reported by the service, or an empty string when the service
    reported none. Cells that raise ``IndexError`` or ``KeyError`` while
    being inspected are left out.

    :param region: cell range, in the form accepted by ``parse_cell_range``
    :param excel_filepath: path of the spreadsheet file
    :param sheet_name: name of the sheet to read
    :return: dict mapping cell labels to qnodes (or "")
    """
    cell_range = parse_cell_range(region)
    csv_path = create_temporary_csv_file(cell_range, excel_filepath,
                                         sheet_name)
    qnodes_by_cell = call_wikifiy_service(csv_path, cell_range[0][0],
                                          cell_range[0][1])
    result = {}
    sheet = pyexcel.get_sheet(sheet_name=sheet_name, file_name=excel_filepath)
    for col in range(cell_range[0][0], cell_range[1][0] + 1):
        for row in range(cell_range[0][1], cell_range[1][1] + 1):
            try:
                label = get_actual_cell_index((col, row))
                # The sheet is indexed (row, col), the label helper (col, row).
                if check_if_string_is_invalid(sheet[row, col]):
                    continue
                result[label] = qnodes_by_cell.get(label, "")
            except (IndexError, KeyError):
                continue
    return result
Beispiel #4
0
def call_wikifiy_service(csv_filepath: str, col_offset: int,
                         row_offset: int) -> dict:
    """
    Upload a csv file to the wikifier service and build a cell-to-qnode map
    from its answer, e.g. ``{'A4': 'Q383', 'B5': 'Q6892'}``.

    Each line of the service's ``data`` list is split on commas; the first
    two fields are read as integer column and row, shifted by the given
    offsets and converted with ``get_actual_cell_index``; the third field is
    stored as the qnode.

    :param csv_filepath: path of the csv file to upload
    :param col_offset: added to every column index in the response
    :param row_offset: added to every row index in the response
    :return: dict of cell label to qnode; empty when the service does not
        answer with HTTP status 200
    """
    cell_qnode_map = dict()
    # Keep the handle open only for the duration of the upload so it is
    # closed deterministically, even if the request raises.
    with open(csv_filepath, 'r') as csv_file:
        files = {
            'file': ('', csv_file),
            'format': (None, 'ISWC'),
            'type': (None, 'text/csv'),
            'header': (None, 'False')
        }
        response = requests.post(
            'https://dsbox02.isi.edu:8888/wikifier/wikify', files=files)

    if response.status_code == 200:
        data = json.loads(response.content.decode("utf-8"))['data']
        for line in data:
            fields = line.split(',')
            cell = get_actual_cell_index(
                (int(fields[0]) + col_offset, int(fields[1]) + row_offset))
            cell_qnode_map[cell] = fields[2]
    return cell_qnode_map
Beispiel #5
0
    def get_item(self, column: int, row: int) -> str:
        """
        Return the qnode recorded for the cell at (column, row).

        ``self.region_qnodes['qnodes']`` is consulted first and
        ``self.other['qnodes']`` second. In both tables a missing or falsy
        entry (for example an empty string) counts as "no qnode".

        :param column: column index of the cell
        :param row: row index of the cell
        :return: the qnode stored for the cell
        :raises Exception: if neither table holds a qnode for the cell
        """
        cell_index = get_actual_cell_index((column, row))
        qnode = self.region_qnodes['qnodes'].get(cell_index)
        if qnode:
            return qnode
        qnode = self.other["qnodes"].get(cell_index)
        if qnode:
            return qnode
        # Build a single message string: passing the text and the cell as two
        # separate arguments makes str(exception) render as a tuple repr.
        raise Exception('No QNode Exists for the cell: ' + str(cell_index))
Beispiel #6
0
    def serialize_cell_to_qnode(self, cell_to_qnode: dict) -> dict:
        """
        Return a copy of ``cell_to_qnode`` whose keys have been converted
        with ``get_actual_cell_index``; the values are kept as they are.

        :param cell_to_qnode: mapping of cell keys to qnode values
        :return: new dict keyed by the converted cell labels
        """
        return {
            get_actual_cell_index(cell): qnode
            for cell, qnode in cell_to_qnode.items()
        }
Beispiel #7
0
def evaluate_template(template: dict) -> dict:
    """
    Resolve a statement template against the module-level ``bindings``.

    Values that are ``ItemExpression``, ``ValueExpression`` or
    ``BooleanEquation`` instances are replaced by the third element of
    ``evaluate_and_get_cell(bindings)``; other values are copied unchanged.
    Qualifier entries are resolved the same way and record the evaluated
    cell under ``'cell'``; at the top level only ``'item'`` does so.

    A qualifier whose property is the literal "P585" and that carries a
    "format" entry additionally has its "value" re-parsed with
    ``parse_datetime_string`` and its "precision" filled in (taken from the
    parser when absent, otherwise translated to an integer).

    :param template: mapping of statement keys to literals or expressions
    :return: a new dict holding the resolved statement
    """
    resolved = {}
    for key, value in template.items():
        if key == 'qualifier':
            resolved[key] = []
            for qualifier in value:
                entry = {}
                for name, part in qualifier.items():
                    if isinstance(part, (ItemExpression, ValueExpression, BooleanEquation)):
                        col, row, entry[name] = part.evaluate_and_get_cell(bindings)
                        entry['cell'] = get_actual_cell_index((col, row))
                    else:
                        entry[name] = part
                if entry.get("property") == "P585" and "format" in entry:
                    # Parsing errors are not handled here; they propagate to
                    # the caller.
                    datetime_string, precision = parse_datetime_string(
                        entry["value"], additional_formats=[entry["format"]])
                    if "precision" in entry:
                        entry["precision"] = translate_precision_to_integer(entry["precision"])
                    else:
                        entry["precision"] = int(str(precision.value))
                    entry["value"] = datetime_string
                resolved[key].append(entry)
        elif isinstance(value, (ItemExpression, ValueExpression, BooleanEquation)):
            col, row, resolved[key] = value.evaluate_and_get_cell(bindings)
            if key == "item":
                resolved['cell'] = get_actual_cell_index((col, row))
        else:
            resolved[key] = value
    return resolved
Beispiel #8
0
def _collect_expression_cell(expression, cells: set) -> None:
    """
    Add the label of the cell that ``expression`` points at to ``cells``.

    Item/value/boolean expressions that declare exactly one variable are
    resolved by binding that variable in the module-level ``bindings`` to
    0, 1, 2, ... until ``expression.evaluate`` is truthy; expressions with
    several variables are skipped. Expressions without variables, and
    column/row expressions, are located with ``get_cell``. Any other kind of
    value is ignored. ``AttributeError`` is swallowed on purpose so that an
    expression which cannot be located simply contributes no cell.

    :param expression: template value to locate
    :param cells: set that receives the cell label
    """
    if isinstance(expression,
                  (ItemExpression, ValueExpression, BooleanEquation)):
        try:
            if expression.variables:
                variables = list(expression.variables)
                if len(variables) == 1:
                    variable = variables[0]
                    bindings[variable] = 0
                    try:
                        # Ends only when evaluate() is truthy or raises.
                        while not expression.evaluate(bindings):
                            bindings[variable] += 1
                        col, row, _ = expression.evaluate_and_get_cell(
                            bindings)
                        cells.add(get_actual_cell_index((col, row)))
                    finally:
                        # Always drop the temporary search variable so a
                        # failed search cannot leak a stale value into the
                        # shared bindings.
                        bindings.pop(variable, None)
            else:
                cells.add(get_actual_cell_index(get_cell(expression)))
        except AttributeError:
            pass
    elif isinstance(expression, (ColumnExpression, RowExpression)):
        try:
            cells.add(get_actual_cell_index(get_cell(expression)))
        except AttributeError:
            pass


def highlight_region(item_table: ItemTable, excel_data_filepath: str,
                     sheet_name: str, region_specification: dict,
                     template: dict) -> dict:
    """
    Walk every cell of the region and collect the cell labels to highlight.

    For each visited cell the data cell itself, the cell the template's
    'item' expression resolves to, and the cells the qualifier 'value'
    expressions resolve to are recorded. A failure while processing a cell
    is stored under 'error', keyed by that cell's label, and the walk
    continues with the next cell.

    :param item_table: forwarded to ``update_bindings``
    :param excel_data_filepath: forwarded to ``update_bindings``
    :param sheet_name: forwarded to ``update_bindings``
    :param region_specification: must contain the 'region_object' key
    :param template: statement template; 'item' and 'qualifier' are optional
    :return: dict with list values under 'dataRegion', 'item' and
        'qualifierRegion', and a dict of per-cell messages under 'error'
    """
    update_bindings(item_table, region_specification, excel_data_filepath,
                    sheet_name)
    region = region_specification['region_object']
    head = region.get_head()
    data = {
        "dataRegion": set(),
        "item": set(),
        "qualifierRegion": set(),
        'error': dict()
    }
    bindings["$col"] = head[0]
    bindings["$row"] = head[1]
    item = template.get('item')
    qualifiers = template.get('qualifier')

    while region.sheet.get((bindings["$col"], bindings["$row"]),
                           None) is not None:
        try:
            data["dataRegion"].add(
                get_actual_cell_index((bindings["$col"], bindings["$row"])))

            if item:
                _collect_expression_cell(item, data["item"])

            if qualifiers:
                # Gathered separately so a failure part-way through leaves
                # the qualifier region untouched for this cell.
                qualifier_cells = set()
                for qualifier in qualifiers:
                    _collect_expression_cell(qualifier["value"],
                                             qualifier_cells)
                data["qualifierRegion"] |= qualifier_cells
        except Exception as e:
            data['error'][get_actual_cell_index(
                (bindings["$col"], bindings["$row"]))] = str(e)

        # Follow the link to the next cell; (None, None) ends the walk.
        successor = region.sheet[(bindings["$col"], bindings["$row"])].next
        if successor is not None:
            bindings["$col"], bindings["$row"] = successor
        else:
            bindings["$col"], bindings["$row"] = None, None

    data['dataRegion'] = list(data['dataRegion'])
    data['item'] = list(data['item'])
    data['qualifierRegion'] = list(data['qualifierRegion'])
    return data
Beispiel #9
0
def evaluate_template(template: dict, sparql_endpoint: str) -> dict:
    """
    Resolve a statement template against the module-level ``bindings``.

    Values that are T2WML expressions are replaced by the third element of
    ``evaluate_and_get_cell(bindings)``; all other values are copied
    unchanged. An expression that declares exactly one variable is resolved
    by binding that variable to 0, 1, 2, ... until it yields a truthy result;
    the variable is removed from ``bindings`` again afterwards. Expressions
    with more than one variable are not evaluated at all.

    Qualifiers, and the statement itself, whose property type is reported as
    "Time" by ``get_property_type`` and that carry a "format" entry get their
    "value" re-parsed with ``parse_datetime_string`` and a "precision" set.

    :param template: statement template; 'property' is read unconditionally
        after the loop, so a template without it raises ``KeyError``
    :param sparql_endpoint: forwarded to ``get_property_type``
    :return: dict holding the resolved statement
    """
    response = dict()
    for key, value in template.items():
        if key == 'qualifier':
            response[key] = []
            for i in range(len(template[key])):
                temp_dict = dict()
                for k, v in template[key][i].items():
                    # NOTE(review): in both expression branches below the
                    # result is stored under the fixed key 'value', not under
                    # `k` — confirm that only the 'value' entry of a qualifier
                    # is ever an expression.
                    if isinstance(v, (ItemExpression, ValueExpression)):
                        if v.variables:
                            variables = list(v.variables)
                            num_of_variables = len(variables)
                            if num_of_variables == 1:
                                # Search for the first variable value whose
                                # evaluated cell content is truthy.
                                # NOTE(review): if the evaluation raises, the
                                # `del` below is skipped and the variable
                                # stays in the shared bindings.
                                bindings[variables[0]] = 0
                                while not v.evaluate_and_get_cell(bindings)[2]:
                                    bindings[variables[0]] += 1
                                col, row, temp_dict[
                                    'value'] = v.evaluate_and_get_cell(
                                        bindings)
                                temp_dict['cell'] = get_actual_cell_index(
                                    (col, row))
                                del bindings[variables[0]]
                        else:
                            col, row, temp_dict[
                                'value'] = v.evaluate_and_get_cell(bindings)
                            temp_dict['cell'] = get_actual_cell_index(
                                (col, row))
                    elif isinstance(v, BooleanEquation):
                        if v.variables:
                            variables = list(v.variables)
                            num_of_variables = len(variables)
                            if num_of_variables == 1:
                                # Same search as above, but the stop condition
                                # is the equation's own evaluate() result.
                                bindings[variables[0]] = 0
                                while not v.evaluate(bindings):
                                    bindings[variables[0]] += 1
                                col, row, temp_dict[
                                    'value'] = v.evaluate_and_get_cell(
                                        bindings)
                                temp_dict['cell'] = get_actual_cell_index(
                                    (col, row))
                                del bindings[variables[0]]
                        else:
                            col, row, temp_dict[
                                'value'] = v.evaluate_and_get_cell(bindings)
                            temp_dict['cell'] = get_actual_cell_index(
                                (col, row))
                    else:
                        temp_dict[k] = v
                # Normalize time-typed qualifiers that specify a format.
                if "property" in temp_dict and get_property_type(
                        temp_dict["property"], sparql_endpoint) == "Time":
                    if "format" in temp_dict:
                        # The try/except only re-raises; errors propagate to
                        # the caller unchanged.
                        try:
                            datetime_string, precision = parse_datetime_string(
                                temp_dict["value"],
                                additional_formats=[temp_dict["format"]])
                            if "precision" not in temp_dict:
                                temp_dict["precision"] = int(
                                    precision.value.__str__())
                            else:
                                temp_dict[
                                    "precision"] = translate_precision_to_integer(
                                        temp_dict["precision"])
                            temp_dict["value"] = datetime_string
                        except Exception as e:
                            raise e
                response[key].append(temp_dict)
        else:
            if isinstance(value, (ItemExpression, ValueExpression)):
                if value.variables:
                    variables = list(value.variables)
                    num_of_variables = len(variables)
                    if num_of_variables == 1:
                        bindings[variables[0]] = 0
                        while not value.evaluate_and_get_cell(bindings)[2]:
                            bindings[variables[0]] += 1
                        col, row, response[key] = value.evaluate_and_get_cell(
                            bindings)
                        del bindings[variables[0]]
                else:
                    col, row, response[key] = value.evaluate_and_get_cell(
                        bindings)
                # NOTE(review): when the expression has more than one variable
                # nothing above assigns col/row, so this line uses whatever an
                # earlier iteration left in them, or fails if they were never
                # assigned.
                if key == "item":
                    response['cell'] = get_actual_cell_index((col, row))
            elif isinstance(value, BooleanEquation):
                # Unlike the branch above, 'cell' is recorded for every key,
                # not only for 'item'.
                if value.variables:
                    variables = list(value.variables)
                    num_of_variables = len(variables)
                    if num_of_variables == 1:
                        bindings[variables[0]] = 0
                        while not value.evaluate(bindings):
                            bindings[variables[0]] += 1
                        col, row, response[key] = value.evaluate_and_get_cell(
                            bindings)
                        response['cell'] = get_actual_cell_index((col, row))
                        del bindings[variables[0]]
                else:
                    col, row, response[key] = value.evaluate_and_get_cell(
                        bindings)
                    response['cell'] = get_actual_cell_index((col, row))
            else:
                response[key] = value

    # Normalize a time-typed statement value that specifies a format.
    # `response["property"]` is indexed directly, so it must be present.
    if get_property_type(response["property"], sparql_endpoint) == "Time":
        if "format" in response:
            try:
                datetime_string, precision = parse_datetime_string(
                    response["value"], additional_formats=[response["format"]])
                if "precision" not in response:
                    response["precision"] = int(precision.value.__str__())
                else:
                    response["precision"] = translate_precision_to_integer(
                        response["precision"])
                response["value"] = datetime_string
            except Exception as e:
                raise e
    return response
Beispiel #10
0
def highlight_region(item_table: ItemTable, excel_data_filepath: str,
                     sheet_name: str, region_specification: dict,
                     template: dict) -> dict:
    """
    Walk the region, record the cells to highlight and punch holes into the
    region object for every cell that a skip rule excludes.

    For each visited cell the 'skip_row', 'skip_column' and 'skip_cell'
    rules of the region specification are evaluated against the
    module-level ``bindings``. A cell matched by any rule is remembered as a
    hole; every other cell is added to the data region together with the
    cells that the template's 'item' and qualifier 'value' expressions point
    at. A failure while processing a cell is stored under 'error', keyed by
    that cell's label, and the walk continues.

    :param item_table: forwarded to ``update_bindings``
    :param excel_data_filepath: forwarded to ``update_bindings``
    :param sheet_name: forwarded to ``update_bindings``
    :param region_specification: must contain 'region_object', 'skip_row',
        'skip_column' and 'skip_cell'
    :param template: statement template; 'item' and 'qualifier' are optional
    :return: dict with list values under 'data_region', 'item' and
        'qualifier_region', and a dict of per-cell messages under 'error'
    """
    update_bindings(item_table, region_specification, excel_data_filepath,
                    sheet_name)
    region = region_specification['region_object']
    remove_empty_and_invalid_cells(region)
    head = region.get_head()
    data = {
        "data_region": set(),
        "item": set(),
        "qualifier_region": set(),
        'error': dict()
    }
    bindings["$col"] = head[0]
    bindings["$row"] = head[1]
    holes = []
    item = template.get('item')
    qualifiers = template.get('qualifier')
    locatable_types = (ItemExpression, ValueExpression, BooleanEquation,
                       ColumnExpression, RowExpression)

    while region.sheet.get((bindings["$col"], bindings["$row"]),
                           None) is not None:
        try:
            # Each rule family is evaluated on its own (stopping at its first
            # match), so all three families are always consulted.
            skip_flags = [
                any(
                    rule.evaluate(bindings)
                    for rule in region_specification[rule_family] or ())
                for rule_family in ('skip_row', 'skip_column', 'skip_cell')
            ]

            if not any(skip_flags):
                data["data_region"].add(
                    get_actual_cell_index(
                        (bindings["$col"], bindings["$row"])))

                if item and isinstance(item, locatable_types):
                    try:
                        data["item"].add(
                            get_actual_cell_index(get_cell(item)))
                    except AttributeError:
                        # Best effort: an item that cannot be located
                        # contributes no highlight.
                        pass

                if qualifiers:
                    qualifier_cells = set()
                    for qualifier in qualifiers:
                        if isinstance(qualifier["value"], locatable_types):
                            try:
                                qualifier_cells.add(
                                    get_actual_cell_index(
                                        get_cell(qualifier["value"])))
                            except AttributeError:
                                pass
                    data["qualifier_region"] |= qualifier_cells
            else:
                holes.append((bindings["$row"], bindings["$col"]))
        except Exception as e:
            data['error'][get_actual_cell_index(
                (bindings["$col"], bindings["$row"]))] = str(e)

        # Follow the link to the next cell; (None, None) ends the walk.
        successor = region.sheet[(bindings["$col"], bindings["$row"])].next
        if successor is not None:
            bindings["$col"], bindings["$row"] = successor
        else:
            bindings["$col"], bindings["$row"] = None, None

    data['data_region'] = list(data['data_region'])
    data['item'] = list(data['item'])
    data['qualifier_region'] = list(data['qualifier_region'])

    # Holes are applied only after the walk has finished; each one is passed
    # as (row, col, col).
    for hole_row, hole_col in holes:
        region.add_hole(hole_row, hole_col, hole_col)

    return data