Esempio n. 1
0
def evaluate_template(template: dict) -> dict:
	"""
	Resolve a template by evaluating the T2WML expressions it contains.

	Every value that is an ItemExpression, ValueExpression or BooleanEquation
	is evaluated against the module-level ``bindings`` and replaced by the
	evaluated result; any other value is copied through unchanged.

	The 'qualifier' key is expected to hold a list of dicts, each of which is
	resolved field by field. A qualifier whose property is "P585" and which
	carries a "format" has its "value" parsed as a datetime and its
	"precision" normalised to an integer.

	:param template: mapping of statement keys to literals or expressions
	:return: a new dict with the same keys and resolved values, plus a 'cell'
		entry for the "item" key and for every evaluated qualifier field
	"""
	resolved = {}
	for key, value in template.items():
		if key != 'qualifier':
			if isinstance(value, (ItemExpression, ValueExpression, BooleanEquation)):
				col, row, resolved[key] = value.evaluate_and_get_cell(bindings)
				# Only the item expression contributes the statement's cell index.
				if key == "item":
					resolved['cell'] = get_actual_cell_index((col, row))
			else:
				resolved[key] = value
			continue

		qualifiers = resolved[key] = []
		for entry in value:
			qualifier = {}
			for field, field_value in entry.items():
				if not isinstance(field_value, (ItemExpression, ValueExpression, BooleanEquation)):
					qualifier[field] = field_value
					continue
				col, row, qualifier[field] = field_value.evaluate_and_get_cell(bindings)
				qualifier['cell'] = get_actual_cell_index((col, row))

			if qualifier.get("property") == "P585" and "format" in qualifier:
				# Parsing errors propagate to the caller unchanged.
				datetime_string, precision = parse_datetime_string(qualifier["value"], additional_formats=[qualifier["format"]])
				if "precision" in qualifier:
					qualifier["precision"] = translate_precision_to_integer(qualifier["precision"])
				else:
					# No explicit precision: fall back to the one detected by the parser.
					qualifier["precision"] = int(str(precision.value))
				qualifier["value"] = datetime_string
			qualifiers.append(qualifier)
	return resolved
Esempio n. 2
0
def model_data() -> None:
    """
    Generate triples for user-defined properties so they can be uploaded to Wikidata.

    Reads ``Datasets/new-property-configuration.yaml`` from the parent of the
    current working directory, builds one ``WDProperty`` per top-level entry
    (labels, descriptions and statements) and writes the serialized Turtle
    document to ``new_properties/result.ttl`` under the same parent directory.

    :return: None
    :raises ValueError: if a statement's property resolves to a type for which
        this function has no value constructor
    """
    base_dir = Path.cwd().parent

    # Context manager so the configuration handle is closed even when parsing fails.
    with open(base_dir / "Datasets/new-property-configuration.yaml",
              'r',
              encoding='utf8') as stream:
        # safe_load returns None for an empty document; treat that as "no properties".
        yaml_data = yaml.safe_load(stream) or {}

    # initialize
    kg_schema = KGSchema()
    kg_schema.add_schema('@prefix : <http://isi.edu/> .', 'ttl')
    etk = ETK(kg_schema=kg_schema, modules=ETKModule)
    doc = etk.create_document({}, doc_id="http://isi.edu/default-ns/projects")

    # bind prefixes
    prefixes = (
        ('wikibase', 'http://wikiba.se/ontology#'),
        ('wd', 'http://www.wikidata.org/entity/'),
        ('wdt', 'http://www.wikidata.org/prop/direct/'),
        ('wdtn', 'http://www.wikidata.org/prop/direct-normalized/'),
        ('wdno', 'http://www.wikidata.org/prop/novalue/'),
        ('wds', 'http://www.wikidata.org/entity/statement/'),
        ('wdv', 'http://www.wikidata.org/value/'),
        ('wdref', 'http://www.wikidata.org/reference/'),
        ('p', 'http://www.wikidata.org/prop/'),
        ('pr', 'http://www.wikidata.org/prop/reference/'),
        ('prv', 'http://www.wikidata.org/prop/reference/value/'),
        ('prn', 'http://www.wikidata.org/prop/reference/value-normalized/'),
        ('ps', 'http://www.wikidata.org/prop/statement/'),
        ('psv', 'http://www.wikidata.org/prop/statement/value/'),
        ('psn', 'http://www.wikidata.org/prop/statement/value-normalized/'),
        ('pq', 'http://www.wikidata.org/prop/qualifier/'),
        ('pqv', 'http://www.wikidata.org/prop/qualifier/value/'),
        ('pqn', 'http://www.wikidata.org/prop/qualifier/value-normalized/'),
        ('skos', 'http://www.w3.org/2004/02/skos/core#'),
        ('prov', 'http://www.w3.org/ns/prov#'),
        ('schema', 'http://schema.org/'),
    )
    for prefix, namespace in prefixes:
        doc.kg.bind(prefix, namespace)

    sparql_endpoint = "https://query.wikidata.org/sparql"
    type_map = {'quantity': Datatype.QuantityValue, 'url': URLValue}
    # Avoids asking the endpoint for the same property's type more than once.
    property_type_cache = {}

    for k, v in yaml_data.items():
        p = WDProperty(k,
                       type_map[v['type']],
                       creator='http://www.isi.edu/t2wml')
        for lang, labels in v['label'].items():
            for label in labels:
                p.add_label(label, lang=lang)
        for lang, descriptions in v['description'].items():
            for description in descriptions:
                p.add_description(description, lang=lang)
        for pnode, items in v['statements'].items():
            for item in items:
                try:
                    property_type = property_type_cache[pnode]
                except KeyError:
                    property_type = get_property_type(pnode, sparql_endpoint)
                    property_type_cache[pnode] = property_type

                if property_type == "WikibaseItem":
                    value = Item(str(item['value']))
                elif property_type == "WikibaseProperty":
                    value = Property(item['value'])
                elif property_type == "String":
                    value = StringValue(item['value'])
                elif property_type == "Quantity":
                    value = QuantityValue(item['value'])
                elif property_type == "Time":
                    value = TimeValue(
                        str(item['value']), Item(item["calendar"]),
                        translate_precision_to_integer(item["precision"]),
                        item["time_zone"])
                elif property_type == "Url":
                    value = URLValue(item['value'])
                elif property_type == "Monolingualtext":
                    value = MonolingualText(item['value'], item["lang"])
                elif property_type == "ExternalId":
                    value = ExternalIdentifier(item['value'])
                elif property_type == "GlobeCoordinate":
                    value = GlobeCoordinate(item["latitude"],
                                            item["longitude"],
                                            item["precision"])
                else:
                    # Without this branch a stale value from an earlier
                    # iteration would be attached to the statement silently.
                    raise ValueError(
                        "unsupported property type {!r} for property {}".format(
                            property_type, pnode))

                p.add_statement(pnode, value)

        doc.kg.add_subject(p)

    # Serialize before opening the target so a failure does not truncate an
    # existing result file.
    data = doc.kg.serialize('ttl')
    # Turtle documents are UTF-8; do not depend on the platform default encoding.
    with open(base_dir / "new_properties/result.ttl", "w",
              encoding='utf8') as f:
        f.write(data)
Esempio n. 3
0
def generate_triples(user_id: str,
                     resolved_excel: list,
                     sparql_endpoint: str,
                     filetype: str = 'ttl',
                     created_by: str = 't2wml') -> str:
    """
    Use ETK to generate the RDF triples for the resolved statements.

    Rows whose statement item is None are skipped. Property types are looked
    up in the module-level ``property_type_dict`` first and fetched with
    ``get_property_type`` on a miss; successful lookups are stored back in
    that dict.

    :param user_id: not used by this function; kept for interface compatibility
    :param resolved_excel: list of dicts, each with a "statement" dict holding
        "item", "property", the value fields and optionally a "qualifier" list
    :param sparql_endpoint: endpoint passed to ``get_property_type``
    :param filetype: serialization format handed to ``doc.kg.serialize``
    :param created_by: suffix of the creator URI ``http://www.isi.edu/<created_by>``
    :return: the serialized knowledge graph
    :raises Exception: if a statement or qualifier property could not be
        resolved, or a statement property has an unsupported type
    """
    # initialize
    kg_schema = KGSchema()
    kg_schema.add_schema('@prefix : <http://isi.edu/> .', 'ttl')
    etk = ETK(kg_schema=kg_schema, modules=ETKModule)
    doc = etk.create_document({}, doc_id="http://isi.edu/default-ns/projects")
    # Alias, not a copy: types cached here stay in the shared module-level dict.
    property_type_map = property_type_dict

    # bind prefixes
    prefixes = (
        ('wikibase', 'http://wikiba.se/ontology#'),
        ('wd', 'http://www.wikidata.org/entity/'),
        ('wdt', 'http://www.wikidata.org/prop/direct/'),
        ('wdtn', 'http://www.wikidata.org/prop/direct-normalized/'),
        ('wdno', 'http://www.wikidata.org/prop/novalue/'),
        ('wds', 'http://www.wikidata.org/entity/statement/'),
        ('wdv', 'http://www.wikidata.org/value/'),
        ('wdref', 'http://www.wikidata.org/reference/'),
        ('p', 'http://www.wikidata.org/prop/'),
        ('pr', 'http://www.wikidata.org/prop/reference/'),
        ('prv', 'http://www.wikidata.org/prop/reference/value/'),
        ('prn', 'http://www.wikidata.org/prop/reference/value-normalized/'),
        ('ps', 'http://www.wikidata.org/prop/statement/'),
        ('psv', 'http://www.wikidata.org/prop/statement/value/'),
        ('psn', 'http://www.wikidata.org/prop/statement/value-normalized/'),
        ('pq', 'http://www.wikidata.org/prop/qualifier/'),
        ('pqv', 'http://www.wikidata.org/prop/qualifier/value/'),
        ('pqn', 'http://www.wikidata.org/prop/qualifier/value-normalized/'),
        ('skos', 'http://www.w3.org/2004/02/skos/core#'),
        ('prov', 'http://www.w3.org/ns/prov#'),
        ('schema', 'http://schema.org/'),
    )
    for prefix, namespace in prefixes:
        doc.kg.bind(prefix, namespace)

    is_error = False
    for i in resolved_excel:
        statement = i["statement"]
        _item = statement["item"]
        if _item is None:
            continue

        item = WDItem(_item,
                      creator='http://www.isi.edu/{}'.format(created_by))
        statement_property = statement["property"]
        try:
            property_type = property_type_map[statement_property]
        except KeyError:
            property_type = get_property_type(statement_property,
                                              sparql_endpoint)
            # Never cache a failed lookup; a later call may succeed.
            if property_type != "Property Not Found":
                property_type_map[statement_property] = property_type

        if property_type == "WikibaseItem":
            value = Item(str(statement["value"]))
        elif property_type == "WikibaseProperty":
            value = Property(statement["value"])
        elif property_type == "String":
            value = StringValue(statement["value"])
        elif property_type == "Quantity":
            # Strip thousands separators before building the quantity.
            value = QuantityValue(str(statement["value"]).replace(',', ''))
        elif property_type == "Time":
            value = TimeValue(
                str(statement["value"]),
                Item(statement["calendar"]),
                translate_precision_to_integer(statement["precision"]),
                statement["time_zone"])
        elif property_type == "Url":
            value = URLValue(statement["value"])
        elif property_type == "Monolingualtext":
            value = MonolingualText(statement["value"], statement["lang"])
        elif property_type == "ExternalId":
            value = ExternalIdentifier(statement["value"])
        elif property_type == "GlobeCoordinate":
            value = GlobeCoordinate(statement["latitude"],
                                    statement["longitude"],
                                    statement["precision"])
        else:
            # "Property Not Found" or an unsupported type: abort instead of
            # emitting a statement that reuses the previous row's value.
            is_error = True
            break

        s = item.add_statement(statement_property, value)
        doc.kg.add_subject(item)

        for j in statement.get("qualifier", ()):
            qualifier_property = j["property"]
            try:
                property_type = property_type_map[qualifier_property]
            except KeyError:
                property_type = get_property_type(qualifier_property,
                                                  sparql_endpoint)
                # Cache under the qualifier's own property, not the statement's.
                if property_type != "Property Not Found":
                    property_type_map[qualifier_property] = property_type

            # Reset so an unresolved qualifier cannot inherit the statement's
            # (or a previous qualifier's) value.
            value = None
            if property_type == "WikibaseItem":
                value = Item(str(j["value"]))
            elif property_type == "WikibaseProperty":
                value = Property(j["value"])
            elif property_type == "String":
                value = StringValue(j["value"])
            elif property_type == "Quantity":
                value = QuantityValue(j["value"])
            elif property_type == "Time":
                value = TimeValue(str(j["value"]), Item(j["calendar"]),
                                  j["precision"], j["time_zone"])
            elif property_type == "Url":
                value = URLValue(j["value"])
            elif property_type == "Monolingualtext":
                value = MonolingualText(j["value"], j["lang"])
            elif property_type == "ExternalId":
                value = ExternalIdentifier(j["value"])
            elif property_type == "GlobeCoordinate":
                value = GlobeCoordinate(j["latitude"], j["longitude"],
                                        j["precision"])
            elif property_type == "Property Not Found":
                is_error = True

            if value is None:
                continue
            s.add_qualifier(qualifier_property, value)
        doc.kg.add_subject(s)

    if is_error:
        raise Exception('data exception while generating triples')
    return doc.kg.serialize(filetype)
Esempio n. 4
0
def generate_triples(user_id: str, resolved_excel: list, sparql_endpoint: str, filetype: str = 'ttl') -> str:
	"""
	Use ETK to generate the RDF triples for the resolved statements.

	Property types are looked up in the module-level ``property_type_map``
	first and fetched with ``get_property_type`` on a miss; successful
	lookups are stored back in that map.

	:param user_id: not used by this function; kept for interface compatibility
	:param resolved_excel: list of dicts, each with a "statement" dict holding
		"item", "property", the value fields and optionally a "qualifier" list
	:param sparql_endpoint: endpoint passed to ``get_property_type``
	:param filetype: serialization format handed to ``doc.kg.serialize``
	:return: the serialized knowledge graph, or the literal string
		"Property Not Found" when any property could not be resolved
	"""
	# initialize
	kg_schema = KGSchema()
	kg_schema.add_schema('@prefix : <http://isi.edu/> .', 'ttl')
	etk = ETK(kg_schema=kg_schema, modules=ETKModule)
	doc = etk.create_document({}, doc_id="http://isi.edu/default-ns/projects")

	# bind prefixes
	prefixes = (
		('wikibase', 'http://wikiba.se/ontology#'),
		('wd', 'http://www.wikidata.org/entity/'),
		('wdt', 'http://www.wikidata.org/prop/direct/'),
		('wdtn', 'http://www.wikidata.org/prop/direct-normalized/'),
		('wdno', 'http://www.wikidata.org/prop/novalue/'),
		('wds', 'http://www.wikidata.org/entity/statement/'),
		('wdv', 'http://www.wikidata.org/value/'),
		('wdref', 'http://www.wikidata.org/reference/'),
		('p', 'http://www.wikidata.org/prop/'),
		('pr', 'http://www.wikidata.org/prop/reference/'),
		('prv', 'http://www.wikidata.org/prop/reference/value/'),
		('prn', 'http://www.wikidata.org/prop/reference/value-normalized/'),
		('ps', 'http://www.wikidata.org/prop/statement/'),
		('psv', 'http://www.wikidata.org/prop/statement/value/'),
		('psn', 'http://www.wikidata.org/prop/statement/value-normalized/'),
		('pq', 'http://www.wikidata.org/prop/qualifier/'),
		('pqv', 'http://www.wikidata.org/prop/qualifier/value/'),
		('pqn', 'http://www.wikidata.org/prop/qualifier/value-normalized/'),
		('skos', 'http://www.w3.org/2004/02/skos/core#'),
		('prov', 'http://www.w3.org/ns/prov#'),
		('schema', 'http://schema.org/'),
	)
	for prefix, namespace in prefixes:
		doc.kg.bind(prefix, namespace)

	is_error = False
	for i in resolved_excel:
		statement = i["statement"]
		item = WDItem(statement["item"],  creator='http://www.isi.edu/t2wml')
		statement_property = statement["property"]
		try:
			property_type = property_type_map[statement_property]
		except KeyError:
			property_type = get_property_type(statement_property, sparql_endpoint)
			# Never cache a failed lookup: the map is shared, so a cached
			# "Property Not Found" would poison every later call.
			if property_type != "Property Not Found":
				property_type_map[statement_property] = property_type

		# NOTE(review): a type matching none of the branches below leaves
		# `value` unset or stale from the previous row - confirm whether such
		# types can be returned by get_property_type.
		if property_type == "WikibaseItem":
			value = Item(str(statement["value"]))
		elif property_type == "WikibaseProperty":
			value = Property(statement["value"])
		elif property_type == "String":
			value = StringValue(statement["value"])
		elif property_type == "Quantity":
			value = QuantityValue(statement["value"])
		elif property_type == "Time":
			value = TimeValue(str(statement["value"]), Item(statement["calendar"]), translate_precision_to_integer(statement["precision"]), statement["time_zone"])
		elif property_type == "Url":
			value = URLValue(statement["value"])
		elif property_type == "Monolingualtext":
			value = MonolingualText(statement["value"], statement["lang"])
		elif property_type == "ExternalId":
			value = ExternalIdentifier(statement["value"])
		elif property_type == "GlobeCoordinate":
			value = GlobeCoordinate(statement["latitude"], statement["longitude"], statement["precision"])
		elif property_type == "Property Not Found":
			is_error = True
			break
		s = item.add_statement(statement_property, value)
		doc.kg.add_subject(item)

		for j in statement.get("qualifier", ()):
			qualifier_property = j["property"]
			try:
				property_type = property_type_map[qualifier_property]
			except KeyError:
				property_type = get_property_type(qualifier_property, sparql_endpoint)
				if property_type != "Property Not Found":
					property_type_map[qualifier_property] = property_type

			# Reset so an unresolved qualifier cannot inherit the statement's
			# (or a previous qualifier's) value.
			value = None
			if property_type == "WikibaseItem":
				value = Item(str(j["value"]))
			elif property_type == "WikibaseProperty":
				value = Property(j["value"])
			elif property_type == "String":
				value = StringValue(j["value"])
			elif property_type == "Quantity":
				value = QuantityValue(j["value"])
			elif property_type == "Time":
				value = TimeValue(str(j["value"]), Item(j["calendar"]), j["precision"], j["time_zone"])
			elif property_type == "Url":
				value = URLValue(j["value"])
			elif property_type == "Monolingualtext":
				value = MonolingualText(j["value"], j["lang"])
			elif property_type == "ExternalId":
				value = ExternalIdentifier(j["value"])
			elif property_type == "GlobeCoordinate":
				value = GlobeCoordinate(j["latitude"], j["longitude"], j["precision"])
			elif property_type == "Property Not Found":
				is_error = True

			if value is None:
				continue
			s.add_qualifier(qualifier_property, value)
		doc.kg.add_subject(s)

	if is_error:
		# Callers receive this sentinel string instead of serialized triples.
		return "Property Not Found"
	return doc.kg.serialize(filetype)
Esempio n. 5
0
def evaluate_template(template: dict, sparql_endpoint: str) -> dict:
    """
    Resolve a template by evaluating the T2WML expressions it contains.

    Values that are ItemExpression, ValueExpression or BooleanEquation
    instances are evaluated against the module-level ``bindings``; all other
    values are copied through unchanged. The 'qualifier' key is expected to
    hold a list of dicts, each resolved by ``_resolve_qualifier``.

    When the statement's property (or a qualifier's property) has type "Time"
    according to ``get_property_type`` and a "format" is present, the "value"
    is parsed as a datetime and "precision" is normalised to an integer.

    :param template: mapping of statement keys to literals or expressions
    :param sparql_endpoint: endpoint passed to ``get_property_type``
    :return: a new dict with the resolved values; 'cell' is set from the
        "item" expression and from any evaluated BooleanEquation
    """
    response = dict()
    for key, value in template.items():
        if key == 'qualifier':
            response[key] = [
                _resolve_qualifier(qualifier, sparql_endpoint)
                for qualifier in value
            ]
        elif isinstance(value, (ItemExpression, ValueExpression)):
            evaluated = _resolve_expression_cell(value, boolean=False)
            # None means the expression has several free variables and was
            # not evaluated; there is then no cell to report either.
            if evaluated is not None:
                col, row, response[key] = evaluated
                if key == "item":
                    response['cell'] = get_actual_cell_index((col, row))
        elif isinstance(value, BooleanEquation):
            evaluated = _resolve_expression_cell(value, boolean=True)
            if evaluated is not None:
                col, row, response[key] = evaluated
                response['cell'] = get_actual_cell_index((col, row))
        else:
            response[key] = value

    # Guard the lookup the same way the qualifier path does, so a template
    # without a property does not fail with KeyError here.
    if "property" in response and get_property_type(
            response["property"], sparql_endpoint) == "Time":
        _normalize_time_fields(response)
    return response


def _resolve_qualifier(qualifier: dict, sparql_endpoint: str) -> dict:
    """Resolve one qualifier dict, evaluating any expressions among its values."""
    temp_dict = dict()
    for k, v in qualifier.items():
        if isinstance(v, (ItemExpression, ValueExpression)):
            evaluated = _resolve_expression_cell(v, boolean=False)
        elif isinstance(v, BooleanEquation):
            evaluated = _resolve_expression_cell(v, boolean=True)
        else:
            temp_dict[k] = v
            continue
        if evaluated is not None:
            # NOTE(review): the result is stored under 'value' whatever the
            # source key is; kept as-is, confirm this is intended.
            col, row, temp_dict['value'] = evaluated
            temp_dict['cell'] = get_actual_cell_index((col, row))

    if "property" in temp_dict and get_property_type(
            temp_dict["property"], sparql_endpoint) == "Time":
        _normalize_time_fields(temp_dict)
    return temp_dict


def _resolve_expression_cell(expression, boolean: bool):
    """Evaluate an expression and return its (col, row, value), or None if it has more than one free variable."""
    if not expression.variables:
        return expression.evaluate_and_get_cell(bindings)

    variables = list(expression.variables)
    if len(variables) != 1:
        return None

    variable = variables[0]
    bindings[variable] = 0
    try:
        # Increase the variable from 0 until the expression yields a truthy
        # result; boolean equations are probed with evaluate() instead.
        if boolean:
            while not expression.evaluate(bindings):
                bindings[variable] += 1
        else:
            while not expression.evaluate_and_get_cell(bindings)[2]:
                bindings[variable] += 1
        return expression.evaluate_and_get_cell(bindings)
    finally:
        # Always remove the temporary binding, even if evaluation raised,
        # so it cannot leak into later evaluations.
        del bindings[variable]


def _normalize_time_fields(entry: dict) -> None:
    """Parse entry["value"] with entry["format"] and set an integer "precision", in place."""
    if "format" not in entry:
        return
    datetime_string, precision = parse_datetime_string(
        entry["value"], additional_formats=[entry["format"]])
    if "precision" not in entry:
        # No explicit precision: use the one detected by the parser.
        entry["precision"] = int(precision.value.__str__())
    else:
        entry["precision"] = translate_precision_to_integer(
            entry["precision"])
    entry["value"] = datetime_string