def test_parsetype_resource():
    # `data2` is assumed to be an N3/RDF document defined elsewhere in the test module
    g = rdflib.Graph().parse(data=data2)
    print(g.serialize(format='n3'))
def test_group_of_tables(mock_urlopen):
    mock_urlopen.side_effect = dispatch_files_as_url
    csv_urls = [
        "http://example.org/gov.uk/data/organizations.csv",
        "http://example.org/gov.uk/data/professions.csv",
        "http://example.org/senior-roles.csv",
        "http://example.org/junior-roles.csv"
    ]
    csvw = CSVW(csv_url=csv_urls,
                metadata_url="http://example.org/csv-metadata.json")
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", RiotWarning)
        rdf_output = csvw.to_rdf()

    g = rdflib.Graph().parse(data=rdf_output, format="turtle")

    org = Namespace("http://www.w3.org/ns/org#")
    post_in = URIRef("http://example.org/organization/hefce.ac.uk")
    grade = URIRef("http://example.org/gov.uk/def/grade")
    job = URIRef("http://example.org/gov.uk/def/job")
    prof = URIRef("http://example.org/gov.uk/def/profession")
    post = Namespace("http://example.org/organization/hefce.ac.uk/post/")
    person = Namespace("http://example.org/organization/hefce.ac.uk/person/")
    min_pay = URIRef("http://example.org/gov.uk/def/min_pay")
    max_pay = URIRef("http://example.org/gov.uk/def/max_pay")
    num_posts = URIRef("http://example.org/gov.uk/def/number_of_posts")

    post_90115 = post["90115"]
    post_90334 = post["90334"]
    p1 = person["1"]
    p2 = person["2"]

    post_90115_triples = list(g.triples((post_90115, None, None)))
    assert len(post_90115_triples) == 7
    assert (post_90115, DCTERMS.identifier, Literal("90115")) in post_90115_triples
    assert (post_90115, org.heldBy, p1) in post_90115_triples
    assert (post_90115, grade, Literal("SCS1A")) in post_90115_triples
    assert (post_90115, job, Literal("Deputy Chief Executive")) in post_90115_triples
    assert (post_90115, org.reportsTo, post_90334) in post_90115_triples
    assert (post_90115, prof, Literal("Finance")) in post_90115_triples
    assert (post_90115, org.postIn, post_in) in post_90115_triples

    p1_triples = list(g.triples((p1, None, None)))
    assert len(p1_triples) == 1
    assert (p1, FOAF.name, Literal("Steve Egan")) in p1_triples

    post_90334_triples = list(g.triples((post_90334, None, None)))
    assert len(post_90334_triples) == 6
    assert (post_90334, DCTERMS.identifier, Literal("90334")) in post_90334_triples
    assert (post_90334, org.heldBy, p2) in post_90334_triples
    assert (post_90334, grade, Literal("SCS4")) in post_90334_triples
    assert (post_90334, job, Literal("Chief Executive")) in post_90334_triples
    assert (post_90334, prof, Literal("Policy")) in post_90334_triples
    assert (post_90334, org.postIn, post_in) in post_90334_triples

    p2_triples = list(g.triples((p2, None, None)))
    assert len(p2_triples) == 1
    assert (p2, FOAF.name, Literal("Sir Alan Langlands")) in p2_triples

    bnode1 = list(g.triples((None, grade, Literal("4"))))[0][0]
    b1_triples = list(g.triples((bnode1, None, None)))
    assert len(b1_triples) == 8
    assert (bnode1, org.reportsTo, post_90115) in b1_triples
    assert (bnode1, min_pay, Literal(17426, datatype=XSD.integer)) in b1_triples
    assert (bnode1, max_pay, Literal(20002, datatype=XSD.integer)) in b1_triples
    assert (bnode1, job, Literal("Administrator")) in b1_triples
    assert (bnode1, num_posts, Literal(8.67, datatype=XSD.double)) in b1_triples
    assert (bnode1, prof, Literal("Operational Delivery")) in b1_triples
    assert (bnode1, org.postIn, post_in) in b1_triples

    bnode2 = list(g.triples((None, grade, Literal("5"))))[0][0]
    b2_triples = list(g.triples((bnode2, None, None)))
    assert len(b2_triples) == 8
    assert (bnode2, org.reportsTo, post_90115) in b2_triples
    assert (bnode2, min_pay, Literal(19546, datatype=XSD.integer)) in b2_triples
    assert (bnode2, max_pay, Literal(22478, datatype=XSD.integer)) in b2_triples
    assert (bnode2, job, Literal("Administrator")) in b2_triples
    assert (bnode2, num_posts, Literal(0.5, datatype=XSD.double)) in b2_triples
    assert (bnode2, prof, Literal("Operational Delivery")) in b2_triples
    assert (bnode2, org.postIn, post_in) in b2_triples

    assert len(list(g.triples((None, None, None)))) == 7 + 1 + 6 + 1 + 8 + 8
def main(inputfile, model, configfile, outputfile, compression):
    compression = int(compression)
    print('Loading configuration file...')

    # Read configuration file
    with open(configfile, encoding='utf-8-sig') as json_file:
        config_data = json.load(json_file)
    def_base_uri = config_data['baseuri']
    getValue = config_data['getValue']
    numberOfRowsToConsider = int(config_data['numberOfRowsToConsider'])

    # Load Recipient Categorisation dictionary
    recipientCatg = config_data['recipientCatg']
    recipientCatg_new = dict()
    for key in recipientCatg:
        newkey = key.casefold()
        recipientCatg_new[newkey] = recipientCatg[key]
    recipientCatg = recipientCatg_new

    # Load Public Organisation dictionary
    publicOrganisationCatg = config_data['publicOrganisationCatg']
    publicOrganisationCatg_new = dict()
    for key in publicOrganisationCatg:
        newkey = key.casefold()
        publicOrganisationCatg_new[newkey] = publicOrganisationCatg[key]
    publicOrganisationCatg = publicOrganisationCatg_new

    nomenclatureBase = config_data['nomenclatureBase']

    #------------------------#
    #  Load model and data   #
    #------------------------#

    # Set filenames
    data_filename = inputfile
    model_filename = model
    cwd = os.getcwd()

    # Load data
    print('Loading data...')
    os.chdir(os.path.join(cwd, 'data/raw'))
    data = pandas.read_csv(data_filename, sep=',', encoding="ANSI",
                           quotechar='"', na_filter=False, low_memory=False)
    data = data.replace({'\n': ', '}, regex=True)  # remove newline characters in data
    session = Session.get_current()

    # Load model from Turtle file
    print('Loading model...')
    os.chdir(os.path.join(cwd, 'models'))
    ontology = Ontology.load(model_filename)
    os.chdir(cwd)

    #-------------------------------#
    #  Print all classes detected   #
    #-------------------------------#
    if False:
        print("List of all terms detected:")
        for term in ontology.__terms__:
            print(term)
        print('')

    #----------------------------------#
    # Load all controlled vocabularies #
    #----------------------------------#
    print('Loading controlled vocabularies...')

    # Query a file to get a dictionary with keys 'o' and values 'uri'
    def getQueryDict(modelfile):
        graph = rdflib.Graph()
        graph.parse(modelfile)
        rowlist = graph.query(
            """PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
               SELECT ?uri ?o
               WHERE {
                   ?uri skos:prefLabel ?o .
                   FILTER (lang(?o) = 'en')
               }""")
        objdict = {}
        for row in rowlist:
            objdict[str(row.o.toPython())] = str(row.uri.toPython())
        return objdict

    # Create dictionary of Countries
    countriesmodelfile = config_data["countriesmodelfile"]
    countryList = getQueryDict(countriesmodelfile)
    countryNotFoundBase = config_data["countryNotFoundBase"]
    countryReplace = config_data["countryReplace"]

    # Create URI for currency (only EUR for now)
    currencyEUR = config_data["currencyEUR"]

    # Create URI for corporate Body
    corporateBodyBase = config_data["corporateBodyBase"]
    corporateBodyReplace = config_data["corporateBodyReplace"]

    # set up dictionaries for controlled vocabularies
    organisationTypeDict = {}
    corporatebodyDict = {}
    actionTypeDict = {}

    def checkControlledDictionary(controlledDict, keyLabel, valueLabel, label,
                                  base_uri=def_base_uri):
        # updates the controlled vocabulary and returns the skos Concept URI to be used
        if row[getValue[keyLabel]] not in controlledDict:
            if base_uri == def_base_uri:
                lbl = label + row[getValue[valueLabel]]
                URISpec = URISpecification(base_uri, lbl)
                Concept_tmp = ontology.skosConcept(uri=URISpec)
            else:
                Concept_tmp = ontology.skosConcept(
                    uri=None, imposeURI=base_uri + row[getValue[keyLabel]])
            Concept_tmp.skosprefLabel += row[getValue[valueLabel]]
            controlledDict[row[getValue[keyLabel]]] = Concept_tmp.getInstanceUri()
        return controlledDict[row[getValue[keyLabel]]]

    #-------------------------------#
    #  Create Instances of Dataset  #
    #-------------------------------#
    flushfrequency = int(config_data['flushfrequency'])  # nb of rows before flushing the data to file
    batchlimits = range(flushfrequency, len(data.index), flushfrequency)
    output = open(outputfile, 'w', encoding='utf8')

    # Go through the data file creating all instances
    with click.progressbar(data.iterrows(), label='Creating instances',
                           length=len(data.index)) as total:
        for ix, row in total:
            if ix < numberOfRowsToConsider or numberOfRowsToConsider == -1:
                #----------------#
                # Create Address #
                #----------------#
                country = row[getValue['countryDescriptionEn']]
                if country in countryReplace:
                    country = countryReplace[country]
                lbl = row[getValue['address']] + row[getValue['city']] + row[
                    getValue['postCode']] + country
                URISpec = URISpecification(def_base_uri, lbl)
                Address_tmp = ontology.locnAddress(uri=URISpec)
                Address_tmp.locnadminUnitL1 += countryList.setdefault(
                    country, countryNotFoundBase + country)
                Address_tmp.locnfullAddress += row[getValue['address']]
                Address_tmp.locnpostName += row[getValue['city']]
                Address_tmp.locnpostCode += row[getValue['postCode']]

                #-----------------#
                # Create Location #
                #-----------------#
                geographicName = str(row[getValue['recipientName']]) + ', ' + str(
                    row[getValue['city']]) + ', ' + str(
                        row[getValue['countryDescriptionEn']])
                URISpec = URISpecification(def_base_uri, geographicName)
                Location_tmp = ontology.dctLocation(uri=URISpec)
                Location_tmp.locngeographicName += geographicName
                Location_tmp.locnaddress += Address_tmp

                #------------------#
                # Create Recipient #
                #------------------#
                lbl = row[getValue['recipientName']] + geographicName
                URISpec = URISpecification(def_base_uri, lbl)
                Recipient_tmp = ontology.Recipient(uri=URISpec)
                Recipient_tmp.prefLabel += row[getValue['recipientName']]
                Recipient_tmp.hasLocation += Location_tmp
                recipientType = recipientCatg[row[
                    getValue['recipientTypeDescription']].casefold()]
                recipientURI = Recipient_tmp.getInstanceUri()

                # Enforce extra indicator fields
                if row[getValue['isNaturalPerson']]:
                    recipientType = "Person"
                elif row[getValue['isNFPO']]:
                    recipientType = "NFPO"
                elif row[getValue['isNGO']]:
                    recipientType = "NGO"

                # If needed, an extra type for the Recipient is assigned
                if recipientType == "Registered Organisation":
                    RecipientAlter_tmp = ontology.rovRegisteredOrganization(
                        uri=None, imposeURI=recipientURI)
                    RecipientAlter_tmp.rovlegalName += row[getValue['recipientName']]
                    lbl = row[getValue['recipientVAT']]
                    URISpec = URISpecification(def_base_uri, lbl)
                    RecipientVAT_tmp = ontology.admsIdentifier(
                        uri=URISpec, label=row[getValue['recipientVAT']])
                    RecipientAlter_tmp.rovregistration += RecipientVAT_tmp
                    RecipientAlter_tmp.rovorgType += checkControlledDictionary(
                        organisationTypeDict, 'organisationTypeCode',
                        'organisationTypeDescription', 'RegisteredOrganisation')
                elif recipientType == "Public Organisation":
                    RecipientAlter_tmp = ontology.cpovPublicOrganisation(
                        uri=None, imposeURI=recipientURI)
                    RecipientAlter_tmp.orgclassification += publicOrganisationCatg[
                        row[getValue['recipientTypeDescription']].casefold()]  # literal for the pilot
                elif recipientType == "Person":
                    RecipientAlter_tmp = ontology.foafPerson(
                        uri=None, imposeURI=recipientURI)
                    RecipientAlter_tmp.foaffamilyName += row[getValue['recipientName']]
                elif recipientType == "Recipient":
                    pass  # Recipient object already made
                elif recipientType == "International Organisation":
                    RecipientAlter_tmp = ontology.InternationalOrganization(
                        uri=None, imposeURI=recipientURI)
                elif recipientType == "Trust Fund":
                    RecipientAlter_tmp = ontology.TrustFund(
                        uri=None, imposeURI=recipientURI)
                elif recipientType == "NFPO":
                    RecipientAlter_tmp = ontology.NonProfitOrganisation(
                        uri=None, imposeURI=recipientURI)
                    lbl = row[getValue['recipientVAT']]
                    URISpec = URISpecification(def_base_uri, lbl)
                    RecipientVAT_tmp = ontology.admsIdentifier(
                        uri=URISpec, label=row[getValue['recipientVAT']])
                    RecipientAlter_tmp.rovregistration += RecipientVAT_tmp
                    RecipientAlter_tmp.rovorgType += checkControlledDictionary(
                        organisationTypeDict, 'organisationTypeCode',
                        'organisationTypeDescription', 'NFPO')
                elif recipientType == "NGO":
                    RecipientAlter_tmp = ontology.NGO(uri=None, imposeURI=recipientURI)
                    lbl = row[getValue['recipientVAT']]
                    URISpec = URISpecification(def_base_uri, lbl)
                    RecipientVAT_tmp = ontology.admsIdentifier(
                        uri=URISpec, label=row[getValue['recipientVAT']])
                    RecipientAlter_tmp.rovregistration += RecipientVAT_tmp
                    RecipientAlter_tmp.rovorgType += checkControlledDictionary(
                        organisationTypeDict, 'organisationTypeCode',
                        'organisationTypeDescription', 'NGO')
                else:
                    print('Recipient: no additional type match.')

                # -----------------------#
                # Create Action Location #
                # -----------------------#
                actionlbl = row[getValue['actionLocation']]
                if actionlbl:
                    URISpec = URISpecification(def_base_uri, actionlbl)
                    ActionLocation_tmp = ontology.dctLocation(uri=URISpec)
                    ActionLocation_tmp.locngeographicName += row[getValue['actionLocation']]

                # --------------------#
                # Create Contract Key #
                # --------------------#
                lbl = str(row[getValue['contractKey']])
                URISpec = URISpecification(def_base_uri, lbl)
                ContractKey_tmp = ontology.admsIdentifier(
                    uri=URISpec, label=row[getValue['contractKey']])

                # ------------------------#
                # Create Legal Commitment #
                # ------------------------#
                lbl = str(row[getValue['commitmentKey']])
                URISpec = URISpecification(def_base_uri, lbl)
                LegalCommitment_tmp = ontology.LegalCommitment(uri=URISpec)
                LegalCommitment_tmp.dctdescription += row[getValue['subject']]
                LegalCommitment_tmp.fundingType += row[getValue['fundingType']]
                LegalCommitment_tmp.contractKey += ContractKey_tmp
                if row[getValue['isCoordinator']]:
                    LegalCommitment_tmp.hasCoordinator += Recipient_tmp
                if actionlbl:
                    LegalCommitment_tmp.hasActionLocation += ActionLocation_tmp

                # ----------------------#
                # Create Monetary Value # --> link to EU Budget
                # ----------------------#
                lbl = str(row[getValue['totalValue']])
                URISpec = URISpecification(def_base_uri, lbl)
                MonetaryValue_tmp = ontology.MonetaryValue(uri=URISpec)
                MonetaryValue_tmp.value += row[getValue['totalValue']]
                MonetaryValue_tmp.currency += currencyEUR

                # ------------------------------#
                # Create Indicative Transaction #
                # ------------------------------#
                lbl = row[getValue['DG']] + row[getValue['recipientName']] + str(
                    row[getValue['totalValue']]) + geographicName
                URISpec = URISpecification(def_base_uri, lbl)
                IndicativeTransaction_tmp = ontology.IndicativeTransaction(uri=URISpec)
                IndicativeTransaction_tmp.committedTo += Recipient_tmp
                # construct corporate body uri
                DG = row[getValue['DG']]
                if DG in corporateBodyReplace:
                    DG = corporateBodyReplace[DG]
                IndicativeTransaction_tmp.committedBy += checkControlledDictionary(
                    corporatebodyDict, 'DG', 'DGDescriptionEn', 'CorporateBody',
                    base_uri=corporateBodyBase)
                IndicativeTransaction_tmp.hasEstimatedValue += MonetaryValue_tmp

                # --------------------#
                # Create Position Key #
                # --------------------#
                lbl = str(row[getValue['positionKey']])
                URISpec = URISpecification(def_base_uri, lbl)
                PositionKey_tmp = ontology.admsIdentifier(
                    uri=URISpec, label=row[getValue['positionKey']])

                # ----------------------#
                # Create Commitment Key #
                # ----------------------#
                lbl = str(row[getValue['commitmentKey']])
                URISpec = URISpecification(def_base_uri, lbl)
                CommitmentKey_tmp = ontology.admsIdentifier(
                    uri=URISpec, label=row[getValue['commitmentKey']])

                # --------------------#
                # Create Nomenclature # --> link to EU Budget
                # --------------------#
                nomenclatureURI = nomenclatureBase + str(
                    row[getValue['year']]) + '_SEC3' + row[
                        getValue['budgetLine']].replace('.', '_')

                # ----------------------------#
                # Create Budgetary Commitment #
                # ----------------------------#
                lbl = str(row[getValue['positionKey']]) + row[
                    getValue['financialManagementArea']] + str(
                        row[getValue['expenseType']]) + str(
                            row[getValue['commitmentKey']]) + str(
                                row[getValue['totalValue']])
                URISpec = URISpecification(def_base_uri, lbl)
                BudgetaryCommitment_tmp = ontology.BudgetaryCommitment(uri=URISpec)
                BudgetaryCommitment_tmp.positionKey += PositionKey_tmp
                BudgetaryCommitment_tmp.commitmentKey += CommitmentKey_tmp
                BudgetaryCommitment_tmp.dctdate += row[getValue['year']]
                actionTypeVal = checkControlledDictionary(
                    actionTypeDict, 'actionType', 'actionTypeDescriptionEn',
                    'actionType')
                BudgetaryCommitment_tmp.actionType += actionTypeVal
                financialManagementAreaBase = config_data['financialManagementAreaBase']
                BudgetaryCommitment_tmp.financialManagementArea += \
                    financialManagementAreaBase + row[getValue['financialManagementArea']]
                expenseTypeBase = config_data['expenseTypeBase']
                expenseTypeMap = config_data['expenseTypeMap']
                BudgetaryCommitment_tmp.expenseType += expenseTypeBase + expenseTypeMap[
                    str(row[getValue['expenseType']])]
                BudgetaryCommitment_tmp.hasBudgetLine += nomenclatureURI
                BudgetaryCommitment_tmp.hasTotalValue += MonetaryValue_tmp
                BudgetaryCommitment_tmp.hasLegalCommitment += LegalCommitment_tmp
                BudgetaryCommitment_tmp.hasIndicativeTransaction += IndicativeTransaction_tmp

                # ----------------------#
                # Create Corporate Body # --> link to EU Budget
                # ----------------------#
                # we will link to URI directly in indicative transaction

            #-----------------------------------------------#
            # Print triples so far to file                   #
            #-----------------------------------------------#
            if ix in batchlimits:
                flushToFile(session, output, compression)

    flushToFile(session, output, compression)
    output.close()

    # ----------------------------#
    # Transform triples to Turtle #
    # ----------------------------#
    if compression > 1:
        print('Transforming the generated triples to Turtle...')
        g = rdflib.Graph()
        g.parse(outputfile, format="nt")
        ttloutput = outputfile.rsplit('.')
        g.serialize(destination=ttloutput[0] + '.ttl', format='turtle')
import sys
import json
import rdflib

wfns = 'http://purl.org/net/wf-invocation#'
exns = 'http://www.wings-workflows.org/ontology/execution.owl#'

runid = sys.argv[1]

# Load the run id
g = rdflib.Graph()
g.load(runid)

# Get the plan uri
run = rdflib.URIRef(runid)
hasPlan = rdflib.URIRef(exns + 'hasPlan')
plan = g.value(run, hasPlan)

# Load the plan
if plan:
    g.load(plan)

# Query for Variable bindings
vbindings = {}
query = 'select ?v ?d where { ?v <' + wfns + 'hasDataBinding> ?d }'
for row in g.query(query):
    varid = str(row.v)
    varname = varid[varid.index("#") + 1:]
    vbindings[varname] = str(row.d)

rundetails = {"runid": runid, "files": vbindings}
print(json.dumps(rundetails))
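# A hedged usage sketch for the script above (the script name and the run URI
# are illustrative assumptions, not part of the original):
#
#   python get_run_details.py http://example.org/wings/runs/run-42
#
# which would print one JSON object of the shape
#   {"runid": "http://example.org/wings/runs/run-42",
#    "files": {"InputVariable1": "file:///.../input1.dat", ...}}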
import json
import rdflib
import requests
from rdflib import URIRef, Literal, BNode
from rdflib.namespace import RDF, SKOS, OWL, Namespace, NamespaceManager, XSD

BDR = Namespace("http://purl.bdrc.io/resource/")
BDO = Namespace("http://purl.bdrc.io/ontology/core/")
BDG = Namespace("http://purl.bdrc.io/graph/")
BDA = Namespace("http://purl.bdrc.io/admindata/")
ADM = Namespace("http://purl.bdrc.io/ontology/admin/")
MBBT = Namespace("http://mbingenheimer.net/tools/bibls/")
CBCT_URI = "https://dazangthings.nz/cbc/text/"
CBCT = Namespace(CBCT_URI)

NSM = NamespaceManager(rdflib.Graph())
NSM.bind("bdr", BDR)
NSM.bind("", BDO)
NSM.bind("bdg", BDG)
NSM.bind("bda", BDA)
NSM.bind("adm", ADM)
NSM.bind("skos", SKOS)
NSM.bind("rdf", RDF)
NSM.bind("cbct", CBCT)
NSM.bind("mbbt", MBBT)


def get_id_for_str(id):
    pass
def load(kb, goal, identification, base):
    kb_stream, goal_stream = kb, goal
    implies = rdflib.URIRef("http://www.w3.org/2000/10/swap/log#implies")
    store = OrderedStore()
    kb_graph = rdflib.Graph(store=store, identifier=base)
    kb_conjunctive = rdflib.ConjunctiveGraph(store=store, identifier=base)
    kb_graph.parse(kb_stream, format='n3', publicID=base)

    if not nolog:
        log('---kb:')
        try:
            for l in kb_graph.serialize(format='n3').splitlines():
                log(l.decode('utf8'))
        except Exception as e:
            log(str(e))
        log('---kb quads:')
        for l in kb_conjunctive.serialize(format='nquads').splitlines():
            log(l.decode('utf8'))
        log('---')

    def fixup3(o):
        if isinstance(o, rdflib.Graph):
            return URIRef(o.identifier)
        return o

    def fixup2(o):
        if type(o) == rdflib.BNode:
            return rdflib.Variable(str(o.lower()))
        return o

    def fixup(spo):
        s, p, o = spo
        return (fixup2(s), fixup2(p), fixup2(o))

    rules = []
    head_triples_triples_id = 0
    kb_graph_triples = [fixup(x) for x in kb_graph.triples((None, None, None))]
    facts = Graph(
        Triple(un_move_me_ize_pred(fixup3(x[1])), [fixup3(x[0]), fixup3(x[2])])
        for x in kb_graph_triples)
    facts.id = head_triples_triples_id
    head_triples_triples_id += 1

    for kb_graph_triple_idx, (s, p, o) in enumerate(kb_graph_triples):
        rules.append(Rule(facts, kb_graph_triple_idx, Graph()))
        if p == implies:
            body = Graph()
            head_triples = [
                fixup(x) for x in kb_conjunctive.triples((None, None, None, o))
            ]
            head_triples_triples = Graph()
            for triple in [
                    Triple(fixup3(x[1]), [fixup3(x[0]), fixup3(x[2])])
                    for x in head_triples
            ]:
                move = False
                if triple.pred == URIRef(
                        'http://www.w3.org/1999/02/22-rdf-syntax-ns#move_me_to_body_first'):
                    triple.pred = URIRef(
                        'http://www.w3.org/1999/02/22-rdf-syntax-ns#first')
                    move = True
                if triple.pred == URIRef(
                        'http://www.w3.org/1999/02/22-rdf-syntax-ns#move_me_to_body_rest'):
                    triple.pred = URIRef(
                        'http://www.w3.org/1999/02/22-rdf-syntax-ns#rest')
                    move = True
                if move:
                    body.append(triple)
                else:
                    head_triples_triples.append(triple)
            head_triples_triples.id = head_triples_triples_id
            head_triples_triples_id += 1

            for body_triple in [
                    fixup(x) for x in kb_conjunctive.triples((None, None, None, s))
            ]:
                body.append(
                    Triple(un_move_me_ize_pred(fixup3(body_triple[1])),
                           [fixup3(body_triple[0]), fixup3(body_triple[2])]))
            #body.reverse()

            to_expand = []
            for triple in head_triples_triples + body:
                for thing in triple.args:
                    if type(thing) == rdflib.Variable:
                        if str(thing).endswith('_'):
                            to_expand.append(thing)
            for thing in to_expand:
                body.insert(
                    0,
                    Triple(rdflib.RDF.first,
                           [thing, rdflib.Variable(str(thing)[:-1] + 'f')]))
                body.insert(
                    0,
                    Triple(rdflib.RDF.rest,
                           [thing, rdflib.Variable(str(thing)[:-1] + 'r')]))

            if len(head_triples_triples) > 1:
                with open(_rules_file_name, 'a') as ru:
                    ru.write(head_triples_triples.str(shorten) + " <= " +
                             body.str(shorten) + ":\n")
            for head_triple_idx in range(len(head_triples_triples)):
                rules.append(Rule(head_triples_triples, head_triple_idx, body))

    goal_rdflib_graph = rdflib.ConjunctiveGraph(store=OrderedStore(),
                                                identifier=base)
    goal_rdflib_graph.parse(goal_stream, format='n3', publicID=base)

    if not nolog:
        log('---goal:')
        try:
            for l in goal_rdflib_graph.serialize(format='n3').splitlines():
                log(l.decode('utf8'))
        except Exception as e:
            log(str(e))
        log('---goal nq:')
        for l in goal_rdflib_graph.serialize(format='nquads').splitlines():
            log(l.decode('utf8'))
        log('---')

    goal = Graph()
    for s, p, o in [
            fixup(x) for x in goal_rdflib_graph.triples((None, None, None, None))
    ]:
        goal.append(
            Triple(un_move_me_ize_pred(fixup3(p)), [fixup3(s), fixup3(o)]))
    #goal.reverse()
    query_rule = Rule([], None, goal)
    return rules, query_rule, goal
def update_RDF(map_base, map_id, map_source, annotations,
               update_knowledgebase=False):
    map_dir = os.path.join(map_base, map_id)

    # RDF generation
    if update_knowledgebase:
        kb_path = os.path.join(map_base, 'KnowledgeBase.sqlite')
        print('Knowledge base: ', kb_path, (not os.path.exists(kb_path)))
        graph = KnowledgeBase(kb_path, create=(not os.path.exists(kb_path)))
    else:
        graph = rdflib.Graph()

    # graph.namespace_manager = NS.SCICRUNCH_NS
    # namespaces_dict = NS.namespaces_dict()
    ## Only really need rdf: obo: fma: FMA: RO: UBERON: ILX: flatmap:
    ## See https://github.com/RDFLib/rdflib/issues/794
    namespaces_dict = {
        'FMA': rdflib.namespace.Namespace('http://purl.org/sig/ont/fma/fma'),
        'ILX': rdflib.namespace.Namespace('http://uri.interlex.org/base/ilx_'),
        'NCBITaxon': rdflib.namespace.Namespace('http://purl.obolibrary.org/obo/NCBITaxon_'),
        'RO': rdflib.namespace.Namespace('http://purl.obolibrary.org/obo/RO_'),
        'UBERON': rdflib.namespace.Namespace('http://purl.obolibrary.org/obo/UBERON_'),
        'fma': rdflib.namespace.Namespace('http://purl.org/sig/ont/fma/'),
        'ilx': rdflib.namespace.Namespace('http://uri.interlex.org/'),
        'obo': rdflib.namespace.Namespace('http://purl.obolibrary.org/obo/'),
    }
    for pfx, ns in namespaces_dict.items():
        graph.bind(pfx, ns, override=True)
    FLATMAP_NS = rdflib.namespace.Namespace('http://celldl.org/ontologies/flatmap/')
    graph.bind('flatmap', FLATMAP_NS, override=True)

    map_uri = rdflib.URIRef(map_source)
    for object_id, metadata in annotations.items():
        if 'error' in metadata:
            print('Error in {} layer: {}: {}'.format(metadata['layer'],
                                                     metadata['error'],
                                                     metadata['annotation']))
            continue
        layer_urls = UrlMaker(map_source, metadata['layer'])
        annotation = metadata['annotation']
        properties = Parser.annotation(annotation)
        feature_id = properties.get('id')
        feature_uri = layer_urls.url(feature_id)
        graph.remove((feature_uri, None, None))
        feature_class = None
        route = {'source': '', 'via': [], 'target': ''}
        for key, value in properties.items():
            if key == 'models':
                prop = namespaces_dict['RO']['0003301']
                (prefix, local) = value.split(':', 1)
                graph.add((feature_uri, prop, namespaces_dict[prefix][local]))
            elif key == 'node':
                feature_class = FLATMAP_NS['Node']
                graph.add((feature_uri, FLATMAP_NS['nodeClass'],
                           FLATMAP_NS[value[0]]))
            elif key == 'edge':
                feature_class = FLATMAP_NS['Edge']
                if len(value) < 2:
                    raise ValueError(
                        'Edge must have a source and target: {}'.format(annotation))
                route['source'] = value[0]
                route['target'] = value[-1]
                route['via'] = value[1:-1]
            elif key in ['source', 'via', 'target']:
                if feature_class is None:
                    feature_class = FLATMAP_NS['Edge']
                elif feature_class != FLATMAP_NS['Edge']:
                    raise ValueError('Only edges can be routed: {}'.format(annotation))
                if key in ['source', 'target']:
                    route[key] = value[0]
                else:
                    route['via'].extend(value)
        if feature_class is None:
            feature_class = FLATMAP_NS['Node']  # Assume we have a Node
        elif feature_class == FLATMAP_NS['Edge']:
            if route['source']:
                graph.add((feature_uri, FLATMAP_NS['source'],
                           layer_urls.url(route['source'])))
            if route['target']:
                graph.add((feature_uri, FLATMAP_NS['target'],
                           layer_urls.url(route['target'])))
            for via in route['via']:
                graph.add((feature_uri, FLATMAP_NS['via'], layer_urls.url(via)))
        graph.add((feature_uri, FLATMAP_NS['map'], map_uri))
        graph.add((feature_uri, rdflib.namespace.RDF['type'], feature_class))

    with open(os.path.join(map_dir, 'annotations.ttl'), 'w') as turtle:
        # Don't set `base=map_uri` until RDFLib 5.0 and then use `explicit_base=True`
        # See https://github.com/RDFLib/rdflib/issues/559
        turtle.write(graph.serialize(format='turtle').decode('utf-8'))
    graph.close()
def run(self, *,
        foodon_to_root_file='../data/out/foodon_to_root_path.pkl',
        recipes_file='../data/out/recipe_ingname_list.json',
        index_dict_file='../data/out/food_index_dict.pkl',
        food_link_files=['../data/in/foodon-links-1.ttl'],
        save_ppmi_dict='../data/out/foodon_ppmi_sim_dict.pkl'):
    with open(foodon_to_root_file, 'rb') as f:
        foodon_to_root_dict = pickle.load(f)

    ################
    g = rdflib.Graph()
    for file in food_link_files:
        g.parse(file, format='ttl')
    food_to_foodon = dict()
    for subj, obj in g.subject_objects(predicate=rdflib.URIRef(
            'http://idea.rpi.edu/heals/kb/equivalentFoodOnClass')):
        food_to_foodon[subj] = obj
    valid_foodon_items = set(
        item[1] for item in food_to_foodon.items()) - IGNORE_INGS

    foodon_super_to_root = defaultdict(lambda: set())
    for foodon_food in valid_foodon_items:
        path_items = foodon_to_root_dict.get(foodon_food, [])
        for item in path_items:
            if item in valid_foodon_items:
                foodon_super_to_root[foodon_food].add(item)
    foodon_super_to_root = {
        key: frozenset(val) for key, val in foodon_super_to_root.items()
    }

    R2V = RecToVec(graph=rdflib.Graph(), food_index_file=index_dict_file)
    with open(recipes_file, 'r') as f:
        recipe_list = json.load(f)
    print("files loaded")

    ing_context_ocurrences = defaultdict(lambda: defaultdict(lambda: 0))
    ing_occurrence_count = defaultdict(lambda: 0)
    context_ocurrences = defaultdict(lambda: 0)
    unique_contexts = set()
    relevant_foods = set()
    completed_recipes = 0
    start = time.time()
    for recipe in recipe_list:
        ings = [food_to_foodon.get(foodkg_ns[ing], 0) for ing in recipe]
        if 0 in ings:
            continue
        ings_set = set(ings) - IGNORE_INGS
        for ing in ings_set:
            context_ings = frozenset(ings_set - {ing})
            unique_contexts.add(context_ings)
            context_ocurrences[context_ings] += 1
            super_foods = foodon_super_to_root[ing]
            for related_ing in super_foods:
                # superclasses in super_foods also include the ing itself
                relevant_foods.add(related_ing)
                ing_context_ocurrences[related_ing][context_ings] += 1
                ing_occurrence_count[related_ing] += 1
        completed_recipes += 1
        if completed_recipes % 10000 == 0:
            print(completed_recipes, ' - time - ', time.time() - start)
            start = time.time()

    total_context_count = len(unique_contexts)
    print('unique contexts: ', total_context_count)

    print("setting up computing ppmi, using foodon relations")
    ind_to_context = []
    context_to_ind = dict()
    for c in context_ocurrences.keys():
        context_to_ind[c] = len(ind_to_context)
        ind_to_context.append(c)
    ing_to_context_ppmi = lil_matrix((len(R2V.food_index), total_context_count))
    finished_count = 0
    for ing in relevant_foods:
        ing_index = R2V.food_index[ing]
        ing_contexts = ing_context_ocurrences.get(ing, None)
        if ing_contexts is None:
            continue
        ing_occ_count = ing_occurrence_count[ing]
        ing_contexts_as_set = frozenset(ing_contexts.keys())
        for c in ing_contexts_as_set:
            i = context_to_ind[c]
            # V1 and V2
            ppmi = max(
                0,
                np.log10((ing_contexts[c] * total_context_count) /
                         (ing_occ_count * context_ocurrences[c])) *
                np.sqrt(max(ing_occ_count, context_ocurrences[c])))
            ing_to_context_ppmi[ing_index, i] = ppmi
            # v4
            # ing_to_context_ppmi[ing_index, i] = ing_contexts[c]
        finished_count += 1
        if finished_count % 100 == 0 or finished_count < 5:
            print('getting ppmi, completed ', finished_count)
        ing_context_ocurrences[ing] = None
    ing_to_context_ppmi = ing_to_context_ppmi.tocsr()

    ing_to_ing_ppmi_sim = dict()
    print("converting to cosine sim...")
    finished_count = 0

    def l2_norm(mat):
        return np.sqrt(np.sum(mat.multiply(mat), axis=1))

    l2n = l2_norm(ing_to_context_ppmi)
    cosine_sim = ing_to_context_ppmi.dot(ing_to_context_ppmi.T) / (l2n.dot(l2n.T))
    for ing1 in relevant_foods:
        ing_to_ing_ppmi_sim[ing1] = dict()
        for ing2 in relevant_foods:
            ing_to_ing_ppmi_sim[ing1][ing2] = cosine_sim[
                R2V.food_index[ing1], R2V.food_index[ing2]]
        finished_count += 1
        if finished_count % 100 == 0 or finished_count < 5:
            print('completed count: ', finished_count)

    print('finished, saving output')
    with open(save_ppmi_dict, 'wb') as f:
        pickle.dump(ing_to_ing_ppmi_sim, f)
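# To sanity-check the weighted PPMI used above on toy counts, here is a minimal,
# self-contained sketch (the numbers are illustrative, not from the dataset):
import numpy as np

ing_context_count = 2      # ing_contexts[c]: ingredient/context co-occurrences
total_context_count = 10   # number of unique contexts
ing_occ_count = 4          # ing_occurrence_count[ing]
context_occ_count = 2      # context_ocurrences[c]

ppmi = max(
    0,
    np.log10((ing_context_count * total_context_count) /
             (ing_occ_count * context_occ_count)) *
    np.sqrt(max(ing_occ_count, context_occ_count)))
print(ppmi)  # log10(20/8) * sqrt(4) ~= 0.398 * 2 ~= 0.796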
import rdflib
from rdflib import RDF, RDFS, Namespace

onto = rdflib.Graph()
onto.parse("file:em-rdfs.n3", format="n3")
print("graph has %s statements." % len(onto))
# prints graph has 79 statements.

for subj, pred, obj in onto:
    if (subj, pred, obj) not in onto:
        raise Exception("It better be!")

# s = onto.serialize(format='n3')
# print(s.decode('UTF-8'))

rml = rdflib.Graph()
rml.parse("file:EM2EM.rml", format="n3")
# s = rml.serialize(format='n3')
# print(s.decode('UTF-8'))

RR = Namespace("http://www.w3.org/ns/r2rml#")
RML = Namespace("http://semweb.mmlab.be/ns/rml#")
QL = Namespace("http://semweb.mmlab.be/ns/ql#")

rml += onto

# ===========================================================================================
# Handling of concept subsumption
def __init__(self, uri, kind, app_label_entities="entities",
             app_label_relations="relations",
             app_label_vocabularies="vocabularies", **kwargs):
    """
    :param uri: (url) Uri to parse the object from (http://test.at). The uri must start
        with a base url mentioned in the RDF parser settings file.
    :param kind: (string) Kind of entity (Person, Place, Institution, Work, Event)
    :param app_label_entities: (string) Name of the Django app that contains the entities
        that we create.
    :param app_label_relations: (string) Name of the Django app that contains the relations
        for the merging process.
    :param app_label_vocabularies: (string) Name of the Django app that contains the
        vocabularies defining the entities and relations.
    """
    owl = "http://www.w3.org/2002/07/owl#"

    def exist(uri):
        if objct.objects.filter(uri__uri=uri).count() > 0:
            return True, objct.objects.get(uri__uri=uri)
        else:
            return False, False

    def prep_string(tupl):
        if isinstance(tupl, str):
            return tupl
        if tupl[1]:
            m = re.match(tupl[1][0], tupl[0])
            group = tupl[1][1]
            if not group:
                group = 0
            try:
                return m.group(group)
            except:
                return tupl[0]
        else:
            r = tupl[0]
        return r.strip()

    objct = ContentType.objects.get(app_label=app_label_entities,
                                    model=kind.lower()).model_class()
    force = kwargs.get('force', None)
    res_attrb = dict()
    labels = []
    related_objcts = []
    uri = harmonize_geonames_id(uri)
    self.uri = uri
    self.kind = kind
    self.saved = False
    test = exist(self.uri)
    if test[0] and not force:
        self.objct = test[1]
        self.created = False
    else:
        self.created = True
        rdf_t = dict()
        for x in sett_RDF_generic[kind]['data']:
            self.settings_defined = False
            if not uri.startswith(x['base_url']):
                continue
            self.settings_defined = True
            g = rdflib.Graph()
            uri_2 = uri
            if not uri_2.endswith('/'):
                uri_2 += '/'
            o2 = rdflib.term.URIRef(uri)
            g.parse('{}{}'.format(uri_2.strip(), x['url_appendix']), format='xml')
            sameas = rdflib.term.URIRef(owl + 'sameAs')
            list_sameas = []
            for p in g.objects(subject=o2, predicate=sameas):
                list_sameas.append(genUri(uri=p))
            self.sameas = list_sameas
            if 'kind' in x.keys():
                for k in x['kind']:
                    kind_rdf = rdflib.term.URIRef(k[0])
                    kind_val = g.value(o2, kind_rdf)
                    if kind_val is not None:
                        break
                    else:
                        kind_val = k[1]
                if kind_val is not None:
                    kind_objct = ContentType.objects.get(
                        app_label=app_label_vocabularies,
                        model=kind.lower() + 'Type'.lower()).model_class()
                    kind_objct, created = kind_objct.objects.get_or_create(
                        name=kind_val)
                    res_attrb['kind'] = kind_objct
            for uri_2 in list_sameas:
                test = exist(uri_2)
                if test[0]:
                    self.objct = test[1]
                    self.created = False
                    uri_3 = genUri(uri=uri, entity=self.objct)
                    uri_3.save()
            for xx in x['attributes']:
                rdf_t[xx['name']] = ()
                subj2 = []
                results = []
                ind_type = ()
                for z in xx['identifiers']:
                    if len(results) > 0:
                        continue
                    cnt = 0
                    cnt_2 = 1
                    try:
                        k = z[cnt_2]
                    except:
                        k = '='
                    subj = [o2, ]
                    while k:
                        for indx, s in enumerate(subj):
                            if z[cnt][0] == 'objects':
                                pred = rdflib.term.URIRef(z[cnt][2])
                                res = g.objects(subject=s, predicate=pred)
                                if type(res) != types.GeneratorType:
                                    break
                                for r in res:
                                    if z[cnt][3]:
                                        if not getattr(r, z[cnt][3][0]) == z[cnt][3][1]:
                                            continue
                                    if k == '>':
                                        subj2.append(r)
                                    elif k == '=':
                                        results.append((z[cnt][1], r, indx))
                                        ind_type += ((len(ind_type), z[cnt][1]),)
                        cnt_2 += 2
                        try:
                            k = z[cnt_2]
                        except:
                            k = '='
                        if cnt + 2 > len(z):
                            k = None
                        cnt += 2
                        subj = subj2
                for attrb in sett_RDF_generic[kind]['matching']['attributes'].keys():
                    res_2 = []
                    for x in sett_RDF_generic[kind]['matching']['attributes'][attrb]:
                        for s in x:
                            for ind, elem in filter(lambda x: x[1] == s[0], ind_type):
                                elem = results[ind][1]
                                res_2.append(prep_string((elem, s[1])))
                            if isinstance(s, str):
                                res_2.append(s)
                        if len(res_2) == len(x):
                            res_attrb[attrb] = ''.join(res_2)
                for lab in sett_RDF_generic[kind]['matching']['labels'].keys():
                    lb_type, created = LabelType.objects.get_or_create(name=lab)
                    for x in sett_RDF_generic[kind]['matching']['labels'][lab]:
                        for ind, elem in filter(lambda a: a[1] == x[0], ind_type):
                            elem = results[ind][1]
                            lb = Label(label=prep_string((elem, x[1])),
                                       isoCode_639_3=elem.language,
                                       label_type=lb_type)
                            labels.append(lb)
                if kwargs.get('drill_down', True):
                    for con in sett_RDF_generic[kind]['matching']['linked objects']:
                        for x in con['object']:
                            for ind, elem in filter(lambda a: a[1] == x[0], ind_type):
                                elem = results[ind][1]
                                ob = GenericRDFParser(elem, con['type'],
                                                      drill_down=False)
                                if ob.created and not ob.saved:
                                    ob.save()
                                # TODO: We should move the save of related objects
                                # in the save routine
                                try:
                                    u = ContentType.objects.get(
                                        app_label=app_label_relations,
                                        model=kind.lower() + con['type'].lower())
                                    u_kind = ContentType.objects.get(
                                        app_label=app_label_vocabularies,
                                        model=kind.lower() + con['type'].lower() +
                                        'Relation'.lower())
                                except ContentType.DoesNotExist:
                                    u = ContentType.objects.get(
                                        app_label=app_label_relations,
                                        model=con['type'].lower() + kind.lower())
                                    u_kind = ContentType.objects.get(
                                        app_label=app_label_vocabularies,
                                        model=con['type'].lower() + kind.lower() +
                                        'Relation'.lower())
                                u_kind_2 = u_kind.model_class()
                                u2 = u.model_class()()
                                uk, created = u_kind_2.objects.get_or_create(
                                    name=con['kind'])
                                if con['type'] == kind:
                                    setattr(u2,
                                            'related_' + con['type'].lower() + 'B_id',
                                            ob.objct.pk)
                                else:
                                    setattr(u2,
                                            'related_' + con['type'].lower() + '_id',
                                            ob.objct.pk)
                                setattr(u2, 'relation_type_id', uk.pk)
                                related_objcts.append(u2)
        self.objct = objct(**res_attrb)
        self.labels = labels
        self.related_objcts = related_objcts
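# A hedged usage sketch for the parser above (the GeoNames URI and entity kind
# are illustrative; sett_RDF_generic and the Django apps must be configured as
# the docstring describes):
#
#   parser = GenericRDFParser('http://www.geonames.org/2761369/', 'Place')
#   if parser.created and not parser.saved:
#       parser.save()  # persists the entity plus the harvested labels/relations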
def main():
    Total = dict()
    Tagsets = dict()
    TagsetsToTags = dict()
    Equipment = dict()
    print("hi")
    BRICK = rdflib.Namespace('https://brickschema.org/schema/1.0.1/Brick#')
    BRICKFRAME = rdflib.Namespace('https://brickschema.org/schema/1.0.1/BrickFrame#')
    GHC = rdflib.Namespace('http://cmu.edu/building/ontology/ghc#')
    # RDF, RDFS and OWL have already been imported in the library initializations
    print(GHC["test"])

    # Initiate graph from base ttl file
    g = rdflib.Graph()
    g.bind('GHC', GHC)
    g.bind('brick', BRICK)
    # new = rdflib.Graph()
    # new.parse('GHCYuvraj_brick.ttl', format='ttl')
    # g.parse('../BuildingSchema/Brick.ttl', format='turtle')
    count1 = 0
    count2 = 0
    changeablemapping = dict()
    with open('CMU_GHC.csv', 'rU') as DataFile:
        with open('CMU_AHU_OddBuildingTagSet.csv', 'rU') as Mapping:
            with open('TagSets.csv', 'rU') as GDocs:
                changeable = csv.DictReader(GDocs)
                for row in changeable:
                    Value = row['Dimension']
                    Key = row['TagSet']
                    Key = re.sub(' ', '_', Key)
                    Values = Value.split('>')
                    if (len(Values) > 1):
                        changeablemapping[Key] = Values
                        # print(Values)
                reader = csv.DictReader(Mapping)
                for row in reader:
                    # print(row['Bas1'], row['TagSet'], row['Tags'])
                    BasTag = row['Bas1']
                    ListBasTag = BasTag.split('/')
                    length = len(ListBasTag)
                    Key = ListBasTag[length - 1]
                    # print(Key)
                    x = row['TagSet']
                    NewX = re.sub(' ', '_', x)
                    Key = re.sub(' ', '_', Key)
                    # print(NewX)
                    Tagsets[Key] = NewX
                    Tags = row['Tags']
                    listTags = Tags.split(';')
                    TagsetsToTags[NewX] = listTags
                    if (NewX in changeablemapping.keys()):
                        # print("1")
                        pass
                    else:
                        print("2", NewX)
                        # print(ListBasTag)
                MapReader = csv.reader(Mapping, delimiter=' ', quotechar='|')
                # for row in MapReader:
                #     print(row)
                reader = csv.DictReader(DataFile)
                # g.add((GHC['GHC_HVAC'], RDF.type, OWL.NamedIndividual))
                # g.add((GHC['GHC_HVAC'], RDF.type, BRICK['HVAC']))
                for row in reader:
                    New = row['bas_raw']
                    ListBasTag = New.split('/')
                    length = len(ListBasTag)
                    Key = ListBasTag[length - 1]
                    Key = re.sub(' ', '_', Key)
                    y = 0
                    if ('Parking' in ListBasTag[2]):
                        y = 1
                    NewKey = ListBasTag[1] + '/' + ListBasTag[3 + y] + '/' + Key
                    NewKey = re.sub(' ', '_', NewKey)
                    x = GHC[NewKey]
                    # g.add((Key, RDF.type, OWL.NamedIndividual))
                    # g.add((Key, RDF.type, BRICK[Tagsets[key]]))
                    # print(Key)
                    Equip = ""
                    BelongsTo = ""
                    if Key in Tagsets:
                        Total[Key] = 1
                        count1 += 1
                        g.add((x, RDF.type, OWL.NamedIndividual))
                        g.add((x, RDF.type, BRICK[Tagsets[Key]]))
                        location = ""
                        for i in range(0, 3 + y):
                            location = location + ListBasTag[i]
                        location = re.sub(' ', '_', location)
                        g.add((GHC[location], RDF.type, OWL.NamedIndividual))
                        g.add((GHC[location], RDF.type, BRICK["Location"]))
                        # g.add((x, BRICK.hasLocation, GHC[location]))
                        if 'AHU' in ListBasTag[3 + y]:
                            Equip = "AHU"
                        elif 'VAV' in ListBasTag[3 + y] or 'FSB' in ListBasTag[3 + y]:
                            # print(ListBasTag[3 + y])
                            Equip = "VAV"
                        elif 'CRAC' in ListBasTag[3 + y]:
                            Equip = "CRAC"
                        elif 'FCU' in ListBasTag[3 + y]:
                            # print("HELLO")
                            Equip = "Fan_Coil_Unit"
                        else:
                            # print(ListBasTag[3 + y], NewKey)
                            if ('Usage' in NewKey or 'Peak' in NewKey):
                                Equip = "Meter"
                            else:
                                pass
                                # print(NewKey)
                        # mapping = changeablemapping[Tagsets[Key]]
                        # if(len(mapping) == 4):
                        #     Equip = re.sub(' ', '_', mapping[3])
                        #     BelongsTo = ""
                        # if(len(mapping) == 3):
                        #     Equip = re.sub(' ', '_', mapping[2])
                        #     BelongsTo = re.sub(' ', '_', mapping[1])
                        # if(len(mapping) > 4):
                        #     Equip = re.sub(' ', '_', mapping[4])
                        #     BelongsTo = re.sub(' ', '_', mapping[3])
                        # LowestEquip = mapping[len(mapping) - 1]
                        # NewEquipment = ListBasTag[1] + '/' + ListBasTag[3 + y] + '/' + Equip
                        # NewBelongs = ListBasTag[1] + '/' + ListBasTag[3 + y] + '/' + BelongsTo
                        # NewEquipment = re.sub(' ', '_', NewEquipment)
                        # NewBelong = re.sub(' ', '_', NewBelongs)
                        blank = re.sub(' ', '_', ListBasTag[3 + y])
                        # print(changeablemapping[Tagsets[Key]], Key)
                        # print(blank, ListBasTag[3 + y])
                        # NewEquip = NewEquipment
                        # print(NewBelong, NewEquip, x)
                        if not (blank in Equipment) and not 'Interface' in ListBasTag[
                                3 + y] and Equip != "":
                            Equipment[blank] = 1
                            g.add((GHC[blank], RDF.type, OWL.NamedIndividual))
                            g.add((GHC[blank], RDF.type, BRICK[Equip]))
                            if (Equip != "Meter" and Equip != "AHU"):
                                number = re.search('[0-9]+', blank)
                                floor = number.group()[0]
                                g.add((GHC[floor + "Floor"], RDF.type, BRICK["Floor"]))
                                g.add((GHC[blank + "Room"], BRICKFRAME.hasPart,
                                       GHC[blank + "Room"]))
                                g.add((GHC[blank + "Room"], RDF.type,
                                       OWL.NamedIndividual))
                                g.add((GHC[blank + "Room"], RDF.type, BRICK["Room"]))
                                if (Equip != "FCU"):
                                    g.add((GHC[blank + "Zone"], RDF.type,
                                           OWL.NamedIndividual))
                                    g.add((GHC[blank + "Zone"], RDF.type,
                                           BRICK["HVAC_Zone"]))
                                    g.add((GHC[blank + "Zone"], BRICKFRAME.hasPoint,
                                           GHC[blank + "Room"]))
                                    g.add((GHC[blank + "Room"], BRICKFRAME.isPointOf,
                                           GHC[blank + "Zone"]))
                                    g.add((GHC[blank], BRICKFRAME.feeds,
                                           GHC[blank + "Zone"]))
                                g.add((GHC[blank], BRICKFRAME.feeds,
                                       GHC[blank + "Room"]))
                                g.add((GHC[blank + "Room"], BRICKFRAME.isFedBy,
                                       GHC[blank]))
                                g.add((GHC[blank + "Zone"], BRICKFRAME.isFedBy,
                                       GHC[blank]))
                                g.add((x, BRICKFRAME.isLocatedIn, GHC[blank + "Room"]))
                                g.add((GHC[blank + "Room"], BRICKFRAME.contains, x))
                                # g.add((GHC[blank], BRICH.isLocatedIn, GHC[location]))
                        # if not(NewBelong in Equipment):
                        #     g.add((GHC[NewBelong], RDF.type, OWL.NamedIndividual))
                        #     g.add((GHC[NewBelong], RDF.type, BRICK[BelongsTo]))
                        #     Equipment[NewBelong] = 1
                        #     g.add((GHC[NewEquip], BRICK.hasLocation, GHC[location]))
                        # print(NewBelong, NewEquip)
                        # g.add((GHC[NewEquip], BRICK.isPartOf, GHC[NewBelong]))
                        # g.add((GHC[NewEquip], BRICK.hasLocation, GHC[location]))
                        if (Equip != ""):
                            g.add((GHC[blank], BRICKFRAME.hasPoint, x))
                            g.add((GHC[blank + "Room"], BRICKFRAME.hasPoint, x))
                            print(x)
                            g.add((x, BRICKFRAME.isPointOf, GHC[blank]))
                            g.add((x, BRICKFRAME.isPointOf, GHC[blank + "Room"]))
                        # if Equip == "":
                        #     g.add((GHC['GHC_HVAC'], BRICK.hasPoint, x))
                    else:
                        Total[Key] = 1
                        # print(Key)
                        count2 += 1
                    # if ('AHU' in ListBasTag[3]):
                    #     print("3", ListBasTag[3])
                    # if ('AHU' in ListBasTag[4]):
                    #     print("4", ListBasTag[4])

    for item in TagsetsToTags.keys():
        x = BRICK[item]
        for value in TagsetsToTags[item]:
            g.add((x, BRICKFRAME.hasTag, BRICK[value]))

    with open('AHURelations.csv', 'rU') as relations:
        reader = csv.DictReader(relations)
        for row in reader:
            new = re.sub('_', '-', row['First'])
            g.add((GHC[new + '_I'], BRICKFRAME.feeds, GHC[row['Third']]))

    # g.add((GHC["AHU-1_Zone-Temperature"], RDF.type, OWL.NamedIndividual))
    if ((BRICK["Run_Request"], None, None) in g):
        print("Hi")
    # g.add((GHC["AHU-1_Zone-Temperature"], RDF.type, BRICK["Zone_Temp"]))
    # g.add((GHC["VAV1"], BRICK.hasPoint, GHC["AHU-1_Zone-Temperature"]))
    g.serialize(destination='GHC_brick.ttl', format='turtle')
    print(count1)
    print(count2)
    print(len(Total.keys()))
def test_str(self):
    self.assertIsInstance(self.graph.to_str(), str)
    g2 = rdflib.Graph()
    g2.parse(data=str(self.graph), format="ttl")
def run(self):
    if self.target_graph is not None:
        the_target_graph = self.target_graph
    else:
        has_cloned = False
        if self.ont_graph is not None:
            # creates a copy of self.data_graph, doesn't modify it
            the_target_graph = self.mix_in_ontology()
            has_cloned = True
        else:
            the_target_graph = self.data_graph
        inference_option = self.options.get('inference', 'none')
        if inference_option and not self.pre_inferenced and str(
                inference_option) != "none":
            if not has_cloned and not self.inplace:
                the_target_graph = clone_graph(the_target_graph)
            self._run_pre_inference(the_target_graph, inference_option,
                                    self.logger)
            self.pre_inferenced = True
        self._target_graph = the_target_graph

    shapes = self.shacl_graph.shapes  # This property getter triggers shapes harvest.
    if self.options['advanced']:
        target_types = gather_target_types(self.shacl_graph)
        advanced = {
            'functions': gather_functions(self.shacl_graph),
            'rules': gather_rules(self.shacl_graph)
        }
        for s in shapes:
            s.set_advanced(True)
        apply_target_types(target_types)
    else:
        advanced = {}

    if isinstance(the_target_graph, (rdflib.Dataset, rdflib.ConjunctiveGraph)):
        named_graphs = [
            rdflib.Graph(the_target_graph.store, i,
                         namespace_manager=the_target_graph.namespace_manager)
            if not isinstance(i, rdflib.Graph) else i
            for i in the_target_graph.store.contexts(None)
        ]
    else:
        named_graphs = [the_target_graph]

    reports = []
    non_conformant = False
    for g in named_graphs:
        if advanced:
            apply_functions(advanced['functions'], g)
            apply_rules(advanced['rules'], g)
        for s in shapes:
            _is_conform, _reports = s.validate(g)
            non_conformant = non_conformant or (not _is_conform)
            reports.extend(_reports)
        if advanced:
            unapply_functions(advanced['functions'], g)

    v_report, v_text = self.create_validation_report(
        self.shacl_graph, not non_conformant, reports)
    return (not non_conformant), v_report, v_text
def __init__(self, path):
    self.path = path
    install_rdf_path = os.path.join(path, 'install.rdf')
    self.rdf = rdflib.Graph().parse(open(install_rdf_path))
    self.package_type = None
    self.find_root()  # Will set self.package_type
def testEmpty(self):
    g = rdflib.Graph()
    s = g.serialize(format='trig')
    self.assertTrue(s is not None)
def gen_scale_free_graph(
        destination_folder: Path,
        vertices_number: int,
        vertices_degree: int,
        labels: Tuple[str, ...] = ('A', 'B', 'C', 'D')) -> Path:
    """
    Generates scale free graph

    :param destination_folder: directory to save the graph
    :type destination_folder: Path
    :param vertices_number: number of vertices in the graph
    :type vertices_number: int
    :param vertices_degree: degree of a vertex in the graph
    :type vertices_degree: int
    :param labels: edge labels in the graph
    :type labels: Tuple[str, ...]
    :return: path to generated graph
    :rtype: Path
    """
    g = {
        i: [(j, np.random.choice(labels)) for j in range(vertices_degree)]
        for i in range(vertices_degree)
    }
    degree = [3] * vertices_degree

    for i in range(vertices_degree, vertices_number):
        to_vertices = np.random.choice(range(i),
                                       size=vertices_degree,
                                       replace=False,
                                       p=np.array(degree) / sum(degree))
        g[i] = []
        degree.append(0)
        for to in to_vertices:
            label = np.random.choice(labels)
            g[i].append((to, label))
            degree[to] += 1
            degree[i] += 1

    output_graph = rdflib.Graph()
    edges = list()
    for v in g:
        for to in g[v]:
            edges.append((v, to[1], to[0]))
    for subj, pred, obj in tqdm(
            edges,
            desc=f'scale_free_graph_{vertices_number}_{vertices_degree} generation'):
        add_rdf_edge(subj, pred, obj, output_graph)

    target = destination_folder / f'scale_free_graph_{vertices_number}_{vertices_degree}.xml'
    write_to_rdf(target, output_graph)
    return target
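# A minimal usage sketch for the generator above (the output directory and the
# parameter values are illustrative assumptions):
#
#   target = gen_scale_free_graph(Path('./graphs'), vertices_number=100,
#                                 vertices_degree=3, labels=('A', 'B'))
#   print(target)  # graphs/scale_free_graph_100_3.xml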
def __init__(self, zip_file, certinfo=None):
    self.zip_file = zip_file
    self.certinfo = certinfo
    self.rdf = rdflib.Graph().parse(data=zip_file.read('install.rdf'))
    self.package_type = None
    self.find_root()  # Will set self.package_type
import sys, string
import urllib.request, urllib.parse
import datetime, random, rdflib as r

aa = r.Namespace("http://purl.org/socialparticipation/aa/")
xsd = r.namespace.XSD
rdf = r.namespace.RDF

if len(sys.argv) == 1:
    print("usage: aa this is a aa shout, for registering ongoing work")
else:
    shout = " ".join(sys.argv[1:])
    if aamongo:
        urllib.request.urlretrieve(
            "http://aaserver.herokuapp.com/shout?nick=%s&shout=%s" %
            (NICK, urllib.parse.quote(shout)))
        print("shout mongo logged")
    if ORe:
        g = r.Graph()
        # ID is datetime with milliseconds and a 5 digit random number
        tid = str(datetime.datetime.now().timestamp())
        tid += ''.join(["%s" % random.randint(0, 9) for num in range(0, 5)])
        uri = aa.Shout + "#" + tid
        g.add((uri, rdf.type, aa.Shout))
        g.add((uri, aa.provenance, r.Literal("ORe", datatype=xsd.string)))
        uri_ = aa.User + "#" + NICK
        g.add((uri_, rdf.type, aa.User))
        g.add((uri_, aa.nick, r.Literal(NICK, datatype=xsd.string)))
        g.add((uri, aa.shoutMessage, r.Literal(shout, datatype=xsd.string)))
        g.add((uri, aa.created,
               r.Literal(datetime.datetime.now(), datatype=xsd.dateTime)))
        g.add((uri, aa.mongoDuplicate, r.Literal(aamongo, datatype=xsd.boolean)))
def test_broken_add():
    g = rdflib.Graph()
    nose.tools.assert_raises(AssertionError, lambda: g.add((1, 2, 3)))
    nose.tools.assert_raises(AssertionError, lambda: g.addN([(1, 2, 3, g)]))
import rdflib

g = rdflib.Graph().parse('../data-unreified.ttl', format='ttl')

all_rdf = []
i = 1
for s, p, o in g:
    rdf = '''
:s{} a rdf:Statement ;
    rdf:subject {} ;
    rdf:predicate {} ;
    rdf:object {} ;
    dct:created "2019-07-10"^^xsd:date ;
    loci:hadGenerationMethod :method ;
.'''.format(
        str(i).zfill(3),
        s.replace('http://linked.data.gov.au/def/lgpc/', 'lgpc:'),
        p.replace('http://www.w3.org/2004/02/skos/core#', 'skos:'),
        o.replace('http://linked.data.gov.au/def/gpc/', 'gpc:'))
    all_rdf.append(rdf)
    i += 1

with open('../data.ttl', 'w') as f:
    f.write('\n'.join(all_rdf))
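# For a single input triple, the template above emits one reified statement of
# this shape (the subject/predicate/object here are illustrative; the prefix
# declarations are assumed to live elsewhere in data.ttl):
#
#   :s001 a rdf:Statement ;
#       rdf:subject lgpc:some-concept ;
#       rdf:predicate skos:exactMatch ;
#       rdf:object gpc:another-concept ;
#       dct:created "2019-07-10"^^xsd:date ;
#       loci:hadGenerationMethod :method ;
#   .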
""" 利用serialize将数据以一定规范存储到硬盘中 """ import rdflib graph = rdflib.Graph() # work with the graph: s = rdflib.URIRef('牛膝') p = rdflib.URIRef('功效属性') o = rdflib.URIRef("活血") graph.add((s, p, o)) # 以n3的格式存储 graph.serialize('zhongyaoyao.rdf', format='n3') s = rdflib.URIRef("http://www.example.org/牛膝") p = rdflib.URIRef("http://www.example.org/功效属性") o = rdflib.URIRef("http://www.example.org/活血") g1 = rdflib.Graph() g1.add((s, p, o)) g1.serialize('zhongyaoyao1.rdf') # 默认以'xml'格式存储 g2 = rdflib.Graph() g2.parse('zhongyaoyao1.rdf', format='xml') # 解析rdf文件时,需要指定格式 subject = g2.subjects(p, o) for i in subject: print("i:", i)
        assert type(x).__name__ == "str"
    except AssertionError:
        print("not a string?", type(x), x)
    return x


def write_jsonld(filename, graph, vocab="vocab.json"):
    """
    serialize the given graph as JSON-LD output
    """
    with open(vocab, "r") as f:
        context = json.load(f)
    with open(filename, "wb") as f:
        f.write(graph.serialize(format="json-ld", context=context, indent=2))


if __name__ == "__main__":
    # load the graph
    filename = sys.argv[1]
    graph = rdflib.Graph().parse(filename, format="n3")

    # enumerate all of the relations
    for subj, pred, obj in graph:
        print(subj, pred, obj)

    # serialize the graph as JSON-LD
    filename = "tmp.jsonld"
    write_jsonld(filename, graph)
def getDatesWD(period):
    """ query wikidata to get dates of periods.
    Upload to the triplestore to store the information for fast retrieval """
    queryWdDates = """
    SELECT ?start_date ?end_date
    WHERE {
        OPTIONAL {<""" + period + """> <http://www.wikidata.org/prop/direct/P580> ?start_date_1 } .
        OPTIONAL {<""" + period + """> <http://www.wikidata.org/prop/direct/P571> ?start_date_2 } .
        OPTIONAL {<""" + period + """> <http://www.wikidata.org/prop/direct/P361> ?broader_period .
                  ?broader_period <http://www.wikidata.org/prop/direct/P571> ?start_date_3 } .
        OPTIONAL {<""" + period + """> <http://www.wikidata.org/prop/direct/P361> ?broader_period .
                  ?broader_period <http://www.wikidata.org/prop/direct/P580> ?start_date_3_1 } .
        OPTIONAL {<""" + period + """> <http://www.wikidata.org/prop/direct/P2596> ?culture .
                  ?culture <http://www.wikidata.org/prop/direct/P571> ?start_date_4 } .
        OPTIONAL {<""" + period + """> <http://www.wikidata.org/prop/direct/P2348> ?culture .
                  ?culture <http://www.wikidata.org/prop/direct/P580> ?start_date_5 } .
        BIND(COALESCE(?start_date_1, ?start_date_2, ?start_date_3, ?start_date_3_1, ?start_date_4, ?start_date_5) AS ?start_date) .
        OPTIONAL {<""" + period + """> <http://www.wikidata.org/prop/direct/P582> ?end_date_1 } .
        OPTIONAL {<""" + period + """> <http://www.wikidata.org/prop/direct/P2348> ?culture .
                  ?culture <http://www.wikidata.org/prop/direct/P582> ?end_date_2 } .
        OPTIONAL {<""" + period + """> <http://www.wikidata.org/prop/direct/P361> ?broader_period .
                  ?broader_period <http://www.wikidata.org/prop/direct/P582> ?end_date_3 } .
        BIND(COALESCE(?end_date_1, ?end_date_2, ?end_date_3) AS ?end_date) .
    }
    """
    sparqlWD = SPARQLWrapper(conf.wikidataEndpoint)
    sparqlWD.setQuery(queryWdDates)
    sparqlWD.setReturnFormat(JSON)
    resultsWD = sparqlWD.query().convert()

    base = 'https://w3id.org/artchives/'
    wd = rdflib.Graph(identifier=URIRef(base + 'wd/'))
    WDP = Namespace("http://www.wikidata.org/prop/direct/")
    for resultWD in resultsWD["results"]["bindings"]:
        if "start_date" in resultWD:
            start_date = resultWD["start_date"]["value"]
            wd.add((URIRef(period),
                    URIRef("http://www.wikidata.org/prop/direct/P580"),
                    Literal(start_date, datatype=XSD.dateTime)))
        else:
            start_date = 'no date'
        if "end_date" in resultWD:
            end_date = resultWD["end_date"]["value"]
            wd.add((URIRef(period),
                    URIRef("http://www.wikidata.org/prop/direct/P582"),
                    Literal(end_date, datatype=XSD.dateTime)))
        else:
            end_date = 'no date'

    recordID = period.split("entity/", 1)[1] if 'entity' in period else period.split(
        "artchives/", 1)[1]
    if len(wd) == 0:
        wd.add((URIRef(period),
                URIRef("https://w3id.org/artchives/wikidataReconciliation"),
                Literal("no data added")))

    # Create a copy in folder /records and load on the triplestore
    wd.serialize(destination='records/' + recordID + '.trig',
                 format='trig', encoding='utf-8')
    server.update('load <file:///' + dir_path + '/records/' + recordID + '.trig>')
    return [start_date, end_date]
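# A hedged usage sketch for the function above (the Wikidata entity is an
# illustrative assumption; conf.wikidataEndpoint, server and dir_path must
# already be configured as the function expects):
#
#   start, end = getDatesWD('http://www.wikidata.org/entity/Q4692')
#   print(start, end)  # xsd:dateTime strings, or 'no date' when Wikidata has none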
def __init__(self): """Initialize the session.""" self._registry = Registry() self.root = None self.graph = rdflib.Graph()
import rdflib
from sdotermsource import *
from sdoterm import *
from localmarkdown import Markdown

Markdown.setWikilinkCssClass("localLink")
Markdown.setWikilinkPrePath("/")

if VOCABURI.startswith("https://"):
    triplesfile = "../data/schemaorg-all-https.nt"
else:
    triplesfile = "../data/schemaorg-all-http.nt"

termgraph = rdflib.Graph()
termgraph.parse(triplesfile, format="nt")
print("loaded %s triples" % len(termgraph))

SdoTermSource.setSourceGraph(termgraph)
print("Types Count: %s" % len(SdoTermSource.getAllTypes(expanded=False)))
print("Properties Count: %s" % len(SdoTermSource.getAllProperties(expanded=False)))

for termname in ["acceptedAnswer", "Book"]:
    term = SdoTermSource.getTerm(termname)
    print("")
    print("TYPE: %s" % term.termType)
    print("URI: %s" % term.uri)
def __init__(self, model_uri, sparql_wrapper=None, threshold=0.3,
             include_body: bool = False, resolve: bool = True,
             use_caching: bool = False):
    self._graph = rdflib.Graph()
    self.thesoz = SkosThesaurusMatcher(
        self._graph, thesaurus_path="claimskg/data/thesoz-komplett.xml",
        skos_xl_labels=True, prefix="http://lod.gesis.org/thesoz/")
    self._graph = self.thesoz.get_merged_graph()
    self.unesco = SkosThesaurusMatcher(
        self._graph, thesaurus_path="claimskg/data/unesco-thesaurus.xml",
        skos_xl_labels=False, prefix="http://vocabularies.unesco.org/thesaurus/")
    self._graph = self.unesco.get_merged_graph()
    # Graph.load() is deprecated (and removed in newer rdflib); parse() is
    # the equivalent call for a local Turtle file.
    self._graph.parse("claimskg/data/dbpedia_categories_lang_en_skos.ttl",
                      format="turtle")

    self._sparql_wrapper = sparql_wrapper  # type: SPARQLWrapper
    self._uri_generator = ClaimsKGURIGenerator(model_uri)
    self._threshold = threshold
    self._include_body = include_body
    self._resolve = resolve
    self._use_caching = use_caching
    self.model_uri = model_uri

    self._namespace_manager = NamespaceManager(Graph())
    self._claimskg_prefix = rdflib.Namespace(model_uri)
    self._namespace_manager.bind('claimskg', self._claimskg_prefix,
                                 override=False)
    self._namespace_manager.bind('base', self._claimskg_prefix,
                                 override=True)

    self.counter = TypedCounter()

    self._rdfs_prefix = rdflib.Namespace(
        "http://www.w3.org/2000/01/rdf-schema#")
    self._namespace_manager.bind('rdfs', self._rdfs_prefix, override=False)
    self._schema_prefix = rdflib.Namespace("http://schema.org/")
    self._namespace_manager.bind('schema', self._schema_prefix,
                                 override=False)
    self._namespace_manager.bind('owl', OWL, override=True)

    self._dbo_prefix = rdflib.Namespace("http://dbpedia.org/ontology/")
    self._namespace_manager.bind("dbo", self._dbo_prefix, override=False)
    self._dbr_prefix = rdflib.Namespace("http://dbpedia.org/resource/")
    self._namespace_manager.bind("dbr", self._dbr_prefix, override=False)
    self._dbc_prefix = rdflib.Namespace(
        "http://dbpedia.org/resource/Category_")
    # Fixed: the original bound "dbc" to self._dbr_prefix.
    self._namespace_manager.bind("dbc", self._dbc_prefix, override=False)
    self._dcat_prefix = rdflib.Namespace("http://www.w3.org/ns/dcat#")
    self._namespace_manager.bind("dcat", self._dcat_prefix, override=False)
    self._dct_prefix = rdflib.Namespace("http://purl.org/dc/terms/")
    self._namespace_manager.bind("dct", self._dct_prefix, override=False)
    self._foaf_prefix = rdflib.Namespace("http://xmlns.com/foaf/0.1/")
    self._namespace_manager.bind("foaf", self._foaf_prefix, override=False)
    self._vcard_prefix = rdflib.Namespace("http://www.w3.org/2006/vcard/ns#")
    self._namespace_manager.bind("vcard", self._vcard_prefix, override=False)
    self._adms_prefix = Namespace("http://www.w3.org/ns/adms#")
    self._namespace_manager.bind("adms", self._adms_prefix, override=False)
    self._skos_prefix = Namespace("http://www.w3.org/2004/02/skos/core#")
    self._namespace_manager.bind("skos", self._skos_prefix, override=False)

    self._owl_same_as = URIRef(OWL['sameAs'])

    self._schema_claim_review_class_uri = URIRef(
        self._schema_prefix['ClaimReview'])
    self._schema_creative_work_class_uri = URIRef(
        self._schema_prefix['CreativeWork'])
    self._schema_organization_class_uri = URIRef(
        self._schema_prefix['Organization'])
    self._schema_thing_class_uri = URIRef(self._schema_prefix['Thing'])
    self._schema_rating_class_uri = URIRef(self._schema_prefix['Rating'])
    self._schema_language_class_uri = URIRef(self._schema_prefix['Language'])

    self._schema_claim_reviewed_property_uri = URIRef(
        self._schema_prefix['claimReviewed'])
    self._schema_url_property_uri = URIRef(self._schema_prefix['url'])
    self._schema_name_property_uri = URIRef(self._schema_prefix['name'])
    self._schema_date_published_property_uri = URIRef(
        self._schema_prefix['datePublished'])
    # Fixed typo: "preperty" -> "property" in two attribute names below.
    self._schema_in_language_property_uri = URIRef(
        self._schema_prefix['inLanguage'])
    self._schema_author_property_uri = URIRef(self._schema_prefix['author'])
    self._schema_same_as_property_uri = URIRef(self._schema_prefix['sameAs'])
    self._schema_citation_property_uri = URIRef(
        self._schema_prefix['citation'])
    self._schema_item_reviewed_property_uri = URIRef(
        self._schema_prefix['itemReviewed'])
    self._schema_alternate_name_property_uri = URIRef(
        self._schema_prefix['alternateName'])
    self._schema_description_property_uri = URIRef(
        self._schema_prefix['description'])
    self._schema_rating_value_property_uri = URIRef(
        self._schema_prefix['ratingValue'])
    self._schema_mentions_property_uri = URIRef(
        self._schema_prefix['mentions'])
    self._schema_keywords_property_uri = URIRef(
        self._schema_prefix['keywords'])
    self._schema_headline_property_uri = URIRef(
        self._schema_prefix['headline'])
    self._schema_review_body_property_uri = URIRef(
        self._schema_prefix['reviewBody'])
    self._schema_text_property_uri = URIRef(self._schema_prefix['text'])

    self._iso1_language_tag = "en"
    self._iso3_language_tag = "eng"

    self._english_uri = URIRef(self._claimskg_prefix["language/English"])
    self._graph.add(
        (self._english_uri, RDF.type, self._schema_language_class_uri))
    self._graph.add(
        (self._english_uri, self._schema_alternate_name_property_uri,
         Literal(self._iso1_language_tag)))
    self._graph.add((self._english_uri, self._schema_name_property_uri,
                     Literal("English")))

    self._nif_prefix = rdflib.Namespace(
        "http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#")
    self._namespace_manager.bind('nif', self._nif_prefix, override=False)
    self._nif_RFC5147String_class_uri = URIRef(
        self._nif_prefix['RFC5147String'])
    self._nif_context_class_uri = URIRef(self._nif_prefix['Context'])
    self._nif_source_url_property_uri = URIRef(self._nif_prefix['sourceUrl'])
    self._nif_begin_index_property_uri = URIRef(
        self._nif_prefix["beginIndex"])
    self._nif_end_index_property_uri = URIRef(self._nif_prefix["endIndex"])
    self._nif_is_string_property_uri = URIRef(self._nif_prefix["isString"])

    self._its_prefix = rdflib.Namespace("https://www.w3.org/2005/11/its/rdf#")
    self._namespace_manager.bind('itsrdf', self._its_prefix, override=False)
    self.its_ta_confidence_property_uri = URIRef(
        self._its_prefix['taConfidence'])
    self.its_ta_ident_ref_property_uri = URIRef(
        self._its_prefix['taIdentRef'])

    self._logical_view_claims = []  # type: List[ClaimLogicalView]
    self._creative_works_index = []
    self.keyword_uri_set = set()

    self.global_statistics = ClaimsKGStatistics()
    self.per_source_statistics = {}
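# A minimal sketch (independent of the ClaimsKG classes) of what the
# NamespaceManager bindings above buy you: bound prefixes are used when the
# graph is serialized. The example triple and URIs are illustrative.
from rdflib import Graph, Literal, Namespace, URIRef

g = Graph()
schema = Namespace("http://schema.org/")
g.namespace_manager.bind("schema", schema, override=False)
g.add((URIRef("http://example.org/claim/1"),
       schema.claimReviewed,
       Literal("Example claim text")))
# The Turtle output abbreviates the predicate as schema:claimReviewed.
print(g.serialize(format="turtle"))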
def validate(goldenset, results):
    with codecs.open(goldenset, 'rb', encoding='utf-8') as goldensetfile, \
            codecs.open(results, 'rb', encoding='utf-8') as resultsfile:
        a = rdflib.Graph()
        a.parse(goldensetfile, format='n3')
        r = rdflib.Graph()
        r.parse(resultsfile, format='n3')

        tweets = {}
        offsets = {}
        multiword = {}

        # Tweet extraction
        for s, p, o in a:
            if s.endswith(',') and p.endswith('isString'):
                id = s.split('#')[0]
                tweets[id] = o

        # Multiword entities are extracted: two nearby mentions in the same
        # tweet joined by " of " or "/" are merged into one candidate mention
        for s, p, o in a:
            if p.endswith('anchorOf'):
                id = s.split('#')[0]
                offsets[s] = o
                for offset in offsets.keys():
                    startoffset1 = int(s.split('#char=')[1].split(',')[0])
                    endoffset1 = int(s.split('#char=')[1].split(',')[1])
                    startoffset2 = int(offset.split('#char=')[1].split(',')[0])
                    endoffset2 = int(offset.split('#char=')[1].split(',')[1])
                    if (id == offset.split('#')[0]
                            and startoffset1 != startoffset2
                            and abs(endoffset1 - startoffset2) < 5):
                        gap = tweets[id][min(endoffset1, startoffset2):
                                         max(endoffset1, startoffset2)]
                        if gap == ' of ' or gap == '/':
                            if id not in multiword:
                                multiword[id] = []
                            multiword[id].append(
                                tweets[id][min(startoffset1, startoffset2):
                                           max(endoffset1, endoffset2)])

        # Calculates the precision of the system
        def precision():
            fullmentions = 0
            totalmentions = 0
            partialmentions = 0
            annotatedmentions = {}
            # The golden set annotated mentions are extracted
            for s, p, o in a:
                if p.endswith('anchorOf'):
                    id = s.split('#')[0]
                    if id not in annotatedmentions:
                        annotatedmentions[id] = []
                    annotatedmentions[id].append(o)
            # Compares the mentions obtained by the system with the ones
            # annotated in the golden set
            for s, p, o in r:
                if p.endswith('anchorOf'):
                    id = s.split('#')[0]
                    if id in annotatedmentions:
                        # Checks the mentions that match fully
                        if o in annotatedmentions[id]:
                            fullmentions += 1
                        else:
                            scored = False
                            for m in annotatedmentions[id]:
                                # Check the mentions that match partially
                                if o in m:
                                    partialmentions += 1
                                    scored = True
                                    break
                            # Check the mentions formed by more than one
                            # entity of the golden set
                            if not scored and id in multiword:
                                for multientity in multiword[id]:
                                    if multientity in o:
                                        partialmentions += 1
                    totalmentions += 1
            score = float(fullmentions) / float(totalmentions)
            partialscore = (float(fullmentions + partialmentions)
                            / float(totalmentions))
            return score, partialscore

        # Calculates the recall of the system
        def recall():
            totalmentions = 0
            fullmentions = 0
            partialmentions = 0
            resultmentions = {}
            # Extracts the mentions obtained by the system
            for s, p, o in r:
                if p.endswith('anchorOf'):
                    id = s.split('#')[0]
                    if id not in resultmentions:
                        resultmentions[id] = []
                    resultmentions[id].append(o)
            # Compares the golden set annotations with the mentions obtained
            # by the system
            for s, p, o in a:
                if p.endswith('anchorOf'):
                    id = s.split('#')[0]
                    # Checks the mentions that match fully
                    if id in resultmentions and o in resultmentions[id]:
                        fullmentions += 1
                    elif id in resultmentions:
                        scored = False
                        for m in resultmentions[id]:
                            # Check the mentions that match partially
                            if m in o:
                                partialmentions += 1
                                scored = True
                                break
                        # Check the mentions formed by more than one entity
                        # of the golden set
                        if not scored and id in multiword:
                            for multientity in multiword[id]:
                                if multientity in o:
                                    partialmentions += 1
                    totalmentions += 1
            score = float(fullmentions) / float(totalmentions)
            partialscore = (float(fullmentions + partialmentions)
                            / float(totalmentions))
            return score, partialscore

        def f1():
            fullprec, partialprec = precision()
            fullrec, partialrec = recall()
            results = ("Full Mentions Precision: " + str(fullprec) +
                       "\nFull+Partial Mentions Precision: " + str(partialprec))
            results += ("\nFull Mentions Recall: " + str(fullrec) +
                        "\nFull+Partial Mentions Recall: " + str(partialrec))
            finalresults = ("\nFull Mentions F1: " +
                            str(2 * fullprec * fullrec / (fullprec + fullrec)) +
                            "\nFull+Partial Mentions F1: " +
                            str(2 * partialprec * partialrec /
                                (partialprec + partialrec)))
            return results + finalresults

        return f1()
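# A hypothetical invocation of validate(); both file names are placeholders
# for NIF-annotated N3 files (gold standard vs. system output).
if __name__ == '__main__':
    report = validate('goldenset.n3', 'system_results.n3')
    print(report)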
def load_rdf_from_content(self, rdf_content, _format='n3'):
    # "IOMemory" is the rdflib < 6 in-memory store; on rdflib >= 6 the
    # store is called "Memory" (and is the default).
    graph_in_memory = rdflib.Graph("IOMemory")
    return graph_in_memory.parse(data=rdf_content, format=_format)
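# The same parse works without a wrapper class; a one-triple N3/Turtle
# string (illustrative) round-trips through a default in-memory graph.
import rdflib

g = rdflib.Graph().parse(
    data='<http://example.org/s> <http://example.org/p> "o" .',
    format='n3')
assert len(g) == 1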
def convert(self):
    self.ont2wb = rdflib.Graph()
    if os.path.exists(self.link_graph_file):
        # Graph.load() is deprecated in newer rdflib; parse() is equivalent.
        self.ont2wb.parse(self.link_graph_file, format='turtle')
    else:
        self.create_subst_property(
            RDFS.subClassOf, 'P279', 'subClassOf',
            'item')  # https://www.wikidata.org/wiki/Property:P279
        self.create_subst_property(
            RDFS.subPropertyOf, 'P1647', 'subPropertyOf',
            'property')  # https://www.wikidata.org/wiki/Property:P1647
        # self.create_subst_property(SCHEMA.domain, SCHEMA.identifier, '')
        # self.create_subst_property(SCHEMA.range, SCHEMA.identifier, '')  # -> datatype
        self.create_subst_property(
            SCHEMA.inLanguage, 'P305', 'inLanguage',
            None)  # https://www.wikidata.org/wiki/Property:P305
        self.create_subst_property(
            SCHEMA.version, 'P348', 'version',
            None)  # https://www.wikidata.org/wiki/Property:P348
        self.create_subst_property(
            SCHEMA.isBasedOn, 'P144', 'isBasedOn',
            'property')  # https://www.wikidata.org/wiki/Property:P144
        self.create_subst_property(
            SCHEMA.copyrightHolder, 'P3931', 'copyrightHolder',
            'item')  # https://www.wikidata.org/wiki/Property:P3931
        self.create_subst_property(
            SCHEMA.licenseDeclared, 'P2479', 'licenseDeclared',
            'item')  # https://www.wikidata.org/wiki/Property:P2479
        self.create_subst_property(
            SCHEMA.creativeWorkStatus, 'P548', 'creativeWorkStatus', None
        )  # https://www.wikidata.org/wiki/Property:P548 - aka version type
        self.create_subst_property(
            SCHEMA.image, 'P4765', 'image', None
        )  # https://www.wikidata.org/wiki/Property:P4765 - aka Commons compatible image available at URL
        self.create_subst_property(
            SCHEMA.hasPart, 'P527', 'hasPart', 'item'
        )  # https://www.wikidata.org/wiki/Property:P527 - has part
        # self.create_subst_property(SCHEMA.hasPart, 'P2670', '', True)  # https://www.wikidata.org/wiki/Property:P2670 - has parts of the class
        self.create_subst_property(
            SCHEMA.codeRepository, 'P1324', 'sourceCodeRepository', None
        )  # https://www.wikidata.org/wiki/Property:P1324 - source code repository
        self.create_subst_property(
            SCHEMA.value, 'P8203', 'supportedMetaData', None
        )  # https://www.wikidata.org/wiki/Property:P8203 - aka supported Metadata
        self.create_subst_property(
            OBO.BFO_0000016, 'P7535', 'scopeAndContent', None
        )  # function -> https://www.wikidata.org/wiki/Property:P7535 - aka scope and content
        self.create_subst_property(
            SCHEMA.amount, 'P1114', 'quantity', None
        )  # https://www.wikidata.org/wiki/Property:P1114 - aka quantity
        self.create_subst_item(
            SCHEMA.URL, 'QXXXXXXX', 'URL', None
        )  # https://www.wikidata.org/wiki/Property:P2699 - aka URL
        self.create_subst_property(SPDX.licenseDeclared, 'PXXXXXXXX',
                                   'licenseDeclared', None)
        self.create_subst_property(SCHEMA.fileFormat, 'PXXXXXX',
                                   'fileFormat', None)

    # Create the items and properties
    for subj in self.graph.subjects():
        if self.skip_subj(subj):
            continue
        wb_ids = list(self.ont2wb.objects(subj, SCHEMA.identifier))
        wb_id = wb_ids[0] if len(wb_ids) > 0 else None
        if wb_id is None:
            print('- Creating WB part for subject "%s" ...' % subj)
            wb_id = self.create_ont_wb_thing(subj)
            self.ont2wb.add((subj, SCHEMA.identifier, rdflib.Literal(wb_id)))
        # else:
        #     XXX We might want to recreate it here, anyway!
        print('- Subject "%s" is represented by "%s"' % (subj, wb_id))

    self.ont2wb.serialize(self.link_graph_file, format='turtle')

    # Create the connections/predicates/claims
    for subj in self.graph.subjects():
        if self.skip_subj(subj):
            continue
        wb_ids = list(self.ont2wb.objects(subj, SCHEMA.identifier))
        wb_id = wb_ids[0]
        if isinstance(wb_id, rdflib.Literal):
            wb_id = str(wb_id)
        for _, pred, obj in self.graph.triples((subj, None, None)):
            if pred == RDFS.range:
                print('XXX range')
            elif pred == RDFS.domain:
                print('XXX domain')
            else:
                self.create_claim(wb_id, subj, pred, obj)
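# A minimal sketch of the link-graph idea used in convert(): ontology terms
# are mapped to Wikibase IDs via schema:identifier triples kept in a side
# graph, so later runs can look the mapping up instead of re-creating items.
# The namespace URI, term URI, and Q-id are illustrative.
import rdflib

SCHEMA = rdflib.Namespace("https://schema.org/")
ont2wb = rdflib.Graph()
term = rdflib.URIRef("http://example.org/onto/Widget")
ont2wb.add((term, SCHEMA.identifier, rdflib.Literal("Q42")))
wb_ids = list(ont2wb.objects(term, SCHEMA.identifier))
print(wb_ids[0] if wb_ids else None)  # -> "Q42"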
def p():
    # `data` is assumed to be defined elsewhere in the module; with no
    # explicit format, older rdflib releases fall back to RDF/XML.
    rdflib.Graph().parse(data=data)
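# Passing an explicit format avoids relying on rdflib's fallback guessing;
# the one-triple Turtle string is illustrative.
import rdflib

data = '<http://example.org/s> <http://example.org/p> <http://example.org/o> .'
g = rdflib.Graph().parse(data=data, format='turtle')
print(len(g))  # -> 1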