def test_parsetype_resource():
    g = rdflib.Graph().parse(data=data2)
    print(g.serialize(format='n3'))
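data2 is never defined in this snippet; a minimal RDF/XML document exercising rdf:parseType="Resource" (a hypothetical stand-in, not from the original source) could look like:

data2 = """<?xml version="1.0"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         xmlns:ex="http://example.org/">
  <rdf:Description rdf:about="http://example.org/thing">
    <ex:prop rdf:parseType="Resource">
      <ex:name>nested resource</ex:name>
    </ex:prop>
  </rdf:Description>
</rdf:RDF>"""

Since parse(data=...) without an explicit format falls back to rdflib's default (RDF/XML in older releases; newer releases try to guess), passing format="xml" explicitly is the safer call.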
def test_group_of_tables(mock_urlopen):
    mock_urlopen.side_effect = dispatch_files_as_url
    csv_urls = [
        "http://example.org/gov.uk/data/organizations.csv",
        "http://example.org/gov.uk/data/professions.csv",
        "http://example.org/senior-roles.csv",
        "http://example.org/junior-roles.csv"
    ]
    csvw = CSVW(csv_url=csv_urls,
                metadata_url="http://example.org/csv-metadata.json")
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", RiotWarning)
        rdf_output = csvw.to_rdf()
    g = rdflib.Graph().parse(data=rdf_output, format="turtle")

    org = Namespace("http://www.w3.org/ns/org#")
    post_in = URIRef("http://example.org/organization/hefce.ac.uk")
    grade = URIRef("http://example.org/gov.uk/def/grade")
    job = URIRef("http://example.org/gov.uk/def/job")
    prof = URIRef("http://example.org/gov.uk/def/profession")
    post = Namespace("http://example.org/organization/hefce.ac.uk/post/")
    person = Namespace("http://example.org/organization/hefce.ac.uk/person/")
    min_pay = URIRef("http://example.org/gov.uk/def/min_pay")
    max_pay = URIRef("http://example.org/gov.uk/def/max_pay")
    num_posts = URIRef("http://example.org/gov.uk/def/number_of_posts")

    post_90115 = post["90115"]
    post_90334 = post["90334"]
    p1 = person["1"]
    p2 = person["2"]

    post_90115_triples = list(g.triples((post_90115, None, None)))
    assert len(post_90115_triples) == 7
    assert (post_90115, DCTERMS.identifier,
            Literal("90115")) in post_90115_triples
    assert (post_90115, org.heldBy, p1) in post_90115_triples
    assert (post_90115, grade, Literal("SCS1A")) in post_90115_triples
    assert (post_90115, job,
            Literal("Deputy Chief Executive")) in post_90115_triples
    assert (post_90115, org.reportsTo, post_90334) in post_90115_triples
    assert (post_90115, prof, Literal("Finance")) in post_90115_triples
    assert (post_90115, org.postIn, post_in) in post_90115_triples

    p1_triples = list(g.triples((p1, None, None)))
    assert len(p1_triples) == 1
    assert (p1, FOAF.name, Literal("Steve Egan")) in p1_triples

    post_90334_triples = list(g.triples((post_90334, None, None)))
    assert len(post_90334_triples) == 6
    assert (post_90334, DCTERMS.identifier,
            Literal("90334")) in post_90334_triples
    assert (post_90334, org.heldBy, p2) in post_90334_triples
    assert (post_90334, grade, Literal("SCS4")) in post_90334_triples
    assert (post_90334, job, Literal("Chief Executive")) in post_90334_triples
    assert (post_90334, prof, Literal("Policy")) in post_90334_triples
    assert (post_90334, org.postIn, post_in) in post_90334_triples

    p2_triples = list(g.triples((p2, None, None)))
    assert len(p2_triples) == 1
    assert (p2, FOAF.name, Literal("Sir Alan Langlands")) in p2_triples

    bnode1 = list(g.triples((None, grade, Literal("4"))))[0][0]
    b1_triples = list(g.triples((bnode1, None, None)))
    assert len(b1_triples) == 8
    assert (bnode1, org.reportsTo, post_90115) in b1_triples
    assert (bnode1, min_pay, Literal(17426,
                                     datatype=XSD.integer)) in b1_triples
    assert (bnode1, max_pay, Literal(20002,
                                     datatype=XSD.integer)) in b1_triples
    assert (bnode1, job, Literal("Administrator")) in b1_triples
    assert (bnode1, num_posts, Literal(8.67,
                                       datatype=XSD.double)) in b1_triples
    assert (bnode1, prof, Literal("Operational Delivery")) in b1_triples
    assert (bnode1, org.postIn, post_in) in b1_triples

    bnode2 = list(g.triples((None, grade, Literal("5"))))[0][0]
    b2_triples = list(g.triples((bnode2, None, None)))
    assert len(b2_triples) == 8
    assert (bnode2, org.reportsTo, post_90115) in b2_triples
    assert (bnode2, min_pay, Literal(19546,
                                     datatype=XSD.integer)) in b2_triples
    assert (bnode2, max_pay, Literal(22478,
                                     datatype=XSD.integer)) in b2_triples
    assert (bnode2, job, Literal("Administrator")) in b2_triples
    assert (bnode2, num_posts, Literal(0.5, datatype=XSD.double)) in b2_triples
    assert (bnode2, prof, Literal("Operational Delivery")) in b2_triples
    assert (bnode2, org.postIn, post_in) in b2_triples

    assert len(list(g.triples((None, None, None)))) == 7 + 1 + 6 + 1 + 8 + 8
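    # Equivalent shortcut: rdflib graphs implement __len__, so
    # `assert len(g) == 31` checks the same total without building a list.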
Example #3
def main(inputfile, model, configfile, outputfile, compression):

    compression = int(compression)

    print('Loading configuration file...')

    # Read configuration file
    with open(configfile, encoding='utf-8-sig') as json_file:
        config_data = json.load(json_file)
    def_base_uri = config_data['baseuri']
    getValue = config_data['getValue']
    numberOfRowsToConsider = int(config_data['numberOfRowsToConsider'])

    # Load Recipient Categorisation dictionary
    recipientCatg = config_data['recipientCatg']
    recipientCatg_new = dict()
    for key in recipientCatg:
        newkey = key.casefold()
        recipientCatg_new[newkey] = recipientCatg[key]
    recipientCatg = recipientCatg_new

    # Load Public Organisation dictionary
    publicOrganisationCatg = config_data['publicOrganisationCatg']
    publicOrganisationCatg_new = dict()
    for key in publicOrganisationCatg:
        newkey = key.casefold()
        publicOrganisationCatg_new[newkey] = publicOrganisationCatg[key]
    publicOrganisationCatg = publicOrganisationCatg_new

    nomenclatureBase = config_data['nomenclatureBase']

    #------------------------#
    # Load model and data    #
    #------------------------#

    # Set filenames
    data_filename = inputfile
    model_filename = model

    cwd = os.getcwd()
    # Load data
    print('Loading data...')
    os.chdir(os.path.join(cwd, 'data/raw'))
    data = pandas.read_csv(data_filename,
                           sep=',',
                           encoding="ANSI",
                           quotechar='"',
                           na_filter=False,
                           low_memory=False)
    data = data.replace({'\n': ', '},
                        regex=True)  # replace newline characters in the data with ', '
    session = Session.get_current()
    # Load model from Turtle file
    print('Loading model...')
    os.chdir(os.path.join(cwd, 'models'))
    ontology = Ontology.load(model_filename)
    os.chdir(cwd)

    #-------------------------------#
    # Print all classes detected    #
    #-------------------------------#

    if False:  # flip to True to print every term detected in the ontology
        print("List of all terms detected:")
        for term in ontology.__terms__:
            print(term)
        print('')

    #----------------------------------#
    # Load all controlled vocabularies #
    #----------------------------------#
    print('Loading controlled vocabularies...')

    # Query a file to get a dictionary with keys 'o' and values 'uri'
    def getQueryDict(modelfile):
        graph = rdflib.Graph()
        graph.parse(modelfile)
        rowlist = graph.query(
            """PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        SELECT ?uri ?o
               WHERE {
                  ?uri skos:prefLabel ?o .
                  FILTER (lang(?o) = 'en')
               }""")
        objdict = {}
        for row in rowlist:
            objdict[str(row.o.toPython())] = str(row.uri.toPython())
        return objdict
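    # Note: getQueryDict inverts skos:prefLabel -> concept URI and keeps only
    # English labels; if two concepts share the same English prefLabel, the
    # later row silently overwrites the earlier one. Illustrative shape
    # (values hypothetical):
    #   getQueryDict("countries.ttl")
    #   -> {"Belgium": "http://.../country/BEL", "France": "http://.../country/FRA"}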

    # Create dictionary of Countries
    countriesmodelfile = config_data["countriesmodelfile"]
    countryList = getQueryDict(countriesmodelfile)
    countryNotFoundBase = config_data["countryNotFoundBase"]
    countryReplace = config_data["countryReplace"]

    # Create URI for currency (only EUR for now)
    currencyEUR = config_data["currencyEUR"]

    # Create URI for corporate Body
    corporateBodyBase = config_data["corporateBodyBase"]
    corporateBodyReplace = config_data["corporateBodyReplace"]

    # set up dictionaries for controlled vocabularies
    organisationTypeDict = {}
    corporatebodyDict = {}
    actionTypeDict = {}

    def checkControlledDictionary(controlledDict,
                                  keyLabel,
                                  valueLabel,
                                  label,
                                  base_uri=def_base_uri):
        # Update the controlled vocabulary and return the skos:Concept URI to be used.
        if row[getValue[keyLabel]] not in controlledDict:
            if base_uri == def_base_uri:
                lbl = label + row[getValue[valueLabel]]
                URISpec = URISpecification(base_uri, lbl)
                Concept_tmp = ontology.skosConcept(uri=URISpec)
            else:
                Concept_tmp = ontology.skosConcept(uri=None,
                                                   imposeURI=base_uri +
                                                   row[getValue[keyLabel]])
            Concept_tmp.skosprefLabel += row[getValue[valueLabel]]
            controlledDict[row[
                getValue[keyLabel]]] = Concept_tmp.getInstanceUri()
        return controlledDict[row[getValue[keyLabel]]]
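    # checkControlledDictionary memoizes one skos:Concept per code value: the
    # first row carrying a new code mints (or imposes) a concept URI, and
    # later rows reuse the cached URI. It reads `row` from the enclosing loop
    # scope at call time rather than taking it as a parameter.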

    #-------------------------------#
    # Create Instances of Dataset   #
    #-------------------------------#

    # number of rows to process before flushing the generated triples to file
    flushfrequency = int(config_data['flushfrequency'])
    batchlimits = range(flushfrequency, len(data.index), flushfrequency)
    output = open(outputfile, 'w', encoding='utf8')

    # Go through the data file creating all instances
    with click.progressbar(data.iterrows(),
                           label='Creating instances',
                           length=len(data.index)) as total:
        for ix, row in total:

            if ix < numberOfRowsToConsider or numberOfRowsToConsider == -1:

                #----------------#
                # Create Address #
                #----------------#
                country = row[getValue['countryDescriptionEn']]
                if country in countryReplace:
                    country = countryReplace[country]
                lbl = row[getValue['address']] + row[getValue['city']] + row[
                    getValue['postCode']] + country
                URISpec = URISpecification(def_base_uri, lbl)
                Address_tmp = ontology.locnAddress(uri=URISpec)
                Address_tmp.locnadminUnitL1 += countryList.setdefault(
                    country, countryNotFoundBase + country)
                Address_tmp.locnfullAddress += row[getValue['address']]
                Address_tmp.locnpostName += row[getValue['city']]
                Address_tmp.locnpostCode += row[getValue['postCode']]

                #-----------------#
                # Create Location #
                #-----------------#
                geographicName = str(
                    row[getValue['recipientName']]) + ', ' + str(
                        row[getValue['city']]) + ', ' + str(
                            row[getValue['countryDescriptionEn']])
                URISpec = URISpecification(def_base_uri, geographicName)
                Location_tmp = ontology.dctLocation(uri=URISpec)
                Location_tmp.locngeographicName += geographicName
                Location_tmp.locnaddress += Address_tmp

                #------------------#
                # Create Recipient #
                #------------------#
                lbl = row[getValue['recipientName']] + geographicName
                URISpec = URISpecification(def_base_uri, lbl)
                Recipient_tmp = ontology.Recipient(uri=URISpec)
                Recipient_tmp.prefLabel += row[getValue['recipientName']]
                Recipient_tmp.hasLocation += Location_tmp
                recipientType = recipientCatg[row[
                    getValue['recipientTypeDescription']].casefold()]
                recipientURI = Recipient_tmp.getInstanceUri()

                # Enforce extra indicator fields
                if row[getValue['isNaturalPerson']]:
                    recipientType = "Person"
                elif row[getValue['isNFPO']]:
                    recipientType = "NFPO"
                elif row[getValue['isNGO']]:
                    recipientType = "NGO"

                # If needed, an extra type for the Recipient is assigned
                if recipientType == "Registered Organisation":
                    RecipientAlter_tmp = ontology.rovRegisteredOrganization(
                        uri=None, imposeURI=recipientURI)
                    RecipientAlter_tmp.rovlegalName += row[
                        getValue['recipientName']]
                    lbl = row[getValue['recipientVAT']]
                    URISpec = URISpecification(def_base_uri, lbl)
                    RecipientVAT_tmp = ontology.admsIdentifier(
                        uri=URISpec, label=row[getValue['recipientVAT']])
                    RecipientAlter_tmp.rovregistration += RecipientVAT_tmp
                    RecipientAlter_tmp.rovorgType += checkControlledDictionary(
                        organisationTypeDict, 'organisationTypeCode',
                        'organisationTypeDescription',
                        'RegisteredOrganisation')
                elif recipientType == "Public Organisation":
                    RecipientAlter_tmp = ontology.cpovPublicOrganisation(
                        uri=None, imposeURI=recipientURI)
                    RecipientAlter_tmp.orgclassification += publicOrganisationCatg[
                        row[getValue['recipientTypeDescription']].casefold()
                    ]  # literal for the pilot
                elif recipientType == "Person":
                    RecipientAlter_tmp = ontology.foafPerson(
                        uri=None, imposeURI=recipientURI)
                    RecipientAlter_tmp.foaffamilyName += row[
                        getValue['recipientName']]
                elif recipientType == "Recipient":
                    pass  # Recipient object already made
                elif recipientType == "International Organisation":
                    RecipientAlter_tmp = ontology.InternationalOrganization(
                        uri=None, imposeURI=recipientURI)
                elif recipientType == "Trust Fund":
                    RecipientAlter_tmp = ontology.TrustFund(
                        uri=None, imposeURI=recipientURI)
                elif recipientType == "NFPO":
                    RecipientAlter_tmp = ontology.NonProfitOrganisation(
                        uri=None, imposeURI=recipientURI)
                    lbl = row[getValue['recipientVAT']]
                    URISpec = URISpecification(def_base_uri, lbl)
                    RecipientVAT_tmp = ontology.admsIdentifier(
                        uri=URISpec, label=row[getValue['recipientVAT']])
                    RecipientAlter_tmp.rovregistration += RecipientVAT_tmp
                    RecipientAlter_tmp.rovorgType += checkControlledDictionary(
                        organisationTypeDict, 'organisationTypeCode',
                        'organisationTypeDescription', 'NFPO')
                elif recipientType == "NGO":
                    RecipientAlter_tmp = ontology.NGO(uri=None,
                                                      imposeURI=recipientURI)
                    lbl = row[getValue['recipientVAT']]
                    URISpec = URISpecification(def_base_uri, lbl)
                    RecipientVAT_tmp = ontology.admsIdentifier(
                        uri=URISpec, label=row[getValue['recipientVAT']])
                    RecipientAlter_tmp.rovregistration += RecipientVAT_tmp
                    RecipientAlter_tmp.rovorgType += checkControlledDictionary(
                        organisationTypeDict, 'organisationTypeCode',
                        'organisationTypeDescription', 'NGO')
                else:
                    print('Recipient: no additional type match.')

                # -----------------------#
                # Create Action Location #
                # -----------------------#
                actionlbl = row[getValue['actionLocation']]
                if actionlbl:
                    URISpec = URISpecification(def_base_uri, actionlbl)
                    ActionLocation_tmp = ontology.dctLocation(uri=URISpec)
                    ActionLocation_tmp.locngeographicName += row[
                        getValue['actionLocation']]

                # --------------------#
                # Create Contract Key #
                # --------------------#
                lbl = str(row[getValue['contractKey']])
                URISpec = URISpecification(def_base_uri, lbl)
                ContractKey_tmp = ontology.admsIdentifier(
                    uri=URISpec, label=row[getValue['contractKey']])

                # ------------------------#
                # Create Legal Commitment #
                # ------------------------#
                lbl = str(row[getValue['commitmentKey']])
                URISpec = URISpecification(def_base_uri, lbl)
                LegalCommitment_tmp = ontology.LegalCommitment(uri=URISpec)
                LegalCommitment_tmp.dctdescription += row[getValue['subject']]
                LegalCommitment_tmp.fundingType += row[getValue['fundingType']]
                LegalCommitment_tmp.contractKey += ContractKey_tmp
                if row[getValue['isCoordinator']]:
                    LegalCommitment_tmp.hasCoordinator += Recipient_tmp
                if actionlbl:
                    LegalCommitment_tmp.hasActionLocation += ActionLocation_tmp

                # ----------------------#
                # Create Monetary Value # --> link to EU Budget
                # ----------------------#
                lbl = str(row[getValue['totalValue']])
                URISpec = URISpecification(def_base_uri, lbl)
                MonetaryValue_tmp = ontology.MonetaryValue(uri=URISpec)
                MonetaryValue_tmp.value += row[getValue['totalValue']]
                MonetaryValue_tmp.currency += currencyEUR

                # ------------------------------#
                # Create Indicative Transaction #
                # ------------------------------#
                lbl = row[getValue['DG']] + row[
                    getValue['recipientName']] + str(
                        row[getValue['totalValue']]) + geographicName
                URISpec = URISpecification(def_base_uri, lbl)
                IndicativeTransaction_tmp = ontology.IndicativeTransaction(
                    uri=URISpec)
                IndicativeTransaction_tmp.committedTo += Recipient_tmp
                # construct corporate body uri
                DG = row[getValue['DG']]
                if DG in corporateBodyReplace:
                    DG = corporateBodyReplace[DG]
                IndicativeTransaction_tmp.committedBy += checkControlledDictionary(
                    corporatebodyDict,
                    'DG',
                    'DGDescriptionEn',
                    'CorporateBody',
                    base_uri=corporateBodyBase)
                IndicativeTransaction_tmp.hasEstimatedValue += MonetaryValue_tmp

                # --------------------#
                # Create Position Key #
                # --------------------#
                lbl = str(row[getValue['positionKey']])
                URISpec = URISpecification(def_base_uri, lbl)
                PositionKey_tmp = ontology.admsIdentifier(
                    uri=URISpec, label=row[getValue['positionKey']])

                # ----------------------#
                # Create Commitment Key #
                # ----------------------#
                lbl = str(row[getValue['commitmentKey']])
                URISpec = URISpecification(def_base_uri, lbl)
                CommitmentKey_tmp = ontology.admsIdentifier(
                    uri=URISpec, label=row[getValue['commitmentKey']])

                # --------------------#
                # Create Nomenclature # --> link to EU Budget
                # --------------------#
                nomenclatureURI = nomenclatureBase + str(
                    row[getValue['year']]) + '_SEC3' + row[
                        getValue['budgetLine']].replace('.', '_')

                # ----------------------------#
                # Create Budgetary Commitment #
                # ----------------------------#
                lbl = str(row[getValue['positionKey']]) + row[
                    getValue['financialManagementArea']] + str(
                        row[getValue['expenseType']]) + str(
                            row[getValue['commitmentKey']]) + str(
                                row[getValue['totalValue']])
                URISpec = URISpecification(def_base_uri, lbl)
                BudgetaryCommitment_tmp = ontology.BudgetaryCommitment(
                    uri=URISpec)
                BudgetaryCommitment_tmp.positionKey += PositionKey_tmp
                BudgetaryCommitment_tmp.commitmentKey += CommitmentKey_tmp
                BudgetaryCommitment_tmp.dctdate += row[getValue['year']]
                actionTypeVal = checkControlledDictionary(
                    actionTypeDict, 'actionType', 'actionTypeDescriptionEn',
                    'actionType')
                BudgetaryCommitment_tmp.actionType += actionTypeVal
                financialManagementAreaBase = config_data[
                    'financialManagementAreaBase']
                BudgetaryCommitment_tmp.financialManagementArea += financialManagementAreaBase + row[
                    getValue['financialManagementArea']]
                expenseTypeBase = config_data['expenseTypeBase']
                expenseTypeMap = config_data['expenseTypeMap']
                BudgetaryCommitment_tmp.expenseType += expenseTypeBase + expenseTypeMap[
                    str(row[getValue['expenseType']])]
                BudgetaryCommitment_tmp.hasBudgetLine += nomenclatureURI
                BudgetaryCommitment_tmp.hasTotalValue += MonetaryValue_tmp
                BudgetaryCommitment_tmp.hasLegalCommitment += LegalCommitment_tmp
                BudgetaryCommitment_tmp.hasIndicativeTransaction += IndicativeTransaction_tmp

                # ----------------------#
                # Create Corporate Body # --> link to EU Budget
                # ----------------------#

                # we will link to URI directly in indicative transaction

                #-----------------------------------------------#
                # Print triples so far to file                  #
                #-----------------------------------------------#

                if ix in batchlimits:
                    flushToFile(session, output, compression)

    flushToFile(session, output, compression)
    output.close()

    # ----------------------------#
    # Transform triples to Turtle #
    # ----------------------------#

    if compression > 1:
        print('Transforming the generated triples to Turtle...')
        g = rdflib.Graph()
        g.parse(outputfile, format="nt")
        ttloutput = outputfile.rsplit('.', 1)  # split only at the final extension
        g.serialize(destination=ttloutput[0] + '.ttl', format='turtle')
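The final conversion step can also stand alone; a minimal sketch, with placeholder file names:

import rdflib

g = rdflib.Graph()
g.parse("output.nt", format="nt")
g.serialize(destination="output.ttl", format="turtle")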
Example #4
import sys
import json
import rdflib

wfns = 'http://purl.org/net/wf-invocation#'
exns = 'http://www.wings-workflows.org/ontology/execution.owl#'
runid = sys.argv[1]

# Load the run description graph
g = rdflib.Graph()
g.load(runid)

# Get the plan uri
run = rdflib.URIRef(runid)
hasPlan = rdflib.URIRef(exns + 'hasPlan')
plan = g.value(run, hasPlan)

# Load the plan
if plan:
    g.load(plan)
    # Query for Variable bindings
    vbindings = {}
    query = 'select ?v ?d where { ?v <' + wfns + 'hasDataBinding> ?d }'
    for row in g.query(query):
        varid = str(row.v)
        varname = varid[varid.index("#") + 1:]
        vbindings[varname] = str(row.d)

    rundetails = {"runid": runid, "files": vbindings}

    print(json.dumps(rundetails))
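A usage note: Graph.load() is deprecated in newer rdflib releases, where g.parse(runid) is the equivalent call. The script takes the run URI as its only command-line argument (URI and binding names hypothetical):

#   python run_details.py http://example.org/wings/runs/run-42
# prints JSON of the form:
#   {"runid": "http://example.org/wings/runs/run-42",
#    "files": {"InputFile": "http://example.org/data/f1.txt"}}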
Example #5
import json
import rdflib
import requests
from rdflib import URIRef, Literal, BNode
from rdflib.namespace import RDF, SKOS, OWL, Namespace, NamespaceManager, XSD

BDR = Namespace("http://purl.bdrc.io/resource/")
BDO = Namespace("http://purl.bdrc.io/ontology/core/")
BDG = Namespace("http://purl.bdrc.io/graph/")
BDA = Namespace("http://purl.bdrc.io/admindata/")
ADM = Namespace("http://purl.bdrc.io/ontology/admin/")
MBBT = Namespace("http://mbingenheimer.net/tools/bibls/")
CBCT_URI = "https://dazangthings.nz/cbc/text/"
CBCT = Namespace(CBCT_URI)

NSM = NamespaceManager(rdflib.Graph())
NSM.bind("bdr", BDR)
NSM.bind("", BDO)
NSM.bind("bdg", BDG)
NSM.bind("bda", BDA)
NSM.bind("adm", ADM)
NSM.bind("skos", SKOS)
NSM.bind("rdf", RDF)
NSM.bind("cbct", CBCT)
NSM.bind("mbbt", MBBT)


def get_id_for_str(id):
    pass
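get_id_for_str is left as a stub. Note that the NamespaceManager bound above can already shorten full URIs to prefixed names; a sketch:

print(NSM.normalizeUri(rdflib.URIRef("http://purl.bdrc.io/resource/W22084")))
# expected, given the bindings above: bdr:W22084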

Example #6
def load(kb, goal, identification, base):

    kb_stream, goal_stream = kb, goal
    implies = rdflib.URIRef("http://www.w3.org/2000/10/swap/log#implies")
    store = OrderedStore()
    kb_graph = rdflib.Graph(store=store, identifier=base)
    kb_conjunctive = rdflib.ConjunctiveGraph(store=store, identifier=base)
    kb_graph.parse(kb_stream, format='n3', publicID=base)
    if not nolog:
        log('---kb:')
        try:
            for l in kb_graph.serialize(format='n3').splitlines():
                log(l.decode('utf8'))
        except Exception as e:
            log(str(e))
        log('---kb quads:')
        for l in kb_conjunctive.serialize(format='nquads').splitlines():
            log(l.decode('utf8'))
        log('---')

    def fixup3(o):
        if isinstance(o, rdflib.Graph):
            return URIRef(o.identifier)
        return o

    def fixup2(o):
        if type(o) == rdflib.BNode:
            return rdflib.Variable(str(o.lower()))
        return o

    def fixup(spo):
        s, p, o = spo
        return (fixup2(s), fixup2(p), fixup2(o))

    rules = []
    head_triples_triples_id = 0
    kb_graph_triples = [fixup(x) for x in kb_graph.triples((None, None, None))]
    facts = Graph(
        Triple(un_move_me_ize_pred(fixup3(x[1])),
               [fixup3(x[0]), fixup3(x[2])]) for x in kb_graph_triples)
    facts.id = head_triples_triples_id
    head_triples_triples_id += 1
    for kb_graph_triple_idx, (s, p, o) in enumerate(kb_graph_triples):
        rules.append(Rule(facts, kb_graph_triple_idx, Graph()))
        if p == implies:
            body = Graph()
            head_triples = [
                fixup(x) for x in kb_conjunctive.triples((None, None, None, o))
            ]
            head_triples_triples = Graph()
            for triple in [
                    Triple(fixup3(x[1]),
                           [fixup3(x[0]), fixup3(x[2])]) for x in head_triples
            ]:
                move = False
                if triple.pred == URIRef(
                        'http://www.w3.org/1999/02/22-rdf-syntax-ns#move_me_to_body_first'
                ):
                    triple.pred = URIRef(
                        'http://www.w3.org/1999/02/22-rdf-syntax-ns#first')
                    move = True
                if triple.pred == URIRef(
                        'http://www.w3.org/1999/02/22-rdf-syntax-ns#move_me_to_body_rest'
                ):
                    triple.pred = URIRef(
                        'http://www.w3.org/1999/02/22-rdf-syntax-ns#rest')
                    move = True
                if move:
                    body.append(triple)
                else:
                    head_triples_triples.append(triple)
            head_triples_triples.id = head_triples_triples_id
            head_triples_triples_id += 1
            for body_triple in [
                    fixup(x)
                    for x in kb_conjunctive.triples((None, None, None, s))
            ]:
                body.append(
                    Triple((un_move_me_ize_pred(fixup3(body_triple[1]))),
                           [fixup3(body_triple[0]),
                            fixup3(body_triple[2])]))
            #body.reverse()
            to_expand = []
            for triple in head_triples_triples + body:
                for thing in triple.args:
                    if type(thing) == rdflib.Variable:
                        if str(thing).endswith('_'):
                            to_expand.append(thing)
            for thing in to_expand:
                body.insert(
                    0,
                    Triple(
                        rdflib.RDF.first,
                        [thing, rdflib.Variable(str(thing)[:-1] + 'f')]))
                body.insert(
                    0,
                    Triple(
                        rdflib.RDF.rest,
                        [thing, rdflib.Variable(str(thing)[:-1] + 'r')]))
            if len(head_triples_triples) > 1:
                with open(_rules_file_name, 'a') as ru:
                    ru.write(
                        head_triples_triples.str(shorten) + " <= " +
                        body.str(shorten) + ":\n")
            for head_triple_idx in range(len(head_triples_triples)):
                rules.append(Rule(head_triples_triples, head_triple_idx, body))

    goal_rdflib_graph = rdflib.ConjunctiveGraph(store=OrderedStore(),
                                                identifier=base)
    goal_rdflib_graph.parse(goal_stream, format='n3', publicID=base)

    if not nolog:
        log('---goal:')
        try:
            for l in goal_rdflib_graph.serialize(format='n3').splitlines():
                log(l.decode('utf8'))
        except Exception as e:
            log(str(e))
        log('---goal nq:')
        for l in goal_rdflib_graph.serialize(format='nquads').splitlines():
            log(l.decode('utf8'))
        log('---')

    goal = Graph()
    for s, p, o in [
            fixup(x)
            for x in goal_rdflib_graph.triples((None, None, None, None))
    ]:
        goal.append(
            Triple(un_move_me_ize_pred(fixup3(p)),
                   [fixup3(s), fixup3(o)]))
    #goal.reverse()
    query_rule = Rule([], None, goal)
    return rules, query_rule, goal
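Inside load(), fixup2 encodes the N3 convention that blank nodes in a rule graph act as universally quantified variables. A standalone mirror of that mapping (a sketch, not part of the original):

import rdflib

def bnode_to_var(o):
    # BNode("N123abc") -> Variable("n123abc"); other terms pass through
    if isinstance(o, rdflib.BNode):
        return rdflib.Variable(str(o).lower())
    return o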
Example #7
def update_RDF(map_base,
               map_id,
               map_source,
               annotations,
               update_knowledgebase=False):

    map_dir = os.path.join(map_base, map_id)

    # RDF generation

    if update_knowledgebase:
        kb_path = os.path.join(map_base, 'KnowledgeBase.sqlite')
        print('Knowledge base: ', kb_path, (not os.path.exists(kb_path)))
        graph = KnowledgeBase(kb_path, create=(not os.path.exists(kb_path)))
    else:
        graph = rdflib.Graph()

    # graph.namespace_manager = NS.SCICRUNCH_NS
    # namespaces_dict = NS.namespaces_dict()
    # Only really need rdf: obo: fma: FMA: RO: UBERON: ILX: flatmap:
    # See https://github.com/RDFLib/rdflib/issues/794
        'FMA':
        rdflib.namespace.Namespace('http://purl.org/sig/ont/fma/fma'),
        'ILX':
        rdflib.namespace.Namespace('http://uri.interlex.org/base/ilx_'),
        'NCBITaxon':
        rdflib.namespace.Namespace(
            'http://purl.obolibrary.org/obo/NCBITaxon_'),
        'RO':
        rdflib.namespace.Namespace('http://purl.obolibrary.org/obo/RO_'),
        'UBERON':
        rdflib.namespace.Namespace('http://purl.obolibrary.org/obo/UBERON_'),
        'fma':
        rdflib.namespace.Namespace('http://purl.org/sig/ont/fma/'),
        'ilx':
        rdflib.namespace.Namespace('http://uri.interlex.org/'),
        'obo':
        rdflib.namespace.Namespace('http://purl.obolibrary.org/obo/'),
    }
    for pfx, ns in namespaces_dict.items():
        graph.bind(pfx, ns, override=True)

    FLATMAP_NS = rdflib.namespace.Namespace(
        'http://celldl.org/ontologies/flatmap/')
    graph.bind('flatmap', FLATMAP_NS, override=True)

    map_uri = rdflib.URIRef(map_source)
    for object_id, metadata in annotations.items():
        if 'error' in metadata:
            print('Error in {} layer: {}: {}'.format(metadata['layer'],
                                                     metadata['error'],
                                                     metadata['annotation']))
            continue

        layer_urls = UrlMaker(map_source, metadata['layer'])
        annotation = metadata['annotation']
        properties = Parser.annotation(annotation)
        feature_id = properties.get('id')

        feature_uri = layer_urls.url(feature_id)
        graph.remove((feature_uri, None, None))
        feature_class = None

        route = {'source': '', 'via': [], 'target': ''}

        for key, value in properties.items():
            if key == 'models':
                prop = namespaces_dict['RO']['0003301']
                (prefix, local) = value.split(':', 1)
                graph.add((feature_uri, prop, namespaces_dict[prefix][local]))
            elif key == 'node':
                feature_class = FLATMAP_NS['Node']
                graph.add((feature_uri, FLATMAP_NS['nodeClass'],
                           FLATMAP_NS[value[0]]))
            elif key == 'edge':
                feature_class = FLATMAP_NS['Edge']
                if len(value) < 2:
                    raise ValueError(
                        'Edge must have a source and target: {}'.format(
                            annotation))
                route['source'] = value[0]
                route['target'] = value[-1]
                route['via'] = value[1:-1]
            elif key in ['source', 'via', 'target']:
                if feature_class is None:
                    feature_class = FLATMAP_NS['Edge']
                elif feature_class != FLATMAP_NS['Edge']:
                    raise ValueError(
                        'Only edges can be routed: {}'.format(annotation))
                if key in ['source', 'target']:
                    route[key] = value[0]
                else:
                    route['via'].extend(value)
        if feature_class is None:
            feature_class = FLATMAP_NS['Node']  # Assume we have a Node
        elif feature_class == FLATMAP_NS['Edge']:
            if route['source']:
                graph.add((feature_uri, FLATMAP_NS['source'],
                           layer_urls.url(route['source'])))
            if route['target']:
                graph.add((feature_uri, FLATMAP_NS['target'],
                           layer_urls.url(route['target'])))
            for via in route['via']:
                graph.add(
                    (feature_uri, FLATMAP_NS['via'], layer_urls.url(via)))

        graph.add((feature_uri, FLATMAP_NS['map'], map_uri))
        graph.add((feature_uri, rdflib.namespace.RDF['type'], feature_class))

    with open(os.path.join(map_dir, 'annotations.ttl'), 'w') as turtle:
        # Don't set `base=map_uri` until RDFLib 5.0 and then use `explicit_base=True`
        # See https://github.com/RDFLib/rdflib/issues/559
        turtle.write(graph.serialize(format='turtle').decode('utf-8'))

    graph.close()
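    # Portability note: rdflib < 6 returns bytes from Graph.serialize() (hence
    # the .decode('utf-8') above), while rdflib >= 6 returns str, where that
    # decode call fails. A version-agnostic sketch:
    #     ttl = graph.serialize(format='turtle')
    #     if isinstance(ttl, bytes):
    #         ttl = ttl.decode('utf-8')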
Example #8
    def run(self,
            *,
            foodon_to_root_file='../data/out/foodon_to_root_path.pkl',
            recipes_file='../data/out/recipe_ingname_list.json',
            index_dict_file='../data/out/food_index_dict.pkl',
            food_link_files=['../data/in/foodon-links-1.ttl'],
            save_ppmi_dict='../data/out/foodon_ppmi_sim_dict.pkl'):

        with open(foodon_to_root_file, 'rb') as f:
            foodon_to_root_dict = pickle.load(f)

        ################

        g = rdflib.Graph()
        for file in food_link_files:
            g.parse(file, format='ttl')
        food_to_foodon = dict()

        for subj, obj in g.subject_objects(predicate=rdflib.URIRef(
                'http://idea.rpi.edu/heals/kb/equivalentFoodOnClass')):
            food_to_foodon[subj] = obj

        valid_foodon_items = set(
            item[1] for item in food_to_foodon.items()) - IGNORE_INGS
        foodon_super_to_root = defaultdict(lambda: set())
        for foodon_food in valid_foodon_items:
            path_items = foodon_to_root_dict.get(foodon_food, [])
            for item in path_items:
                if item in valid_foodon_items:
                    foodon_super_to_root[foodon_food].add(item)
        foodon_super_to_root = {
            key: frozenset(val)
            for key, val in foodon_super_to_root.items()
        }

        R2V = RecToVec(graph=rdflib.Graph(), food_index_file=index_dict_file)

        with open(recipes_file, 'r') as f:
            recipe_list = json.load(f)

        print("files loaded")

        ing_context_ocurrences = defaultdict(lambda: defaultdict(lambda: 0))

        ing_occurrence_count = defaultdict(lambda: 0)
        context_ocurrences = defaultdict(lambda: 0)
        unique_contexts = set()
        relevant_foods = set()
        ind_to_context = []
        completed_recipes = 0
        start = time.time()
        for recipe in recipe_list:
            ings = [food_to_foodon.get(foodkg_ns[ing], 0) for ing in recipe]

            if 0 in ings:
                continue

            ings_set = set(ings) - IGNORE_INGS
            for ing in ings_set:
                context_ings = frozenset(ings_set - {ing})
                unique_contexts.add(context_ings)
                context_ocurrences[context_ings] += 1

                super_foods = foodon_super_to_root[ing]
                for related_ing in super_foods:  # super_foods includes the ingredient itself as well as its superclasses
                    relevant_foods.add(related_ing)
                    ing_context_ocurrences[related_ing][context_ings] += 1
                    ing_occurrence_count[related_ing] += 1

            completed_recipes += 1
            if completed_recipes % 10000 == 0:
                print(completed_recipes, ' - time - ', time.time() - start)
                start = time.time()

        total_context_count = len(unique_contexts)
        print('unique contexts: ', total_context_count)
        print("setting up computing ppmi, using foodon relations")

        ind_to_context = []
        context_to_ind = dict()
        for c in context_ocurrences.keys():
            context_to_ind[c] = len(ind_to_context)
            ind_to_context.append(c)
        ind_to_ing = []

        ing_to_context_ppmi = lil_matrix(
            (len(R2V.food_index), total_context_count))

        finished_count = 0
        for ing in relevant_foods:
            ing_index = R2V.food_index[ing]
            ing_contexts = ing_context_ocurrences[ing]
            ing_context_mat = ing_context_ocurrences.get(ing, None)
            if ing_context_mat is None:
                continue

            ing_occ_count = ing_occurrence_count[ing]

            ing_contexts_as_set = frozenset(ing_contexts.keys())
            for c in ing_contexts_as_set:
                i = context_to_ind[c]
                # # V1 and V2
                ppmi = max(
                    0,
                    np.log10((ing_contexts[c] * total_context_count) /
                             (ing_occ_count * context_ocurrences[c])) *
                    np.sqrt(max(ing_occ_count, context_ocurrences[c])))
                ing_to_context_ppmi[ing_index, i] = ppmi

                # v4
                # ing_to_context_ppmi[ing_index, i] = ing_contexts[c]

            finished_count += 1
            if finished_count % 100 == 0 or finished_count < 5:
                print('getting ppmi, completed ', finished_count)
            ing_context_ocurrences[ing] = None

        ing_to_context_ppmi = ing_to_context_ppmi.tocsr()
        ing_to_ing_ppmi_sim = dict()
        print("converting to cosine sim...")
        finished_count = 0

        def l2_norm(mat):
            return np.sqrt(np.sum(mat.multiply(mat), axis=1))

        l2n = l2_norm(ing_to_context_ppmi)

        cosine_sim = ing_to_context_ppmi.dot(ing_to_context_ppmi.T) / (l2n.dot(
            l2n.T))

        for ing1 in relevant_foods:
            # the similarity lookup below indexes cosine_sim by both food
            # indices directly, so no per-ingredient row slice is needed here
            ing_to_ing_ppmi_sim[ing1] = dict()

            for ing2 in relevant_foods:
                ing_to_ing_ppmi_sim[ing1][ing2] = cosine_sim[
                    R2V.food_index[ing1], R2V.food_index[ing2]]

            finished_count += 1
            if finished_count % 100 == 0 or finished_count < 5:
                print('completed count: ', finished_count)

        print('finished, saving output')

        with open(save_ppmi_dict, 'wb') as f:
            pickle.dump(ing_to_ing_ppmi_sim, f)
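For reference, the score accumulated in the loop above is a clipped, square-root-weighted PMI; the same arithmetic as a standalone function (a sketch):

import numpy as np

def weighted_ppmi(n_ic, n_i, n_c, n_contexts):
    # PMI of ingredient i with context c, clipped at zero and scaled by
    # sqrt(max(count_i, count_c)), matching the loop above
    pmi = np.log10((n_ic * n_contexts) / (n_i * n_c))
    return max(0, pmi * np.sqrt(max(n_i, n_c)))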
Example #9
import rdflib
from rdflib import RDF, RDFS, Namespace

onto = rdflib.Graph()
onto.parse("file:em-rdfs.n3", format="n3")

print("graph has %s statements." % len(onto))
# prints graph has 79 statements.

for subj, pred, obj in onto:
    if (subj, pred, obj) not in onto:
        raise Exception("It better be!")

# s = onto.serialize(format='n3')
# print(s.decode('UTF-8'))

rml = rdflib.Graph()
rml.parse("file:EM2EM.rml", format="n3")

# s = rml.serialize(format='n3')
# print(s.decode('UTF-8'))

RR = Namespace("http://www.w3.org/ns/r2rml#")
RML = Namespace("http://semweb.mmlab.be/ns/rml#")
QL = Namespace("http://semweb.mmlab.be/ns/ql#")

rml += onto
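# Graphs support in-place set union: `rml += onto` merges every triple from
# the ontology graph into the RML graph, equivalent to:
#     for triple in onto:
#         rml.add(triple)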

# ===========================================================================================
# Handling concept subsumption
Example #10
    def __init__(self, uri, kind, app_label_entities="entities",
                 app_label_relations="relations", app_label_vocabularies="vocabularies", **kwargs):
        """
        :param uri: (url) Uri to parse the object from (http://test.at). The uri must start with a base url mentioned in the RDF parser settings file.
        :param kind: (string) Kind of entity (Person, Place, Institution, Work, Event)
        :param app_label_entities: (string) Name of the Django app that contains the entities that we create.
        :param app_label_relations: (string) Name of the Django app that contains the relations for the merging process.
        :param app_label_vocabularies: (string) Name of the Django app that contains the vocabularies defining the entities and relations.
        """
        owl = "http://www.w3.org/2002/07/owl#"

        def exist(uri):
            if objct.objects.filter(uri__uri=uri).count() > 0:
                return True, objct.objects.get(uri__uri=uri)
            else:
                return False, False

        def prep_string(tupl):
            if isinstance(tupl, str):
                return tupl
            if tupl[1]:
                m = re.match(tupl[1][0], tupl[0])
                group = tupl[1][1]
                if not group:
                    group = 0
                try:
                    return m.group(group)
                except (AttributeError, IndexError):
                    return tupl[0]
            else:
                r = tupl[0]
            return r.strip()
        objct = ContentType.objects.get(app_label=app_label_entities, model=kind.lower()).model_class()
        force = kwargs.get('force', None)
        res_attrb = dict()
        labels = []
        related_objcts = []
        uri = harmonize_geonames_id(uri)
        self.uri = uri
        self.kind = kind
        self.saved = False
        test = exist(self.uri)
        if test[0] and not force:
            self.objct = test[1]
            self.created = False
        else:
            self.created = True
            rdf_t = dict()
            for x in sett_RDF_generic[kind]['data']:
                self.settings_defined = False
                if not uri.startswith(x['base_url']):
                    continue
                self.settings_defined = True
                g = rdflib.Graph()
                uri_2 = uri
                if not uri_2.endswith('/'):
                    uri_2 += '/'
                o2 = rdflib.term.URIRef(uri)
                g.parse('{}{}'.format(uri_2.strip(), x['url_appendix']), format='xml')
                sameas = rdflib.term.URIRef(owl+'sameAs')
                list_sameas = []
                for p in g.objects(subject=o2, predicate=sameas):
                    list_sameas.append(genUri(uri=p))
                self.sameas = list_sameas
                if 'kind' in x.keys():
                    for k in x['kind']:
                        kind_rdf = rdflib.term.URIRef(k[0])
                        kind_val = g.value(o2, kind_rdf)
                        if kind_val is not None:
                            break
                        else:
                            kind_val = k[1]
                    if kind_val is not None:
                        kind_objct = ContentType.objects.get(
                            app_label=app_label_vocabularies,
                            model=kind.lower() + 'type').model_class()
                        kind_objct, created = kind_objct.objects.get_or_create(name=kind_val)
                        res_attrb['kind'] = kind_objct
                for uri_2 in list_sameas:
                    test = exist(uri_2)
                    if test[0]:
                        self.objct = test[1]
                        self.created = False
                        uri_3 = genUri(uri=uri, entity=self.objct)
                        uri_3.save()
                for xx in x['attributes']:
                    rdf_t[xx['name']] = ()
                    subj2 = []
                    results = []
                    ind_type = ()
                    for z in xx['identifiers']:
                        if len(results) > 0:
                            continue
                        cnt = 0
                        cnt_2 = 1
                        try:
                            k = z[cnt_2]
                        except IndexError:
                            k = '='
                        subj = [o2, ]
                        while k:
                            for indx, s in enumerate(subj):
                                if z[cnt][0] == 'objects':
                                    pred = rdflib.term.URIRef(z[cnt][2])
                                    res = g.objects(subject=s, predicate=pred)
                                    if type(res) != types.GeneratorType:
                                        break
                                    for r in res:
                                        if z[cnt][3]:
                                            if not getattr(r, z[cnt][3][0]) == z[cnt][3][1]:
                                                continue
                                        if k == '>':
                                            subj2.append(r)
                                        elif k == '=':
                                            results.append((z[cnt][1], r, indx))
                                            ind_type += ((len(ind_type), z[cnt][1]),)
                            cnt_2 += 2
                            try:
                                k = z[cnt_2]
                            except IndexError:
                                k = '='
                            if cnt + 2 > len(z):
                                k = None
                            cnt += 2
                            subj = subj2
                    for attrb in sett_RDF_generic[kind]['matching']['attributes'].keys():
                        res_2 = []
                        for x in sett_RDF_generic[kind]['matching']['attributes'][attrb]:
                            for s in x:
                                for ind, elem in filter(lambda x: x[1] == s[0], ind_type):
                                    elem = results[ind][1]
                                    res_2.append(prep_string((elem, s[1])))
                                if isinstance(s, str):
                                    res_2.append(s)
                        if len(res_2) == len(x):
                            res_attrb[attrb] = ''.join(res_2)
                    for lab in sett_RDF_generic[kind]['matching']['labels'].keys():
                        lb_type, created = LabelType.objects.get_or_create(name=lab)
                        for x in sett_RDF_generic[kind]['matching']['labels'][lab]:
                            for ind, elem in filter(lambda a: a[1]==x[0], ind_type):
                                elem = results[ind][1]
                                lb = Label(label=prep_string((elem, x[1])), isoCode_639_3=elem.language, label_type=lb_type)
                                labels.append(lb)
                    if kwargs.get('drill_down', True):
                        for con in sett_RDF_generic[kind]['matching']['linked objects']:
                            for x in con['object']:
                                for ind, elem in filter(lambda a: a[1]==x[0], ind_type):
                                    elem = results[ind][1]
                                    ob = GenericRDFParser(elem, con['type'], drill_down=False)
                                    if ob.created and not ob.saved:
                                        ob.save()   # TODO: We should move the save of related objects in the save routine
                                    try:
                                        u = ContentType.objects.get(app_label=app_label_relations, model=kind.lower()+con['type'].lower())
                                        u_kind = ContentType.objects.get(app_label=app_label_vocabularies, model=kind.lower()+con['type'].lower()+'Relation'.lower())
                                    except ContentType.DoesNotExist:
                                        u = ContentType.objects.get(app_label=app_label_relations, model=con['type'].lower()+kind.lower())
                                        u_kind = ContentType.objects.get(app_label=app_label_vocabularies, model=con['type'].lower()+kind.lower()+'Relation'.lower())
                                    u_kind_2 = u_kind.model_class()
                                    u2 = u.model_class()()
                                    uk, created = u_kind_2.objects.get_or_create(name=con['kind'])
                                    if con['type'] == kind:
                                        setattr(u2, 'related_' + con['type'].lower() + 'B_id', ob.objct.pk)
                                    else:
                                        setattr(u2, 'related_' + con['type'].lower() + '_id', ob.objct.pk)
                                    setattr(u2, 'relation_type_id', uk.pk)
                                    related_objcts.append(u2)
            self.objct = objct(**res_attrb)
            self.labels = labels
            self.related_objcts = related_objcts
Example #11
def main():
    Total = dict()
    Tagsets = dict()
    TagsetsToTags = dict()
    Equipment = dict()
    print "hi"
    BRICK = rdflib.Namespace('https://brickschema.org/schema/1.0.1/Brick#')
    BRICKFRAME = rdflib.Namespace(
        'https://brickschema.org/schema/1.0.1/BrickFrame#')
    GHC = rdflib.Namespace('http://cmu.edu/building/ontology/ghc#')
    #RDF, RDFS and OWL have already been imported in the library initializations
    print GHC["test"]
    #Initiate graph from base ttl file
    g = rdflib.Graph()
    g.bind('GHC', GHC)
    g.bind('brick', BRICK)
    #	new = rdflib.Graph()

    #new.parse('GHCYuvraj_brick.ttl',format='ttl')
    #	g.parse('../BuildingSchema/Brick.ttl', format='turtle')
    count1 = 0
    count2 = 0
    changeablemapping = dict()
    with open('CMU_GHC.csv', 'r') as DataFile:  # 'rU' mode was removed in Python 3; plain 'r' handles universal newlines
        with open('CMU_AHU_OddBuildingTagSet.csv', 'r') as Mapping:
            with open('TagSets.csv', 'r') as GDocs:
                changeable = csv.DictReader(GDocs)
                for row in changeable:
                    Value = row['Dimension']
                    Key = row['TagSet']
                    Key = re.sub(' ', '_', Key)
                    Values = Value.split('>')
                    if (len(Values) > 1):
                        changeablemapping[Key] = Values
                #	print Values
                reader = csv.DictReader(Mapping)
                for row in reader:
                    #print row['Bas1'], row['TagSet'], row['Tags']
                    BasTag = row['Bas1']
                    ListBasTag = BasTag.split('/')
                    length = len(ListBasTag)
                    Key = ListBasTag[length - 1]
                    #	print Key
                    x = row['TagSet']
                    NewX = re.sub(' ', '_', x)
                    Key = re.sub(' ', '_', Key)
                    #	print NewX
                    Tagsets[Key] = NewX
                    Tags = row['Tags']
                    listTags = Tags.split(';')
                    TagsetsToTags[NewX] = listTags
                    if NewX not in changeablemapping:
                        print("2", NewX)  # tagset missing from the Dimension mapping
                #	print ListBasTag
                MapReader = csv.reader(Mapping, delimiter=' ', quotechar='|')
        #	for row in MapReader:
        #		print row
        reader = csv.DictReader(DataFile)
        #	g.add((GHC['GHC_HVAC'],RDF.type,OWL.NamedIndividual))
        #	g.add((GHC['GHC_HVAC'],RDF.type,BRICK['HVAC']))
        for row in reader:
            New = row['bas_raw']
            ListBasTag = New.split('/')
            length = len(ListBasTag)
            Key = ListBasTag[length - 1]
            Key = re.sub(' ', '_', Key)
            y = 0
            if ('Parking' in ListBasTag[2]):
                y = 1

            NewKey = ListBasTag[1] + '/' + ListBasTag[3 + y] + '/' + Key
            NewKey = re.sub(' ', '_', NewKey)
            x = GHC[NewKey]
            #	g.add((Key,RDF.type,OWL.NamedIndividual))
            #	g.add((Key,RDF.type,BRICK[Tagsets[key]]))
            #	print Key
            Equip = ""
            BelongsTo = ""
            if Key in Tagsets:
                Total[Key] = 1
                count1 += 1
                g.add((x, RDF.type, OWL.NamedIndividual))
                g.add((x, RDF.type, BRICK[Tagsets[Key]]))
                location = ""
                for i in range(0, 3 + y):
                    location = location + ListBasTag[i]

                location = re.sub(' ', '_', location)
                g.add((GHC[location], RDF.type, OWL.NamedIndividual))
                g.add((GHC[location], RDF.type, BRICK["Location"]))
                #	g.add((x,BRICK.hasLocation,GHC[location]))
                if 'AHU' in ListBasTag[3 + y]:
                    Equip = "AHU"
                elif 'VAV' in ListBasTag[3 + y] or 'FSB' in ListBasTag[3 + y]:
                    #	print ListBasTag[3+y]
                    Equip = "VAV"
                elif 'CRAC' in ListBasTag[3 + y]:
                    Equip = "CRAC"
                elif 'FCU' in ListBasTag[3 + y]:
                    #	print "HELLO"
                    Equip = "Fan_Coil_Unit"
                else:
                    #print ListBasTag[3+y],NewKey
                    if ('Usage' in NewKey or 'Peak' in NewKey):
                        Equip = "Meter"
                    else:
                        pass
                        #print NewKey
            #	mapping = changeablemapping[Tagsets[Key]]
            #	if(len(mapping) == 4):
            #		Equip = re.sub(' ','_',mapping[3])
            #		BelongsTo =""
            #	if(len(mapping) == 3):
            #		Equip = re.sub(' ','_',mapping[2])
            #		BelongsTo = re.sub(' ','_',mapping[1])
            #	if(len(mapping) > 4):
            #		Equip = re.sub(' ','_',mapping[4])
            #		BelongsTo = re.sub(' ','_',mapping[3])

            #	LowestEquip = mapping[len(mapping)-1]
            #	NewEquipment = ListBasTag[1]+'/'+ListBasTag[3+y]+'/'+Equip
            #	NewBelongs =  ListBasTag[1]+'/'+ListBasTag[3+y]+'/'+BelongsTo
            #	NewEquipment = re.sub(' ','_',NewEquipment)
            #	NewBelong = re.sub(' ','_',NewBelongs)

                blank = re.sub(' ', '_', ListBasTag[3 + y])
                #	print changeablemapping[Tagsets[Key]], Key
                #	#	print blank, ListBasTag[3+y]
                #	NewEquip = NewEquipment
                #	print NewBelong, NewEquip, x
                if (blank not in Equipment
                        and 'Interface' not in ListBasTag[3 + y]
                        and Equip != ""):

                    Equipment[blank] = 1
                    g.add((GHC[blank], RDF.type, OWL.NamedIndividual))
                    g.add((GHC[blank], RDF.type, BRICK[Equip]))
                    if (Equip != "Meter" and Equip != "AHU"):
                        number = re.search('[0-9]+', blank)
                        floor = number.group()[0]
                        g.add((GHC[floor + "Floor"], RDF.type, BRICK["Floor"]))
                        g.add((GHC[blank + "Room"], BRICKFRAME.hasPart,
                               GHC[blank + "Room"]))
                        g.add((GHC[blank + "Room"], RDF.type,
                               OWL.NamedIndividual))
                        g.add((GHC[blank + "Room"], RDF.type, BRICK["Room"]))
                        if (Equip != "FCU"):
                            g.add((GHC[blank + "Zone"], RDF.type,
                                   OWL.NamedIndividual))
                            g.add((GHC[blank + "Zone"], RDF.type,
                                   BRICK["HVAC_Zone"]))
                            g.add((GHC[blank + "Zone"], BRICKFRAME.hasPoint,
                                   GHC[blank + "Room"]))
                            g.add((GHC[blank + "Room"], BRICKFRAME.isPointOf,
                                   GHC[blank + "Zone"]))
                            g.add((GHC[blank], BRICKFRAME.feeds,
                                   GHC[blank + "Zone"]))
                            g.add((GHC[blank], BRICKFRAME.feeds,
                                   GHC[blank + "Room"]))
                            g.add((GHC[blank + "Room"], BRICKFRAME.isFedBy,
                                   GHC[blank]))
                            g.add((GHC[blank + "Zone"], BRICKFRAME.isFedBy,
                                   GHC[blank]))

                        g.add((x, BRICKFRAME.isLocatedIn, GHC[blank + "Room"]))
                        g.add((GHC[blank + "Room"], BRICKFRAME.contains, x))

                #	g.add((GHC[blank],BRICH.isLocatedIn,GHC[location]))
                #	if not(NewBelong in Equipment):
                #		g.add((GHC[NewBelong],RDF.type, OWL.NamedIndividual))
                #		g.add((GHC[NewBelong],RDF.type,BRICK[BelongsTo]))
                #		Equipment[NewBelong]=1
                #		g.add((GHC[NewEquip],BRICK.hasLocation,GHC[location]))
                #	print NewBelong, NewEquip

                #	g.add((GHC[NewEquip],BRICK.isPartOf,GHC[NewBelong]))
                #	g.add((GHC[NewEquip],BRICK.hasLocation, GHC[location]))
                if (Equip != ""):
                    g.add((GHC[blank], BRICKFRAME.hasPoint, x))
                    g.add((GHC[blank + "Room"], BRICKFRAME.hasPoint, x))
                    print(x)
                    g.add((x, BRICKFRAME.isPointOf, GHC[blank]))
                    g.add((x, BRICKFRAME.isPointOf, GHC[blank + "Room"]))
            #	if Equip == "":
            #		g.add((GHC['GHC_HVAC'],BRICK.hasPoint,x))

            else:
                Total[Key] = 1
                #	print Key
                count2 += 1
            #	print Key
        #	if ('AHU' in ListBasTag[3]):
        #		print "3",ListBasTag[3]
        #	if ('AHU' in ListBasTag[4]):
        #		print "4",ListBasTag[4]
        for item in TagsetsToTags.keys():
            x = BRICK[item]
            for value in TagsetsToTags[item]:
                g.add((x, BRICKFRAME.hasTag, BRICK[value]))
    with open('AHURelations.csv', 'r', newline='') as relations:
        reader = csv.DictReader(relations)
        for row in reader:
            new = re.sub('_', '-', row['First'])
            g.add((GHC[new + '_I'], BRICKFRAME.feeds, GHC[row['Third']]))

#	g.add((GHC["AHU-1_Zone-Temperature"],RDF.type,OWL.NamedIndividual))
    if ((BRICK["Run_Request"], None, None) in g):
        print "Hi"


#	g.add((GHC["AHU-1_Zone-Temperature"],RDF.type,BRICK["Zone_Temp"]))
#	g.add((GHC["VAV1"], BRICK.hasPoint, GHC["AHU-1_Zone-Temperature"]))
    g.serialize(destination='GHC_brick.ttl', format='turtle')
    print(count1)
    print(count2)
    print(len(Total))
    def test_str(self):
        self.assertIsInstance(self.graph.to_str(), str)
        g2 = rdflib.Graph()
        g2.parse(data=str(self.graph), format="ttl")
Example #13
    def run(self):
        if self.target_graph is not None:
            the_target_graph = self.target_graph
        else:
            has_cloned = False
            if self.ont_graph is not None:
                # creates a copy of self.data_graph, doesn't modify it
                the_target_graph = self.mix_in_ontology()
                has_cloned = True
            else:
                the_target_graph = self.data_graph
            inference_option = self.options.get('inference', 'none')
            if inference_option and not self.pre_inferenced and str(
                    inference_option) != "none":
                if not has_cloned and not self.inplace:
                    the_target_graph = clone_graph(the_target_graph)
                self._run_pre_inference(the_target_graph, inference_option,
                                        self.logger)
                self.pre_inferenced = True
            self._target_graph = the_target_graph

        shapes = self.shacl_graph.shapes  # This property getter triggers shapes harvest.

        if self.options['advanced']:
            target_types = gather_target_types(self.shacl_graph)
            advanced = {
                'functions': gather_functions(self.shacl_graph),
                'rules': gather_rules(self.shacl_graph)
            }
            for s in shapes:
                s.set_advanced(True)
            apply_target_types(target_types)
        else:
            advanced = {}
        if isinstance(the_target_graph,
                      (rdflib.Dataset, rdflib.ConjunctiveGraph)):
            named_graphs = [
                rdflib.Graph(
                    the_target_graph.store,
                    i,
                    namespace_manager=the_target_graph.namespace_manager)
                if not isinstance(i, rdflib.Graph) else i
                for i in the_target_graph.store.contexts(None)
            ]
        else:
            named_graphs = [the_target_graph]
        reports = []
        non_conformant = False

        for g in named_graphs:
            if advanced:
                apply_functions(advanced['functions'], g)
                apply_rules(advanced['rules'], g)
            for s in shapes:
                _is_conform, _reports = s.validate(g)
                non_conformant = non_conformant or (not _is_conform)
                reports.extend(_reports)
            if advanced:
                unapply_functions(advanced['functions'], g)
        v_report, v_text = self.create_validation_report(
            self.shacl_graph, not non_conformant, reports)
        return (not non_conformant), v_report, v_text
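
This run() method mirrors pySHACL's internal Validator. A minimal sketch of driving the same flow through pySHACL's public entry point (the graph file names are hypothetical):

import rdflib
from pyshacl import validate

data_g = rdflib.Graph().parse("data.ttl", format="turtle")     # hypothetical data graph
shacl_g = rdflib.Graph().parse("shapes.ttl", format="turtle")  # hypothetical shapes graph

# inference and advanced correspond to options['inference'] and options['advanced'] above
conforms, report_graph, report_text = validate(data_g,
                                               shacl_graph=shacl_g,
                                               inference="rdfs",
                                               advanced=True)
print(report_text)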
Example #14
    def __init__(self, path):
        self.path = path
        install_rdf_path = os.path.join(path, 'install.rdf')
        self.rdf = rdflib.Graph().parse(open(install_rdf_path))
        self.package_type = None
        self.find_root()  # Will set self.package_type
Example #15
    def testEmpty(self):
        g = rdflib.Graph()
        s = g.serialize(format='trig')
        self.assertIsNotNone(s)
Example #16
def gen_scale_free_graph(
    destination_folder: Path,
    vertices_number: int,
    vertices_degree: int,
    labels: Tuple[str, ...] = ('A', 'B', 'C', 'D')) -> Path:
    """
    Generates a scale-free graph.

    :param destination_folder: directory to save the graph
    :type destination_folder: Path
    :param vertices_number: number of vertices in the graph
    :type vertices_number: int
    :param vertices_degree: degree of a vertex in the graph
    :type vertices_degree: int
    :param labels: edge labels in the graph
    :type labels: Tuple[str, ...]
    :return: path to generated graph
    :rtype: Path
    """

    g = {
        i: [(j, np.random.choice(labels)) for j in range(vertices_degree)]
        for i in range(vertices_degree)
    }

    degree = [3] * vertices_degree

    for i in range(vertices_degree, vertices_number):
        to_vertices = np.random.choice(range(i),
                                       size=vertices_degree,
                                       replace=False,
                                       p=np.array(degree) / sum(degree))

        g[i] = []
        degree.append(0)
        for to in to_vertices:
            label = np.random.choice(labels)
            g[i].append((to, label))
            degree[to] += 1
            degree[i] += 1

    output_graph = rdflib.Graph()

    edges = list()

    for v in g:
        for to in g[v]:
            edges.append((v, to[1], to[0]))

    for subj, pred, obj in tqdm(
            edges,
            desc=f'scale_free_graph_{vertices_number}_{vertices_degree} generation'):
        add_rdf_edge(subj, pred, obj, output_graph)

    target = destination_folder / f'scale_free_graph_{vertices_number}_{vertices_degree}.xml'

    write_to_rdf(target, output_graph)

    return target
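
A possible invocation (a sketch; add_rdf_edge and write_to_rdf are project-local helpers assumed to be importable alongside this function):

from pathlib import Path

# generate a 100-vertex scale-free graph with seed degree 3, saved as RDF/XML
graph_path = gen_scale_free_graph(Path("graphs"), vertices_number=100, vertices_degree=3)
print(graph_path)  # graphs/scale_free_graph_100_3.xml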
Example #17
    def __init__(self, zip_file, certinfo=None):
        self.zip_file = zip_file
        self.certinfo = certinfo
        self.rdf = rdflib.Graph().parse(data=zip_file.read('install.rdf'))
        self.package_type = None
        self.find_root()  # Will set self.package_type
Example #18
import sys
import datetime, random, urllib.parse, urllib.request
import rdflib as r

# NICK, aamongo and ORe are configuration globals assumed to be defined elsewhere
aa = r.Namespace("http://purl.org/socialparticipation/aa/")
xsd = r.namespace.XSD
rdf = r.namespace.RDF
if len(sys.argv) == 1:
    print("usage: aa this is a aa shout, for registering ongoing work")
else:
    shout = " ".join(sys.argv[1:])
    if aamongo:
        urllib.request.urlretrieve(
            "http://aaserver.herokuapp.com/shout?nick=%s&shout=%s" %
            (NICK, urllib.parse.quote(shout)))
        print("shout mongo logged")
    if ORe:
        g = r.Graph()
        # ID is a datetime with milliseconds plus a 5-digit random number
        tid = str(datetime.datetime.now().timestamp())
        tid += ''.join(["%s" % random.randint(0, 9) for num in range(0, 5)])
        uri = r.URIRef(aa.Shout + "#" + tid)
        g.add((uri, rdf.type, aa.Shout))
        g.add((uri, aa.provenance, r.Literal("ORe", datatype=xsd.string)))
        uri_ = r.URIRef(aa.User + "#" + NICK)
        g.add((uri_, rdf.type, aa.User))
        g.add((uri_, aa.nick, r.Literal(NICK, datatype=xsd.string)))
        g.add((uri, aa.shoutMessage, r.Literal(shout, datatype=xsd.string)))
        g.add((uri, aa.created,
               r.Literal(datetime.datetime.now(), datatype=xsd.dateTime)))
        g.add((uri, aa.mongoDuplicate, r.Literal(aamongo,
                                                 datatype=xsd.boolean)))
Example #19
import nose.tools
import rdflib


def test_broken_add():

    g = rdflib.Graph()
    nose.tools.assert_raises(AssertionError, lambda: g.add((1, 2, 3)))
    nose.tools.assert_raises(AssertionError, lambda: g.addN([(1, 2, 3, g)]))
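
nose is unmaintained; the same checks with pytest (a sketch, assuming an rdflib version that raises AssertionError for non-Node terms, as the original test expects):

import pytest
import rdflib


def test_broken_add_pytest():
    g = rdflib.Graph()
    with pytest.raises(AssertionError):
        g.add((1, 2, 3))
    with pytest.raises(AssertionError):
        g.addN([(1, 2, 3, g)])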
Example #20
import rdflib

g = rdflib.Graph().parse('../data-unreified.ttl', format='ttl')

all_rdf = []
i = 1
for s, p, o in g:
    rdf = '''
:s{}
    a rdf:Statement ;
    rdf:subject {} ;
    rdf:predicate {} ;
    rdf:object {} ;
    dct:created "2019-07-10"^^xsd:date ;
    loci:hadGenerationMethod :method ;
.'''.format(
        str(i).zfill(3),
        s.replace('http://linked.data.gov.au/def/lgpc/', 'lgpc:'),
        p.replace('http://www.w3.org/2004/02/skos/core#', 'skos:'),
        o.replace('http://linked.data.gov.au/def/gpc/', 'gpc:'))
    all_rdf.append(rdf)

    i += 1

with open('../data.ttl', 'w') as f:
    f.write('\n'.join(all_rdf))
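Example #21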
"""
利用serialize将数据以一定规范存储到硬盘中
"""

import rdflib

graph = rdflib.Graph()

# work with the graph:
s = rdflib.URIRef('牛膝')     # "niuxi" (Achyranthes root, a medicinal herb)
p = rdflib.URIRef('功效属性')  # "efficacy attribute"
o = rdflib.URIRef("活血")     # "invigorates blood circulation"

graph.add((s, p, o))

# store in n3 format
graph.serialize('zhongyaoyao.rdf', format='n3')

s = rdflib.URIRef("http://www.example.org/牛膝")
p = rdflib.URIRef("http://www.example.org/功效属性")
o = rdflib.URIRef("http://www.example.org/活血")

g1 = rdflib.Graph()
g1.add((s, p, o))
g1.serialize('zhongyaoyao1.rdf')  # stored as 'xml' by default

g2 = rdflib.Graph()
g2.parse('zhongyaoyao1.rdf', format='xml')  # the format must be specified when parsing an RDF file
subject = g2.subjects(p, o)
for i in subject:
    print("i:", i)
Example #22
        assert type(x).__name__ == "str"
    except AssertionError:
        print("not a string?", type(x), x)

    return x


def write_jsonld(filename, graph, vocab="vocab.json"):
    """
    serialize the given graph a JSON-LD output
    """
    with open(vocab, "r") as f:
        context = json.load(f)

    with open(filename, "wb") as f:
        f.write(graph.serialize(format="json-ld", context=context, indent=2))


if __name__ == "__main__":
    # load the graph
    filename = sys.argv[1]
    graph = rdflib.Graph().parse(filename, format="n3")

    # enumerate all of the relations
    for subj, pred, obj in graph:
        print(subj, pred, obj)

    # serialize the graph as JSON-LD
    filename = "tmp.jsonld"
    write_jsonld(filename, graph)
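
The script expects a vocab.json JSON-LD context next to it; a minimal hypothetical stand-in can be generated like this (the real project ships its own vocabulary):

import json

# write a minimal JSON-LD context so write_jsonld() has something to load
with open("vocab.json", "w") as f:
    json.dump({"@vocab": "http://schema.org/"}, f)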
Example #23
def getDatesWD(period):
    """Query Wikidata to get the dates of a period, and upload them to the
    triplestore so the information can be retrieved quickly."""
    queryWdDates = """
		SELECT ?start_date ?end_date
		WHERE {

			OPTIONAL {<""" + period + """> <http://www.wikidata.org/prop/direct/P580> ?start_date_1 } .
	  	  	OPTIONAL {<""" + period + """> <http://www.wikidata.org/prop/direct/P571> ?start_date_2 } .
	  	  	OPTIONAL {<""" + period + """> <http://www.wikidata.org/prop/direct/P361> ?broader_period.
	  	   			?broader_period <http://www.wikidata.org/prop/direct/P571> ?start_date_3 } .
			OPTIONAL {<""" + period + """> <http://www.wikidata.org/prop/direct/P361> ?broader_period.
	  	   			?broader_period <http://www.wikidata.org/prop/direct/P580> ?start_date_3_1 } .
	  	  	OPTIONAL {<""" + period + """> <http://www.wikidata.org/prop/direct/P2596> ?culture .
	  	  			?culture <http://www.wikidata.org/prop/direct/P571> ?start_date_4 } .
			OPTIONAL {<""" + period + """> <http://www.wikidata.org/prop/direct/P2348> ?culture .
	  	  			?culture <http://www.wikidata.org/prop/direct/P580> ?start_date_5 } .
	  	   	BIND(COALESCE(?start_date_1, ?start_date_2, ?start_date_3, ?start_date_3_1, ?start_date_4, ?start_date_5) AS ?start_date) .

			OPTIONAL {<""" + period + """> <http://www.wikidata.org/prop/direct/P582> ?end_date_1} .
			OPTIONAL {<""" + period + """> <http://www.wikidata.org/prop/direct/P2348> ?culture .
	  	  			?culture <http://www.wikidata.org/prop/direct/P582> ?end_date_2 } .
			OPTIONAL {<""" + period + """> <http://www.wikidata.org/prop/direct/P361> ?broader_period.
	  	   			?broader_period <http://www.wikidata.org/prop/direct/P582> ?end_date_3 } .
			BIND(COALESCE(?end_date_1, ?end_date_2, ?end_date_3) AS ?end_date) .
			}
	"""
    sparqlWD = SPARQLWrapper(conf.wikidataEndpoint)
    sparqlWD.setQuery(queryWdDates)
    sparqlWD.setReturnFormat(JSON)
    resultsWD = sparqlWD.query().convert()

    base = 'https://w3id.org/artchives/'
    wd = rdflib.Graph(identifier=URIRef(base + 'wd/'))
    WDP = Namespace("http://www.wikidata.org/prop/direct/")

    start_date = end_date = 'no date'  # defaults in case Wikidata returns no bindings
    for resultWD in resultsWD["results"]["bindings"]:
        if "start_date" in resultWD:
            start_date = resultWD["start_date"]["value"]
            wd.add((URIRef(period),
                    URIRef("http://www.wikidata.org/prop/direct/P580"),
                    Literal(start_date, datatype=XSD.dateTime)))
        else:
            start_date = 'no date'

        if "end_date" in resultWD:
            end_date = resultWD["end_date"]["value"]
            wd.add((URIRef(period),
                    URIRef("http://www.wikidata.org/prop/direct/P582"),
                    Literal(end_date, datatype=XSD.dateTime)))
        else:
            end_date = 'no date'

        recordID = period.split("entity/",
                                1)[1] if 'entity' in period else period.split(
                                    "artchives/", 1)[1]

        if len(wd) == 0:
            wd.add(
                (URIRef(period),
                 URIRef("https://w3id.org/artchives/wikidataReconciliation"),
                 Literal("no data added")))
        # Create a copy in folder /records and load on the triplestore
        wd.serialize(destination='records/' + recordID + '.trig',
                     format='trig',
                     encoding='utf-8')
        server.update('load <file:///' + dir_path + '/records/' + recordID +
                      '.trig>')
    return [start_date, end_date]
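
A possible call (a sketch; conf.wikidataEndpoint, server and dir_path are module-level objects the original code relies on, and the period URI is hypothetical):

start, end = getDatesWD("http://www.wikidata.org/entity/Q12554")  # hypothetical period URI
print(start, end)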
Example #24
    def __init__(self):
        """Initialize the session."""
        self._registry = Registry()
        self.root = None
        self.graph = rdflib.Graph()
Example #25
import rdflib

from sdotermsource import *
from sdoterm import *
from localmarkdown import Markdown

Markdown.setWikilinkCssClass("localLink")
Markdown.setWikilinkPrePath("/")

if VOCABURI.startswith("https://"):
    triplesfile = "../data/schemaorg-all-https.nt"
else:
    triplesfile = "../data/schemaorg-all-http.nt"


termgraph = rdflib.Graph()
termgraph.parse(triplesfile, format="nt")

print ("loaded %s triples" % len(termgraph))

SdoTermSource.setSourceGraph(termgraph)
print ("Types Count: %s" % len(SdoTermSource.getAllTypes(expanded=False)))
print ("Properties Count: %s" % len(SdoTermSource.getAllProperties(expanded=False)))


for termname in ["acceptedAnswer","Book"]:
    term = SdoTermSource.getTerm(termname)

    print("")
    print("TYPE: %s" % term.termType)
    print("URI: %s" % term.uri)
Example #26
    def __init__(self,
                 model_uri,
                 sparql_wrapper=None,
                 threshold=0.3,
                 include_body: bool = False,
                 resolve: bool = True,
                 use_caching: bool = False):
        self._graph = rdflib.Graph()
        self.thesoz = SkosThesaurusMatcher(
            self._graph,
            thesaurus_path="claimskg/data/thesoz-komplett.xml",
            skos_xl_labels=True,
            prefix="http://lod.gesis.org/thesoz/")
        self._graph = self.thesoz.get_merged_graph()

        self.unesco = SkosThesaurusMatcher(
            self._graph,
            thesaurus_path="claimskg/data/unesco-thesaurus.xml",
            skos_xl_labels=False,
            prefix="http://vocabularies.unesco.org/thesaurus/")

        self._graph = self.unesco.get_merged_graph()

        self._graph.load("claimskg/data/dbpedia_categories_lang_en_skos.ttl",
                         format="turtle")

        self._sparql_wrapper = sparql_wrapper  # type: SPARQLWrapper
        self._uri_generator = ClaimsKGURIGenerator(model_uri)
        self._threshold = threshold
        self._include_body = include_body
        self._resolve = resolve
        self._use_caching = use_caching

        self.model_uri = model_uri
        self._namespace_manager = NamespaceManager(Graph())

        self._claimskg_prefix = rdflib.Namespace(model_uri)
        self._namespace_manager.bind('claimskg',
                                     self._claimskg_prefix,
                                     override=False)
        self._namespace_manager.bind('base',
                                     self._claimskg_prefix,
                                     override=True)

        self.counter = TypedCounter()

        self._rdfs_prefix = rdflib.Namespace(
            "http://www.w3.org/2000/01/rdf-schema#")
        self._namespace_manager.bind('rdfs', self._rdfs_prefix, override=False)

        self._schema_prefix = rdflib.Namespace("http://schema.org/")
        self._namespace_manager.bind('schema',
                                     self._schema_prefix,
                                     override=False)

        self._namespace_manager.bind('owl', OWL, override=True)

        self._dbo_prefix = rdflib.Namespace("http://dbpedia.org/ontology/")
        self._namespace_manager.bind("dbo", self._dbo_prefix, override=False)

        self._dbr_prefix = rdflib.Namespace("http://dbpedia.org/resource/")
        self._namespace_manager.bind("dbr", self._dbr_prefix, override=False)

        self._dbc_prefix = rdflib.Namespace(
            "http://dbpedia.org/resource/Category_")
        self._namespace_manager.bind("dbc", self._dbr_prefix, override=False)

        self._dcat_prefix = rdflib.Namespace("http://www.w3.org/ns/dcat#")
        self._namespace_manager.bind("dcat", self._dcat_prefix, override=False)

        self._dct_prefix = rdflib.Namespace("http://purl.org/dc/terms/")
        self._namespace_manager.bind("dct", self._dct_prefix, override=False)

        self._foaf_prefix = rdflib.Namespace("http://xmlns.com/foaf/0.1/")
        self._namespace_manager.bind("foaf", self._foaf_prefix, override=False)

        self._vcard_prefix = rdflib.Namespace(
            "http://www.w3.org/2006/vcard/ns#")
        self._namespace_manager.bind("vcard",
                                     self._vcard_prefix,
                                     override=False)

        self._adms_prefix = Namespace("http://www.w3.org/ns/adms#")
        self._namespace_manager.bind("adms", self._adms_prefix, override=False)

        self._skos_prefix = Namespace("http://www.w3.org/2004/02/skos/core#")
        self._namespace_manager.bind("skos", self._skos_prefix, override=False)

        self._owl_same_as = URIRef(OWL['sameAs'])

        self._schema_claim_review_class_uri = URIRef(
            self._schema_prefix['ClaimReview'])
        self._schema_creative_work_class_uri = URIRef(
            self._schema_prefix['CreativeWork'])
        self._schema_organization_class_uri = URIRef(
            self._schema_prefix['Organization'])
        self._schema_thing_class_uri = URIRef(self._schema_prefix['Thing'])
        self._schema_rating_class_uri = URIRef(self._schema_prefix['Rating'])
        self._schema_language_class_uri = URIRef(
            self._schema_prefix['Language'])

        self._schema_claim_reviewed_property_uri = URIRef(
            self._schema_prefix['claimReviewed'])
        self._schema_url_property_uri = URIRef(self._schema_prefix['url'])
        self._schema_name_property_uri = URIRef(self._schema_prefix['name'])
        self._schema_date_published_property_uri = URIRef(
            self._schema_prefix['datePublished'])
        self._schema_in_language_preperty_uri = URIRef(
            self._schema_prefix['inLanguage'])
        self._schema_author_property_uri = URIRef(
            self._schema_prefix['author'])
        self._schema_same_as_property_uri = URIRef(
            self._schema_prefix['sameAs'])
        self._schema_citation_preperty_uri = URIRef(
            self._schema_prefix['citation'])
        self._schema_item_reviewed_property_uri = URIRef(
            self._schema_prefix['itemReviewed'])
        self._schema_alternate_name_property_uri = URIRef(
            self._schema_prefix['alternateName'])
        self._schema_description_property_uri = URIRef(
            self._schema_prefix['description'])
        self._schema_rating_value_property_uri = URIRef(
            self._schema_prefix['ratingValue'])
        self._schema_mentions_property_uri = URIRef(
            self._schema_prefix['mentions'])
        self._schema_keywords_property_uri = URIRef(
            self._schema_prefix['keywords'])
        self._schema_headline_property_uri = URIRef(
            self._schema_prefix['headline'])
        self._schema_review_body_property_uri = URIRef(
            self._schema_prefix['reviewBody'])
        self._schema_text_property_uri = URIRef(self._schema_prefix['text'])

        self._iso1_language_tag = "en"
        self._iso3_language_tag = "eng"

        self._english_uri = URIRef(self._claimskg_prefix["language/English"])
        self._graph.add(
            (self._english_uri, RDF.type, self._schema_language_class_uri))
        self._graph.add(
            (self._english_uri, self._schema_alternate_name_property_uri,
             Literal(self._iso1_language_tag)))
        self._graph.add((self._english_uri, self._schema_name_property_uri,
                         Literal("English")))

        self._nif_prefix = rdflib.Namespace(
            "http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#")
        self._namespace_manager.bind('nif', self._nif_prefix, override=False)

        self._nif_RFC5147String_class_uri = URIRef(
            self._nif_prefix['RFC5147String'])
        self._nif_context_class_uri = URIRef(self._nif_prefix['Context'])

        self._nif_source_url_property_uri = URIRef(
            self._nif_prefix['sourceUrl'])
        self._nif_begin_index_property_uri = URIRef(
            self._nif_prefix["beginIndex"])
        self._nif_end_index_property_uri = URIRef(self._nif_prefix["endIndex"])
        self._nif_is_string_property_uri = URIRef(self._nif_prefix["isString"])

        self._its_prefix = rdflib.Namespace(
            "https://www.w3.org/2005/11/its/rdf#")
        self._namespace_manager.bind('itsrdf',
                                     self._its_prefix,
                                     override=False)

        self.its_ta_confidence_property_uri = URIRef(
            self._its_prefix['taConfidence'])
        self.its_ta_ident_ref_property_uri = URIRef(
            self._its_prefix['taIdentRef'])

        self._logical_view_claims = []  # type: List[ClaimLogicalView]
        self._creative_works_index = []

        self.keyword_uri_set = set()

        self.global_statistics = ClaimsKGStatistics()
        self.per_source_statistics = {}
Example #27
def validate(goldenset, results):

    with codecs.open(goldenset, 'rb',
                     encoding='utf-8') as goldensetfile, codecs.open(
                         results, 'rb', encoding='utf-8') as resultsfile:
        a = rdflib.Graph()
        a.parse(goldensetfile, format='n3')
        r = rdflib.Graph()
        r.parse(resultsfile, format='n3')

        tweets = {}
        offsets = {}
        multiword = {}

        # Tweet extraction
        for s, p, o in a:

            if s.endswith(',') and p.endswith('isString'):
                id = s.split('#')[0]
                tweets[id] = o

        # Multiword entities are extracted
        for s, p, o in a:
            if p.endswith('anchorOf'):
                id = s.split('#')[0]
                offsets[s] = o
                for offset in offsets.keys():
                    startoffset1 = int(s.split('#char=')[1].split(',')[0])
                    endoffset1 = int(s.split('#char=')[1].split(',')[1])
                    startoffset2 = int(offset.split('#char=')[1].split(',')[0])
                    endoffset2 = int(offset.split('#char=')[1].split(',')[1])

                    if id == offset.split(
                            '#')[0] and startoffset1 != startoffset2 and abs(
                                endoffset1 - startoffset2) < 5:
                        if tweets[id][min(endoffset1, startoffset2):max(
                                endoffset1, startoffset2)] == ' of ':
                            if id not in multiword:
                                multiword[id] = []
                            #print tweets[id][min(startoffset1,startoffset2):max(endoffset1,endoffset2)]
                            multiword[id].append(
                                tweets[id][min(startoffset1, startoffset2
                                               ):max(endoffset1, endoffset2)])
                        elif tweets[id][min(endoffset1, startoffset2):max(
                                endoffset1, startoffset2)] == '/':
                            if id not in multiword:
                                multiword[id] = []
                            #print tweets[id][min(startoffset1,startoffset2):max(endoffset1,endoffset2)]
                            multiword[id].append(
                                tweets[id][min(startoffset1, startoffset2
                                               ):max(endoffset1, endoffset2)])
        """for m in multiword:
                print m, multiword[m]"""

        # Calculates the precision of the system
        def precision():

            fullmentions = 0
            totalmentions = 0
            partialmentions = 0
            annotatedmentions = {}

            # The golden set annotated mentions are extracted
            for s, p, o in a:
                if p.endswith('anchorOf'):
                    id = s.split('#')[0]
                    #print 'Golden set ',id
                    if id not in annotatedmentions:
                        annotatedmentions[id] = []
                    annotatedmentions[id].append(o)

            # Compares the mentions obtained by the system with the ones annotated in the golden set
            for s, p, o in r:
                #print s,p,o

                if p.endswith('anchorOf'):
                    #print o
                    #print s
                    id = s.split('#')[0]
                    #print id

                    if id in annotatedmentions:
                        # Checks the mentions that match fully
                        if o in annotatedmentions[id]:
                            fullmentions += 1
                        else:
                            scored = False
                            for m in annotatedmentions[id]:
                                # Check the mentions that match partially
                                if o in m:
                                    partialmentions += 1
                                    scored = True
                                    break
                            # Check the mentions formed by more than one entities of the golden set
                            if not scored and id in multiword:
                                #print multiword[id]
                                for multientity in multiword[id]:
                                    #print multientity, o
                                    if multientity in o:
                                        partialmentions += 1
                                        #print id,",",multientity,",", o

                    totalmentions += 1
            score = float(fullmentions) / float(totalmentions)
            partialscore = float(fullmentions +
                                 partialmentions) / float(totalmentions)
            #print "Full Mentions Precision: ",fullmentions, totalmentions, score
            #print "Full+Partial Mentions Precision: ",fullmentions+partialmentions, totalmentions, partialscore
            return score, partialscore

        def recall():
            totalmentions = 0
            fullmentions = 0
            partialmentions = 0
            resultmentions = {}

            # Extracts the mentions obtained by the system
            for s, p, o in r:

                if p.endswith('anchorOf'):
                    id = s.split('#')[0]
                    if id not in resultmentions:
                        resultmentions[id] = []
                    resultmentions[id].append(o)

            # Compares the mentions obtained by the system with the ones annotated in the golden set
            for s, p, o in a:

                if p.endswith('anchorOf'):
                    id = s.split('#')[0]
                    # Checks the mentions that match fully
                    if id in resultmentions and o in resultmentions[id]:
                        fullmentions += 1
                    elif id in resultmentions:
                        scored = False
                        for m in resultmentions[id]:
                            # Check the mentions that match partially
                            if m in o:
                                partialmentions += 1
                                scored = True
                                break
                        # Check the mentions formed by more than one entities of the golden set
                        if not scored and id in multiword:
                            #print multiword[id]
                            for multientity in multiword[id]:
                                #print multientity, o
                                if multientity in o:
                                    partialmentions += 1
                                    #print id,",",multientity,",", o

                    totalmentions += 1
            score = float(fullmentions) / float(totalmentions)
            partialscore = float(fullmentions +
                                 partialmentions) / float(totalmentions)
            #print "Full Mentions Recall: ",fullmentions, totalmentions, score
            #print "Full+Partial Mentions Recall: ",fullmentions+partialmentions, totalmentions, partialscore
            return score, partialscore

        def f1():
            fullprec, partialprec = precision()
            fullrec, partialrec = recall()
            results = "Full Mentions Precision: " + str(
                fullprec) + "\nFull+Partial Mentions Precision: " + str(
                    partialprec)
            results += "\nFull Mentions Recall: " + str(
                fullrec) + "\nFull+Partial Mentions Recall: " + str(partialrec)
            finalresults = "\nFull Mentions F1: " + str(
                2 * fullprec * fullrec /
                (fullprec + fullrec)) + "\nFull+Partial Mentions F1: " + str(
                    2 * partialprec * partialrec / (partialprec + partialrec))
            return results + finalresults

    return f1()
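
A possible invocation (file names hypothetical; both files are expected to hold NIF-annotated tweets in N3):

if __name__ == '__main__':
    print(validate('goldenset.n3', 'results.n3'))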
Example #28
    def load_rdf_from_content(self, rdf_content, _format='n3'):
        graph_in_memory = rdflib.Graph("IOMemory")  # rdflib >= 6 renamed this store to "Memory"

        return graph_in_memory.parse(data=rdf_content, format=_format)
Example #29
    def convert(self):
        self.ont2wb = rdflib.Graph()
        if os.path.exists(self.link_graph_file):
            self.ont2wb.parse(self.link_graph_file, format='turtle')
        else:
            self.create_subst_property(
                RDFS.subClassOf, 'P279', 'subClassOf',
                'item')  # https://www.wikidata.org/wiki/Property:P279
            self.create_subst_property(
                RDFS.subPropertyOf, 'P1647', 'subPropertyOf',
                'property')  # https://www.wikidata.org/wiki/Property:P1647
            #self.create_subst_property(SCHEMA.domain, SCHEMA.identifier, '')) #
            #self.create_subst_property(SCHEMA.range, SCHEMA.identifier, '')) # -> datatype
            self.create_subst_property(
                SCHEMA.inLanguage, 'P305', 'inLanguage',
                None)  # https://www.wikidata.org/wiki/Property:P305
            self.create_subst_property(
                SCHEMA.version, 'P348', 'version',
                None)  # https://www.wikidata.org/wiki/Property:P348
            self.create_subst_property(
                SCHEMA.isBasedOn, 'P144', 'isBasedOn',
                'property')  # https://www.wikidata.org/wiki/Property:P144
            self.create_subst_property(
                SCHEMA.copyrightHolder, 'P3931', 'copyrightHolder',
                'item')  # https://www.wikidata.org/wiki/Property:P3931
            self.create_subst_property(
                SCHEMA.licenseDeclared, 'P2479', 'licenseDeclared',
                'item')  # https://www.wikidata.org/wiki/Property:P2479
            self.create_subst_property(
                SCHEMA.creativeWorkStatus, 'P548', 'creativeWorkStatus', None
            )  # https://www.wikidata.org/wiki/Property:P548 - aka version type
            self.create_subst_property(
                SCHEMA.image, 'P4765', 'image', None
            )  # https://www.wikidata.org/wiki/Property:P4765 - aka Commons compatible image available at URL
            self.create_subst_property(
                SCHEMA.hasPart, 'P527', 'hasPart', 'item'
            )  # https://www.wikidata.org/wiki/Property:P527 - has part
            #self.create_subst_property(SCHEMA.hasPart, 'P2670', '', True) # https://www.wikidata.org/wiki/Property:P2670 - has parts of the class
            self.create_subst_property(
                SCHEMA.codeRepository, 'P1324', 'sourceCodeRepository', None
            )  # https://www.wikidata.org/wiki/Property:P1324 - source code repository
            self.create_subst_property(
                SCHEMA.value, 'P8203', 'supportedMetaData', None
            )  # https://www.wikidata.org/wiki/Property:P8203 -  aka supported Metadata
            self.create_subst_property(
                OBO.BFO_0000016, 'P7535', 'scopeAndContent', None
            )  # function -> https://www.wikidata.org/wiki/Property:P7535 - aka scope and content
            self.create_subst_property(
                SCHEMA.amount, 'P1114', 'quantity', None
            )  # https://www.wikidata.org/wiki/Property:P1114 -  aka quantity
            self.create_subst_item(
                SCHEMA.URL, 'QXXXXXXX', 'URL', None
            )  # https://www.wikidata.org/wiki/Property:P2699 -  aka URL
            self.create_subst_property(SPDX.licenseDeclared, 'PXXXXXXXX',
                                       'licenseDeclared', None)
            self.create_subst_property(SCHEMA.fileFormat, 'PXXXXXX',
                                       'fileFormat', None)

        # create the items and properties
        for subj in self.graph.subjects():
            if self.skip_subj(subj):
                continue
            wb_ids = list(self.ont2wb.objects(subj, SCHEMA.identifier))
            wb_id = wb_ids[0] if len(wb_ids) > 0 else None
            if wb_id is None:
                print('- Creating WB part for subject "%s" ...' % subj)
                wb_id = self.create_ont_wb_thing(subj)
                self.ont2wb.add(
                    (subj, SCHEMA.identifier, rdflib.Literal(wb_id)))
            #else: # XXX We might want to recreate it here, anyway!
            print('- Subject "%s" is represented by "%s"' % (subj, wb_id))

        self.ont2wb.serialize(self.link_graph_file, format='turtle')

        # Create the connections/predicates/claims
        for subj in self.graph.subjects():
            if self.skip_subj(subj):
                continue
            wb_ids = list(self.ont2wb.objects(subj, SCHEMA.identifier))
            wb_id = wb_ids[0]
            if isinstance(wb_id, rdflib.Literal):
                wb_id = str(wb_id)
            for _, pred, obj in self.graph.triples((subj, None, None)):
                if pred == RDFS.range:
                    print('XXX range')
                elif pred == RDFS.domain:
                    print('XXX domain')
                else:
                    self.create_claim(wb_id, subj, pred, obj)
Example #30
    def p():
        # 'data' is assumed to be defined in the enclosing test
        rdflib.Graph().parse(data=data)