Ejemplo n.º 1
0
def get_degrees(directory_in_str):
    """Merge every ``*.rdf`` file under *directory_in_str* into one graph
    and collect basic degree statistics.

    Returns a dict with:
        length        -- total number of triples in the merged graph
        num_of_nodes  -- number of distinct nodes
        in_degree     -- {quoted node id: # triples with the node as object}
        out_degree    -- {quoted node id: # triples with the node as subject}
    """
    # Local import so this helper is self-contained; ``urllib.quote_plus``
    # from the original was Python-2-only.
    import urllib.parse

    graph = Graph()
    for path in Path(directory_in_str).glob('**/*.rdf'):
        graph.parse(str(path))

    stats = {
        'length': len(graph),
        'num_of_nodes': len(graph.all_nodes()),
    }

    in_degrees = {}
    out_degrees = {}
    for node in graph.all_nodes():
        # Quote the node identifier so it is safe to use as a URL
        # component / plain string key.
        node_id = urllib.parse.quote_plus(node.encode('utf-8'))
        out_degrees[node_id] = len(list(graph.triples([node, None, None])))
        in_degrees[node_id] = len(list(graph.triples([None, None, node])))

    stats['in_degree'] = in_degrees
    stats['out_degree'] = out_degrees
    return stats
Ejemplo n.º 2
0
def test_issue494_collapsing_bnodes():
    """Test for https://github.com/RDFLib/rdflib/issues/494 collapsing BNodes"""
    g = Graph()
    # Each entry reifies one rdf:Statement:
    # (statement bnode id, subject, predicate, object).
    reified = [
        ('Na1a8fbcf755f41c1b5728f326be50994',
         BNode('vcb2'), BNode('vcb3'), URIRef(u'source')),
        ('Na713b02f320d409c806ff0190db324f4',
         URIRef(u'source'), BNode('vcb0'), URIRef(u'target')),
        ('Ndb804ba690a64b3dbb9063c68d5e3550',
         URIRef(u'source'), BNode('vrby3JV'), BNode('vr0KcS4')),
        ('Ndfc47fb1cd2d4382bcb8d5eb7835a636',
         URIRef(u'target'), BNode('vcb5'), URIRef(u'source')),
        ('Nec6864ef180843838aa9805bac835c98',
         URIRef(u'source'), BNode('vcb4'), URIRef(u'source')),
    ]
    for stmt_id, subj, pred, obj in reified:
        stmt = BNode(stmt_id)
        g += [
            (stmt, RDF['object'], obj),
            (stmt, RDF['predicate'], pred),
            (stmt, RDF['subject'], subj),
            (stmt, RDF['type'], RDF['Statement']),
        ]

    def dump_degrees(graph):
        # Print the same diagnostics as before and return the sorted
        # out-degrees (triples with the node as subject), highest first.
        print('graph length: %d, nodes: %d'
              % (len(graph), len(graph.all_nodes())))
        print('triple_bnode degrees:')
        for stmt_bnode in graph.subjects(RDF['type'], RDF['Statement']):
            print(len(list(graph.triples([stmt_bnode, None, None]))))
        print('all node degrees:')
        degrees = sorted(
            (len(list(graph.triples([n, None, None])))
             for n in graph.all_nodes()),
            reverse=True)
        print(degrees)
        return degrees

    g_node_degs = dump_degrees(g)
    cg = to_canonical_graph(g)
    cg_node_degs = dump_degrees(cg)

    assert len(g) == len(cg), \
        'canonicalization changed number of triples in graph'
    assert len(g.all_nodes()) == len(cg.all_nodes()), \
        'canonicalization changed number of nodes in graph'
    statements = len(list(g.subjects(RDF['type'], RDF['Statement'])))
    canon_statements = len(list(cg.subjects(RDF['type'], RDF['Statement'])))
    assert statements == canon_statements, \
        'canonicalization changed number of statements'
    assert g_node_degs == cg_node_degs, \
        'canonicalization changed node degrees'
Ejemplo n.º 3
0
    # NOTE: this is the interior of a larger function -- ``predicate1``,
    # ``g`` (the rdflib Graph), ``year`` and the FOAF/XSD namespaces are
    # defined before this excerpt.
    predicate2 = URIRef(f"http://xmlns.com/foaf/0.1/votes")
    predicate3 = URIRef(f"http://xmlns.com/foaf/0.1/episode")
    # Parse a ratings dump (IMDb-style ``ratings.list``, presumably --
    # confirm with the data source).  Expected line shape:
    #   <distribution> <votes> <rating> "title" (year) {episode}
    with open(f'ratings.list', encoding='latin-1') as f:
        for line_number, line in enumerate(f):
            # group(1)=votes, group(2)=rating, group(3)=title,
            # group(4)=year, group(6)=optional episode name.
            info = re.match(
                '^ +[0-9.]+ +([0-9]+) +([0-9.]+) +"?([^"\n]+)"? \(([0-9]+)[^\)]*\)( {(.+)})?',
                line)
            if info:
                # print(info.group(1),info.group(2),info.group(3))
                year_string = info.group(4).strip()
                if year_string == str(year):
                    movie_string = info.group(3).strip()
                    movie_name = Literal(movie_string, datatype=XSD.string)
                    # URL-quote the title so it is a valid IRI path segment.
                    movie = URIRef(
                        f"http://imdb.org/movie/{urllib.parse.quote(movie_string)}"
                    )
                    g.add((movie, FOAF.name, movie_name))
                    # NOTE(review): the rating (group 2, pattern allows a
                    # decimal point) is typed xsd:integer while votes
                    # (group 1, digits only) is typed xsd:float -- the two
                    # datatypes look swapped; confirm intent.
                    rating_string = info.group(2).strip()
                    rating_name = Literal(rating_string, datatype=XSD.integer)
                    g.add((movie, predicate1, rating_name))
                    votes_string = info.group(1).strip()
                    votes_name = Literal(votes_string, datatype=XSD.float)
                    g.add((movie, predicate2, votes_name))
                    if info.group(6):
                        episode_string = info.group(6).strip()
                        episode_name = Literal(episode_string)
                        g.add((movie, predicate3, episode_name))

    print(len(g.all_nodes()))
    # Serialize the year's movies as N-Triples.
    os.makedirs("ratings-data/", exist_ok=True)
    g.serialize(destination=f"ratings-data/ratings-{year}.nt", format='nt')
Ejemplo n.º 4
0
def test_issue725_collapsing_bnodes_2():
    """Regression test for https://github.com/RDFLib/rdflib/issues/725:
    graph canonicalization must not collapse distinct blank nodes.

    Builds five reified rdf:Statement bnodes, canonicalizes the graph and
    checks that triple/node/statement counts and the per-position triple
    count signature are preserved.
    """
    g = Graph()
    rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
    # Each entry reifies one rdf:Statement:
    # (statement bnode id, subject, predicate, object).
    reified = [
        ('N0a76d42406b84fe4b8029d0a7fa04244',
         URIRef(u'urn:gp_learner:fixed_var:target'), BNode('v0'),
         BNode('v2')),
        ('N2f62af5936b94a8eb4b1e4bfa8e11d95',
         URIRef(u'urn:gp_learner:fixed_var:target'), BNode('v0'),
         BNode('v1')),
        ('N5ae541f93e1d4e5880450b1bdceb6404',
         URIRef(u'urn:gp_learner:fixed_var:target'), BNode('v4'),
         BNode('v5')),
        ('N86ac7ca781f546ae939b8963895f672e',
         URIRef(u'urn:gp_learner:fixed_var:target'), BNode('v0'),
         URIRef(u'urn:gp_learner:fixed_var:source')),
        ('Nac82b883ca3849b5ab6820b7ac15e490',
         URIRef(u'urn:gp_learner:fixed_var:target'), BNode('v3'),
         BNode('v1')),
    ]
    for stmt_id, subj, pred, obj in reified:
        stmt = BNode(stmt_id)
        g += [
            (stmt, URIRef(rdfns + 'object'), obj),
            (stmt, URIRef(rdfns + 'predicate'), pred),
            (stmt, URIRef(rdfns + 'subject'), subj),
            (stmt, URIRef(rdfns + 'type'), URIRef(rdfns + 'Statement')),
        ]

    # First canonicalize with stats collection enabled (exercises the
    # stats code path), then again without for the assertions below.
    stats = {}
    cg = rdflib.compare.to_canonical_graph(g, stats=stats)
    assert (len(g.all_nodes()) == len(cg.all_nodes()))

    cg = to_canonical_graph(g)
    assert len(g) == len(cg), \
        'canonicalization changed number of triples in graph'
    assert len(g.all_nodes()) == len(cg.all_nodes()), \
        'canonicalization changed number of nodes in graph'
    assert len(list(g.subjects(RDF['type'], RDF['Statement']))) == \
        len(list(cg.subjects(RDF['type'], RDF['Statement']))), \
        'canonicalization changed number of statements'

    def _position_count_signature(graph):
        # Counter for subject, predicate and object positions.
        # NOTE(review): as in the original, the enumerate() loop variable
        # is unused and the counters key on the whole triple ``t``
        # (possibly the node was intended); behaviour preserved as-is.
        counts = Counter(), Counter(), Counter()
        for t in graph:
            for i, _node in enumerate(t):
                counts[i][t] += 1
        return [sorted(c.values()) for c in counts]

    assert _position_count_signature(g) == _position_count_signature(cg), \
        'canonicalization changed node position counts'
Ejemplo n.º 5
0
def test_issue494_collapsing_bnodes():
    """Test for https://github.com/RDFLib/rdflib/issues/494 collapsing BNodes"""
    # Fixture: five reified rdf:Statement bnodes over the URIRefs
    # "source"/"target" plus assorted helper bnodes.
    g = Graph()
    g += [
        (BNode("Na1a8fbcf755f41c1b5728f326be50994"), RDF["object"],
         URIRef("source")),
        (BNode("Na1a8fbcf755f41c1b5728f326be50994"), RDF["predicate"],
         BNode("vcb3")),
        (BNode("Na1a8fbcf755f41c1b5728f326be50994"), RDF["subject"],
         BNode("vcb2")),
        (BNode("Na1a8fbcf755f41c1b5728f326be50994"), RDF["type"],
         RDF["Statement"]),
        (BNode("Na713b02f320d409c806ff0190db324f4"), RDF["object"],
         URIRef("target")),
        (BNode("Na713b02f320d409c806ff0190db324f4"), RDF["predicate"],
         BNode("vcb0")),
        (BNode("Na713b02f320d409c806ff0190db324f4"), RDF["subject"],
         URIRef("source")),
        (BNode("Na713b02f320d409c806ff0190db324f4"), RDF["type"],
         RDF["Statement"]),
        (BNode("Ndb804ba690a64b3dbb9063c68d5e3550"), RDF["object"],
         BNode("vr0KcS4")),
        (
            BNode("Ndb804ba690a64b3dbb9063c68d5e3550"),
            RDF["predicate"],
            BNode("vrby3JV"),
        ),
        (BNode("Ndb804ba690a64b3dbb9063c68d5e3550"), RDF["subject"],
         URIRef("source")),
        (BNode("Ndb804ba690a64b3dbb9063c68d5e3550"), RDF["type"],
         RDF["Statement"]),
        (BNode("Ndfc47fb1cd2d4382bcb8d5eb7835a636"), RDF["object"],
         URIRef("source")),
        (BNode("Ndfc47fb1cd2d4382bcb8d5eb7835a636"), RDF["predicate"],
         BNode("vcb5")),
        (BNode("Ndfc47fb1cd2d4382bcb8d5eb7835a636"), RDF["subject"],
         URIRef("target")),
        (BNode("Ndfc47fb1cd2d4382bcb8d5eb7835a636"), RDF["type"],
         RDF["Statement"]),
        (BNode("Nec6864ef180843838aa9805bac835c98"), RDF["object"],
         URIRef("source")),
        (BNode("Nec6864ef180843838aa9805bac835c98"), RDF["predicate"],
         BNode("vcb4")),
        (BNode("Nec6864ef180843838aa9805bac835c98"), RDF["subject"],
         URIRef("source")),
        (BNode("Nec6864ef180843838aa9805bac835c98"), RDF["type"],
         RDF["Statement"]),
    ]

    # print('graph length: %d, nodes: %d' % (len(g), len(g.all_nodes())))
    # print('triple_bnode degrees:')
    # for triple_bnode in g.subjects(RDF['type'], RDF['Statement']):
    #     print(len(list(g.triples([triple_bnode, None, None]))))
    # print('all node degrees:')
    # Out-degree (triples with the node as subject) of every node,
    # sorted descending.
    g_node_degs = sorted(
        [len(list(g.triples([node, None, None]))) for node in g.all_nodes()],
        reverse=True,
    )
    # print(g_node_degs)

    cg = to_canonical_graph(g)
    # print('graph length: %d, nodes: %d' % (len(cg), len(cg.all_nodes())))
    # print('triple_bnode degrees:')
    # for triple_bnode in cg.subjects(RDF['type'], RDF['Statement']):
    #     print(len(list(cg.triples([triple_bnode, None, None]))))
    # print('all node degrees:')
    cg_node_degs = sorted(
        [len(list(cg.triples([node, None, None]))) for node in cg.all_nodes()],
        reverse=True,
    )
    # print(cg_node_degs)

    # Canonicalization must preserve all of these graph invariants.
    assert len(g) == len(
        cg), "canonicalization changed number of triples in graph"
    assert len(g.all_nodes()) == len(
        cg.all_nodes()), "canonicalization changed number of nodes in graph"
    assert len(list(g.subjects(RDF["type"], RDF["Statement"]))) == len(
        list(cg.subjects(RDF["type"], RDF["Statement"]))
    ), "canonicalization changed number of statements"
    assert g_node_degs == cg_node_degs, "canonicalization changed node degrees"

    # counter for subject, predicate and object nodes
    # NOTE(review): ``node`` from enumerate(t) is unused and the counter
    # keys on the whole triple ``t`` -- possibly ``node`` was intended;
    # behaviour kept as-is.
    g_pos_counts = Counter(), Counter(), Counter()
    for t in g:
        for i, node in enumerate(t):
            g_pos_counts[i][t] += 1
    g_count_signature = [sorted(c.values()) for c in g_pos_counts]

    cg = to_canonical_graph(g)
    cg_pos_counts = Counter(), Counter(), Counter()
    for t in cg:
        for i, node in enumerate(t):
            cg_pos_counts[i][t] += 1
    cg_count_signature = [sorted(c.values()) for c in cg_pos_counts]

    assert (g_count_signature == cg_count_signature
            ), "canonicalization changed node position counts"
Ejemplo n.º 6
0
from rdflib import Graph, Namespace, OWL, RDF, RDFS, URIRef
from glob import glob
import os

# NOTE(review): Python 2 script (print statements).  Hard-coded personal
# paths; DESTINATION and target_name are computed but never used in this
# excerpt -- presumably for writing stripped copies later; confirm.
PATH = "/Users/hoekstra/Dropbox/projects/designpatterns_stats/*.owl"
DESTINATION = "/Users/hoekstra/projects/designpatterns/stripped"

for f in glob(PATH):
    (_, target_name) = os.path.split(f)

    g = Graph()
    g.parse(f, format='turtle')
    print g.serialize(format='turtle')
    # Drop every triple whose object is owl:AnnotationProperty
    # (subject/predicate wildcards).
    g.remove((None, None, OWL['AnnotationProperty']))
    print g.serialize(format='turtle')
    print list(g.all_nodes())
    # URIRef nodes from the CP annotation schema namespace.
    annotations = [
        uri for uri in list(g.all_nodes()) if isinstance(uri, URIRef) and
        'http://www.ontologydesignpatterns.org/schemas/cpannotationschema.owl#'
        in str(uri)
    ]
    print annotations
    # Debugging aid: only the first globbed file is processed.
    break
Ejemplo n.º 7
0
def check(path):
    """Grade a directory of student Turtle submissions into grading.csv.

    For each ``*.ttl`` file in *path*: hash the file, parse it, compute a
    baseline triple count, run constraint queries, expand the graph with an
    RDFS deductive closure, run the student's companion ``.rq`` query (via
    an external SPARQL endpoint and rdflib, taking the higher result
    count), and write one CSV row.

    NOTE(review): Python 2 code (print statements, ``md5`` module) relying
    on module-level helpers ``update``, ``sparql``, ``grade``,
    ``DeductiveClosure`` and ``RDFS_Semantics`` defined elsewhere.
    """

    # Reference RDF Schema vocabulary; its nodes are excluded from the
    # per-student counts below.
    rdfs_graph = Graph()
    rdfs_graph.load('rdf-schema.ttl', format='turtle')

    rdfs_nodes = list(rdfs_graph.all_nodes())

    with open('{}/grading.csv'.format(path), 'w') as out:
        fieldnames = [
            'Username', 'Assignment 2b |846597', 'Assignment 2c |846599',
            'query', 'syntax', 'asserted', 'inferred', 'baseline',
            'subjects_objects', 'predicates', 'inferred through schema', 'hash'
        ]
        # One extra column per constraint query file.
        fieldnames += [
            os.path.basename(fn) for fn in glob('../constraints/*.rq')
        ]

        writer = csv.DictWriter(out, fieldnames)
        writer.writeheader()

        for f in glob("{}/*.ttl".format(path)):

            (basename, ext) = os.path.splitext(os.path.basename(f))

            # Username is the last '_'-separated token of the filename.
            basename = basename.split('_')[-1]
            line = {'Username': basename}

            print "==========\n{}\n==========".format(basename)

            # MD5 of the raw submission, for plagiarism/duplicate checks.
            # NOTE(review): if hashing fails, ``hexdigest`` from the
            # previous iteration is silently reused at the bottom.
            try:
                with open(f, 'r') as fi:
                    contents = fi.readlines()
                    h = md5.new()
                    h.update(''.join(contents))
                    hexdigest = h.hexdigest()
            except:
                traceback.print_exc()
                print "Could not create hash of {}".format(f)

            # syntax = 1 iff the Turtle file parses cleanly.
            g = Graph()
            try:
                g.load(f, format='turtle')
                line['syntax'] = 1
            except Exception:

                print "Could not parse {}".format(f)
                line['syntax'] = 0
                print(traceback.format_exc())

            # Baseline is:
            # 1. for every *new* subject, predicate or object that is a URIRef,
            #    a new triple is generated by inference rule 1
            # 2. for every *new* predicate, one additional triple is produced (subproperty of itself)
            # 3. for rdf:Property 2 more triples that define it.
            # 4. for rdf:type 2 more triples
            subjects_objects = len(
                set([
                    so for so in
                    [so for so in g.all_nodes() if type(so) == URIRef]
                    if so not in rdfs_nodes
                ]))
            predicates = len(set([p for p in g.predicates()]))
            baseline = subjects_objects + 2 * predicates + 2 + 2

            # Only count the asserted triples that do not define any RDFS or RDF terms, or specify that some subject is of type RDFS Class or Property.
            asserted_triples = [(s, p, o)
                                for (s, p, o) in g.triples((None, None, None))
                                if s not in rdfs_nodes and o not in rdfs_nodes]

            # for (s,p,o) in asserted_triples:
            #     if type(o) == URIRef:
            #         print g.qname(s), g.qname(p), g.qname(o)
            #     else:
            #         print g.qname(s), g.qname(p), o

            asserted = len(asserted_triples)

            # One result-count column per constraint query.
            for constraint_file in glob('../constraints/*.rq'):
                with open(constraint_file) as cf:
                    query = cf.read()

                constraint = os.path.basename(constraint_file)
                result = g.query(query)

                count = 0
                for r in result:
                    count += 1

                line[constraint] = count
                # print "{}: {}".format(constraint, count)

            # Expand g in place with the RDFS closure (no axiomatic or
            # datatype triples); best-effort, errors only logged.
            try:
                DeductiveClosure(RDFS_Semantics,
                                 rdfs_closure=True,
                                 axiomatic_triples=False,
                                 datatype_axioms=False).expand(g)
            except:
                traceback.print_exc()

            inferred = len([(s, p, o)
                            for (s, p, o) in g.triples((None, RDF.type, None))])
            class_use = len(
                set([
                    s for (s, p, o) in g.triples((None, RDF.type, None))
                    if o not in rdfs_nodes
                ]))
            property_use = len(set([p for (s, p, o) in asserted_triples]))

            # Run the student's companion query twice -- once against an
            # external store (update/sparql helpers) and once with rdflib --
            # and keep the larger result count.
            try:
                with codecs.open('{}/{}.rq'.format(path, basename),
                                 "r",
                                 encoding='utf-8-sig',
                                 errors='ignore') as qf:
                    query = qf.read()

                # Remove CR
                query = query.replace('\r\n', '\n')
                # try:
                #     query = query.decode("utf-8-sig").encode('utf-8')
                # except:
                #     print "..."

                # This kills the RDFLib parser
                query = query.replace('prefix', 'PREFIX')

                try:
                    # adding triples to database
                    all_triples = g.serialize(format='turtle')
                except:
                    # but if something's wrong with the original graph, just use an empty string
                    all_triples = ""

                update(all_triples)

                # running query
                qresults = sparql(query)

                # If stardog gives an error, we set the qcount to -1,
                # and make sure that if rdflib does better, we overwrite that,
                # otherwise, we fallback to the -1 qcount of stardog.

                if qresults != -1:
                    # counting results
                    stardog_qcount = 0
                    for r in qresults:
                        stardog_qcount += 1
                else:
                    stardog_qcount = -1

                # clearing database
                update(all_triples, action='clear')

                try:
                    qresults = g.query(query)
                    qcount = 0
                    for r in qresults:
                        qcount += 1

                    # Use whichever is higher
                    if stardog_qcount > qcount:
                        qcount = stardog_qcount
                except:
                    qcount = stardog_qcount

            except IOError:
                # No .rq file for this student.
                print "Could not find query"
                qcount = -2
            except:
                print "Query failed"
                try:
                    print query
                except:
                    "..."
                print(traceback.format_exc())
                qcount = -1

            line['query'] = qcount

            line[
                'asserted'] = asserted  # asserted triples that could not be inferred,
            line['inferred'] = inferred  # total triples after inference,
            line[
                'baseline'] = baseline  # minimal expected number of new triples (baseline) for file without schema
            line['subjects_objects'] = subjects_objects
            line['predicates'] = predicates
            line[
                'inferred through schema'] = inferred - baseline - asserted  # triples inferred through the schema
            line['hash'] = hexdigest

            # grade() maps the raw counts to assignment scores.
            line = grade(line)

            writer.writerow(line)
            del (g)
Ejemplo n.º 8
0
class Model:
    """Thin convenience wrapper around an rdflib ``Graph``."""

    def __init__(self):
        self.graph = Graph()
        self.loaded = set()  # sources already handed to parse(), to avoid re-loading

    def load(self, source, format=None):
        """Parse *source* into the graph once.

        Returns False if parsing raised; True otherwise (including when the
        source was already loaded).  NOTE(review): a source that fails to
        parse stays in ``self.loaded``, so it is never retried -- confirm
        this is intended.
        """
        if source not in self.loaded:
            self.loaded.add(source)
            try:
                self.graph.parse(source, format=format)
            except Exception as e:
                # Python-3-compatible print (was a py2 print statement).
                print(e)
                return False
        return True

    def size(self):
        """Number of triples currently in the graph."""
        return len(self.graph)

    def pred(self, subj):
        """Distinct predicates used with *subj* as subject."""
        return list(set(self.graph.predicates(subj)))

    def types(self):
        """All objects of rdf:type triples, i.e. every type used."""
        return set(self.graph.objects(predicate=RDF.type))

    def contains_resource(self, ref):
        """True if *ref* occurs in the graph as a URIRef node."""
        return any(ref == node for node in self.graph.all_nodes()
                   if isinstance(node, URIRef))

    def get_resource_objects(self, subj, pred):
        """Objects of (subj, pred, *) triples that are URIRefs (no literals)."""
        return [obj for obj in self.graph.objects(subj, pred)
                if isinstance(obj, URIRef)]

    def get_objects(self, subj, pred):
        """All objects of (subj, pred, *) triples."""
        return list(self.graph.objects(subj, pred))

    def get_subjects(self, pred, obj):
        """All subjects of (*, pred, obj) triples."""
        return list(self.graph.subjects(pred, obj))

    def get_properties(self, subj):
        """Map each predicate of *subj* to the list of its objects."""
        properties = {}
        for pred, obj in self.graph.predicate_objects(subj):
            properties.setdefault(pred, []).append(obj)
        return properties

    def get_reverse_properties(self, obj):
        """Map each predicate pointing at *obj* to the list of its subjects."""
        properties = {}
        for subj, pred in self.graph.subject_predicates(obj):
            properties.setdefault(pred, []).append(subj)
        return properties

    def norm(self, ref):
        """Shorten *ref* via the namespace manager; None for falsy input."""
        return self.graph.namespace_manager.normalizeUri(ref) if ref else None

    def to_uriref(self, string):
        """Expand QName to UriRef based on existing namespaces.

        Returns None for falsy input or an unknown prefix.
        """
        if not string:
            return None
        elif re.match('[^:/]*:[^:/]+', string):
            # Split on the first colon only: inputs such as 'a:b:c' used to
            # raise ValueError from an unbounded split.
            prefix, name = string.split(':', 1)
            try:
                namespace = dict(self.graph.namespaces())[prefix]
                return namespace + name
            except Exception:  # unknown prefix (was a bare except)
                return None
        else:
            return URIRef(string)
Ejemplo n.º 9
0
def generateRDF(outf,
                softuri,
                tooldictf,
                softdictf=None,
                tooluris=True,
                normalize=normalizeArcpyToolString,
                toolwebsites='ResourceCatalogue\\ArcGIStoolwebsites.json'):
    """Build (or extend) a Turtle file describing GIS software and its tools.

    outf         -- output Turtle file; also read as input when softdictf is None
    softuri      -- URI of the software whose toolboxes/tools are added
    tooldictf    -- toolbox dictionary file, read via readToolBoxes()
    softdictf    -- optional software dictionary; when given, software
                    individuals are generated, otherwise outf is re-parsed
    tooluris     -- if True, tool ids are used as full URIRefs, else minted
                    in the ``tools`` namespace
    normalize    -- callable turning a tool id into a human-readable name
    toolwebsites -- JSON file mapping tool ids to their websites

    NOTE(review): Python 2 code (``urllib.pathname2url``, print statement);
    relies on module-level helpers readToolBoxes/readSoft/getWebsite.
    """
    from rdflib import URIRef, BNode, Literal, Namespace, Graph
    from rdflib.namespace import RDF, FOAF, RDFS

    # Namespaces used for the generated triples.
    dbo = Namespace("http://dbpedia.org/ontology/")
    dbp = Namespace("http://dbpedia.org/resource/")
    dc = Namespace("http://purl.org/dc/elements/1.1/")
    dct = Namespace("http://purl.org/dc/terms/")
    wf = Namespace("http://geographicknowledge.de/vocab/Workflow.rdf#")
    gis = Namespace("http://geographicknowledge.de/vocab/GISConcepts.rdf#")
    tools = Namespace("http://geographicknowledge.de/vocab/GISTools.rdf#")

    tdict = readToolBoxes(tooldictf)
    g = Graph()

    if softdictf != None:
        # Generate one dbo:Software individual per entry, with optional
        # name, website and first-listed developer company.
        softdict = readSoft(softdictf)
        softwarelist = []
        for software, v in softdict.items():
            softwarelist.append(software)
            g.add((URIRef(software), RDF.type, dbo.Software))
            if 'name' in v.keys():
                g.add((URIRef(software), FOAF['name'], Literal(v['name'])))
            if 'website' in v.keys():
                g.add((URIRef(software), FOAF['isPrimaryTopicOf'],
                       URIRef(v['website'])))
            if v['companies'] != []:
                g.add((URIRef(software), dbo.developer,
                       URIRef(v['companies'][0])))
        # getWebsite() resolves homepages for the collected software list.
        w = getWebsite(softwarelist)
        if w != []:
            for ww in w:
                g.add((URIRef(ww[1]), FOAF['homepage'], URIRef(ww[0])))
    else:  #there is already some software tools, then load them
        g.parse(outf, format='turtle')

    toolws = readSoft(toolwebsites)

    #Now add the tools of some software softuri
    # Only if softuri already occurs in the graph: one gis:Toolbox node per
    # toolbox, one gis:Tool node per tool, linked with dct:isPartOf.
    if URIRef(softuri) in g.all_nodes():
        for toolbox, toollist in tdict.items():
            tb = urllib.pathname2url(toolbox)
            g.add((tools[tb], RDF.type, gis.Toolbox))
            g.add((tools[tb], dct.isPartOf, URIRef(softuri)))
            g.add((tools[tb], FOAF.name, Literal(toolbox)))
            for t in toollist:
                toolst = (URIRef(t) if tooluris else tools[t])
                if t in toolws:
                    g.add((toolst, FOAF['homepage'],
                           URIRef(toolws[t]['website'])))
                g.add((toolst, RDF.type, gis.Tool))
                g.add((toolst, dct.isPartOf, tools[tb]))
                g.add((toolst, FOAF.name, Literal(normalize(t))))

    # Bind readable prefixes before serializing back to outf.
    g.bind('dbo', URIRef("http://dbpedia.org/ontology/"))
    g.bind('dbp', URIRef("http://dbpedia.org/resource/"))
    g.bind('dc', URIRef("http://purl.org/dc/elements/1.1/"))
    g.bind('dct', URIRef("http://purl.org/dc/terms/"))
    g.bind('wf', URIRef("http://geographicknowledge.de/vocab/Workflow.rdf#"))
    g.bind('gis',
           URIRef("http://geographicknowledge.de/vocab/GISConcepts.rdf#"))
    g.bind('tools',
           URIRef("http://geographicknowledge.de/vocab/GISTools.rdf#"))
    g.bind('foaf', FOAF)
    g.bind('rdf', RDF)
    g.bind('rdfs', RDFS)
    print 'number of triples generated: ' + str(len(g))
    g.serialize(destination=outf, format='turtle')