def get_all_measurement_types(ontology_file):
    graph = ConjunctiveGraph()
    graph.load(ontology_file, format="n3")
    query_str = '''SELECT DISTINCT ?mt ?label ?comment ?defn
        WHERE {
            ?mt rdfs:label ?label .
            ?mt rdfs:subClassOf <%s> .
            ?mt rdfs:subClassOf ?r1 .
            ?r1 owl:onProperty oboe:measuresEntity ;
                owl:someValuesFrom ?ent .
            ?mt rdfs:subClassOf ?r2 .
            ?r2 owl:onProperty oboe:measuresCharacteristic ;
                owl:someValuesFrom ?char .
            OPTIONAL { ?mt rdfs:comment ?comment }
            OPTIONAL { ?mt skos:definition ?defn }
        }''' % (MeasurementType)
    qres = list(graph.query(
        query_str,
        initNs=dict(oboe=URIRef("http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#"),
                    owl=OWL, rdfs=RDFS, skos=SKOS)))
    if len(qres) > 0:
        qres.sort(key=lambda x: x[0], reverse=True)
        result = dict()
        for i, row in enumerate(qres):
            result[i] = {'uri': row[0], 'label': row[1],
                         'comment': row[2], 'defn': row[3]}
        print "Sparql query finished!"
        return result
    return None
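# A hedged usage sketch for get_all_measurement_types above. The ontology
# path, and the MeasurementType/OWL/RDFS/SKOS globals the function relies
# on, are assumptions here, not part of the original snippet.
from rdflib import ConjunctiveGraph, URIRef
from rdflib.namespace import OWL, RDFS, SKOS

MeasurementType = URIRef(
    "http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#MeasurementType")

mts = get_all_measurement_types("oboe-core.n3")   # hypothetical file
if mts is not None:
    for idx in sorted(mts):
        print("%s %s" % (mts[idx]['uri'], mts[idx]['label']))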
def __init__(self, path=None):
    self.__dict__ = self.__shared_state
    if (self.data == None):
        if (path == None):
            raise ValueError("djubby's configuration MUST be initialized a first time, "
                             "read http://code.google.com/p/djubby/wiki/GettingStarted")
        else:
            self.path = os.path.abspath(path)
            logging.debug("Reading djubby's configuration from %s..." % self.path)
            if (not os.path.exists(self.path)):
                raise ValueError("No configuration file for djubby found at '%s'. "
                                 "Please provide a valid path" % self.path)

            data = ConjunctiveGraph()
            data.bind("conf", ns.config)
            try:
                data.load(path, format='n3')
            except Exception, e:
                raise ValueError("No valid N3 file with a configuration for djubby "
                                 "found at '%s'. Please provide a valid N3 file" % self.path)
            self.data = data

            try:
                self.graph = self.get_value("sparqlDefaultGraph")
                self.endpoint = self.get_value("sparqlEndpoint")
            except Exception, e:
                raise ValueError("Could not find the graph or the endpoint that djubby "
                                 "is supposed to query. Please provide a valid configuration")

            logging.info("Using <%s> as default graph to query the endpoint <%s>"
                         % (self.graph, self.endpoint))
            self.__class__.__dict__['_Configuration__shared_state']["data"] = data  # FIXME
def update():
    """
    Update the library with new articles.
    """
    graph = ConjunctiveGraph()

    # load the existing graph
    library = 'data/articles.rdf'
    graph.load(library)

    feeds = {
        "http://www3.interscience.wiley.com/rss/journal/118485807": "wiley.xsl",
        "http://phg.sagepub.com/rss/current.xml": "sage.xsl",
        "http://www.informaworld.com/ampp/rss~content=t713446924": "infoworld.xsl",
        "http://www.informaworld.com/ampp/rss~content=t788352614": "infoworld.xsl",
        "http://www.envplan.com/rss.cgi?journal=D": "envplan.xsl",
        "http://www.envplan.com/rss.cgi?journal=A": "envplan.xsl",
        "http://cgj.sagepub.com/rss/current.xml": "sage.xsl"
    }

    for feed, stylesheet in feeds.iteritems():
        # grab the feed and transform it
        print "grabbing ", feed
        new = StringIO.StringIO(feed_transform(feed, stylesheet))
        # merge the new triples into the graph
        graph.parse(new)
        new.close()

    graph.serialize(library, format='pretty-xml')
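# A minimal hedged sketch of the load/merge/serialize round trip that
# update() performs, with the feed fetching stubbed out; the inline RDF/XML
# payload is an assumption for illustration.
import StringIO
from rdflib import ConjunctiveGraph

payload = """<?xml version="1.0"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         xmlns:dc="http://purl.org/dc/elements/1.1/">
  <rdf:Description rdf:about="http://example.org/article/1">
    <dc:title>An example article</dc:title>
  </rdf:Description>
</rdf:RDF>"""

g = ConjunctiveGraph()
g.parse(StringIO.StringIO(payload))   # parse() merges into the same graph
print g.serialize(format='pretty-xml')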
def catalyst_graph_for(file):
    if file.startswith('/'):
        file = 'file://' + file
    logging.info("InferenceStore catalyst_graph_for started")

    # quads = jsonld.to_rdf(file, {'format': 'application/nquads'})
    logging.info("InferenceStore JSON-LD loaded")

    g = ConjunctiveGraph()
    g.namespace_manager = namespace_manager
    # g.parse(data=quads, format='nquads')
    g.load(file, format="json-ld")
    logging.info("InferenceStore base graph loaded")

    f = FuXiInferenceStore.get_instance()
    # get the inference engine
    cl = f.get_inference(g)
    logging.info("InferenceStore inference graph loaded")

    union_g = rdflib.ConjunctiveGraph()
    for s, p, o in g.triples((None, None, None)):
        union_g.add((s, p, o))
    for s, p, o in cl.triples((None, None, None)):
        union_g.add((s, p, o))
    logging.info("InferenceStore union graph prepared")

    return union_g
class RecursionTests(unittest.TestCase):
    # debug = True
    def setUp(self):
        self.graph = ConjunctiveGraph()
        self.graph.load(StringIO(testContent), format='n3')

    def test_simple_recursion(self):
        graph = ConjunctiveGraph()
        graph.load(StringIO(BASIC_KNOWS_DATA), format='n3')
        results = graph.query(KNOWS_QUERY, processor="sparql", DEBUG=False)
        results = set(results)
        person1 = URIRef('ex:person.1')
        person2 = URIRef('ex:person.2')
        nose.tools.assert_equal(
            results,
            set([(person1, None),
                 (person1, Literal('person 3')),
                 (person2, Literal('person 3'))]))

    def test_secondary_recursion(self):
        graph = ConjunctiveGraph()
        graph.load(StringIO(SUBCLASS_DATA), format='n3')
        results = graph.query(SUBCLASS_QUERY, processor="sparql", DEBUG=False)
        results = set(results)
        ob = URIRef('ex:ob')
        class1 = URIRef('ex:class.1')
        class2 = URIRef('ex:class.2')
        class3 = URIRef('ex:class.3')
        nose.tools.assert_equal(
            results,
            set([(ob, class1), (ob, class2), (ob, class3)]))
def runTest(self):
    testfile = self.testbase + ".htm"
    resultsf = self.testbase + ".ttl"
    self.failIf(not os.path.isfile(resultsf),
                "missing expected results file.")

    store1 = RGraph()
    store1.load(resultsf, publicID=self.pubId, format="n3")
    pcontents = store1.serialize(format='nt')
    pg = Graph()
    for a, b, c in store1:
        pg.triples.add(tuple(map(self.nodeToString, (a, b, c))))
        #print tuple(map(self.nodeToString, (a,b,c)))

    store2 = RGraph()
    store2.load(testfile, publicID=self.pubId, format="rdfa")
    qcontents = store2.serialize(format='nt')
    qg = Graph()
    for a, b, c in store2:
        qg.triples.add(tuple(map(self.nodeToString, (a, b, c))))

    self.failIf(not hash(pg) == hash(qg),
                "In %s: results do not match.\n%s\n\n%s"
                % (self.shortDescription(), pcontents, qcontents))
def to_rdf_etree(sources):
    graph = ConjunctiveGraph()
    for source in sources:
        graph.load(source, format=guess_format(source))
    io = StringIO()
    graph.serialize(io, format="pretty-xml")
    io.seek(0)
    return etree.parse(io)
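# Hedged usage sketch for to_rdf_etree above; the file names are
# assumptions. guess_format is presumably rdflib.util.guess_format, which
# maps extensions such as .ttl and .rdf to parser names.
from rdflib.util import guess_format

tree = to_rdf_etree(["vocab.ttl", "data.rdf"])   # hypothetical files
print(tree.getroot().tag)   # {http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF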
def build_network(rules):
    if isinstance(rules, basestring):
        rules = StringIO(rules)
    graph = ConjunctiveGraph()
    graph.load(rules, publicID='test', format='n3')
    network = NetworkFromN3(
        graph,
        additionalBuiltins={STRING_NS.startsWith: StringStartsWith})
    network.feedFactsToAdd(generateTokenSet(extractBaseFacts(graph)))
    return network
class Store:
    def __init__(self):
        self.graph = ConjunctiveGraph()
        if os.path.exists(storefn):
            self.graph.load(storeuri, format='n3')
        self.graph.bind('dc', 'http://purl.org/dc/elements/1.1/')
        self.graph.bind('foaf', 'http://xmlns.com/foaf/0.1/')
        self.graph.bind('imdb', 'http://www.csd.abdn.ac.uk/~ggrimnes/dev/imdb/IMDB#')
        self.graph.bind('rev', 'http://purl.org/stuff/rev#')

    def save(self):
        self.graph.serialize(storeuri, format='n3')

    def who(self, who=None):
        if who is not None:
            name, email = (r_who.match(who).group(1), r_who.match(who).group(2))
            self.graph.add((URIRef(storeuri), DC['title'], Literal(title % name)))
            self.graph.add((URIRef(storeuri + '#author'), RDF.type, FOAF['Person']))
            self.graph.add((URIRef(storeuri + '#author'), FOAF['name'], Literal(name)))
            self.graph.add((URIRef(storeuri + '#author'), FOAF['mbox'], Literal(email)))
            self.save()
        else:
            return self.graph.objects(URIRef(storeuri + '#author'), FOAF['name'])

    def new_movie(self, movie):
        movieuri = URIRef('http://www.imdb.com/title/tt%s/' % movie.movieID)
        self.graph.add((movieuri, RDF.type, IMDB['Movie']))
        self.graph.add((movieuri, DC['title'], Literal(movie['title'])))
        self.graph.add((movieuri, IMDB['year'], Literal(int(movie['year']))))
        self.save()

    def new_review(self, movie, date, rating, comment=None):
        review = BNode()  # @@ humanize the identifier (something like #rev-$date)
        movieuri = URIRef('http://www.imdb.com/title/tt%s/' % movie.movieID)
        self.graph.add((movieuri, REV['hasReview'],
                        URIRef('%s#%s' % (storeuri, review))))
        self.graph.add((review, RDF.type, REV['Review']))
        self.graph.add((review, DC['date'], Literal(date)))
        self.graph.add((review, REV['maxRating'], Literal(5)))
        self.graph.add((review, REV['minRating'], Literal(0)))
        self.graph.add((review, REV['reviewer'], URIRef(storeuri + '#author')))
        self.graph.add((review, REV['rating'], Literal(rating)))
        if comment is not None:
            self.graph.add((review, REV['text'], Literal(comment)))
        self.save()

    def movie_is_in(self, uri):
        return (URIRef(uri), RDF.type, IMDB['Movie']) in self.graph
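# Hedged setup sketch for the IMDb Store above. storefn, storeuri, title
# and r_who are module-level globals in the original script; the values
# below are assumptions for illustration.
import os
import re
from rdflib import Namespace

storefn = os.path.abspath('movies.n3')   # hypothetical store file
storeuri = 'file://' + storefn
title = 'Movies seen by %s'
r_who = re.compile('^(.*?) <(.*?)>$')    # simplified "Name <email>" pattern

s = Store()
s.who('John Doe <john@example.org>')
# s.new_movie(movie) expects an IMDbPY-style object exposing movieID,
# movie['title'] and movie['year'].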
def build_network2(rules):
    graph = ConjunctiveGraph()
    graph.load(StringIO(rules), publicID='test', format='n3')
    rule_store, rule_graph = SetupRuleStore(
        StringIO(rules),
        additionalBuiltins={STRING_NS.startsWith: StringStartsWith})
    from FuXi.Rete.Network import ReteNetwork
    network = ReteNetwork(rule_store)
    network.feedFactsToAdd(generateTokenSet(extractBaseFacts(graph)))
    return network
def labchords2RDF(infilename, outfilename, format="xml",
                  audiofilename=None, withdescriptions=False):
    if withdescriptions:
        commonchords = ConjunctiveGraph()
        commonchords.load("CommonChords.rdf")
        extrachords = ConjunctiveGraph()

    infile = open(infilename, 'r')
    lines = infile.readlines()

    mi = mopy.MusicInfo()

    homepage = mopy.foaf.Document("http://sourceforge.net/projects/motools")
    mi.add(homepage)
    program = mopy.foaf.Agent()
    program.name = "labchords2RDF.py"
    program.homepage = homepage
    mi.add(program)

    tl = RelativeTimeLine("#tl")
    tl.label = "Timeline derived from " + infilename
    tl.maker = program
    mi.add(tl)

    intervalNum = 0
    for line in lines:
        i = Interval("#i" + str(intervalNum))
        try:
            [start_s, end_s, label] = parseLabLine(line)
            i.beginsAtDuration = secondsToXSDDuration(start_s)
            i.endsAtDuration = secondsToXSDDuration(end_s)
            #i.label = "Interval containing "+label+" chord."
            i.onTimeLine = tl

            # Produce chord object for the label:
            chordURI = "http://purl.org/ontology/chord/symbol/" \
                       + label.replace("#", "s").replace(",", "%2C")
            if withdescriptions and \
               len(list(commonchords.predicate_objects(URIRef(chordURI)))) == 0 and \
               len(list(extrachords.predicate_objects(URIRef(chordURI)))) == 0:
                # Deref to grab chord info
                print "loading " + chordURI + "..."
                extrachords.load(chordURI)

            c = mopy.chord.Chord(chordURI)
            c_event = mopy.chord.ChordEvent("#ce" + str(intervalNum))
            c_event.chord = c
            c_event.label = label
            c_event.time = i
        except Exception, e:
            raise Exception("Problem parsing input file at line "
                            + str(intervalNum + 1) + " !\n" + str(e))
        mi.add(i)
        mi.add(c)
        mi.add(c_event)
        intervalNum += 1
class ConstraintParser(object):
    def __init__(self, input_graphs):
        self.root = Node(OWL.Thing, [])
        self.g = ConjunctiveGraph()
        self.journal = {OWL.Thing: self.root}
        for path in input_graphs:
            print('Loading ', path)
            self.g.load(path, format='turtle')

    def get_node(self, uri):
        if uri in self.journal:
            return self.journal[uri]
        else:
            logging.debug('Creating node for : ' + unicode(uri))
            new_node = Node(uri, [])
            self.journal[uri] = new_node
            return new_node

    def get_all_children(self, uri):
        children = self.get_node(uri).children
        nodes_to_visit = children.copy()
        result = children.copy()
        childs_visited = 0
        while (len(nodes_to_visit) > 0):
            current_node = nodes_to_visit.pop()
            nodes_to_visit = nodes_to_visit.union(current_node.children)
            result.add(current_node)
            childs_visited += 1
            logging.debug("Child : " + str(childs_visited) + ' '
                          + unicode(current_node))
        return result

    def parse_hierarchy(self):
        types = self.g.objects(None, RDF.type)
        all_super_classes = set()
        # first find upper classes under root
        for t in types:
            super_classes = list(self.g.objects(t, RDFS.subClassOf))
            all_super_classes = all_super_classes.union(set(super_classes))
            logging.debug(unicode(t) + ' has super classes: '
                          + ''.join(super_classes))
            if len(super_classes) == 0 or (len(super_classes) == 1
                                           and super_classes[0] == OWL.Thing):
                # super_class is root
                tmp_node = self.get_node(t)  # Node(t, [self.root])
                tmp_node.parents.add(self.root)
                self.root.children.add(tmp_node)
            else:
                tmp_node = self.get_node(t)
                # set.union() returns a new set; update the parents in place
                tmp_node.parents.update(
                    set([self.get_node(p) for p in super_classes]))
                for p in super_classes:
                    self.get_node(p).children.add(tmp_node)
        logging.debug(all_super_classes)
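# ConstraintParser assumes a Node type with a uri and mutable parents and
# children collections. A minimal sketch of what it could look like (the
# original Node implementation is not shown, so this is an assumption):
class Node(object):
    def __init__(self, uri, parents):
        self.uri = uri
        self.parents = set(parents)   # direct superclass nodes
        self.children = set()         # direct subclass nodes

    def __repr__(self):
        return 'Node(%s)' % self.uri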
class TestSparqlOPT_FILTER2(unittest.TestCase):
    def setUp(self):
        self.graph = ConjunctiveGraph()
        self.graph.load(StringIO(testContent), format='n3')

    def test_OPT_FILTER(self):
        results = self.graph.query(QUERY, DEBUG=False)
        results = list(results)
        self.failUnless(
            results == [(doc1,)],
            "expecting : %s . Got: %s" % ([(doc1,)], repr(results)))
class Install(RDFFile):
    """
    TODO: Add documentation
    """
    def __init__(self, fileName):
        self.graph = ConjunctiveGraph()
        self.subject = URIRef("urn:mozilla:install-manifest")
        try:
            self.graph.load(fileName)
        except IOError, e:
            pass
def main(filename, name):
    all_sections = getCharSheetSections()
    charactersheet = NS('http://trinket.thorne.id.au/2007/%s.n3#' % name)
    character = URIRef(charactersheet + name)
    graph = ConjunctiveGraph()
    for f in glob.glob(os.path.join(sibpath(__file__, 'data'), '*.n3')):
        if f.endswith('monster.n3'):
            continue
        try:
            graph.load(f, format='n3')
        except Exception, e:
            print 'Could not load', f, 'because', e
def _convertRDF(self):
    """
    Convert an RDF/XML result into an RDFLib triple store. This method can
    be overwritten in a subclass for a different conversion method.
    @return: converted result
    @rtype: RDFLib Graph
    """
    from rdflib import ConjunctiveGraph
    retval = ConjunctiveGraph()
    # This is a strange hack. If the publicID is not set, rdflib (or the
    # underlying xml parser) makes a funny (and, as far as I could see,
    # meaningless) error message...
    retval.load(self.response, publicID=' ')
    return retval
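# Hedged sketch of the publicID workaround above, applied directly: loading
# an RDF/XML payload from a file-like object. The inline payload is an
# assumption for illustration.
from StringIO import StringIO
from rdflib import ConjunctiveGraph

response = StringIO('<?xml version="1.0"?>'
                    '<rdf:RDF xmlns:rdf='
                    '"http://www.w3.org/1999/02/22-rdf-syntax-ns#"/>')
g = ConjunctiveGraph()
g.load(response, publicID=' ')   # blank publicID avoids the parser complaint
print(len(g))                    # 0 triples in the empty payload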
def test_citation_prov_ttl(self):
    g1 = ConjunctiveGraph()
    g1.load(self.citation_prov_ttl_path, format="nquads")
    g2 = ConjunctiveGraph()
    for c in [self.citation_1, self.citation_2, self.citation_3,
              self.citation_4, self.citation_5, self.citation_6]:
        for s, p, o, g in c.get_citation_prov_rdf(self.base_url).quads(
                (None, None, None, None)):
            g2.add((s, p, o, g))
    self.assertTrue(isomorphic(g1, g2))
def test_citation_data_ttl(self):
    g1 = ConjunctiveGraph()
    g1.load(self.citation_data_ttl_path, format="nt11")
    g2 = ConjunctiveGraph()
    for c in [self.citation_1, self.citation_2, self.citation_3,
              self.citation_4, self.citation_5, self.citation_6]:
        for s, p, o in c.get_citation_rdf(self.base_url, False, False, False):
            g2.add((s, p, o))
    self.assertTrue(isomorphic(g1, g2))
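# Both tests above rely on rdflib.compare.isomorphic, which matches blank
# nodes structurally rather than by identifier. A minimal self-contained
# sketch:
from rdflib import ConjunctiveGraph, BNode, URIRef, Literal
from rdflib.compare import isomorphic

name = URIRef('http://xmlns.com/foaf/0.1/name')
a, b = ConjunctiveGraph(), ConjunctiveGraph()
a.add((BNode('x'), name, Literal('Alice')))
b.add((BNode('y'), name, Literal('Alice')))
assert isomorphic(a, b)   # different bnode ids, same shape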
def render_graph(result, cfg, **kwargs):
    """
    Render for output a result that can be parsed as an RDF graph
    """
    # Mapping from MIME types to formats accepted by RDFlib
    rdflib_formats = {
        'text/rdf+n3': 'n3',
        'text/turtle': 'turtle',
        'application/x-turtle': 'turtle',
        'application/rdf+xml': 'xml',
        'text/rdf': 'xml',
    }

    try:
        got = kwargs.get('format', 'text/rdf+n3')
        fmt = rdflib_formats[got]
    except KeyError:
        raise KrnlException('Unsupported format for graph processing: {!s}', got)

    g = ConjunctiveGraph()
    g.load(StringInputSource(result), format=fmt)

    display = cfg.dis[0] if is_collection(cfg.dis) else cfg.dis
    if display in ('png', 'svg'):
        try:
            literal = len(cfg.dis) > 1 and cfg.dis[1].startswith('withlit')
            opt = {'lang': cfg.lan, 'literal': literal, 'graphviz': []}
            data, metadata = draw_graph(g, fmt=display, options=opt)
            return {'data': data, 'metadata': metadata}
        except Exception as e:
            raise KrnlException('Exception while drawing graph: {!r}', e)
    elif display == 'table':
        it = rdf_iterator(g, add_vtype=cfg.typ, lang=cfg.lan)
        n, data = html_table(it, limit=cfg.lmt, withtype=cfg.typ)
        data += div('Shown: {}, Total rows: {}',
                    n if cfg.lmt else 'all', len(g), css="tinfo")
        data = {'text/html': div(data)}
    elif len(g) == 0:
        data = {'text/html': div(div('empty graph', css='krn-warn'))}
    else:
        data = {'text/plain': g.serialize(format='nt').decode('utf-8')}

    return {'data': data, 'metadata': {}}
def process_tools():
    """
    Go through all bio.tools entries in bioschemas JSON-LD and produce a
    single RDF file.
    """
    tool_files = get_bioschemas_files_in_repo()
    print(len(tool_files))
    rdf_graph = ConjunctiveGraph()
    for tool_file in tool_files:
        print(tool_file)
        rdf_graph.load(tool_file, format="json-ld")

    rdf_graph.serialize(
        format="turtle",
        destination="bioschemas-dump.ttl"
        #destination=os.path.join(directory, tpe_id + "bioschemas.jsonld")
    )
class DateFilterTest(unittest.TestCase):
    # debug = True
    def setUp(self):
        self.graph = ConjunctiveGraph()
        self.graph.load(StringIO(testContent), format='n3')

    def test_DATE_FILTER1(self):
        for query in [QUERY1, QUERY2, QUERY3]:
            # print query
            #pQuery = Parse(query)
            #print RenderSPARQLAlgebra(pQuery)

            # Skip until issue is resolved
            if query == QUERY1 and rdflib.py3compat.PY3:
                raise SkipTest('Known issue with Python 3')

            results = self.graph.query(query, processor="sparql", DEBUG=False)
            results = list(results)
            self.failUnless(
                len(results) and results == [(ANSWER1,)],
                "expecting : %s . Got: %s" % ([(ANSWER1,)], repr(results)))
def catalyst_graph_for(file):
    if file.startswith('/'):
        file = 'file://' + file
    logging.info("InferenceStore catalyst_graph_for started")

    # quads = jsonld.to_rdf(file, {'format': 'application/nquads'})
    logging.info("InferenceStore JSON-LD loaded")

    g = ConjunctiveGraph()
    g.namespace_manager = namespace_manager
    # g.parse(data=quads, format='nquads')
    g.load(file, format="json-ld")
    logging.info("InferenceStore base graph loaded")

    # get the inference engine
    get_inference_store().get_inference(g)
    logging.info("InferenceStore inference graph loaded")

    return g
class DeepGraphStore():
    store_name = 'SQLite'

    def __init__(self, create=False, parse=None):
        self.parse = parse
        self.create = create
        self.graph = None

    def setUp(self):
        self.path = "" + random_file_generating()
        self.graph = Graph(store=self.store_name)
        self.graph.open(self.path, create=self.create)
        if self.create:
            if not self.parse:
                self.graph.parse("http://njh.me/foaf.rdf", format='xml')
            else:
                self.graph.parse(self.parse)
            self.graph.commit()

    def open(self, path):
        self.graph = ConjunctiveGraph(self.store_name)
        self.path = path
        self.graph.open(self.path, create=False)

    def query(self, sparql_query):
        return self.graph.query(sparql_query)

    def parse(self, path_to_file_):
        self.graph.parse(path_to_file_)

    def load(self, triples):
        self.graph.load(triples)

    def close(self):
        self.graph.close()

    def size(self):
        size = len(self.graph)
        # self.close()
        return size
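# Hedged usage sketch for DeepGraphStore above. random_file_generating and
# the SQLite store plugin come from the original project, and the FOAF URL
# is fetched over the network, so this assumes both are available.
store = DeepGraphStore(create=True)
store.setUp()                   # opens a fresh store and parses the seed data
print(store.size())             # triple count after the initial parse
for row in store.query('SELECT ?s WHERE { ?s ?p ?o } LIMIT 5'):
    print(row)
store.close()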
def populate_ontology():
    ont_path = path_kg + 'traffic_ontology.xml'
    metadata = pd.read_csv(path_src + 'trafficMetaData.csv', sep=',')

    g = ConjunctiveGraph()
    g.load(ont_path)
    g.add((URIRef(base_uri), RDF.type, OWL.Ontology))
    g.bind("owl", OWL)
    g.bind("rdf", RDF)
    g.bind("rdfs", RDFS)
    # g.bind("city", base_uri)

    # populate from metadata: [Path, from[name], to[name], from[has[street]], to[has[street]]]
    populate_from_metadata(metadata, g)

    poi = parse_log()
    for entry in poi:
        point = entry[0][0].split('_')[0] + "_" + entry[0][0].split('_')[1]
        metadata_entry = metadata[metadata['REPORT_ID'] == int(entry[0][0].split('_')[2])]
        address_id = metadata_entry[point + '_NAME'].values[0]
        poi_list = entry[0][1]
        for tmp_poi in poi_list:
            # generate an id for the poi
            tmp_poi_id = str(abs(hash(point + '_' + str(address_id) + '_' + tmp_poi)))
            g.add((base_uri[tmp_poi_id], RDF.type, base_uri['Point_of_interest']))
            g.add((base_uri[tmp_poi_id], RDF.type,
                   base_uri[tmp_poi[0].upper() + tmp_poi[1:]]))
            g.add((base_uri[tmp_poi_id], base_uri['locatedAt'],
                   base_uri[str(address_id)]))

    simple_sequence = []
    events = pd.read_csv(path_processed + 'events.csv')
    mapping = pd.read_csv(path_processed + 'mapping.csv').T.to_dict()
    for k, v in mapping.iteritems():
        g.add((base_uri[v['Unnamed: 0']], base_uri['occursAt'],
               base_uri[str(v['occursAt'])]))
        g.add((base_uri[v['Unnamed: 0']], RDF.type, base_uri[v['type']]))
    for e in events['Id']:
        simple_sequence.append(e)

    with open(path_processed + 'sequence.txt', "wb") as seq_file:
        seq_file.write(','.join(simple_sequence))

    g.serialize(path_kg + 'traffic_individuals.xml', format='xml')
def main():
    # f = open("CommonChords.rdf",'w');
    g = ConjunctiveGraph()
    prefix = "http://purl.org/ontology/chord/symbol/"
    notes = ['C', 'D', 'E', 'F', 'G', 'A', 'B']
    mods = ['b', '', 's']
    bases = ['', ':maj', ':min', ':dim', ':aug', ':maj7', ':min7', ':7',
             ':dim7', ':hdim7', ':minmaj7', ':maj6', ':min6', ':9', ':maj9',
             ':min9', ':sus4', ':sus2']

    chordURI = prefix + 'N'
    print "loading " + chordURI
    g.load(chordURI)
    for note in notes:
        for mod in mods:
            for base in bases:
                chordURI = prefix + note + mod + base
                #f.write(prefix+note+mod+base+"\n")
                print "loading " + chordURI
                g.load(chordURI)

    print "Writing graph out..."
    g.serialize('CommonChords.rdf', 'xml')
def GScsv2RDF(infilename, outfilename, format="xml", withdescriptions=False):
    if withdescriptions:
        commonchords = ConjunctiveGraph()
        commonchords.load("CommonChords.rdf")
        extrachords = ConjunctiveGraph()

    lines = open(infilename).readlines()

    #
    # Initial model bits
    #
    mi = mopy.MusicInfo()

    homepage = mopy.foaf.Document("http://sourceforge.net/projects/motools")
    mi.add(homepage)
    program = mopy.foaf.Agent()
    program.name = "GScsv2RDF.py"
    program.homepage = homepage
    mi.add(program)

    tl = TimeLine("#tl")
    tl.label = "Timeline derived from " + infilename
    tl.maker = program
    mi.add(tl)

    [artistStr, titleStr] = [f.strip() for f in lines[0].split("\t")]

    # Add artist & title metadata
    signal = Signal()
    signal.time = sig_int = Interval()
    sig_int.label = "Whole signal interval"
    sig_int.beginsAtDuration = secondsToXSDDuration(0)
    sig_int.onTimeLine = tl
    signal.published_as = track = Track()
    artist = MusicArtist()
    artist.made = track
    artist.name = artistStr
    track.title = titleStr
    mi.add(sig_int)
    mi.add(signal)
    mi.add(track)
    mi.add(artist)

    lineNum = 1
    segmentNum = 0
    thisSegment_i = None
    chordSymbol = ''
    t_secs = 0.0
    for line in lines[1:]:
        # print "parsing line "+str(lineNum)
        try:
            lastChordSymbol = chordSymbol
            t_secs = getTimestamp(line)
            chordSymbol = getChordSymbol(line)
            if chordSymbol != lastChordSymbol:
                # print " handling new chord symbol"
                segmentNum += 1
                lastSegment_i = thisSegment_i
                thisSegment_i = Interval("#i_" + str(segmentNum))
                thisSegment_i.beginsAtDuration = secondsToXSDDuration(t_secs)
                if lastSegment_i != None:
                    # print " terminating last interval"
                    lastSegment_i.endsAtDuration = secondsToXSDDuration(t_secs)
                    thisSegment_i.intervalAfter = lastSegment_i
                    lastSegment_i.intervalBefore = thisSegment_i
                mi.add(thisSegment_i)

                chordURI = "http://purl.org/ontology/chord/symbol/" \
                           + chordSymbol.replace("#", "s").replace(",", "%2C")
                if withdescriptions and \
                   len(list(commonchords.predicate_objects(URIRef(chordURI)))) == 0 and \
                   len(list(extrachords.predicate_objects(URIRef(chordURI)))) == 0:
                    # Deref to grab chord info
                    print "loading <" + chordURI + ">..."
                    extrachords.load(chordURI)
                c = Chord(chordURI)
                c_event = ChordEvent("#ce_" + str(segmentNum))
                c_event.chord = c
                c_event.time = thisSegment_i
                c_event.label = chordSymbol
                mi.add(c)
                mi.add(c_event)
                # print " added new chord event for "+chordURI
        except Exception, e:
            print("ERROR : Problem parsing input file at line " + str(lineNum) + " !\n")
            raise
        lineNum += 1
def importRDFFile(filename, format="xml", strict=True):
    g = ConjunctiveGraph()
    g.load(filename, format=format)
    return importRDFGraph(g, strict)
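# Hedged usage sketch for importRDFFile above; the file name is an
# assumption and importRDFGraph comes from the original module.
# rdflib.util.guess_format can pick the parser from the extension instead
# of the hard-coded "xml" default:
from rdflib.util import guess_format

fn = 'ontology.ttl'   # hypothetical file
result = importRDFFile(fn, format=guess_format(fn), strict=False)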
def query_test(t):
    uri, name, comment, data, graphdata, query, resfile, syntax = t

    # the query-eval tests refer to graphs to load by resolvable filenames
    rdflib_sparql_module.SPARQL_LOAD_GRAPHS = True

    if uri in skiptests:
        raise SkipTest()

    def skip(reason='(none)'):
        print "Skipping %s from now on." % uri
        f = open("skiptests.list", "a")
        f.write("%s\t%s\n" % (uri, reason))
        f.close()

    try:
        g = ConjunctiveGraph()
        if data:
            g.default_context.load(data, format=_fmt(data))

        if graphdata:
            for x in graphdata:
                g.load(x, format=_fmt(x))

        if not resfile:
            # no result - syntax test
            if syntax:
                translateQuery(parseQuery(open(query[7:]).read()),
                               base=urljoin(query, '.'))
            else:
                # negative syntax test
                try:
                    translateQuery(parseQuery(open(query[7:]).read()),
                                   base=urljoin(query, '.'))
                    assert False, 'Query should not have parsed!'
                except:
                    pass  # it's fine - the query should not parse
            return

        # eval test - carry out query
        res2 = g.query(open(query[7:]).read(), base=urljoin(query, '.'))

        if resfile.endswith('ttl'):
            resg = Graph()
            resg.load(resfile, format='turtle', publicID=resfile)
            res = RDFResultParser().parse(resg)
        elif resfile.endswith('rdf'):
            resg = Graph()
            resg.load(resfile, publicID=resfile)
            res = RDFResultParser().parse(resg)
        elif resfile.endswith('srj'):
            res = Result.parse(open(resfile[7:]), format='json')
        elif resfile.endswith('tsv'):
            res = Result.parse(open(resfile[7:]), format='tsv')
        elif resfile.endswith('csv'):
            res = Result.parse(open(resfile[7:]), format='csv')

            # CSV is lossy, round-trip our own resultset to
            # lose the same info :)

            # write bytes, read strings...
            s = BytesIO()
            res2.serialize(s, format='csv')
            print s.getvalue()
            s = StringIO(s.getvalue().decode('utf-8'))  # hmm ?
            res2 = Result.parse(s, format='csv')
        else:
            res = Result.parse(open(resfile[7:]), format='xml')

        if not DETAILEDASSERT:
            eq(res.type, res2.type, 'Types do not match')
            if res.type == 'SELECT':
                eq(set(res.vars), set(res2.vars), 'Vars do not match')
                comp = bindingsCompatible(
                    set(frozenset(x.iteritems()) for x in res.bindings),
                    set(frozenset(x.iteritems()) for x in res2.bindings))
                assert comp, 'Bindings do not match'
            elif res.type == 'ASK':
                eq(res.askAnswer, res2.askAnswer, 'Ask answer does not match')
            elif res.type in ('DESCRIBE', 'CONSTRUCT'):
                assert isomorphic(res.graph, res2.graph), \
                    'graphs are not isomorphic!'
            else:
                raise Exception('Unknown result type: %s' % res.type)
        else:
            eq(res.type, res2.type,
               'Types do not match: %r != %r' % (res.type, res2.type))
            if res.type == 'SELECT':
                eq(set(res.vars), set(res2.vars),
                   'Vars do not match: %r != %r'
                   % (set(res.vars), set(res2.vars)))
                assert bindingsCompatible(
                    set(frozenset(x.iteritems()) for x in res.bindings),
                    set(frozenset(x.iteritems()) for x in res2.bindings)
                ), 'Bindings do not match: \n%s\n!=\n%s' % (
                    _bindingsTable(res.bindings), _bindingsTable(res2.bindings))
            elif res.type == 'ASK':
                eq(res.askAnswer, res2.askAnswer,
                   "Ask answer does not match: %r != %r"
                   % (res.askAnswer, res2.askAnswer))
            elif res.type in ('DESCRIBE', 'CONSTRUCT'):
                assert isomorphic(res.graph, res2.graph), \
                    'graphs are not isomorphic!'
            else:
                raise Exception('Unknown result type: %s' % res.type)

    except Exception, e:
        if isinstance(e, AssertionError):
            failed_tests.append(uri)
            fails[str(e)] += 1
        else:
            error_tests.append(uri)
            errors[str(e)] += 1

        if DEBUG_ERROR and not isinstance(e, AssertionError) or DEBUG_FAIL:
            print "======================================"
            print uri
            print name
            print comment

            if not resfile:
                if syntax:
                    print "Positive syntax test"
                else:
                    print "Negative syntax test"

            if data:
                print "----------------- DATA --------------------"
                print ">>>", data
                print open(data[7:]).read()
            if graphdata:
                print "----------------- GRAPHDATA --------------------"
                for x in graphdata:
                    print ">>>", x
                    print open(x[7:]).read()

            print "----------------- Query -------------------"
            print ">>>", query
            print open(query[7:]).read()
            if resfile:
                print "----------------- Res -------------------"
                print ">>>", resfile
                print open(resfile[7:]).read()

            try:
                pq = parseQuery(open(query[7:]).read())
                print "----------------- Parsed ------------------"
                pprintAlgebra(translateQuery(pq, base=urljoin(query, '.')))
            except:
                print "(parser error)"

            print decodeStringEscape(unicode(e))

            import pdb
            pdb.post_mortem(sys.exc_info()[2])
            # pdb.set_trace()
            # nose.tools.set_trace()
        raise
def update_test(t):
    # the update-eval tests refer to graphs on http://example.org
    rdflib_sparql_module.SPARQL_LOAD_GRAPHS = False

    uri, name, comment, data, graphdata, query, res, syntax = t

    if uri in skiptests:
        raise SkipTest()

    try:
        g = ConjunctiveGraph()

        if not res:
            if syntax:
                translateUpdate(parseUpdate(open(query[7:])))
            else:
                try:
                    translateUpdate(parseUpdate(open(query[7:])))
                    raise AssertionError("Query shouldn't have parsed!")
                except:
                    pass  # negative syntax test
            return

        resdata, resgraphdata = res

        # read input graphs
        if data:
            g.default_context.load(data, format=_fmt(data))

        if graphdata:
            for x, l in graphdata:
                g.load(x, publicID=URIRef(l), format=_fmt(x))

        req = translateUpdate(parseUpdate(open(query[7:])))
        evalUpdate(g, req)

        # read expected results
        resg = ConjunctiveGraph()
        if resdata:
            resg.default_context.load(resdata, format=_fmt(resdata))

        if resgraphdata:
            for x, l in resgraphdata:
                resg.load(x, publicID=URIRef(l), format=_fmt(x))

        eq(set(x.identifier for x in g.contexts() if x != g.default_context),
           set(x.identifier for x in resg.contexts()
               if x != resg.default_context))

        assert isomorphic(g.default_context, resg.default_context), \
            'Default graphs are not isomorphic'

        for x in g.contexts():
            if x == g.default_context:
                continue
            assert isomorphic(x, resg.get_context(x.identifier)), \
                "Graphs with ID %s are not isomorphic" % x.identifier

    except Exception, e:
        if isinstance(e, AssertionError):
            failed_tests.append(uri)
            fails[str(e)] += 1
        else:
            error_tests.append(uri)
            errors[str(e)] += 1

        if DEBUG_ERROR and not isinstance(e, AssertionError) or DEBUG_FAIL:
            print "======================================"
            print uri
            print name
            print comment

            if not res:
                if syntax:
                    print "Positive syntax test"
                else:
                    print "Negative syntax test"

            if data:
                print "----------------- DATA --------------------"
                print ">>>", data
                print open(data[7:]).read()
            if graphdata:
                print "----------------- GRAPHDATA --------------------"
                for x, l in graphdata:
                    print ">>>", x, l
                    print open(x[7:]).read()

            print "----------------- Request -------------------"
            print ">>>", query
            print open(query[7:]).read()

            if res:
                if resdata:
                    print "----------------- RES DATA --------------------"
                    print ">>>", resdata
                    print open(resdata[7:]).read()
                if resgraphdata:
                    print "----------------- RES GRAPHDATA -------------------"
                    for x, l in resgraphdata:
                        print ">>>", x, l
                        print open(x[7:]).read()

            print "------------- MY RESULT ----------"
            print g.serialize(format='trig')

            try:
                pq = translateUpdate(parseUpdate(open(query[7:]).read()))
                print "----------------- Parsed ------------------"
                pprintAlgebra(pq)
                # print pq
            except:
                print "(parser error)"

            print decodeStringEscape(unicode(e))

            import pdb
            pdb.post_mortem(sys.exc_info()[2])
        raise
def handle(self, **options):
    LOGGER.debug("linking places")
    for place in models.Place.objects.filter(dbpedia__isnull=True):
        if not place.city or not place.state:
            continue
        # formulate a dbpedia place uri
        path = urllib2.quote('%s,_%s' % (_clean(place.city),
                                         _clean(place.state)))
        url = URIRef('http://dbpedia.org/resource/%s' % path)

        # attempt to get a graph from it
        graph = ConjunctiveGraph()
        try:
            LOGGER.debug("looking up %s" % url)
            graph.load(url)
        except urllib2.HTTPError as e:
            LOGGER.error(e)

        # if we've got more than 3 assertions extract some stuff from
        # the graph and save back some info to the db, would be nice
        # to have a triple store underneath where we could persist
        # all the facts eh?
        if len(graph) >= 3:
            place.dbpedia = url
            place.latitude = graph.value(url, geo['lat'])
            place.longitude = graph.value(url, geo['long'])
            for object in graph.objects(URIRef(url), owl['sameAs']):
                if object.startswith('http://sws.geonames.org'):
                    place.geonames = object
            place.save()
            LOGGER.info("found dbpedia resource %s" % url)
        else:
            LOGGER.warn("couldn't find dbpedia resource for %s" % url)
        reset_queries()

    LOGGER.info("finished looking up places in dbpedia")
    LOGGER.info("dumping place_links.json fixture")

    # so it would be nice to use django.core.serializer here
    # but it serializes everything about the model, including
    # titles that are linked to ... and this could theoretically
    # change over time, so we only preserve the facts that have
    # been harvested from dbpedia, so they can overlay over
    # the places that have been extracted during title load
    json_src = []
    places_qs = models.Place.objects.filter(dbpedia__isnull=False)
    for p in places_qs.order_by('name'):
        json_src.append({
            'name': p.name,
            'dbpedia': p.dbpedia,
            'geonames': p.geonames,
            'longitude': p.longitude,
            'latitude': p.latitude
        })
        reset_queries()
    json.dump(json_src, file('core/fixtures/place_links.json', 'w'), indent=2)
    LOGGER.info("finished dumping place_links.json fixture")
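# The handler above assumes module-level geo and owl namespace objects; a
# minimal sketch of those definitions (an assumption, matching the
# geo:lat/geo:long and owl:sameAs terms it reads):
from rdflib import Namespace

geo = Namespace('http://www.w3.org/2003/01/geo/wgs84_pos#')
owl = Namespace('http://www.w3.org/2002/07/owl#')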
def convert(teifile, namespace):
    #graph_uri = "http://contextus.net/resource/blue_velvet/"

    ns = Namespace(namespace)

    graph = ConjunctiveGraph()
    graph.load(teifile, format="rdfa")

    graph.bind("default", ns)

    to_update = ""
    for prefix, nsuri in graph.namespaces():
        #print("prefix: " + str(prefix) + " - " + str(nsuri))
        if nsuri in ns:
            to_update = nsuri

    for s, p, o in graph:
        # print s, p, o
        if to_update != "" and to_update in s:
            graph.remove((s, p, o))
            s = URIRef(s.replace(to_update, ns))
            graph.add((s, p, o))

    act = ""
    scene = ""
    line = ""
    char = 0
    loc = 0

    #timeline = ns['timeline/narrative']
    #graph.add((timeline, RDF.type, ome['Timeline']))

    tree = ET.parse(teifile)
    cast = dict()

    titleNode = tree.find('//title')

    castItems = tree.findall('/text/body/div1/castList//castItem')
    for castItem in castItems:
        actorNode = castItem.find('actor')
        roleNode = castItem.find('role')

        if roleNode != None:
            id = roleNode.get("{http://www.w3.org/XML/1998/namespace}id")

        #print("Found castItem!")

        actor = None
        role = None

        # Check to see if we already have an entry
        if(roleNode != None and roleNode.get("about")):
            charname = roleNode.get("about")

            if(charname.find(":") > -1):
                nmsp, nom = charname.split(":", 1)
                charcode = "character/" + str(char)
                charref = nmsp + ":" + charcode + "]"
                role = extractCURIEorURI(graph, charref, nom[0:-1])
                char += 1
                #print("1:" + charname + ": adding id " + id + " to " + role)
            else:
                role = extractCURIEorURI(graph, charname)
                #print("2:" + charname + ": adding id " + id + " to " + role)

            cast[id] = role
            graph.add((role, RDF.type, omb['Character']))
            #print(charname + ": adding id " + id + " to " + role)

        if(actorNode != None and actorNode.get("about")):
            actor = extractCURIEorURI(graph, actorNode.get("about"))
            graph.add((actor, RDF.type, omb['Being']))

        if actor != None and role != None:
            graph.add((actor, omb['portrays'], role))
            graph.add((role, omb['portrayed-by'], actor))

    eventCount = 1
    groupCount = 1
    prior_event = None

    actItems = tree.findall('/text/body/div1')
    ref = ""

    for actItem in actItems:
        if actItem.get("type") == "act":
            act = actItem.get("n")

        sceneItems = actItem.findall('div2')
        for sceneItem in sceneItems:
            #print("Found sceneItems!")
            if sceneItem.get("type") == "scene":
                scene = sceneItem.get("n")

            # Work out the location of this scene
            location = None
            stageItems = sceneItem.findall("stage")

            #internalnum = 1
            stagenum = 0
            speechnum = 1

            for stageItem in stageItems:
                if stageItem.get("type") == "location":
                    # The RDFa parser doesn't handle the type - so we can
                    # grab that here.
                    if stageItem.get("about") != None:
                        locname = stageItem.get("about")

                        # Adding location type/oml:space for location
                        if stageItem.get("typeof") and stageItem.get("about"):
                            type = extractCURIEorURI(graph, stageItem.get("typeof"))
                            #print "1. Location: " + str(location) + " Type: " + str(type)
                        elif stageItem.get("about"):
                            #print "2. Location: " + str(locname)
                            type = extractCURIEorURI(graph, oml['Space'])

                        # Get location value and add rdfs:label if location
                        # is not using the TEI value
                        if(locname.find(":") > -1):
                            nmsp, nom = locname.split(":", 1)
                            loccode = "location/" + str(loc)
                            locref = nmsp + ":" + loccode + "]"
                            location = extractCURIEorURI(graph, locref, nom[0:-1])
                            loc += 1
                            graph.add((location, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(nom[0:-1])))
                        else:
                            location = extractCURIEorURI(graph, stageItem.get("about"))

                        # Add location to graph
                        graph.add((location, RDF.type, type))
                    else:
                        location = ""

                    #print("Adding location type: " + type + " (" + location + ")")

            if cast:
                # Work out a list of all cast in a given section
                currentCast = list()
                speakers = list()

                # Iterate through elements within stageItem
                # Find speaker events and add to list of current cast for inclusion in social event
                # Find reference events and add to ongoing social event ?
                # Find stage events
                #   If event is an entrance then
                #     create social event for people talking before entrance
                #     create travel event i.e. entrance
                #     add new arrival to current cast list
                #   If event is exit event then
                #     create social event for people talking before exit
                #     create travel event i.e. exit
                #     if leavers are not named directly then calculate who is leaving
                #     remove leavers from current cast list
                # If reach end of scene then create social event with current cast list

                # Also need to check if social event before exit has same composition
                # as social event after exit since then they should be merged

                event = ns['event/' + str(eventCount)]
                group = ns['group/' + str(groupCount)]

                refersTo = list()
                #parent = None
                speakerNodes = list()
                speakerRef = list()

                xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene)

                stagecount = 0
                stage_array = list()

                for node in sceneItem.getiterator():
                    #print("Node: " + node.tag)

                    """
                    if node.tag == "lb":
                        if node.get("ed") == "F1":
                            line = node.get("n")
                            if titleNode != None:
                                ref = titleNode.text + " " + str(act) + "." + str(scene) + "." + str(line)
                            else:
                                ref = str(act) + "." + str(scene) + "." + str(line)

                            #xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene) + "#xpointer(//lb[@ed='F1' and @n='" + str(line) + "'])"
                            xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene)
                            #print("Ref: " + xpointer)
                    """

                    if node.tag == "sp":
                        id = node.get("who")
                        if id and cast:
                            speakers.append(cast[id[1:]])
                            speakerNodes.append(node)
                            if perseusid == None:
                                speakerRef.append(ref)
                            else:
                                #speechRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(int(line) + 1) + "']/ancestor::sp)"
                                speechRef = xpointer + "#xpointer(//div2/sp[" + str(speechnum) + "])"
                                speakerRef.append(speechRef)
                            #print("Line ref: " + ref)

                            if cast[id[1:]] not in currentCast:
                                currentCast.append(cast[id[1:]])

                        #internalnum = 1
                        speechnum += 1
                        stagecount = 0

                        previousl = 0
                        for subnode in node.getiterator():
                            if subnode.tag == "l":
                                previousl += 1
                            if subnode.tag == "stage":
                                #print ("Stagecount: " + str(stagecount) + " Previousl: " + str(previousl) + "\n")
                                stage_array.append(previousl)
                                stagecount += 1

                    elif node.tag == "stage":
                        if stagecount > 0:
                            s_max = len(stage_array)
                            diff = s_max - stagecount
                            #if diff == 0:
                            #    stagenum += 1
                            entRef = xpointer + "#xpointer(//div2/sp[" + str(speechnum - 1) + "]/l[" + str(stage_array[diff]) + "]/stage)"
                            #internalnum += 1
                            stagecount -= 1
                        else:
                            stagenum += 1
                            entRef = xpointer + "#xpointer(//div2/stage[" + str(stagenum) + "])"

                        if node.get("type") == "entrance":
                            # Add Social Events for all the people who spoke
                            # since the last break (if there were any)
                            update = list()
                            update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location)
                            eventCount = update[0]
                            prior_event = update[1]

                            event = ns['event/' + str(eventCount)]

                            speakers = list()
                            speakerNodes = list()
                            speakerRef = list()

                            # Add Travel Event
                            graph.add((event, RDF.type, omj['Travel']))

                            if perseusid == None:
                                graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), Literal(ref)))
                            else:
                                #entRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(line) + "']/following-sibling::*[1]/self::stage)"
                                graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(entRef)))

                            #print("Entrance event. GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast)))
                            #print("Found entrance event!")

                            if location:
                                graph.add((event, ome['to'], location))

                            involved = node.get("about")
                            if(len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"):
                                involved = involved[1:-1]

                            chunks = involved.split()
                            chunk_count = len(chunks)

                            if chunk_count > 1:
                                #type = extractCURIEorURI(graph, "[omb:Group]")
                                #graph.add((group, RDF.type, type))
                                graph.add((group, RDF.type, omb['Group']))

                            event_label = ""
                            en = 1
                            for chunk in chunks:
                                striped = chunk.strip()
                                if(len(striped) > 0 and striped[0] == "[" and striped[-1] == "]"):
                                    striped = striped[1:-1]

                                currentCast.append(cast[striped])

                                if chunk_count > 1:
                                    graph.add((group, ome['contains'], cast[striped]))
                                    if en == chunk_count:
                                        event_label = event_label[0:-2] + " and " + striped
                                        graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " arrive")))
                                    elif en < chunk_count:
                                        event_label += striped + ", "
                                else:
                                    #print("Adding person as subject-entity to entry event " + str(eventCount))
                                    graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(striped + " arrives")))
                                    graph.add((event, ome['has-subject-entity'], cast[striped]))
                                en += 1

                            if chunk_count > 1:
                                graph.add((event, ome['has-subject-entity'], group))
                                #print("Adding group as subject-entity to entry event " + str(eventCount))
                                groupCount = groupCount + 1
                                group = ns['group/' + str(groupCount)]

                            if(prior_event):
                                graph.add((event, ome['follows'], prior_event))
                                graph.add((prior_event, ome['precedes'], event))

                            prior_event = event
                            eventCount = eventCount + 1
                            event = ns['event/' + str(eventCount)]

                        if node.get("type") == "exit":
                            # Add Social Events for all the people who spoke
                            # since the last break (if there were any)
                            update = list()
                            update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location)
                            eventCount = update[0]
                            prior_event = update[1]

                            event = ns['event/' + str(eventCount)]

                            speakers = list()
                            speakerNodes = list()
                            speakerRef = list()

                            # Add Travel Event
                            graph.add((event, RDF.type, omj['Travel']))

                            if perseusid == None:
                                graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), Literal(ref)))
                            else:
                                #exitRef = xpointer
                                #graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(exitRef)))
                                graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(entRef)))

                            #print("Found exit event!")

                            if location != None:
                                graph.add((event, ome['from'], location))

                            involved = node.get("about")

                            if involved.strip() == "" or "-all" in involved:
                                # Remove everyone
                                #print("Exit all. GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast)))
                                #for peep in currentCast:
                                #    print(peep)
                                if len(currentCast) > 1:
                                    #type = extractCURIEorURI(graph, "[omb:Group]")
                                    #graph.add((group, RDF.type, type))
                                    graph.add((group, RDF.type, omb['Group']))

                                event_label = ""
                                en = 1
                                for peep in currentCast:
                                    short_ref = ""
                                    for key, value in cast.iteritems():
                                        if peep == value:
                                            short_ref = key

                                    if len(currentCast) > 1:
                                        graph.add((group, ome['contains'], peep))
                                        if en == len(currentCast):
                                            event_label = event_label[0:-2] + " and " + short_ref
                                            graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " leave")))
                                        elif en < len(currentCast):
                                            event_label += short_ref + ", "
                                    else:
                                        #print("Adding person as subject-entity to exeunt event " + str(eventCount))
                                        graph.add((event, ome['has-subject-entity'], peep))
                                        graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(short_ref + " leaves")))
                                    en += 1

                                if len(currentCast) > 1:
                                    graph.add((event, ome['has-subject-entity'], group))
                                    #print("Adding group as subject-entity to exeunt event " + str(eventCount))
                                    groupCount = groupCount + 1
                                    group = ns['group/' + str(groupCount)]

                                currentCast = list()

                            elif "!" in involved:
                                #print("Exit except some. GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast)))
                                #print("Event: " + involved)
                                if(len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"):
                                    involved = involved[1:-1]

                                involved = involved.strip()

                                if(len(involved) > 0 and involved[0] == "!" and involved[1] == "(" and involved[-1] == ")"):
                                    involved = involved[2:-1]

                                #print("involved: " + involved)

                                striped = involved.strip()
                                c_ids = striped.split()

                                chunks = list()
                                for stay in c_ids:
                                    #print("Staying: " + cast[stay])
                                    chunks.append(cast[stay])

                                staying = list()
                                going = list()
                                for player in currentCast:
                                    #print("Player: " + player)
                                    if player in chunks:
                                        staying.append(player)
                                    else:
                                        going.append(player)

                                going_count = len(going)
                                if going_count > 1:
                                    #type = extractCURIEorURI(graph, "[omb:Group]")
                                    #graph.add((group, RDF.type, type))
                                    graph.add((group, RDF.type, omb['Group']))

                                event_label = ""
                                en = 1
                                for ghost in going:
                                    #print("ghost: " + ghost)
                                    short_ref = ""
                                    for key, value in cast.iteritems():
                                        if ghost == value:
                                            short_ref = key

                                    if ghost in currentCast:
                                        currentCast.remove(ghost)
                                        #print("Current cast count: " + str(len(currentCast)))

                                    if going_count > 1:
                                        graph.add((group, ome['contains'], ghost))
                                        if en == len(going):
                                            event_label = event_label[0:-2] + " and " + short_ref
                                            graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " leave")))
                                        elif en < len(going):
                                            event_label += short_ref + ", "
                                    else:
                                        #print("Adding person as subject-entity to exit event " + str(eventCount))
                                        graph.add((event, ome['has-subject-entity'], ghost))
                                        graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(short_ref + " leaves")))
                                    en += 1

                                if going_count > 1:
                                    graph.add((event, ome['has-subject-entity'], group))
                                    #print("Adding group as subject-entity to exit event " + str(eventCount))
                                    groupCount = groupCount + 1
                                    group = ns['group/' + str(groupCount)]

                            else:
                                #print("Exit some. GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast)))
                                if(len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"):
                                    involved = involved[1:-1]

                                striped = involved.strip()
                                chunks = striped.split()
                                #print("striped: " + striped)

                                chunk_count = len(chunks)
                                if chunk_count > 1:
                                    #type = extractCURIEorURI(graph, "[omb:Group]")
                                    #graph.add((group, RDF.type, type))
                                    graph.add((group, RDF.type, omb['Group']))

                                event_label = ""
                                en = 1
                                for chunk in chunks:
                                    #print("chunk: " + chunk)
                                    ghost = cast[chunk]
                                    #print("ghost: " + ghost)

                                    if ghost in currentCast:
                                        currentCast.remove(ghost)
                                        #print("Current cast count: " + str(len(currentCast)))

                                    if chunk_count > 1:
                                        graph.add((group, ome['contains'], ghost))
                                        if en == len(currentCast):
                                            event_label = event_label[0:-2] + " and " + chunk
                                            graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " leave")))
                                        elif en < len(currentCast):
                                            event_label += chunk + ", "
                                    else:
                                        #print("Adding person as subject-entity to exit event " + str(eventCount))
                                        graph.add((event, ome['has-subject-entity'], ghost))
                                        graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(chunk + " leaves")))
                                    en += 1

                                if chunk_count > 1:
                                    graph.add((event, ome['has-subject-entity'], group))
                                    #print("Adding group as subject-entity to exit event " + str(eventCount))
                                    groupCount = groupCount + 1
                                    group = ns['group/' + str(groupCount)]

                            if(prior_event):
                                graph.add((event, ome['follows'], prior_event))
                                graph.add((prior_event, ome['precedes'], event))

                            prior_event = event
                            eventCount = eventCount + 1
                            event = ns['event/' + str(eventCount)]

                    #elif node.tag == "rs":
                    #    #print("Found rs node")
                    #    if parent:
                    #        #print("Parent type is " + parent.tag)
                    #        if parent.tag == "p" or parent.tag == "l":
                    #            refersTo.append(node.get("about"))

                    #parent = node

                # Add Social Events for all the people who spoke since the
                # last break (if there were any)
                #print("Final section of scene, currentCast:" + str(len(currentCast)) + " speakers: " + str(len(speakers)))
                update = list()
                update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location)
                eventCount = update[0]
                prior_event = update[1]

                event = ns['event/' + str(eventCount)]
                group = ns['group/' + str(groupCount)]

                speakers = list()
                speakerNodes = list()
                currentCast = list()
                speakerRef = list()

    print graph.serialize(format='xml')
#import rdfextras
from rdflib import ConjunctiveGraph, Namespace
from rdflib.namespace import DC, FOAF
#rdfextras.registerplugins() # so we can Graph.query()

owlNS = Namespace("http://www.w3.org/2002/07/owl#")
owlClass = owlNS["Class"]
owlObjectProperty = owlNS["ObjectProperty"]
owlDatatypeProperty = owlNS["DatatypeProperty"]
rdfNS = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
rdfProperty = rdfNS["Property"]
rdfType = rdfNS["type"]
rdfsNS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
rdfsSubClassOf = rdfsNS["subClassOf"]
rdfsDomain = rdfsNS["domain"]
rdfsRange = rdfsNS["range"]

graph = ConjunctiveGraph()
graph.load("./data/Film_Tbox.owl")
s = graph.serialize(format='n3')
#print(s)
#print("graph has %s statements." % len(graph))

def isSubClassOf(subClass, superClass, graph):
    # A class is trivially a subclass of itself.
    if subClass == superClass:
        return True
    # Walk every rdfs:subClassOf edge; only give up once all parents have
    # been tried, otherwise the first non-matching parent ends the search.
    for parentClass in graph.objects(subClass, rdfsSubClassOf):
        if isSubClassOf(parentClass, superClass, graph):
            return True
    return False
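# Example use of isSubClassOf() against the loaded T-box. The two URIs are
# placeholders; substitute classes that Film_Tbox.owl actually defines. Note
# that the recursion has no cycle guard, so a (malformed) cyclic
# rdfs:subClassOf chain would recurse forever; tracking visited classes in a
# set would protect against that.
documentary = URIRef("http://example.org/film#Documentary")  # hypothetical
work = URIRef("http://example.org/film#Work")                # hypothetical
print(isSubClassOf(documentary, work, graph))  # True iff a subClassOf path exists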
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . @prefix xsd: <http://www.w3.org/2001/XMLSchema#> . @prefix : <tag://example.org,2007/literals-test#> . <http://example.org/thing> :plain "plain"; :integer 1; :float 1.1e0; :decimal 1.1 ; :string "string"^^xsd:string; :date "2007-04-28"^^xsd:date; :escape "a \\"test\\""; rdfs:label "Thing"@en, "Sak"@sv . """ graph = ConjunctiveGraph() graph.load(StringIO(testRdf), format='n3') PROLOGUE = """ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> PREFIX t: <tag://example.org,2007/literals-test#> """ thing = URIRef("http://example.org/thing") SPARQL = PROLOGUE+" SELECT ?uri WHERE { ?uri %s . } " TEST_DATA = [ ('plain', SPARQL % 't:plain "plain"', [(thing,)]), ('integer', SPARQL % 't:integer 1', [(thing,)]), ('decimal', SPARQL % 't:decimal 1.1', [(thing,)]), ('float', SPARQL % 't:float 1.1e0', [(thing,)]),
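# A sketch of how TEST_DATA could be driven: each entry pairs a label and a
# SPARQL query with the bindings expected from the literals-test graph. The
# original runner is not shown in this snippet, so the loop below is an
# assumption about its shape.
for name, sparql, expected in TEST_DATA:
    rows = [tuple(row) for row in graph.query(sparql)]
    assert rows == expected, "%s: got %r, expected %r" % (name, rows, expected)
    print("%s: ok" % name)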
class Store: def __init__(self, tripleFile): self.graph = ConjunctiveGraph() self.storefn = abspath(tripleFile) self.storeuri = 'file://' + self.storefn if exists(self.storefn): self.graph.load(self.storeuri, format='n3') self.graph.bind('mo', MusicOntology) self.graph.bind('ourvocab', OurVocab) self.graph.bind('dc', DC) self.graph.bind('foaf', foaf) self.graph.bind('geo', geo) self.graph.bind('dbpediaowl', dbpediaowl) self.graph.bind('rev', 'http://purl.org/stuff/rev#') def save(self): self.graph.serialize(self.storeuri, format='n3') def addTrack(self, mbid, track): trackuri = URIRef('http://musicbrainz.org/recording/%s#_' % mbid) self.graph.add((trackuri, RDF.type, MusicOntology.Track)) self.graph.add((trackuri, DC.title, Literal(track['name']))) self.graph.add( (trackuri, OurVocab.has_playcount, Literal(track['playcount']))) self.graph.add((trackuri, OurVocab.has_listener_count, Literal(track['listeners']))) if track['artist']['mbid'] != '': artisturi = URIRef('http://musicbrainz.org/artist/%s#_' % track['artist']['mbid']) self.graph.add((artisturi, RDF.type, MusicOntology.MusicArtist)) self.graph.add((trackuri, MusicOntology.performer, artisturi)) self.graph.add( (artisturi, foaf.name, Literal(track['artist']['name']))) if isinstance(track['toptags'], dict) and 'tag' in track['toptags'].keys(): for tag in track['toptags']['tag']: if isinstance(tag, dict): self.graph.add( (trackuri, OurVocab.has_tag, Literal(tag['name']))) def addArtist(self, trackMBID, artistData, trackData): trackuri = URIRef('http://musicbrainz.org/recording/%s#_' % trackMBID) #If there is no mbid, it means there is no earlier artist entry in triplestore if trackData['artist']['mbid'] == '': artisturi = URIRef(artistData['artist']['value'].encode('utf-8')) if artistData['artist']['type'] == 'artist': self.graph.add( (artisturi, RDF.type, MusicOntology.MusicArtist)) else: self.graph.add((artisturi, RDF.type, MusicOntology.MusicGroup)) self.graph.add((trackuri, MusicOntology.performer, artisturi)) self.graph.add( (artisturi, foaf.name, Literal(trackData['artist']['name'].encode('utf-8')))) #if there is an artist entry, make sure the artist/band association is appropriate else: artisturi = URIRef('http://musicbrainz.org/artist/%s#_' % trackData['artist']['mbid']) if artistData['artist']['type'] == "band" and\ (artisturi, RDF.type, MusicOntology.MusicArtist) in self.graph: self.graph.remove( (artisturi, RDF.type, MusicOntology.MusicArtist)) self.graph.add((artisturi, RDF.type, MusicOntology.MusicGroup)) #now the location data! if 'hometown' not in artistData.keys(): return if "http" in artistData['hometown']['value']: townuri = URIRef(artistData['hometown']['value'].encode('utf-8')) if (townuri, RDF.type, dbpediaowl.Place) not in self.graph: self.graph.add((townuri, RDF.type, dbpediaowl.Place)) if "hometownName" in artistData.keys(): self.graph.add((townuri, foaf.name, Literal(artistData['hometownName'] ['value'].encode('utf-8')))) if "coordinates" in artistData.keys(): self.graph.add((townuri, geo.geometry, Literal(artistData['coordinates'] ['value'].encode('utf-8')))) self.graph.add((artisturi, dbpediaowl.hometown, townuri)) else: self.graph.add((artisturi, dbpediaowl.hometown, Literal(artistData['hometown']['value']))) def _matchAlbum(self, trackInfo, albumFiles): """ A function to return the correct match of an album given a track. Deprecated for most cases where the match is done using mbids. Use only for cases where there is no mbid link betweeb album and track. 
""" try: albumName = trackInfo['album']['name'] artistName = trackInfo['artist']['name'] except: return None for af in albumFiles: albumInfo = json.load(file(af)) albumInfo = albumInfo['album'] if albumName == albumInfo['name'] and artistName == albumInfo[ 'artist']: return af def addAlbum(self, trackMBID, albumInfo): """ A function to add album data into triple store. At the moment, only the releasedate is taken from the album data. More to be added soon. """ try: albumInfo = albumInfo['album'] except: return if 'releasedate' not in albumInfo.keys(): return trackuri = URIRef('http://musicbrainz.org/recording/%s#_' % trackMBID) self.graph.add( (trackuri, OurVocab.has_releasedate, Literal(albumInfo['releasedate'].strip().encode('utf-8'))))
class MirbaseDB(object):
    def __init__(self, db_path):
        self.g = ConjunctiveGraph()
        self.path = db_path
        self.choices = set()
        self.labels = {}

    def create_graph(self):
        self.g.open(self.path + "data.rdf", create=True)
        data = self.parse_mirbase(self.path)
        #g = ConjunctiveGraph(store="SPARQLUpdateStore")
        # g.bind()
        mirna_class = URIRef("http://purl.obolibrary.org/obo/SO_0000276")
        for mid in data:
            mirna_instance = URIRef(MIRBASE + data[mid]["acc"])
            self.g.add((mirna_instance, RDF.type, mirna_class))
            label = Literal(data[mid]["name"])
            self.g.add((mirna_instance, RDFS.label, label))
            description = Literal(data[mid]["description"])
            self.g.add((mirna_instance, RDFS.comment, description))
            for p in data[mid]["previous_names"]:
                if p.strip():
                    previous_name = Literal(p)
                    self.g.add((mirna_instance, MIRBASE["previous_acc"], previous_name))
            # every entry is expected to have gained a "mature" dict while
            # parsing mirna_pre_mature.txt
            for mature in data[mid]["mature"]:
                mature_instance = URIRef(MIRBASE + data[mid]["mature"][mature]["acc"])
                self.g.add((mature_instance, RDF.type, mirna_class))
                mature_label = Literal(data[mid]["mature"][mature]["name"])
                self.g.add((mature_instance, RDFS.label, mature_label))
                for mature_p in data[mid]["mature"][mature]["previous_names"]:
                    if mature_p.strip():
                        mature_previous_name = Literal(mature_p)
                        self.g.add((mature_instance, MIRBASE["previous_acc"], mature_previous_name))
                self.g.add((mirna_instance, MIRBASE["stemloopOf"], mature_instance))

    def parse_mirbase(self, mirbase_root):
        mirna_dic = {}
        with open(mirbase_root + "mirna.txt") as mirnas:
            for m in mirnas:
                props = m.strip().split("\t")
                mname = props[2]
                mid = props[0]
                macc = props[1]
                mdesc = props[4]
                mprev = props[3].split(";")
                if int(props[-1]) != 22:  # not Homo sapiens
                    continue
                mirna_dic[mid] = {}
                mirna_dic[mid]["name"] = mname
                mirna_dic[mid]["acc"] = macc
                mirna_dic[mid]["previous_names"] = mprev
                mirna_dic[mid]["description"] = mdesc
        mature_dic = {}
        with open(mirbase_root + "mirna_mature.txt") as mirnas:
            for m in mirnas:
                props = m.strip().split("\t")
                mname = props[1]
                mid = props[0]
                macc = props[3]
                # mdesc = props[4]
                mprev = props[2].split(";")
                if not mname.startswith("hsa-"):  # not Homo sapiens
                    continue
                mature_dic[mid] = {}
                mature_dic[mid]["name"] = mname
                mature_dic[mid]["previous_names"] = mprev
                mature_dic[mid]["acc"] = macc
        with open(mirbase_root + "mirna_pre_mature.txt") as mirnas:
            for m in mirnas:
                props = m.strip().split("\t")
                mid, matureid = props[:2]
                if mid in mirna_dic:
                    if "mature" not in mirna_dic[mid]:
                        mirna_dic[mid]["mature"] = {}
                    mirna_dic[mid]["mature"][matureid] = mature_dic[matureid]
        # pp.pprint(mirna_dic)
        return mirna_dic

    def map_label(self, label):
        label = label.lower()
        label = label.replace("microrna", "mir")
        label = label.replace("mirna", "mir")
        if not label.startswith("hsa-"):
            label = "hsa-" + label
        result = process.extractOne(label, self.choices)
        # result = process.extract(label, choices, limit=3)
        """if result[1] != 100:
            print
            print "original:", label.encode("utf-8"), result
            # if label[-1].isdigit():
            #     label += "a"
            # else:
            new_label = label + "-1"
            revised_result = process.extractOne(new_label, self.choices)
            if revised_result[1] != 100:
                new_label = label + "a"
                revised_result = process.extractOne(new_label, self.choices)
            if revised_result[1] > result[1]:
                result = revised_result
            print "revised:", label.encode("utf-8"), result"""
        return result

    def load_graph(self):
        self.g.load(self.path + "data.rdf")
        # print "Opened graph with {} triples".format(len(self.g))
        self.get_label_to_acc()
        self.choices = self.labels.keys()

    def get_label_to_acc(self):
        for subj, pred, obj in self.g.triples((None, RDFS.label, None)):
            self.labels[str(obj)] = str(subj)

    def save_graph(self):
        self.g.serialize(self.path + "data.rdf", format='pretty-xml')
        print('Triples in graph after add: ', len(self.g))
        self.g.close()
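# A usage sketch for MirbaseDB. db_path is assumed to be a directory that
# contains the miRBase dump files (mirna.txt, mirna_mature.txt,
# mirna_pre_mature.txt) and to end in a path separator, because the class
# concatenates file names directly onto it.
db = MirbaseDB("./mirbase/")
db.create_graph()   # parse the dumps and populate the RDF graph
db.save_graph()     # write data.rdf into the same directory
db.load_graph()     # reload and index labels for fuzzy matching
print(db.map_label("miR-21"))  # e.g. a (label, score) pair from fuzzywuzzy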
def convert(teifile, namespace): #graph_uri = "http://contextus.net/resource/blue_velvet/" ns = Namespace(namespace) graph = ConjunctiveGraph() graph.load(teifile, format="rdfa") graph.bind("default", ns) to_update = "" for prefix, nsuri in graph.namespaces(): #print("prefix: " + str(prefix) + " - " + str(nsuri)) if nsuri in ns: to_update = nsuri for s, p, o in graph: # print s, p, o if to_update != "" and to_update in s: graph.remove((s, p, o)) s = URIRef(s.replace(to_update, ns)) graph.add((s, p, o)) act = "" scene = "" line = "" char = 0 loc = 0 #timeline = ns['timeline/narrative'] #graph.add((timeline, RDF.type, ome['Timeline'])) tree = ET.parse(teifile) cast = dict() titleNode = tree.find('//title') castItems = tree.findall('/text/body/div1/castList//castItem') for castItem in castItems: actorNode = castItem.find('actor') roleNode = castItem.find('role') if roleNode != None: id = roleNode.get("{http://www.w3.org/XML/1998/namespace}id") #print("Found castItem!") actor = None role = None # Check to see if we already have an entry if (roleNode != None and roleNode.get("about")): charname = roleNode.get("about") if (charname.find(":") > -1): nmsp, nom = charname.split(":", 1) charcode = "character/" + str(char) charref = nmsp + ":" + charcode + "]" role = extractCURIEorURI(graph, charref, nom[0:-1]) char += 1 #print("1:" + charname + ": adding id " + id + " to " + role) else: role = extractCURIEorURI(graph, charname) #print("2:" + charname + ": adding id " + id + " to " + role) cast[id] = role graph.add((role, RDF.type, omb['Character'])) #print(charname + ": adding id " + id + " to " + role) if (actorNode != None and actorNode.get("about")): actor = extractCURIEorURI(graph, actorNode.get("about")) graph.add((actor, RDF.type, omb['Being'])) if actor != None and role != None: graph.add((actor, omb['portrays'], role)) graph.add((role, omb['portrayed-by'], actor)) eventCount = 1 groupCount = 1 prior_event = None actItems = tree.findall('/text/body/div1') ref = "" for actItem in actItems: if actItem.get("type") == "act": act = actItem.get("n") sceneItems = actItem.findall('div2') for sceneItem in sceneItems: #print("Found sceneItems!") if sceneItem.get("type") == "scene": scene = sceneItem.get("n") # Work out the location of this scene location = None stageItems = sceneItem.findall("stage") #internalnum = 1 stagenum = 0 speechnum = 1 for stageItem in stageItems: if stageItem.get("type") == "location": # The RDFa parser doesn't handle the type - so we can grab that here. if stageItem.get("about") != None: locname = stageItem.get("about") # Adding location type/oml:space for location if stageItem.get("typeof") and stageItem.get("about"): type = extractCURIEorURI(graph, stageItem.get("typeof")) #print "1. Location: " + str(location) + " Type: " + str(type) elif stageItem.get("about"): #print "2. 
Location: " + str(locname) type = extractCURIEorURI(graph, oml['Space']) # Get location value and add rdfs:label is location is not using the TEI value if (locname.find(":") > -1): nmsp, nom = locname.split(":", 1) loccode = "location/" + str(loc) locref = nmsp + ":" + loccode + "]" location = extractCURIEorURI( graph, locref, nom[0:-1]) loc += 1 graph.add(( location, rdflib.URIRef( 'http://www.w3.org/2000/01/rdf-schema#label' ), Literal(nom[0:-1]))) else: location = extractCURIEorURI( graph, stageItem.get("about")) # Add location to graph graph.add((location, RDF.type, type)) else: location = "" #print("Adding location type: " + type + " (" + location + ")") if cast: # Work out a list of all cast in a given section currentCast = list() speakers = list() # Iterate through elements within stageItem # Find speaker events and add to list of current cast for inclusion in social event # Find reference events and add to ongoing social event ? # Find stage events # If event is an entrance then # create social event for people talking before entrance # create travel event i.e. entrance # add new arrival to current cast list # If event is exit event then # create social event for people talking before exit # create travel event i.e. exit # if leavers are not named directly the calculate who is leaving # remove leavers from current cast list # If reach end of scene then create social event with current cast list #Also need to check if social event before exit has same composition as social event after exit since then they should be merged event = ns['event/' + str(eventCount)] group = ns['group/' + str(groupCount)] refersTo = list() #parent = None speakerNodes = list() speakerRef = list() xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str( perseusid) + ":act=" + str(act) + ":scene=" + str(scene) stagecount = 0 stage_array = list() for node in sceneItem.getiterator(): #print("Node: " + node.tag) """ if node.tag == "lb": if node.get("ed") == "F1": line = node.get("n") if titleNode != None: ref = titleNode.text + " " + str(act) + "." + str(scene) + "." + str(line) else: ref = str(act) + "." + str(scene) + "." 
+ str(line) #xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene) + "#xpointer(//lb[@ed='F1' and @n='" + str(line) + "'])" xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene) #print("Ref: " + xpointer) """ if node.tag == "sp": id = node.get("who") if id and cast: speakers.append(cast[id[1:]]) speakerNodes.append(node) if perseusid == None: speakerRef.append(ref) else: #speechRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(int(line) + 1) + "']/ancestor::sp)" speechRef = xpointer + "#xpointer(//div2/sp[" + str( speechnum) + "])" speakerRef.append(speechRef) #print("Line ref: " + ref) if cast[id[1:]] not in currentCast: currentCast.append(cast[id[1:]]) #internalnum = 1 speechnum += 1 stagecount = 0 previousl = 0 for subnode in node.getiterator(): if subnode.tag == "l": previousl += 1 if subnode.tag == "stage": #print ("Stagecount: " + str(stagecount) + " Previousl: " + str(previousl) + "\n") stage_array.append(previousl) stagecount += 1 elif node.tag == "stage": if stagecount > 0: s_max = len(stage_array) diff = s_max - stagecount #if diff == 0: # stagenum += 1 entRef = xpointer + "#xpointer(//div2/sp[" + str( speechnum - 1) + "]/l[" + str( stage_array[diff]) + "]/stage)" #internalnum += 1 stagecount -= 1 else: stagenum += 1 entRef = xpointer + "#xpointer(//div2/stage[" + str( stagenum) + "])" if node.get("type") == "entrance": # Add Social Events for all the people who spoke since the last break (if there were any) update = list() update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location) eventCount = update[0] prior_event = update[1] event = ns['event/' + str(eventCount)] speakers = list() speakerNodes = list() speakerRef = list() # Add Travel Event graph.add((event, RDF.type, omj['Travel'])) if perseusid == None: graph.add(( event, rdflib.URIRef( "http://www.w3.org/2000/01/rdf-schema#seeAlso" ), Literal(ref))) else: #entRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(line) + "']/following-sibling::*[1]/self::stage)" graph.add(( event, rdflib.URIRef( "http://www.w3.org/2000/01/rdf-schema#seeAlso" ), URIRef(entRef))) #print("Entrance event. 
GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast))) #print("Found entrence event!") if location: graph.add((event, ome['to'], location)) involved = node.get("about") if (len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"): involved = involved[1:-1] chunks = involved.split() chunk_count = len(chunks) if chunk_count > 1: #type = extractCURIEorURI(graph, "[omb:Group]") #graph.add((group, RDF.type, type)) graph.add((group, RDF.type, omb['Group'])) event_label = "" en = 1 for chunk in chunks: striped = chunk.strip() if (len(striped) > 0 and striped[0] == "[" and striped[-1] == "]"): striped = striped[1:-1] currentCast.append(cast[striped]) if chunk_count > 1: graph.add( (group, ome['contains'], cast[striped])) if en == chunk_count: event_label = event_label[ 0:-2] + " and " + striped graph.add(( event, rdflib.URIRef( 'http://www.w3.org/2000/01/rdf-schema#label' ), Literal(event_label + " arrive"))) elif en < chunk_count: event_label += striped + ", " else: #print("Adding person as subject-entity to entry event " + str(eventCount)) graph.add(( event, rdflib.URIRef( 'http://www.w3.org/2000/01/rdf-schema#label' ), Literal(striped + " arrives"))) graph.add((event, ome['has-subject-entity'], cast[striped])) en += 1 if chunk_count > 1: graph.add( (event, ome['has-subject-entity'], group)) #print("Adding group as subject-entity to entry event " + str(eventCount)) groupCount = groupCount + 1 group = ns['group/' + str(groupCount)] if (prior_event): graph.add((event, ome['follows'], prior_event)) graph.add((prior_event, ome['precedes'], event)) prior_event = event eventCount = eventCount + 1 event = ns['event/' + str(eventCount)] if node.get("type") == "exit": # Add Social Events for all the people who spoke since the last break (if there were any) update = list() update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location) eventCount = update[0] prior_event = update[1] event = ns['event/' + str(eventCount)] speakers = list() speakerNodes = list() speakerRef = list() # Add Travel Event graph.add((event, RDF.type, omj['Travel'])) if perseusid == None: graph.add(( event, rdflib.URIRef( "http://www.w3.org/2000/01/rdf-schema#seeAlso" ), Literal(ref))) else: #exitRef = xpointer #graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(exitRef))) graph.add(( event, rdflib.URIRef( "http://www.w3.org/2000/01/rdf-schema#seeAlso" ), URIRef(entRef))) #print("Found entrence event!") if location != None: graph.add((event, ome['from'], location)) involved = node.get("about") if involved.strip() == "" or "-all" in involved: # Remove everyone #print("Exit all. 
GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast))) #for peep in currentCast: # print(peep) if len(currentCast) > 1: #type = extractCURIEorURI(graph, "[omb:Group]") #graph.add((group, RDF.type, type)) graph.add((group, RDF.type, omb['Group'])) event_label = "" en = 1 for peep in currentCast: short_ref = "" for key, value in cast.iteritems(): if peep == value: short_ref = key if len(currentCast) > 1: graph.add((group, ome['contains'], peep)) if en == len(currentCast): event_label = event_label[ 0:-2] + " and " + short_ref graph.add(( event, rdflib.URIRef( 'http://www.w3.org/2000/01/rdf-schema#label' ), Literal(event_label + " leave"))) elif en < len(currentCast): event_label += short_ref + ", " else: #print("Adding person as subject-entity to exuant event " + str(eventCount)) graph.add( (event, ome['has-subject-entity'], peep)) graph.add(( event, rdflib.URIRef( 'http://www.w3.org/2000/01/rdf-schema#label' ), Literal(short_ref + " leaves"))) en += 1 if len(currentCast) > 1: graph.add( (event, ome['has-subject-entity'], group)) #print("Adding group as subject-entity to exuant event " + str(eventCount)) groupCount = groupCount + 1 group = ns['group/' + str(groupCount)] currentCast = list() elif "!" in involved: #print("Exit except some. GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast))) #print("Event: " + involved); if (len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"): involved = involved[1:-1] involved = involved.strip() if (len(involved) > 0 and involved[0] == "!" and involved[1] == "(" and involved[-1] == ")"): involved = involved[2:-1] #print("involved: " + involved) striped = involved.strip() c_ids = striped.split() chunks = list() for stay in c_ids: #print("Staying: " + cast[stay]) chunks.append(cast[stay]) staying = list() going = list() for player in currentCast: #print("Player: " + player) if player in chunks: staying.append(player) else: going.append(player) going_count = len(going) if going_count > 1: #type = extractCURIEorURI(graph, "[omb:Group]") #graph.add((group, RDF.type, type)) graph.add((group, RDF.type, omb['Group'])) event_label = "" en = 1 for ghost in going: #print("ghost: " + ghost) short_ref = "" for key, value in cast.iteritems(): if ghost == value: short_ref = key if ghost in currentCast: currentCast.remove(ghost) #print("Current cast count: " + str(len(currentCast))) if going_count > 1: graph.add((group, ome['contains'], ghost)) if en == len(going): event_label = event_label[ 0:-2] + " and " + short_ref graph.add(( event, rdflib.URIRef( 'http://www.w3.org/2000/01/rdf-schema#label' ), Literal(event_label + " leave"))) elif en < len(going): event_label += short_ref + ", " else: #print("Adding person as subject-entity to exit event " + str(eventCount)) graph.add( (event, ome['has-subject-entity'], ghost)) graph.add(( event, rdflib.URIRef( 'http://www.w3.org/2000/01/rdf-schema#label' ), Literal(short_ref + " leaves"))) en += 1 if going_count > 1: graph.add( (event, ome['has-subject-entity'], group)) #print("Adding group as subject-entity to exit event " + str(eventCount)) groupCount = groupCount + 1 group = ns['group/' + str(groupCount)] else: #print("Exit some. 
GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast))) if (len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"): involved = involved[1:-1] striped = involved.strip() chunks = striped.split() #print("striped: " + striped) chunk_count = len(chunks) if chunk_count > 1: #type = extractCURIEorURI(graph, "[omb:Group]") #graph.add((group, RDF.type, type)) graph.add((group, RDF.type, omb['Group'])) event_label = "" en = 1 for chunk in chunks: #print("chunk: " + chunk) ghost = cast[chunk] #print("ghost: " + ghost) if ghost in currentCast: currentCast.remove(ghost) #print("Current cast count: " + str(len(currentCast))) if chunk_count > 1: graph.add((group, ome['contains'], ghost)) if en == len(currentCast): event_label = event_label[ 0:-2] + " and " + chunk graph.add(( event, rdflib.URIRef( 'http://www.w3.org/2000/01/rdf-schema#label' ), Literal(event_label + " leave"))) elif en < len(currentCast): event_label += chunk + ", " else: #print("Adding person as subject-entity to exit event " + str(eventCount)) graph.add( (event, ome['has-subject-entity'], ghost)) graph.add(( event, rdflib.URIRef( 'http://www.w3.org/2000/01/rdf-schema#label' ), Literal(chunk + " leaves"))) en += 1 if chunk_count > 1: graph.add( (event, ome['has-subject-entity'], group)) #print("Adding group as subject-entity to exit event " + str(eventCount)) groupCount = groupCount + 1 group = ns['group/' + str(groupCount)] if (prior_event): graph.add((event, ome['follows'], prior_event)) graph.add((prior_event, ome['precedes'], event)) prior_event = event eventCount = eventCount + 1 event = ns['event/' + str(eventCount)] #elif node.tag == "rs": # #print("Found rs node") # if parent: # #print("Parent type is " + parent.tag) # if parent.tag == "p" or parent.tag == "l": # refersTo.append(node.get("about")) #parent = node # Add Social Events for all the people who spoke since the last break (if there were any) #print("Final section of scene, currentCast:" + str(len(currentCast)) + " sperkers: " + str(len(speakers))) update = list() update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location) eventCount = update[0] prior_event = update[1] event = ns['event/' + str(eventCount)] group = ns['group/' + str(groupCount)] speakers = list() speakerNodes = list() currentCast = list() speakerRef = list() print graph.serialize(format='xml')
def convert(teifile, namespace): #graph_uri = "http://contextus.net/resource/blue_velvet/" ns = Namespace(namespace) graph = ConjunctiveGraph() graph.load(teifile, format="rdfa") tree = ET.parse(teifile) cast = dict() castItems = tree.findall('/text/body/div1/castList//castItem') for castItem in castItems: actorNode = castItem.find('actor') roleNode = castItem.find('role') id = roleNode.get("{http://www.w3.org/XML/1998/namespace}id") #print("Found castItem!") actor = None role = None # Check to see if we already have an entry if(roleNode != None and roleNode.get("about")): role = extractCURIEorURI(graph, roleNode.get("about")) cast[id] = role graph.add((role, RDF.type, omb['Character'])) #print("Adding id " + id + " to " + role) if(actorNode != None and actorNode.get("about")): actor = extractCURIEorURI(graph, actorNode.get("about")) graph.add((actor, RDF.type, omb['Being'])) if actor != None and role != None: graph.add((actor, omb['portrays'], role)) graph.add((role, omb['portrayed-by'], actor)) eventCount = 1 prior_event = None sceneItems = tree.findall('/text/body/div1/div2') for sceneItem in sceneItems: #print("Found sceneItems!") # Work out the location of this scene location = None stageItems = sceneItem.findall("stage") for stageItem in stageItems: if stageItem.get("type") == "location": # The RDFa parser doesn't handle the type - so we can grab that here. if stageItem.get("typeof") and stageItem.get("about"): type = extractCURIEorURI(graph, stageItem.get("typeof")) location = extractCURIEorURI(graph, stageItem.get("about")) graph.add((location, RDF.type, type)) elif stageItem.get("about"): type = extractCURIEorURI(graph, "[loc:Space]") location = extractCURIEorURI(graph, stageItem.get("about")) graph.add((location, RDF.type, type)) #print("Adding location type: " + type + " (" + location + ")") if cast: # Work out a list of all cast in a given section currentCast = list() previousCast = list() # Iterate through elements within stageItem # Find speaker events and add to list of current cast for inclusion in social event # Find reference events and add to ongoing social event ? # Find stage events # If event is an entrance then # create social event for people talking before entrance # create travel event i.e. entrance # add new arrival to current cast list # If event is exit event then # create social event for people talking before exit # create travel event i.e. 
exit
# if leavers are not named directly then calculate who is leaving
# remove leavers from current cast list
# If reach end of scene then create social event with current cast list
#Also need to check if social event before exit has same composition as social event after exit since then they should be merged
event = ns['event/'+str(eventCount)]
group = ns['group/'+str(eventCount)]

for node in sceneItem.getiterator():
    #print("Node: " + node.tag)
    if node.tag == "sp":
        id = node.get("who")  # the speaker reference on the current <sp> element
        if id and cast:
            currentCast.append(cast[id[1:]])
    elif node.tag == "stage":
        if node.get("type") == "entrance":
            # Add Social Event if there are people in the CurrentCast list
            # Add Travel Event
            graph.add((event, RDF.type, omj['Travel']))
            #print("Found entrance event!")
            if location:
                graph.add((event, ome['to'], location))
            involved = node.get("about")  # the stage direction currently being visited
            if(len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"):
                involved = involved[1:-1]
            chunks = involved.split()
            chunk_count = len(chunks)
            if chunk_count > 1:
                type = extractCURIEorURI(graph, "[omb:Group]")
                graph.add((group, RDF.type, type))
            for chunk in chunks:
                striped = chunk.strip()
                peep = extractCURIEorURI(graph, striped)
                if chunk_count > 1:
                    graph.add((group, ome['contains'], peep))
                else:
                    graph.add((event, ome['has-subject-entity'], peep))
            if chunk_count > 1:
                graph.add((event, ome['has-subject-entity'], group))
            if(prior_event):
                graph.add((event, ome['follows'], prior_event))
                graph.add((prior_event, ome['precedes'], event))
            prior_event = event
            eventCount = eventCount + 1
            event = ns['event/'+str(eventCount)]
            group = ns['group/'+str(eventCount)]

print graph.serialize(format='xml')
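# This earlier, simpler variant of convert() only handles entrance events and
# never removes anyone from currentCast, so exits are effectively ignored;
# the longer version above adds exit handling, group membership and
# human-readable event labels. Illustrative call only (the commented-out
# graph_uri in the function suggests the blue_velvet namespace; the file
# name is hypothetical):
convert("data/blue_velvet_tei.xml", "http://contextus.net/resource/blue_velvet/")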
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . @prefix xsd: <http://www.w3.org/2001/XMLSchema#> . @prefix : <tag://example.org,2007/literals-test#> . <http://example.org/thing> :plain "plain"; :integer 1; :float 1.1e0; :decimal 1.1 ; :string "string"^^xsd:string; :date "2007-04-28"^^xsd:date; :escape "a \\"test\\""; rdfs:label "Thing"@en, "Sak"@sv . """ graph = ConjunctiveGraph() graph.load(StringIO(testRdf), format='n3') PROLOGUE = """ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> PREFIX t: <tag://example.org,2007/literals-test#> """ thing = URIRef("http://example.org/thing") SPARQL = PROLOGUE + " SELECT ?uri WHERE { ?uri %s . } " TEST_DATA = [('plain', SPARQL % 't:plain "plain"', [(thing, )]), ('integer', SPARQL % 't:integer 1', [(thing, )]), ('decimal', SPARQL % 't:decimal 1.1', [(thing, )]), ('float', SPARQL % 't:float 1.1e0', [(thing, )]), ('langlabel_en', SPARQL % 'rdfs:label "Thing"@en', [(thing, )]),
def mma2RDF(infilename, outfilename, format="xml", audiofilename=None, withdescriptions=False): if withdescriptions: commonchords = ConjunctiveGraph() commonchords.load("CommonChords.rdf") extrachords = ConjunctiveGraph() # Compile mma file and grab output lines = os.popen(mmabin + ' "' + infilename + '" -nrw').readlines() print "\n".join(lines) # # Initial model bits # mi = mopy.MusicInfo() homepage = mopy.foaf.Document("http://sourceforge.net/projects/motools") mi.add(homepage) program = mopy.foaf.Agent() program.name = "mma2RDF.py" program.homepage = homepage mi.add(program) tl = TimeLine("#tl") tl.label = "Timeline derived from " + infilename tl.maker = program mi.add(tl) # extract tempo from mma file tempo = 60 mmafile = open(infilename, "r") for line in mmafile: if line.startswith("Tempo "): tempo = int(line[len("Tempo ") :].strip().split()[0]) print "Found tempo = " + str(tempo) break lineNum = 1 thisBar_i = None i = None t_secs = 0.0 for line in lines: print "parsing line " + str(lineNum) try: # i = None barNum = getBarNum(line) lastBar_i = thisBar_i thisBar_i = Interval("#i_" + str(barNum)) thisBar_i.beginsAtDuration = secondsToXSDDuration(t_secs) if lastBar_i != None: lastBar_i.endsAtDuration = secondsToXSDDuration(t_secs) thisBar_i.intervalAfter = lastBar_i lastBar_i.intervalBefore = thisBar_i mi.add(thisBar_i) chordMMASymbols = getChordSymbols(line) beatNum = 1 for chordMMASymbol in chordMMASymbols: if chordMMASymbol != "/": print " handling new chord symbol" if i != None: print " terminating last interval" i.endsAtDuration = secondsToXSDDuration(t_secs) mi.add(i) i = Interval("#i_" + str(barNum) + "_" + str(beatNum)) i.onTimeLine = tl i.beginsAtDuration = secondsToXSDDuration(t_secs) chordURI = "http://purl.org/ontology/chord/symbol/" + mmaSymbolToChordSymbol( chordMMASymbol ).replace("#", "s").replace(",", "%2C") if ( withdescriptions and len(list(commonchords.predicate_objects(URIRef(chordURI)))) == 0 and len(list(extrachords.predicate_objects(URIRef(chordURI)))) == 0 ): # Deref to grab chord info print "loading <" + chordURI + ">..." extrachords.load(chordURI) c = Chord(chordURI) c_event = ChordEvent("#ce_" + str(barNum) + "_" + str(beatNum)) c_event.chord = c c_event.time = i c_event.label = mmaSymbolToChordSymbol(chordMMASymbol) mi.add(c) mi.add(c_event) mi.add(i) print " added new chord event for " + chordURI else: if beatNum == 1: # Need to continue the last seen chord print " continuing last bar's chord" # i = Interval("i_"+str(barNum)+"_"+str(beatNum)) # i.onTimeLine = tl # i.beginsAtDuration = secondsToXSDDuration(t_secs) # c_event = ChordEvent("ce_"+str(barNum)+"_"+str(beatNum)) # c_event.chord = c # c_event.time = i # mi.add(c_event); mi.add(i) beatNum += 1 t_secs += 60.0 / tempo except Exception, e: print ("ERROR : Problem parsing input file at line " + str(lineNum) + " !\n") raise lineNum += 1
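# Invoking mma2RDF on an MMA source file. mmabin, mopy, TimeLine, Interval,
# Chord, ChordEvent and the helper functions (getBarNum, getChordSymbols,
# mmaSymbolToChordSymbol, secondsToXSDDuration) are module globals defined
# elsewhere, so this call is illustrative only and the file names are made up.
mma2RDF("song.mma", "song.rdf", format="xml", withdescriptions=False)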
class PreProcessor(object): def __init__(self, kg_path): self.kg_path = kg_path self.ent_dict = dict() self.rel_dict = dict() self.g = ConjunctiveGraph() self.unique_msgs = self.ent_dict.copy() def load_knowledge_graph(self, format='xml', exclude_rels=[], clean_schema=True, amberg_params=None, excluded_entities=None): self.g.load(self.kg_path, format=format) # remove triples with excluded relation remove_rel_triples(self.g, exclude_rels) # remove triples with relations between class-level constructs if clean_schema: remove_rel_triples(self.g, schema_relations) if excluded_entities is not None: remove_ent_triples(self.g, excluded_entities) if amberg_params: path_to_events = amberg_params[0] max_events = amberg_params[1] self.merged = get_merged_dataframe(path_to_events, max_events) self.unique_msgs, unique_vars, unique_mods, unique_fes = get_unique_entities( self.merged) update_amberg_ontology(self.g, self.ent_dict, self.unique_msgs, unique_mods, unique_fes, unique_vars, self.merged) self.update_entity_relation_dictionaries() def update_entity_relation_dictionaries(self): """ Given an existing entity dictionary, update it to *ontology* :param ontology: :param ent_dict: the existing entity dictionary :return: """ ent_counter = 0 fixed_ids = set([id for id in self.ent_dict.values()]) # sorting ensures equal random splits on equal seeds for h in sorted( set(self.g.subjects(None, None)).union( set(self.g.objects(None, None)))): uni_h = unicode(h) if uni_h not in self.ent_dict: while ent_counter in fixed_ids: ent_counter += 1 self.ent_dict.setdefault(uni_h, ent_counter) ent_counter += 1 # add new relations to dict for r in sorted(set(self.g.predicates(None, None))): uni_r = unicode(r) if uni_r not in self.rel_dict: self.rel_dict.setdefault(uni_r, len(self.rel_dict)) def load_unique_msgs_from_txt(self, path, max_events=None): """ Assuming csv text files with two columns :param path: :return: """ with open(path, "rb") as f: for line in f: split = line.split(',') try: emb_id = int(split[1].strip()) except: print("Error reading id of {0} in given dictionary".format( line)) # skip this event entitiy, treat it as common entitiy later on continue self.ent_dict[split[0]] = emb_id # sort ascending w.r.t. embedding id, in case of later stripping # self.ent_dict = sorted(self.ent_dict.items(), key=operator.itemgetter(1), reverse=False) self.unique_msgs = self.ent_dict.copy() if max_events is not None: all_msgs = sorted(self.unique_msgs.items(), key=operator.itemgetter(1), reverse=False) self.unique_msgs = dict(all_msgs[:max_events]) excluded_events = dict(all_msgs[max_events:]).keys() return excluded_events def prepare_sequences(self, path_to_input, use_dict=True): """ Dumps pickle for sequences and dictionary :param data_frame: :param file_name: :param index: :param classification_event: :return: """ print("Preparing sequential data...") with open(path_to_input, "rb") as f: result = [] for line in f: entities = line.split(',') if use_dict: result.append([ int(e.strip()) for e in entities if int(e.strip()) in self.unique_msgs.values() ]) else: result.append([int(e.strip()) for e in entities]) print("Processed {0} sequences".format(len(result))) return result def get_vocab_size(self): return len(self.unique_msgs) def get_ent_dict(self): return self.ent_dict def get_rel_dict(self): return self.rel_dict def get_kg(self): return self.g def get_unique_msgs(self): return self.unique_msgs def get_merged(self): return self.merged
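# A usage sketch for PreProcessor: load a knowledge graph, drop schema-level
# triples, then build the entity and relation id dictionaries used by the
# embedding code downstream. The file name and the excluded relation URI are
# placeholders.
pre = PreProcessor("./data/knowledge_graph.rdf")
pre.load_knowledge_graph(format='xml',
                         exclude_rels=['http://example.org/ignoredRelation'],
                         clean_schema=True)
print("entities: %d, relations: %d" % (len(pre.get_ent_dict()),
                                       len(pre.get_rel_dict())))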
def query_test(t): uri, name, comment, data, graphdata, query, resfile, syntax = t # the query-eval tests refer to graphs to load by resolvable filenames rdflib_sparql_module.SPARQL_LOAD_GRAPHS = True if uri in skiptests: raise SkipTest() def skip(reason="(none)"): print "Skipping %s from now on." % uri f = open("skiptests.list", "a") f.write("%s\t%s\n" % (uri, reason)) f.close() try: g = ConjunctiveGraph() if data: g.default_context.load(data, format=_fmt(data)) if graphdata: for x in graphdata: g.load(x, format=_fmt(x)) if not resfile: # no result - syntax test if syntax: translateQuery(parseQuery(open(query[7:]).read()), base=urljoin(query, ".")) else: # negative syntax test try: translateQuery(parseQuery(open(query[7:]).read()), base=urljoin(query, ".")) assert False, "Query should not have parsed!" except: pass # it's fine - the query should not parse return # eval test - carry out query res2 = g.query(open(query[7:]).read(), base=urljoin(query, ".")) if resfile.endswith("ttl"): resg = Graph() resg.load(resfile, format="turtle", publicID=resfile) res = RDFResultParser().parse(resg) elif resfile.endswith("rdf"): resg = Graph() resg.load(resfile, publicID=resfile) res = RDFResultParser().parse(resg) elif resfile.endswith("srj"): res = Result.parse(open(resfile[7:]), format="json") elif resfile.endswith("tsv"): res = Result.parse(open(resfile[7:]), format="tsv") elif resfile.endswith("csv"): res = Result.parse(open(resfile[7:]), format="csv") # CSV is lossy, round-trip our own resultset to # lose the same info :) # write bytes, read strings... s = BytesIO() res2.serialize(s, format="csv") print s.getvalue() s = StringIO(s.getvalue().decode("utf-8")) # hmm ? res2 = Result.parse(s, format="csv") else: res = Result.parse(open(resfile[7:]), format="xml") if not DETAILEDASSERT: eq(res.type, res2.type, "Types do not match") if res.type == "SELECT": eq(set(res.vars), set(res2.vars), "Vars do not match") comp = bindingsCompatible(set(res), set(res2)) assert comp, "Bindings do not match" elif res.type == "ASK": eq(res.askAnswer, res2.askAnswer, "Ask answer does not match") elif res.type in ("DESCRIBE", "CONSTRUCT"): assert isomorphic(res.graph, res2.graph), "graphs are not isomorphic!" else: raise Exception("Unknown result type: %s" % res.type) else: eq(res.type, res2.type, "Types do not match: %r != %r" % (res.type, res2.type)) if res.type == "SELECT": eq(set(res.vars), set(res2.vars), "Vars do not match: %r != %r" % (set(res.vars), set(res2.vars))) assert bindingsCompatible(set(res), set(res2)), "Bindings do not match: \n%s\n!=\n%s" % ( res.serialize(format="txt", namespace_manager=g.namespace_manager), res2.serialize(format="txt", namespace_manager=g.namespace_manager), ) elif res.type == "ASK": eq( res.askAnswer, res2.askAnswer, "Ask answer does not match: %r != %r" % (res.askAnswer, res2.askAnswer), ) elif res.type in ("DESCRIBE", "CONSTRUCT"): assert isomorphic(res.graph, res2.graph), "graphs are not isomorphic!" 
else: raise Exception("Unknown result type: %s" % res.type) except Exception, e: if isinstance(e, AssertionError): failed_tests.append(uri) fails[str(e)] += 1 else: error_tests.append(uri) errors[str(e)] += 1 if DEBUG_ERROR and not isinstance(e, AssertionError) or DEBUG_FAIL: print "======================================" print uri print name print comment if not resfile: if syntax: print "Positive syntax test" else: print "Negative syntax test" if data: print "----------------- DATA --------------------" print ">>>", data print open(data[7:]).read() if graphdata: print "----------------- GRAPHDATA --------------------" for x in graphdata: print ">>>", x print open(x[7:]).read() print "----------------- Query -------------------" print ">>>", query print open(query[7:]).read() if resfile: print "----------------- Res -------------------" print ">>>", resfile print open(resfile[7:]).read() try: pq = parseQuery(open(query[7:]).read()) print "----------------- Parsed ------------------" pprintAlgebra(translateQuery(pq, base=urljoin(query, "."))) except: print "(parser error)" print decodeStringEscape(unicode(e)) import pdb pdb.post_mortem(sys.exc_info()[2]) # pdb.set_trace() # nose.tools.set_trace() raise
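# query_test() is meant to be driven by a generator that yields one tuple per
# entry of the W3C SPARQL test-suite manifest; the tuple layout matches the
# unpacking at the top of query_test(). The manifest reader is not shown, so
# the driver below is only a sketch.
from nose import SkipTest  # matches the SkipTest raised for skiptests entries

def run_suite(manifest_entries):
    for t in manifest_entries:
        try:
            query_test(t)
        except SkipTest:
            continue  # test was on the skip list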