Example #1
 def construct(self, strOrTriple, initBindings={}, initNs={}):
     """
     Executes a SPARQL Construct
     :param strOrTriple: can be either
     
      * a string, in which case it is treated as a CONSTRUCT query
      * a triple, in which case it acts like rdflib's `triples((s,p,o))`
     
     :param initBindings:  A mapping from a Variable to an RDFLib term (used as initial bindings for SPARQL query)
     :param initNs:  A mapping from a namespace prefix to a namespace
     
     :returns: an instance of rdflib.ConjunctiveGraph('IOMemory')
     """
     if isinstance(strOrTriple, str):
         query = strOrTriple
         if initNs:
             prefixes = ''.join(["prefix %s: <%s>\n"%(p,n) for p,n in initNs.items()])
             query = prefixes + query
     else:
         s,p,o = strOrTriple
         t='%s %s %s'%((s and s.n3() or '?s'),(p and p.n3() or '?p'),(o and o.n3() or '?o'))
         query='construct {%s} where {%s}'%(t,t)
     query = dict(query=query)
     
     url = self.url+"?"+urlencode(query)
     req = Request(url)
     req.add_header('Accept','application/rdf+xml')
     log.debug("Request url: %s\n  with headers: %s" % (req.get_full_url(), req.header_items()))        
     subgraph = ConjunctiveGraph('IOMemory')
     subgraph.parse(urlopen(req))
     return subgraph
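A usage sketch for the method above; `endpoint` is a hypothetical instance of the wrapper class this method belongs to, and the URIs are placeholders:

from rdflib import URIRef

# query-string form, with prefixes supplied through initNs
g1 = endpoint.construct(
    "CONSTRUCT {?s a foaf:Person} WHERE {?s a foaf:Person}",
    initNs={"foaf": "http://xmlns.com/foaf/0.1/"})

# triple-pattern form: fetch every triple about one subject
g2 = endpoint.construct((URIRef("http://example.org/alice"), None, None))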
Example #2
    def generate(cls, utensils):
        graph = ConjunctiveGraph()
        load_rdf_file(STORE['actions'], graph)

        for utensil in utensils:
            for action in utensil.actions:
                # map() is lazy under Python 3; use an explicit loop so the
                # triples are actually added
                for triple in graph.triples((action.resUri, None, None)):
                    rdfSubject.db.add(triple)
Example #3
    def __call__(self, url, **kwargs):

        if not url:
            return []

        graph = ConjunctiveGraph()
        graph.parse(url)
        output = {}

        # iterating a Graph yields (subject, predicate, object) triples,
        # so the third term is the object, not a context
        for subject, predicate, obj in graph:
            key = self.strip(subject)
            prop = self.strip(predicate)
            value = self.defrag(obj)

            output.setdefault(key, {
                'label': key,
                'uri': unicode(subject)
            })

            if prop in output[key]:
                old = output[key][prop]
                if not isinstance(old, list):
                    output[key][prop] = [old]
                output[key][prop].append(value)
            else:
                output[key][prop] = value

        return output.values()
Example #4
def _test_serializer(inputpath, expectedpath, context, serpar):
    test_tree, test_graph = _load_test_data(inputpath, expectedpath, context)

    if isinstance(test_tree, ConjunctiveGraph):
        expected = test_tree.serialize(format="json-ld")
    else:
        expected = _to_json(_to_ordered(test_tree))

    if test_graph is not None:
        # toRdf, expected are nquads
        result_tree = to_tree(test_graph, context_data=context)
        result = _to_json(_to_ordered(result_tree))

    elif inputpath.startswith('fromRdf'):
        # fromRdf, expected in json-ld
        g = ConjunctiveGraph()
        data = open(p.join(test_dir, inputpath), 'rb').read()
        g.parse(data=data, format="nquads", context=context)
        result = g.serialize(format="json-ld", base=context)

    else:
        # json
        f = open(p.join(test_dir, inputpath), 'rb')
        result = json.load(f)[0]
        f.close()

    if isinstance(result, ConjunctiveGraph):
        assert isomorphic(result, expected), \
            "Expected graph:\n%s\nGot graph:\n%s" % (
                expected.serialize(format='n3'),
                result.serialize(format='n3'))
    else:
        assert jsonld_compare(expected, result), \
                "Expected JSON:\n%s\nGot:\n%s" % (expected, result)
Example #5
def ConvertToRDFN3(filename, destinationFileName):
    _graph = ConjunctiveGraph()
    _graph.parse(filename, format="nt")

    with open(destinationFileName, "wb") as of:
        of.write(_graph.serialize(format="n3"))
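Usage is a single call; the file names below are placeholders:

ConvertToRDFN3("input.nt", "output.n3")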
Example #6
    def discussion_as_graph(self, discussion_id):
        self.ensure_discussion_storage(None)
        from assembl.models import Discussion
        d_storage_name = self.discussion_storage_name()
        d_graph_iri = URIRef(self.discussion_graph_iri())
        v = get_virtuoso(self.session, d_storage_name)
        discussion_uri = URIRef(
            Discussion.uri_generic(discussion_id, self.local_uri()))
        subjects = list(v.query(
            """SELECT DISTINCT ?s WHERE {
            ?s assembl:in_conversation %s }""" % (discussion_uri.n3())))
        subjects.append([discussion_uri])
        # print len(subjects)
        cg = ConjunctiveGraph(identifier=d_graph_iri)
        for (s,) in subjects:
            # Absurdly slow. DISTINCT speeds up a lot, but I get numbers.
            for p, o in v.query(
                'SELECT ?p ?o WHERE { graph %s { %s ?p ?o }}' % (
                        d_graph_iri.n3(), s.n3())):
                    cg.add((s, p, o))

        for (s, o, g) in v.query(
                '''SELECT ?s ?o ?g WHERE {
                GRAPH ?g {?s catalyst:expressesIdea ?o } .
                ?o assembl:in_conversation %s }''' % (discussion_uri.n3())):
            cg.add((s, CATALYST.expressesIdea, o, g))

        # TODO: Add roles

        return cg
Example #7
def validate_sparql_endpoint(form, field):
    try:
        g = ConjunctiveGraph('SPARQLStore')
        g.open(field.data)
        g.query('SELECT * WHERE { ?s ?p ?o } LIMIT 1')
    except Exception:
        raise ValidationError('This is not a valid SPARQL endpoint.')
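This follows the WTForms validator signature (form, field), so it can be attached to a field directly; a sketch, assuming wtforms is available:

from wtforms import Form, StringField

class EndpointForm(Form):
    endpoint = StringField('SPARQL endpoint',
                           validators=[validate_sparql_endpoint])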
Example #8
 def _user_graph(self, uri):
     userGraph = Graph()
     try:
         userGraph.parse(uri)
     except Exception:
         u = "http://www.w3.org/2007/08/pyRdfa/extract?space-preserve=true&uri=" + uri
         userGraph.parse(u, identifier=uri)
     return userGraph
Example #9
class FOAF(callbacks.Privmsg):

    DATAFILE = "/var/www/rc98.net/zoia.rdf"

    def __init__(self, irc):
        self.g = Graph()
        #      self.g.parse('http://rc98.net/zoia.rdf')
        self.g.parse(self.DATAFILE, format="xml")
        self.uri = rdflib.URIRef("http://www.code4lib.org/id/zoia")
        self.FOAF = Namespace("http://xmlns.com/foaf/0.1/")
        super(FOAF, self).__init__(irc)

    def _uri_of_user(self, nick):
        result = self.g.query(
            """
          PREFIX foaf: <http://xmlns.com/foaf/0.1/> 
          SELECT ?uri WHERE 
          {<http://www.code4lib.org/id/zoia> foaf:knows ?uri . ?uri foaf:nick ?nick .}
          """,
            initBindings={"nick": nick},
        )
        if len(result) > 0:
            userURI = list(result)[0][0]
            return userURI
        else:
            return None

    def _user_graph(self, uri):
        userGraph = Graph()
        try:
            userGraph.parse(uri)
        except Exception:
            u = "http://www.w3.org/2007/08/pyRdfa/extract?space-preserve=true&uri=" + uri
            userGraph.parse(u, identifier=uri)
        return userGraph
Example #10
    def get_graph(self, with_mappings=False, include_mapping_target=False, acceptance=False, target_uri=None):
        """Get Graph instance of this EDMRecord.

        :param target_uri: target URI if you want a sub-selection of the whole graph
        :param acceptance: whether the acceptance data should be used instead of the source data
        :param include_mapping_target: Boolean; also include the mapping target triples in the graph
        :param with_mappings: Boolean; integrate the ProxyMapping into the graph
        """
        rdf_string = self.source_rdf
        if acceptance and self.acceptance_rdf:
            rdf_string = self.acceptance_rdf

        graph = ConjunctiveGraph(identifier=self.named_graph)
        graph.namespace_manager = namespace_manager
        graph.parse(data=rdf_string, format='nt')
        if with_mappings:
            proxy_resources, graph = ProxyResource.update_proxy_resource_uris(self.dataset, graph)
            self.proxy_resources.add(*proxy_resources)
            for proxy_resource in proxy_resources:
                graph = graph + proxy_resource.to_graph(include_mapping_target=include_mapping_target)
        if target_uri and not target_uri.endswith("/about") and target_uri != self.document_uri:
            g = Graph(identifier=URIRef(self.named_graph))
            subject = URIRef(target_uri)
            for p, o in graph.predicate_objects(subject=subject):
                g.add((subject, p, o))
            graph = g
        return graph
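A call-site sketch; `record` stands for a hypothetical EDMRecord instance:

g = record.get_graph(with_mappings=True, include_mapping_target=True)
print(g.serialize(format='turtle'))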
Example #11
    def rdfFromText(self, text):
        """Take text, return an RDF graph."""
        postdata = {}
        postdata['licenseID'] = self.api_key
        postdata['paramsXML'] = ' '.join(['<c:params xmlns:c="http://s.opencalais.com/1/pred/"'
                ,'xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">'
                ,'<c:processingDirectives c:contentType="text/raw"'
                ,'c:outputFormat="text/xml"'
                ,'c:enableMetadataType="GenericRelations,SocialTags">'
                ,'</c:processingDirectives>'
                ,'<c:userDirectives c:allowDistribution="false"'
                ,'c:allowSearch="false"'
                ,'c:externalID="{0}"'.format(uuid.uuid4())
                ,'c:submitter="{0}">'.format(self.app_name)
                ,'</c:userDirectives>'
                ,'<c:externalMetadata></c:externalMetadata>'
                ,'</c:params>'])

        postdata['content'] = text

        poststring = urllib.urlencode(postdata)
        data = self.post_data("{0}".format(self.api_url), poststring, timeout=60*5)
        graph = Graph()
        inpt = StringInputSource(data)
        try:
            # the second positional argument of parse() is publicID,
            # so the format must be passed by keyword
            graph.parse(inpt, format='xml')
        except Exception:
            print data
            raise
        return graph
Example #12
def instance_view_jsonld(request):
    from assembl.semantic.virtuoso_mapping import AssemblQuadStorageManager
    from rdflib import URIRef, ConjunctiveGraph
    ctx = request.context
    user_id = authenticated_userid(request) or Everyone
    permissions = get_permissions(
        user_id, ctx.get_discussion_id())
    instance = ctx._instance
    if not instance.user_can(user_id, CrudPermissions.READ, permissions):
        return HTTPUnauthorized()
    discussion = ctx.get_instance_of_class(Discussion)
    if not discussion:
        raise HTTPNotFound()
    aqsm = AssemblQuadStorageManager()
    uri = URIRef(aqsm.local_uri() + instance.uri()[6:])
    d_storage_name = aqsm.discussion_storage_name(discussion.id)
    v = get_virtuoso(instance.db, d_storage_name)
    cg = ConjunctiveGraph(v, d_storage_name)
    result = cg.triples((uri, None, None))
    #result = v.query('select ?p ?o ?g where {graph ?g {<%s> ?p ?o}}' % uri)
    # Something is wrong here.
    triples = '\n'.join([
        '%s %s %s.' % (uri.n3(), p.n3(), o.n3())
        for (s, p, o) in result
        if '_with_no_name_entry' not in o])
    return aqsm.quads_to_jsonld(triples)
Example #13
 def discussion_as_graph(self, discussion_id):
     from assembl.models import Discussion, AgentProfile
     local_uri = self.local_uri()
     discussion = Discussion.get(discussion_id)
     d_storage_name = self.discussion_storage_name()
     d_graph_iri = URIRef(self.discussion_graph_iri())
     v = get_virtuoso(self.session, d_storage_name)
     discussion_uri = URIRef(
         Discussion.uri_generic(discussion_id, local_uri))
     subjects = [s for (s,) in v.query(
         """SELECT DISTINCT ?s WHERE {
         ?s assembl:in_conversation %s }""" % (discussion_uri.n3()))]
     subjects.append(discussion_uri)
     participant_ids = list(discussion.get_participants(True))
     profiles = {URIRef(AgentProfile.uri_generic(id, local_uri))
                 for id in participant_ids}
     subjects.extend(profiles)
     # add pseudo-accounts
     subjects.extend((URIRef("%sAgentAccount/%d" % (local_uri, id))
                      for id in participant_ids))
     # print len(subjects)
     cg = ConjunctiveGraph(identifier=d_graph_iri)
     self.add_subject_data(v, cg, subjects)
     # add relationships of non-pseudo accounts
     for ((account, p, profile), g) in v.triples((None, SIOC.account_of, None)):
         if profile in profiles:
             cg.add((account, SIOC.account_of, profile, g))
             # Tempting: simplify with this.
             # cg.add((profile, FOAF.account, account, g))
     for (s, o, g) in v.query(
             '''SELECT ?s ?o ?g WHERE {
             GRAPH ?g {?s catalyst:expressesIdea ?o } .
             ?o assembl:in_conversation %s }''' % (discussion_uri.n3())):
         cg.add((s, CATALYST.expressesIdea, o, g))
     return cg
Example #14
    def partsites(self):
        context = aq_inner(self.context)

        _partsiteType = _mcltype.FundedSite

        # title
        _title = _terms.title
        # description
        _description = _terms.description

        # Temporary rdf read
        rdfDataSource = "https://edrn-dev.jpl.nasa.gov/ksdb/publishrdf/?rdftype=fundedsite"
        graph = ConjunctiveGraph()
        graph.parse(URLInputSource(rdfDataSource))
        statements = _parseRDF(graph)

        partsites = []
        for uri, i in statements.items():
            partsite = dict(url=uri, title="", description="")
            if _title in i:
                partsite["title"] = unicode(i[_title][0])
            if _description in i:
                partsite["description"] = strip_tags(unicode(i[_description][0]))

            partsites.append(partsite)

        return partsites
Example #15
def fill_graph_by_subject(basegraph, newgraph, subject, loop_count=0):
    """
    Fills a graph with all triples having a certain subject, recursively
    pulling in the triples needed to describe the objects down to a depth of 5.
    :param basegraph: Graph holding the data for the new graph
    :param newgraph: instance of the new Graph
    :param subject: subject of the triples to look for in the basegraph
    :return: Graph
    """
    subject_list=[BNode,URIRef]

    if not issubclass(type(basegraph),Graph):
        log.error("The given basegraph is not a subclass of Graph!")
        return ConjunctiveGraph()
    elif subject == "":
        log.info("The given subject was empty. Returning the basegraph")
        return basegraph
    elif type(subject) not in subject_list:
        log.info("The given subject was not of type BNode or URIRef. Returning the basegraph")
        return basegraph
    elif not issubclass(type(newgraph),Graph):
        newgraph=ConjunctiveGraph()

    loop_count += 1
    for s, p, o in basegraph.triples((subject, None, None)):
        newgraph.add((s, p, o))
        if type(o) in subject_list and loop_count < 6:  # it will do: (S1,P1,O1) -> if O1 has an own Description: (O1,P2,O2)... 5 times
            newgraph = fill_graph_by_subject(basegraph, newgraph, o, loop_count)
    return newgraph
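A usage sketch for fill_graph_by_subject; the input file and subject URI are illustrative:

from rdflib import ConjunctiveGraph, URIRef

base = ConjunctiveGraph()
base.parse("data.ttl", format="turtle")
description = fill_graph_by_subject(base, ConjunctiveGraph(),
                                    URIRef("http://example.org/alice"))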
Example #16
def __load_graph(file_p, tmp_dir=None):
    errors = ""
    current_graph = ConjunctiveGraph()

    if tmp_dir is not None:
        file_path = tmp_dir + os.sep + "tmp_rdf_file.rdf"
        shutil.copyfile(file_p, file_path)
    else:
        file_path = file_p

    try:
        with open(file_path) as f:
            json_ld_file = json.load(f)
            if isinstance(json_ld_file, dict):
                json_ld_file = [json_ld_file]

            for json_ld_resource in json_ld_file:
                # Trick to force the use of a pre-loaded context if the format
                # specified is JSON-LD
                cur_context = json_ld_resource["@context"]
                json_ld_resource["@context"] = context_json

                current_graph.parse(data=json.dumps(json_ld_resource), format="json-ld")
            
            return current_graph
    except Exception as e:
        errors = " | " + str(e)  # Try another format

    if tmp_dir is not None:
        os.remove(file_path)

    raise IOError("[1]", "It was impossible to handle the format used for storing the file '%s'%s" %
                  (file_path, errors))
Example #17
class RecursionTests(unittest.TestCase):
    # debug = True
    def setUp(self):
        self.graph = ConjunctiveGraph()
        self.graph.load(StringIO(testContent), format='n3')

    def test_simple_recursion(self):
        graph = ConjunctiveGraph()
        graph.load(StringIO(BASIC_KNOWS_DATA), format='n3')
        results = graph.query(KNOWS_QUERY,
                              processor="sparql", 
                              DEBUG=False)
        results = set(results)
        person1 = URIRef('ex:person.1')
        person2 = URIRef('ex:person.2')
        nose.tools.assert_equal(
          results,
          set([(person1, None), (person1, Literal('person 3')),
               (person2, Literal('person 3'))]))

    def test_secondary_recursion(self):
        graph = ConjunctiveGraph()
        graph.load(StringIO(SUBCLASS_DATA), format='n3')
        results = graph.query(SUBCLASS_QUERY,
                              processor="sparql", 
                              DEBUG=False)
        results = set(results)
        ob = URIRef('ex:ob')
        class1 = URIRef('ex:class.1')
        class2 = URIRef('ex:class.2')
        class3 = URIRef('ex:class.3')
        nose.tools.assert_equal(
          results,
          set([(ob, class1), (ob, class2), (ob, class3)]))
Example #18
    def query_graph(self, subj=None, pred=None, obj=None, exhaustive=False):
	"""Return a graph of  all triples with subect `sub`, predicate `pred`
	OR object `obj. If `exhaustive`, return all subelements of the given
	arguments (If sub is http://127.0.0.1/api/v1/wine/, return 
	http://127.0.0.1/api/v1/wine/{s} for all s). Arguments must be of type
	URIRef or Literal"""
	g = ConjunctiveGraph()
	count = 0
	if not isinstance(subj, list):
	    subj = [subj]
	for sub in subj:
	    for uri_s, uri_p, uri_o in sorted(self.graph):
		s, p, o = str(uri_s), str(uri_p), str(uri_o)
		if exhaustive:
		    s = s.rpartition('/')[0]
		    p = p.rpartition('/')[0]
		    o = o.rpartition('/')[0]
		else:
		    s = s[:-1] if s.endswith('/') else s
		    p = p[:-1] if p.endswith('/') else p
		    o = o[:-1] if o.endswith('/') else o
		if (sub and sub == s) or (pred and pred == p) or (obj and obj == o):
		    g.add((uri_s, uri_p, uri_o))
		    count += 1
	return g
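A hedged call sketch; `store` is a hypothetical object exposing this method and holding `self.graph`:

from rdflib import URIRef

g = store.query_graph(subj=URIRef('http://127.0.0.1/api/v1/wine/'),
                      exhaustive=True)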
Example #19
 def open(self):
     # XXX: If we have a source that's read only, should we need to set the
     # store separately??
     g0 = ConjunctiveGraph('SPARQLUpdateStore')
     g0.open(tuple(self.conf['rdf.store_conf']))
     self.graph = g0
     return self.graph
Example #20
def getPropFile(fname):
    g = ConjunctiveGraph(identifier=URIRef(ads_baseurl))
    bindgraph(g)
    recordstree=ElementTree.parse(fname)
    rootnode=recordstree.getroot()
    xobj=XMLObj(recordstree)
    trec={}
    trec['propname']=rootnode.attrib['name']
    trec['propid']=rootnode.attrib['id']
    trec['title']=xobj.title
    trec['category']=xobj.category
    #we used a proposalType here, as this is somewhat different from justscienceprocess. add to ontology
    trec['abstract']=xobj.abstract
    trec['pi']=[xobj.elementAttribute('pi', 'last'),xobj.elementAttribute('pi', 'first')]
    #print trec
    propuri=getPropURI(trec['propid'])
    #This is FALSE. TODO..fix to ads normed name or lookitup How? Blanknode? WOW.
    qplabel=trec['pi'][0]+'_'+trec['pi'][1]
    fullname=trec['pi'][0]+', '+trec['pi'][1]
    auth_uri = uri_agents["PersonName/"+qplabel+"/"+str(uuid.uuid4())]
    gdadd(g, auth_uri, [
            a, agent.PersonName,
            agent.fullName, Literal(fullname)
            ])
    gadd(g, propuri, a, adsbase.ObservationProposal)
    gdadd(g, propuri, [
            adsobsv.observationProposalId, Literal(trec['propid']),
            adsobsv.observationProposalType, Literal("CHANDRA/"+trec['category']),
            adsbase.principalInvestigator, auth_uri,
            adsbase.title, Literal(trec['title'])
        ]
    )
    serializedstuff=g.serialize(format='xml')
    return serializedstuff
Example #21
    def test_flowcells_index_rdfa(self):
        model = ConjunctiveGraph()

        response = self.client.get(reverse('flowcell_index'))
        self.assertEqual(response.status_code, 200)
        model.parse(data=smart_text(response.content), format='rdfa')

        add_default_schemas(model)
        inference = Infer(model)
        errmsgs = list(inference.run_validation())
        self.assertEqual(len(errmsgs), 0, errmsgs)

        body =  """prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>

        select ?flowcell
        where {
           ?flowcell a libns:IlluminaFlowcell .
        }"""
        bindings = set(['flowcell'])
        count = 0
        for r in model.query(body):
            count += 1

        self.assertEqual(count, len(FlowCell.objects.all()))
Example #22
class OntoInspector(object):

    """Class that includes methods for querying an RDFS/OWL ontology"""

    def __init__(self, uri, language=""):
        super(OntoInspector, self).__init__()

        self.rdfGraph = ConjunctiveGraph()
        try:
            self.rdfGraph.parse(uri, format="xml")
        except Exception:
            try:
                self.rdfGraph.parse(uri, format="n3")
            except Exception:
                raise exceptions.Error("Could not parse the file! Is it a valid RDF/OWL ontology?")

        finally:
            # let's cache some useful info for faster access
            self.baseURI = self.get_OntologyURI() or uri
            self.allclasses = self.__getAllClasses(classPredicate)
            self.toplayer = self.__getTopclasses()
            self.tree = self.__getTree()


    def get_OntologyURI(self, *args, **kwargs):  # signature truncated in the original snippet
        # todo
        pass
Example #23
    def check(kws):
        cg = ConjunctiveGraph()
        cg.parse(**kws)

        for g in cg.contexts():
            gid = g.identifier
            assert isinstance(gid, Identifier)
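An illustrative invocation: parsing a small TriG document (a format that carries named graphs) exercises both assertions:

check(dict(
    data='@prefix : <http://example.org/> . :g { :s :p :o . }',
    format='trig'))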
Example #24
    def describe(self, s_or_po, initBindings={}, initNs={}):
        """
        Executes a SPARQL describe of resource

        :param s_or_po:  is either

          * a subject ... should be a URIRef
          * a tuple of (predicate,object) ... pred should be inverse functional
          * a describe query string

        :param initBindings: A mapping from a Variable to an RDFLib term (used
            as initial bindings for SPARQL query)
        :param initNs: A mapping from a namespace prefix to a namespace
        """
        if isinstance(s_or_po, str):
            query = s_or_po
            if initNs:
                prefixes = ''.join(["prefix %s: <%s>\n" % (p, n)
                                    for p, n in initNs.items()])
                query = prefixes + query
        elif isinstance(s_or_po, (URIRef, BNode)):
            query = "describe %s" % (s_or_po.n3())
        else:
            p, o = s_or_po
            query = "describe ?s where {?s %s %s}" % (p.n3(), o.n3())
        query = dict(query=query)

        url = self.url + "?" + urlencode(query)
        req = Request(url)
        req.add_header('Accept', 'application/rdf+xml')
        log.debug("opening url: %s\n  with headers: %s" %
                  (req.get_full_url(), req.header_items()))
        subgraph = ConjunctiveGraph()
        subgraph.parse(urlopen(req))
        return subgraph
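Call sketches for the three accepted argument shapes; `endpoint` is a hypothetical instance and the URIs are placeholders:

from rdflib import URIRef

endpoint.describe(URIRef("http://example.org/alice"))        # subject
endpoint.describe((URIRef("http://xmlns.com/foaf/0.1/mbox"),
                   URIRef("mailto:alice@example.org")))      # (pred, obj)
endpoint.describe("describe <http://example.org/alice>")     # raw query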
Example #25
def _construct(compiler, sources, query=None):
    dataset = ConjunctiveGraph()
    if not isinstance(sources, list):
        sources = [sources]
    for sourcedfn in sources:
        source = sourcedfn['source']
        graph = dataset.get_context(URIRef(sourcedfn.get('dataset') or source))
        if isinstance(source, (dict, list)):
            context_data = sourcedfn['context']
            if not isinstance(context_data, list):
                context_data = compiler.load_json(context_data)['@context']
            context_data = [compiler.load_json(ctx)['@context']
                            if isinstance(ctx, unicode) else ctx
                            for ctx in context_data]
            to_rdf(source, graph, context_data=context_data)
        elif isinstance(source, Graph):
            graph += source
        else:
            graph += compiler.cached_rdf(source)
    if not query:
        return graph
    with compiler.path(query).open() as fp:
        result = dataset.query(fp.read())
    g = Graph()
    for spo in result:
        g.add(spo)
    return g
Example #26
def _graphFromQuads2(q):
    g = ConjunctiveGraph()
    #g.addN(q) # no effect on nquad output
    for s,p,o,c in q:
        g.get_context(c).add((s,p,o)) # kind of works with broken rdflib nquad serializer code
        #g.store.add((s,p,o), c) # no effect on nquad output
    return g
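A quick sketch of the quad shape this helper expects (names illustrative):

from rdflib import URIRef

EX = "http://example.org/"
quads = [(URIRef(EX + "s"), URIRef(EX + "p"),
          URIRef(EX + "o"), URIRef(EX + "g"))]
print(_graphFromQuads2(quads).serialize(format="nquads"))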
Example #27
class Serializer(PythonSerializer):
    """
    Convert a queryset to RDF
    """    
    internal_use_only = False

    def end_serialization(self):
        FOAF = Namespace('http://xmlns.com/foaf/0.1/')
        DC = Namespace('http://purl.org/dc/elements/1.1/')
        
        self.graph = ConjunctiveGraph()
        self.options.pop('stream', None)
        # materialize the filter so the `if not fields` test below also
        # works under Python 3
        fields = list(filter(None, self.options.pop('fields', '').split(',')))
        meta = None
        subject = None
        for object in self.objects:
            if not fields:
                fields = object['fields'].keys()    
            newmeta = object['model']
            if newmeta != meta:
                meta = newmeta
            subject = BNode('%s.%s'%(FOAF[newmeta],object['pk']))
            self.graph.add((subject,FOAF['pk'],Literal(object['pk'])))
            for k in fields:
                if k:
                    self.graph.add((subject,FOAF[k],Literal(object['fields'][k])))

    def getvalue(self):
        if callable(getattr(self.graph, 'serialize', None)):
            return self.graph.serialize()
Example #28
def test(request, ttype, test):
    # Get the request_host (without the port)
    
    request_path = request.path
    request_host = 'http://'+request.get_host()+request_path.replace(request.path_info,'')
    # print request_host
    
    if ttype in ['publications', 'mascc'] :
        if test in [re.search(r'ttl/(.*)\.ttl',n).group(1) for n in glob('/var/www/semweb.cs.vu.nl/plugins/ttl/*.ttl')] :
            filename = "/var/www/semweb.cs.vu.nl/plugins/ttl/{}.ttl".format(test)
            cg = ConjunctiveGraph()
            cg.parse(filename, format='n3')
        
            # print "Request received"
            
            if ttype == 'publications' :
                response = HttpResponseRedirect(request_host+'/publications/{}'.format(urllib.quote(cg.serialize(format='turtle'),safe='')))
            elif ttype == 'mascc' :
                response = HttpResponseRedirect(request_host+'/mascc/{}'.format(urllib.quote(cg.serialize(format='turtle'),safe='')))
            else :
                response = HttpResponseBadRequest()
        else :
            response = HttpResponseNotFound()
    elif ttype == 'bad' :
        response = HttpResponseRedirect(request_host+'/mascc/bad_request')
    else :
        response = HttpResponseNotFound()
                    
    return response
Example #29
def test_rdf(mfile):
    g = ConjunctiveGraph()
    try:
        g = g.parse(mfile, format='xml')
        return True
    except Exception as inst:
        return False
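Usage is a simple validity probe; the path is a placeholder:

if test_rdf("ontology.rdf"):
    print("parsed as RDF/XML")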
Example #30
 def __init__(self, store=None, id=None):
     if store is not None and id is not None:
         ConjunctiveGraph.__init__(self, store, id)
     else:
         ConjunctiveGraph.__init__(self)
     for (key, val) in namespaces.items():
         self.bind(key, val)
Example #31
class Store:
    def __init__(self):
        self.graph = ConjunctiveGraph()
        if os.path.exists(storefn):
            self.graph.load(storeuri, format='n3')
        self.graph.bind('dc', DC)
        self.graph.bind('foaf', FOAF)
        self.graph.bind('imdb', IMDB)
        self.graph.bind('rev', 'http://purl.org/stuff/rev#')

    def save(self):
        self.graph.serialize(storeuri, format='n3')

    def who(self, who=None):
        if who is not None:
            m = r_who.match(who)
            name, email = m.group(1), m.group(2)
            self.graph.add(
                (URIRef(storeuri), DC['title'], Literal(title % name)))
            self.graph.add(
                (URIRef(storeuri + '#author'), RDF.type, FOAF['Person']))
            self.graph.add(
                (URIRef(storeuri + '#author'), FOAF['name'], Literal(name)))
            self.graph.add(
                (URIRef(storeuri + '#author'), FOAF['mbox'], Literal(email)))
            self.save()
        else:
            return self.graph.objects(URIRef(storeuri + '#author'),
                                      FOAF['name'])

    def new_movie(self, movie):
        movieuri = URIRef('http://www.imdb.com/title/tt%s/' % movie.movieID)
        self.graph.add((movieuri, RDF.type, IMDB['Movie']))
        self.graph.add((movieuri, DC['title'], Literal(movie['title'])))
        self.graph.add((movieuri, IMDB['year'], Literal(int(movie['year']))))
        self.save()

    def new_review(self, movie, date, rating, comment=None):
        review = BNode(
        )  # @@ humanize the identifier (something like #rev-$date)
        movieuri = URIRef('http://www.imdb.com/title/tt%s/' % movie.movieID)
        self.graph.add(
            (movieuri, REV['hasReview'], URIRef('%s#%s' % (storeuri, review))))
        self.graph.add((review, RDF.type, REV['Review']))
        self.graph.add((review, DC['date'], Literal(date)))
        self.graph.add((review, REV['maxRating'], Literal(5)))
        self.graph.add((review, REV['minRating'], Literal(0)))
        self.graph.add((review, REV['reviewer'], URIRef(storeuri + '#author')))
        self.graph.add((review, REV['rating'], Literal(rating)))
        if comment is not None:
            self.graph.add((review, REV['text'], Literal(comment)))
        self.save()

    def movie_is_in(self, uri):
        return (URIRef(uri), RDF.type, IMDB['Movie']) in self.graph
Example #32
def transpose_to_rdf(doc, con, text, context_included, name, f):
    g = ConjunctiveGraph()
    if text not in con:
        if not context_included:
            if mp:
                with lock:
                    get_context(con, text)
            else:
                get_context(con, text)
    if not args.debug:
        opener = open
        if ".bz" in name:
            opener = bz2.open
        if context_included:
            g.parse(data=json.dumps(doc), format='json-ld')
        else:
            g.parse(data=json.dumps(doc), format='json-ld', context=con[text])
        with opener(name, "at") as fd:
            print(str(g.serialize(format='nt').decode('utf-8').rstrip()),
                  file=fd)
    else:
        if context_included:
            g.parse(data=json.dumps(doc), format='json-ld')
        else:
            g.parse(data=json.dumps(doc), format='json-ld', context=con[text])
        print(str(g.serialize(format=f).decode('utf-8').rstrip()))
Example #33
def get_graph():
    path = '../data/triplestore_linkedmdb'
    graph = ConjunctiveGraph('Sleepycat')
    graph.open(path, create=False)
    return graph
Example #34
def bound_graph(identifier=None):
    g = ConjunctiveGraph(identifier=identifier)
    g.bind('core', CORE)
    g.bind('wot', WOT)
    g.bind('map', MAP)
    return g
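A sketch of the effect: the bound prefixes appear in any serialization. CORE, WOT and MAP are namespace objects assumed to be defined elsewhere in the module:

from rdflib import URIRef

g = bound_graph(identifier=URIRef("http://example.org/things"))
print(g.serialize(format="turtle"))  # output starts with @prefix core:, wot:, map: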
Example #35
from __future__ import print_function
import rdfalchemy
from rdfalchemy.samples.doap import FOAF
from rdfalchemy.samples.foaf import Person
from rdfalchemy.orm import mapper
from rdflib import ConjunctiveGraph

import logging
log = logging.getLogger('rdfalchemy')
if not log.handlers:
    log.addHandler(logging.StreamHandler())
#log.setLevel(10)

Person.db = ConjunctiveGraph()
Person.knows = rdfalchemy.rdfMultiple(FOAF.knows, range_type=FOAF.Person)


def test_start():
    assert len(Person.db) == 0
    p = Person(last="Cooper", first="Philip")
    assert len(Person.db) == 3
    del p


def test_addBNodeKnowsL():
    Person.knows = rdfalchemy.rdfList(FOAF.knows, range_type=FOAF.Person)
    mapper()
    p1 = Person.get_by(first="Philip")
    p2 = Person(last="Cooper", first="Ben")
    p3 = Person(last="Cooper", first="Matt")
    assert len(Person.db) == 9
Example #36
def test_import_jsonld_into_named_graph():
    """Test named graphs we use."""
    graph = ConjunctiveGraph()

    serialized_document = json.dumps(
        jsonld.expand(jsonld.flatten(JSONLD_DOCUMENT)),
        indent=4,
    )

    graph.parse(
        data=serialized_document,
        format='json-ld',
        # All the semantic data about my blog is stored in a particular
        # named graph.
        publicID=PUBLIC_ID,
    )

    assert list(
        map(
            operator.itemgetter(Variable('g')),
            graph.query(
                'SELECT DISTINCT ?g WHERE { GRAPH ?g { ?s ?p ?o . } } ORDER BY ?g',
            ).bindings,
        )) == [
            PUBLIC_ID,
        ]

    assert graph.query('''
        SELECT * WHERE {
            ?subclass_of_robot rdfs:subClassOf ex:Robot .
        }
    ''',
                       initNs=NAMESPACES).bindings == [{
                           Variable('subclass_of_robot'):
                           URIRef('https://example.org/Rover'),
                       }]

    # THIS FAILS!
    # The information about ex:Rover cannot be found if I specify the GRAPH.
    # Meaning, this information is not in one of the named graphs.
    assert graph.query('''
        SELECT * WHERE {
            GRAPH ?g {
                ex:Rover rdfs:subClassOf ex:Robot .
            }
        }
    ''',
                       initNs=NAMESPACES).bindings == [{
                           Variable('g'):
                           PUBLIC_ID,
                       }]

    # `publicID` was used for the part of data which was on the top level
    # of the document.
    assert graph.query('''
        SELECT * WHERE {
            GRAPH ?g {
                blog:JSONLD-and-named-graphs a schema:blogPost .
            }
        }
    ''',
                       initNs=NAMESPACES).bindings == [{
                           Variable('g'):
                           PUBLIC_ID,
                       }]
Example #37
def getObsFile(fname):
    g = ConjunctiveGraph(identifier=URIRef(ads_baseurl))
    bindgraph(g)
    recordstree = ElementTree.parse(fname)
    rootnode = recordstree.getroot()
    xobj = XMLObj(recordstree)
    trec = {}
    trec['obsname'] = rootnode.attrib['name']
    trec['obsid'] = rootnode.attrib['obsid']
    trec['instrument_name'] = xobj.elementAttribute('instrument', 'name')
    trec['obsvtype'] = xobj.type
    trec['time'] = xobj.observed_time
    trec['created_time'] = xobj.public_avail
    #Bug: in some of Sherry's stuff this is null
    #print "Created",trec['created_time']
    trec['date'] = xobj.start_date
    trec['ra'] = xobj.ra
    trec['dec'] = xobj.dec
    trec['proposal_id'] = xobj.elementAttribute('proposal', 'id')
    #print trec
    obsuri = getObsURI(trec['obsid'])
    daturi = getDatURI(trec['obsid'], fragment="I")
    daturi2 = getDatURI(trec['obsid'], fragment="S")
    gadd(g, daturi, a, adsobsv.Datum)
    gadd(g, daturi2, a, adsobsv.Datum)
    gadd(g, obsuri, a, adsobsv.SimpleObservation)
    #Connect the data product and the observation
    access_url = "http://cda.harvard.edu/chaser/ocatList.do?obsid=" + trec[
        'obsid']
    gdadd(g, daturi, [
        adsobsv.dataProductId,
        Literal(trec['obsid'] + "/I"), adsobsv.forSimpleObservation, obsuri,
        adsobsv.dataURL,
        URIRef(access_url)
    ])
    gdadd(g, daturi2, [
        adsobsv.dataProductId,
        Literal(trec['obsid'] + "/S"), adsobsv.forSimpleObservation, obsuri,
        adsobsv.dataURL,
        URIRef(access_url)
    ])
    addVals(g, daturi, [
        adsobsv.calibLevel,
        2,
        asInt,
        adsbase.dataType,
        "image",
        Literal,
    ])
    #These are untrue anyway: creation time is not public time, but we are using it now.
    if trec['created_time'] is not None:
        addVals(g, daturi, [
            pav.createdOn, trec['created_time'],
            asDateTime('%b %d %Y %H:%M%p')
        ])
        addVals(g, daturi2, [
            pav.createdOn, trec['created_time'],
            asDateTime('%b %d %Y %H:%M%p')
        ])
    addVals(g, daturi2, [
        adsobsv.calibLevel,
        2,
        asInt,
        adsbase.dataType,
        "spectra",
        Literal,
    ])
    tname = trec['obsname'].strip()
    gdadd(g, obsuri, [
        adsobsv.observationId,
        Literal(trec['obsid']), adsobsv.observationType,
        Literal(trec['obsvtype']), adsbase.atObservatory,
        uri_infra['observatory/CHANDRA'], adsobsv.atTelescope,
        uri_infra['telescope/CHANDRA'], adsbase.usingInstrument,
        uri_infra['instrument/CHANDRA_' + trec['instrument_name']],
        adsobsv.hasDatum, daturi, adsobsv.hasDatum, daturi2, adsbase.title,
        Literal(tname), adsbase.asAResultOfProposal,
        getPropURI(trec['proposal_id'])
    ])
    #fstring: Sep 17 2000  8:01PM %b %d %Y %H:%M%p
    emmin = 0.1e-10
    emmax = 100e-10
    addVals(g, obsuri, [
        adsbase.atTime,
        trec['date'],
        asDateTime('%b %d %Y %H:%M%p'),
        adsobsv.observedTime,
        float(trec['time']) * 1000,
        asDuration,
        adsobsv.tExptime,
        float(trec['time']) * 1000,
        asDouble,
        adsobsv.wavelengthStart,
        emmin,
        asDouble,
        adsobsv.wavelengthEnd,
        emmax,
        asDouble,
    ])

    if tname != '':
        tnameuri = uri_target["CHANDRA/" + quote_plus(tname)]

        gadd(g, obsuri, adsbase.target, tnameuri)
        addVals(g, tnameuri, [
            a,
            adsobsv.AstronomicalSourceName,
            None,
            adsbase.name,
            tname,
            Literal,
        ])

    for domain in getEMDomains(float(emmin), float(emmax)):
        addVal(g, obsuri, adsobsv.wavelengthDomain, domain)

    print "RA?DEC", trec['ra'], trec['dec']
    if trec['ra'] is not None and trec['dec'] is not None:
        gdbnadd(g, obsuri, adsobsv.associatedPosition, [
            a, adsobsv.Pointing, adsobsv.ra,
            asDouble(trec['ra']), adsobsv.dec,
            asDouble(trec['dec'])
        ])

    #should this be under uri_agents or collaboration instead?
    #the typing for this should be done in a conf file
    cnameuri = uri_conf["project/CHANDRA"]
    gadd(g, obsuri, adsobsv.observationMadeBy, cnameuri)
    #    gdadd(graph, cnameuri, [ This stuff is thought off as configuration
    #            a, adsbase.Project,
    #            agent.fullName, Literal(cname)
    #    ])
    serializedstuff = g.serialize(format='xml')
    return serializedstuff
Example #38
def getPubFile(fname):

    # Do we really need to create one per file? Could be
    # cached/made global but leave that for later if it
    # ever is determined to be a problem.
    #
    hparser = HTMLParser.HTMLParser()

    g = ConjunctiveGraph(identifier=URIRef(ads_baseurl))
    bindgraph(g)
    recordstree = ElementTree.parse(fname)
    rootnode = recordstree.getroot()
    xobj = XMLObj(recordstree)
    trec = {}

    # Change by Doug:
    # It looks like the bibcode elements have been percent encoded
    # in the input XML files - e.g.
    #
    #cat ../chandradata/Publications/2000A%26A...359..489C.xml
    # <paper>
    #  <bibcode>2000A%26A...359..489C</bibcode>
    #  <classified_by>CDA</classified_by>
    #  <paper_type>science</paper_type>
    #  <flags>
    #      <data_use>indirect</data_use>
    #      <multi_observatory />
    #      <followup />
    #  </flags>
    # </paper>
    #
    # so we have to decode it here.
    #
    # For now we *only* replace %26 by & but add a check
    # to fail if a % is found as a safety check.
    # I include the HTML unescape routine in case upstream
    # changes its format.
    #
    trec['bibcode'] = hparser.unescape(xobj.bibcode)
    trec['bibcode'] = trec['bibcode'].replace('%26', '&')
    if trec['bibcode'].find('%') != -1:
        raise ValueError(
            "Problem cleaning bibcode: original='{0}' after='{1}'".format(
                xobj.bibcode, trec['bibcode']))

    trec['classified_by'] = xobj.classified_by
    #this above could also be figured out via bibgroup
    #shouldn't this be a curated statement? But what is the curation? Not a source curation
    #later.
    trec['paper_type'] = xobj.paper_type
    #trec['obsids']=[e.text for e in xobj.rec.findall('data')[0].findall('obsid')]
    boolobsids = False
    if len(xobj.rec.findall('data')) > 0:
        if len(xobj.rec.findall('data')[0].findall('obsid')) > 0:
            print "1"
            trec['obsids'] = [
                e.text for e in xobj.rec.findall('data')[0].findall('obsid')
            ]
            boolobsids = True
    else:
        print "2"
        trec['obsids'] = []
    #print trec
    bibcode_uri = uri_bib[trec['bibcode']]
    gadd(g, bibcode_uri, adsbib.paperType, Literal(trec['paper_type']))
    print bibcode_uri
    if len(trec['obsids']) > 0:
        gadd(g, bibcode_uri, adsobsv.datum_p, Literal(str(boolobsids).lower()))
    for obsid in trec['obsids']:
        obsuri = getObsURI(obsid)
        daturi = getDatURI(obsid)
        #obsuri=uri_obs['CHANDRA_'+obsid]
        #daturi=uri_dat['CHANDRA_'+obsid]
        gadd(g, bibcode_uri, adsbase.aboutScienceProcess, obsuri)
        gadd(g, bibcode_uri, adsbase.aboutScienceProduct, daturi)

        #This is temporary. must map papertype to scienceprocesses and use those ones exactly

    serializedstuff = g.serialize(format='xml')
    return serializedstuff
Example #39
class TemporalData(Subject):
    # class header reconstructed from the __repr__ below; the original
    # snippet begins mid-class
    min = Property(PTREC.hasDateTimeMin)
    max = Property(PTREC.hasDateTimeMax)

    def __repr__(self):
        return "TemporalData(%r, %r)" % (self.min, self.max)


class Event(Subject):
    RDF_TYPE = PTREC.Event
    type = Term(RDF.type)

    def __repr__(self):
        return "Event(%r)" % (self.type, )


graph = ConjunctiveGraph()


def get_index_event(cohort_line, graph=graph):
    """
    Splits `cohort_line`, a string formatted like "<ccfid> <YYYY-mm-dd> <n>",
    into the CCFID and operation event identifier, and returns the result of
    querying `graph` for that operation.

    The patient record graph will be loaded into `graph`, and must be an
    RDF/XML file named "<ccfid>.rdf" in the directory specified by the
    environment variable `RDF_DIRECTORY`.

    For example, "12345678 2009-01-01 2" will load "12345678.rdf" into `graph`
    and find that patient's second operation event on January 1, 2009.
    """
Example #40
class BerkeleyDBTestCase(unittest.TestCase):
    def setUp(self):
        self.store_name = "BerkeleyDB"
        self.path = mktemp()
        self.g = ConjunctiveGraph(store=self.store_name)
        self.rt = self.g.open(self.path, create=True)
        assert self.rt == VALID_STORE, "The underlying store is corrupt"
        assert (
            len(self.g) == 0
        ), "There must be zero triples in the graph just after store (file) creation"
        data = """
                PREFIX : <https://example.org/>

                :a :b :c .
                :d :e :f .
                :d :g :h .
                """
        self.g.parse(data=data, format="ttl")

    def tearDown(self):
        self.g.close()

    def test_write(self):
        assert (
            len(self.g) == 3
        ), "There must be three triples in the graph after the first data chunk parse"
        data2 = """
                PREFIX : <https://example.org/>
                
                :d :i :j .
                """
        self.g.parse(data=data2, format="ttl")
        assert (
            len(self.g) == 4
        ), "There must be four triples in the graph after the second data chunk parse"
        data3 = """
                PREFIX : <https://example.org/>
                
                :d :i :j .
                """
        self.g.parse(data=data3, format="ttl")
        assert (
            len(self.g) == 4
        ), "There must still be four triples in the graph after the thrd data chunk parse"

    def test_read(self):
        sx = None
        for s in self.g.subjects(
                predicate=URIRef("https://example.org/e"),
                object=URIRef("https://example.org/f"),
        ):
            sx = s
        assert sx == URIRef("https://example.org/d")

    def test_sparql_query(self):
        q = """
            PREFIX : <https://example.org/>
            
            SELECT (COUNT(*) AS ?c)
            WHERE { 
                :d ?p ?o .
            }"""

        c = 0
        for row in self.g.query(q):
            c = int(row.c)
        assert c == 2, "SPARQL COUNT must return 2"

    def test_sparql_insert(self):
        q = """
            PREFIX : <https://example.org/>
            
            INSERT DATA {
                :x :y :z .
            }"""

        self.g.update(q)
        assert len(self.g) == 4, "After extra triple insert, length must be 4"

    def test_multigraph(self):
        q = """
            PREFIX : <https://example.org/>

            INSERT DATA {
                GRAPH :m {
                    :x :y :z .
                }
                GRAPH :n {
                    :x :y :z .
                }                
            }"""

        self.g.update(q)

        q = """
            SELECT (COUNT(?g) AS ?c)
            WHERE {
                SELECT DISTINCT ?g
                WHERE {
                    GRAPH ?g {
                        ?s ?p ?o 
                    }
                }
            }
            """
        c = 0
        for row in self.g.query(q):
            c = int(row.c)
        assert c == 3, "SPARQL COUNT must return 3 (default, :m & :n)"

    def test_open_shut(self):
        assert len(self.g) == 3, "Initially we must have 3 triples from setUp"
        self.g.close()
        self.g = None

        # reopen the graph
        self.g = ConjunctiveGraph("BerkeleyDB")
        self.g.open(self.path, create=False)
        assert (
            len(self.g) == 3
        ), "After close and reopen, we should still have the 3 originally added triples"
Example #41
text = """
@prefix foaf:       <http://xmlns.com/foaf/0.1/> .
@prefix rdf:        <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .

_:a  rdf:type        foaf:Person .
_:a  foaf:name       "Alice" .
_:a  foaf:mbox       <mailto:alice@example.com> .
_:a  foaf:mbox       <mailto:alice@work.example> .

_:b  rdf:type        foaf:Person .
_:b  foaf:name       "Bob" .
"""

test_query = """
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?name ?mbox
WHERE  { ?x foaf:name  ?name .
         OPTIONAL { ?x  foaf:mbox  ?mbox }
       }
"""

graph = ConjunctiveGraph(plugin.get('IOMemory', Store)())
graph.parse(StringIO(text), format="n3")
print graph.serialize(format='xml')

print
print 'Value Constraints:'
print
results = graph.query(test_query)
#print results.serialize(format='xml')

for row in results:
    print row.name, row.mbox
Example #42
#dabib='2005ApJ...629..700N'

##file to read is output of simad1.py and assumes bibcode.simbad
#print "SIMBAD", simbad[dabib]
#sys.exit(-1)

#Issue, some sources will come again and again and have multiple metadata strings. I think this is fine
#as the triplestore will kill repeated triples. But what if they come in different contexts. Wont we #have multiple statements then. I think we can deal with that but it is something to remember.

odir = DATA + "/data/rdf"
if not os.path.isdir(odir):
    os.makedirs(odir)

for bibcode in simbad.keys():
    g = ConjunctiveGraph(identifier=URIRef(None))
    bindgraph(g)
    for aobject in simbad[bibcode]:
        #print bibcode, aobject['id']
        euri=uri_bib[bibcode]
        eleid=quote_plus("_".join(aobject['id'].split()))
        gadd(g,euri, adsbase.hasAstronomicalSource, uri_source[eleid])
        gadd(g,uri_source[eleid], a, adsbase.AstronomicalSource)
        gadd(g,uri_source[eleid], adsbase.name , Literal(aobject['id']))
        gadd(g,uri_source[eleid], adsobsv.curatedAt, uri_conf['SIMBAD'])
        gadd(g,uri_source[eleid], adsbase.hasMetadataString, Literal(str(aobject)))
        
    serializedstuff=g.serialize()
    if not os.path.isdir(DATA+"/data/rdf"):
            os.makedirs(DATA+"/data/rdf")
    fd=open(odir+"/simbad."+quote_plus(bibcode)+".rdf", "w")
    fd.write(serializedstuff)
    fd.close()
Example #43
class _MemCache(dict):
    # NB: class header and `set` signature reconstructed from context;
    # the original snippet begins mid-class
    def set(self, key, value):
        self[key] = value

    def delete(self, key):
        if key in self:
            del self[key]


DEFAULT_HTTP_CLIENT = Http(_MemCache())
DEFAULT_REQUEST_HEADERS = {
    # NB: the spaces and line-breaks in 'accept' below are a hack
    #     to work around a problem in httplib2:
    #     the cache does not work with arbitrarily long lines
    "accept":
    "application/ld+json, application/n-quads;q=0.9,\r\n application/turtle;q=0.8, application/n-triples;q=0.7,\r\n application/rdf+xml;q=0.6, text/html;q=0.5, */*;q=0.1",
    "user-agent": "hydra-py-v" + __version__,
}

BACKGROUND_KNOWLEDGE = ConjunctiveGraph(
    identifier=URIRef("urn:x-hydra-py:background-knowledge"))

SUBCLASS = RDFS.subClassOf * "*"
SUBPROP = RDFS.subPropertyOf * "*"
LINK_OP = SUBPROP / HYDRA.supportedOperation
RANGE = RDFS.range / SUBCLASS
RANGE_OP = RANGE / HYDRA.supportedOperation
TYPE = RDF.type / SUBCLASS
TYPE_OP = TYPE / HYDRA.supportedOperation

APIDOC_RE = regex(
    r'^<([^>]*)>; rel="http://www.w3.org/ns/hydra/core#apiDocumentation"$')
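The `*` and `/` operators above build rdflib property paths, which can be evaluated directly against a graph; a minimal self-contained sketch:

from rdflib import Graph, Namespace, RDFS

EX = Namespace("http://example.org/")
g = Graph()
g.add((EX.Dog, RDFS.subClassOf, EX.Animal))
g.add((EX.Animal, RDFS.subClassOf, EX.Thing))
# the zero-or-more path yields EX.Dog itself plus both superclasses
print(set(g.objects(EX.Dog, RDFS.subClassOf * "*")))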
Example #44
class ContextTestCase(unittest.TestCase):
    store = "default"
    slow = True
    tmppath = None

    def setUp(self):
        try:
            self.graph = ConjunctiveGraph(store=self.store)
        except ImportError:
            raise SkipTest("Dependencies for store '%s' not available!" % self.store)
        if self.store == "SQLite":
            _, self.tmppath = mkstemp(prefix="test", dir="/tmp", suffix=".sqlite")
        else:
            self.tmppath = mkdtemp()
        self.graph.open(self.tmppath, create=True)
        self.michel = URIRef("michel")
        self.tarek = URIRef("tarek")
        self.bob = URIRef("bob")
        self.likes = URIRef("likes")
        self.hates = URIRef("hates")
        self.pizza = URIRef("pizza")
        self.cheese = URIRef("cheese")

        self.c1 = URIRef("context-1")
        self.c2 = URIRef("context-2")

        # delete the graph for each test!
        self.graph.remove((None, None, None))

    def tearDown(self):
        self.graph.close()
        if os.path.isdir(self.tmppath):
            shutil.rmtree(self.tmppath)
        else:
            os.remove(self.tmppath)

    def addStuff(self):
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)

        graph.add((tarek, likes, pizza))
        graph.add((tarek, likes, cheese))
        graph.add((michel, likes, pizza))
        graph.add((michel, likes, cheese))
        graph.add((bob, likes, cheese))
        graph.add((bob, hates, pizza))
        graph.add((bob, hates, michel))  # gasp!

    def removeStuff(self):
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)

        graph.remove((tarek, likes, pizza))
        graph.remove((tarek, likes, cheese))
        graph.remove((michel, likes, pizza))
        graph.remove((michel, likes, cheese))
        graph.remove((bob, likes, cheese))
        graph.remove((bob, hates, pizza))
        graph.remove((bob, hates, michel))  # gasp!

    def addStuffInMultipleContexts(self):
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        # add to default context
        self.graph.add(triple)
        # add to context 1
        graph = Graph(self.graph.store, c1)
        graph.add(triple)
        # add to context 2
        graph = Graph(self.graph.store, c2)
        graph.add(triple)

    def testConjunction(self):
        if self.store == "SQLite":
            raise SkipTest("Skipping known issue with __len__")
        self.addStuffInMultipleContexts()
        triple = (self.pizza, self.likes, self.pizza)
        # add to context 1
        graph = Graph(self.graph.store, self.c1)
        graph.add(triple)
        self.assertEqual(len(self.graph), len(graph))

    def testAdd(self):
        self.addStuff()

    def testRemove(self):
        self.addStuff()
        self.removeStuff()

    def testLenInOneContext(self):
        c1 = self.c1
        # make sure context is empty

        self.graph.remove_context(self.graph.get_context(c1))
        graph = Graph(self.graph.store, c1)
        oldLen = len(self.graph)

        for i in range(0, 10):
            graph.add((BNode(), self.hates, self.hates))
        self.assertEqual(len(graph), oldLen + 10)
        self.assertEqual(len(self.graph.get_context(c1)), oldLen + 10)
        self.graph.remove_context(self.graph.get_context(c1))
        self.assertEqual(len(self.graph), oldLen)
        self.assertEqual(len(graph), 0)

    def testLenInMultipleContexts(self):
        if self.store == "SQLite":
            raise SkipTest("Skipping known issue with __len__")
        oldLen = len(self.graph)
        self.addStuffInMultipleContexts()

        # addStuffInMultipleContexts is adding the same triple to
        # three different contexts. So it's only + 1
        self.assertEqual(len(self.graph), oldLen + 1)

        graph = Graph(self.graph.store, self.c1)
        self.assertEqual(len(graph), oldLen + 1)

    def testRemoveInMultipleContexts(self):
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        # triple should be still in store after removing it from c1 + c2
        self.assertTrue(triple in self.graph)
        graph = Graph(self.graph.store, c1)
        graph.remove(triple)
        self.assertTrue(triple in self.graph)
        graph = Graph(self.graph.store, c2)
        graph.remove(triple)
        self.assertTrue(triple in self.graph)
        self.graph.remove(triple)
        # now gone!
        self.assertTrue(triple not in self.graph)

        # add again and see if remove without context removes all triples!
        self.addStuffInMultipleContexts()
        self.graph.remove(triple)
        self.assertTrue(triple not in self.graph)

    def testContexts(self):
        triple = (self.pizza, self.hates, self.tarek)  # revenge!

        self.addStuffInMultipleContexts()

        def cid(c):
            return c.identifier

        self.assertTrue(self.c1 in map(cid, self.graph.contexts()))
        self.assertTrue(self.c2 in map(cid, self.graph.contexts()))

        contextList = list(map(cid, list(self.graph.contexts(triple))))
        self.assertTrue(self.c1 in contextList, (self.c1, contextList))
        self.assertTrue(self.c2 in contextList, (self.c2, contextList))

    def testRemoveContext(self):
        c1 = self.c1

        self.addStuffInMultipleContexts()
        self.assertEqual(len(Graph(self.graph.store, c1)), 1)
        self.assertEqual(len(self.graph.get_context(c1)), 1)

        self.graph.remove_context(self.graph.get_context(c1))
        self.assertTrue(self.c1 not in self.graph.contexts())

    def testRemoveAny(self):
        Any = None
        self.addStuffInMultipleContexts()
        self.graph.remove((Any, Any, Any))
        self.assertEqual(len(self.graph), 0)

    def testTriples(self):
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        asserte = self.assertEqual
        triples = self.graph.triples
        graph = self.graph
        c1graph = Graph(self.graph.store, c1)
        c1triples = c1graph.triples
        Any = None

        self.addStuff()

        # unbound subjects with context
        asserte(len(list(c1triples((Any, likes, pizza)))), 2)
        asserte(len(list(c1triples((Any, hates, pizza)))), 1)
        asserte(len(list(c1triples((Any, likes, cheese)))), 3)
        asserte(len(list(c1triples((Any, hates, cheese)))), 0)

        # unbound subjects without context, same results!
        asserte(len(list(triples((Any, likes, pizza)))), 2)
        asserte(len(list(triples((Any, hates, pizza)))), 1)
        asserte(len(list(triples((Any, likes, cheese)))), 3)
        asserte(len(list(triples((Any, hates, cheese)))), 0)

        # unbound objects with context
        asserte(len(list(c1triples((michel, likes, Any)))), 2)
        asserte(len(list(c1triples((tarek, likes, Any)))), 2)
        asserte(len(list(c1triples((bob, hates, Any)))), 2)
        asserte(len(list(c1triples((bob, likes, Any)))), 1)

        # unbound objects without context, same results!
        asserte(len(list(triples((michel, likes, Any)))), 2)
        asserte(len(list(triples((tarek, likes, Any)))), 2)
        asserte(len(list(triples((bob, hates, Any)))), 2)
        asserte(len(list(triples((bob, likes, Any)))), 1)

        # unbound predicates with context
        asserte(len(list(c1triples((michel, Any, cheese)))), 1)
        asserte(len(list(c1triples((tarek, Any, cheese)))), 1)
        asserte(len(list(c1triples((bob, Any, pizza)))), 1)
        asserte(len(list(c1triples((bob, Any, michel)))), 1)

        # unbound predicates without context, same results!
        asserte(len(list(triples((michel, Any, cheese)))), 1)
        asserte(len(list(triples((tarek, Any, cheese)))), 1)
        asserte(len(list(triples((bob, Any, pizza)))), 1)
        asserte(len(list(triples((bob, Any, michel)))), 1)

        # unbound subject, objects with context
        asserte(len(list(c1triples((Any, hates, Any)))), 2)
        asserte(len(list(c1triples((Any, likes, Any)))), 5)

        # unbound subject, objects without context, same results!
        asserte(len(list(triples((Any, hates, Any)))), 2)
        asserte(len(list(triples((Any, likes, Any)))), 5)

        # unbound predicates, objects with context
        asserte(len(list(c1triples((michel, Any, Any)))), 2)
        asserte(len(list(c1triples((bob, Any, Any)))), 3)
        asserte(len(list(c1triples((tarek, Any, Any)))), 2)

        # unbound predicates, objects without context, same results!
        asserte(len(list(triples((michel, Any, Any)))), 2)
        asserte(len(list(triples((bob, Any, Any)))), 3)
        asserte(len(list(triples((tarek, Any, Any)))), 2)

        # unbound subjects, predicates with context
        asserte(len(list(c1triples((Any, Any, pizza)))), 3)
        asserte(len(list(c1triples((Any, Any, cheese)))), 3)
        asserte(len(list(c1triples((Any, Any, michel)))), 1)

        # unbound subjects, predicates without context, same results!
        asserte(len(list(triples((Any, Any, pizza)))), 3)
        asserte(len(list(triples((Any, Any, cheese)))), 3)
        asserte(len(list(triples((Any, Any, michel)))), 1)

        # all unbound with context
        asserte(len(list(c1triples((Any, Any, Any)))), 7)
        # all unbound without context, same result!
        asserte(len(list(triples((Any, Any, Any)))), 7)

        for c in [graph, self.graph.get_context(c1)]:
            # unbound subjects
            asserte(set(c.subjects(likes, pizza)), set((michel, tarek)))
            asserte(set(c.subjects(hates, pizza)), set((bob,)))
            asserte(set(c.subjects(likes, cheese)), set([tarek, bob, michel]))
            asserte(set(c.subjects(hates, cheese)), set())

            # unbound objects
            asserte(set(c.objects(michel, likes)), set([cheese, pizza]))
            asserte(set(c.objects(tarek, likes)), set([cheese, pizza]))
            asserte(set(c.objects(bob, hates)), set([michel, pizza]))
            asserte(set(c.objects(bob, likes)), set([cheese]))

            # unbound predicates
            asserte(set(c.predicates(michel, cheese)), set([likes]))
            asserte(set(c.predicates(tarek, cheese)), set([likes]))
            asserte(set(c.predicates(bob, pizza)), set([hates]))
            asserte(set(c.predicates(bob, michel)), set([hates]))

            asserte(set(c.subject_objects(hates)), set([(bob, pizza), (bob, michel)]))
            asserte(
                set(c.subject_objects(likes)),
                set(
                    [
                        (tarek, cheese),
                        (michel, cheese),
                        (michel, pizza),
                        (bob, cheese),
                        (tarek, pizza),
                    ]
                ),
            )

            asserte(
                set(c.predicate_objects(michel)), set([(likes, cheese), (likes, pizza)])
            )
            asserte(
                set(c.predicate_objects(bob)),
                set([(likes, cheese), (hates, pizza), (hates, michel)]),
            )
            asserte(
                set(c.predicate_objects(tarek)), set([(likes, cheese), (likes, pizza)])
            )

            asserte(
                set(c.subject_predicates(pizza)),
                set([(bob, hates), (tarek, likes), (michel, likes)]),
            )
            asserte(
                set(c.subject_predicates(cheese)),
                set([(bob, likes), (tarek, likes), (michel, likes)]),
            )
            asserte(set(c.subject_predicates(michel)), set([(bob, hates)]))

            asserte(
                set(c),
                set(
                    [
                        (bob, hates, michel),
                        (bob, likes, cheese),
                        (tarek, likes, pizza),
                        (michel, likes, pizza),
                        (michel, likes, cheese),
                        (bob, hates, pizza),
                        (tarek, likes, cheese),
                    ]
                ),
            )

        # remove stuff and make sure the graph is empty again
        self.removeStuff()
        asserte(len(list(c1triples((Any, Any, Any)))), 0)
        asserte(len(list(triples((Any, Any, Any)))), 0)
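The context tests above rest on one storage detail: a triple added to several named contexts is stored only once, so the length of the ConjunctiveGraph grows by one while contexts() still reports every graph the triple appears in. A minimal sketch of that behavior (URIs are illustrative):

from rdflib import ConjunctiveGraph, URIRef

g = ConjunctiveGraph()
t = (URIRef("urn:tarek"), URIRef("urn:likes"), URIRef("urn:pizza"))
g.get_context(URIRef("urn:ctx1")).add(t)
g.get_context(URIRef("urn:ctx2")).add(t)
print(len(g))                    # 1 -- the triple is stored once
print(len(list(g.contexts(t))))  # 2 -- but it is visible in both contexts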
Example #45
0
import os

namespaces = dict(
    rdf=Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
    rdfs=Namespace("http://www.w3.org/2000/01/rdf-schema#"),
    cogrobtut=Namespace(model.COGROBTUT),
    dash=Namespace("http://datashapes.org/dash#"),
    schema=Namespace("http://schema.org/"),
    sh=Namespace("http://www.w3.org/ns/shacl#"),
    xsd=Namespace("http://www.w3.org/2001/XMLSchema#"),
)

#path_database = "/home/admin-franka/franka-web-app/flaskr/static/owl/franka_robolab/"
path_database = "/home/anglerau/GitLab/flask-app/flaskr/static/owl/franka_robolab/"
file_extension = ".owl"
kb = ConjunctiveGraph()
list_parsed_files = []
for f in os.listdir(path_database):
    if f.endswith(file_extension):
        list_parsed_files.append(f)
        with open(path_database + f) as onto:
            new_context = Graph(kb.store, f)
            new_context.parse(onto)


def reload_kb():
    # parse any .owl files added since the initial load, and record them so
    # repeated calls don't re-parse the same file into its context
    for f in os.listdir(path_database):
        if f.endswith(file_extension) and f not in list_parsed_files:
            list_parsed_files.append(f)
            with open(path_database + f) as onto:
                new_context = Graph(kb.store, f)
                new_context.parse(onto)
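Because each file is parsed into a Graph whose identifier is the bare filename, a single ontology can be read back from the shared store by that name. A short sketch, assuming a hypothetical file robot.owl exists in the database directory:

from rdflib import URIRef

ctx = kb.get_context(URIRef("robot.owl"))  # hypothetical filename
print(len(ctx))  # only the triples parsed from that file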
Example #46
0
 def setup_sparql(self):
     self.g = ConjunctiveGraph(store='SPARQLStore')
     self.g.open(self.endpt)
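Once opened against an endpoint, the SPARQLStore-backed graph answers queries over HTTP instead of from local triples. A usage sketch with a hypothetical endpoint URL:

from rdflib import ConjunctiveGraph

g = ConjunctiveGraph(store='SPARQLStore')
g.open("https://example.org/sparql")  # hypothetical endpoint
for row in g.query("SELECT ?s WHERE { ?s ?p ?o } LIMIT 3"):
    print(row.s)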
Example #47
0
class rdfSubject(object):
    db = ConjunctiveGraph()
    """Default graph for access to instances of this type"""
    rdf_type = None
    """rdf:type of instances of this class"""

    def __init__(self, resUri=None, **kwargs):
        """The constructor tries hard to do return you an rdfSubject

        :param resUri: the "resource uri". If `None` then create an instance
        with a BNode resUri. Can be given as one of:

           * an instance of an rdfSubject
           * an instance of a BNode or a URIRef
           * an n3 uriref string like: "<urn:isbn:1234567890>"
           * an n3 bnode string like: "_:xyz1234"
        :param kwargs: is a set of values that will be set using the keys to
        find the appropriate descriptor"""

        if not resUri:  # create a bnode
            self.resUri = BNode()
            if self.rdf_type:
                self.db.add((self.resUri, RDF.type, self.rdf_type))

        elif isinstance(resUri, (BNode, URIRef)):  # use the identifier passed
            self.resUri = resUri
            if self.rdf_type \
                and not list(self.db.triples(
                    (self.resUri, RDF.type, self.rdf_type))):
                self.db.add((self.resUri, RDF.type, self.rdf_type))

        elif isinstance(resUri, rdfSubject):  # use the resUri of the subject
            self.resUri = resUri.resUri
            self.db = resUri.db

        elif isinstance(resUri, (str, unicode)):   # create one from a <uri> or
            if resUri[0] == "<" and resUri[-1] == ">":  # _:bnode string
                self.resUri = URIRef(resUri[1:-1])
            elif resUri.startswith("_:"):
                self.resUri = BNode(resUri[2:])

            if self.rdf_type:
                self.db.add((self.resUri, RDF.type, self.rdf_type))

        else:
            raise AttributeError("cannot construct rdfSubject from %s" % (
                str(resUri)))

        if kwargs:
            self._set_with_dict(kwargs)

    def n3(self):
        """n3 repr of this node"""
        return self.resUri.n3()

    @classmethod
    def _getdescriptor(cls, key):
        """__get_descriptor returns the descriptor for the key.
        It essentially cls.__dict__[key] with recursive calls to super"""
        # NOT SURE if mro is the way to do this or if we should call
        # super() or bases?
        for kls in cls.mro():
            if key in kls.__dict__:
                return kls.__dict__[key]
        raise AttributeError(
            "descriptor %s not found for class %s" % (key, cls))

    # short term hack.  Need to go to a sqlalchemy 0.4 style query method
    # obj.query.get_by should map to obj.get_by  ..same for fetch_by
    @classmethod
    def query(cls):
        return cls

    @classmethod
    def get_by(cls, **kwargs):
        """Class Method, returns a single instance of the class
        by a single kwarg.  the keyword must be a descriptor of the
        class.
        example:

        .. code-block:: python

            bigBlue = Company.get_by(symbol='IBM')

        :Note:
            the keyword should map to an rdf predicate
            that is of type owl:InverseFunctional"""
        if len(kwargs) != 1:
            raise ValueError(
                "get_by wanted exactly 1 but got %i args\n"
                "Maybe you wanted filter_by" % (len(kwargs),))

        key, value = kwargs.popitem()
        if isinstance(value, (URIRef, BNode, Literal)):
            o = value
        else:
            o = Literal(value)
        pred = cls._getdescriptor(key).pred
        uri = cls.db.value(None, pred, o)
        if uri:
            return cls(uri)
        else:
            raise LookupError("%s = %s not found" % (key, value))

    @classmethod
    def filter_by(cls, **kwargs):
        """Class method returns a generator over classs instances
        meeting the kwargs conditions.

        Each keyword must be a class descriptor

        filter by RDF.type == cls.rdf_type is implicit

        Order helps, the first keyword should be the most restrictive
        """
        filters = []
        for key, value in kwargs.items():
            pred = cls._getdescriptor(key).pred
            # try to make the value be OK for the triple query as an object
            if isinstance(value, Identifier):
                obj = value
            else:
                obj = Literal(value)
            filters.append((pred, obj))
        # make sure we filter by type
        if not (RDF.type, cls.rdf_type) in filters:
            filters.append((RDF.type, cls.rdf_type))
        pred, obj = filters[0]
        log.debug("Checking %s, %s" % (pred, obj))
        for sub in cls.db.subjects(pred, obj):
            log.debug("maybe %s" % sub)
            for pred, obj in filters[1:]:
                log.debug("Checking %s, %s" % (pred, obj))
                try:
                    cls.db.triples((sub, pred, obj)).next()
                except StopIteration:
                    log.warn("No %s" % sub)
                    break
            else:
                yield cls(sub)

    @classmethod
    def ClassInstances(cls):
        """return a generator for instances of this rdf:type
        you can look in MyClass.rdf_type to see the predicate being used"""
        beenthere = set([])
        for i in cls.db.subjects(RDF.type, cls.rdf_type):
            if not i in beenthere:
                yield cls(i)
                beenthere.add(i)

    @classmethod
    def GetRandom(cls):
        """for develoment just returns a random instance of this class"""
        from random import choice
        xii = list(cls.ClassInstances())
        return choice(xii)

    def __hash__(self):
        return hash("ranD0Mi$h_" + self.n3())

    def __cmp__(self, other):
        if other is None:
            return False
        else:
            return cmp(self.n3(), other.n3())

    def __repr__(self):
        return """%s('%s')""" % (
            self.__class__.__name__, self.n3().encode('utf-8'))

    if rdflibversion.startswith('2'):
        def __str__(self):
            return str(self.resUri)

    def __getitem__(self, pred):
        log.debug("Getting with __getitem__ %s for %s" % (pred, self.n3()))
        val = self.db.value(self.resUri, pred)
        if isinstance(val, Literal):
            val = val.toPython()
        elif isinstance(val, (BNode, URIRef)):
            val = rdfSubject(val)
        return val

    def __delitem__(self, pred):
        log.debug("Deleting with __delitem__ %s for %s" % (pred, self))
        for s, p, o in self.db.triples((self.resUri, pred, None)):
            self.db.remove((s, p, o))
            # finally if the object in the triple was a bnode
            # cascade delete the thing it referenced
            # ?? FIXME Do we really want to cascade if it's an rdfSubject??
            if isinstance(o, (BNode, rdfSubject)):
                rdfSubject(o)._remove(db=self.db, cascade='bnode')

    def _set_with_dict(self, kv):
        """
        :param kv: a dict

          for each key,value pair in dict kv
               set self.key = value

        """
        for key, value in kv.items():
            descriptor = self.__class__._getdescriptor(key)
            descriptor.__set__(self, value)

    def _remove(
            self, db=None, cascade='bnode',
            bnodeCheck=True, objectCascade=False):
        """
        Remove all triples where this rdfSubject is the subject of the triple

        :param db: limit the remove operation to this graph
        :param cascade: must be one of:

            * none --  remove none
            * bnode -- (default) remove all unreferenced bnodes
            * all -- remove all unreferenced bnode(s) AND uri(s)

        :param bnodeCheck: boolean

            * True -- (default) check bnodes and raise exception if there are
              still references to this node
            * False --  do not check.  This can leave orphaned object references
              in triples.  Use only if you are resetting the value in
              the same transaction
        :param objectCascade: boolean

            * False -- (default) do nothing
            * True -- also delete all triples where this rdfSubject is the
              object of the triple.
        """
        noderef = self.resUri
        log.debug("Called remove on %s" % self)
        if not db:
            db = self.db

        # we cannot delete a bnode if it is still referenced,
        # i.e. if it is the o of a s,p,o
        if bnodeCheck and isinstance(noderef, BNode):
            for s, p, o in db.triples((None, None, noderef)):
                raise RDFAlchemyError(
                    "Cannot delete BNode %s because %s still references it" % (
                    noderef.n3(), s.n3()))

        # determine an appropriate test for cascade decisions
        if cascade == 'bnode':
            # we cannot delete a bnode if there are still references to it
            def test(node):
                if isinstance(node, (URIRef, Literal)):
                    return False
                for s, p, o in db.triples((None, None, node)):
                    return False
                return True
        elif cascade == 'none':

            def f1(node):
                return False
            test = f1
        elif cascade == 'all':

            def f2(node):
                if isinstance(node, Literal):
                    return False
                for s, p, o in db.triples((None, None, node)):
                    return False
                return True
            test = f2
        else:
            raise AttributeError("unknown cascade argument")

        for s, p, o in db.triples((noderef, None, None)):
            db.remove((s, p, o))
            if test(o):
                rdfSubject(o)._remove(db=db, cascade=cascade)

        if objectCascade:
            for s, p, o in db.triples((None, None, noderef)):
                db.remove((s, p, o))

    def _rename(self, name, db=None):
        """rename a node """
        if not db:
            db = self.db
        if not (isinstance(name, (BNode, URIRef))):
            raise AttributeError("cannot rename to %s" % name)
        for s, p, o in db.triples((self.resUri, None, None)):
            db.remove((s, p, o))
            db.add((name, p, o))
        for s, p, o in db.triples((None, None, self.resUri)):
            db.set((s, p, name))
        self.resUri = name

    def _ppo(self, db=None):
        """Like pretty print...
        Return a 'pretty predicate,object' of self
        returning all predicate object pairs with qnames"""
        db = db or self.db
        for p, o in db.predicate_objects(self.resUri):
            print("%20s = %s" % (db.qname(p), str(o)))
        print(" ")
Example #48
0
def test_named_filter_graph_query():
    g = ConjunctiveGraph()
    g.namespace_manager.bind('rdf', RDF)
    g.namespace_manager.bind('rdfs', RDFS)
    ex = Namespace('https://ex.com/')
    g.namespace_manager.bind('ex', ex)
    g.get_context(ex.g1).parse(format="turtle",
                               data=f"""
    PREFIX ex: <{str(ex)}>
    PREFIX rdfs: <{str(RDFS)}>
    ex:Boris rdfs:label "Boris" .
    ex:Susan rdfs:label "Susan" .
    """)
    g.get_context(ex.g2).parse(format="turtle",
                               data=f"""
    PREFIX ex: <{str(ex)}>
    ex:Boris a ex:Person .
    """)

    assert list(
        g.query(
            "SELECT ?l WHERE { GRAPH ex:g1 { ?a rdfs:label ?l } ?a a ?type }",
            initNs={'ex': ex})) == [(Literal('Boris'), )]
    assert list(
        g.query(
            "SELECT ?l WHERE { GRAPH ex:g1 { ?a rdfs:label ?l } FILTER EXISTS { ?a a ?type }}",
            initNs={'ex': ex})) == [(Literal('Boris'), )]
    assert list(
        g.query(
            "SELECT ?l WHERE { GRAPH ex:g1 { ?a rdfs:label ?l } FILTER NOT EXISTS { ?a a ?type }}",
            initNs={'ex': ex})) == [(Literal('Susan'), )]
    assert list(
        g.query("SELECT ?l WHERE { GRAPH ?g { ?a rdfs:label ?l } ?a a ?type }",
                initNs={'ex': ex})) == [(Literal('Boris'), )]
    assert list(
        g.query(
            "SELECT ?l WHERE { GRAPH ?g { ?a rdfs:label ?l } FILTER EXISTS { ?a a ?type }}",
            initNs={'ex': ex})) == [(Literal('Boris'), )]
    assert list(
        g.query(
            "SELECT ?l WHERE { GRAPH ?g { ?a rdfs:label ?l } FILTER NOT EXISTS { ?a a ?type }}",
            initNs={'ex': ex})) == [(Literal('Susan'), )]
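The queries written without a GRAPH block succeed because rdflib's ConjunctiveGraph exposes a union default graph: triples loaded into ex:g2 remain visible at the top level. Reusing g and ex from the test above, the union view can be checked directly:

assert (ex.Boris, RDF.type, ex.Person) in g  # the ex:g2 triple is in the union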
Example #49
0
app = Flask(__name__)
app.config['TEMPLATES_AUTO_RELOAD'] = True
app.config['CACHE_TYPE'] = 'SimpleCache'
app.config['CACHE_DEFAULT_TIMEOUT'] = 300
config = yload(open(join(app.root_path, 'config.yml')).read(),
               Loader=FullLoader)
app.jinja_env.line_statement_prefix = '#'
scache = Cache(app)
extractors = {}
cache = config.get('cache', False)
base = config['base']
store = None

if config.get('store', False):
    store = ConjunctiveGraph('Sleepycat')
    store.open(config['store'], create=True)


@app.route('/')
@scache.cached(timeout=300)
def index():
    #j = sparql('select ?class (count(?class) as ?count) where { ?s a ?class } group by ?class order by DESC(?count)')

    return render_template('index.html',
                           counts=[],
                           base=base,
                           title=config.get('title', 'No title'),
                           description=config.get('description', None),
                           empty_message=config.get('empty_message', None))
Example #50
0
def handle_embedded_annot(data):
    step = 0
    sid = request.sid
    print(sid)
    uri = str(data['url'])
    print('retrieving embedded annotations for ' + uri)
    print("Retrieve KG for uri: " + uri)
    page = requests.get(uri)
    html = page.content
    d = extruct.extract(html,
                        syntaxes=['microdata', 'rdfa', 'json-ld'],
                        errors='ignore')

    print(d)
    kg = ConjunctiveGraph()

    #TODO this is a workaround for Schema.org / json-ld issue
    #print(url_for('static', filename='data/jsonldcontext.json'))
    context_url = "http://*****:*****@context' in md.keys():
            print(md['@context'])
            if ('https://schema.org' in md['@context']) or ('http://schema.org'
                                                            in md['@context']):
                # md['@context'] = 'https://schema.org/docs/jsonldcontext.json'
                md['@context'] = context_url
        kg.parse(data=json.dumps(md, ensure_ascii=False), format="json-ld")
    for md in d['rdfa']:
        if '@context' in md.keys():
            if ('https://schema.org' in md['@context']) or ('http://schema.org'
                                                            in md['@context']):
                #md['@context'] = 'https://schema.org/docs/jsonldcontext.json'
                md['@context'] = context_url
        kg.parse(data=json.dumps(md, ensure_ascii=False), format="json-ld")
    for md in d['microdata']:
        if '@context' in md.keys():
            if ('https://schema.org' in md['@context']) or ('http://schema.org'
                                                            in md['@context']):
                #md['@context'] = 'https://schema.org/docs/jsonldcontext.json'
                md['@context'] = context_url
        kg.parse(data=json.dumps(md, ensure_ascii=False), format="json-ld")

    kgs[sid] = kg

    step += 1
    emit('update_annot', step)
    emit('send_annot', str(kg.serialize(format='turtle').decode()))
    print(len(kg))

    #check if id or doi in uri
    if util.is_DOI(uri):
        uri = util.get_DOI(uri)
        print(f'FOUND DOI: {uri}')
        # describe on lod.openair

    kg = util.describe_loa(uri, kg)
    step += 1
    emit('update_annot', step)
    emit('send_annot', str(kg.serialize(format='turtle').decode()))
    print(len(kg))

    kg = util.describe_opencitation(uri, kg)
    step += 1
    emit('update_annot', step)
    emit('send_annot', str(kg.serialize(format='turtle').decode()))
    print(len(kg))

    kg = util.describe_wikidata(uri, kg)
    step += 1
    emit('update_annot', step)
    emit('send_annot', str(kg.serialize(format='turtle').decode()))
    print(len(kg))

    kg = util.describe_biotools(uri, kg)
    step += 1
    emit('update_annot', step)
    emit('send_annot', str(kg.serialize(format='turtle').decode()))
    print(f'ended with step {step}')
    print(len(kg))
    print(step)
Example #51
0
 def testQueryPlus(self):
     graph = ConjunctiveGraph()
     graph.parse(StringIO(test_data), format="n3")
     result_json = graph.query(test_query).serialize(format='json')
     self.failUnless(result_json.find(correct) > 0)
Example #52
0
    def _process_data(self, document):
        '''
        Creates the RDF graph describing the event
        @param document: the DOM document of the event
        '''
        # Create the graph
        graph = ConjunctiveGraph()
        graph.bind('swc', SWC)
        graph.bind('cfp', CFP)
        graph.bind('ical', ICAL)
        graph.bind('foaf', FOAF)
        graph.bind('dct', DCT)
        graph.bind('lode', LODE)

        # Init the event
        resource_event = LDES[self.get_resource_name()]
        graph.add((resource_event, RDF.type, SWC['AcademicEvent']))

        # Get the title
        if document.find(id='inner_left') != None:
            title = document.find(id='inner_left').find('h1').text
            graph.add((resource_event, RDFS.label, Literal(title)))

        # Get the location
        if document.find(text='City:') != None and document.find(
                text='Country:') != None:
            city = document.find(
                text='City:').findParent().findNextSibling().renderContents()
            country = document.find(text='Country:').findParent(
            ).findNextSibling().renderContents()
            location = get_location(city, country)
            if location == None:
                location = Literal("%s, %s" % (city, country))
            graph.add((resource_event, FOAF['based_near'], location))

        # Get the starting and ending dates
        if document.find(text='Period:') != None:
            text = document.find(text='Period:').findParent().findNextSibling(
            ).renderContents()
            parts = re.search(
                r'(?P<begin>[^-,]*)(-(?P<end>[^,]*))?, (?P<year>\d{4})',
                text).groupdict()
            if parts['begin'] != None and parts['year'] != None:
                (month, start_day) = parts['begin'].split(' ')
                begin_date = datetime.strptime(
                    "%s %s %s" % (start_day, month, parts['year']), "%d %B %Y")
                graph.add(
                    (resource_event, ICAL['dtstart'], Literal(begin_date)))
                if parts['end'] != None:
                    end_parts = parts['end'].split(' ')
                    end_date = None
                    if len(end_parts) == 2:
                        end_date = datetime.strptime(
                            "%s %s %s" %
                            (end_parts[1], end_parts[0], parts['year']),
                            "%d %B %Y")
                    elif len(end_parts) == 1:
                        end_date = datetime.strptime(
                            "%s %s %s" % (end_parts[0], month, parts['year']),
                            "%d %B %Y")
                    if end_date != None:
                        graph.add(
                            (resource_event, ICAL['dtend'], Literal(end_date)))

        # Get the data for the CFP
        resource_cfp = LDES[self.get_resource_name() + "_cfp"]
        graph.add((resource_cfp, RDF.type, CFP['CallForPapers']))
        graph.add((resource_cfp, CFP['for'], LDES[self.entity_id]))
        graph.add(
            (resource_cfp, CFP['details'],
             URIRef(BASE + 'data/' + self.get_resource_name() + '_cfp.txt')))

        # Get the deadlines
        deadlines = []
        for a in document.findAll('script'):
            res = re.search('var deadlineList = ([^;]*);', a.renderContents())
            if res != None:
                txt = res.group(1).replace('\n',
                                           '').replace('\t',
                                                       '').replace("'", '"')
                txt = re.sub(r'<span [^>]*>([^<]*)</span>',
                             r'\g<1>',
                             txt,
                             flags=re.IGNORECASE)
                txt = txt.replace('Date:',
                                  '"Date":').replace('Title:', '"Title":')
                deadlines = json.loads(txt)
        i = 0
        for deadline in deadlines:
            resource_deadline = LDES[self.get_resource_name() + '_deadline_' +
                                     str(i)]
            graph.add((resource_deadline, RDF.type, ICAL['Vevent']))
            graph.add((resource_deadline, ICAL['dtstart'],
                       Literal(datetime.strptime(deadline['Date'],
                                                 "%d %b %Y"))))
            graph.add((resource_deadline, ICAL['dtend'],
                       Literal(datetime.strptime(deadline['Date'],
                                                 "%d %b %Y"))))
            graph.add((resource_deadline, ICAL['summary'],
                       Literal(deadline['Title'])))
            graph.add((resource_deadline, ICAL['relatedTo'], resource_event))
            i = i + 1

        # Add the topics and persons
        if document.find(id='cfp-content') != None:
            for link in document.find(id='cfp-content').findAll('a'):
                link = link.get('href')
                if link != None:
                    if link[:3] == '/t/' and link not in self.topics_set:
                        try:
                            graph.add(
                                (resource_event, DCT['subject'],
                                 LDES[Topic(link[1:-1]).get_resource_name()]))
                            self.topics_set.add(link[1:-1])
                        except Exception:
                            # Ignore bad topic links
                            pass
                    if link[:3] == '/p/' and link not in self.persons_set:
                        try:
                            graph.add(
                                (resource_event, LODE['involvedAgent'],
                                 LDES[Person(link[1:-1]).get_resource_name()]))
                            self.persons_set.add(link[1:-1])
                        except Exception:
                            # Ignore bad person link
                            pass

        # Set the last modification date
        graph.add(
            (self.get_named_graph(), DCT['modified'], Literal(datetime.now())))

        # Save the data
        self.rdf_data = graph.serialize()
Example #53
0
 def __init__(self, db_path):
     self.g = ConjunctiveGraph()
     self.path = db_path
     self.choices = set()
     self.labels = {}
Example #54
0
def example_1():
    """Creates a ConjunctiveGraph and performs some BerkeleyDB tasks with it
    """
    path = mktemp()

    # Declare we are using a BerkeleyDB Store
    graph = ConjunctiveGraph("BerkeleyDB")

    # Open previously created store, or create it if it doesn't exist yet
    # (always doesn't exist in this example as using temp file location)
    rt = graph.open(path, create=False)

    if rt == NO_STORE:
        # There is no underlying BerkeleyDB infrastructure, so create it
        print("Creating new DB")
        graph.open(path, create=True)
    else:
        print("Using existing DB")
        assert rt == VALID_STORE, "The underlying store is corrupt"

    print("Triples in graph before add:", len(graph))
    print("(will always be 0 when using temp file for DB)")

    # Now we'll add some triples to the graph & commit the changes
    EG = Namespace("http://example.net/test/")
    graph.bind("eg", EG)

    graph.add((EG["pic:1"], EG.name, Literal("Jane & Bob")))
    graph.add((EG["pic:2"], EG.name, Literal("Squirrel in Tree")))

    graph.commit()

    print("Triples in graph after add:", len(graph))
    print("(should be 2)")

    # display the graph in Turtle
    print(graph.serialize())

    # close when done, otherwise BerkeleyDB will leak lock entries.
    graph.close()

    graph = None

    # reopen the graph
    graph = ConjunctiveGraph("BerkeleyDB")

    graph.open(path, create=False)

    print("Triples still in graph:", len(graph))
    print("(should still be 2)")

    graph.close()

    # Clean up the temp folder to remove the BerkeleyDB database files...
    for f in os.listdir(path):
        os.unlink(path + "/" + f)
    os.rmdir(path)
Example #55
0
    def test_serialize(self):
        g = ConjunctiveGraph()
        uri1 = URIRef("http://example.org/mygraph1")
        uri2 = URIRef("http://example.org/mygraph2")

        bob = URIRef("urn:bob")
        likes = URIRef("urn:likes")
        pizza = URIRef("urn:pizza")

        g.get_context(uri1).add((bob, likes, pizza))
        g.get_context(uri2).add((bob, likes, pizza))

        s = g.serialize(format="nquads", encoding="latin-1")
        self.assertEqual(len([x for x in s.split(b"\n") if x.strip()]), 2)

        g2 = ConjunctiveGraph()
        g2.parse(data=s, format="nquads")

        self.assertEqual(len(g), len(g2))
        self.assertEqual(
            sorted(x.identifier for x in g.contexts()),
            sorted(x.identifier for x in g2.contexts()),
        )
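N-Quads is used here because each line carries the graph name, so the round-trip preserves which context every triple belongs to; TriG keeps the same information in a more readable block syntax. A sketch reusing s from the test above:

g3 = ConjunctiveGraph()
g3.parse(data=s, format="nquads")
print(g3.serialize(format="trig"))  # same data, grouped by named graph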
Example #56
0
def convert(inputFilesOrDirs,
            inputFormat,
            inputExtensions,
            outputDir,
            outputFormat,
            outputExt,
            recursive=True,
            overwrite=True,
            loggingFunction=None):
    """
    Conversion function.

    @param inputFilesOrDirs : a list of paths (to a file or to a directory)
    @param inputFormat      : input files format (the keys of INPUT_FORMAT_TO_EXTENSIONS)
    @param inputExtensions  : a list of input file extensions (one or more values of INPUT_FORMAT_TO_EXTENSIONS)
    @param outputDir        : the output directory; if None, serialized output is printed to stdout
    @param outputFormat     : output files format (the keys of OUTPUT_FORMAT_TO_EXTENSIONS)
    @param outputExt        : the output files extension (one of the values of OUTPUT_FORMAT_TO_EXTENSIONS)
    @param loggingFunction  : callable used for progress messages (defaults to INFO)
    @param recursive        : if inputFilesOrDirs contains directories, descend into these directories to find all files
    @param overwrite        : True to overwrite any existing file.
    """
    if loggingFunction is None:
        loggingFunction = INFO

    # process each input file sequentially:
    for inputFileOrDir in inputFilesOrDirs:

        loggingFunction("Processing input file or directory '%s'" %
                        inputFileOrDir)

        # check if the file exists, and if it's a directory or a file
        isdir = False
        if os.path.exists(inputFileOrDir):
            if os.path.isdir(inputFileOrDir):
                DEBUG("'%s' exists and is a directory" % inputFileOrDir)
                inputFileOrDir = os.path.abspath(inputFileOrDir)
                isdir = True
            else:
                DEBUG("'%s' exists and is a file" % inputFileOrDir)
        else:
            raise IOError("Input file '%s' was not found" % inputFileOrDir)

        DEBUG("Input format: %s" % inputFormat)
        DEBUG("Output format: %s" % outputFormat)

        # find out which extensions we should match
        if inputExtensions is None:
            inputExtensions = INPUT_FORMAT_TO_EXTENSIONS[inputFormat]

        DEBUG("Input extensions: %s" % inputExtensions)

        # find out which output extension we should write
        if outputExt:
            outputExtension = outputExt
        else:
            outputExtension = OUTPUT_FORMAT_TO_EXTENSION[outputFormat]

        DEBUG("Output extension: '%s'" % outputExtension)

        inputFiles = []

        if isdir:
            DEBUG("Now walking the directory (recursive = %s):" % recursive)
            for root, dirnames, filenames in os.walk(inputFileOrDir):
                DEBUG("   * Finding files in '%s'" % root)
                for extension in inputExtensions:
                    for filename in fnmatch.filter(filenames,
                                                   "*%s" % extension):
                        DEBUG("     -> found '%s'" % filename)
                        inputFiles.append(os.path.join(root, filename))
                if not recursive:
                    break

        else:
            inputFiles.append(inputFileOrDir)

        # create the graph, and parse the input files

        for inputFile in inputFiles:

            g = ConjunctiveGraph()
            g.parse(inputFile, format=inputFormat)

            DEBUG("the graph was parsed successfully")

            # if no output directory is specified, just print the output to the stdout
            if outputDir is None:
                output = g.serialize(None, format=outputFormat)
                DEBUG("output:")
                print(output)
            # if an output directory was provided, but it doesn't exist, then exit the function
            elif not os.path.exists(outputDir):
                raise IOError("Output dir '%s' was not found" % outputDir)
            # if the output directory was given and it exists, then figure out the output filename
            # and write the output to disk
            else:
                head, tail = os.path.split(inputFile)
                DEBUG("head, tail: %s, %s" % (head, tail))

                # remove the common prefix from the head and the input directory
                # (otherwise the given input path will also be added to the output path)
                commonPrefix = os.path.commonprefix([head, inputFileOrDir])
                DEBUG("inputFileOrDir: %s" % inputFileOrDir)
                DEBUG("common prefix: %s" % commonPrefix)
                headWithoutCommonPrefix = head[len(commonPrefix) + 1:]
                DEBUG("head without common prefix: %s" %
                      headWithoutCommonPrefix)
                outputAbsPath = os.path.join(os.path.abspath(outputDir),
                                             headWithoutCommonPrefix)
                DEBUG("output absolute path: %s" % outputAbsPath)

                outputFileName = os.path.splitext(tail)[0] + outputExtension
                outputAbsFileName = os.path.join(outputAbsPath, outputFileName)

                DEBUG("output filename: '%s'" % outputAbsFileName)

                # for safety, check that we're not overwriting the input file
                if outputAbsFileName == os.path.abspath(inputFile):
                    IOError("Input file '%s' is the same as output file" %
                            outputAbsFileName)
                else:
                    DEBUG("this file is different from the input filename")

                # check if we need to skip this file
                skipThisFile = os.path.exists(
                    outputAbsFileName) and not overwrite

                if skipThisFile:
                    DEBUG("this file will be skipped")
                else:
                    dirName = os.path.dirname(outputAbsFileName)
                    if not os.path.exists(dirName):
                        DEBUG("Now creating %s since it does not exist yet" %
                              dirName)
                        os.makedirs(dirName)

                    loggingFunction("Writing %s" % outputAbsFileName)
                    g.serialize(outputAbsFileName,
                                auto_compact=True,
                                format=outputFormat)
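A hypothetical invocation of the converter above: walk ./ontologies recursively for RDF/XML files and write Turtle siblings under ./out, keeping any files already present:

convert(["./ontologies"], "xml", [".rdf", ".owl"],
        "./out", "turtle", ".ttl",
        recursive=True, overwrite=False)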
Example #57
0
class SyncedGraph(CurrentStateGraphApi, AutoDepGraphApi, GraphEditApi):
    """
    graph for clients to use. Changes are synced with the master graph
    in the rdfdb process. 

    self.patch(p: Patch) is the only way to write to the graph.

    Reading can be done with the AutoDepGraphApi methods which set up
    watchers to call you back when the results of the read have
    changed (like knockoutjs). Or you can read with
    CurrentStateGraphApi which doesn't have watchers, but you have to
    opt into using it so it's clear you aren't in an auto-dep context
    and meant to set up watchers.

    You may want to attach to self.initiallySynced deferred so you
    don't attempt patches before we've heard the initial contents of
    the graph. It would be ok to accumulate some patches of new
    material, but usually you won't correctly remove the existing
    statements unless we have the correct graph.

    If we get out of sync, we abandon our local graph (even any
    pending local changes) and get the data again from the server.
    """

    def __init__(self,
                 rdfdbRoot: URIRef,
                 label: str,
                 receiverHost: Optional[str] = None):
        """
        label is a string that the server will display in association
        with your connection

        receiverHost is the hostname other nodes can use to talk to me
        """
        self.isConnected = False
        self.currentClient: Optional[WsClientProtocol] = None
        self.rdfdbRoot = rdfdbRoot
        self.connectSocket()
        self.initiallySynced: defer.Deferred[None] = defer.Deferred()
        self._graph = ConjunctiveGraph()

        AutoDepGraphApi.__init__(self)
        # this needs more state to track if we're doing a resync (and
        # everything has to error or wait) or if we're live

    def lostRdfdbConnection(self) -> None:
        self.isConnected = False
        self.patch(Patch(delQuads=self._graph.quads()))
        log.info(f'cleared graph to {len(self._graph)}')
        log.error('graph is not updating- you need to restart')
        self.connectSocket()       
        
    def connectSocket(self) -> None:
        factory = autobahn.twisted.websocket.WebSocketClientFactory(
            self.rdfdbRoot.replace('http://', 'ws://') + 'syncedGraph',
            # Don't know if this is required by spec, but
            # cyclone.websocket breaks with no origin header.
            origin='foo')
        factory.protocol = lambda: WsClientProtocol(self)

        rr = urllib.parse.urlparse(self.rdfdbRoot)
        conn = reactor.connectTCP(rr.hostname.encode('ascii'), rr.port, factory)
        #WsClientProtocol sets our currentClient. Needs rewrite using agents.

    def resync(self):
        """
        get the whole graph again from the server (e.g. we had a
        conflict while applying a patch and want to return to the
        truth).

        To avoid too much churn, we remember our old graph and diff it
        against the replacement. This way, our callers only see the
        corrections.

        Edits you make during a resync will surely be lost, so I
        should just fail them. There should be a notification back to
        UIs who want to show that we're doing a resync.
        """
        log.info('resync')
        self.currentClient.dropConnection()

    def _resyncGraph(self, response):
        log.warn("new graph in")

        self.currentClient.dropConnection()
        #diff against old entire graph
        #broadcast that change

    def patch(self, p: Patch) -> None:
        """send this patch to the server and apply it to our local
        graph and run handlers"""

        if not self.isConnected or self.currentClient is None:
            log.warn("not currently connected- dropping patch")
            return

        if p.isNoop():
            log.info("skipping no-op patch")
            return

        # these could fail if we're out of sync. One approach:
        # Rerequest the full state from the server, try the patch
        # again after that, then give up.
        debugKey = '[id=%s]' % (id(p) % 1000)
        log.debug("\napply local patch %s %s", debugKey, p)
        try:
            self._applyPatchLocally(p)
        except ValueError as e:
            log.error(e)
            self.resync()
            return
        log.debug('runDepsOnNewPatch')
        self.runDepsOnNewPatch(p)
        log.debug('sendPatch')
        self.currentClient.sendPatch(p)
        log.debug('patch is done %s', debugKey)

    def suggestPrefixes(self, ctx, prefixes):
        """
        when writing files for this ctx, try to use these n3
        prefixes. async, not guaranteed to finish before any
        particular file flush
        """
        treq.post(self.rdfdbRoot + 'prefixes',
                  json.dumps({
                      'ctx': ctx,
                      'prefixes': prefixes
                  }).encode('utf8'))

    def _applyPatchLocally(self, p: Patch):
        # .. and disconnect on failure
        patchQuads(self._graph, p.delQuads, p.addQuads, perfect=True)
        log.debug("graph now has %s statements" % len(self._graph))

    def onPatchFromDb(self, p):
        """
        central server has sent us a patch
        """
        if log.isEnabledFor(logging.DEBUG):
            if len(p.addQuads) > 50:
                log.debug('server has sent us %s', p.shortSummary())
            else:
                log.debug('server has sent us %s', p)
            
        self._applyPatchLocally(p)
        try:
            self.runDepsOnNewPatch(p)
        except Exception:
            # don't reflect this error back to the server; we did
            # receive its patch correctly. However, we're in a bad
            # state since some dependencies may not have rerun
            traceback.print_exc()
            log.warn("some graph dependencies may not have completely run")

        if self.initiallySynced:
            self.initiallySynced.callback(None)
            self.initiallySynced = None
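A client of SyncedGraph never mutates the local graph directly; every write goes through patch(). A hedged sketch, assuming a running rdfdb server at the given root and that Patch accepts addQuads/delQuads keyword arguments as used above:

from rdflib import URIRef, Literal

g = SyncedGraph(URIRef("http://localhost:9999/"), label="demo-client")  # hypothetical root
ctx = URIRef("http://example.com/graph1")
g.patch(Patch(addQuads=[
    (URIRef("urn:light1"), URIRef("urn:brightness"), Literal(0.5), ctx),
]))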
Example #58
0
class MirbaseDB(object):
    def __init__(self, db_path):
        self.g = ConjunctiveGraph()
        self.path = db_path
        self.choices = set()
        self.labels = {}

    def create_graph(self):
        self.g.open(self.path + "data.rdf", create=True)
        data = self.parse_mirbase(self.path)
        #g = ConjunctiveGraph(store="SPARQLUpdateStore")
        # g.bind()
        mirna_class = URIRef("http://purl.obolibrary.org/obo/SO_0000276")
        for mid in data:
            mirna_instance = URIRef(MIRBASE + data[mid]["acc"])
            self.g.add((mirna_instance, RDF.type, mirna_class))
            label = Literal(data[mid]["name"])
            self.g.add((mirna_instance, RDFS.label, label))
            description = Literal(data[mid]["description"])
            self.g.add((mirna_instance, RDFS.comment, description))
            for p in data[mid]["previous_names"]:
                if p.strip():
                    previous_name = Literal(p)
                    self.g.add((mirna_instance, MIRBASE["previous_acc"], previous_name))
            for mature in data[mid]["mature"]:
                mature_instance = URIRef(MIRBASE + data[mid]["mature"][mature]["acc"])
                self.g.add((mature_instance, RDF.type, mirna_class))
                mature_label = Literal(data[mid]["mature"][mature]["name"])
                self.g.add((mature_instance, RDFS.label, mature_label))
                for mature_p in data[mid]["mature"][mature]["previous_names"]:
                    if mature_p.strip():
                        mature_previous_name = Literal(mature_p)
                        self.g.add((mature_instance, MIRBASE["previous_acc"], mature_previous_name))
                self.g.add((mirna_instance, MIRBASE["stemloopOf"], mature_instance))


    def parse_mirbase(self, mirbase_root):
        mirna_dic = {}
        with open(mirbase_root + "mirna.txt") as mirnas:
            for m in mirnas:
                props = m.strip().split("\t")
                mname = props[2]
                mid = props[0]
                macc = props[1]
                mdesc = props[4]
                mprev = props[3].split(";")
                if int(props[-1]) != 22: # not Homo sapiens
                    continue
                mirna_dic[mid] = {}
                mirna_dic[mid]["name"] = mname
                mirna_dic[mid]["acc"] = macc
                mirna_dic[mid]["previous_names"] = mprev
                mirna_dic[mid]["description"] = mdesc
        mature_dic = {}
        with open(mirbase_root + "mirna_mature.txt") as mirnas:
            for m in mirnas:
                props = m.strip().split("\t")
                mname = props[1]
                mid = props[0]
                macc = props[3]
                # mdesc = props[4]
                mprev = props[2].split(";")
                if not mname.startswith("hsa-"): # not Homo sapiens
                    continue
                mature_dic[mid] = {}
                mature_dic[mid]["name"] = mname
                mature_dic[mid]["previous_names"] = mprev
                mature_dic[mid]["acc"] = macc
        with open(mirbase_root + "mirna_pre_mature.txt") as mirnas:
            for m in mirnas:
                props = m.strip().split("\t")
                mid, matureid = props[:2]
                if mid in mirna_dic:
                    if "mature" not in mirna_dic[mid]:
                        mirna_dic[mid]["mature"] = {}
                    mirna_dic[mid]["mature"][matureid] = mature_dic[matureid]
        # pp.pprint(mirna_dic)
        return mirna_dic

    def map_label(self, label):
        label = label.lower()
        label = label.replace("microrna", "mir")
        label = label.replace("mirna", "mir")
        if not label.startswith("hsa-"):
            label = "hsa-" + label

        result = process.extractOne(label, self.choices)
        # result = process.extract(label, choices, limit=3)
        """if result[1] != 100:
            print
            print "original:", label.encode("utf-8"), result
            # if label[-1].isdigit():
            #     label += "a"
            # else:
            new_label = label + "-1"
            revised_result = process.extractOne(new_label, self.choices)
            if revised_result[1] != 100:
                new_label = label + "a"
                revised_result = process.extractOne(new_label, self.choices)
            if revised_result[1] > result[1]:
                result = revised_result
                print "revised:", label.encode("utf-8"), result"""

        return result


    def load_graph(self):
        self.g.load(self.path + "data.rdf")
        # print "Opened graph with {} triples".format(len(self.g))
        self.get_label_to_acc()
        self.choices = self.labels.keys()

    def get_label_to_acc(self):
        for subj, pred, obj in self.g.triples((None, RDFS.label, None)):
            self.labels[str(obj)] = str(subj)

    def save_graph(self):
        self.g.serialize(self.path + "data.rdf", format='pretty-xml')
        print('Triples in graph after add: ', len(self.g))
        self.g.close()
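The intended lifecycle of the class above: build the graph once from the miRBase dump files, persist it, then reload it for label lookups. A sketch with a hypothetical data directory:

db = MirbaseDB("/data/mirbase/")  # hypothetical path holding mirna.txt etc.
db.create_graph()
db.save_graph()

db2 = MirbaseDB("/data/mirbase/")
db2.load_graph()
print(db2.map_label("miR-21"))  # fuzzy-matches against the loaded labels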
Example #59
0
    def handle(self, **options):
        LOGGER.debug("linking places")
        for place in models.Place.objects.filter(dbpedia__isnull=True):
            if not place.city or not place.state:
                continue

            # formulate a dbpedia place uri
            path = urllib2.quote('%s,_%s' %
                                 (_clean(place.city), _clean(place.state)))
            url = URIRef('http://dbpedia.org/resource/%s' % path)

            # attempt to get a graph from it
            graph = ConjunctiveGraph()
            try:
                LOGGER.debug("looking up %s" % url)
                graph.load(url)
            except urllib2.HTTPError as e:
                LOGGER.error(e)

            # if we've got more than 3 assertions extract some stuff from
            # the graph and save back some info to the db, would be nice
            # to have a triple store underneath where we could persist
            # all the facts eh?

            if len(graph) >= 3:
                place.dbpedia = url
                place.latitude = graph.value(url, geo['lat'])
                place.longitude = graph.value(url, geo['long'])
                for object in graph.objects(URIRef(url), owl['sameAs']):
                    if object.startswith('http://sws.geonames.org'):
                        place.geonames = object
                place.save()
                LOGGER.info("found dbpedia resource %s" % url)
            else:
                LOGGER.warn("couldn't find dbpedia resource for %s" % url)

            reset_queries()
        LOGGER.info("finished looking up places in dbpedia")

        LOGGER.info("dumping place_links.json fixture")

        # so it would be nice to use django.core.serializer here
        # but it serializes everything about the model, including
        # titles that are linked to ... and this could theoretically
        # change over time, so we only preserve the facts that have
        # been harvested from dbpedia, so they can overlay over
        # the places that have been extracted during title load

        json_src = []
        places_qs = models.Place.objects.filter(dbpedia__isnull=False)
        for p in places_qs.order_by('name'):
            json_src.append({
                'name': p.name,
                'dbpedia': p.dbpedia,
                'geonames': p.geonames,
                'longitude': p.longitude,
                'latitude': p.latitude
            })
            reset_queries()
        json.dump(json_src,
                  file('core/fixtures/place_links.json', 'w'),
                  indent=2)
        LOGGER.info("finished dumping place_links.json fixture")
Example #60
0
class Inspector(object):

    """ Class that includes methods for querying an RDFS/OWL ontology """

    def __init__(self, uri, language=""):
        super(Inspector, self).__init__()
        self.rdfGraph = ConjunctiveGraph()
        try:
            self.rdfGraph.parse(uri, format="application/rdf+xml")
        except Exception:
            try:
                self.rdfGraph.parse(uri, format="n3")
            except Exception:
                raise exceptions.Error("Could not parse the file! Is it a valid RDF/OWL ontology?")
        finally:
            self.baseURI = self.get_OntologyURI() or uri
            self.allclasses = self.__getAllClasses(includeDomainRange=True, includeImplicit=True, removeBlankNodes=False, excludeRDF_OWL=False)

    def get_OntologyURI(self, return_as_string=True):
        test = [x for x, y, z in self.rdfGraph.triples((None, RDF.type, Ontology))]
        if test:
            if return_as_string:
                return str(test[0])
            else:
                return test[0]
        else:
            return None

    def __getAllClasses(self, classPredicate="", includeDomainRange=False, includeImplicit=False, removeBlankNodes=True, addOWLThing=True, excludeRDF_OWL=True):

        rdfGraph = self.rdfGraph
        exit = {}

        def addIfYouCan(x, mydict):
            if excludeRDF_OWL:
                if x.startswith('http://www.w3.org/2002/07/owl#') or  \
                   x.startswith("http://www.w3.org/1999/02/22-rdf-syntax-ns#") or \
                   x.startswith("http://www.w3.org/2000/01/rdf-schema#"):
                    return mydict
            if x not in mydict:
                mydict[x] = None
            return mydict

        if addOWLThing:
            exit = addIfYouCan(Thing, exit)

        if classPredicate == "rdfs" or classPredicate == "":
            for s in rdfGraph.subjects(RDF.type, RDFS.Class):
                exit = addIfYouCan(s, exit)

        if classPredicate == "owl" or classPredicate == "":
            for s in rdfGraph.subjects(RDF.type, Class):
                exit = addIfYouCan(s, exit)

        if includeDomainRange:
            for o in rdfGraph.objects(None, RDFS.domain):
                exit = addIfYouCan(o, exit)
            for o in rdfGraph.objects(None, RDFS.range):
                exit = addIfYouCan(o, exit)

        if includeImplicit:
            for s, v, o in rdfGraph.triples((None, RDFS.subClassOf, None)):
                exit = addIfYouCan(s, exit)
                exit = addIfYouCan(o, exit)
            for o in rdfGraph.objects(None, RDF.type):
                exit = addIfYouCan(o, exit)

        # get a list
        exit = exit.keys()
        if removeBlankNodes:
            exit = [x for x in exit if not isBlankNode(x)]
        return sort_uri_list_by_name(exit)

    def __getTopclasses(self, classPredicate=''):
        returnlist = []

        for eachclass in self.__getAllClasses(classPredicate):
            x = self.get_classDirectSupers(eachclass)
            if not x:
                returnlist.append(eachclass)
        return sort_uri_list_by_name(returnlist)

    def __getTree(self, father=None, out=None):
        if not father:
            out = {}
            topclasses = self.toplayer
            out[0] = topclasses

            for top in topclasses:
                children = self.get_classDirectSubs(top)
                out[top] = children
                for potentialfather in children:
                    self.__getTree(potentialfather, out)

            return out

        else:
            children = self.get_classDirectSubs(father)
            out[father] = children
            for ch in children:
                self.__getTree(ch, out)

    def __buildClassTree(self, father=None, out=None):
        if not father:
            out = {}
            topclasses = self.toplayer
            out[0] = [Thing]
            out[Thing] = sort_uri_list_by_name(topclasses)
            for top in topclasses:
                children = self.get_classDirectSubs(top)
                out[top] = sort_uri_list_by_name(children)
                for potentialfather in children:
                    self.__buildClassTree(potentialfather, out)
            return out
        else:
            children = self.get_classDirectSubs(father)
            out[father] = sort_uri_list_by_name(children)
            for ch in children:
                self.__buildClassTree(ch, out)

    # methods for getting ancestors and descendants of classes: by default, we do not include blank nodes
    def get_classDirectSupers(self, aClass, excludeBnodes=True, sortUriName=False):
        returnlist = []
        for o in self.rdfGraph.objects(aClass, RDFS.subClassOf):
            if not (o == Thing):
                if excludeBnodes:
                    if not isBlankNode(o):
                        returnlist.append(o)
                else:
                    returnlist.append(o)
        if sortUriName:
            return sort_uri_list_by_name(remove_duplicates(returnlist))
        else:
            return remove_duplicates(returnlist)

    def get_classDirectSubs(self, aClass, excludeBnodes=True):
        returnlist = []
        for s, v, o in self.rdfGraph.triples((None, RDFS.subClassOf, aClass)):
            if excludeBnodes:
                if not isBlankNode(s):
                    returnlist.append(s)
            else:
                returnlist.append(s)
        return sort_uri_list_by_name(remove_duplicates(returnlist))

    def get_classSiblings(self, aClass, excludeBnodes=True):
        returnlist = []
        for father in self.get_classDirectSupers(aClass, excludeBnodes):
            for child in self.get_classDirectSubs(father, excludeBnodes):
                if child != aClass:
                    returnlist.append(child)

        return sort_uri_list_by_name(remove_duplicates(returnlist))

    def entitySynonyms(self, anEntity, language=DEFAULT_LANGUAGE, getall=True):
        if getall:
            temp = []
            # Uberon synonyms
            for o in self.rdfGraph.objects(anEntity, Synonym):
                temp += [o]
            # EFO synonyms
            for o in self.rdfGraph.objects(anEntity, EFO_Synonym):
                temp += [o]
            # OBI synonyms
            for o in self.rdfGraph.objects(anEntity, OBO_Synonym):
                temp += [o]
            return temp
        else:
            for o in self.rdfGraph.objects(anEntity, Synonym):
                if getattr(o, 'language') and getattr(o, 'language') == language:
                    return o
            return ""

    def classFind(self, name, exact=False):
        temp = []
        if name:
            for x in self.allclasses:
                if exact:
                    if x.__str__().lower() == str(name).lower():
                        return [x]
                else:
                    if x.__str__().lower().find(str(name).lower()) >= 0:
                        temp.append(x)
        return temp
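Putting the Inspector to work: parse an ontology, then search the collected classes. A sketch against a hypothetical ontology URL:

insp = Inspector("http://example.org/ontology.owl")  # hypothetical ontology
print(insp.baseURI)
for klass in insp.classFind("person"):
    print(klass, insp.get_classDirectSupers(klass))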