Example #1
    def store(self,
              cur_g,
              base_dir,
              base_iri,
              context_path,
              tmp_dir=None,
              override=False,
              already_processed={},
              store_now=True,
              remove_data=False):
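        # Note: `already_processed={}` is a shared mutable default that
        # persists across calls; pass a fresh dict if you do not want results
        # to accumulate between invocations.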
        self.repok.new_article()
        self.reperr.new_article()

        if len(cur_g) > 0:
            cur_dir_path, cur_file_path = self.dir_and_file_paths(
                cur_g, base_dir, base_iri)

            try:
                if not os.path.exists(cur_dir_path):
                    os.makedirs(cur_dir_path)

                final_g = ConjunctiveGraph()
                final_g.addN(
                    [item + (cur_g.identifier, ) for item in list(cur_g)])

                # Remove the data
                if remove_data:
                    stored_g = None
                    if cur_file_path in already_processed:
                        stored_g = already_processed[cur_file_path]
                    elif os.path.exists(cur_file_path):
                        stored_g = self.load(cur_file_path, cur_g, tmp_dir)

                    # Nothing stored yet: there is nothing to remove from,
                    # so fall back to an empty graph.
                    if stored_g is None:
                        stored_g = ConjunctiveGraph()

                    for s, p, o, g in final_g.quads((None, None, None, None)):
                        stored_g.remove((s, p, o, g))

                    final_g = stored_g
                elif not override:  # Merging the data
                    if cur_file_path in already_processed:
                        stored_g = already_processed[cur_file_path]
                        stored_g.addN(final_g.quads((None, None, None, None)))
                        final_g = stored_g
                    elif os.path.exists(cur_file_path):
                        # This is a conjunctive graph that contains all the triples
                        # (and graphs) the file actually defines - there could be
                        # more than those using 'cur_subject' as their subject.
                        final_g = self.load(cur_file_path, cur_g, tmp_dir)

                already_processed[cur_file_path] = final_g

                if store_now:
                    self.__store_in_file(final_g, cur_file_path, context_path)

                return already_processed
            except Exception as e:
                self.reperr.add_sentence(
                    "[5] It was impossible to store the RDF statements in %s. %s"
                    % (cur_file_path, str(e)))

        return None
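All of the store-style examples on this page rely on the same idiom: each (s, p, o) triple of a named graph is extended with that graph's identifier so that ConjunctiveGraph.addN() can ingest it as a quad. A minimal, self-contained sketch of just that promotion (the graph IRIs are hypothetical):

from rdflib import ConjunctiveGraph, Graph, URIRef

source = Graph(identifier=URIRef("urn:example:g1"))
source.add((URIRef("urn:example:s"),
            URIRef("urn:example:p"),
            URIRef("urn:example:o")))

target = ConjunctiveGraph()
# Promote each triple to a quad by appending the source graph's identifier.
target.addN(triple + (source.identifier,) for triple in source)

assert len(target) == 1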
Example #2
def __store_graph(cur_g, rdf_iri_string, d_dir):
    try:
        res_dir, dest_file = \
            find_paths(rdf_iri_string, args.base + os.sep, "https://w3id.org/oc/corpus/", 10000, 1000)
        
        dest_dir = res_dir.replace(args.base + os.sep, d_dir + os.sep)
        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)
        
        cur_file = dest_file.replace(res_dir, dest_dir)
        if os.path.exists(cur_file):
            c_graph = __load_graph(cur_file)
        else:
            c_graph = ConjunctiveGraph()

        c_graph.remove_context(c_graph.get_context(cur_g.identifier))
        c_graph.addN([item + (cur_g.identifier,) for item in list(cur_g)])
        
        with open(cur_file, "w") as f:
            cur_json_ld = json.loads(c_graph.serialize(format="json-ld", context=context_json))
            cur_json_ld["@context"] = context_path
            json.dump(cur_json_ld, f, indent=4)
        # repok.add_sentence("File '%s' added." % cur_file)
        return cur_file  # the path that was actually written
    except Exception as e:
        reperr.add_sentence("[5] It was impossible to store the RDF statements in %s. %s" %
                            (dest_file, str(e)))
Example #3
    def __store_in_file(self, cur_g, cur_file_path, context_path):
        # Note: the following lines, from here down to 'cur_json_ld', are a sort of
        # hack for including all the triples of the input graph in the final stored
        # file. Somehow, some of them - in particular the provenance ones - are not
        # written to that file otherwise.
        new_g = ConjunctiveGraph()
        for s, p, o in cur_g.triples((None, None, None)):
            g_iri = None
            for g_context in cur_g.contexts((s, p, o)):
                g_iri = g_context.identifier
                break

            new_g.addN([(s, p, o, g_iri)])

        if not self.nt and not self.nq and context_path:
            cur_json_ld = json.loads(
                new_g.serialize(
                    format="json-ld",
                    context=self.__get_context(context_path)).decode("utf-8"))

            if isinstance(cur_json_ld, dict):
                cur_json_ld["@context"] = context_path
            else:  # it is a list
                for item in cur_json_ld:
                    item["@context"] = context_path

            with open(cur_file_path, "w") as f:
                json.dump(cur_json_ld, f, indent=4, ensure_ascii=False)
        elif self.nt:
            new_g.serialize(cur_file_path, format="nt11", encoding="utf-8")
        elif self.nq:
            new_g.serialize(cur_file_path, format="nquads", encoding="utf-8")

        self.repok.add_sentence("File '%s' added." % cur_file_path)
Example #4
def test_quad_contexts():
    g = ConjunctiveGraph()
    a = URIRef("urn:a")
    b = URIRef("urn:b")
    g.get_context(a).add((a, a, a))
    g.addN([(b, b, b, b)])

    assert set(g) == set([(a, a, a), (b, b, b)])
    for q in g.quads():
        assert isinstance(q[3], Graph)
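The (b, b, b, b) quad in this test is worth a note: the fourth element may be a bare URIRef rather than a Graph, because ConjunctiveGraph.addN() resolves identifiers to context graphs internally. A small sketch:

from rdflib import ConjunctiveGraph, Graph, URIRef

g = ConjunctiveGraph()
b = URIRef("urn:b")
g.addN([(b, b, b, b)])  # context given as a plain URIRef

ctx = g.get_context(b)  # resolved to a real context Graph
assert isinstance(ctx, Graph) and ctx.identifier == b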
Example #6
    def get_where(graph, args):
        s, p, o, c = _spoc(args)

        result = ConjunctiveGraph()

        for subgraph in (x for x in graph.store.contexts((s, p, o))
                         if c is None or x.identifier == c):
            result.addN((s, p, o, subgraph.identifier)
                        for s, p, o in subgraph.triples((None, None, None)))
        return result
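get_where() reaches into graph.store directly and depends on the helper _spoc(); an equivalent filter using only public rdflib API can copy matching quads straight into a fresh graph, since quads() already yields (s, p, o, context) tuples that addN() accepts. A hedged sketch with a hypothetical context IRI:

from rdflib import ConjunctiveGraph, URIRef

g = ConjunctiveGraph()
ctx = URIRef("urn:example:ctx")
g.addN([(URIRef("urn:a"), URIRef("urn:b"), URIRef("urn:c"), ctx)])

result = ConjunctiveGraph()
result.addN(g.quads((None, None, None, None)))
assert len(result) == 1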
Example #7
    def store_graphs_in_file(self, file_path, context_path):
        self.repok.new_article()
        self.reperr.new_article()
        self.repok.add_sentence("Store the graphs into a file: starting process")

        cg = ConjunctiveGraph()
        for g in self.g:
            cg.addN([item + (g.identifier,) for item in list(g)])

        self.__store_in_file(cg, file_path, context_path)
Example #9
    def _graph(self):
        """Lazy loading of the _graph attribute

        This property getter will be called only when the instance attribute self._graph has been deleted.
        In that case, it will load the graph from self.identifier.

        This is used by the `from_iri`:meth: class method,
        to ensure that graphs are only loaded when required...
        """
        if '_graph' in self.__dict__:
            return self.__dict__['_graph']

        headers = self.__dict__.pop('_headers')
        http = self.__dict__.pop('_http')
        base_iri = self._identifier.split('#', 1)[0]
        effective_headers = dict(DEFAULT_REQUEST_HEADERS)
        if headers:
            effective_headers.update(headers)
        http = http or DEFAULT_HTTP_CLIENT

        LOG.info('downloading <%s>', base_iri)
        response, content = http.request(base_iri,
                                         "GET",
                                         headers=effective_headers)
        LOG.debug('got %s %s %s', response.status, response['content-type'],
                  response.fromcache)
        if response.status // 100 != 2:
            raise HttpLib2ErrorWithResponse(response.reason, response, content)

        source = StringInputSource(content)
        ctype = response['content-type'].split(';', 1)[0]
        g = ConjunctiveGraph(identifier=base_iri)
        g.addN(BACKGROUND_KNOWLEDGE.quads())
        g.parse(source, base_iri, ctype)
        _fix_default_graph(g)

        # if available, load API Documentation in a separate graph
        links = response.get('link')
        if links:
            if not isinstance(links, list):
                links = [links]
            for link in links:
                match = APIDOC_RE.match(link)
                if match:
                    self._api_doc = apidoc_iri = URIRef(match.groups()[0])
                    if apidoc_iri != self.identifier:
                        apidoc = ApiDocumentation.from_iri(
                            apidoc_iri, headers, http)
                        g.addN(apidoc.graph.quads())
                    break

        self.__dict__['_graph'] = g
        return g
Example #10
def dump():
    query = '''SELECT ?x ?y ?z ?g where { GRAPH ?g {?x ?y ?z}}'''
    data = get_query(query)
    if data:
        g = ConjunctiveGraph()
        for q in data:
            if q['z']['type'] == 'uri':
                g.addN([(URIRef(q['x']['value']), URIRef(q['y']['value']),
                         URIRef(q['z']['value']), URIRef(q['g']['value']))])
            else:
                g.addN([(URIRef(q['x']['value']), URIRef(q['y']['value']),
                         Literal(q['z']['value']), URIRef(q['g']['value']))])
        g.serialize("dump/dump.nq", format="nquads")
Example #11
def uploadDocumentContext(annfile):
    docid = annfile.split('/').pop().split('.')[0]
    cg = ConjunctiveGraph(identifier=gid['tempUploadGraph'])
    cg.addN([(s,p,o,gid[docid]) for (s,p,o) in ann2rdf(annfile)])

    r = requests.post(
        AGVM_VC_REPO + "/statements",
        headers={'Content-Type': 'text/x-nquads'},
        data=cg.serialize(format='nquads'),
        auth=AG_AUTH,
        params={"commit":1000}
    )
    return r.content
Example #12
def graphWithoutMetadata(g, ignorePredicates=[]):
    """
    graph filter that removes any statements whose subjects are
    contexts in the graph and also any statements with the given
    predicates
    """

    ctxs = [ctx.identifier for ctx in g.contexts()]

    out = ConjunctiveGraph()
    for stmt in g.quads((None, None, None)):
        if stmt[0] not in ctxs and stmt[1] not in ignorePredicates:
            out.addN([stmt])
    return out
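A quick hedged usage sketch for graphWithoutMetadata() as defined above: statements whose subject is itself a context identifier are filtered out, everything else is kept.

from rdflib import ConjunctiveGraph, URIRef

g = ConjunctiveGraph()
ctx = URIRef("urn:example:ctx")
g.addN([(URIRef("urn:s"), URIRef("urn:p"), URIRef("urn:o"), ctx),
        (ctx, URIRef("urn:created"), URIRef("urn:today"), ctx)])

filtered = graphWithoutMetadata(g)
assert len(filtered) == 1  # the statement about ctx itself was dropped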
Example #15
    def __init__(self, location, repository, inmemory=False):
        super(RDFLibStore, self).__init__(location, repository)
        self.inmemory = inmemory
        self.closed = False
        graphid = URIRef("file://" + self.repository)
        g = ConjunctiveGraph(store=self._storeid(), identifier=graphid)
        if os.path.exists(self.location):
            g.open(self.location, create=False)
        else:
            g.open(self.location, create=True)

        l = logging.getLogger(__name__)
        if inmemory:
            l.debug("Loading store into memory")
            ig = ConjunctiveGraph(identifier=graphid)
            ig.addN(g.quads())
            g.close()
            self.graph = ig
        else:
            l.debug("Using on-disk store")
            self.graph = g
Example #17
    def store(self, cur_g, base_dir, base_iri, context_path, tmp_dir=None,
              override=False, already_processed={}, store_now=True):
        self.repok.new_article()
        self.reperr.new_article()

        if len(cur_g) > 0:
            cur_subject = set(cur_g.subjects(None, None)).pop()
            cur_dir_path, cur_file_path = find_paths(
                str(cur_subject), base_dir, base_iri, self.dir_split, self.n_file_item)

            try:
                if not os.path.exists(cur_dir_path):
                    os.makedirs(cur_dir_path)

                final_g = ConjunctiveGraph()
                final_g.addN([item + (cur_g.identifier,) for item in list(cur_g)])

                # Merging the data
                if not override:
                    if cur_file_path in already_processed:
                        stored_g = already_processed[cur_file_path]
                        stored_g.addN(final_g.quads((None, None, None, None)))
                        final_g = stored_g
                    elif os.path.exists(cur_file_path):
                        # This is a conjunctive graph that contains all the triples
                        # (and graphs) the file actually defines - there could be
                        # more than those using 'cur_subject' as their subject.
                        final_g = self.load(cur_file_path, cur_g, tmp_dir)

                already_processed[cur_file_path] = final_g

                if store_now:
                    self.__store_in_file(final_g, cur_file_path, context_path)

                return already_processed
            except Exception as e:
                self.reperr.add_sentence("[5] It was impossible to store the RDF statements in %s. %s" %
                                         (cur_file_path, str(e)))

        return None
Example #18
            txy_list.append((t, x, y))
            accident_url_list.append(ident)

    yield from accident_coverage_triples(txy_list, accident_url_list)

parser = ArgumentParser()
parser.add_argument(
    '--tweets', type=FileType('r'), default='data/tweets.json'
)
parser.add_argument(
    '--streets', type=FileType('r'), default='data/streets.json'
)
parser.add_argument(
    '--out', type=FileType('wb'), default='data/accidents.ttl'
)

if __name__ == '__main__':
    args    = parser.parse_args()
    streets = load(args.streets)
    tweets  = load(args.tweets)
    print('Loaded {} tweets and {} streets'.format(len(tweets), len(streets)))

    fuseki = ConjunctiveGraph(store='SPARQLUpdateStore')
    fuseki.open(('http://localhost:3030/accidents/query',
                'http://localhost:3030/accidents/update'))
    default = 'urn:x-arq:DefaultGraph'

    add_namespaces(fuseki)
    fuseki.addN((s,p,o,default) for s,p,o in build_graph(tweets))
    fuseki.close()
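One hedged nit on the snippet above: the default-graph context is passed as a plain string, while wrapping it in a URIRef is the more idiomatic rdflib spelling for a graph identifier (endpoints and build_graph as defined above):

from rdflib import URIRef

default = URIRef('urn:x-arq:DefaultGraph')
fuseki.addN((s, p, o, default) for s, p, o in build_graph(tweets))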
Example #19
def ProjectRdf(data):
    g = ConjunctiveGraph()
    graph = URIRef(app.config["BASE_URI"] + "graph/" + data["Id"])
    proj = URIRef(app.config["BASE_URI"] + "project/" + data["Id"])
    DOAP = Namespace("http://usefulinc.com/ns/doap#")
    g.addN([(proj, RDF.type, FOAF.Project, graph)])
    g.addN([(proj, DCTERMS.title, Literal(data["Title"]), graph)])
    g.addN([(proj, DCTERMS.description, Literal(data["Description"]), graph)])
    g.addN([(proj, DOAP.GitRepository, URIRef(data["Repository"]), graph)])
    if data["Homepage"]:
        g.addN([(proj, FOAF.homepage, URIRef(data["Homepage"]), graph)])

    #authors
    for aut in data["Aut"]:
        aut_uri = URIRef(app.config["BASE_URI"] + "person/" + parse.quote(
            data["Aut"][aut]["Mail"].split("@")[0].replace(".", "_"), safe=""))
        g.addN([(proj, DCTERMS.creator, aut_uri, graph)])
        g.addN([(aut_uri, RDF.type, FOAF.Person, graph)])
        g.addN([(aut_uri, FOAF.givenName, Literal(data["Aut"][aut]["Name"]),
                 graph)])
        g.addN([(aut_uri, FOAF.familyName,
                 Literal(data["Aut"][aut]["Surname"]), graph)])
        g.addN([(aut_uri, FOAF.mbox, Literal(data["Aut"][aut]["Mail"]), graph)
                ])

    #course
    course_id = parse.quote(data["Course"].replace(" ", "").replace(
        string.punctuation, ""),
                            safe="")[:30]
    course_uri = URIRef(app.config["BASE_URI"] + "course/" +
                        data["Year"].replace("-", "_") + "/" + course_id)
    g.addN([(proj, DCTERMS.subject, course_uri, graph)])
    g.addN([(course_uri, RDF.type, DCTERMS.MethodOfInstruction, graph)])
    g.addN([(course_uri, DCTERMS.title, Literal(data["Course"]), graph)])
    g.addN([(course_uri, FOAF.homepage, URIRef(data["Course_url"]), graph)])

    #Year
    year_uri = URIRef(app.config["BASE_URI"] + "year/" +
                      data["Year"].replace("-", "_"))
    g.addN([(course_uri, DCTERMS.coverage, year_uri, graph)])
    TIME = Namespace("http://www.w3.org/2006/time#")
    g.addN([(year_uri, RDF.type, TIME.TemporalEntity, graph)])
    g.addN([(year_uri, RDFS.label, Literal(data["Year"]), graph)])

    #Graph Metadata
    g.addN([(graph, DCTERMS.accessRights, Literal("SUSPENDED"), graph)])
    g.addN([(graph, DCTERMS.dateSubmitted,
             Literal(str(data["Date"]), datatype=XSD.date), graph)])
    pub_uri = URIRef(app.config["BASE_URI"] + "person/" +
                     data["Responsible"].split("@")[0].replace(".", "_"))
    g.addN([(graph, DCTERMS.publisher, pub_uri, graph)])

    return g
Example #20
class MemoryStore:
    """A class that combines and syncronieses n-quad files and an in-memory quad store.

    This class contains information about all graphs, their corresponding URIs and
    pathes in the file system. For every Graph (context of Quad-Store) exists a
    FileReference object (n-quad) that enables versioning (with git) and persistence.
    """
    def __init__(self):
        """Initialize a new MemoryStore instance."""
        logger = logging.getLogger('quit.core.MemoryStore')
        logger.debug('Create an instance of MemoryStore')
        self.store = ConjunctiveGraph(identifier='default')

        return

    def getgraphuris(self):
        """Method to get all available named graphs.

        Returns:
            A list containing all graph uris found in store.
        """
        graphs = []
        for graph in self.store.contexts():
            if isinstance(graph.identifier, BNode) or str(graph.identifier) == 'default':
                pass
            else:
                graphs.append(graph.identifier)

        return graphs

    def getgraphcontent(self, graphuri):
        """Get the serialized content of a named graph.

        Args:
            graphuri: The URI of a named graph.
        Returns:
            content: A list of strings where each string is a quad.
        """
        data = []
        context = self.store.get_context(URIRef(graphuri))
        triplestring = context.serialize(format='nt').decode('UTF-8')

        # Since we have triples here, we transform them to quads by adding the graphuri
        # TODO This might cause problems if ' .\n' will be part of a literal.
        #   Maybe a regex would be a better solution
        triplestring = triplestring.replace(' .\n', ' <' + graphuri + '> .\n')

        data = triplestring.splitlines()
        data.remove('')

        return data

    def getstoreobject(self):
        """Get the conjunctive graph object.

        Returns:
            graph: The ConjunctiveGraph object holding all quads.
        """
        return self.store

    def graphexists(self, graphuri):
        """Check whether the store holds a context for the given named graph URI.

        Args:
            graphuri: A string containing the URI of a named graph

        Returns:
            True or False
        """
        # get_context() always returns a Graph object (possibly empty), so test
        # for content instead of comparing against None.
        return len(self.store.get_context(URIRef(graphuri))) > 0

    def addfile(self, filename, serialization):
        """Add a file to the store.

        Args:
            filename: A String for the path to the file.
            serialization: A String containing the RDF format
        Raises:
            ValueError if the given file can't be parsed as nquads.
        """
        try:
            self.store.parse(source=filename, format=serialization)
        except Exception as e:
            logger.debug(e)
            logger.debug("Could not import file: {}. "
                         "Make sure the file exists and contains data in {}".format(
                             filename, serialization))

    def addquads(self, quads):
        """Add quads to the MemoryStore.

        Args:
            quads: Rdflib.quads that should be added to the MemoryStore.
        """
        self.store.addN(quads)
        self.store.commit()

    def query(self, querystring):
        """Execute a SPARQL select query.

        Args:
            querystring: A string containing a SPARQL ask or select query.
        Returns:
            The SPARQL result set
        """
        return self.store.query(querystring)

    def update(self, querystring, versioning=True):
        """Execute a SPARQL update query and update the store.

        This method executes a SPARQL update query and updates and commits all affected files.

        Args:
            querystring: A string containing a SPARQL update query.
        """
        # methods of rdflib ConjunctiveGraph
        if versioning:
            actions = evalUpdate(self.store, querystring)
            self.store.update(querystring)
            return actions
        else:
            self.store.update(querystring)

    def removequads(self, quads):
        """Remove quads from the MemoryStore.

        Args:
            quads: The quad(s) that should be removed from the MemoryStore.
        """
        self.store.remove(quads)
        self.store.commit()
        return

    def exit(self):
        """Execute actions on API shutdown."""
        return
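A hedged usage sketch for MemoryStore, assuming the rdflib imports the class already relies on (ConjunctiveGraph, URIRef) plus Literal; the graph URI is hypothetical:

from rdflib import URIRef, Literal

store = MemoryStore()
graph_uri = URIRef('http://example.org/graph1')
store.addquads([(URIRef('http://example.org/s'),
                 URIRef('http://example.org/p'),
                 Literal('o'),
                 graph_uri)])  # addN() resolves the URIRef to a context graph
print(store.getgraphuris())   # -> [rdflib.term.URIRef('http://example.org/graph1')]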