Beispiel #1
0
    def load(self, uri, format=None, **kwargs):
        """Load the resource at ``uri``, enriching a remote copy on a miss.

        Every keyword value must support ``pop()``; the popped element is
        what gets forwarded to the wrapped loader. When that loader returns
        ``None`` the resource is fetched with ``_load_remote`` and handed to
        ``enrich_resource`` together with every enrichment the ecosystem
        reports for any of the resource's ``rdf:type`` values.

        :param uri: identifier of the resource to load.
        :param format: forwarded to ``_load_remote`` only.
        :param kwargs: poppable containers, one per extra loader argument.
        :return: the wrapped loader's result, or the enriched resource.
        """
        params = {name: values.pop() for name, values in kwargs.items()}
        loaded = self.__wrapper.load(uri, **params)
        if loaded is not None:
            return loaded

        remote = _load_remote(uri, format=format)
        graph = remote[0]
        ttl = extract_ttl(remote[1])

        # Accumulate the enrichments declared for each type of the resource.
        enrichments = set()
        for rdf_type in graph.objects(URIRef(uri), RDF.type):
            found = set(self.ecosystem.enrichments_by_type(rdf_type))
            enrichments = found.union(enrichments)

        return self.enrich_resource(uri, enrichments, graph, ttl)
Beispiel #2
0
 def load(self, endpoint):
     """Return the content obtained from ``endpoint``, invoking it once.

     Results are stored in ``self.endpoints_data`` keyed by the endpoint's
     ``href`` (as a string); an ``href`` that is already present is served
     from there without invoking the endpoint again.

     Stored entry per response status:
       * 200: the decoded JSON body, with the TTL taken from the response
         headers or ``self.max_def_ttl`` when none is extracted.
       * any other status below 500 except 404: empty content, TTL 10.
       * 404 and 5xx: empty content, TTL ``self.max_def_ttl``.

     :param endpoint: object exposing ``href`` and ``invoke(...)``.
     :return: the ``'content'`` value stored for the endpoint.
     :raises AttributeError: re-raised after being logged at debug level.
     """
     href = str(endpoint.href)
     if href not in self.endpoints_data:
         try:
             response = endpoint.invoke(graph=self.graph,
                                        subject=self.graph.identifier,
                                        **self.r_args)
             if response.status_code == 200:
                 result = {
                     'content': response.json(),
                     'ttl': extract_ttl(response.headers)
                     or self.max_def_ttl
                 }
             elif response.status_code < 500 and response.status_code != 404:
                 result = {'content': {}, 'ttl': 10}
             else:
                 result = {'content': {}, 'ttl': self.max_def_ttl}
             self.endpoints_data[href] = result
         except AttributeError as e:
             # ``e.message`` does not exist on Python 3 exceptions; reading
             # it here would raise a second error and hide the original.
             log.debug('Missing attributes:' + str(e))
             raise
     return self.endpoints_data[href]['content']
Beispiel #3
0
    def create(self, conjunctive=False, gid=None, loader=None, format=None):
        """Create a triple store, or load (and cache) the graph for ``gid``.

        :param conjunctive: when true, a new store obtained from
            ``get_triple_store`` under a fresh short uuid is returned on its
            own and none of the other arguments are used.
        :param gid: identifier of the graph to load; also the cache key.
        :param loader: callable invoked as ``loader(gid, format)``. When it
            returns ``None`` and is not ``http_get``, ``http_get`` is tried.
            ``None`` (the default) means ``http_get``.
        :param format: passed to the loader and to the RDF parsers.
        :return: the store alone when ``conjunctive``; the loader's value
            unchanged when it is a ``bool``; otherwise a ``(graph, ttl)``
            tuple where ``ttl`` is an integer number of seconds.
        :raises EnvironmentError: when a ``ConnectionError`` is raised.
        """
        try:
            if conjunctive:
                uuid = shortuuid.uuid()
                g = get_triple_store(self.__persist_mode,
                                     base=self.__base_path,
                                     path=uuid)
                return g
            else:
                # The signature allows omitting the loader; calling ``None``
                # would be a TypeError, so use the same HTTP fallback that
                # is applied below when a custom loader yields nothing.
                if loader is None:
                    loader = http_get

                # Writes are queued on the pipeline and only sent by
                # ``p.execute()`` at the end of a fresh load.
                p = self._r.pipeline(transaction=True)
                p.multi()

                g = Graph(identifier=gid)

                lock = self.uri_lock(gid)
                with lock:
                    uuid = self._r.hget('{}:gids'.format(self.__cache_key),
                                        gid)
                    if not uuid:
                        uuid = shortuuid.uuid()
                        p.hset('{}:gids'.format(self.__cache_key), gid, uuid)

                    gid_key = '{}:{}'.format(self.__cache_key, uuid)

                    # 'ttl' holds the expiry instant as a UTC epoch value.
                    ttl_ts = self._r.hget(gid_key, 'ttl')
                    if ttl_ts is not None:
                        ttl_dt = dt.utcfromtimestamp(int(ttl_ts))
                        now = dt.utcnow()
                        if ttl_dt > now:
                            try:
                                g = self.__recall(gid)
                            except KeyError:
                                # Not memoized: rebuild the graph from the
                                # compressed copy stored under 'data'.
                                source_z = self._r.hget(gid_key, 'data')
                                source = zlib.decompress(source_z)
                                g.parse(StringIO(source), format=format)
                                self.__memoize(gid, g)

                            # Remaining lifetime, as an int so both return
                            # paths yield the same ttl type.
                            ttl = int(math.ceil(
                                (ttl_dt - dt.utcnow()).total_seconds()))
                            return g, ttl

                    log.debug('Caching {}'.format(gid))
                    response = loader(gid, format)
                    if response is None and loader != http_get:
                        response = http_get(gid, format)

                    if isinstance(response, bool):
                        return response

                    ttl = self.__min_cache_time
                    source, headers = response
                    if not isinstance(source, (Graph, ConjunctiveGraph)):
                        parse_rdf(g, source, format, headers)
                        data = g.serialize(format='turtle')
                    else:
                        # Already a graph: copy its namespace bindings and
                        # triples into the graph identified by ``gid``.
                        data = source.serialize(format='turtle')
                        for prefix, ns in source.namespaces():
                            g.bind(prefix, ns)
                        g.__iadd__(source)

                    self.__memoize(gid, g)

                    if not self.__force_cache_time:
                        ttl = extract_ttl(headers) or ttl

                    p.hset(gid_key, 'data', zlib.compress(data))
                    ttl_ts = calendar.timegm(
                        (dt.utcnow() + delta(seconds=ttl)).timetuple())
                    p.hset(gid_key, 'ttl', ttl_ts)
                    p.expire(gid_key, ttl)
                    p.execute()
                return g, int(ttl)
        except ConnectionError as e:
            # ``e.message`` is Python 2 only; on Python 3 reading it would
            # raise AttributeError instead of the EnvironmentError intended.
            raise EnvironmentError(str(e))
Beispiel #4
0
    def describe_resource(self, tid, b64=None, **kwargs):
        """Build the RDF description of the resource registered as ``tid``.

        The resource's template graph is copied into a new
        ``ConjunctiveGraph`` with the template node replaced by the concrete
        resource URI, blank nodes renamed, and literals that name a resource
        argument replaced by that argument's value. Super types, triples
        from the description's RDF sources and data mapped from its
        endpoints are then added.

        Any exception raised while building is printed and logged, and
        whatever has been added to the graph so far is still returned.

        :param tid: key into the registered descriptions.
        :param b64: optional base64 text (with ``=`` possibly written as
            ``%3D``) that decodes to the resource arguments; when given it
            takes the place of ``kwargs`` as the resource arguments.
        :param kwargs: resource arguments; also appended to the resource
            URI as a query string.
        :return: ``(graph, headers)`` where ``headers`` carries a
            ``Cache-Control: max-age`` value. It starts at 100000 and is
            lowered to the smallest TTL extracted from the endpoint
            responses with status 200.
        :raises KeyError: when ``tid`` is not registered (raised before the
            best-effort section).
        """
        td = self.__rdict[tid]
        g = ConjunctiveGraph()

        fountain = self.fountain
        ns = get_ns(fountain)

        prefixes = fountain.prefixes
        for prefix, uri in prefixes.items():
            g.bind(prefix, uri)

        ttl = 100000
        try:
            if b64 is not None:
                b64 = b64.replace('%3D', '=')
                # SECURITY(review): eval() runs whatever expression the
                # decoded ``b64`` text contains. If ``b64`` can come from an
                # untrusted client this is arbitrary code execution and
                # should be replaced by a safe parser (for instance
                # ast.literal_eval or JSON). Left as is because the encoding
                # side is not visible from this method.
                resource_args = eval(base64.b64decode(b64))
            else:
                resource_args = kwargs
            r_uri = self.url_for(tid=tid, b64=b64)
            if kwargs:
                r_uri = '{}?{}'.format(
                    r_uri,
                    '&'.join(['{}={}'.format(k, kwargs[k]) for k in kwargs]))
            r_uri = URIRef(r_uri)

            # Template blank node -> fresh blank node for this description.
            bnode_map = {}

            for s, p, o in td.resource.graph:
                if o in self.__ndict:
                    # The object is a node known to ``__ndict``: point at
                    # the URL of the corresponding resource instead.
                    o = URIRef(
                        self.url_for(tid=self.__ndict[o],
                                     b64=b64,
                                     **resource_args))
                elif isinstance(o, BNode):
                    if o not in bnode_map:
                        bnode_map[o] = BNode()
                    o = bnode_map[o]
                elif isinstance(o, Literal):
                    # A literal whose text names a resource argument is
                    # replaced by that argument's value (same datatype).
                    if str(o) in resource_args:
                        o = Literal(resource_args[str(o)], datatype=o.datatype)

                if s == td.resource.node:
                    s = r_uri

                if isinstance(s, BNode):
                    # Triples whose blank subject is in ``__ndict`` are
                    # skipped altogether.
                    if s not in self.__ndict:
                        if s not in bnode_map:
                            bnode_map[s] = BNode()

                        for t in td.resource.graph.objects(s, RDF.type):
                            for supt in fountain.get_type(t.n3(ns))['super']:
                                g.add((bnode_map[s], RDF.type,
                                       extend_uri(supt, prefixes)))

                        s = bnode_map[s]
                        g.add((s, p, o))
                else:
                    g.add((s, p, o))

            # Properties of the resource's types; used below to filter the
            # triples taken from the RDF sources.
            resource_props = set()
            for t in td.resource.types:
                if isinstance(t, URIRef):
                    t_n3 = t.n3(ns)
                else:
                    t_n3 = t
                type_dict = fountain.get_type(t_n3)
                resource_props.update(type_dict['properties'])
                for st in type_dict['super']:
                    g.add((r_uri, RDF.type, extend_uri(st, prefixes)))

            if td.rdf_sources:
                for e in td.rdf_sources:
                    uri = URIRef(e.endpoint.href)
                    g.add((r_uri, OWL.sameAs, uri))
                    same_as_g = Graph()
                    same_as_g.load(source=uri)
                    for s, p, o in same_as_g:
                        if p.n3(ns) in resource_props:
                            # Keep only statements about the source itself
                            # (re-rooted at r_uri) or about blank nodes.
                            if s == uri:
                                s = r_uri
                            elif not isinstance(s, BNode):
                                continue
                            g.add((s, p, o))

            if td.base:
                # One invocation per distinct href, even when several
                # endpoints share it.
                invoked_endpoints = {}
                endpoints = list(self.compose_endpoints(td))
                endpoints_order = {
                    am.endpoint: am.order
                    for am in td.access_mappings
                }
                for e in sorted(endpoints, key=lambda x: endpoints_order[x]):
                    if str(e.href) not in invoked_endpoints:
                        invoked_endpoints[str(e.href)] = e.invoke(
                            graph=g, subject=r_uri, **resource_args)
                    response = invoked_endpoints[str(e.href)]
                    if response.status_code == 200:
                        data = response.json()
                        e_mappings = td.endpoint_mappings(e)
                        mapped_data = apply_mappings(data, e_mappings, ns)
                        ld = self.enrich(r_uri,
                                         mapped_data,
                                         td.resource.types,
                                         fountain,
                                         ns=ns,
                                         vars=td.vars,
                                         **resource_args)
                        ld_triples(ld, g)
                        ttl = min(ttl, extract_ttl(response.headers) or ttl)

        except Exception as e:
            # Deliberately best-effort: report the failure and return what
            # was built. ``e.message`` is Python 2 only and would make this
            # handler itself raise on Python 3, so use ``str(e)``.
            traceback.print_exc()
            log.warn(str(e))
        return g, {'Cache-Control': 'max-age={}'.format(ttl)}