def load(self, uri, format=None, **kwargs):
    """Load the resource at *uri*, falling back to a remote fetch, and enrich it.

    Each keyword value is expected to be a container; only its popped element
    is forwarded to the wrapper. Returns whatever ``enrich_resource`` yields.
    """
    unwrapped = {key: value.pop() for key, value in kwargs.items()}
    loaded = self.__wrapper.load(uri, **unwrapped)
    if loaded is None:
        # Wrapper could not resolve the URI locally; fetch it remotely.
        loaded = _load_remote(uri, format=format)
    graph, headers = loaded[0], loaded[1]
    ttl = extract_ttl(headers)
    # Collect the enrichments registered for every rdf:type of the resource.
    enrichment_set = set()
    for rtype in graph.objects(URIRef(uri), RDF.type):
        enrichment_set = set(self.ecosystem.enrichments_by_type(rtype)).union(enrichment_set)
    return self.enrich_resource(uri, enrichment_set, graph, ttl)
def load(self, endpoint):
    """Invoke *endpoint* once, cache its payload keyed by href, and return the content.

    Repeated calls for the same href are served from ``self.endpoints_data``
    without re-invoking the endpoint.
    """
    key = str(endpoint.href)
    if key in self.endpoints_data:
        return self.endpoints_data[key]['content']
    try:
        response = endpoint.invoke(graph=self.graph,
                                   subject=self.graph.identifier,
                                   **self.r_args)
        status = response.status_code
        if status == 200:
            entry = {
                'content': response.json(),
                'ttl': extract_ttl(response.headers) or self.max_def_ttl
            }
        elif status != 404 and status < 500:
            # Client-side failures other than 404 are retried soon (short ttl).
            entry = {'content': {}, 'ttl': 10}
        else:
            entry = {'content': {}, 'ttl': self.max_def_ttl}
        self.endpoints_data[key] = entry
    except AttributeError as e:
        log.debug('Missing attributes:' + e.message)
        raise
    return self.endpoints_data[key]['content']
def create(self, conjunctive=False, gid=None, loader=None, format=None):
    """Return a graph for *gid*, serving it from the Redis-backed cache when fresh.

    :param conjunctive: when True, bypass the cache and return a dedicated triple store.
    :param gid: graph identifier; also used as the cache key.
    :param loader: callable ``loader(gid, format)`` that fetches the graph on a cache miss.
    :param format: RDF serialization hint forwarded to the loader/parser.
    :return: the store alone (conjunctive), a bare bool relayed from the loader,
             or a ``(graph, ttl_seconds)`` tuple for cached graphs.
    :raises EnvironmentError: when the Redis connection fails.
    """
    try:
        if conjunctive:
            # Conjunctive graphs are never cached: hand back a fresh store under a new uuid.
            uuid = shortuuid.uuid()
            g = get_triple_store(self.__persist_mode, base=self.__base_path, path=uuid)
            return g
        else:
            # Batch all Redis writes in one transactional pipeline, executed at the end.
            p = self._r.pipeline(transaction=True)
            p.multi()
            g = Graph(identifier=gid)
            lock = self.uri_lock(gid)
            with lock:
                # Resolve (or mint) the stable cache uuid for this gid.
                uuid = self._r.hget('{}:gids'.format(self.__cache_key), gid)
                if not uuid:
                    uuid = shortuuid.uuid()
                    p.hset('{}:gids'.format(self.__cache_key), gid, uuid)
                gid_key = '{}:{}'.format(self.__cache_key, uuid)
                ttl_ts = self._r.hget(gid_key, 'ttl')
                if ttl_ts is not None:
                    ttl_dt = dt.utcfromtimestamp(int(ttl_ts))
                    now = dt.utcnow()
                    if ttl_dt > now:
                        # Cache hit, still fresh: prefer the memoized in-process copy,
                        # otherwise rebuild the graph from the compressed turtle in Redis.
                        try:
                            g = self.__recall(gid)
                        except KeyError:
                            source_z = self._r.hget(gid_key, 'data')
                            source = zlib.decompress(source_z)
                            g.parse(StringIO(source), format=format)
                            self.__memoize(gid, g)
                        # Remaining lifetime of the cached entry, in whole seconds.
                        ttl = math.ceil(
                            (ttl_dt - dt.utcnow()).total_seconds())
                        return g, math.ceil(ttl)
                # Cache miss or expired entry: fetch, parse and re-cache the graph.
                log.debug('Caching {}'.format(gid))
                response = loader(gid, format)
                if response is None and loader != http_get:
                    # The custom loader yielded nothing; fall back to a plain HTTP GET.
                    response = http_get(gid, format)
                if isinstance(response, bool):
                    # Loaders may answer with a bare boolean instead of content; relay it.
                    return response
                ttl = self.__min_cache_time
                source, headers = response
                if not isinstance(source, Graph) and not isinstance(
                        source, ConjunctiveGraph):
                    # Raw payload: parse it into g, then keep its turtle serialization.
                    parse_rdf(g, source, format, headers)
                    data = g.serialize(format='turtle')
                else:
                    # Already a graph: serialize it and copy namespaces + triples into g.
                    data = source.serialize(format='turtle')
                    for prefix, ns in source.namespaces():
                        g.bind(prefix, ns)
                    g.__iadd__(source)
                self.__memoize(gid, g)
                if not self.__force_cache_time:
                    # Honor the origin's caching headers unless a fixed cache time is forced.
                    ttl = extract_ttl(headers) or ttl
                # Persist the compressed serialization together with its expiry timestamp.
                p.hset(gid_key, 'data', zlib.compress(data))
                ttl_ts = calendar.timegm(
                    (dt.utcnow() + delta(seconds=ttl)).timetuple())
                p.hset(gid_key, 'ttl', ttl_ts)
                p.expire(gid_key, ttl)
                p.execute()
                return g, int(ttl)
    except ConnectionError as e:
        raise EnvironmentError(e.message)
def describe_resource(self, tid, b64=None, **kwargs):
    """Build an RDF description of the thing identified by *tid*.

    Combines the thing description's own triples (with blank nodes remapped and
    literal placeholders substituted from the request arguments), super-type
    assertions from the fountain, any owl:sameAs RDF sources, and the mapped
    JSON responses of its endpoints.

    :param tid: thing identifier, key into ``self.__rdict``.
    :param b64: optional base64-encoded resource arguments; when absent,
                ``kwargs`` are used directly.
    :return: ``(graph, headers)`` where headers carry a ``Cache-Control``
             max-age derived from the smallest endpoint TTL seen.
    """
    td = self.__rdict[tid]
    g = ConjunctiveGraph()
    fountain = self.fountain
    ns = get_ns(fountain)
    prefixes = fountain.prefixes
    for prefix, uri in prefixes.items():
        g.bind(prefix, uri)
    # Default TTL; lowered below to the minimum advertised by invoked endpoints.
    ttl = 100000
    try:
        if b64 is not None:
            # Undo URL escaping of base64 padding before decoding.
            b64 = b64.replace('%3D', '=')
            # NOTE(review): eval of request-supplied data is dangerous if b64 can
            # come from untrusted clients — consider ast.literal_eval; verify callers.
            resource_args = eval(base64.b64decode(b64))
        else:
            resource_args = kwargs
        r_uri = self.url_for(tid=tid, b64=b64)
        if kwargs:
            # Re-attach the query string so the resource URI reflects its arguments.
            r_uri = '{}?{}'.format(
                r_uri,
                '&'.join(['{}={}'.format(k, kwargs[k]) for k in kwargs]))
        r_uri = URIRef(r_uri)
        # Copy the thing description triples, remapping as we go.
        bnode_map = {}
        for s, p, o in td.resource.graph:
            if o in self.__ndict:
                # Object is another known thing: point at its dereferenceable URL.
                o = URIRef(
                    self.url_for(tid=self.__ndict[o], b64=b64,
                                 **resource_args))
            elif isinstance(o, BNode):
                # Keep blank nodes distinct from the source graph's.
                if o not in bnode_map:
                    bnode_map[o] = BNode()
                o = bnode_map[o]
            elif isinstance(o, Literal):
                # Literals that name a resource argument act as placeholders.
                if str(o) in resource_args:
                    o = Literal(resource_args[str(o)], datatype=o.datatype)
            if s == td.resource.node:
                s = r_uri
            if isinstance(s, BNode):
                if s not in self.__ndict:
                    if s not in bnode_map:
                        bnode_map[s] = BNode()
                        # First time this bnode is seen: assert its super-types too.
                        for t in td.resource.graph.objects(s, RDF.type):
                            for supt in fountain.get_type(t.n3(ns))['super']:
                                g.add((bnode_map[s], RDF.type,
                                       extend_uri(supt, prefixes)))
                    s = bnode_map[s]
                    g.add((s, p, o))
            else:
                g.add((s, p, o))
        # Collect the properties of all declared types and assert their super-types.
        resource_props = set([])
        for t in td.resource.types:
            if isinstance(t, URIRef):
                t_n3 = t.n3(ns)
            else:
                t_n3 = t
            type_dict = fountain.get_type(t_n3)
            resource_props.update(type_dict['properties'])
            for st in type_dict['super']:
                g.add((r_uri, RDF.type, extend_uri(st, prefixes)))
        if td.rdf_sources:
            # Merge triples from declared owl:sameAs RDF sources, keeping only
            # properties known for the resource's types.
            for e in td.rdf_sources:
                uri = URIRef(e.endpoint.href)
                g.add((r_uri, OWL.sameAs, uri))
                same_as_g = Graph()
                same_as_g.load(source=uri)
                for s, p, o in same_as_g:
                    if p.n3(ns) in resource_props:
                        if s == uri:
                            s = r_uri
                        elif not isinstance(s, BNode):
                            # Unrelated named subjects are skipped.
                            continue
                        g.add((s, p, o))
        if td.base:
            # Invoke each composed endpoint once (deduplicated by href), in the
            # order declared by its access mapping.
            invoked_endpoints = {}
            endpoints = list(self.compose_endpoints(td))
            endpoints_order = {
                am.endpoint: am.order
                for am in td.access_mappings
            }
            for e in sorted(endpoints, key=lambda x: endpoints_order[x]):
                if str(e.href) not in invoked_endpoints:
                    invoked_endpoints[str(e.href)] = e.invoke(
                        graph=g, subject=r_uri, **resource_args)
                response = invoked_endpoints[str(e.href)]
                if response.status_code == 200:
                    data = response.json()
                    e_mappings = td.endpoint_mappings(e)
                    mapped_data = apply_mappings(data, e_mappings, ns)
                    ld = self.enrich(r_uri, mapped_data,
                                     td.resource.types, fountain, ns=ns,
                                     vars=td.vars, **resource_args)
                    ld_triples(ld, g)
                    # Cache no longer than the most restrictive endpoint allows.
                    ttl = min(ttl, extract_ttl(response.headers) or ttl)
    except Exception as e:
        # Best-effort description: log the failure and return what was gathered.
        traceback.print_exc()
        log.warn(e.message)
    return g, {'Cache-Control': 'max-age={}'.format(ttl)}