def get_fragment(fid):
    """Return a JSON description of the fragment identified by `fid`.

    The description includes the fragment's graph pattern, last-update
    timestamp, pulling state and pending request ids. While the fragment
    is being pulled, the current stream size (triple count) is included.

    :raises NotFound: if `fid` is not a member of the 'fragments' set.
    """
    if not r.sismember('fragments', fid):
        raise NotFound('The fragment {} does not exist'.format(fid))
    pulling = r.get('fragments:{}:pulling'.format(fid))
    fr_dict = {
        'id': fid,
        'pattern': "{ %s }" % ' . '.join(r.smembers('fragments:{}:gp'.format(fid))),
        'updated': r.get('fragments:{}:updated'.format(fid)),
        # The pulling flag is stored as the string repr of a bool (see the
        # collector's `p.set(..., True)`); compare it instead of eval()-ing
        # data read back from Redis (original used eval, which is unsafe).
        # A missing key (None) correctly yields False here.
        'pulling': pulling == 'True',
        'requests': list(r.smembers('fragments:{}:requests'.format(fid)))
    }
    if fr_dict['pulling']:
        fr_dict['triples'] = r.zcard('fragments:{}:stream'.format(fid))
    return jsonify(fr_dict)
def load(self):
    """Populate this enrichment's fields from its Redis hash and link set.

    Reads `target`, `fragment_id` and the stored (link, value) pairs,
    wrapping link identifiers as URIRef instances.
    """
    stored = r.hgetall(self._enrichment_key)
    self.target = URIRef(stored.get('target', None))
    self.fragment_id = stored.get('fragment_id', None)
    # NOTE(review): eval() on Redis-stored pair strings is unsafe if the
    # store can be written by untrusted parties — consider ast.literal_eval.
    pairs = [eval(pair_str) for pair_str in r.smembers('{}:links'.format(self._enrichment_key))]
    self.links = [(URIRef(link), v) for link, v in pairs]
def __check_gp(self):
    """Look for a stored fragment whose graph pattern maps onto ours.

    Scans every 'fragments:*:gp' key, rebuilding each stored pattern and
    testing it against self._graph_pattern.

    :return: a (fragment_id, mapping) tuple for the first match, or None
             when no stored pattern maps.
    """
    for gp_key in r.keys('fragments:*:gp'):
        candidate = GraphPattern(r.smembers(gp_key))
        var_mapping = candidate.mapping(self._graph_pattern)
        if var_mapping:
            # Key layout is 'fragments:<fid>:gp' — the middle token is the id
            return gp_key.split(':')[1], var_mapping
    return None
def __load_fragment_requests(fid):
    """Build the response sinks for every request attached to a fragment.

    :param fid: fragment identifier.
    :return: a tuple (request_ids, sinks) where `sinks` maps each request id
             to its response sink. Requests whose response cannot be built
             are dropped from both the Redis set and the returned ids.
    """
    requests_ = r.smembers("fragments:{}:requests".format(fid))
    sinks_ = {}
    # Iterate a copy so broken requests can be discarded from requests_
    for rid in list(requests_):
        try:
            sinks_[rid] = build_response(rid).sink
        except Exception as e:
            traceback.print_exc()
            log.warning(str(e))
            # Keep the local set consistent with Redis; otherwise callers
            # comparing scard(...) against len(requests) would retry forever
            requests_.discard(rid)
            with r.pipeline(transaction=True) as p:
                p.multi()
                p.srem("fragments:{}:requests".format(fid), rid)
                p.execute()
    # BUG FIX: the original never returned anything, but callers unpack
    # `requests, r_sinks = __load_fragment_requests(fid)`
    return requests_, sinks_
def __deliver_responses():
    """Daemon loop: dispatch ready deliveries and clean up sent ones.

    Polls Redis once per second. Each request id found in 'deliveries:ready'
    gets a __deliver_response task submitted to the thread pool (at most one
    live future per id); ids found in 'deliveries:sent' are removed from all
    delivery sets and their response sinks destroyed. Never returns.
    """
    import time
    registered_deliveries = r.scard('deliveries')
    deliveries_ready = r.scard('deliveries:ready')
    log.info("""Delivery daemon started:
                    - Deliveries: {}
                    - Ready: {}""".format(registered_deliveries, deliveries_ready))
    # (A second, redundant 'Delivery daemon started' log call was removed.)
    futures = {}
    while True:
        ready = r.smembers('deliveries:ready')
        for rid in ready:
            if rid not in futures:
                log.info('Response delivery of request {} is ready. Preparing...'.format(rid))
                futures[rid] = thp.submit(__deliver_response, rid)

        # Forget completed futures for requests no longer marked as ready
        for obsolete_rid in set.difference(set(futures.keys()), ready):
            if obsolete_rid in futures and futures[obsolete_rid].done():
                del futures[obsolete_rid]

        sent = r.smembers('deliveries:sent')
        for rid in sent:
            r.srem('deliveries:ready', rid)
            r.srem('deliveries', rid)
            try:
                response = build_response(rid)
                response.sink.remove()
                log.info('Request {} was sent and cleared'.format(rid))
            except AttributeError:
                # Sink already gone: the request was cleared by other means
                log.warning('Request number {} was deleted by other means'.format(rid))
        r.delete('deliveries:sent')
        time.sleep(1)
def _load(self):
    """Load this sink's state from Redis, rebuilding the graph pattern.

    Extends the parent `_load` by materializing the fragment's stored
    graph pattern and de-serializing the 'mapping' and 'preferred_labels'
    fields, which are stored as Python-literal strings.
    """
    super(FragmentSink, self)._load()
    self._fragment_id = self._dict_fields['fragment_id']
    self._graph_pattern = GraphPattern(r.smembers('fragments:{}:gp'.format(self._fragment_id)))
    mapping = self._dict_fields.get('mapping', None)
    if mapping is not None:
        # NOTE(review): eval() on a Redis-stored string — unsafe if the
        # store is writable by untrusted parties; ast.literal_eval would
        # be safer if the stored values are plain literals. TODO confirm.
        mapping = eval(mapping)
    self._dict_fields['mapping'] = mapping
    preferred_labels = self._dict_fields.get('preferred_labels', None)
    if preferred_labels is not None:
        # Same eval() caveat as for 'mapping' above
        preferred_labels = eval(preferred_labels)
    self._dict_fields['preferred_labels'] = preferred_labels
    # fragment_id is kept in self._fragment_id; drop the raw dict entry
    try:
        del self._dict_fields['fragment_id']
    except KeyError:
        pass
def __collect_fragments():
    """Daemon loop: keep one pull task alive per out-of-sync fragment.

    Once per second, scans registered fragments and submits a
    __pull_fragment task for every fragment that is neither synced nor
    currently being pulled and has no live future. Never returns.
    """
    registered_fragments = r.scard("fragments")
    synced_fragments = len(r.keys("fragments:*:sync"))
    log.info(
        """Collector daemon started:
                    - Fragments: {}
                    - Synced: {}""".format(
            registered_fragments, synced_fragments
        )
    )
    futures = {}
    while True:
        for fid in r.smembers("fragments"):
            # Skip fragments that are synced or already being pulled
            if r.get("fragments:{}:sync".format(fid)) is not None:
                continue
            if r.get("fragments:{}:pulling".format(fid)) is not None:
                continue
            existing = futures.get(fid)
            if existing is not None and existing.done():
                del futures[fid]
            if fid not in futures:
                futures[fid] = thp.submit(__pull_fragment, fid)
        time.sleep(1)
def get_fragments():
    """Return a JSON object listing every registered fragment id."""
    fragment_ids = list(r.smembers('fragments'))
    return jsonify({"fragments": fragment_ids})
def __pull_fragment(fid):
    """Collect the triples of fragment `fid` from Agora and stream them.

    Drives one full collection cycle: obtains a fragment generator from the
    Agora client, registers the plan's triple-pattern contexts in Redis
    (under the fragment lock), then consumes the generator quad by quad,
    appending each triple to the fragment stream and forwarding it to the
    sinks of pending requests. Per-triple pace is throttled so the loop
    does not exceed COLLECT_THROTTLING triples per second.
    """
    tps = r.smembers("fragments:{}:gp".format(fid))
    requests, r_sinks = __load_fragment_requests(fid)
    log.info(
        """Starting collection of fragment {}:
                    - GP: {}
                    - Supporting: ({}) {}""".format(
            fid, list(tps), len(requests), list(requests)
        )
    )
    start_time = datetime.now()
    try:
        fgm_gen, _, graph = agora_client.get_fragment_generator(
            "{ %s }" % " . ".join(tps), workers=N_COLLECTORS,
            provider=graph_provider, queue_size=N_COLLECTORS
        )
    except Exception:
        # Agora endpoint unreachable: give up this cycle
        log.error("Agora is not available")
        return

    # There is no search plan to execute
    if not list(graph.subjects(RDF.type, AGORA.SearchTree)):
        log.info("There is no search plan for fragment {}. Removing...".format(fid))
        # TODO: Send additional headers notifying the reason to end
        __notify_completion(fid, r_sinks)
        __remove_fragment(fid)
        return

    # Map each triple-pattern node of the plan graph to its pattern, and
    # derive the (fid, pattern) context entry stored per pattern node
    triple_patterns = {tpn: __triple_pattern(graph, tpn)
                       for tpn in graph.subjects(RDF.type, AGORA.TriplePattern)}
    fragment_contexts = {tpn: (fid, triple_patterns[tpn]) for tpn in triple_patterns}
    __bind_prefixes(graph)

    # Hold both the fragment lock and its consume lock while replacing
    # the fragment's context set
    lock_key = "fragments:{}:lock".format(fid)
    lock = r.lock(lock_key, lock_class=Lock)
    lock.acquire()
    lock_consume_key = "fragments:{}:lock:consume".format(fid)
    c_lock = r.lock(lock_consume_key, lock_class=Lock)
    c_lock.acquire()

    # Update fragment contexts
    with r.pipeline(transaction=True) as p:
        p.multi()
        p.set("fragments:{}:pulling".format(fid), True)
        p.delete("fragments:{}:contexts".format(fid))
        for tpn in fragment_contexts.keys():
            p.sadd("fragments:{}:contexts".format(fid), fragment_contexts[tpn])
        p.execute()
    lock.release()
    c_lock.release()

    n_triples = 0
    fragment_weight = 0     # total collected size (chars) for progress logs
    fragment_delta = 0      # size collected since the last progress log
    try:
        # `p` here shadows the pipeline name above; quads are (context, s, p, o)
        for (c, s, p, o) in fgm_gen:
            pre_ts = datetime.now()
            triple_weight = len(u"{}{}{}".format(s, p, o))
            fragment_weight += triple_weight
            fragment_delta += triple_weight
            # Lock is taken per triple so consumers can interleave
            lock.acquire()
            if add_stream_triple(fid, triple_patterns[c], (s, p, o)):
                __consume_quad(fid, (triple_patterns[c], s, p, o), graph, sinks=r_sinks)
            lock.release()
            if fragment_delta > 1000:
                # Roughly every kB collected, emit a progress line
                fragment_delta = 0
                log.info("Pulling fragment {} [{} kB]".format(fid, fragment_weight / 1000.0))
            # Reload request sinks when the Redis request set has changed
            if r.scard("fragments:{}:requests".format(fid)) != len(requests):
                requests, r_sinks = __load_fragment_requests(fid)
            n_triples += 1
            # Throttle: sleep away any time left under the per-triple budget
            post_ts = datetime.now()
            elapsed = (post_ts - pre_ts).total_seconds()
            excess = (1.0 / COLLECT_THROTTLING) - elapsed
            if excess > 0:
                sleep(excess)
    except Exception, e:
        # NOTE(review): broad swallow — collection errors are only printed,
        # never propagated; the fragment may be left partially pulled
        traceback.print_exc()
def fragment_contexts(self):
    """Return the raw members of this fragment's 'contexts' set in Redis."""
    contexts_key = 'fragments:{}:contexts'.format(self._fragment_id)
    return r.smembers(contexts_key)
def get_fragment_enrichments(fid):
    """Build an EnrichmentData object for each enrichment linked to `fid`."""
    enrichment_ids = r.smembers('fragments:{}:enrichments'.format(fid))
    return [EnrichmentData(eid) for eid in enrichment_ids]