def remove(self):
    """Atomically delete all Redis state belonging to this request.

    Drops the request from the global 'requests' sorted set, deletes every
    key under the request's key prefix and lets the subclass hook queue any
    extra cleanup, all within a single MULTI/EXEC transaction.
    """
    with r.pipeline(transaction=True) as p:
        p.multi()
        # Remove the request's score entry from the global index first.
        p.zrem('requests', r.hget(self._request_key, 'id'))
        # Wipe every key hanging off this request's prefix.
        for stale_key in r.keys('{}*'.format(self._request_key)):
            p.delete(stale_key)
        # Subclass hook — presumably queues type-specific cleanup on the
        # same pipeline; confirm against the concrete request classes.
        self._remove(p)
        p.execute()
    log.info('Request {} was removed'.format(self._request_id))
def delivery(self, value):
    """Persist the request's delivery state and maintain the state sets.

    'ready' adds the request to deliveries:ready; 'sent' adds it to
    deliveries:sent; any state other than 'ready' also removes it from
    deliveries:ready. The state itself is stored in the request hash.
    """
    rid = self._request_id
    with r.pipeline(transaction=True) as p:
        p.multi()
        if value == 'sent':
            p.sadd('deliveries:sent', rid)
        elif value == 'ready':
            p.sadd('deliveries:ready', rid)
        # Leaving the 'ready' state always clears the ready-set membership.
        if value != 'ready':
            p.srem('deliveries:ready', rid)
        p.hset('requests:{}'.format(rid), 'delivery', value)
        p.execute()
    log.info('Request {} delivery state is now "{}"'.format(rid, value))
def __load_fragment_requests(fid): requests_ = r.smembers("fragments:{}:requests".format(fid)) sinks_ = {} for rid in requests_: try: sinks_[rid] = build_response(rid).sink except Exception, e: traceback.print_exc() log.warning(e.message) with r.pipeline(transaction=True) as p: p.multi() p.srem("fragments:{}:requests".format(fid), rid) p.execute()
def __remove_fragment(fid):
    """Remove fragment <fid> and all of its Redis keys, notifying requesters.

    Waits on the fragment lock so no concurrent consumer is mid-update,
    then deletes every ``fragments:{fid}*`` key and the membership entry
    in the 'fragments' set inside one transaction.
    """
    log.debug("Waiting to remove fragment {}...".format(fid))
    lock_key = "fragments:{}:lock".format(fid)
    lock = r.lock(lock_key, lock_class=Lock)
    lock.acquire()
    # NOTE(review): the lock is never explicitly released here — the
    # wildcard delete below removes the lock key itself, which appears to
    # be the intended (implicit) release; calling lock.release() after the
    # key is gone could raise. Confirm this is deliberate.
    with r.pipeline(transaction=True) as p:
        requests, r_sinks = __load_fragment_requests(fid)
        __notify_completion(fid, r_sinks)
        for frag_key in r.keys("fragments:{}*".format(fid)):
            p.delete(frag_key)
        p.srem("fragments", fid)
        p.execute()
    log.info("Fragment {} has been removed".format(fid))
def __replace_fragment(fid):
    """
    Recreate fragment <fid> cached data and all its data-contexts from the
    corresponding stream (Redis)
    :param fid:
    :return:
    """
    # Triple patterns registered in the fragment's own context.
    patterns = cache.get_context(fid).subjects(RDF.type, AGORA.TriplePattern)
    # Drop the aggregate context first, then each per-pattern data context.
    cache.remove_context(cache.get_context("/" + fid))
    for pattern_node in patterns:
        ctx_id = str((fid, __triple_pattern(cache, pattern_node)))
        cache.remove_context(cache.get_context(ctx_id))
    # Replay everything streamed up to "now" into fresh contexts.
    now = calendar.timegm(dt.now().timetuple())
    for c, s, p, o in load_stream_triples(fid, now):
        cache.get_context(str((fid, c))).add((s, p, o))
        cache.get_context("/" + fid).add((s, p, o))
    # The stream has been consumed; clear it.
    with r.pipeline() as pipe:
        pipe.delete("fragments:{}:stream".format(fid))
        pipe.execute()
def stream(self, value):
    """Persist the request's '__stream' flag in its Redis hash atomically."""
    request_key = 'requests:{}'.format(self._request_id)
    with r.pipeline(transaction=True) as p:
        p.multi()
        p.hset(request_key, '__stream', value)
        p.execute()
    log.info('Request {} stream state is now "{}"'.format(self._request_id, value))
def __pull_fragment(fid): tps = r.smembers("fragments:{}:gp".format(fid)) requests, r_sinks = __load_fragment_requests(fid) log.info( """Starting collection of fragment {}: - GP: {} - Supporting: ({}) {}""".format( fid, list(tps), len(requests), list(requests) ) ) start_time = datetime.now() try: fgm_gen, _, graph = agora_client.get_fragment_generator( "{ %s }" % " . ".join(tps), workers=N_COLLECTORS, provider=graph_provider, queue_size=N_COLLECTORS ) except Exception: log.error("Agora is not available") return # There is no search plan to execute if not list(graph.subjects(RDF.type, AGORA.SearchTree)): log.info("There is no search plan for fragment {}. Removing...".format(fid)) # TODO: Send additional headers notifying the reason to end __notify_completion(fid, r_sinks) __remove_fragment(fid) return triple_patterns = {tpn: __triple_pattern(graph, tpn) for tpn in graph.subjects(RDF.type, AGORA.TriplePattern)} fragment_contexts = {tpn: (fid, triple_patterns[tpn]) for tpn in triple_patterns} __bind_prefixes(graph) lock_key = "fragments:{}:lock".format(fid) lock = r.lock(lock_key, lock_class=Lock) lock.acquire() lock_consume_key = "fragments:{}:lock:consume".format(fid) c_lock = r.lock(lock_consume_key, lock_class=Lock) c_lock.acquire() # Update fragment contexts with r.pipeline(transaction=True) as p: p.multi() p.set("fragments:{}:pulling".format(fid), True) p.delete("fragments:{}:contexts".format(fid)) for tpn in fragment_contexts.keys(): p.sadd("fragments:{}:contexts".format(fid), fragment_contexts[tpn]) p.execute() lock.release() c_lock.release() n_triples = 0 fragment_weight = 0 fragment_delta = 0 try: for (c, s, p, o) in fgm_gen: pre_ts = datetime.now() triple_weight = len(u"{}{}{}".format(s, p, o)) fragment_weight += triple_weight fragment_delta += triple_weight lock.acquire() if add_stream_triple(fid, triple_patterns[c], (s, p, o)): __consume_quad(fid, (triple_patterns[c], s, p, o), graph, sinks=r_sinks) lock.release() if fragment_delta > 1000: fragment_delta 
= 0 log.info("Pulling fragment {} [{} kB]".format(fid, fragment_weight / 1000.0)) if r.scard("fragments:{}:requests".format(fid)) != len(requests): requests, r_sinks = __load_fragment_requests(fid) n_triples += 1 post_ts = datetime.now() elapsed = (post_ts - pre_ts).total_seconds() excess = (1.0 / COLLECT_THROTTLING) - elapsed if excess > 0: sleep(excess) except Exception, e: traceback.print_exc()
sleep(excess) except Exception, e: traceback.print_exc() elapsed = (datetime.now() - start_time).total_seconds() log.info( "{} triples retrieved for fragment {} in {} s [{} kB]".format(n_triples, fid, elapsed, fragment_weight / 1000.0) ) lock.acquire() c_lock.acquire() __replace_fragment(fid) log.info("Fragment {} data has been replaced with the recently collected".format(fid)) __cache_plan_context(fid, graph) log.info("BGP context of fragment {} has been cached".format(fid)) with r.pipeline(transaction=True) as p: p.multi() sync_key = "fragments:{}:sync".format(fid) demand_key = "fragments:{}:on_demand".format(fid) # Fragment is now synced p.set(sync_key, True) # If the fragment collection time has not exceeded the threshold, switch to on-demand mode if elapsed < ON_DEMAND_TH and elapsed * random.random() < ON_DEMAND_TH / 4: p.set(demand_key, True) log.info("Fragment {} has been switched to on-demand mode".format(fid)) else: p.delete(demand_key) min_durability = int(max(MIN_SYNC, elapsed)) durability = random.randint(min_durability, min_durability * 2) p.expire(sync_key, durability) log.info("Fragment {} is considered synced for {} s".format(fid, durability))
def __init__(self):
    """Set up an empty request holder with its own transactional pipeline."""
    # Identity fields are filled in later, once a request is bound.
    self._request_id = None
    self._request_key = None
    self._dict_fields = {}
    # Dedicated MULTI/EXEC pipeline for this instance's Redis writes.
    self._pipe = r.pipeline(transaction=True)
c, s, p, o = eval(x) return c, __term(s), __term(p), __term(o) for x in r.zrangebyscore('fragments:{}:stream'.format(fid), '-inf', '{}'.format(float(until))): yield __triplify(x) def add_stream_triple(fid, tp, (s, p, o), timestamp=None): if timestamp is None: timestamp = calendar.timegm(dt.utcnow().timetuple()) quad = (tp, s.n3(), p.n3(), o.n3()) stream_key = 'fragments:{}:stream'.format(fid) not_found = not bool(r.zscore(stream_key, quad)) if not_found: with r.pipeline() as pipe: pipe.zadd(stream_key, timestamp, quad) pipe.execute() return not_found class GraphProvider(object): def __init__(self): self.__graph_dict = {} @staticmethod def __clean(name): shutil.rmtree('store/query/{}'.format(name)) def create(self, conjunctive=False): uuid = shortuuid.uuid()
def set_link(self, link):
    """Atomically mark <link> as seen in the enrichment's link-status hash."""
    status_key = '{}:links:status'.format(self._enrichment_key)
    with r.pipeline(transaction=True) as p:
        p.multi()
        p.hset(status_key, str(link), True)
        p.execute()