def __update_fragment_cache(fid, gp):
    """
    Rebuild the cached graph of fragment <fid>, together with each of its data-contexts,
    from the triples held in the fragment stream (Redis). The stream key is deleted afterwards.
    :param fid: Fragment id
    :param gp: Graph pattern of the fragment, as accepted by graph_from_gp
    :return:
    """
    fragment_ctx_id = '/' + fid
    fragments_cache.remove_context(fragments_cache.get_context(fragment_ctx_id))

    # Nodes of the graph pattern without incoming edges are treated as roots
    pattern_graph = graph_from_gp(gp)
    roots = [node for node in pattern_graph.nodes() if pattern_graph.in_degree(node) == 0]

    until = calendar.timegm(dt.utcnow().timetuple())
    reset_contexts = set()
    for c, s, p, o in load_stream_triples(fid, until):
        tp_ctx_id = str((fid, c))
        if c not in reset_contexts:
            # First triple seen for this context: drop whatever it held before
            fragments_cache.remove_context(fragments_cache.get_context(tp_ctx_id))
            reset_contexts.add(c)
        triple = (s, p, o)
        fragments_cache.get_context(tp_ctx_id).add(triple)
        fragments_cache.get_context(fragment_ctx_id).add(triple)
        if c[0] in roots:
            fragments_cache.get_context(fragment_ctx_id).add((s, RDF.type, STOA.Root))
    reset_contexts.clear()

    with r.pipeline() as pipe:
        pipe.delete('{}:{}:stream'.format(fragments_key, fid))
        pipe.execute()
def create(self, conjunctive=False, gid=None, loader=None, format=None): self.__lock.acquire() uuid_lock = None cached = False p = r.pipeline(transaction=True) p.multi() try: uuid = shortuuid.uuid() if conjunctive: if 'persist' in app.config['STORE']: g = ConjunctiveGraph('Sleepycat') g.open('store/resources/{}'.format(uuid), create=True) else: g = ConjunctiveGraph() g.store.graph_aware = False self.__graph_dict[g] = uuid self.__uuid_dict[uuid] = g return g else: g = resources_cache.get_context(uuid) try: if gid is not None: st_uuid = r.hget(self.__gids_key, gid) if st_uuid is not None: cached = True uuid = st_uuid uuid_lock = self.uuid_lock(uuid) uuid_lock.acquire() g = self.__uuid_dict[uuid] uuid_lock.release() else: post_ts = dt.now() elapsed = (post_ts - self.__last_creation_ts).total_seconds() throttling = (1.0 / GRAPH_THROTTLING) - elapsed if throttling > 0: sleep(throttling) temp_key = '{}:cache:{}'.format(AGENT_ID, uuid) counter_key = '{}:cnt'.format(temp_key) ttl = MIN_CACHE_TIME + int(10 * random()) ttl_ts = calendar.timegm((dt.now() + datetime.timedelta(ttl)).timetuple()) if st_uuid is None: p.delete(counter_key) p.sadd(self.__cache_key, uuid) p.hset(self.__gids_key, uuid, gid) p.hset(self.__gids_key, gid, uuid) self.__last_creation_ts = dt.now() p.incr(counter_key) p.set(temp_key, ttl_ts) p.expire(temp_key, ttl) uuid_lock = self.uuid_lock(uuid) uuid_lock.acquire() except Exception, e: log.error(e.message) traceback.print_exc() self.__graph_dict[g] = uuid self.__uuid_dict[uuid] = g
def __purge(self): while True: self.__lock.acquire() try: obsolete = filter(lambda x: not r.exists('{}:cache:{}'.format(AGENT_ID, x)), r.smembers(self.__cache_key)) if obsolete: with r.pipeline(transaction=True) as p: p.multi() log.info('Removing {} resouces from cache...'.format(len(obsolete))) for uuid in obsolete: uuid_lock = self.uuid_lock(uuid) uuid_lock.acquire() try: gid = r.hget(self.__gids_key, uuid) counter_key = '{}:cache:{}:cnt'.format(AGENT_ID, uuid) usage_counter = r.get(counter_key) if usage_counter is None or int(usage_counter) <= 0: try: resources_cache.remove_context(resources_cache.get_context(uuid)) p.srem(self.__cache_key, uuid) p.hdel(self.__gids_key, uuid) p.hdel(self.__gids_key, gid) p.delete(counter_key) g = self.__uuid_dict[uuid] del self.__uuid_dict[uuid] del self.__graph_dict[g] except Exception, e: traceback.print_exc() log.error('Purging resource {} with uuid {}'.format(gid, uuid)) p.execute() finally: uuid_lock.release()
def delivery(self, value):
    """
    Set the delivery state of the request, updating the ready/sent sets accordingly
    :param value: 'ready', 'sent', 'accepted', ...
    """
    request_id = self._request_id
    with r.pipeline(transaction=True) as pipe:
        pipe.multi()
        if value == 'ready':
            pipe.sadd(self.__ready_key, request_id)
        else:
            if value == 'sent':
                pipe.sadd(self.__sent_key, request_id)
            # Any state other than 'ready' takes the request out of the ready set
            pipe.srem(self.__ready_key, request_id)
        pipe.hset('{}'.format(self._request_key), 'delivery', value)
        pipe.execute()
    log.info('Request {} delivery state is now "{}"'.format(request_id, value))
def __load_fragment_requests(fid):
    """
    Load all requests and their sinks that are related to a given fragment id.
    A request whose response cannot be built is logged and removed from the
    fragment's request set.
    :param fid: Fragment id
    :return: A dictionary of sinks of all fragment requests (request id -> sink)
    """
    sinks_ = {}
    fragment_requests_key = '{}:{}:requests'.format(fragments_key, fid)
    for rid in r.smembers(fragment_requests_key):
        try:
            sinks_[rid] = build_response(rid).sink
        except Exception as e:
            log.warning(e.message)
            # Stop tracking the request that could not be loaded
            with r.pipeline(transaction=True) as p:
                p.multi()
                p.srem(fragment_requests_key, rid)
                p.execute()
    # Callers take len() of and iterate over the result, so it must be handed back
    return sinks_
def _update_result_set(fid, gp):
    """
    Replace the stored result set of fragment <fid> with the rows produced by its query
    and set the '<fragments_key>:<fid>:rs' key in Redis. Any error is logged, not raised.
    :param fid: Fragment id
    :param gp: Graph pattern handed over to _query
    """
    try:
        result_gen = _query(fid, gp)

        # Wipe the previous result set before consuming the query results
        deleted = db[fid].delete_many({}).deleted_count
        log.info('{} rows removed from fragment {} result set'.format(deleted, fid))

        collection = db[fid]
        rows = set(result_gen)
        if rows:
            documents = []
            for row in rows:
                labels = row.labels
                documents.append({label: row[labels[label]] for label in labels})
            collection.insert_many(documents)
        log.info('{} rows inserted into fragment {} result set'.format(len(rows), fid))

        rs_key = '{}:{}:rs'.format(fragments_key, fid)
        with r.pipeline(transaction=True) as pipe:
            pipe.multi()
            pipe.set(rs_key, True)
            pipe.execute()
    except Exception as e:
        traceback.print_exc()
        log.error(e.message)
def __remove_fragment(fid):
    """
    Completely remove a fragment from the system after notifying its known consumers
    :param fid: Fragment identifier
    """
    log.debug('Waiting to remove fragment {}...'.format(fid))
    lock = fragment_lock(fid)
    lock.acquire()

    r_sinks = __load_fragment_requests(fid)
    __notify_completion(fid, r_sinks)

    # NOTE(review): this pattern also matches the keys of any other fragment whose id
    # starts with <fid> - confirm that fragment ids can never be prefixes of each other
    fragment_keys = r.keys('{}:{}*'.format(fragments_key, fid))
    with r.pipeline(transaction=True) as p:
        p.multi()
        # Explicit loop: the deletions are side effects and must not rely on map() being eager
        for k in fragment_keys:
            p.delete(k)
        p.srem(fragments_key, fid)
        p.execute()

    # Fragment lock key was just implicitly removed, so it's not necessary to release the lock
    log.info('Fragment {} has been removed'.format(fid))
def __update_fragment_cache(fid, gp):
    """
    Recreate fragment <fid> cached data and all its data-contexts from the corresponding stream (Redis).
    The contexts of the previously cached plan are removed first; the stream key is deleted at the end.
    :param fid: Fragment id
    :param gp: Graph pattern of the fragment, as accepted by graph_from_gp
    :return:
    """
    # Materialise the triple patterns of the cached plan before any context is removed,
    # so the store is not being iterated lazily while it is modified
    plan_tps = list(fragments_cache.get_context(fid).subjects(RDF.type, AGORA.TriplePattern))
    fragments_cache.remove_context(fragments_cache.get_context('/' + fid))
    for tp in plan_tps:
        fragments_cache.remove_context(
            fragments_cache.get_context(str((fid, __extract_tp_from_plan(fragments_cache, tp)))))

    # Nodes of the graph pattern without incoming edges are treated as roots
    gp_graph = graph_from_gp(gp)
    roots = {node for node in gp_graph.nodes() if gp_graph.in_degree(node) == 0}

    # calendar.timegm expects a UTC time tuple, and add_stream_triple scores the stream
    # entries from utcnow(); a local-time value here could leave recent triples out
    until = calendar.timegm(dt.utcnow().timetuple())
    for c, s, p, o in load_stream_triples(fid, until):
        fragments_cache.get_context(str((fid, c))).add((s, p, o))
        fragments_cache.get_context('/' + fid).add((s, p, o))
        if c[0] in roots:
            fragments_cache.get_context('/' + fid).add((s, RDF.type, STOA.Root))

    with r.pipeline() as pipe:
        pipe.delete('{}:{}:stream'.format(fragments_key, fid))
        pipe.execute()
def stream(self, value):
    """
    Store the stream state of the request
    :param value: New value for the '__stream' field of the request hash
    """
    request_hash = '{}'.format(self._request_key)
    with r.pipeline(transaction=True) as pipe:
        pipe.multi()
        pipe.hset(request_hash, '__stream', value)
        pipe.execute()
    log.info('Request {} stream state is now "{}"'.format(self._request_id, value))
def set_link(self, link):
    """
    Flag a link with True in the links status hash of the enrichment
    :param link: Link to flag; it is stored under its str() form
    """
    status_key = '{}:links:status'.format(self._enrichment_key)
    field = str(link)
    with r.pipeline(transaction=True) as pipe:
        pipe.multi()
        pipe.hset(status_key, field, True)
        pipe.execute()
def __pull_fragment(fid): """ Pull and replace (if needed) a given fragment :param fid: Fragment id """ fragment_key = '{}:{}'.format(fragments_key, fid) # Load fragment graph pattern tps = r.smembers('{}:gp'.format(fragment_key)) # Load fragment requests (including their sinks) r_sinks = __load_fragment_requests(fid) log.info("""Starting collection of fragment {}: - GP: {} - Supporting: ({}) {}""".format(fid, list(tps), len(r_sinks), list(r_sinks))) # Prepare the corresponding fragment generator and fetch the search plan start_time = datetime.now() try: fgm_gen, _, graph = agora_client.get_fragment_generator('{ %s }' % ' . '.join(tps), workers=N_COLLECTORS, provider=graph_provider, queue_size=N_COLLECTORS) except Exception: log.error('Agora is not available') return # In case there is not SearchTree in the plan: notify, remove and abort collection if not list(graph.subjects(RDF.type, AGORA.SearchTree)): log.info('There is no search plan for fragment {}. Removing...'.format(fid)) # TODO: Send additional headers notifying the reason to end __notify_completion(fid, r_sinks) __remove_fragment(fid) return # Update cache graph prefixes __bind_prefixes(graph) # Extract triple patterns' dictionary from the search plan context_tp = {tpn: __extract_tp_from_plan(graph, tpn) for tpn in graph.subjects(RDF.type, AGORA.TriplePattern)} frag_contexts = {tpn: (fid, context_tp[tpn]) for tpn in context_tp} lock = fragment_lock(fid) lock.acquire() # Update fragment contexts with r.pipeline(transaction=True) as p: p.multi() p.set('{}:pulling'.format(fragment_key), True) contexts_key = '{}:contexts'.format(fragment_key) p.delete(contexts_key) for tpn in context_tp.keys(): p.sadd(contexts_key, frag_contexts[tpn]) p.execute() lock.release() # Init fragment collection counters n_triples = 0 fragment_weight = 0 fragment_delta = 0 log.info('Collecting fragment {}...'.format(fid)) try: # Iterate all fragment triples and their contexts for (c, s, p, o) in fgm_gen: pre_ts = datetime.now() # 
Update weights and counters triple_weight = len(u'{}{}{}'.format(s, p, o)) fragment_weight += triple_weight fragment_delta += triple_weight # Store the triple if it was not obtained before and notify related requests try: lock.acquire() new_triple = add_stream_triple(fid, context_tp[c], (s, p, o)) lock.release() if new_triple: __consume_quad(fid, (context_tp[c], s, p, o), graph, sinks=r_sinks) n_triples += 1 except Exception, e: log.warning(e.message) traceback.print_exc() if fragment_delta > 10000: fragment_delta = 0 log.info('Pulling fragment {} [{} kB]'.format(fid, fragment_weight / 1000.0)) if n_triples % 100 == 0: # Update fragment requests if r.scard('{}:requests'.format(fragment_key)) != len(r_sinks): r_sinks = __load_fragment_requests(fid) post_ts = datetime.now() elapsed = (post_ts - pre_ts).total_seconds() throttling = THROTTLING_TIME - elapsed if throttling > 0: sleep(throttling) except Exception, e: log.warning(e.message) traceback.print_exc()
log.info( '{} triples retrieved for fragment {} in {} s [{} kB]'.format(n_triples, fid, elapsed, fragment_weight / 1000.0)) # Update fragment cache and its contexts lock.acquire() try: __update_fragment_cache(fid, tps) log.info('Fragment {} data has been replaced with the recently collected'.format(fid)) __cache_plan_context(fid, graph) log.info('BGP context of fragment {} has been cached'.format(fid)) log.info('Updating result set for fragment {}...'.format(fid)) # __update_result_set(fid, tps) # Calculate sync times and update fragment flags with r.pipeline(transaction=True) as p: p.multi() sync_key = '{}:sync'.format(fragment_key) demand_key = '{}:on_demand'.format(fragment_key) # Fragment is now synced p.set(sync_key, True) # If the fragment collection time has not exceeded the threshold, switch to on-demand mode if elapsed < ON_DEMAND_TH and elapsed * random.random() < ON_DEMAND_TH / 4: p.set(demand_key, True) log.info('Fragment {} has been switched to on-demand mode'.format(fid)) else: p.delete(demand_key) min_durability = int(max(MIN_SYNC, elapsed)) durability = random.randint(min_durability, min_durability * 2) p.expire(sync_key, durability) log.info('Fragment {} is considered synced for {} s'.format(fid, durability))
def __pull_fragment(fid):
    """
    Pull and replace (if needed) a given fragment.

    Steps: renew the sync flag and return early when the fragment is flagged 'events' and
    no change is reported; otherwise fetch the search plan, collect all fragment triples
    into the stream while notifying the related requests, rebuild the fragment cache and
    finally compute for how long the fragment is considered synced.
    :param fid: Fragment id
    """
    fragment_key = '{}:{}'.format(fragments_key, fid)

    on_events = r.get('{}:events'.format(fragment_key))

    if on_events == 'True' and not change_in_fragment_resource(fid, int(r.get('{}:ud'.format(fragment_key)))):
        # Nothing changed: renew the sync flag for another '<fragment>:ud' seconds and leave
        with r.pipeline(transaction=True) as p:
            p.multi()
            sync_key = '{}:sync'.format(fragment_key)
            p.set(sync_key, True)
            durability = int(r.get('{}:ud'.format(fragment_key)))
            p.expire(sync_key, durability)
            p.set('{}:updated'.format(fragment_key), calendar.timegm(dt.utcnow().timetuple()))
            p.delete('{}:pulling'.format(fragment_key))
            p.execute()
        return

    # Load fragment graph pattern
    tps = r.smembers('{}:gp'.format(fragment_key))
    # Load fragment requests (including their sinks)
    r_sinks = __load_fragment_requests(fid)
    log.info("""Starting collection of fragment {}: - GP: {} - Supporting: ({}) {}""".format(
        fid, list(tps), len(r_sinks), list(r_sinks)))

    init_fragment_resources(fid)

    # Prepare the corresponding fragment generator and fetch the search plan
    start_time = datetime.utcnow()
    try:
        fgm_gen, _, graph = agora_client.get_fragment_generator('{ %s }' % ' . '.join(tps),
                                                                workers=N_COLLECTORS,
                                                                provider=graph_provider,
                                                                queue_size=N_COLLECTORS * 100)
    except Exception:
        traceback.print_exc()
        log.error('Agora is not available')
        return

    # In case there is not SearchTree in the plan: notify, remove and abort collection
    if not list(graph.subjects(RDF.type, AGORA.SearchTree)):
        log.info('There is no search plan for fragment {}. Removing...'.format(fid))
        # TODO: Send additional headers notifying the reason to end
        __notify_completion(fid, r_sinks)
        __remove_fragment(fid)
        return

    # Update cache graph prefixes
    __bind_prefixes(graph)

    # Extract triple patterns' dictionary from the search plan
    context_tp = {tpn: __extract_tp_from_plan(graph, tpn) for tpn in
                  graph.subjects(RDF.type, AGORA.TriplePattern)}
    frag_contexts = {tpn: (fid, context_tp[tpn]) for tpn in context_tp}

    lock = fragment_lock(fid)
    lock.acquire()
    try:
        # Update fragment contexts
        with r.pipeline(transaction=True) as p:
            p.multi()
            p.set('{}:pulling'.format(fragment_key), True)
            contexts_key = '{}:contexts'.format(fragment_key)
            p.delete(contexts_key)
            clear_fragment_stream(fid)
            for tpn in context_tp:
                p.sadd(contexts_key, frag_contexts[tpn])
            p.execute()
    finally:
        # Release even if Redis fails, so the fragment does not stay locked
        lock.release()

    # Init fragment collection counters
    n_triples = 0
    fragment_weight = 0
    fragment_delta = 0

    log.info('Collecting fragment {}...'.format(fid))
    try:
        # Iterate all fragment triples and their contexts
        pre_ts = datetime.utcnow()
        for (c, s, p, o) in fgm_gen:
            # Update weights and counters
            triple_weight = len(u'{}{}{}'.format(s, p, o))
            fragment_weight += triple_weight
            fragment_delta += triple_weight

            # Store the triple if it was not obtained before and notify related requests
            try:
                lock.acquire()
                try:
                    new_triple = add_stream_triple(fid, context_tp[c], (s, p, o))
                finally:
                    # context_tp[c] may raise after the lock was taken; without this release
                    # the next iteration would block on acquire()
                    lock.release()
                if new_triple:
                    if isinstance(s, URIRef):
                        if s not in resource_in_fragment:
                            resource_in_fragment[s] = set([])
                        resource_in_fragment[s].add(fid)
                        fragment_resources[fid].add(s)
                    __consume_quad(fid, (context_tp[c], s, p, o), graph, sinks=r_sinks)
                    n_triples += 1
            except Exception as e:
                log.warning(e.message)
                traceback.print_exc()

            if fragment_delta > 10000:
                fragment_delta = 0
                log.info('Pulling fragment {} [{} kB]'.format(fid, fragment_weight / 1000.0))

            if n_triples % 100 == 0:
                # Update fragment requests
                if r.scard('{}:requests'.format(fragment_key)) != len(r_sinks):
                    r_sinks = __load_fragment_requests(fid)

            # Keep at least THROTTLING_TIME seconds between consecutive triples
            post_ts = datetime.utcnow()
            elapsed = (post_ts - pre_ts).total_seconds()
            throttling = THROTTLING_TIME - elapsed
            if throttling > 0:
                sleep(throttling)
            pre_ts = datetime.utcnow()
    except Exception as e:
        log.warning(e.message)
        traceback.print_exc()

    elapsed = (datetime.utcnow() - start_time).total_seconds()
    log.info(
        '{} triples retrieved for fragment {} in {} s [{} kB]'.format(n_triples, fid, elapsed,
                                                                      fragment_weight / 1000.0))

    # Update fragment cache and its contexts
    lock.acquire()
    try:
        __update_fragment_cache(fid, tps)
        log.info('Fragment {} data has been replaced with the recently collected'.format(fid))
        __cache_plan_context(fid, graph)
        log.info('BGP context of fragment {} has been cached'.format(fid))
        log.info('Updating result set for fragment {}...'.format(fid))

        # Calculate sync times and update fragment flags
        with r.pipeline(transaction=True) as p:
            p.multi()
            sync_key = '{}:sync'.format(fragment_key)
            demand_key = '{}:on_demand'.format(fragment_key)
            # Fragment is now synced
            p.set(sync_key, True)
            # On-demand mode is currently disabled, so the flag is always cleared
            p.delete(demand_key)

            updated_delay = int(r.get('{}:ud'.format(fragment_key)))
            last_requests_ts = [int(ts) for ts in r.lrange('{}:hist'.format(fragment_key), 0, -1)]
            log.debug('Fragment {} request history: {}'.format(fid, last_requests_ts))

            current_ts = calendar.timegm(datetime.utcnow().timetuple())
            first_collection = r.get('{}:updated'.format(fragment_key)) is None
            base_ts = last_requests_ts[:]
            if not first_collection:
                # An empty history has no latest request to compare against
                if base_ts and current_ts - base_ts[0] <= updated_delay:
                    current_ts += updated_delay  # Force
                base_ts = [current_ts] + base_ts

            # Gaps between consecutive timestamps of the request history
            request_intervals = [i - j for i, j in zip(base_ts[:-1], base_ts[1:])]
            if request_intervals:
                avg_gap = sum(request_intervals) / len(request_intervals)
                log.debug('Fragment {} average request gap: {}'.format(fid, avg_gap))
                durability = avg_gap - elapsed if avg_gap > updated_delay else updated_delay - elapsed
            else:
                durability = updated_delay - elapsed

            durability = int(max(durability, 1))
            log.debug('Fragment {} candidate durability: {}'.format(fid, durability))
            if durability <= updated_delay - elapsed:
                p.expire(sync_key, durability)
                log.info('Fragment {} is considered synced for {} s'.format(fid, durability))
            else:
                clear_fragment_stream(fid)
                p.delete('{}:updated'.format(fragment_key))
                p.delete('{}:hist'.format(fragment_key))
                log.info('Fragment {} will no longer be automatically updated'.format(fid))

            p.set('{}:updated'.format(fragment_key), calendar.timegm(dt.utcnow().timetuple()))
            p.delete('{}:pulling'.format(fragment_key))
            p.execute()

        __notify_completion(fid, r_sinks)
    finally:
        lock.release()

    log.info('Fragment {} collection is complete!'.format(fid))
c, s, p, o = eval(x) return c, __term(s), __term(p), __term(o) for x in r.zrangebyscore('{}:fragments:{}:stream'.format(AGENT_ID, fid), '-inf', '{}'.format(float(until))): yield __triplify(x) def add_stream_triple(fid, tp, (s, p, o), timestamp=None): try: if timestamp is None: timestamp = calendar.timegm(dt.utcnow().timetuple()) quad = (tp, s.n3(), p.n3(), o.n3()) stream_key = '{}:fragments:{}:stream'.format(AGENT_ID, fid) not_found = not bool(r.zscore(stream_key, quad)) if not_found: with r.pipeline() as pipe: pipe.zadd(stream_key, timestamp, quad) pipe.execute() return not_found except Exception, e: traceback.print_exc() log.error(e.message) class GraphProvider(object): def __init__(self): self.__last_creation_ts = dt.now() self.__graph_dict = {} self.__uuid_dict = {} self.__gid_uuid_dict = {} self.__lock = Lock()