コード例 #1
0
ファイル: base.py プロジェクト: SmartDeveloperHub/sdh-curator
 def remove(self):
     """
     Delete this request from Redis.

     Queues, in a single MULTI/EXEC pipeline: the removal of the request's
     'id' field value from the 'requests' sorted set, the deletion of every
     key that starts with the request key, and whatever self._remove() adds
     to the pipeline. Logs the removal once the pipeline has been executed.
     """
     with r.pipeline(transaction=True) as pipe:
         pipe.multi()
         # Reads are issued on the plain client, so their results are available right away
         request_ref = r.hget(self._request_key, 'id')
         pipe.zrem('requests', request_ref)
         key_pattern = '{}*'.format(self._request_key)
         for stored_key in r.keys(key_pattern):
             pipe.delete(stored_key)
         # Hook that queues additional commands on the same pipeline
         self._remove(pipe)
         pipe.execute()
     log.info('Request {} was removed'.format(self._request_id))
コード例 #2
0
 def delivery(self, value):
     """
     Record the delivery state of this request.

     'ready' adds the request id to the 'deliveries:ready' set; any other
     value removes it from that set, and 'sent' additionally adds it to
     'deliveries:sent'. The value is always stored in the 'delivery' field
     of the request hash, and the new state is logged.

     :param value: New delivery state
     """
     ready_set = 'deliveries:ready'
     with r.pipeline(transaction=True) as pipe:
         pipe.multi()
         if value == 'ready':
             pipe.sadd(ready_set, self._request_id)
         else:
             if value == 'sent':
                 pipe.sadd('deliveries:sent', self._request_id)
             # Any state other than 'ready' takes the request out of the ready set
             pipe.srem(ready_set, self._request_id)
         pipe.hset('requests:{}'.format(self._request_id), 'delivery', value)
         pipe.execute()
     log.info('Request {} delivery state is now "{}"'.format(self._request_id, value))
コード例 #3
0
def __load_fragment_requests(fid):
    """
    Collect the sink of every request registered for fragment <fid>.

    Reads the members of the "fragments:<fid>:requests" set and builds a response
    for each one, keeping its sink indexed by request id. A request whose response
    cannot be built is reported and removed from the set.

    NOTE(review): this excerpt ends without a return statement, while callers unpack
    two values from this function; the return presumably lies beyond the visible
    code -- confirm.
    :param fid: Fragment identifier
    """
    requests_ = r.smembers("fragments:{}:requests".format(fid))
    sinks_ = {}
    for rid in requests_:
        try:
            sinks_[rid] = build_response(rid).sink
        except Exception, e:
            # Any failure while building the response is printed and logged, not propagated
            traceback.print_exc()
            log.warning(e.message)
            # Drop the failing request id from the fragment's request set
            with r.pipeline(transaction=True) as p:
                p.multi()
                p.srem("fragments:{}:requests".format(fid), rid)
                p.execute()
コード例 #4
0
def __remove_fragment(fid):
    """
    Remove fragment <fid>: notify its request sinks and delete its Redis keys.

    Acquires the fragment lock, notifies completion to the sinks of the fragment
    requests and then, in a single pipeline execution, deletes every key matching
    "fragments:<fid>*" and removes <fid> from the "fragments" set.
    :param fid: Fragment identifier
    :return: None
    """
    log.debug("Waiting to remove fragment {}...".format(fid))
    lock_key = "fragments:{}:lock".format(fid)
    lock = r.lock(lock_key, lock_class=Lock)
    lock.acquire()

    try:
        with r.pipeline(transaction=True) as p:
            # Only the sinks are needed here; the request ids are discarded
            _, r_sinks = __load_fragment_requests(fid)
            __notify_completion(fid, r_sinks)
            # Explicit loop rather than map(): the deletes must be queued even where
            # map() is lazy, and a loop states the side effect plainly.
            for key in r.keys("fragments:{}*".format(fid)):
                p.delete(key)
            p.srem("fragments", fid)
            # NOTE(review): lock_key matches the pattern above, so the lock key is
            # presumably deleted together with the rest of the fragment keys, which
            # is why there is no release on the success path -- confirm.
            p.execute()
    except Exception:
        # The queued deletions only run in p.execute(); when something fails before
        # that, the lock would otherwise stay held and block later operations on
        # this fragment.
        lock.release()
        raise

    log.info("Fragment {} has been removed".format(fid))
コード例 #5
0
def __replace_fragment(fid):
    """
    Recreate fragment <fid> cached data and all its data-contexts from the corresponding stream (Redis)

    The cached "/<fid>" context and one context per triple pattern are dropped and
    refilled with the stream triples whose score is not later than the current
    time; the stream key is deleted afterwards.
    :param fid: Fragment identifier
    :return: None
    """
    tps = cache.get_context(fid).subjects(RDF.type, AGORA.TriplePattern)
    cache.remove_context(cache.get_context("/" + fid))
    for tp in tps:
        cache.remove_context(cache.get_context(str((fid, __triple_pattern(cache, tp)))))
    # calendar.timegm() interprets its argument as UTC, and add_stream_triple scores
    # stream entries from utcnow(). Using local time here would place the cut-off
    # before freshly added triples in time zones behind UTC, so they would be
    # skipped and then lost when the stream is deleted below.
    until = calendar.timegm(dt.utcnow().timetuple())
    fragment_triples = load_stream_triples(fid, until)
    for c, s, p, o in fragment_triples:
        # Each triple goes to its triple-pattern context and to the whole-fragment context
        cache.get_context(str((fid, c))).add((s, p, o))
        cache.get_context("/" + fid).add((s, p, o))
    with r.pipeline() as pipe:
        pipe.delete("fragments:{}:stream".format(fid))
        pipe.execute()
コード例 #6
0
 def stream(self, value):
     """
     Store <value> in the '__stream' field of this request's hash and log the new state.

     :param value: New stream state
     """
     with r.pipeline(transaction=True) as pipe:
         pipe.multi()
         request_hash = 'requests:{}'.format(self._request_id)
         pipe.hset(request_hash, '__stream', value)
         pipe.execute()
     log.info('Request {} stream state is now "{}"'.format(self._request_id, value))
コード例 #7
0
def __pull_fragment(fid):
    """
    Collect the triples of fragment <fid> through the Agora fragment generator.

    The graph pattern is read from the "fragments:<fid>:gp" set. Every collected
    triple is offered to the fragment stream and, when it was actually added,
    consumed together with the sinks of the requests supported by the fragment.

    Returns early (None) when the fragment generator cannot be obtained or when the
    resulting graph holds no search tree; in the latter case the fragment is removed.

    NOTE(review): the visible code stops at the exception handler of the collection
    loop; whatever follows it is outside this excerpt.
    :param fid: Fragment identifier
    """
    tps = r.smembers("fragments:{}:gp".format(fid))
    requests, r_sinks = __load_fragment_requests(fid)
    log.info(
        """Starting collection of fragment {}:
                    - GP: {}
                    - Supporting: ({}) {}""".format(
            fid, list(tps), len(requests), list(requests)
        )
    )
    start_time = datetime.now()

    try:
        # The stored triple patterns are joined into a single "{ tp1 . tp2 ... }" graph pattern
        fgm_gen, _, graph = agora_client.get_fragment_generator(
            "{ %s }" % " . ".join(tps), workers=N_COLLECTORS, provider=graph_provider, queue_size=N_COLLECTORS
        )
    except Exception:
        # Any failure here is reported as Agora being unavailable and the pull is abandoned
        log.error("Agora is not available")
        return

    # There is no search plan to execute
    if not list(graph.subjects(RDF.type, AGORA.SearchTree)):
        log.info("There is no search plan for fragment {}. Removing...".format(fid))
        # TODO: Send additional headers notifying the reason to end
        __notify_completion(fid, r_sinks)
        __remove_fragment(fid)
        return

    # Map every triple pattern node of the plan graph to the value built by __triple_pattern,
    # and pair that value with the fragment id to form the fragment contexts
    triple_patterns = {tpn: __triple_pattern(graph, tpn) for tpn in graph.subjects(RDF.type, AGORA.TriplePattern)}
    fragment_contexts = {tpn: (fid, triple_patterns[tpn]) for tpn in triple_patterns}
    __bind_prefixes(graph)

    lock_key = "fragments:{}:lock".format(fid)
    lock = r.lock(lock_key, lock_class=Lock)
    lock.acquire()

    lock_consume_key = "fragments:{}:lock:consume".format(fid)
    c_lock = r.lock(lock_consume_key, lock_class=Lock)
    c_lock.acquire()

    # Update fragment contexts
    # (both locks are held while the pulling flag is set and the context set is rewritten)
    with r.pipeline(transaction=True) as p:
        p.multi()
        p.set("fragments:{}:pulling".format(fid), True)
        p.delete("fragments:{}:contexts".format(fid))
        for tpn in fragment_contexts.keys():
            p.sadd("fragments:{}:contexts".format(fid), fragment_contexts[tpn])
        p.execute()
    lock.release()

    c_lock.release()

    # Counters: collected triples, total weight in characters, and weight since the last progress log
    n_triples = 0
    fragment_weight = 0
    fragment_delta = 0

    try:
        # NOTE: 'p' below is the predicate of each quad and rebinds the name used for the pipeline above
        for (c, s, p, o) in fgm_gen:
            pre_ts = datetime.now()
            triple_weight = len(u"{}{}{}".format(s, p, o))
            fragment_weight += triple_weight
            fragment_delta += triple_weight
            # The fragment lock is held while the triple is added to the stream and consumed
            lock.acquire()
            if add_stream_triple(fid, triple_patterns[c], (s, p, o)):
                __consume_quad(fid, (triple_patterns[c], s, p, o), graph, sinks=r_sinks)
            lock.release()
            # Log progress each time more than 1000 characters have been collected since the last log
            if fragment_delta > 1000:
                fragment_delta = 0
                log.info("Pulling fragment {} [{} kB]".format(fid, fragment_weight / 1000.0))

            # Reload requests and sinks when the size of the fragment's request set has changed
            if r.scard("fragments:{}:requests".format(fid)) != len(requests):
                requests, r_sinks = __load_fragment_requests(fid)
            n_triples += 1
            post_ts = datetime.now()
            elapsed = (post_ts - pre_ts).total_seconds()
            # Throttling: sleep so that each iteration lasts at least 1 / COLLECT_THROTTLING seconds
            excess = (1.0 / COLLECT_THROTTLING) - elapsed
            if excess > 0:
                sleep(excess)
    except Exception, e:
        # Errors raised during collection are printed and not re-raised
        traceback.print_exc()
コード例 #8
0
                sleep(excess)
    except Exception, e:
        traceback.print_exc()

    elapsed = (datetime.now() - start_time).total_seconds()
    log.info(
        "{} triples retrieved for fragment {} in {} s [{} kB]".format(n_triples, fid, elapsed, fragment_weight / 1000.0)
    )

    lock.acquire()
    c_lock.acquire()
    __replace_fragment(fid)
    log.info("Fragment {} data has been replaced with the recently collected".format(fid))
    __cache_plan_context(fid, graph)
    log.info("BGP context of fragment {} has been cached".format(fid))
    with r.pipeline(transaction=True) as p:
        p.multi()
        sync_key = "fragments:{}:sync".format(fid)
        demand_key = "fragments:{}:on_demand".format(fid)
        # Fragment is now synced
        p.set(sync_key, True)
        # If the fragment collection time has not exceeded the threshold, switch to on-demand mode
        if elapsed < ON_DEMAND_TH and elapsed * random.random() < ON_DEMAND_TH / 4:
            p.set(demand_key, True)
            log.info("Fragment {} has been switched to on-demand mode".format(fid))
        else:
            p.delete(demand_key)
            min_durability = int(max(MIN_SYNC, elapsed))
            durability = random.randint(min_durability, min_durability * 2)
            p.expire(sync_key, durability)
            log.info("Fragment {} is considered synced for {} s".format(fid, durability))
コード例 #9
0
ファイル: base.py プロジェクト: SmartDeveloperHub/sdh-curator
 def __init__(self):
     """
     Create a transactional Redis pipeline and start with no request id,
     no request key and an empty dictionary of fields.
     """
     self._pipe = r.pipeline(transaction=True)
     self._request_id = self._request_key = None
     self._dict_fields = dict()
コード例 #10
0
        c, s, p, o = eval(x)
        return c, __term(s), __term(p), __term(o)

    for x in r.zrangebyscore('fragments:{}:stream'.format(fid), '-inf', '{}'.format(float(until))):
        yield __triplify(x)


def add_stream_triple(fid, tp, triple, timestamp=None):
    """
    Add a triple to the stream of fragment <fid> unless it is already there.

    The stream is the sorted set "fragments:<fid>:stream"; its members are
    (tp, s, p, o) quads built from the N3 form of each term and its scores are
    timestamps.
    :param fid: Fragment identifier
    :param tp: Triple pattern the triple belongs to
    :param triple: (s, p, o) sequence of terms; each term must provide n3()
    :param timestamp: Score for the new member; defaults to the current UTC time in seconds
    :return: True if the quad was not in the stream and has been added, False otherwise
    """
    # A plain positional parameter unpacked here replaces the former tuple parameter,
    # which was Python-2-only syntax; call sites passing (s, p, o) are unaffected.
    s, p, o = triple
    if timestamp is None:
        timestamp = calendar.timegm(dt.utcnow().timetuple())
    quad = (tp, s.n3(), p.n3(), o.n3())
    stream_key = 'fragments:{}:stream'.format(fid)
    # zscore yields None only for a missing member; a truthiness test would also
    # treat a member stored with score 0 as missing and add it again.
    not_found = r.zscore(stream_key, quad) is None
    if not_found:
        with r.pipeline() as pipe:
            pipe.zadd(stream_key, timestamp, quad)
            pipe.execute()
    return not_found


class GraphProvider(object):
    def __init__(self):
        self.__graph_dict = {}

    @staticmethod
    def __clean(name):
        shutil.rmtree('store/query/{}'.format(name))

    def create(self, conjunctive=False):
        uuid = shortuuid.uuid()
コード例 #11
0
 def set_link(self, link):
     """
     Set the status of <link> to True in the '<enrichment key>:links:status' hash.

     :param link: Link whose string form is used as the hash field
     """
     with r.pipeline(transaction=True) as pipe:
         pipe.multi()
         status_key = '{}:links:status'.format(self._enrichment_key)
         link_field = str(link)
         pipe.hset(status_key, link_field, True)
         pipe.execute()