# Example #1
# 0
    def _build(self):
        """
        Generator that decides the delivery mode for this request and, when a
        cached fragment is available, replays it in chunks.

        Yields ``(quads, headers)`` tuples; an empty tuple together with
        ``{'state': 'end'}`` signals end-of-stream.  When the resulting state
        is 'streaming' (no cached fragment, live collection ongoing) nothing
        is yielded at all.
        :return:
        """

        # Seconds since epoch (UTC) used as the snapshot timestamp for the
        # cache lookup below.
        timestamp = calendar.timegm(dt.now().timetuple())
        # Per-fragment lock serializing cache access across workers.
        lock = r.lock('fragments:{}:lock'.format(self.sink.fragment_id), lock_class=Lock)
        lock.acquire()
        fragment = None
        try:
            # fragment: cached triples (or None); stream: whether live
            # collection for this fragment is still in progress.
            fragment, stream = self.fragment(stream=True, timestamp=timestamp)
            if stream:
                self.sink.stream = True
                if fragment:
                    self.sink.delivery = 'mixing'  # cache replay + live data
                else:
                    self.sink.delivery = 'streaming'  # live data only
            else:
                if fragment:
                    self.sink.delivery = 'pushing'  # complete cached copy
                    log.debug('Fragment retrieved from cache for request number {}'.format(self._request_id))
                else:
                    # Nothing cached and no live stream: terminate immediately.
                    self.sink.delivery = 'sent'
                    log.debug('Sending end stream signal since there is no fragment and stream is disabled')
                    # NOTE(review): this yield happens while the lock is held;
                    # the finally below still releases it if the consumer
                    # abandons the generator at this point.
                    yield (), {'state': 'end'}
                self.sink.stream = False
        except Exception as e:
            # Best-effort fallback: on any failure assume live streaming so
            # the client is not cut off.  e.message is py2-only — TODO confirm
            # before any py3 migration.
            log.warning(e.message)
            self.sink.stream = True
            self.sink.delivery = 'streaming'
        finally:
            lock.release()

        if fragment:
            log.info('Building a stream result from cache for request number {}...'.format(self._request_id))
            # Replay the cached fragment in batches; chunks() is presumably a
            # fixed-size batching helper — defined elsewhere in the project.
            for ch in chunks(fragment, 1000):
                if ch:
                    yield [(map_variables(c, self.sink.mapping), s.n3(), p.n3(), o.n3()) for
                           (c, s, p, o)
                           in ch], {'source': 'store'}

            # Re-acquire the lock to settle the final delivery state now that
            # the cached part has been fully replayed.
            lock.acquire()
            try:
                if self.sink.delivery == 'pushing' or (self.sink.delivery == 'mixing' and not self.sink.stream):
                    # Cache was the whole answer (or live collection finished
                    # meanwhile): signal completion.
                    self.sink.delivery = 'sent'
                    log.info(
                        'The response stream of request {} is completed. Notifying...'.format(self.sink.request_id))
                    yield (), {'state': 'end'}
                elif self.sink.delivery == 'mixing' and self.sink.stream:
                    # Cache replayed but live data still coming: hand over to
                    # the streaming path.
                    self.sink.delivery = 'streaming'
            finally:
                lock.release()
 def build(self):
     """
     Drive the fragment response: run the parent builder, then relay every
     ``(quads, headers)`` tuple produced by :meth:`_build` to the caller.

     The per-fragment 'consume' lock is held while the response is being
     built so the collector cannot mutate the cached fragment concurrently.
     :return: generator of (quads, headers) tuples.
     """
     super(FragmentResponse, self).build()
     lock_consume_key = 'fragments:{}:lock:consume'.format(self.sink.fragment_id)
     c_lock = r.lock(lock_consume_key, lock_class=Lock)
     c_lock.acquire()
     generator = self._build()
     try:
         for response in generator:
             yield response
     except Exception as e:  # `except E, e` is py2-only; `as` works on both
         traceback.print_exc()
         # str(e) instead of e.message: .message is deprecated and absent on
         # many exception types.
         log.error(str(e))
     finally:
         # FIX(review): the consume lock was acquired but never released,
         # which would starve/deadlock the fragment collector after the
         # first response was built — confirm no other component releases it.
         c_lock.release()
def __remove_fragment(fid):
    """
    Delete every Redis key belonging to fragment ``fid`` and notify any
    pending requests that their stream has ended.

    :param fid: fragment identifier.
    """
    log.debug("Waiting to remove fragment {}...".format(fid))
    lock_key = "fragments:{}:lock".format(fid)
    lock = r.lock(lock_key, lock_class=Lock)
    lock.acquire()
    try:
        with r.pipeline(transaction=True) as p:
            requests, r_sinks = __load_fragment_requests(fid)
            # Tell every waiting request the fragment is gone before wiping it.
            __notify_completion(fid, r_sinks)
            fragment_keys = r.keys("fragments:{}*".format(fid))
            # Plain loop instead of map(): map() over a side-effecting lambda
            # is lazy on Python 3 and would silently skip the deletes.
            for key in fragment_keys:
                p.delete(key)
            p.srem("fragments", fid)
            p.execute()
    finally:
        # FIX(review): the lock was acquired but never released, permanently
        # blocking any later operation on this fragment's lock key.
        lock.release()

    log.info("Fragment {} has been removed".format(fid))
def __pull_fragment(fid):
    tps = r.smembers("fragments:{}:gp".format(fid))
    requests, r_sinks = __load_fragment_requests(fid)
    log.info(
        """Starting collection of fragment {}:
                    - GP: {}
                    - Supporting: ({}) {}""".format(
            fid, list(tps), len(requests), list(requests)
        )
    )
    start_time = datetime.now()

    try:
        fgm_gen, _, graph = agora_client.get_fragment_generator(
            "{ %s }" % " . ".join(tps), workers=N_COLLECTORS, provider=graph_provider, queue_size=N_COLLECTORS
        )
    except Exception:
        log.error("Agora is not available")
        return

    # There is no search plan to execute
    if not list(graph.subjects(RDF.type, AGORA.SearchTree)):
        log.info("There is no search plan for fragment {}. Removing...".format(fid))
        # TODO: Send additional headers notifying the reason to end
        __notify_completion(fid, r_sinks)
        __remove_fragment(fid)
        return

    triple_patterns = {tpn: __triple_pattern(graph, tpn) for tpn in graph.subjects(RDF.type, AGORA.TriplePattern)}
    fragment_contexts = {tpn: (fid, triple_patterns[tpn]) for tpn in triple_patterns}
    __bind_prefixes(graph)

    lock_key = "fragments:{}:lock".format(fid)
    lock = r.lock(lock_key, lock_class=Lock)
    lock.acquire()

    lock_consume_key = "fragments:{}:lock:consume".format(fid)
    c_lock = r.lock(lock_consume_key, lock_class=Lock)
    c_lock.acquire()

    # Update fragment contexts
    with r.pipeline(transaction=True) as p:
        p.multi()
        p.set("fragments:{}:pulling".format(fid), True)
        p.delete("fragments:{}:contexts".format(fid))
        for tpn in fragment_contexts.keys():
            p.sadd("fragments:{}:contexts".format(fid), fragment_contexts[tpn])
        p.execute()
    lock.release()

    c_lock.release()

    n_triples = 0
    fragment_weight = 0
    fragment_delta = 0

    try:
        for (c, s, p, o) in fgm_gen:
            pre_ts = datetime.now()
            triple_weight = len(u"{}{}{}".format(s, p, o))
            fragment_weight += triple_weight
            fragment_delta += triple_weight
            lock.acquire()
            if add_stream_triple(fid, triple_patterns[c], (s, p, o)):
                __consume_quad(fid, (triple_patterns[c], s, p, o), graph, sinks=r_sinks)
            lock.release()
            if fragment_delta > 1000:
                fragment_delta = 0
                log.info("Pulling fragment {} [{} kB]".format(fid, fragment_weight / 1000.0))

            if r.scard("fragments:{}:requests".format(fid)) != len(requests):
                requests, r_sinks = __load_fragment_requests(fid)
            n_triples += 1
            post_ts = datetime.now()
            elapsed = (post_ts - pre_ts).total_seconds()
            excess = (1.0 / COLLECT_THROTTLING) - elapsed
            if excess > 0:
                sleep(excess)
    except Exception, e:
        traceback.print_exc()