コード例 #1
0
    def _save(self, action, general=True):
        """
        Queue on the sink pipeline everything needed to recover a fragment for this request.

        The request gets bound either to a brand new fragment collection or to an already
        known fragment whose graph pattern matches; its mapping, preferred labels and graph
        pattern are then pushed to the pipeline and mirrored in the sink's dict fields.

        :param action: Action being submitted; its request provides the pattern, the variable
            labels and the preferred labels
        :param general: When True the generalised graph pattern is used for the fragment lookup,
            otherwise the request's own pattern
        """

        super(FragmentSink, self)._save(action)

        # The graph pattern is taken straight from the request object
        self._graph_pattern = action.request.pattern

        if general:
            lookup_gp = self._generalize_gp()
        else:
            lookup_gp = self._graph_pattern

        # The lookup yields None or a (fragment_id, mapping) tuple
        known_fragment = self.__check_gp_mappings(gp=lookup_gp)
        is_new = known_fragment is None

        # A request that was already passed once always proceeds; any other one is subject to a random
        # decision whose threshold depends on whether the fragment is already known
        if action.id in self.passed_requests:
            proceed = True
        elif is_new:
            proceed = random() > 1.0 - PASS_THRESHOLD
        else:
            proceed = random() > PASS_THRESHOLD
        if not proceed:
            self.do_pass(action)
        if action.id in self.passed_requests:
            self.passed_requests.remove(action.id)

        if is_new:
            # Unknown pattern: register a new fragment collection for the looked-up graph pattern
            fragment_id = str(uuid())
            self._fragment_key = self.__f_key_pattern.format(fragment_id)
            self._pipe.sadd(self._fragments_key, fragment_id)
            self._pipe.sadd('{}:gp'.format(self._fragment_key), *lookup_gp)
            # Identity mapping over the request variables and the filter mapping keys
            mapping = {}
            for label in action.request.variable_labels:
                mapping[str(label)] = str(label)
            for label in self._filter_mapping:
                mapping[str(label)] = str(label)
        else:
            fragment_id, mapping = known_fragment
            self._fragment_key = self.__f_key_pattern.format(fragment_id)
            # An on-demand fragment loses its sync state as soon as it is requested again
            on_demand_flag = r.get('{}:on_demand'.format(self._fragment_key))
            if on_demand_flag is not None:
                self._pipe.delete('{}:sync'.format(self._fragment_key))

        fragment_requests_key = '{}:requests'.format(self._fragment_key)
        preferred = action.request.preferred_labels

        # Persist the mapping, the preferred labels, the fragment-request link and the original pattern
        self._pipe.hmset('{}map'.format(self._request_key), mapping)
        if preferred:
            self._pipe.sadd('{}pl'.format(self._request_key), *preferred)
        self._pipe.sadd(fragment_requests_key, self._request_id)
        self._pipe.hset('{}'.format(self._request_key), 'fragment_id', fragment_id)
        self._pipe.sadd('{}gp'.format(self._request_key), *self._graph_pattern)
        self._pipe.hset('{}'.format(self._request_key), 'pattern', ' . '.join(self._graph_pattern))

        # Keep at hand the attributes that the rest of the submission process may need
        self._dict_fields['mapping'] = mapping
        self._dict_fields['preferred_labels'] = action.request.preferred_labels
        self._dict_fields['fragment_id'] = fragment_id

        if is_new:
            log.info('Request {} has started a new fragment collection: {}'.format(self._request_id, fragment_id))
            return

        log.info('Request {} is going to re-use fragment {}'.format(self._request_id, fragment_id))
        n_fragment_reqs = r.scard(fragment_requests_key)
        log.info('Fragment {} is supporting {} more requests'.format(fragment_id, n_fragment_reqs))
コード例 #2
0
def __collect_fragments():
    """
    Collector daemon loop.

    Logs once how many fragments are registered and how many are synced, and then, every
    second, submits a pull task to the pool for each registered fragment that is neither
    synced nor being pulled and that has no unfinished task already.
    """
    n_registered = r.scard(fragments_key)
    n_synced = len(r.keys('{}:*:sync'.format(fragments_key)))
    log.info("""Collector daemon started:
                    - Fragments: {}
                    - Synced: {}""".format(n_registered, n_synced))

    # Last task submitted for each fragment id
    tasks = {}
    while True:
        # Candidates are resolved up-front: every fragment is checked before anything is submitted
        candidates = [fid for fid in r.smembers(fragments_key)
                      if r.get('{}:{}:sync'.format(fragments_key, fid)) is None
                      and r.get('{}:{}:pulling'.format(fragments_key, fid)) is None]
        for fid in candidates:
            task = tasks.get(fid)
            # Only (re)submit when there is no task for the fragment or the previous one is over
            if task is None or task.done():
                tasks[fid] = thp.submit(__pull_fragment, fid)
        time.sleep(1)
コード例 #3
0
def __pull_fragment(fid):
    """
    Pull and replace (if needed) a given fragment.

    Loads the fragment graph pattern and the requests it supports, asks Agora for a fragment
    generator and its search plan, stores the triple-pattern contexts of that plan and then
    streams every collected triple, handing the new ones over to the supporting request sinks.

    The function returns early (without collecting) when Agora cannot provide a generator or
    when the plan contains no search tree; in the latter case the fragment is also removed.

    :param fid: Fragment id
    """

    fragment_key = '{}:{}'.format(fragments_key, fid)

    # Load fragment graph pattern
    tps = r.smembers('{}:gp'.format(fragment_key))
    # Load fragment requests (including their sinks)
    r_sinks = __load_fragment_requests(fid)
    log.info("""Starting collection of fragment {}:
                    - GP: {}
                    - Supporting: ({}) {}""".format(fid, list(tps), len(r_sinks), list(r_sinks)))

    # Prepare the corresponding fragment generator and fetch the search plan
    # NOTE(review): start_time is not read in the visible part of this function
    start_time = datetime.now()
    try:
        fgm_gen, _, graph = agora_client.get_fragment_generator('{ %s }' % ' . '.join(tps), workers=N_COLLECTORS,
                                                                provider=graph_provider, queue_size=N_COLLECTORS)
    except Exception:
        # Keep the real cause visible: any failure here is reported as Agora being unavailable
        traceback.print_exc()
        log.error('Agora is not available')
        return

    # In case there is not SearchTree in the plan: notify, remove and abort collection
    if not list(graph.subjects(RDF.type, AGORA.SearchTree)):
        log.info('There is no search plan for fragment {}. Removing...'.format(fid))
        # TODO: Send additional headers notifying the reason to end
        __notify_completion(fid, r_sinks)
        __remove_fragment(fid)
        return

    # Update cache graph prefixes
    __bind_prefixes(graph)

    # Extract triple patterns' dictionary from the search plan
    context_tp = {tpn: __extract_tp_from_plan(graph, tpn) for tpn in
                  graph.subjects(RDF.type, AGORA.TriplePattern)}
    frag_contexts = {tpn: (fid, context_tp[tpn]) for tpn in context_tp}

    lock = fragment_lock(fid)

    # Update fragment contexts; the lock is always released, even if the transaction fails
    lock.acquire()
    try:
        with r.pipeline(transaction=True) as pipe:
            pipe.multi()
            pipe.set('{}:pulling'.format(fragment_key), True)
            contexts_key = '{}:contexts'.format(fragment_key)
            pipe.delete(contexts_key)
            for tpn in context_tp:
                pipe.sadd(contexts_key, frag_contexts[tpn])
            pipe.execute()
    finally:
        lock.release()

    # Init fragment collection counters
    n_triples = 0
    fragment_weight = 0
    fragment_delta = 0

    log.info('Collecting fragment {}...'.format(fid))
    try:
        # Iterate all fragment triples and their contexts
        for (c, s, p, o) in fgm_gen:
            pre_ts = datetime.now()
            # Update weights and counters
            triple_weight = len(u'{}{}{}'.format(s, p, o))
            fragment_weight += triple_weight
            fragment_delta += triple_weight

            # Store the triple if it was not obtained before and notify related requests
            try:
                lock.acquire()
                try:
                    new_triple = add_stream_triple(fid, context_tp[c], (s, p, o))
                finally:
                    # A failing triple is only logged below, so the lock must not stay held
                    lock.release()
                if new_triple:
                    __consume_quad(fid, (context_tp[c], s, p, o), graph, sinks=r_sinks)
                n_triples += 1
            except Exception as e:
                log.warning(e.message)
                traceback.print_exc()

            # Report progress every time more than 10000 characters of triples have been pulled
            if fragment_delta > 10000:
                fragment_delta = 0
                log.info('Pulling fragment {} [{} kB]'.format(fid, fragment_weight / 1000.0))

            # Update fragment requests (checked whenever the triple counter is a multiple of 100)
            if n_triples % 100 == 0 and r.scard('{}:requests'.format(fragment_key)) != len(r_sinks):
                r_sinks = __load_fragment_requests(fid)

            # Sleep the remainder so that each iteration lasts at least THROTTLING_TIME seconds
            post_ts = datetime.now()
            elapsed = (post_ts - pre_ts).total_seconds()
            throttling = THROTTLING_TIME - elapsed
            if throttling > 0:
                sleep(throttling)
    except Exception as e:
        log.warning(e.message)
        traceback.print_exc()
コード例 #4
0
ファイル: fragment.py プロジェクト: fserena/agora-scholar
def __pull_fragment(fid):
    """
    Pull and replace (if needed) a given fragment.

    When the fragment is updated on events and none of its resources changed, it is just
    flagged as synced again for its updating delay. Otherwise the fragment is collected from
    scratch: the search plan is fetched from Agora, its triple-pattern contexts are stored,
    every collected triple is streamed to the supporting request sinks and, finally, the
    fragment cache is replaced and the sync flags/durability are recalculated.

    The function returns early (without collecting) when Agora cannot provide a generator or
    when the plan contains no search tree; in the latter case the fragment is also removed.

    :param fid: Fragment id
    """

    fragment_key = '{}:{}'.format(fragments_key, fid)
    on_events = r.get('{}:events'.format(fragment_key))

    if on_events == 'True':
        durability = int(r.get('{}:ud'.format(fragment_key)))
        if not change_in_fragment_resource(fid, durability):
            # Nothing changed: renew the sync flag without collecting anything
            with r.pipeline(transaction=True) as pipe:
                pipe.multi()
                sync_key = '{}:sync'.format(fragment_key)
                pipe.set(sync_key, True)
                pipe.expire(sync_key, durability)
                pipe.set('{}:updated'.format(fragment_key), calendar.timegm(dt.utcnow().timetuple()))
                pipe.delete('{}:pulling'.format(fragment_key))
                pipe.execute()
            return

    # Load fragment graph pattern
    tps = r.smembers('{}:gp'.format(fragment_key))
    # Load fragment requests (including their sinks)
    r_sinks = __load_fragment_requests(fid)
    log.info("""Starting collection of fragment {}:
                    - GP: {}
                    - Supporting: ({}) {}""".format(fid, list(tps), len(r_sinks), list(r_sinks)))

    init_fragment_resources(fid)

    # Prepare the corresponding fragment generator and fetch the search plan
    start_time = datetime.utcnow()
    try:
        fgm_gen, _, graph = agora_client.get_fragment_generator('{ %s }' % ' . '.join(tps), workers=N_COLLECTORS,
                                                                provider=graph_provider, queue_size=N_COLLECTORS*100)

    except Exception:
        traceback.print_exc()
        log.error('Agora is not available')
        return

    # In case there is not SearchTree in the plan: notify, remove and abort collection
    if not list(graph.subjects(RDF.type, AGORA.SearchTree)):
        log.info('There is no search plan for fragment {}. Removing...'.format(fid))
        # TODO: Send additional headers notifying the reason to end
        __notify_completion(fid, r_sinks)
        __remove_fragment(fid)
        return

    # Update cache graph prefixes
    __bind_prefixes(graph)

    # Extract triple patterns' dictionary from the search plan
    context_tp = {tpn: __extract_tp_from_plan(graph, tpn) for tpn in
                  graph.subjects(RDF.type, AGORA.TriplePattern)}
    frag_contexts = {tpn: (fid, context_tp[tpn]) for tpn in context_tp}

    lock = fragment_lock(fid)

    # Update fragment contexts; the lock is always released, even if the transaction fails
    lock.acquire()
    try:
        with r.pipeline(transaction=True) as pipe:
            pipe.multi()
            pipe.set('{}:pulling'.format(fragment_key), True)
            contexts_key = '{}:contexts'.format(fragment_key)
            pipe.delete(contexts_key)
            clear_fragment_stream(fid)
            for tpn in context_tp:
                pipe.sadd(contexts_key, frag_contexts[tpn])
            pipe.execute()
    finally:
        lock.release()

    # Init fragment collection counters
    n_triples = 0
    fragment_weight = 0
    fragment_delta = 0

    log.info('Collecting fragment {}...'.format(fid))
    try:
        # Iterate all fragment triples and their contexts
        pre_ts = datetime.utcnow()
        for (c, s, p, o) in fgm_gen:
            # Update weights and counters
            triple_weight = len(u'{}{}{}'.format(s, p, o))
            fragment_weight += triple_weight
            fragment_delta += triple_weight

            # Store the triple if it was not obtained before and notify related requests
            try:
                lock.acquire()
                try:
                    new_triple = add_stream_triple(fid, context_tp[c], (s, p, o))
                finally:
                    # A failing triple is only logged below, so the lock must not stay held
                    lock.release()
                if new_triple:
                    if isinstance(s, URIRef):
                        # Keep the two-way index between URI subjects and the fragments containing them
                        if s not in resource_in_fragment:
                            resource_in_fragment[s] = set([])
                        resource_in_fragment[s].add(fid)
                        fragment_resources[fid].add(s)
                    __consume_quad(fid, (context_tp[c], s, p, o), graph, sinks=r_sinks)
                n_triples += 1
            except Exception as e:
                log.warning(e.message)
                traceback.print_exc()

            # Report progress every time more than 10000 characters of triples have been pulled
            if fragment_delta > 10000:
                fragment_delta = 0
                log.info('Pulling fragment {} [{} kB]'.format(fid, fragment_weight / 1000.0))

            # Update fragment requests (checked whenever the triple counter is a multiple of 100)
            if n_triples % 100 == 0 and r.scard('{}:requests'.format(fragment_key)) != len(r_sinks):
                r_sinks = __load_fragment_requests(fid)

            # Sleep the remainder so that consecutive triples are at least THROTTLING_TIME seconds apart
            post_ts = datetime.utcnow()
            elapsed = (post_ts - pre_ts).total_seconds()
            throttling = THROTTLING_TIME - elapsed
            if throttling > 0:
                sleep(throttling)
            pre_ts = datetime.utcnow()
    except Exception as e:
        log.warning(e.message)
        traceback.print_exc()

    elapsed = (datetime.utcnow() - start_time).total_seconds()
    log.info(
        '{} triples retrieved for fragment {} in {} s [{} kB]'.format(n_triples, fid, elapsed,
                                                                      fragment_weight / 1000.0))

    # Update fragment cache and its contexts
    lock.acquire()
    try:
        __update_fragment_cache(fid, tps)
        log.info('Fragment {} data has been replaced with the recently collected'.format(fid))
        __cache_plan_context(fid, graph)
        log.info('BGP context of fragment {} has been cached'.format(fid))
        log.info('Updating result set for fragment {}...'.format(fid))

        # Calculate sync times and update fragment flags
        with r.pipeline(transaction=True) as pipe:
            pipe.multi()
            sync_key = '{}:sync'.format(fragment_key)
            demand_key = '{}:on_demand'.format(fragment_key)
            # Fragment is now synced
            pipe.set(sync_key, True)
            # Switching to on-demand mode is currently disabled, so the flag is always cleared
            pipe.delete(demand_key)

            updated_delay = int(r.get('{}:ud'.format(fragment_key)))
            last_requests_ts = [int(ts) for ts in r.lrange('{}:hist'.format(fragment_key), 0, -1)]
            log.debug('Fragment {} request history: {}'.format(fid, last_requests_ts))
            current_ts = calendar.timegm(datetime.utcnow().timetuple())
            first_collection = r.get('{}:updated'.format(fragment_key)) is None
            base_ts = last_requests_ts[:]
            if not first_collection:
                # The history may be empty (it is deleted below when auto-update stops), hence the guard.
                # When the newest request is within the updating delay, the current timestamp is pushed
                # forward by that delay before being prepended to the history.
                if base_ts and current_ts - base_ts[0] <= updated_delay:
                    current_ts += updated_delay  # Force
                base_ts = [current_ts] + base_ts
            # Gaps between consecutive timestamps (newest first)
            request_intervals = [i - j for i, j in zip(base_ts[:-1], base_ts[1:])]
            if request_intervals:
                avg_gap = sum(request_intervals) / len(request_intervals)
                durability = max(avg_gap, updated_delay) - elapsed
            else:
                durability = updated_delay - elapsed

            durability = int(max(durability, 1))
            log.debug('Fragment {} request intervals: {}, durability: {}'.format(fid, request_intervals, durability))
            if durability <= updated_delay - elapsed:
                pipe.expire(sync_key, durability)
                log.info('Fragment {} is considered synced for {} s'.format(fid, durability))
            else:
                # The sync flag gets no expiration here, and both the stream and the history are dropped
                clear_fragment_stream(fid)
                pipe.delete('{}:updated'.format(fragment_key))
                pipe.delete('{}:hist'.format(fragment_key))
                log.info('Fragment {} will no longer be automatically updated'.format(fid))

            pipe.set('{}:updated'.format(fragment_key), calendar.timegm(dt.utcnow().timetuple()))
            pipe.delete('{}:pulling'.format(fragment_key))
            pipe.execute()

        __notify_completion(fid, r_sinks)
    finally:
        lock.release()

    log.info('Fragment {} collection is complete!'.format(fid))
コード例 #5
0
ファイル: fragment.py プロジェクト: fserena/agora-stoa
    def _save(self, action, general=True):
        """
        Queue on the sink pipeline everything needed to recover a fragment for this request.

        Besides binding the request to a new or an already known fragment, the collection
        parameters of that fragment (sync state, updating delay, update-on-events flag and
        request history) are revised according to what the request asks for.

        :param action: Action being submitted; its request provides the pattern, labels and
            collection parameters
        :param general: Generalise the graph pattern for the fragment lookup; it only takes effect
            when the request allows generalisation too
        """

        super(FragmentSink, self)._save(action)

        # Generalisation requires the consent of both the caller and the request
        general = general and action.request.allow_generalisation

        # Fragment collection parameters: the minimum sync time applies when no delay is requested
        requested_updating_delay = action.request.updating_delay
        if requested_updating_delay is None:
            requested_updating_delay = MIN_SYNC_TIME
        self._pipe.hset(self._request_key, 'updating_delay', requested_updating_delay)
        self._pipe.hset(self._request_key, 'allow_generalisation', action.request.allow_generalisation)

        # The graph pattern is taken straight from the request object
        self._graph_pattern = action.request.pattern

        if general:
            lookup_gp = self._generalize_gp()
        else:
            lookup_gp = self._graph_pattern

        # The lookup yields None or a (fragment_id, mapping) tuple
        known_fragment = self.__check_gp_mappings(gp=lookup_gp)
        exists = known_fragment is not None

        # A request that was already passed once always proceeds; any other one is subject to a random
        # decision whose threshold depends on whether the fragment is already known
        if action.id in self.passed_requests:
            proceed = True
        elif exists:
            proceed = random() > PASS_THRESHOLD
        else:
            proceed = random() > 1.0 - PASS_THRESHOLD
        if not proceed:
            self.do_pass(action)
        if action.id in self.passed_requests:
            self.passed_requests.remove(action.id)

        # Only re-used fragments are locked; the lock (if any) is released on the way out
        lock = None
        try:
            if exists:
                fragment_id, mapping = known_fragment
                self._fragment_key = self.__f_key_pattern.format(fragment_id)
                lock = fragment_lock(fragment_id)
                lock.acquire()
                # An on-demand fragment loses its sync state as soon as it is requested again
                on_demand_flag = r.get('{}:on_demand'.format(self._fragment_key))
                if on_demand_flag is not None:
                    self._pipe.delete('{}:sync'.format(self._fragment_key))
            else:
                # Unknown pattern: register a new fragment collection for the looked-up graph pattern
                fragment_id = str(uuid())
                self._fragment_key = self.__f_key_pattern.format(fragment_id)
                self._pipe.sadd(self._fragments_key, fragment_id)
                self._pipe.sadd('{}:gp'.format(self._fragment_key), *lookup_gp)
                # Identity mapping over the request variables and the filter mapping keys
                mapping = {}
                for label in action.request.variable_labels:
                    mapping[str(label)] = str(label)
                for label in self._filter_mapping:
                    mapping[str(label)] = str(label)

            # Persist the mapping, the preferred labels, the fragment-request link and the original pattern
            self._pipe.hmset('{}map'.format(self._request_key), mapping)
            if action.request.preferred_labels:
                self._pipe.sadd('{}pl'.format(self._request_key), *action.request.preferred_labels)
            self._pipe.sadd('{}:requests'.format(self._fragment_key), self._request_id)
            self._pipe.hset(self._request_key, 'fragment_id', fragment_id)
            self._pipe.sadd('{}gp'.format(self._request_key), *self._graph_pattern)
            self._pipe.hset(self._request_key, 'pattern', ' . '.join(self._graph_pattern))

            # The fragment stops being synced when its last update is older than the requested delay
            fragment_synced = True
            last_update = r.get('{}:updated'.format(self._fragment_key))
            if last_update is not None:
                last_update_time = dt.utcfromtimestamp(float(last_update))
                oldest_valid_time = dt.utcnow() - delta(seconds=requested_updating_delay)
                if last_update_time < oldest_valid_time:
                    self._pipe.delete('{}:sync'.format(self._fragment_key))
                    fragment_synced = False

            # The fragment keeps the shortest updating delay; a new or unsynced one takes the requested
            if exists and fragment_synced:
                current_updating_delay = int(r.get('{}:ud'.format(self._fragment_key)))
            else:
                current_updating_delay = sys.maxint
            if current_updating_delay > requested_updating_delay:
                self._pipe.set('{}:ud'.format(self._fragment_key), requested_updating_delay)

            # The events flag is overwritten only while it is unset or still 'True'
            current_on_events = r.get('{}:events'.format(self._fragment_key))
            requested_on_events = action.request.update_on_events
            if current_on_events is None or current_on_events == 'True':
                self._pipe.set('{}:events'.format(self._fragment_key), requested_on_events)

            # Record this request in the fragment history, which keeps the four latest timestamps
            history_key = '{}:hist'.format(self._fragment_key)
            self._pipe.lpush(history_key, calendar.timegm(datetime.utcnow().timetuple()))
            self._pipe.ltrim(history_key, 0, 3)

            # Keep at hand the attributes that the rest of the submission process may need
            self._dict_fields['mapping'] = mapping
            self._dict_fields['preferred_labels'] = action.request.preferred_labels
            self._dict_fields['fragment_id'] = fragment_id

            if exists:
                _log.info('Request {} is going to re-use fragment {}'.format(self._request_id, fragment_id))
                n_fragment_reqs = r.scard('{}:requests'.format(self._fragment_key))
                _log.info('Fragment {} is supporting {} more requests'.format(fragment_id, n_fragment_reqs))
            else:
                _log.info('Request {} has started a new fragment collection: {}'.format(self._request_id, fragment_id))
        finally:
            if lock is not None:
                lock.release()
コード例 #6
0
ファイル: delivery.py プロジェクト: fserena/agora-stoa
            for rid in sent:
                r.srem(__ready_key, rid)
                r.srem(__deliveries_key, rid)
                try:
                    response = build_response(rid)
                    response.sink.remove()  # Its lock is removed too
                    __log.info('Request {} was sent and cleared'.format(rid))
                except AttributeError:
                    traceback.print_exc()
                    __log.warning('Request number {} was deleted by other means'.format(rid))
                    pass
                r.srem(__sent_key, rid)
        except Exception as e:
            __log.error(e.message)
            traceback.print_exc()
        finally:
            time.sleep(0.1)


# Module-level startup: everything below runs once, when this module is imported.

# Log delivery counters at startup: the cardinality of the deliveries set and of the ready set,
# both read through `r` (presumably the shared Redis client -- confirm against the imports)
__registered_deliveries = r.scard(__deliveries_key)
__deliveries_ready = r.scard(__ready_key)
__log.info("""Delivery daemon started:
                - Deliveries: {}
                - Ready: {}""".format(__registered_deliveries, __deliveries_ready))

# Create and start delivery daemon: __deliver_responses runs in a background thread that is
# flagged as daemon before being started, so it does not keep the interpreter alive at exit
__thread = Thread(target=__deliver_responses)
__thread.daemon = True
__thread.start()
コード例 #7
0
    lock.acquire()
    try:
        with r.pipeline(transaction=True) as p:
            p.multi()
            sync_key = '{}:sync'.format(fragment_key)
            # Fragment is now synced
            p.set(sync_key, True)
            min_durability = int(MIN_SYNC)
            durability = random.randint(min_durability, min_durability * 2)
            p.expire(sync_key, durability)
            log.info('Fragment {} is considered synced for {} s'.format(fid, durability))
            p.set('{}:updated'.format(fragment_key), dt.now())
            p.delete('{}:pulling'.format(fragment_key))
            p.execute()
        if r.scard('{}:requests'.format(fragment_key)) != len(r_sinks):
            r_sinks = __load_fragment_requests(fid)
        __notify_completion(r_sinks)
    finally:
        lock.release()


def __collect_fragments():
    registered_fragments = r.scard(fragments_key)
    synced_fragments = len(r.keys('{}:*:sync'.format(fragments_key)))
    log.info("""Collector daemon started:
                    - Fragments: {}
                    - Synced: {}""".format(registered_fragments, synced_fragments))

    futures = {}
    while True: