Example #1
def __remove_fragment(fid):
    """
    Completely remove a fragment from the system after notifying its known consumers
    :param fid: Fragment identifier
    """
    log.debug('Waiting to remove fragment {}...'.format(fid))
    lock = fragment_lock(fid)
    lock.acquire()

    r_sinks = __load_fragment_requests(fid)
    __notify_completion(fid, r_sinks)
    fragment_keys = r.keys('{}:{}*'.format(fragments_key, fid))
    with r.pipeline(transaction=True) as p:
        p.multi()
        for key in fragment_keys:
            p.delete(key)
        p.srem(fragments_key, fid)
        p.execute()

    # Fragment lock key was just implicitly removed, so it's not necessary to release the lock
    # lock.release()
    log.info('Fragment {} has been removed'.format(fid))
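
Both examples assume a module-level redis-py client (r) and a fragment_lock(fid) helper that serializes access to a fragment's keys; neither is shown here. A minimal sketch of the lock helper, assuming redis-py's built-in Lock and the key layout used above, could look like this (the ':lock' suffix and the timeout are illustrative assumptions, not part of the original module):

def fragment_lock(fid):
    """
    Hypothetical sketch: build a per-fragment lock on top of redis-py's Lock.
    The ':lock' suffix and the 60 s timeout are assumptions.
    :param fid: Fragment identifier
    """
    lock_key = '{}:{}:lock'.format(fragments_key, fid)
    # timeout guards against a crashed worker holding the lock forever
    return r.lock(lock_key, timeout=60)

Note that such a lock key would match the '{fragments_key}:{fid}*' pattern deleted in Example #1, which is consistent with the comment there about the lock key being implicitly removed.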
Example #2
def __pull_fragment(fid):
    """
    Pull and replace (if needed) a given fragment
    :param fid: Fragment id
    """

    fragment_key = '{}:{}'.format(fragments_key, fid)
    on_events = r.get('{}:events'.format(fragment_key))

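    # Event-driven fragments with no detected resource change just refresh their sync flags and skip the pull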
    if on_events == 'True' and not change_in_fragment_resource(fid, int(r.get('{}:ud'.format(fragment_key)))):
        with r.pipeline(transaction=True) as p:
            p.multi()
            sync_key = '{}:sync'.format(fragment_key)
            p.set(sync_key, True)
            durability = int(r.get('{}:ud'.format(fragment_key)))
            p.expire(sync_key, durability)
            p.set('{}:updated'.format(fragment_key), calendar.timegm(datetime.utcnow().timetuple()))
            p.delete('{}:pulling'.format(fragment_key))
            p.execute()
        return

    # Load fragment graph pattern
    tps = r.smembers('{}:gp'.format(fragment_key))
    # Load fragment requests (including their sinks)
    r_sinks = __load_fragment_requests(fid)
    log.info("""Starting collection of fragment {}:
                    - GP: {}
                    - Supporting: ({}) {}""".format(fid, list(tps), len(r_sinks), list(r_sinks)))

    init_fragment_resources(fid)

    # Prepare the corresponding fragment generator and fetch the search plan
    start_time = datetime.utcnow()
    try:
        fgm_gen, _, graph = agora_client.get_fragment_generator('{ %s }' % ' . '.join(tps), workers=N_COLLECTORS,
                                                                provider=graph_provider, queue_size=N_COLLECTORS*100)

    except Exception:
        traceback.print_exc()
        log.error('Agora is not available')
        return

    # If there is no SearchTree in the plan: notify, remove and abort the collection
    if not list(graph.subjects(RDF.type, AGORA.SearchTree)):
        log.info('There is no search plan for fragment {}. Removing...'.format(fid))
        # TODO: Send additional headers notifying the reason to end
        __notify_completion(fid, r_sinks)
        __remove_fragment(fid)
        return

    # Update cache graph prefixes
    __bind_prefixes(graph)

    # Extract triple patterns' dictionary from the search plan
    context_tp = {tpn: __extract_tp_from_plan(graph, tpn) for tpn in
                  graph.subjects(RDF.type, AGORA.TriplePattern)}
    frag_contexts = {tpn: (fid, context_tp[tpn]) for tpn in context_tp}

    lock = fragment_lock(fid)
    lock.acquire()

    # Update fragment contexts
    with r.pipeline(transaction=True) as p:
        p.multi()
        p.set('{}:pulling'.format(fragment_key), True)
        contexts_key = '{}:contexts'.format(fragment_key)
        p.delete(contexts_key)
        clear_fragment_stream(fid)
        for tpn in context_tp:
            p.sadd(contexts_key, frag_contexts[tpn])
        p.execute()
    lock.release()

    # Init fragment collection counters
    n_triples = 0
    fragment_weight = 0
    fragment_delta = 0

    log.info('Collecting fragment {}...'.format(fid))
    try:
        # Iterate all fragment triples and their contexts
        pre_ts = datetime.utcnow()
        for (c, s, p, o) in fgm_gen:
            # Update weights and counters
            triple_weight = len(u'{}{}{}'.format(s, p, o))
            fragment_weight += triple_weight
            fragment_delta += triple_weight

            # Store the triple if it was not obtained before and notify related requests
            try:
                lock.acquire()
                try:
                    new_triple = add_stream_triple(fid, context_tp[c], (s, p, o))
                finally:
                    # Release even if storing the triple fails, so later iterations can acquire
                    lock.release()
                if new_triple:
                    if isinstance(s, URIRef):
                        if s not in resource_in_fragment:
                            resource_in_fragment[s] = set()
                        resource_in_fragment[s].add(fid)
                        fragment_resources[fid].add(s)
                    __consume_quad(fid, (context_tp[c], s, p, o), graph, sinks=r_sinks)
                n_triples += 1
            except Exception as e:
                log.warning(str(e))
                traceback.print_exc()

            if fragment_delta > 10000:
                fragment_delta = 0
                log.info('Pulling fragment {} [{} kB]'.format(fid, fragment_weight / 1000.0))

            if n_triples % 100 == 0:
                # Update fragment requests
                if r.scard('{}:requests'.format(fragment_key)) != len(r_sinks):
                    r_sinks = __load_fragment_requests(fid)

            post_ts = datetime.utcnow()
            elapsed = (post_ts - pre_ts).total_seconds()
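            # Pace the loop so each iteration spans at least THROTTLING_TIME seconds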
            throttling = THROTTLING_TIME - elapsed
            if throttling > 0:
                sleep(throttling)
            pre_ts = datetime.utcnow()
    except Exception as e:
        log.warning(str(e))
        traceback.print_exc()

    elapsed = (datetime.utcnow() - start_time).total_seconds()
    log.info(
        '{} triples retrieved for fragment {} in {} s [{} kB]'.format(n_triples, fid, elapsed,
                                                                      fragment_weight / 1000.0))

    # Update fragment cache and its contexts
    lock.acquire()
    try:
        __update_fragment_cache(fid, tps)
        log.info('Fragment {} data has been replaced with the recently collected'.format(fid))
        __cache_plan_context(fid, graph)
        log.info('BGP context of fragment {} has been cached'.format(fid))
        log.info('Updating result set for fragment {}...'.format(fid))

        # Calculate sync times and update fragment flags
        with r.pipeline(transaction=True) as p:
            p.multi()
            sync_key = '{}:sync'.format(fragment_key)
            demand_key = '{}:on_demand'.format(fragment_key)
            # Fragment is now synced
            p.set(sync_key, True)
            # If the fragment collection time has not exceeded the threshold, switch to on-demand mode
            # if elapsed < ON_DEMAND_TH and elapsed * random.random() < ON_DEMAND_TH / 4:
            #     p.set(demand_key, True)
            #     log.info('Fragment {} has been switched to on-demand mode'.format(fid))
            # else:
            p.delete(demand_key)

            updated_delay = int(r.get('{}:ud'.format(fragment_key)))
            last_requests_ts = [int(x) for x in r.lrange('{}:hist'.format(fragment_key), 0, -1)]
            log.debug('Request history of fragment {}: {}'.format(fid, last_requests_ts))
            current_ts = calendar.timegm(datetime.utcnow().timetuple())
            first_collection = r.get('{}:updated'.format(fragment_key)) is None
            base_ts = last_requests_ts[:]
            if not first_collection:
                if current_ts - base_ts[0] <= updated_delay:
                    current_ts += updated_delay  # Force
                base_ts = [current_ts] + base_ts
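            # Gaps between consecutive request timestamps (history is most-recent-first)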
            request_intervals = [i - j for i, j in zip(base_ts[:-1], base_ts[1:])]
            if request_intervals:
                avg_gap = sum(request_intervals) / len(request_intervals)
                log.debug('Average request gap for fragment {}: {} s'.format(fid, avg_gap))
                durability = avg_gap - elapsed if avg_gap > updated_delay else updated_delay - elapsed
            else:
                durability = updated_delay - elapsed

            durability = int(max(durability, 1))
            log.debug('Durability of fragment {}: {} s'.format(fid, durability))
            if durability <= updated_delay - elapsed:
                p.expire(sync_key, durability)
                log.info('Fragment {} is considered synced for {} s'.format(fid, durability))
            else:
                clear_fragment_stream(fid)
                p.delete('{}:updated'.format(fragment_key))
                p.delete('{}:hist'.format(fragment_key))
                log.info('Fragment {} will no longer be automatically updated'.format(fid))

            p.set('{}:updated'.format(fragment_key), calendar.timegm(datetime.utcnow().timetuple()))
            p.delete('{}:pulling'.format(fragment_key))
            p.execute()

        __notify_completion(fid, r_sinks)
    finally:
        lock.release()

    log.info('Fragment {} collection is complete!'.format(fid))
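
The sync-window arithmetic at the end of Example #2 can be read in isolation: the average gap between recent requests determines how long the fragment is considered synced. The following standalone sketch reproduces that computation; the function name and signature are illustrative, not part of the original module:

def sync_window(request_ts, updated_delay, elapsed, now, first_collection):
    """
    Reproduce the durability computation from __pull_fragment
    :param request_ts: Request timestamps, most recent first (the ':hist' list)
    :param updated_delay: Configured update delay in seconds (the ':ud' key)
    :param elapsed: Collection time in seconds
    :param now: Current UNIX timestamp
    :param first_collection: True if the fragment was never updated before
    """
    base_ts = request_ts[:]
    if not first_collection:
        if now - base_ts[0] <= updated_delay:
            now += updated_delay  # Force a gap of at least updated_delay
        base_ts = [now] + base_ts
    # Gaps between consecutive timestamps, most recent first
    request_intervals = [i - j for i, j in zip(base_ts[:-1], base_ts[1:])]
    if request_intervals:
        avg_gap = sum(request_intervals) / len(request_intervals)
        durability = avg_gap - elapsed if avg_gap > updated_delay else updated_delay - elapsed
    else:
        durability = updated_delay - elapsed
    return int(max(durability, 1))

For instance, with a history of [100, 70], now = 110, updated_delay = 10 and elapsed = 2, the gap check bumps now to 120, the intervals are [20, 30], the average gap is 25 and the window is 23 s; since 23 > updated_delay - elapsed, the surrounding code would then stop auto-updating the fragment.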