コード例 #1
0
ファイル: api.py プロジェクト: SmartDeveloperHub/sdh-curator
def get_fragment(fid):
    """Return a JSON description of the fragment identified by `fid`.

    The description holds the fragment id, its graph pattern rendered as
    "{ tp1 . tp2 ... }", the stored 'updated' value, the 'pulling' flag and
    the list of request ids attached to the fragment. When the fragment is
    being pulled, the cardinality of its stream sorted set is added under
    'triples'.

    :param fid: fragment identifier, as stored in the 'fragments' set.
    :return: whatever `jsonify` builds from the description dict.
    :raises NotFound: if `fid` is not a member of the 'fragments' set.
    """
    if not r.sismember('fragments', fid):
        raise NotFound('The fragment {} does not exist'.format(fid))

    base_key = 'fragments:{}'.format(fid)

    # A missing flag is treated the same as an explicit 'False'.
    raw_flag = r.get(base_key + ':pulling')
    if raw_flag is None:
        raw_flag = 'False'

    description = {'id': fid}
    description['pattern'] = "{ %s }" % ' . '.join(r.smembers(base_key + ':gp'))
    description['updated'] = r.get(base_key + ':updated')
    # NOTE(review): eval() runs whatever string is stored under the pulling
    # key. Presumably only 'True'/'False' are ever written there - confirm,
    # and consider a plain string comparison instead.
    description['pulling'] = eval(raw_flag)
    description['requests'] = list(r.smembers(base_key + ':requests'))

    if description['pulling']:
        description['triples'] = r.zcard(base_key + ':stream')

    return jsonify(description)
コード例 #2
0
 def load(self):
     """Populate `target`, `fragment_id` and `links` from Redis.

     `target` and `fragment_id` come from the hash stored at
     `self._enrichment_key`; `links` is built from the members of the
     '<enrichment_key>:links' set, each of which is evaluated into a
     (link, value) pair and stored as (URIRef(link), value).
     """
     stored = r.hgetall(self._enrichment_key)
     self.target = URIRef(stored.get('target'))
     self.fragment_id = stored.get('fragment_id')

     links_key = '{}:links'.format(self._enrichment_key)
     # NOTE(review): eval() executes the stored member text. Presumably each
     # member is the repr of a 2-tuple written by this application - confirm.
     raw_pairs = [eval(pair_str) for pair_str in r.smembers(links_key)]
     self.links = [(URIRef(link), v) for link, v in raw_pairs]
コード例 #3
0
    def __check_gp(self):
        """Look for a stored fragment whose graph pattern maps onto this one.

        Every 'fragments:*:gp' key is loaded into a GraphPattern and asked for
        a mapping against `self._graph_pattern`; the first truthy mapping wins.

        :return: a `(fragment_id, mapping)` tuple for the first match, where
            `fragment_id` is the second ':'-separated field of the key, or
            None when no stored pattern yields a mapping.
        """
        for stored_key in r.keys('fragments:*:gp'):
            candidate = GraphPattern(r.smembers(stored_key))
            mapping = candidate.mapping(self._graph_pattern)
            if not mapping:
                continue
            fragment_id = stored_key.split(':')[1]
            return fragment_id, mapping
        return None
コード例 #4
0
def __load_fragment_requests(fid):
    """Load the request ids attached to a fragment together with their sinks.

    For every request id in 'fragments:<fid>:requests' the response is built
    and its sink recorded. A request whose response cannot be built is logged
    and removed from the fragment's request set in Redis.

    :param fid: fragment identifier.
    :return: a `(requests, sinks)` tuple: the set of request ids as read from
        Redis (before any removal performed here), and a dict mapping each
        successfully loaded request id to its sink.
    """
    requests_key = "fragments:{}:requests".format(fid)
    requests_ = r.smembers(requests_key)
    sinks_ = {}
    for rid in requests_:
        try:
            sinks_[rid] = build_response(rid).sink
        except Exception as e:
            # Best effort: a broken request must not stop the others from
            # loading, so report it and detach it from the fragment.
            traceback.print_exc()
            log.warning(e.message)
            with r.pipeline(transaction=True) as p:
                p.multi()
                p.srem(requests_key, rid)
                p.execute()
    # Callers unpack the result as `requests, sinks = ...`; without this
    # return they would fail trying to unpack None.
    return requests_, sinks_
コード例 #5
0
def __deliver_responses():
    """Run the delivery daemon loop; this function never returns.

    Once per second it:
      * submits `__deliver_response` to the pool for every id in
        'deliveries:ready' that has no tracked future yet;
      * forgets the futures that are finished and whose id is no longer ready;
      * for every id in 'deliveries:sent', removes it from the ready and
        registered sets and removes the sink of its response;
      * deletes the 'deliveries:sent' set.
    """
    import time

    n_registered = r.scard('deliveries')
    n_ready = r.scard('deliveries:ready')
    log.info("""Delivery daemon started:
                    - Deliveries: {}
                    - Ready: {}""".format(n_registered, n_ready))

    log.info('Delivery daemon started')
    futures = {}
    while True:
        ready = r.smembers('deliveries:ready')
        for rid in ready:
            if rid in futures:
                continue
            log.info('Response delivery of request {} is ready. Preparing...'.format(rid))
            futures[rid] = thp.submit(__deliver_response, rid)

        # Tracked ids that are no longer ready: drop them once their task ended.
        no_longer_ready = set(futures.keys()).difference(ready)
        for obsolete_rid in no_longer_ready:
            if futures[obsolete_rid].done():
                del futures[obsolete_rid]

        for rid in r.smembers('deliveries:sent'):
            r.srem('deliveries:ready', rid)
            r.srem('deliveries', rid)
            try:
                build_response(rid).sink.remove()
                log.info('Request {} was sent and cleared'.format(rid))
            except AttributeError:
                log.warning('Request number {} was deleted by other means'.format(rid))

        r.delete('deliveries:sent')
        time.sleep(1)
コード例 #6
0
 def _load(self):
     """Load the sink fields and derive the fragment-specific state.

     After the parent `_load`, the fragment id is taken from the loaded
     fields, the fragment's graph pattern is read from Redis, the serialized
     'mapping' and 'preferred_labels' fields are evaluated back into Python
     objects (or set to None when absent), and 'fragment_id' is removed from
     the field dictionary.
     """
     super(FragmentSink, self)._load()
     self._fragment_id = self._dict_fields['fragment_id']

     gp_key = 'fragments:{}:gp'.format(self._fragment_id)
     self._graph_pattern = GraphPattern(r.smembers(gp_key))

     for field in ('mapping', 'preferred_labels'):
         serialized = self._dict_fields.get(field)
         # NOTE(review): eval() executes the stored text. Presumably it is a
         # repr written by this application - confirm before trusting it.
         self._dict_fields[field] = None if serialized is None else eval(serialized)

     # The fragment id now lives in its own attribute.
     self._dict_fields.pop('fragment_id', None)
コード例 #7
0
def __collect_fragments():
    """Run the collector daemon loop; this function never returns.

    Once per second, every registered fragment that has neither a
    'fragments:<fid>:sync' nor a 'fragments:<fid>:pulling' value gets a
    `__pull_fragment` task submitted to the pool, unless a previous task for
    that fragment is still running.
    """
    n_fragments = r.scard("fragments")
    n_synced = len(r.keys("fragments:*:sync"))
    log.info(
        """Collector daemon started:
                    - Fragments: {}
                    - Synced: {}""".format(n_fragments, n_synced)
    )

    futures = {}
    while True:
        # Select all candidates up front, before any task is submitted.
        idle_fragments = [
            fid
            for fid in r.smembers("fragments")
            if r.get("fragments:{}:sync".format(fid)) is None
            and r.get("fragments:{}:pulling".format(fid)) is None
        ]
        for fid in idle_fragments:
            # Forget a finished task so the fragment can be scheduled again.
            if fid in futures and futures[fid].done():
                del futures[fid]
            if fid not in futures:
                futures[fid] = thp.submit(__pull_fragment, fid)
        time.sleep(1)
コード例 #8
0
ファイル: api.py プロジェクト: SmartDeveloperHub/sdh-curator
def get_fragments():
    """Return a JSON document listing the members of the 'fragments' set."""
    payload = {"fragments": list(r.smembers('fragments'))}
    return jsonify(payload)
コード例 #9
0
def __pull_fragment(fid):
    """Collect the triples of fragment `fid` and feed them to its request sinks.

    Steps, as implemented below:
      1. Read the fragment's triple patterns and load its requests and sinks.
      2. Ask Agora for a fragment generator; give up (logging an error) if
         that call fails.
      3. If the returned graph has no search tree, notify completion to the
         sinks, remove the fragment and return.
      4. Under the fragment locks, mark the fragment as pulling and rewrite
         its contexts set.
      5. Consume the generator: each quad is added to the stream and, when
         `add_stream_triple` reports success, passed to `__consume_quad`.
         Requests are reloaded whenever their number changes in Redis, and
         the loop is throttled according to COLLECT_THROTTLING.

    Any exception raised while consuming the generator is printed and
    swallowed. Locks are always released, even on failure.

    :param fid: fragment identifier.
    :return: None.
    """
    tps = r.smembers("fragments:{}:gp".format(fid))
    requests, r_sinks = __load_fragment_requests(fid)
    log.info(
        """Starting collection of fragment {}:
                    - GP: {}
                    - Supporting: ({}) {}""".format(
            fid, list(tps), len(requests), list(requests)
        )
    )
    # NOTE(review): start_time (and n_triples below) are written but not read
    # in the code shown here; kept in case later code relies on them.
    start_time = datetime.now()

    try:
        fgm_gen, _, graph = agora_client.get_fragment_generator(
            "{ %s }" % " . ".join(tps), workers=N_COLLECTORS, provider=graph_provider, queue_size=N_COLLECTORS
        )
    except Exception:
        log.error("Agora is not available")
        return

    # There is no search plan to execute
    if not list(graph.subjects(RDF.type, AGORA.SearchTree)):
        log.info("There is no search plan for fragment {}. Removing...".format(fid))
        # TODO: Send additional headers notifying the reason to end
        __notify_completion(fid, r_sinks)
        __remove_fragment(fid)
        return

    triple_patterns = {tpn: __triple_pattern(graph, tpn) for tpn in graph.subjects(RDF.type, AGORA.TriplePattern)}
    fragment_contexts = {tpn: (fid, triple_patterns[tpn]) for tpn in triple_patterns}
    __bind_prefixes(graph)

    lock_key = "fragments:{}:lock".format(fid)
    lock = r.lock(lock_key, lock_class=Lock)
    lock.acquire()
    try:
        lock_consume_key = "fragments:{}:lock:consume".format(fid)
        c_lock = r.lock(lock_consume_key, lock_class=Lock)
        c_lock.acquire()
        try:
            # Update fragment contexts
            with r.pipeline(transaction=True) as p:
                p.multi()
                p.set("fragments:{}:pulling".format(fid), True)
                p.delete("fragments:{}:contexts".format(fid))
                for tpn in fragment_contexts.keys():
                    p.sadd("fragments:{}:contexts".format(fid), fragment_contexts[tpn])
                p.execute()
        finally:
            # Release even if the pipeline fails, so no lock is left held.
            c_lock.release()
    finally:
        lock.release()

    n_triples = 0
    fragment_weight = 0
    fragment_delta = 0

    try:
        for (c, s, p, o) in fgm_gen:
            pre_ts = datetime.now()
            triple_weight = len(u"{}{}{}".format(s, p, o))
            fragment_weight += triple_weight
            fragment_delta += triple_weight
            lock.acquire()
            try:
                if add_stream_triple(fid, triple_patterns[c], (s, p, o)):
                    __consume_quad(fid, (triple_patterns[c], s, p, o), graph, sinks=r_sinks)
            finally:
                # The handler below swallows errors, so without this a failure
                # here would leave the fragment lock held.
                lock.release()
            if fragment_delta > 1000:
                fragment_delta = 0
                log.info("Pulling fragment {} [{} kB]".format(fid, fragment_weight / 1000.0))

            # The set of requests changed while pulling: refresh the sinks.
            if r.scard("fragments:{}:requests".format(fid)) != len(requests):
                requests, r_sinks = __load_fragment_requests(fid)
            n_triples += 1
            post_ts = datetime.now()
            elapsed = (post_ts - pre_ts).total_seconds()
            # Sleep off whatever remains of this triple's time slot.
            excess = (1.0 / COLLECT_THROTTLING) - elapsed
            if excess > 0:
                sleep(excess)
    except Exception:
        traceback.print_exc()
コード例 #10
0
 def fragment_contexts(self):
     """Return the members of this sink's 'fragments:<fragment_id>:contexts' set."""
     contexts_key = 'fragments:{}:contexts'.format(self._fragment_id)
     return r.smembers(contexts_key)
コード例 #11
0
def get_fragment_enrichments(fid):
    """Return one EnrichmentData per id in the fragment's enrichments set.

    :param fid: fragment identifier.
    :return: list of EnrichmentData objects, one for each member of
        'fragments:<fid>:enrichments'.
    """
    enrichments_key = 'fragments:{}:enrichments'.format(fid)
    enrichments = []
    for eid in r.smembers(enrichments_key):
        enrichments.append(EnrichmentData(eid))
    return enrichments