예제 #1
0
파일: xref.py 프로젝트: aaronarnold2/aleph
def _query_item(entity):
    """Yield cross-reference candidates for ``entity`` from the search index.

    A match query is built for the entity and executed against the read
    index of the entity schema's matchable schemata, requesting up to 100
    hits. Each hit that unpacks to a non-``None`` value is turned into a
    proxy and scored against ``entity`` with ``compare``; only hits with a
    score of at least ``SCORE_CUTOFF`` are logged and yielded.

    Yields:
        ``(score, entity, collection_id, match)`` tuples, where
        ``collection_id`` is read from the unpacked hit.
    """
    matcher = match_query(entity)
    if matcher == none_query():
        # match_query produced the none query: finish without yielding.
        return

    body = {
        "query": matcher,
        "size": 100,
        "_source": {"includes": PROXY_INCLUDES},
    }
    schemata = list(entity.schema.matchable_schemata)
    response = es.search(index=entities_read_index(schema=schemata), body=body)
    for hit in response.get("hits").get("hits"):
        data = unpack_result(hit)
        if data is None:
            continue
        match = model.get_proxy(data)
        score = compare(model, entity, match)
        if score >= SCORE_CUTOFF:
            log.debug(
                "Match: %s <[%.2f]> %s", entity.caption, score, match.caption
            )
            yield score, entity, data.get("collection_id"), match
예제 #2
0
 def get_query(self):
     """Build the match query for ``self.entity``.

     The parent class's query is passed to ``match_query`` as its ``query``
     argument, along with ``self.collection_ids``. When ``self.exclude`` is
     non-empty, an ``ids`` clause listing those values is appended to the
     resulting query's ``bool.must_not`` list.
     """
     base = super(MatchQuery, self).get_query()
     matched = match_query(
         self.entity, collection_ids=self.collection_ids, query=base
     )
     if len(self.exclude) > 0:
         matched["bool"]["must_not"].append(
             {"ids": {"values": self.exclude}}
         )
     return matched
예제 #3
0
파일: xref.py 프로젝트: moreymat/aleph
def _query_item(entity, entitysets=True):
    """Yield scored cross-reference candidates for ``entity``.

    Runs the entity's match query (up to 50 hits) against the read index of
    its schema's matchable schemata and scores every hit that unpacks to a
    non-``None`` value with ``compare``. No score threshold is applied here:
    every scored hit is logged and yielded.

    Args:
        entity: The entity to cross-reference.
        entitysets: When true, the entity's entity sets are looked up via
            ``EntitySet.entity_entitysets``; otherwise an empty list is used.

    Yields:
        ``(score, entity, collection_id, match, entityset_ids)`` tuples. The
        same ``entityset_ids`` object is included in every tuple.
    """
    matcher = match_query(entity)
    if matcher == none_query():
        # match_query produced the none query: finish without yielding.
        return

    log.debug("Candidate [%s]: %s", entity.schema.name, entity.caption)
    if entitysets:
        entityset_ids = EntitySet.entity_entitysets(entity.id)
    else:
        entityset_ids = []

    body = {"query": matcher, "size": 50, "_source": ENTITY_SOURCE}
    schemata = list(entity.schema.matchable_schemata)
    response = es.search(index=entities_read_index(schema=schemata), body=body)
    for hit in response.get("hits").get("hits"):
        data = unpack_result(hit)
        if data is None:
            continue
        match = model.get_proxy(data)
        score = compare(model, entity, match)
        log.debug("Match: %s <[%.2f]> %s", entity.caption, score, match.caption)
        collection_id = data.get("collection_id")
        yield score, entity, collection_id, match, entityset_ids
예제 #4
0
def _query_item(collection, entity):
    """Yield cross-reference candidates for ``entity`` from the search index.

    Builds the entity's match query, searches the read index of the entity
    schema's matchable schemata for up to 100 hits, and scores each hit that
    unpacks to a non-``None`` value with ``compare``. Only hits scoring at
    least ``SCORE_CUTOFF`` are yielded.

    Args:
        collection: Not used by this function body; kept in the signature
            for callers.
        entity: The entity to cross-reference.

    Yields:
        ``(score, entity, collection_id, match)`` tuples.
    """
    matcher = match_query(entity)
    if matcher == none_query():
        # match_query produced the none query: finish without yielding.
        return

    body = {
        'query': matcher,
        'size': 100,
        '_source': {'includes': INCLUDES},
    }
    schemata = list(entity.schema.matchable_schemata)
    response = es.search(index=entities_read_index(schema=schemata), body=body)
    for hit in response.get('hits').get('hits'):
        data = unpack_result(hit)
        if data is None:
            continue
        match = model.get_proxy(data)
        score = compare(model, entity, match)
        if score >= SCORE_CUTOFF:
            yield score, entity, data.get('collection_id'), match
예제 #5
0
def xref_item(proxy, collection_ids=None):
    """Yield cross-reference candidates for ``proxy`` from the search index.

    A match query for ``proxy`` (given ``collection_ids``) is run against the
    read index of the proxy schema's matchable schemata, requesting up to
    100 hits with only the ``schema``, ``properties`` and ``collection_id``
    source fields. Hits that unpack to a non-``None`` value are scored with
    ``compare``; those scoring at least ``SCORE_CUTOFF`` are yielded.

    Args:
        proxy: The entity proxy to cross-reference.
        collection_ids: Passed through to ``match_query``.

    Yields:
        ``(score, collection_id, other)`` tuples, where ``other`` is the
        proxy built from the hit.
    """
    matcher = match_query(proxy, collection_ids=collection_ids)
    if matcher == none_query():
        # match_query produced the none query: finish without yielding.
        return

    source_fields = ['schema', 'properties', 'collection_id']
    body = {
        'query': matcher,
        'size': 100,
        '_source': {'includes': source_fields},
    }
    schemata = list(proxy.schema.matchable_schemata)
    response = es.search(index=entities_read_index(schema=schemata), body=body)
    for hit in response.get('hits').get('hits'):
        data = unpack_result(hit)
        if data is None:
            continue
        other = model.get_proxy(data)
        score = compare(model, proxy, other)
        if score >= SCORE_CUTOFF:
            yield score, data.get('collection_id'), other
예제 #6
0
파일: __init__.py 프로젝트: wayne9qiu/aleph
 def get_query(self):
     """Return the match query for ``self.entity``.

     The parent class's query is passed to ``match_query`` as its ``query``
     argument, together with ``self.collection_ids``.
     """
     base = super(MatchQuery, self).get_query()
     matched = match_query(
         self.entity, collection_ids=self.collection_ids, query=base
     )
     return matched