def _query_item(entity):
    """Yield scored match candidates for ``entity`` from the search index.

    A match query is built from ``entity``. If it equals ``none_query()``
    the generator finishes without yielding. Otherwise up to 100 hits are
    requested from the read index of the entity's matchable schemata; each
    hit that ``unpack_result`` does not reject is converted to a proxy and
    scored against ``entity`` with ``compare``.

    Yields:
        ``(score, entity, collection_id, match)`` tuples, only for scores
        that are at least ``SCORE_CUTOFF``.
    """
    matcher = match_query(entity)
    if matcher == none_query():
        return

    body = {
        "query": matcher,
        "size": 100,
        "_source": {"includes": PROXY_INCLUDES},
    }
    schemata = list(entity.schema.matchable_schemata)
    response = es.search(index=entities_read_index(schema=schemata), body=body)

    for hit in response.get("hits").get("hits"):
        doc = unpack_result(hit)
        if doc is None:
            # Hit could not be unpacked into a usable document; skip it.
            continue
        match = model.get_proxy(doc)
        score = compare(model, entity, match)
        if score >= SCORE_CUTOFF:
            log.debug(
                "Match: %s <[%.2f]> %s", entity.caption, score, match.caption
            )
            yield score, entity, doc.get("collection_id"), match
def get_query(self):
    """Build the match query for ``self.entity``.

    The parent class's query is handed to ``match_query`` together with
    ``self.entity`` and ``self.collection_ids``. When ``self.exclude`` is
    non-empty, an ``ids`` clause listing those values is appended to the
    query's ``bool.must_not`` list.

    Returns:
        The query returned by ``match_query``, possibly extended with the
        exclusion clause.
    """
    inherited = super(MatchQuery, self).get_query()
    matcher = match_query(
        self.entity,
        collection_ids=self.collection_ids,
        query=inherited,
    )
    if len(self.exclude) > 0:
        excluded_ids = {"ids": {"values": self.exclude}}
        matcher["bool"]["must_not"].append(excluded_ids)
    return matcher
def _query_item(entity, entitysets=True):
    """Yield every scored match candidate for ``entity`` from the index.

    A match query is built from ``entity``. If it equals ``none_query()``
    the generator finishes without yielding. Otherwise up to 50 hits are
    requested from the read index of the entity's matchable schemata; each
    hit that ``unpack_result`` does not reject is converted to a proxy and
    scored with ``compare``. No score threshold is applied here.

    Args:
        entity: The entity to find candidates for.
        entitysets: When truthy, ``EntitySet.entity_entitysets(entity.id)``
            is looked up once and attached to every yielded tuple;
            otherwise an empty list is attached.

    Yields:
        ``(score, entity, collection_id, match, entityset_ids)`` tuples.
    """
    matcher = match_query(entity)
    if matcher == none_query():
        return

    log.debug("Candidate [%s]: %s", entity.schema.name, entity.caption)
    if entitysets:
        entityset_ids = EntitySet.entity_entitysets(entity.id)
    else:
        entityset_ids = []

    body = {"query": matcher, "size": 50, "_source": ENTITY_SOURCE}
    schemata = list(entity.schema.matchable_schemata)
    response = es.search(index=entities_read_index(schema=schemata), body=body)

    for hit in response.get("hits").get("hits"):
        doc = unpack_result(hit)
        if doc is None:
            # Hit could not be unpacked into a usable document; skip it.
            continue
        match = model.get_proxy(doc)
        score = compare(model, entity, match)
        log.debug("Match: %s <[%.2f]> %s", entity.caption, score, match.caption)
        yield score, entity, doc.get("collection_id"), match, entityset_ids
def _query_item(collection, entity):
    """Yield scored match candidates for ``entity`` from the search index.

    A match query is built from ``entity``. If it equals ``none_query()``
    the generator finishes without yielding. Otherwise up to 100 hits are
    requested from the read index of the entity's matchable schemata; each
    hit that ``unpack_result`` does not reject is converted to a proxy and
    scored against ``entity`` with ``compare``.

    Args:
        collection: Not used inside this function body.
        entity: The entity to find candidates for.

    Yields:
        ``(score, entity, collection_id, match)`` tuples, only for scores
        that are at least ``SCORE_CUTOFF``.
    """
    matcher = match_query(entity)
    if matcher == none_query():
        return

    body = {
        'query': matcher,
        'size': 100,
        '_source': {'includes': INCLUDES},
    }
    schemata = list(entity.schema.matchable_schemata)
    response = es.search(index=entities_read_index(schema=schemata), body=body)

    for hit in response.get('hits').get('hits'):
        doc = unpack_result(hit)
        if doc is None:
            # Hit could not be unpacked into a usable document; skip it.
            continue
        match = model.get_proxy(doc)
        score = compare(model, entity, match)
        if score >= SCORE_CUTOFF:
            yield score, entity, doc.get('collection_id'), match
def xref_item(proxy, collection_ids=None):
    """Yield scored match candidates for ``proxy`` from the search index.

    A match query is built from ``proxy`` and ``collection_ids``. If it
    equals ``none_query()`` the generator finishes without yielding.
    Otherwise up to 100 hits (restricted to the ``schema``, ``properties``
    and ``collection_id`` source fields) are requested from the read index
    of the proxy's matchable schemata; each hit that ``unpack_result`` does
    not reject is converted to a proxy and scored with ``compare``.

    Args:
        proxy: The entity proxy to find candidates for.
        collection_ids: Forwarded unchanged to ``match_query``.

    Yields:
        ``(score, collection_id, other)`` tuples, only for scores that are
        at least ``SCORE_CUTOFF``.
    """
    matcher = match_query(proxy, collection_ids=collection_ids)
    if matcher == none_query():
        return

    body = {
        'query': matcher,
        'size': 100,
        '_source': {'includes': ['schema', 'properties', 'collection_id']},
    }
    schemata = list(proxy.schema.matchable_schemata)
    response = es.search(index=entities_read_index(schema=schemata), body=body)
    hits = response.get('hits').get('hits')

    for hit in hits:
        doc = unpack_result(hit)
        if doc is None:
            # Hit could not be unpacked into a usable document; skip it.
            continue
        other = model.get_proxy(doc)
        score = compare(model, proxy, other)
        if score >= SCORE_CUTOFF:
            yield score, doc.get('collection_id'), other
def get_query(self):
    """Build the match query for ``self.entity``.

    The parent class's query is obtained first and then passed, together
    with ``self.entity`` and ``self.collection_ids``, to ``match_query``.

    Returns:
        Whatever ``match_query`` returns for those arguments.
    """
    inherited = super(MatchQuery, self).get_query()
    return match_query(
        self.entity,
        collection_ids=self.collection_ids,
        query=inherited,
    )