Esempio n. 1
0
def entity_documents(entity, state):
    """Build and run a query for documents that mention an entity.

    A document becomes a candidate when at least one of these matches:

    * the ``entities.id`` term equals the entity's ``id``;
    * one of the entity's names phrase-matches ``title`` or ``summary``;
    * one of the entity's names phrase-matches ``text`` or ``text_latin``,
      or one of its fingerprints phrase-matches ``text``; these clauses
      are wrapped with ``child_record``.

    Every name and fingerprint is also appended to ``state.highlight``.

    Args:
        entity: Mapping read with ``.get`` for ``id``, ``names`` and
            ``fingerprints``. A missing or ``None`` list is treated as
            empty.
        state: Query state object. ``state.highlight`` is appended to and
            ``state.raw_query`` is overwritten with the generated query.

    Returns:
        Whatever ``documents_query(state)`` returns.
    """
    shoulds = [{"term": {"entities.id": entity.get('id')}}]
    text_queries = []

    # ``or []`` also covers a key that is present but set to None.
    for name in entity.get('names') or []:
        text_queries.append(phrase_match(name, 'text'))
        text_queries.append(phrase_match(name, 'text_latin'))
        shoulds.append(phrase_match(name, 'title'))
        shoulds.append(phrase_match(name, 'summary'))
        state.highlight.append(name)

    for fp in entity.get('fingerprints') or []:
        text_queries.append(phrase_match(fp, 'text'))
        state.highlight.append(fp)

    # TODO: also match on the entity's identifiers.

    # Elasticsearch treats a bool query without any clauses as match-all,
    # so an entity without names or fingerprints must not contribute an
    # empty child clause: it could match far more than the entity itself.
    # NOTE(review): assumes child_record() passes the query through to the
    # child documents unchanged - confirm against its definition.
    if text_queries:
        shoulds.append(child_record({"bool": {"should": text_queries}}))
    # TODO: add in other entity info like phone numbers, addresses, etc. for
    # ranking.

    state.raw_query = {"bool": {"should": shoulds, "minimum_should_match": 1}}
    return documents_query(state)
Esempio n. 2
0
def text_query(text):
    """Build the query fragment that searches for a piece of text.

    Returns ``match_all()`` when ``text`` is ``None`` or blank after
    stripping. Otherwise returns a ``bool`` query in which at least one of
    two clauses must match: ``meta_query_string(text)`` or
    ``text_query_string(text)`` wrapped with ``child_record``.
    """
    if text is None or not text.strip():
        return match_all()

    meta_clause = meta_query_string(text)
    record_clause = child_record(
        {"bool": {"should": [text_query_string(text)]}}
    )
    return {
        "bool": {
            "minimum_should_match": 1,
            "should": [meta_clause, record_clause],
        }
    }
Esempio n. 3
0
def text_query(text):
    """Return the sub-query used to look up a piece of text.

    A ``None`` or whitespace-only ``text`` falls back to ``match_all()``.
    Any other value produces a ``bool`` query whose ``should`` list holds
    ``meta_query_string(text)`` and a ``child_record``-wrapped
    ``text_query_string(text)``, with ``minimum_should_match`` set to 1.
    """
    has_text = text is not None and len(text.strip()) > 0
    if not has_text:
        return match_all()

    alternatives = [meta_query_string(text)]
    nested = {"bool": {"should": [text_query_string(text)]}}
    alternatives.append(child_record(nested))

    return {"bool": {"minimum_should_match": 1, "should": alternatives}}
Esempio n. 4
0
def text_query(text):
    """Construct the part of a query responsible for finding a piece of
    text in the selected documents.

    When ``text`` is ``None`` or contains only whitespace the result is
    ``match_all()``. Otherwise it is a ``bool`` query that needs at least
    one hit among ``meta_query_string(text)`` and the ``child_record``
    wrapper around ``text_query_string(text)``.
    """
    if text is not None and text.strip():
        clauses = [
            meta_query_string(text),
            child_record({"bool": {"should": [text_query_string(text)]}}),
        ]
        return {"bool": {"minimum_should_match": 1, "should": clauses}}
    return match_all()
Esempio n. 5
0
def analyze_terms(terms, seen=None):
    """Schedule analysis of every document matching any of ``terms``.

    For each term a ``bool`` query is built from
    ``meta_query_string(term, literal=True)`` and a ``child_record``
    wrapper around ``text_query_string(term, literal=True)``; at least one
    of the two has to match. Each document id yielded by
    ``query_doc_ids`` is handed to ``analyze_document.delay`` once.

    Args:
        terms: Iterable of terms to search for.
        seen: Optional set of document ids that were already scheduled.
            It is updated in place; a fresh set is used when omitted.
    """
    seen = set() if seen is None else seen
    for term in terms:
        meta_clause = meta_query_string(term, literal=True)
        record_clause = child_record(
            {"bool": {"should": [text_query_string(term, literal=True)]}}
        )
        query = {
            "bool": {
                "minimum_should_match": 1,
                "should": [meta_clause, record_clause],
            }
        }
        for doc_id in query_doc_ids(query):
            # Only ids not scheduled before are dispatched and recorded.
            if doc_id not in seen:
                analyze_document.delay(doc_id)
                seen.add(doc_id)
Esempio n. 6
0
def analyze_terms(terms, seen=None):
    """Trigger analysis for all documents that match the given terms.

    Each term is first passed through ``normalize_strong``. The normalized
    term is then searched with a ``bool`` query combining
    ``meta_query_string`` and a ``child_record``-wrapped
    ``text_query_string``, of which at least one must match. Every
    document id returned by ``query_doc_ids`` that has not been seen yet
    is passed to ``analyze_document.delay``.

    Args:
        terms: Iterable of raw terms.
        seen: Optional set of already handled document ids. It is mutated
            in place; defaults to a new empty set.
    """
    if seen is None:
        seen = set()

    for raw_term in terms:
        term = normalize_strong(raw_term)
        should = [meta_query_string(term)]
        should.append(
            child_record({"bool": {"should": [text_query_string(term)]}})
        )
        query = {"bool": {"minimum_should_match": 1, "should": should}}

        for doc_id in query_doc_ids(query):
            if doc_id in seen:
                continue
            analyze_document.delay(doc_id)
            seen.add(doc_id)