Esempio n. 1
0
def test_service_id_hash_not_in_searched_fields(services_mapping):
    """No searched field and no highlighted field name may contain ``serviceIdHash``."""
    keyword_query = construct_query(services_mapping, build_query_params(keywords="some keywords"))
    searched_fields = keyword_query['query']['simple_query_string']['fields']
    assert not any("serviceIdHash" in key for key in searched_fields)

    filter_params = build_query_params(filters={'serviceCategories': ["serviceType1"]})
    filtered_query = construct_query(services_mapping, filter_params)
    highlighted_fields = filtered_query['highlight']['fields']
    assert not any("serviceIdHash" in key for key in highlighted_fields)
def keyword_search(index_name, doc_type, query_args):
    """Run a paginated search against Elasticsearch and build the response.

    :param index_name: index to search; also used when building page links
    :param doc_type: document type to search; also used in page links
    :param query_args: request arguments handed to ``construct_query``,
                       ``convert_es_results`` and ``generate_pagination_links``
    :return: ``(body, status_code)``; the body is a dict with ``meta``,
             ``services`` and ``links`` on success, or an error message when
             a ``TransportError`` or ``ValueError`` is raised
    """
    try:
        page_size = int(current_app.config['DM_SEARCH_PAGE_SIZE'])
        res = es.search(
            index=index_name,
            doc_type=doc_type,
            body=construct_query(query_args, page_size)
        )

        results = convert_es_results(res, query_args)

        # A named function instead of a lambda bound to a name (PEP 8).
        def url_for_search(**kwargs):
            return url_for('.search', index_name=index_name,
                           doc_type=doc_type, _external=True, **kwargs)

        response = {
            "meta": results['meta'],
            "services": results['services'],
            "links": generate_pagination_links(
                query_args, results['meta']['total'],
                page_size, url_for_search
            )
        }

        return response, 200
    except TransportError as e:
        # Pass the Elasticsearch failure through with its own status code.
        return _get_an_error_message(e), e.status_code
    except ValueError as e:
        # ValueError raised anywhere above is reported as a bad request.
        return str(e), 400
Esempio n. 3
0
def test_aggregations_terms_added_for_each_param(services_mapping, aggregations):
    """Each requested aggregation gets a terms block on its ``dmagg_`` field."""
    query = construct_query(services_mapping, build_query_params(), aggregations=aggregations)
    built_aggregations = query['aggregations']

    assert set(aggregations) == set(built_aggregations)

    expected_fields = {"dmagg_" + name for name in aggregations}
    actual_fields = {block['terms']['field'] for block in built_aggregations.values()}
    assert expected_fields == actual_fields
Esempio n. 4
0
def keyword_search(index_name, doc_type, query_args):
    """Search Elasticsearch and return one page of services with page links.

    :param index_name: index to search; also used when building page links
    :param doc_type: document type to search; also used in page links
    :param query_args: request arguments handed to ``construct_query``,
                       ``convert_es_results`` and ``generate_pagination_links``
    :return: ``(body, status_code)``; the body is a dict with ``meta``,
             ``services`` and ``links`` on success, or an error message when
             a ``TransportError`` or ``ValueError`` is raised
    """
    try:
        page_size = int(current_app.config['DM_SEARCH_PAGE_SIZE'])
        res = es.search(index=index_name,
                        doc_type=doc_type,
                        body=construct_query(query_args, page_size))

        results = convert_es_results(res, query_args)

        # A named function instead of a lambda bound to a name (PEP 8).
        def url_for_search(**kwargs):
            return url_for('.search', index_name=index_name,
                           doc_type=doc_type, _external=True, **kwargs)

        response = {
            "meta": results['meta'],
            "services": results['services'],
            "links": generate_pagination_links(
                query_args, results['meta']['total'],
                page_size, url_for_search),
        }

        return response, 200
    except TransportError as e:
        # Pass the Elasticsearch failure through with its own status code.
        return _get_an_error_message(e), e.status_code
    except ValueError as e:
        # ValueError raised anywhere above is reported as a bad request.
        return str(e), 400
Esempio n. 5
0
def test_should_not_filter_on_unknown_keys(services_mapping):
    """Known filters become term clauses; the unrelated term is not among them."""
    params = build_query_params(filters={'lot': "SaaS", 'serviceCategories': ["serviceCategories"]})
    params.add("this", "that")

    terms = construct_query(services_mapping, params)["query"]["bool"]["filter"]["bool"]["must"]

    expected_terms = (
        {"term": {'dmfilter_serviceCategories': 'serviceCategories'}},
        {"term": {'dmfilter_lot': 'SaaS'}},
    )
    for expected in expected_terms:
        assert expected in terms
    assert {"term": {'unknown': 'something to ignore'}} not in terms
Esempio n. 6
0
def test_should_have_filtered_term_service_types_clause():
    """The first filter clause is a term on ``filter_serviceTypes``."""
    params = build_query_params(service_types=["serviceTypes"])
    query = construct_query(params)
    first_clause = query["query"]["filtered"]["filter"]["bool"]["must"][0]
    assert_equal("term" in first_clause, True)
    assert_equal(first_clause["term"]["filter_serviceTypes"], "servicetypes")
Esempio n. 7
0
def test_should_have_filtered_term_lot_clause():
    """The first filter clause is a term on ``filter_lot`` with value ``saas``."""
    params = build_query_params(lot="SaaS")
    query = construct_query(params)
    leading_filter = query["query"]["filtered"]["filter"]["bool"]["must"][0]
    assert_equal("term" in leading_filter, True)
    assert_equal(leading_filter["term"]["filter_lot"], "saas")
def test_should_not_filter_on_unknown_keys():
    """Lot and service type produce term filters; the unrelated term is absent."""
    params = build_query_params(lot="SaaS", service_types=["serviceTypes"])
    params.add("this", "that")

    terms = construct_query(params)["query"]["filtered"]["filter"]["bool"]["must"]

    for expected in (
        {"term": {'filter_serviceTypes': 'servicetypes'}},
        {"term": {'filter_lot': 'saas'}},
    ):
        assert_in(expected, terms)
    assert_not_in({"term": {'unknown': 'something to ignore'}}, terms)
Esempio n. 9
0
def test_should_not_filter_on_unknown_keys():
    """Only the lot and service-type term filters are expected in ``must``."""
    params = build_query_params(lot="SaaS", service_types=["serviceTypes"])
    params.add("this", "that")
    built = construct_query(params)
    must_clauses = built["query"]["filtered"]["filter"]["bool"]["must"]

    service_types_term = {"term": {'filter_serviceTypes': 'servicetypes'}}
    lot_term = {"term": {'filter_lot': 'saas'}}
    unknown_term = {"term": {'unknown': 'something to ignore'}}

    assert_in(service_types_term, must_clauses)
    assert_in(lot_term, must_clauses)
    assert_not_in(unknown_term, must_clauses)
Esempio n. 10
0
def test_should_have_filtered_term_for_multiple_service_types_clauses():
    """Each supplied service type yields its own lowercased term filter."""
    params = build_query_params(
        service_types=["serviceTypes1", "serviceTypes2", "serviceTypes3"])
    terms = construct_query(params)["query"]["filtered"]["filter"]["bool"]["must"]

    for value in ('servicetypes1', 'servicetypes2', 'servicetypes3'):
        assert_in({"term": {'filter_serviceTypes': value}}, terms)
def test_should_have_filtered_term_for_multiple_service_types_clauses():
    """Three service types in, three ``filter_serviceTypes`` term clauses out."""
    service_types = ["serviceTypes1", "serviceTypes2", "serviceTypes3"]
    query = construct_query(build_query_params(service_types=service_types))
    must_clauses = query["query"]["filtered"]["filter"]["bool"]["must"]

    expected_clauses = [
        {"term": {'filter_serviceTypes': 'servicetypes1'}},
        {"term": {'filter_serviceTypes': 'servicetypes2'}},
        {"term": {'filter_serviceTypes': 'servicetypes3'}},
    ]
    for clause in expected_clauses:
        assert_in(clause, must_clauses)
def test_should_have_filtered_term_service_types_clause():
    """A single service type produces a leading ``term`` filter clause."""
    query = construct_query(build_query_params(service_types=["serviceTypes"]))
    must_clauses = query["query"]["filtered"]["filter"]["bool"]["must"]
    head = must_clauses[0]
    assert_equal("term" in head, True)
    assert_equal(head["term"]["filter_serviceTypes"], "servicetypes")
Esempio n. 13
0
def test_should_use_no_non_alphanumeric_characters_in_service_types():
    """``"Mys Service TYPes"`` is filtered as ``"mysservicetypes"``."""
    params = build_query_params(service_types=["Mys Service TYPes"])
    query = construct_query(params)
    first_clause = query["query"]["filtered"]["filter"]["bool"]["must"][0]
    assert_equal("term" in first_clause, True)
    assert_equal(first_clause["term"]["filter_serviceTypes"],
                 "mysservicetypes")
Esempio n. 14
0
def test_should_use_whitespace_stripped_lowercased_service_types():
    """``"My serviceTypes"`` is filtered as ``"myservicetypes"``."""
    params = build_query_params(service_types=["My serviceTypes"])
    query = construct_query(params)
    first_clause = query["query"]["filtered"]["filter"]["bool"]["must"][0]
    assert_equal("term" in first_clause, True)
    assert_equal(first_clause["term"]["filter_serviceTypes"],
                 "myservicetypes")
def test_should_have_filtered_term_lot_clause():
    """Filtering by lot puts a ``filter_lot`` term first in ``must``."""
    query = construct_query(build_query_params(lot="SaaS"))
    must_clauses = query["query"]["filtered"]["filter"]["bool"]["must"]
    head = must_clauses[0]
    assert_equal("term" in head, True)
    assert_equal(head["term"]["filter_lot"], "saas")
def test_should_use_no_non_alphanumeric_characters_in_service_types():
    """The filter value for a spaced, mixed-case service type is ``mysservicetypes``."""
    query = construct_query(
        build_query_params(service_types=["Mys Service TYPes"]))
    must_clauses = query["query"]["filtered"]["filter"]["bool"]["must"]
    head = must_clauses[0]
    assert_equal("term" in head, True)
    assert_equal(head["term"]["filter_serviceTypes"], "mysservicetypes")
def test_should_use_whitespace_stripped_lowercased_service_types():
    """The filter value for ``"My serviceTypes"`` is ``myservicetypes``."""
    query = construct_query(
        build_query_params(service_types=["My serviceTypes"]))
    must_clauses = query["query"]["filtered"]["filter"]["bool"]["must"]
    head = must_clauses[0]
    assert_equal("term" in head, True)
    assert_equal(head["term"]["filter_serviceTypes"], "myservicetypes")
Esempio n. 18
0
def test_should_have_filtered_term_service_types_clause(services_mapping):
    """A serviceCategories filter becomes the first ``dmfilter_`` term clause."""
    params = build_query_params(filters={'serviceCategories': ["serviceCategories"]})
    query = construct_query(services_mapping, params)

    first_clause = query["query"]["bool"]["filter"]["bool"]["must"][0]
    assert "term" in first_clause
    assert first_clause["term"]["dmfilter_serviceCategories"] == "serviceCategories"
Esempio n. 19
0
def test_should_make_multi_match_query_if_keywords_supplied(services_mapping):
    """Keywords produce a ``simple_query_string`` over every ``dmtext_`` field."""
    keywords = "these are my keywords"
    query = construct_query(services_mapping, build_query_params(keywords))

    assert "query" in query
    assert "simple_query_string" in query["query"]

    clause = query["query"]["simple_query_string"]
    assert clause["query"] == keywords
    assert clause["default_operator"] == "and"

    expected_fields = frozenset(
        "dmtext_" + field for field in services_mapping.fields_by_prefix["dmtext"]
    )
    assert frozenset(clause["fields"]) == expected_fields
Esempio n. 20
0
def test_should_have_filtered_root_element_and_match_keywords(services_mapping):
    """With keywords and a filter, ``bool.must`` holds the keyword query."""
    params = build_query_params(
        keywords="some keywords",
        filters={'serviceCategories': ["my serviceCategories"]},
    )
    query = construct_query(services_mapping, params)["query"]["bool"]["must"]

    assert "simple_query_string" in query
    clause = query["simple_query_string"]
    assert clause["query"] == "some keywords"
    assert clause["default_operator"] == "and"

    expected_fields = frozenset(
        "dmtext_" + field for field in services_mapping.fields_by_prefix["dmtext"]
    )
    assert frozenset(clause["fields"]) == expected_fields
Esempio n. 21
0
def test_should_have_filtered_term_for_multiple_service_types_clauses(services_mapping):
    """Every serviceCategories value gets its own ``dmfilter_`` term clause."""
    categories = ["serviceCategories1", "serviceCategories2", "serviceCategories3"]
    params = build_query_params(filters={'serviceCategories': categories})

    terms = construct_query(services_mapping, params)["query"]["bool"]["filter"]["bool"]["must"]

    for value in ('serviceCategories1', 'serviceCategories2', 'serviceCategories3'):
        assert {"term": {'dmfilter_serviceCategories': value}} in terms
Esempio n. 22
0
def test_highlight_block_contains_correct_fields():
    """Yield one ``assert_equal`` check per field expected in the highlight block."""
    params = build_query_params(keywords="some keywords",
                                service_types=["some serviceTypes"])
    query = construct_query(params)

    assert_equal("highlight" in query, True)

    highlighted_names = (
        "id", "lot", "serviceName", "serviceSummary",
        "serviceFeatures", "serviceBenefits", "serviceTypes", "supplierName",
    )

    # Generator test: each yielded tuple is (callable, actual, expected, message).
    for field_name in highlighted_names:
        present = field_name in query["highlight"]["fields"]
        yield assert_equal, present, True, field_name
def test_should_make_multi_match_query_if_keywords_supplied():
    """Keywords produce a ``simple_query_string`` over the fixed list of text fields."""
    expected_fields = [
        "frameworkName",
        "id",
        "lot",
        "serviceBenefits",
        "serviceFeatures",
        "serviceName",
        "serviceSummary",
        "serviceTypes",
        "supplierName",
    ]
    keywords = "these are my keywords"

    query = construct_query(build_query_params(keywords))
    assert_equal("query" in query, True)
    assert_in("simple_query_string", query["query"])

    clause = query["query"]["simple_query_string"]
    assert_equal(clause["query"], keywords)
    assert_equal(clause["default_operator"], "and")
    assert_equal(clause["fields"], expected_fields)
Esempio n. 24
0
def test_should_make_multi_match_query_if_keywords_supplied():
    """A keyword-only search is a ``simple_query_string`` with operator ``and``."""
    keywords = "these are my keywords"
    built = construct_query(build_query_params(keywords))

    assert_equal("query" in built, True)
    root = built["query"]
    assert_in("simple_query_string", root)

    query_string = root["simple_query_string"]
    assert_equal(query_string["query"], keywords)
    assert_equal(query_string["default_operator"], "and")

    searched = [
        "frameworkName", "id", "lot",
        "serviceBenefits", "serviceFeatures", "serviceName",
        "serviceSummary", "serviceTypes", "supplierName",
    ]
    assert_equal(query_string["fields"], searched)
Esempio n. 25
0
def test_should_have_filtered_root_element_and_match_keywords():
    """With keywords and a filter, ``filtered.query`` holds the keyword query."""
    params = build_query_params(keywords="some keywords",
                                service_types=["my serviceTypes"])
    query = construct_query(params)["query"]["filtered"]["query"]

    assert_in("simple_query_string", query)
    clause = query["simple_query_string"]
    assert_equal(clause["query"], "some keywords")
    assert_equal(clause["default_operator"], "and")

    expected_fields = [
        "frameworkName",
        "id",
        "lot",
        "serviceBenefits",
        "serviceFeatures",
        "serviceName",
        "serviceSummary",
        "serviceTypes",
        "supplierName",
    ]
    assert_equal(clause["fields"], expected_fields)
def test_should_have_filtered_root_element_and_match_keywords():
    """The keyword clause under ``filtered.query`` uses the fixed field list."""
    built = construct_query(
        build_query_params(keywords="some keywords",
                           service_types=["my serviceTypes"]))
    inner_query = built["query"]["filtered"]["query"]
    assert_in("simple_query_string", inner_query)

    query_string = inner_query["simple_query_string"]
    assert_equal(query_string["query"], "some keywords")
    assert_equal(query_string["default_operator"], "and")

    searched = [
        "frameworkName", "id", "lot",
        "serviceBenefits", "serviceFeatures", "serviceName",
        "serviceSummary", "serviceTypes", "supplierName",
    ]
    assert_equal(query_string["fields"], searched)
def test_highlight_block_contains_correct_fields():
    """Generator test: one membership check per field expected to be highlighted."""
    query = construct_query(
        build_query_params(keywords="some keywords",
                           service_types=["some serviceTypes"]))

    assert_equal("highlight" in query, True)

    expected_by_field = (
        ("id", True),
        ("lot", True),
        ("serviceName", True),
        ("serviceSummary", True),
        ("serviceFeatures", True),
        ("serviceBenefits", True),
        ("serviceTypes", True),
        ("supplierName", True),
    )

    # Each yielded tuple is (callable, actual, expected, message).
    for field_name, should_be_present in expected_by_field:
        is_present = field_name in query["highlight"]["fields"]
        yield assert_equal, is_present, should_be_present, field_name
Esempio n. 28
0
def test_should_have_no_from_by_default():
    """A query built from default params carries no ``from`` key."""
    query = construct_query(build_query_params())
    assert_false("from" in query)
def test_should_have_filtered_root_element_if_service_types_search():
    """A service-type search wraps the query in a ``filtered`` element."""
    params = build_query_params(service_types=["my serviceTypes"])
    built = construct_query(params)
    assert_equal("query" in built, True)
    root = built["query"]
    assert_equal("filtered" in root, True)
Esempio n. 30
0
def test_page_should_set_from_parameter():
    """Requesting page 2 sets ``from`` to 100."""
    query = construct_query(build_query_params(page=2))
    assert_equal(query["from"], 100)
Esempio n. 31
0
def test_should_have_correct_root_element():
    """The constructed query has a top-level ``query`` key."""
    built = construct_query(build_query_params())
    assert_equal("query" in built, True)
def test_should_be_able_to_override_pagesize():
    """The second positional argument to ``construct_query`` sets ``size``."""
    built = construct_query(build_query_params(), 10)
    assert_equal(built["size"], 10)
Esempio n. 33
0
def test_should_have_filtered_root_element_if_service_types_search():
    """Filtering by service type yields a ``filtered`` query root."""
    service_types = ["my serviceTypes"]
    query = construct_query(build_query_params(service_types=service_types))
    has_query = "query" in query
    assert_equal(has_query, True)
    has_filtered = "filtered" in query["query"]
    assert_equal(has_filtered, True)
Esempio n. 34
0
def test_should_have_filtered_root_element_and_match_all_if_no_keywords():
    """Without keywords the inner query of a filtered search is ``match_all``."""
    params = build_query_params(service_types=["my serviceTypes"])
    inner_query = construct_query(params)["query"]["filtered"]["query"]
    assert_equal("match_all" in inner_query, True)
def test_should_have_filtered_term_for_lot_and_service_types_clause():
    """Lot and service type each contribute a term filter to ``must``."""
    params = build_query_params(lot="SaaS", service_types=["serviceTypes"])
    terms = construct_query(params)["query"]["filtered"]["filter"]["bool"]["must"]

    for expected in (
        {"term": {'filter_serviceTypes': 'servicetypes'}},
        {"term": {'filter_lot': 'saas'}},
    ):
        assert_in(expected, terms)
def core_search_and_aggregate(index_name, doc_type, query_args, search=False, aggregations=None):
    """Run a search and/or aggregation query against Elasticsearch and build the response.

    :param index_name: index to query
    :param doc_type: used to look up the mapping and to build pagination links
    :param query_args: request arguments; must support ``in`` and ``.get``. The presence of ``idOnly`` multiplies
                       the page size by ``DM_ID_ONLY_SEARCH_PAGE_SIZE_MULTIPLIER``
    :param search: when truthy, query with ``dfs_query_then_fetch`` and answer an empty page at a non-zero
                   ``from`` offset with the page-404 response
    :param aggregations: aggregation names passed on to ``construct_query``; ``None`` (the default) is treated as
                         an empty list
    :return: ``(body, status_code)``. On success the body holds ``meta``, ``documents``, ``links`` and, when
             aggregations were requested, ``aggregations``
    """
    # A literal ``[]`` default would be a single list shared by every call; normalise ``None`` here instead.
    if aggregations is None:
        aggregations = []

    try:
        mapping = app.mapping.get_mapping(index_name, doc_type)
        page_size = int(current_app.config['DM_SEARCH_PAGE_SIZE'])
        if 'idOnly' in query_args:
            page_size *= int(current_app.config['DM_ID_ONLY_SEARCH_PAGE_SIZE_MULTIPLIER'])

        es_search_kwargs = {'search_type': 'dfs_query_then_fetch'} if search else {}
        constructed_query = construct_query(mapping, query_args, aggregations, page_size)
        with logged_duration_for_external_request('es'):
            res = es.search(
                index=index_name, body=constructed_query, track_total_hits=True, **es_search_kwargs
            )

        results = convert_es_results(mapping, res, query_args)

        def url_for_search(**kwargs):
            return url_for('.search', index_name=index_name, doc_type=doc_type, _external=True, **kwargs)

        response = {
            "meta": results['meta'],
            "documents": results['documents'],
            "links": generate_pagination_links(
                query_args, results['meta']['total'],
                page_size, url_for_search
            ),
        }

        if aggregations:
            # Return aggregations in a slightly cleaner format.
            response['aggregations'] = {
                k: {d['key']: d['doc_count'] for d in v['buckets']}
                for k, v in res.get('aggregations', {}).items()
            }

        # determine whether we're actually off the end of the results. ES handles this as a result-less-yet-happy
        # response, but we probably want to turn it into a 404 not least so we can match our behaviour when fetching
        # beyond the `max_result_window` below
        if search and constructed_query.get("from") and not response["documents"]:
            return _page_404_response(query_args.get("page", None))

        return response, 200

    except TransportError as e:
        try:
            root_causes = getattr(e, "info", {}).get("error", {}).get("root_cause", [])
        except AttributeError:
            # Catch if the contents of 'info' has no ability to get attributes
            return _get_an_error_message(e), e.status_code

        # `reason` may be missing or None; treat that as "no reason" rather than raising AttributeError from
        # inside this handler.
        first_reason = (root_causes[0].get("reason") or "") if root_causes else ""
        if first_reason.startswith("Result window is too large"):
            # in this case we have to fire off another request to determine how we should handle this error...
            # (note minor race condition possible if index is modified between the original call and this one)
            try:
                body = construct_query(mapping, query_args, page_size=None)
                with logged_duration_for_external_request('es'):
                    result_count = es.count(
                        index=index_name,
                        body=body
                    )["count"]
            except TransportError as count_error:
                # distinct name so the original exception `e` is not shadowed
                return _get_an_error_message(count_error), count_error.status_code
            else:
                if result_count < constructed_query.get("from", 0):
                    # there genuinely aren't enough results for this number of pages, so this should be a 404
                    return _page_404_response(query_args.get("page", None))
                # else fall through and allow this to 500 - we probably don't have max_result_window set high enough
                # for the number of results it's possible to access using this index.
        return _get_an_error_message(e), e.status_code

    except ValueError as e:
        return str(e), 400
def test_should_have_highlight_block_on_keyword_search():
    """A keyword search includes a ``highlight`` block."""
    params = build_query_params(keywords="some keywords")
    built = construct_query(params)
    assert_equal("highlight" in built, True)
def test_should_have_page_size_set():
    """The default ``size`` of a constructed query is 100."""
    built = construct_query(build_query_params())
    assert_equal(built["size"], 100)
Esempio n. 39
0
def test_should_have_page_size_set():
    """With no explicit page size, ``size`` is 100."""
    default_params = build_query_params()
    page_size = construct_query(default_params)["size"]
    assert_equal(page_size, 100)
def test_should_have_highlight_block_on_filtered_search():
    """A keyword search with a service-type filter includes ``highlight``."""
    params = build_query_params(keywords="some keywords",
                                service_types=["some serviceTypes"])
    built = construct_query(params)
    assert_equal("highlight" in built, True)
def test_highlight_block_sets_encoder_to_html():
    """The highlight block's ``encoder`` is ``html``."""
    params = build_query_params(keywords="some keywords",
                                service_types=["some serviceTypes"])
    highlight = construct_query(params)["highlight"]
    assert_equal(highlight["encoder"], "html")
Esempio n. 42
0
def test_should_have_match_all_query_if_no_params():
    """With no params the root ``query`` is a ``match_all``."""
    # Two separate builds, as in the original assertions.
    first_build = construct_query(build_query_params())
    assert_equal("query" in first_build, True)

    root_query = construct_query(build_query_params())["query"]
    assert_equal("match_all" in root_query, True)
def test_page_should_set_from_parameter():
    """Page 2 starts at offset 100."""
    second_page = build_query_params(page=2)
    offset = construct_query(second_page)["from"]
    assert_equal(offset, 100)
def test_should_have_match_all_query_if_no_params():
    """An empty search builds a ``query`` whose content is ``match_all``."""
    # The query is built once per assertion, as in the original.
    has_query = "query" in construct_query(build_query_params())
    assert_equal(has_query, True)

    has_match_all = "match_all" in construct_query(build_query_params())["query"]
    assert_equal(has_match_all, True)
Esempio n. 45
0
def test_should_have_highlight_block_on_keyword_search():
    """Searching by keywords adds ``highlight`` to the query."""
    query = construct_query(build_query_params(keywords="some keywords"))
    has_highlight = "highlight" in query
    assert_equal(has_highlight, True)
Esempio n. 46
0
def test_should_have_filtered_root_element_if_lot_search():
    """A lot search wraps the query in a ``filtered`` element."""
    built = construct_query(build_query_params(lot="SaaS"))
    assert_equal("query" in built, True)
    root = built["query"]
    assert_equal("filtered" in root, True)
Esempio n. 47
0
def test_should_have_highlight_block_on_filtered_search():
    """Keywords combined with a filter still produce a ``highlight`` block."""
    query = construct_query(
        build_query_params(keywords="some keywords",
                           service_types=["some serviceTypes"]))
    has_highlight = "highlight" in query
    assert_equal(has_highlight, True)
def test_should_have_no_from_by_default():
    """No ``from`` offset is set unless a page is requested."""
    built = construct_query(build_query_params())
    has_from = "from" in built
    assert_false(has_from)
Esempio n. 49
0
def test_highlight_block_sets_encoder_to_html():
    """Highlighting uses the ``html`` encoder."""
    query = construct_query(
        build_query_params(keywords="some keywords",
                           service_types=["some serviceTypes"]))
    encoder = query["highlight"]["encoder"]
    assert_equal(encoder, "html")
def test_should_have_correct_root_element():
    """``query`` is present at the root of the constructed body."""
    default_params = build_query_params()
    has_query = "query" in construct_query(default_params)
    assert_equal(has_query, True)
def test_should_have_filtered_root_element_and_match_all_if_no_keywords():
    """A filter-only search uses ``match_all`` inside the ``filtered`` query."""
    query = construct_query(
        build_query_params(service_types=["my serviceTypes"]))
    filtered = query["query"]["filtered"]
    assert_equal("match_all" in filtered["query"], True)
def test_should_have_filtered_root_element_if_lot_search():
    """Filtering by lot yields a ``filtered`` query root."""
    params = build_query_params(lot="SaaS")
    query = construct_query(params)
    has_query = "query" in query
    assert_equal(has_query, True)
    has_filtered = "filtered" in query["query"]
    assert_equal(has_filtered, True)
Esempio n. 53
0
def test_should_be_able_to_override_pagesize():
    """Passing 10 as the page size gives ``size`` 10."""
    default_params = build_query_params()
    size = construct_query(default_params, 10)["size"]
    assert_equal(size, 10)