def test_service_id_hash_not_in_searched_fields(services_mapping):
    """serviceIdHash must appear in neither the searched nor the highlighted fields."""
    keyword_query = construct_query(services_mapping, build_query_params(keywords="some keywords"))
    searched_fields = keyword_query['query']['simple_query_string']['fields']
    assert all("serviceIdHash" not in field for field in searched_fields)

    filter_query = construct_query(
        services_mapping,
        build_query_params(filters={'serviceCategories': ["serviceType1"]}),
    )
    highlighted_fields = filter_query['highlight']['fields']
    assert all("serviceIdHash" not in field for field in highlighted_fields)
def keyword_search(index_name, doc_type, query_args):
    """Run a keyword search against Elasticsearch and build a paginated response.

    :param index_name: index to search
    :param doc_type: document type to search
    :param query_args: request arguments used to build the query and the pagination links
    :return: ``(body, status_code)``. On success ``body`` is a dict with ``meta``,
        ``services`` and ``links`` and the status is 200. A ``TransportError`` is
        reported with its own status code; a ``ValueError`` becomes a 400.
    """
    try:
        page_size = int(current_app.config['DM_SEARCH_PAGE_SIZE'])
        res = es.search(
            index=index_name,
            doc_type=doc_type,
            body=construct_query(query_args, page_size)
        )
        results = convert_es_results(res, query_args)

        # A named function rather than a lambda bound to a name (PEP 8).
        def url_for_search(**kwargs):
            return url_for('.search', index_name=index_name, doc_type=doc_type, _external=True, **kwargs)

        response = {
            "meta": results['meta'],
            "services": results['services'],
            "links": generate_pagination_links(
                query_args,
                results['meta']['total'],
                page_size,
                url_for_search
            )
        }
        return response, 200
    except TransportError as e:
        return _get_an_error_message(e), e.status_code
    except ValueError as e:
        return str(e), 400
def test_aggregations_terms_added_for_each_param(services_mapping, aggregations):
    """Each requested aggregation gets a terms aggregation on its ``dmagg_`` field."""
    query = construct_query(services_mapping, build_query_params(), aggregations=aggregations)
    built_aggregations = query['aggregations']

    assert set(aggregations) == set(built_aggregations)

    expected_fields = {"_".join(("dmagg", name)) for name in aggregations}
    actual_fields = {definition['terms']['field'] for definition in built_aggregations.values()}
    assert expected_fields == actual_fields
def keyword_search(index_name, doc_type, query_args):
    """Run a keyword search against Elasticsearch and build a paginated response.

    :param index_name: index to search
    :param doc_type: document type to search
    :param query_args: request arguments used to build the query and the pagination links
    :return: ``(body, status_code)``. On success ``body`` is a dict with ``meta``,
        ``services`` and ``links`` and the status is 200. A ``TransportError`` is
        reported with its own status code; a ``ValueError`` becomes a 400.
    """
    try:
        page_size = int(current_app.config['DM_SEARCH_PAGE_SIZE'])
        res = es.search(index=index_name, doc_type=doc_type, body=construct_query(query_args, page_size))
        results = convert_es_results(res, query_args)

        # A named function rather than a lambda bound to a name (PEP 8).
        def url_for_search(**kwargs):
            return url_for('.search', index_name=index_name, doc_type=doc_type, _external=True, **kwargs)

        response = {
            "meta": results['meta'],
            "services": results['services'],
            "links": generate_pagination_links(query_args, results['meta']['total'], page_size, url_for_search)
        }
        return response, 200
    except TransportError as e:
        return _get_an_error_message(e), e.status_code
    except ValueError as e:
        return str(e), 400
def test_should_not_filter_on_unknown_keys(services_mapping):
    """Query args that are not known filters must not become filter clauses."""
    params = build_query_params(filters={'lot': "SaaS", 'serviceCategories': ["serviceCategories"]})
    params.add("this", "that")

    query = construct_query(services_mapping, params)
    terms = query["query"]["bool"]["filter"]["bool"]["must"]

    assert {"term": {'dmfilter_serviceCategories': 'serviceCategories'}} in terms
    assert {"term": {'dmfilter_lot': 'SaaS'}} in terms
    assert {"term": {'unknown': 'something to ignore'}} not in terms
    # The key actually added above is "this"; make sure it never reaches the
    # filter, whatever prefix the query builder might have put in front of it.
    assert not any("this" in field for clause in terms for field in clause.get("term", {}))
def test_should_have_filtered_term_service_types_clause():
    """The first ``must`` clause is a term on filter_serviceTypes with the value "servicetypes"."""
    params = build_query_params(service_types=["serviceTypes"])
    query = construct_query(params)
    first_clause = query["query"]["filtered"]["filter"]["bool"]["must"][0]
    assert_equal("term" in first_clause, True)
    assert_equal(first_clause["term"]["filter_serviceTypes"], "servicetypes")
def test_should_have_filtered_term_lot_clause():
    """The first ``must`` clause is a term on filter_lot with the value "saas"."""
    params = build_query_params(lot="SaaS")
    query = construct_query(params)
    first_clause = query["query"]["filtered"]["filter"]["bool"]["must"][0]
    assert_equal("term" in first_clause, True)
    assert_equal(first_clause["term"]["filter_lot"], "saas")
def test_should_not_filter_on_unknown_keys():
    """Query args that are not known filters must not become filter clauses."""
    params = build_query_params(lot="SaaS", service_types=["serviceTypes"])
    params.add("this", "that")

    query = construct_query(params)
    terms = query["query"]["filtered"]["filter"]["bool"]["must"]

    assert_in({"term": {'filter_serviceTypes': 'servicetypes'}}, terms)
    assert_in({"term": {'filter_lot': 'saas'}}, terms)
    assert_not_in({"term": {'unknown': 'something to ignore'}}, terms)
    # The key actually added above is "this"; make sure it never reaches the
    # filter, whatever prefix the query builder might have put in front of it.
    assert not any("this" in field for clause in terms for field in clause.get("term", {}))
def test_should_have_filtered_term_for_multiple_service_types_clauses():
    """Every supplied service type produces its own lowercased term clause."""
    params = build_query_params(service_types=["serviceTypes1", "serviceTypes2", "serviceTypes3"])
    query = construct_query(params)
    terms = query["query"]["filtered"]["filter"]["bool"]["must"]
    for expected_value in ("servicetypes1", "servicetypes2", "servicetypes3"):
        assert_in({"term": {'filter_serviceTypes': expected_value}}, terms)
def test_should_have_filtered_term_service_types_clause():
    """The first ``must`` clause is a term on filter_serviceTypes with the value "servicetypes"."""
    query = construct_query(build_query_params(service_types=["serviceTypes"]))
    must_clauses = query["query"]["filtered"]["filter"]["bool"]["must"]
    leading_clause = must_clauses[0]
    assert_equal("term" in leading_clause, True)
    assert_equal(leading_clause["term"]["filter_serviceTypes"], "servicetypes")
def test_should_use_no_non_alphanumeric_characters_in_service_types():
    """ "Mys Service TYPes" is expected to be filtered on as "mysservicetypes"."""
    params = build_query_params(service_types=["Mys Service TYPes"])
    query = construct_query(params)
    first_clause = query["query"]["filtered"]["filter"]["bool"]["must"][0]
    assert_equal("term" in first_clause, True)
    assert_equal(first_clause["term"]["filter_serviceTypes"], "mysservicetypes")
def test_should_use_whitespace_stripped_lowercased_service_types():
    """ "My serviceTypes" is expected to be filtered on as "myservicetypes"."""
    params = build_query_params(service_types=["My serviceTypes"])
    query = construct_query(params)
    first_clause = query["query"]["filtered"]["filter"]["bool"]["must"][0]
    assert_equal("term" in first_clause, True)
    assert_equal(first_clause["term"]["filter_serviceTypes"], "myservicetypes")
def test_should_have_filtered_term_lot_clause():
    """The first ``must`` clause is a term on filter_lot with the value "saas"."""
    query = construct_query(build_query_params(lot="SaaS"))
    must_clauses = query["query"]["filtered"]["filter"]["bool"]["must"]
    leading_clause = must_clauses[0]
    assert_equal("term" in leading_clause, True)
    assert_equal(leading_clause["term"]["filter_lot"], "saas")
def test_should_use_no_non_alphanumeric_characters_in_service_types():
    """ "Mys Service TYPes" is expected to be filtered on as "mysservicetypes"."""
    query = construct_query(build_query_params(service_types=["Mys Service TYPes"]))
    must_clauses = query["query"]["filtered"]["filter"]["bool"]["must"]
    leading_clause = must_clauses[0]
    assert_equal("term" in leading_clause, True)
    assert_equal(leading_clause["term"]["filter_serviceTypes"], "mysservicetypes")
def test_should_use_whitespace_stripped_lowercased_service_types():
    """ "My serviceTypes" is expected to be filtered on as "myservicetypes"."""
    query = construct_query(build_query_params(service_types=["My serviceTypes"]))
    must_clauses = query["query"]["filtered"]["filter"]["bool"]["must"]
    leading_clause = must_clauses[0]
    assert_equal("term" in leading_clause, True)
    assert_equal(leading_clause["term"]["filter_serviceTypes"], "myservicetypes")
def test_should_have_filtered_term_service_types_clause(services_mapping):
    """The first ``must`` clause is a term on dmfilter_serviceCategories with the value unchanged."""
    params = build_query_params(filters={'serviceCategories': ["serviceCategories"]})
    query = construct_query(services_mapping, params)
    first_clause = query["query"]["bool"]["filter"]["bool"]["must"][0]
    assert "term" in first_clause
    assert first_clause["term"]["dmfilter_serviceCategories"] == "serviceCategories"
def test_should_make_multi_match_query_if_keywords_supplied(services_mapping):
    """Keywords alone produce a simple_query_string over every dmtext field."""
    keywords = "these are my keywords"
    query = construct_query(services_mapping, build_query_params(keywords))

    assert "query" in query
    assert "simple_query_string" in query["query"]

    clause = query["query"]["simple_query_string"]
    assert clause["query"] == keywords
    assert clause["default_operator"] == "and"

    searched_fields = frozenset(clause["fields"])
    expected_fields = frozenset(
        "dmtext_" + name for name in services_mapping.fields_by_prefix["dmtext"]
    )
    assert searched_fields == expected_fields
def test_should_have_filtered_root_element_and_match_keywords(services_mapping):
    """With keywords and a filter, the bool ``must`` holds the simple_query_string clause."""
    params = build_query_params(
        keywords="some keywords",
        filters={'serviceCategories': ["my serviceCategories"]},
    )
    must_clause = construct_query(services_mapping, params)["query"]["bool"]["must"]

    assert "simple_query_string" in must_clause

    clause = must_clause["simple_query_string"]
    assert clause["query"] == "some keywords"
    assert clause["default_operator"] == "and"

    searched_fields = frozenset(clause["fields"])
    expected_fields = frozenset(
        "dmtext_" + name for name in services_mapping.fields_by_prefix["dmtext"]
    )
    assert searched_fields == expected_fields
def test_should_have_filtered_term_for_multiple_service_types_clauses(services_mapping):
    """Every supplied service category produces its own term clause."""
    categories = ("serviceCategories1", "serviceCategories2", "serviceCategories3")
    params = build_query_params(filters={'serviceCategories': list(categories)})
    query = construct_query(services_mapping, params)

    terms = query["query"]["bool"]["filter"]["bool"]["must"]
    for category in categories:
        assert {"term": {'dmfilter_serviceCategories': category}} in terms
def test_highlight_block_contains_correct_fields():
    """Yield one check per field that is expected to be in the highlight block."""
    params = build_query_params(keywords="some keywords", service_types=["some serviceTypes"])
    query = construct_query(params)
    assert_equal("highlight" in query, True)

    highlighted_fields = (
        "id",
        "lot",
        "serviceName",
        "serviceSummary",
        "serviceFeatures",
        "serviceBenefits",
        "serviceTypes",
        "supplierName",
    )
    for field_name in highlighted_fields:
        is_present = field_name in query["highlight"]["fields"]
        # Generator-style test: the runner calls assert_equal(is_present, True, field_name).
        yield assert_equal, is_present, True, field_name
def test_should_make_multi_match_query_if_keywords_supplied():
    """Keywords alone produce a simple_query_string over the fixed list of text fields."""
    keywords = "these are my keywords"
    query = construct_query(build_query_params(keywords))

    assert_equal("query" in query, True)
    assert_in("simple_query_string", query["query"])

    clause = query["query"]["simple_query_string"]
    assert_equal(clause["query"], keywords)
    assert_equal(clause["default_operator"], "and")

    expected_fields = [
        "frameworkName",
        "id",
        "lot",
        "serviceBenefits",
        "serviceFeatures",
        "serviceName",
        "serviceSummary",
        "serviceTypes",
        "supplierName",
    ]
    assert_equal(clause["fields"], expected_fields)
def test_should_have_filtered_root_element_and_match_keywords():
    """With keywords and a filter, the filtered query holds the simple_query_string clause."""
    params = build_query_params(keywords="some keywords", service_types=["my serviceTypes"])
    inner_query = construct_query(params)["query"]["filtered"]["query"]

    assert_in("simple_query_string", inner_query)

    clause = inner_query["simple_query_string"]
    assert_equal(clause["query"], "some keywords")
    assert_equal(clause["default_operator"], "and")

    expected_fields = [
        "frameworkName",
        "id",
        "lot",
        "serviceBenefits",
        "serviceFeatures",
        "serviceName",
        "serviceSummary",
        "serviceTypes",
        "supplierName",
    ]
    assert_equal(clause["fields"], expected_fields)
def test_should_have_filtered_root_element_and_match_keywords():
    """With keywords and a filter, the filtered query holds the simple_query_string clause."""
    full_query = construct_query(
        build_query_params(keywords="some keywords", service_types=["my serviceTypes"])
    )
    keyword_part = full_query["query"]["filtered"]["query"]
    assert_in("simple_query_string", keyword_part)

    string_clause = keyword_part["simple_query_string"]
    assert_equal(string_clause["query"], "some keywords")
    assert_equal(string_clause["default_operator"], "and")
    assert_equal(
        string_clause["fields"],
        [
            "frameworkName",
            "id",
            "lot",
            "serviceBenefits",
            "serviceFeatures",
            "serviceName",
            "serviceSummary",
            "serviceTypes",
            "supplierName",
        ],
    )
def test_highlight_block_contains_correct_fields():
    """Yield one check per field that is expected to be in the highlight block."""
    query = construct_query(
        build_query_params(keywords="some keywords", service_types=["some serviceTypes"])
    )
    assert_equal("highlight" in query, True)

    expectations = {
        "id": True,
        "lot": True,
        "serviceName": True,
        "serviceSummary": True,
        "serviceFeatures": True,
        "serviceBenefits": True,
        "serviceTypes": True,
        "supplierName": True,
    }
    for field_name, should_be_present in expectations.items():
        # Generator-style test: the runner calls assert_equal with the yielded arguments.
        yield assert_equal, field_name in query["highlight"]["fields"], should_be_present, field_name
def test_should_have_no_from_by_default():
    """A query built from default params has no ``from`` key."""
    query = construct_query(build_query_params())
    assert_false("from" in query)
def test_should_have_filtered_root_element_if_service_types_search():
    """Filtering on service types nests a ``filtered`` element under ``query``."""
    params = build_query_params(service_types=["my serviceTypes"])
    query = construct_query(params)
    assert_equal("query" in query, True)
    assert_equal("filtered" in query["query"], True)
def test_page_should_set_from_parameter():
    """Requesting page 2 sets ``from`` to 100."""
    query = construct_query(build_query_params(page=2))
    assert_equal(query["from"], 100)
def test_should_have_correct_root_element():
    """A query built from default params has a top-level ``query`` key."""
    query = construct_query(build_query_params())
    assert_equal("query" in query, True)
def test_should_be_able_to_override_pagesize():
    """The second positional argument to construct_query sets ``size``."""
    query = construct_query(build_query_params(), 10)
    assert_equal(query["size"], 10)
def test_should_have_filtered_root_element_if_service_types_search():
    """Filtering on service types nests a ``filtered`` element under ``query``."""
    built = construct_query(build_query_params(service_types=["my serviceTypes"]))
    has_query = "query" in built
    assert_equal(has_query, True)
    has_filtered = "filtered" in built["query"]
    assert_equal(has_filtered, True)
def test_should_have_filtered_root_element_and_match_all_if_no_keywords():
    """A filter without keywords wraps a ``match_all`` query."""
    params = build_query_params(service_types=["my serviceTypes"])
    query = construct_query(params)
    inner_query = query["query"]["filtered"]["query"]
    assert_equal("match_all" in inner_query, True)
def test_should_have_filtered_term_for_lot_and_service_types_clause():
    """Lot and service type filters each contribute a lowercased term clause."""
    params = build_query_params(lot="SaaS", service_types=["serviceTypes"])
    terms = construct_query(params)["query"]["filtered"]["filter"]["bool"]["must"]

    service_types_clause = {"term": {'filter_serviceTypes': 'servicetypes'}}
    assert_in(service_types_clause, terms)

    lot_clause = {"term": {'filter_lot': 'saas'}}
    assert_in(lot_clause, terms)
def core_search_and_aggregate(index_name, doc_type, query_args, search=False, aggregations=None):
    """Run a search and/or aggregation against Elasticsearch and build the response.

    :param index_name: index to query
    :param doc_type: document type; used to look up the mapping and build pagination links
    :param query_args: request arguments; ``idOnly`` enlarges the page size and ``page``
        is echoed into 404 responses
    :param search: when true, use ``dfs_query_then_fetch`` and turn an empty page past
        the first into a 404
    :param aggregations: aggregations to request; ``None`` (the default) or an empty
        value means none are requested or returned
    :return: ``(body, status_code)``. On success ``body`` has ``meta``, ``documents``
        and ``links`` (plus ``aggregations`` when requested) and the status is 200.
        A ``TransportError`` is reported with its own status code unless it is
        recognised as paging beyond the available results, which is a 404.
        A ``ValueError`` becomes a 400.
    """
    # Normalise here rather than using a mutable ``[]`` default shared between calls.
    if aggregations is None:
        aggregations = []

    try:
        mapping = app.mapping.get_mapping(index_name, doc_type)
        page_size = int(current_app.config['DM_SEARCH_PAGE_SIZE'])
        if 'idOnly' in query_args:
            page_size *= int(current_app.config['DM_ID_ONLY_SEARCH_PAGE_SIZE_MULTIPLIER'])

        es_search_kwargs = {'search_type': 'dfs_query_then_fetch'} if search else {}
        constructed_query = construct_query(mapping, query_args, aggregations, page_size)

        with logged_duration_for_external_request('es'):
            res = es.search(
                index=index_name,
                body=constructed_query,
                track_total_hits=True,
                **es_search_kwargs
            )

        results = convert_es_results(mapping, res, query_args)

        def url_for_search(**kwargs):
            return url_for('.search', index_name=index_name, doc_type=doc_type, _external=True, **kwargs)

        response = {
            "meta": results['meta'],
            "documents": results['documents'],
            "links": generate_pagination_links(
                query_args,
                results['meta']['total'],
                page_size,
                url_for_search
            ),
        }

        if aggregations:
            # Return aggregations in a slightly cleaner format.
            response['aggregations'] = {
                k: {d['key']: d['doc_count'] for d in v['buckets']}
                for k, v in res.get('aggregations', {}).items()
            }

        # determine whether we're actually off the end of the results. ES handles this as a
        # result-less-yet-happy response, but we probably want to turn it into a 404 not least so we
        # can match our behaviour when fetching beyond the `max_result_window` below
        if search and constructed_query.get("from") and not response["documents"]:
            return _page_404_response(query_args.get("page", None))

        return response, 200

    except TransportError as e:
        try:
            root_causes = getattr(e, "info", {}).get("error", {}).get("root_cause", {})
        except AttributeError:
            # Catch if the contents of 'info' has no ability to get attributes
            return _get_an_error_message(e), e.status_code

        # ``or ""`` guards against a root cause with no "reason", which would otherwise
        # raise AttributeError from inside this handler.
        if root_causes and (root_causes[0].get("reason") or "").startswith("Result window is too large"):
            # in this case we have to fire off another request to determine how we should handle this error...
            # (note minor race condition possible if index is modified between the original call and this one)
            try:
                body = construct_query(mapping, query_args, page_size=None)
                with logged_duration_for_external_request('es'):
                    result_count = es.count(
                        index=index_name,
                        body=body
                    )["count"]
            except TransportError as count_error:
                # Distinct name so the outer exception ``e`` is never shadowed.
                return _get_an_error_message(count_error), count_error.status_code
            else:
                if result_count < constructed_query.get("from", 0):
                    # there genuinely aren't enough results for this number of pages, so this should be a 404
                    return _page_404_response(query_args.get("page", None))
                # else fall through and allow this to 500 - we probably don't have max_result_window set high
                # enough for the number of results it's possible to access using this index.

        return _get_an_error_message(e), e.status_code

    except ValueError as e:
        return str(e), 400
def test_should_have_highlight_block_on_keyword_search():
    """A keyword-only query includes a ``highlight`` block."""
    params = build_query_params(keywords="some keywords")
    query = construct_query(params)
    assert_equal("highlight" in query, True)
def test_should_have_page_size_set():
    """A query built from default params has ``size`` 100."""
    query = construct_query(build_query_params())
    assert_equal(query["size"], 100)
def test_should_have_highlight_block_on_filtered_search():
    """A query with keywords and a filter includes a ``highlight`` block."""
    params = build_query_params(keywords="some keywords", service_types=["some serviceTypes"])
    query = construct_query(params)
    assert_equal("highlight" in query, True)
def test_highlight_block_sets_encoder_to_html():
    """The highlight block's ``encoder`` is "html"."""
    params = build_query_params(keywords="some keywords", service_types=["some serviceTypes"])
    highlight = construct_query(params)["highlight"]
    assert_equal(highlight["encoder"], "html")
def test_should_have_match_all_query_if_no_params():
    """A query built from default params is a ``match_all`` query."""
    first_query = construct_query(build_query_params())
    assert_equal("query" in first_query, True)

    # Built a second time, as the original test did, rather than reusing the first result.
    second_query = construct_query(build_query_params())
    assert_equal("match_all" in second_query["query"], True)
def test_page_should_set_from_parameter():
    """Requesting page 2 sets ``from`` to 100."""
    params = build_query_params(page=2)
    offset = construct_query(params)["from"]
    assert_equal(offset, 100)
def test_should_have_filtered_root_element_if_lot_search():
    """Filtering on lot nests a ``filtered`` element under ``query``."""
    params = build_query_params(lot="SaaS")
    query = construct_query(params)
    assert_equal("query" in query, True)
    assert_equal("filtered" in query["query"], True)
def test_should_have_filtered_root_element_and_match_all_if_no_keywords():
    """A filter without keywords wraps a ``match_all`` query."""
    built = construct_query(build_query_params(service_types=["my serviceTypes"]))
    filtered_part = built["query"]["filtered"]
    assert_equal("match_all" in filtered_part["query"], True)