def test_fetch_dashboard_search_results(self, mock_search: MagicMock) -> None:
    """The dashboard proxy should surface the single mocked hit unchanged."""
    dashboard_kwargs = dict(id='mode_dashboard',
                            uri='dashboard_uri',
                            cluster='gold',
                            group_name='mode_dashboard_group',
                            group_url='mode_dashboard_group_url',
                            product='mode',
                            name='mode_dashboard',
                            url='mode_dashboard_url',
                            description='test_dashboard',
                            last_successful_run_timestamp=1000)
    mock_search.return_value = SearchResult(total_results=1,
                                            results=[Dashboard(**dashboard_kwargs)])
    # Expected result mirrors the mocked hit exactly.
    expected = SearchResult(total_results=1, results=[Dashboard(**dashboard_kwargs)])
    resp = self.es_proxy.fetch_dashboard_search_results(query_term='test_query_term',
                                                        page_index=0,
                                                        index='dashboard_search_index')
    self.assertEqual(resp.total_results, expected.total_results)
    self.assertDictEqual(vars(resp.results[0]), vars(expected.results[0]),
                         "Search result doesn't match with expected result!")
def test_search_match_with_field(self, mock_search: MagicMock) -> None:
    """Field-scoped search should return the mocked table unchanged."""
    mock_search.return_value = SearchResult(total_results=1, results=[self.mock_result3])
    expected = SearchResult(total_results=1,
                            results=[Table(name='test_table3',
                                           key='test_key3',
                                           description='test_description3',
                                           cluster='gold',
                                           database='test_db3',
                                           schema='test_schema3',
                                           column_names=['test_col1', 'test_col2'],
                                           tags=[self.mock_tag],
                                           badges=[self.mock_badge],
                                           last_updated_timestamp=1527283287)])
    resp = self.es_proxy.fetch_table_search_results_with_field(query_term='test_query_term',
                                                               field_name='tag',
                                                               field_value='match')
    # assertEquals is a deprecated alias of assertEqual (removed in Python 3.12).
    self.assertEqual(resp.total_results, expected.total_results)
    self.assertDictEqual(vars(resp.results[0]), vars(expected.results[0]),
                         "Search result doesn't match with expected result!")
def fetch_table_search_results(self, *,
                               query_term: str,
                               page_index: int = 0,
                               index: str = '') -> SearchResult:
    """
    Conduct a 'Basic Search' in Amundsen UI.

    Atlas Basic Search API is used for that operation. We search on `qualifiedName`
    field as (following Atlas documentation) any 'Referencable' entity 'can be
    searched for using a unique attribute called qualifiedName'. It provides best
    performance, simplicity and sorting by popularityScore.

    :param query_term: Search Query Term
    :param page_index: Index of search page user is currently on (for pagination)
    :param index: Search Index (different resource corresponding to different index)
    :return: SearchResult Object
    """
    if not query_term:
        # Blank queries short-circuit to an empty result set.
        return SearchResult(total_results=0, results=[])

    # @todo switch to search with 'query' not 'filters' once Atlas
    # FreeTextSearchProcessor is fixed: https://reviews.apache.org/r/72440/

    # qualifiedName already embeds both dbName and tableName (and the table
    # description is matched too), so a single CONTAINS filter is sufficient.
    qn_filters = [(self.ATLAS_QN_ATTRIBUTE, 'CONTAINS', query_term)]

    search_query = self._prepare_basic_search_query(self.page_size, page_index,
                                                    filters=qn_filters, operator='OR')
    results, total = self._atlas_basic_search(search_query)

    return SearchResult(total_results=total, results=results)
def fetch_table_search_results(self, *,
                               query_term: str,
                               page_index: int = 0,
                               index: str = '') -> SearchResult:
    """
    Query Atlas (Basic Search) and return results as list of Table objects.

    :param query_term: search query term
    :param page_index: index of search page user is currently on
    :param index: search index (different resource corresponding to different index)
    :return: SearchResult Object
    """
    if not query_term:
        # Nothing to search for; skip the round trip entirely.
        return SearchResult(total_results=0, results=[])

    # Wildcard-wrap the term and page via limit/offset.
    offset = page_index * self.page_size
    params = {
        'typeName': 'Table',
        'excludeDeletedEntities': True,
        'limit': self.page_size,
        'offset': offset,
        'query': f'*{query_term}*',
        'attributes': ['description', 'comment'],
    }

    results, total = self._fetch_tables(params)
    return SearchResult(total_results=total, results=results)
def fetch_table_search_results_with_filter(self, *,
                                           query_term: str,
                                           search_request: dict,
                                           page_index: int = 0,
                                           index: str = '') -> SearchResult:
    """
    Query Elasticsearch with a filter payload and return results for the
    model bound to the chosen index.

    :param query_term: search term combined with the filters
    :param search_request: A json representation of search request
    :param page_index: index of search page user is currently on
    :param index: current index for search. Provide different index for different resource.
    :return: SearchResult Object
    """
    if index:
        current_index = index  # type: str
    else:
        current_index = current_app.config.get(config.ELASTICSEARCH_INDEX_KEY,
                                               DEFAULT_ES_INDEX)

    if not search_request:
        # No filters supplied -> nothing to narrow down; return empty result.
        return SearchResult(total_results=0, results=[])

    try:
        query_string = self.convert_query_json_to_query_dsl(
            search_request=search_request, query_term=query_term)  # type: str
    except Exception as e:
        LOGGING.exception(e)
        # Any conversion failure is swallowed and surfaced as an empty result.
        return SearchResult(total_results=0, results=[])

    client = Search(using=self.elasticsearch, index=current_index)

    # Rank textual matches weighted by popularity (total_usage).
    query_name = {
        "function_score": {
            "query": {
                "query_string": {
                    "query": query_string
                }
            },
            "field_value_factor": {
                "field": "total_usage",
                "modifier": "log2p"
            }
        }
    }

    model = self.get_model_by_index(current_index)
    return self._search_helper(page_index=page_index, client=client,
                               query_name=query_name, model=model)
def _get_search_result(self, page_index: int, client: Search) -> SearchResult:
    """
    Execute the prepared query for one page and map every hit onto a Table.

    :param page_index: zero-based page to fetch
    :param client: prepared elasticsearch_dsl Search client
    :return: SearchResult holding the tables of the requested page
    """
    # Slice the query to the window covered by the requested page.
    offset = page_index * self.page_size
    response = client[offset:offset + self.page_size].execute()

    tables = [
        Table(name=hit.table_name,
              key=hit.table_key,
              description=hit.table_description,
              cluster=hit.cluster,
              database=hit.database,
              schema_name=hit.schema_name,
              column_names=hit.column_names,
              tags=hit.tag_names,
              last_updated_epoch=hit.table_last_updated_epoch)
        for hit in response
    ]

    return SearchResult(total_results=response.hits.total, results=tables)
def fetch_search_results_with_field(self, *,
                                    query_term: str,
                                    field_name: str,
                                    field_value: str,
                                    page_index: int = 0) -> SearchResult:
    """
    Query Atlas and return results as list of Table objects.
    Per field name we have a count query and a query for the tables.
    https://atlas.apache.org/Search-Advanced.html

    :param query_term: search query term
    :param field_name: field name to do the searching(e.g schema_name, tag_names)
    :param field_value: value for the field for filtering
    :param page_index: index of search page user is currently on
    :return: SearchResult Object
    """
    # Fallback queries match nothing so that an unknown field yields an empty result.
    # (These were f-strings with no placeholders; plain literals are equivalent.)
    sql = "Table from Table where false"
    count_sql = f"{sql} select count()"
    if field_name == 'tag':
        sql = f"from Table where Table is '{field_value}'"
        count_sql = f"{sql} select count()"
    elif field_name == 'schema':
        sql = f"from Table where db.name like '{field_value}'"
        count_sql = f"{sql} select count()"
    elif field_name == 'table':
        sql = f"from Table where name like '{field_value}'"
        count_sql = f"{sql} select count()"
    elif field_name == 'column':
        sql = f"hive_column where name like '{field_value}' select table"
        # TODO nanne: count tables instead of columns
        count_sql = f"hive_column where name like '{field_value}' select count()"

    LOGGER.debug(f"Used following sql query: {sql}")
    tables: List[Table] = []
    count_value = 0
    try:
        # count results first so pagination bounds can be checked
        count_params = {'query': count_sql}
        count_results = list(self.atlas.search_dsl(**count_params))[0]
        count_value = count_results._data['attributes']['values'][0][0]

        params = {
            'query': f"{sql} limit {self.page_size} offset {page_index * self.page_size}"
        }
        search_results = self.atlas.search_dsl(**params)
        if count_value > 0 and page_index * self.page_size <= count_value:
            # unpack all collections (usually just one collection though)
            for collection in search_results:
                if hasattr(collection, 'entities'):
                    tables.extend(self._parse_results(response=collection.entities))
    except BadRequest:
        # BUG FIX: the previous call passed `sql` as a %-format argument with no
        # placeholder in the message, so the failing query was never logged.
        LOGGER.error("Atlas Search DSL error with the following query: %s", sql)

    return SearchResult(total_results=count_value, results=tables)
def test_search_tag_table(self) -> None:
    """Field search by 'tag' and 'table' should each return the single mocked table."""
    for field in ('tag', 'table'):
        table_key = (f"{self.entity_type}://"
                     f"{self.cluster}.{self.db}/"
                     f"{self.entity1_name}")
        expected = SearchResult(total_results=1,
                                results=[Table(name=self.entity1_name,
                                               key=table_key,
                                               description=self.entity1_description,
                                               cluster=self.cluster,
                                               database=self.entity_type,
                                               schema_name=self.db,
                                               column_names=[],
                                               tags=[],
                                               last_updated_epoch=123)])
        # Mock an Atlas collection carrying exactly one entity.
        entity_collection = MagicMock()
        entity_collection.entities = [self.to_class(self.entity1)]
        entity_collection._data = {'approximateCount': 1}
        fake_create = MagicMock(return_value=entity_collection)
        with patch.object(self.proxy.atlas.search_basic, 'create', fake_create):
            resp = self.proxy.fetch_table_search_results_with_field(
                query_term=field + "Table1",
                field_name=field,
                field_value="Table1"
            )
            self.assertTrue(resp.total_results == 1, "there should be 1 search result")
            self.assertIsInstance(resp.results[0], Table,
                                  "Search result received is not of 'Table' type!")
            self.assertDictEqual(vars(resp.results[0]), vars(expected.results[0]),
                                 "Search Result doesn't match with expected result!")
def fetch_user_search_results(self, *, query_term: str, page_index: int = 0,
                              index: str = '') -> SearchResult:
    """
    Search the user index on name/e-mail fields.

    :param query_term: search query term
    :param page_index: index of search page user is currently on
    :param index: user search index; must be provided explicitly
    :return: SearchResult Object
    :raises Exception: when no index is supplied
    """
    if not index:
        raise Exception('Index cant be empty for user search')
    if not query_term:
        # Blank query -> empty result, no ES round trip.
        return SearchResult(total_results=0, results=[])

    client = Search(using=self.elasticsearch, index=index)

    # Don't use any weight(total_follow, total_own, total_use)
    query_name = {
        "function_score": {
            "query": {
                "multi_match": {
                    "query": query_term,
                    "fields": [
                        "name.raw^30",
                        "name^5",
                        "first_name.raw^5",
                        "last_name.raw^5",
                        "first_name^3",
                        "last_name^3",
                        "email^3"
                    ]
                }
            }
        }
    }

    return self._search_helper(page_index=page_index, client=client,
                               query_name=query_name, model=User)
def test_search_with_one_user_result(self, mock_search: MagicMock) -> None:
    """A single ES hit should be mapped onto a User result."""
    mock_results = MagicMock()
    mock_results.hits.total = 1
    mock_results.__iter__.return_value = [Response(result=vars(self.mock_result4))]
    mock_search.return_value = mock_results
    expected = SearchResult(total_results=1,
                            results=[User(full_name='First Last',
                                          first_name='First',
                                          last_name='Last',
                                          team_name='Test team',
                                          email='*****@*****.**',
                                          github_username='******',
                                          manager_email='*****@*****.**',
                                          is_active=True,
                                          employee_type='FTE')])
    resp = self.es_proxy.fetch_user_search_results(query_term='test_query_term',
                                                   index='user_search_index')
    # assertEquals is a deprecated alias of assertEqual (removed in Python 3.12).
    self.assertEqual(resp.total_results, expected.total_results,
                     "search result is not of length 1")
    # Message fixed: this assertion checks for a 'User', not a 'Table'.
    self.assertIsInstance(resp.results[0], User,
                          "Search result received is not of 'User' type!")
    self.assertDictEqual(vars(resp.results[0]), vars(expected.results[0]),
                         "Search Result doesn't match with expected result!")
def test_search_normal(self) -> None:
    """Basic search against the mocked Atlas collection should yield one Table."""
    expected = SearchResult(total_results=2,
                            results=[Table(name=self.entity1_name,
                                           key=f"{self.entity_type}://"
                                               f"{self.cluster}.{self.db}/"
                                               f"{self.entity1_name}",
                                           description=self.entity1_description,
                                           cluster=self.cluster,
                                           database=self.entity_type,
                                           schema=self.db,
                                           column_names=[],
                                           tags=[Tag(tag_name='PII_DATA')],
                                           badges=[Tag(tag_name='PII_DATA')],
                                           last_updated_timestamp=123)])
    entity1 = self.to_class(self.entity1)
    entity_collection = MagicMock()
    entity_collection.entities = [entity1]
    entity_collection._data = {'approximateCount': 1}
    result = MagicMock(return_value=entity_collection)
    with patch.object(self.proxy.atlas.search_basic, 'create', result):
        resp = self.proxy.fetch_table_search_results(query_term="Table")
        # assertEquals is a deprecated alias of assertEqual.
        # NOTE(review): expected.total_results is 2 but the mock advertises an
        # approximateCount of 1 — only results[0] is compared below; confirm intent.
        self.assertEqual(resp.total_results, 1)
        self.assertIsInstance(resp.results[0], Table,
                              "Search result received is not of 'Table' type!")
        self.assertDictEqual(vars(resp.results[0]), vars(expected.results[0]),
                             "Search Result doesn't match with expected result!")
def test_search_with_one_table_result(self, mock_search: MagicMock) -> None:
    """A single ES hit should be mapped onto a Table result."""
    mock_results = MagicMock()
    mock_results.hits.total = 1
    mock_results.__iter__.return_value = [Response(result=vars(self.mock_result1))]
    mock_search.return_value = mock_results
    expected = SearchResult(total_results=1,
                            results=[Table(name='test_table',
                                           key='test_key',
                                           description='test_description',
                                           cluster='gold',
                                           database='test_db',
                                           schema='test_schema',
                                           column_names=['test_col1', 'test_col2'],
                                           tags=[],
                                           badges=self.mock_empty_badge,
                                           last_updated_timestamp=1527283287)])
    resp = self.es_proxy.fetch_table_search_results(query_term='test_query_term')
    # assertEquals is a deprecated alias of assertEqual (removed in Python 3.12).
    self.assertEqual(resp.total_results, expected.total_results,
                     "search result is not of length 1")
    self.assertIsInstance(resp.results[0], Table,
                          "Search result received is not of 'Table' type!")
    self.assertDictEqual(vars(resp.results[0]), vars(expected.results[0]),
                         "Search Result doesn't match with expected result!")
def test_search_table_filter(self, mock_search: MagicMock) -> None:
    """Filtered table search should serialise the single mocked hit."""
    mock_results = MagicMock()
    mock_results.hits.total = 1
    mock_results.__iter__.return_value = [Response(result=vars(self.mock_result1))]
    mock_search.return_value = mock_results
    expected = SearchResult(total_results=1,
                            results=[Table(name='test_table',
                                           key='test_key',
                                           description='test_description',
                                           cluster='gold',
                                           database='test_db',
                                           schema='test_schema',
                                           column_names=['test_col1', 'test_col2'],
                                           tags=self.mock_empty_tag,
                                           badges=self.mock_empty_badge,
                                           last_updated_timestamp=1527283287)])
    search_request = {
        'type': 'AND',
        'filters': {
            'database': ['hive', 'bigquery'],
            'schema': ['test-schema1', 'test-schema2'],
            'table': ['*amundsen*'],
            'column': ['*ds*'],
            'tag': ['test-tag'],
        }
    }
    resp = self.es_proxy.fetch_table_search_results_with_filter(search_request=search_request,
                                                                query_term='test')
    # assertEquals is a deprecated alias of assertEqual (removed in Python 3.12).
    self.assertEqual(resp.total_results, expected.total_results)
    self.assertIsInstance(resp.results[0], Table)
    self.assertDictEqual(vars(resp.results[0]), vars(expected.results[0]))
def test_unknown_field(self) -> None:
    """Searching on an unrecognised field should produce zero results."""
    expected = SearchResult(total_results=0, results=[])
    # Count queries report zero matches; entity queries return nothing.
    dsl_handlers = [
        (lambda dsl: "select count()" in dsl,
         {"attributes": {"name": ["count()"], "values": [[0]]}}),
        (lambda dsl: any(x in dsl for x in ["select table", "from Table"]),
         {'entities': []}),
    ]
    self.proxy.atlas.search_dsl = self.dsl_inject(dsl_handlers)
    self.proxy.atlas.entity_bulk = self.bulk_inject(
        [self.entity1, self.entity2, self.db_entity])
    resp = self.proxy.fetch_table_search_results(query_term="unknown:Table1")
    self.assertTrue(resp.total_results == 0, "there should no search results")
    self.assertIsInstance(resp, SearchResult,
                          "Search result received is not of 'SearchResult' type!")
    self.assertDictEqual(vars(resp), vars(expected),
                         "Search Result doesn't match with expected result!")
def fetch_table_search_results_with_filter(self, *,
                                           query_term: str,
                                           search_request: dict,
                                           page_index: int = 0,
                                           index: str = '') -> SearchResult:
    """
    Conduct an 'Advanced Search' to narrow down search results with a use of filters.

    Using Atlas Basic Search with filters to retrieve precise results and sort
    them by popularity score.

    :param query_term: A Search Query Term
    :param search_request: Values from Filters
    :param page_index: Index of search page user is currently on (for pagination)
    :param index: Search Index (different resource corresponding to different index)
    :return: SearchResult Object
    """
    _filters = search_request.get('filters', dict())

    db_filter_value = _filters.get('database')
    table_filter_value = _filters.get('table')
    cluster_filter_value = _filters.get('cluster')
    badges_filter_value = _filters.get('badges', list())
    tags_filter_value = _filters.get('tag', list())

    filters = list()

    # qualifiedName follows pattern ${db}.${table}@${cluster}
    if db_filter_value:
        filters.append((self.ATLAS_QN_ATTRIBUTE, 'STARTSWITH', db_filter_value[0] + '.'))

    if cluster_filter_value:
        filters.append((self.ATLAS_QN_ATTRIBUTE, 'ENDSWITH', '@' + cluster_filter_value[0]))

    if table_filter_value:
        filters.append(('name', 'CONTAINS', table_filter_value[0]))

    classifications: List[str] = list()

    if badges_filter_value or tags_filter_value:
        classifications = list(set(badges_filter_value + tags_filter_value))

    # Currently Atlas doesn't allow mixing search by filters and classifications
    if filters:
        query_params = self._prepare_basic_search_query(self.page_size, page_index,
                                                        filters=filters)
    elif classifications:
        query_params = self._prepare_basic_search_query(self.page_size, page_index,
                                                        classifications=classifications)
    else:
        # BUG FIX: previously `query_params` was left unbound when neither a filter
        # nor a classification was supplied, raising UnboundLocalError at the call
        # below. Return an empty result instead.
        return SearchResult(total_results=0, results=[])

    tables, approx_count = self._atlas_basic_search(query_params)

    return SearchResult(total_results=approx_count, results=tables)
def fetch_table_search_results(self, *, query_term: str, page_index: int = 0,
                               index: str = '') -> SearchResult:
    """
    Query Atlas and return results as list of Table objects.
    We use the Atlas DSL for querying the tables.
    https://atlas.apache.org/Search-Advanced.html

    :param query_term: search query term
    :param page_index: index of search page user is currently on
    :param index: search index (different resource corresponding to different index)
    :return: SearchResult Object
    """
    if not query_term:
        # Empty query -> empty result without touching Atlas.
        return SearchResult(total_results=0, results=[])

    # Match the term against the table name or its description.
    base_query = (f"Table from Table "
                  f"where name like '*{query_term}*' or "
                  f"description like '*{query_term}*' ")

    # First ask Atlas how many tables match in total.
    count_response = list(self.atlas.search_dsl(query=f"{base_query} select count()"))[0]
    total_count = count_response._data['attributes']['values'][0][0]

    # Then select the slice of tables belonging to the requested page.
    paged_query = (f"{base_query} "
                   f"limit {self.page_size} "
                   f"offset {page_index * self.page_size}")
    search_results = self.atlas.search_dsl(query=paged_query)

    tables = []
    # Parse entities only when there are matches and the page offset is in range.
    if 0 < total_count >= page_index * self.page_size:
        for result in search_results:
            tables.extend(self._parse_results(response=result.entities))

    return SearchResult(total_results=total_count, results=tables)
def test_search_not_match_with_field(self, mock_search: MagicMock) -> None:
    """An empty proxy result should propagate zero total_results."""
    mock_search.return_value = SearchResult(total_results=0, results=[])
    resp = self.es_proxy.fetch_search_results_with_field(
        query_term='test_query_term',
        field_name='tag_names',
        field_value='match')
    # assertEquals is a deprecated alias of assertEqual (removed in Python 3.12).
    self.assertEqual(resp.total_results, 0)
def test_search_normal(self) -> None:
    """DSL-based search should map both mocked entities onto Table results.

    Return annotation added for consistency with the sibling test methods;
    placeholder-free f-string literal replaced with a plain string.
    """
    expected = SearchResult(
        total_results=1,
        results=[
            Table(name=self._qualified('table', 'Table1'),
                  key=f"TEST_ENTITY://TEST_CLUSTER."
                      f"{self._qualified('db', 'TEST_DB')}/"
                      f"{self._qualified('table', 'Table1')}",
                  description='Dummy Description',
                  cluster='TEST_CLUSTER',
                  database='TEST_ENTITY',
                  schema_name=self._qualified('db', 'TEST_DB'),
                  column_names=[
                      # 'column@name'
                  ],
                  tags=['PII_DATA'],
                  last_updated_epoch=123),
            Table(name='Table2',
                  key="TEST_ENTITY://./Table2",
                  description='Dummy Description',
                  cluster='',
                  database='TEST_ENTITY',
                  schema_name='',
                  column_names=[
                      # 'column@name'
                  ],
                  tags=[],
                  last_updated_epoch=234)
        ])
    self.proxy.atlas.search_dsl = self.dsl_inject([
        (lambda dsl: "select count()" in dsl and "Table" in dsl, {
            "attributes": {
                "name": ["count()"],
                "values": [[2]]
            }
        }),
        (lambda dsl: "Table" in dsl and any(
            x in dsl for x in ["select table", "from Table"]), {
            'entities': [self.entity1, self.entity2]
        })
    ])
    self.proxy.atlas.entity_bulk = self.bulk_inject(
        [self.entity1, self.entity2, self.db_entity])
    resp = self.proxy.fetch_table_search_results(query_term="Table")
    self.assertTrue(resp.total_results == 2, "there should be 2 search result")
    self.assertIsInstance(resp.results[0], Table,
                          "Search result received is not of 'Table' type!")
    self.assertDictEqual(vars(resp.results[0]), vars(expected.results[0]),
                         "Search Result doesn't match with expected result!")
    self.assertDictEqual(vars(resp.results[1]), vars(expected.results[1]),
                         "Search Result doesn't match with expected result!")
def test_should_give_empty_result_when_there_are_no_results_from_proxy(self) -> None:
    """An empty proxy SearchResult serialises to an empty JSON payload."""
    empty = SearchResult(total_results=0, results=[])
    self.mock_proxy.fetch_table_search_results.return_value = empty
    response = self.app.test_client().get('/search?query_term=searchterm')
    self.assertEqual(response.json, {"total_results": 0, "results": []})
def test_search_with_empty_result(self, mock_search: MagicMock) -> None:
    """Zero ES hits should come back as an empty SearchResult."""
    mock_results = MagicMock()
    mock_results.hits.total = 0
    mock_search.return_value = mock_results
    result = self.es_proxy.fetch_table_search_results(query_term='test_query_term')
    self.assertDictEqual(vars(result),
                         vars(SearchResult(total_results=0, results=[])),
                         "Received non-empty search results!")
def test_should_get_default_response_values_when_values_not_in_proxy_response(self) -> None:
    """Fields missing from a proxy hit should be defaulted in the JSON output."""
    self.mock_proxy.fetch_table_search_results_with_field.return_value = \
        SearchResult(total_results=1, results=[{}])
    response = self.app.test_client().get('/search/field/field_name/field_val/myvalue')
    self.assertEqual(response.json,
                     {"total_results": 1, "results": [default_json_response()]})
def test_search_with_empty_query_string(self, mock_search: MagicMock) -> None:
    """A blank query term must short-circuit without hitting Elasticsearch."""
    expected = SearchResult(total_results=0, results=[])
    result = self.es_proxy.fetch_table_search_results(query_term='')
    # check the output was empty list
    self.assertDictEqual(vars(result), vars(expected),
                         "Received non-empty search results!")
    # ensure elasticsearch_dsl Search endpoint was not called
    # assert_not_called doesn't work. See here: http://engineroom.trackmaven.com/blog/mocking-mistakes/
    self.assertTrue(mock_search.call_count == 0)
def test_should_get_default_response_values_when_values_not_in_proxy_response(self) -> None:
    """Fields missing from a dashboard proxy hit should be defaulted in the JSON output."""
    self.mock_proxy.fetch_dashboard_search_results.return_value = \
        SearchResult(total_results=1, results=[{}])
    response = self.app.test_client().get('/search_dashboard?query_term=searchterm')
    self.assertEqual(response.json,
                     {"total_results": 1, "results": [default_json_response()]})
def test_should_get_result_for_search(self) -> None:
    """Dashboard search endpoint should serialise proxy results and call the proxy with defaults."""
    self.mock_proxy.fetch_dashboard_search_results.return_value = \
        SearchResult(total_results=1, results=[mock_proxy_results()])
    response = self.app.test_client().get('/search_dashboard?query_term=searchterm')
    self.assertEqual(response.json,
                     {"total_results": 1, "results": [mock_json_response()]})
    self.assertEqual(response.status_code, HTTPStatus.OK)
    self.mock_proxy.fetch_dashboard_search_results.assert_called_with(
        query_term='searchterm', page_index=0, index='dashboard_search_index')
def test_search_fields(self) -> None:
    """Each supported search field should yield the single mocked table.

    Return annotation added for consistency with the sibling test methods.
    """
    fields = ['tag', 'schema', 'table', 'column']
    for field in fields:
        expected = SearchResult(
            total_results=1,
            results=[
                Table(name=self._qualified('table', 'Table1'),
                      key=f"TEST_ENTITY://TEST_CLUSTER"
                          f".{self._qualified('db', 'TEST_DB')}/"
                          f"{self._qualified('table', 'Table1')}",
                      description='Dummy Description',
                      cluster='TEST_CLUSTER',
                      database='TEST_ENTITY',
                      schema_name=self._qualified('db', 'TEST_DB'),
                      column_names=[
                          # 'column@name'
                      ],
                      tags=['PII_DATA'],
                      last_updated_epoch=123)
            ])
        self.proxy.atlas.search_dsl = self.dsl_inject([
            (lambda dsl: "select count()" in dsl, {
                "attributes": {
                    "name": ["count()"],
                    "values": [[1]]
                }
            }),
            (lambda dsl: any(
                x in dsl for x in ["select table", "from Table", "hive_column"]), {
                'entities': [self.entity1]
            })
        ])
        self.proxy.atlas.entity_bulk = self.bulk_inject([self.entity1, self.db_entity])
        resp = self.proxy.fetch_table_search_results_with_field(
            query_term=field + "Table1",
            field_name=field,
            field_value="Table1")
        self.assertTrue(resp.total_results == 1, "there should be 1 search result")
        self.assertIsInstance(resp.results[0], Table,
                              "Search result received is not of 'Table' type!")
        self.assertDictEqual(vars(resp.results[0]), vars(expected.results[0]),
                             "Search Result doesn't match with expected result!")
def test_search_schema_column(self) -> None:
    """Field search by 'schema' and 'column' should each return the single mocked table."""
    for field in ('schema', 'column'):
        table_key = (f"{self.entity_type}://"
                     f"{self.cluster}.{self.db}/"
                     f"{self.entity1_name}")
        expected = SearchResult(total_results=1,
                                results=[Table(name=self.entity1_name,
                                               key=table_key,
                                               description=self.entity1_description,
                                               cluster=self.cluster,
                                               database=self.entity_type,
                                               schema=self.db,
                                               column_names=[],
                                               tags=[Tag(tag_name='PII_DATA')],
                                               badges=[],
                                               last_updated_timestamp=123)])
        # Count queries report one match; entity queries return entity1.
        self.proxy.atlas.search_dsl = self.dsl_inject([
            (lambda dsl: "select count()" in dsl,
             {"attributes": {"name": ["count()"], "values": [[1]]}}),
            (lambda dsl: any(
                x in dsl for x in ["select table", "from Table", "hive_column"]),
             {'entities': [self.entity1]}),
        ])
        self.proxy.atlas.entity_bulk = self.bulk_inject([self.entity1, self.db_entity])
        resp = self.proxy.fetch_table_search_results_with_field(
            query_term=field + "Table1",
            field_name=field,
            field_value="Table1")
        self.assertTrue(resp.total_results == 1, "there should be 1 search result")
        self.assertIsInstance(resp.results[0], Table,
                              "Search result received is not of 'Table' type!")
        self.assertDictEqual(vars(resp.results[0]), vars(expected.results[0]),
                             "Search Result doesn't match with expected result!")
def test_search_with_multiple_result(self, mock_search: MagicMock) -> None:
    """Two ES hits should be mapped onto two Table results in order."""
    mock_results = MagicMock()
    mock_results.hits.total = 2
    mock_results.__iter__.return_value = [
        Response(result=vars(self.mock_result1)),
        Response(result=vars(self.mock_result2))
    ]
    mock_search.return_value = mock_results
    expected = SearchResult(
        total_results=2,
        results=[
            Table(name='test_table',
                  key='test_key',
                  description='test_description',
                  cluster='gold',
                  database='test_db',
                  schema_name='test_schema',
                  column_names=['test_col1', 'test_col2'],
                  tags=[],
                  last_updated_epoch=1527283287),
            Table(name='test_table2',
                  key='test_key2',
                  description='test_description2',
                  cluster='gold',
                  database='test_db2',
                  schema_name='test_schema2',
                  column_names=['test_col1', 'test_col2'],
                  tags=[],
                  last_updated_epoch=1527283287)
        ])
    resp = self.es_proxy.fetch_table_search_results(query_term='test_query_term')
    # assertEquals is a deprecated alias of assertEqual (removed in Python 3.12).
    self.assertEqual(resp.total_results, expected.total_results,
                     "search result is not of length 2")
    for i in range(2):
        self.assertIsInstance(resp.results[i], Table,
                              "Search result received is not of 'Table' type!")
        self.assertDictEqual(vars(resp.results[i]), vars(expected.results[i]),
                             "Search result doesn't match with expected result!")
def fetch_table_search_results(self, *, query_term: str, page_index: int = 0,
                               index: str = '') -> SearchResult:
    """
    Query Elasticsearch and return results as list of Table objects.

    :param query_term: search query term
    :param page_index: index of search page user is currently on
    :param index: current index for search. Provide different index for different resource.
    :return: SearchResult Object
    """
    current_index = index or current_app.config.get(config.ELASTICSEARCH_INDEX_KEY,
                                                    DEFAULT_ES_INDEX)
    if not query_term:
        # Blank query -> empty result, skip ES entirely.
        return SearchResult(total_results=0, results=[])

    client = Search(using=self.elasticsearch, index=current_index)

    # Heavily boost display/raw-name matches; weight final score by usage.
    query_name = {
        "function_score": {
            "query": {
                "multi_match": {
                    "query": query_term,
                    "fields": [
                        "display_name^1000",
                        "name.raw^75",
                        "name^5",
                        "schema^3",
                        "description^3",
                        "column_names^2",
                        "column_descriptions",
                        "tags",
                        "badges"
                    ],
                }
            },
            "field_value_factor": {
                "field": "total_usage",
                "modifier": "log2p"
            }
        }
    }

    return self._search_helper(page_index=page_index, client=client,
                               query_name=query_name, model=Table)
def fetch_dashboard_search_results(self, *, query_term: str, page_index: int = 0,
                                   index: str = '') -> SearchResult:
    """
    Fetch dashboard search result with fuzzy search.

    :param query_term: search query term
    :param page_index: index of search page user is currently on
    :param index: current index for search; falls back to the configured default
    :return: SearchResult Object
    """
    current_index = index or current_app.config.get(config.ELASTICSEARCH_INDEX_KEY,
                                                    DEFAULT_ES_INDEX)
    if not query_term:
        # Blank query -> empty result, no ES round trip.
        return SearchResult(total_results=0, results=[])

    client = Search(using=self.elasticsearch, index=current_index)

    # Boost raw-name matches highest; weight final score by usage.
    query_name = {
        "function_score": {
            "query": {
                "multi_match": {
                    "query": query_term,
                    "fields": [
                        "name.raw^75",
                        "name^5",
                        "group_name.raw^5",
                        "description^3",
                        "query_names^3"
                    ]
                }
            },
            "field_value_factor": {
                "field": "total_usage",
                "modifier": "log2p"
            }
        }
    }

    return self._search_helper(page_index=page_index, client=client,
                               query_name=query_name, model=Dashboard)
def _get_search_result(self, page_index: int, client: Search, model: Any) -> SearchResult:
    """
    Common helper function to get result.

    :param page_index: page to fetch; -1 means fetch everything
    :param client: prepared elasticsearch_dsl Search client
    :param model: The model to import result(table, user etc)
    :return: SearchResult built from hits that carry the model's attributes
    :raises Exception: when no model is supplied
    """
    if model is None:
        raise Exception('ES Doc model must be provided!')

    # Window the query: a concrete page, or the full result set for -1.
    if page_index == -1:
        client = client[0:client.count()]
    else:
        start_from = page_index * self.page_size
        client = client[start_from:start_from + self.page_size]

    response = client.execute()

    results = []
    for hit in response:
        try:
            # ES hit: {'_d_': {'key': xxx...}
            es_payload = hit.__dict__.get('_d_', {})
            if not es_payload:
                raise Exception('The ES doc not contain required field')
            # Keep only the payload keys the model actually declares.
            known_attrs = model.get_attrs()
            fields = {attr: val for attr, val in es_payload.items()
                      if attr in known_attrs}
            results.append(model(**fields))
        except Exception:
            # Best-effort: log and skip malformed records instead of failing the page.
            LOGGING.exception('The record doesnt contain specified field.')

    return SearchResult(total_results=response.hits.total, results=results)