Beispiel #1
0
    def test_fetch_dashboard_search_results(self, mock_search: MagicMock) -> None:
        """
        Dashboard search should hand back what the injected ``mock_search`` returns.

        ``mock_search`` is primed with a one-item SearchResult; the proxy response
        must report the same total and an attribute-wise identical Dashboard.
        """
        dashboard_fields = dict(id='mode_dashboard',
                                uri='dashboard_uri',
                                cluster='gold',
                                group_name='mode_dashboard_group',
                                group_url='mode_dashboard_group_url',
                                product='mode',
                                name='mode_dashboard',
                                url='mode_dashboard_url',
                                description='test_dashboard',
                                last_successful_run_timestamp=1000)

        # Two distinct but equal instances: one served by the mock, one to compare against
        served_dashboard = Dashboard(**dashboard_fields)
        mock_search.return_value = SearchResult(total_results=1,
                                                results=[served_dashboard])
        expected = SearchResult(total_results=1,
                                results=[Dashboard(**dashboard_fields)])

        resp = self.es_proxy.fetch_dashboard_search_results(
            query_term='test_query_term',
            page_index=0,
            index='dashboard_search_index')

        self.assertEqual(resp.total_results, expected.total_results)

        actual_attributes = vars(resp.results[0])
        expected_attributes = vars(expected.results[0])
        self.assertDictEqual(actual_attributes,
                             expected_attributes,
                             "Search result doesn't match with expected result!")
Beispiel #2
0
    def test_search_match_with_field(self,
                                     mock_search: MagicMock) -> None:
        """
        A field-restricted table search should return the single matching table.

        The injected ``mock_search`` is primed with one result (``self.mock_result3``);
        the proxy response must carry the same total and an attribute-wise identical Table.
        """
        mock_search.return_value = SearchResult(total_results=1,
                                                results=[self.mock_result3])

        expected = SearchResult(total_results=1,
                                results=[Table(name='test_table3',
                                               key='test_key3',
                                               description='test_description3',
                                               cluster='gold',
                                               database='test_db3',
                                               schema='test_schema3',
                                               column_names=['test_col1', 'test_col2'],
                                               tags=[self.mock_tag],
                                               badges=[self.mock_badge],
                                               last_updated_timestamp=1527283287)])

        resp = self.es_proxy.fetch_table_search_results_with_field(query_term='test_query_term',
                                                                   field_name='tag',
                                                                   field_value='match')
        # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12)
        self.assertEqual(resp.total_results, expected.total_results)

        self.assertDictEqual(vars(resp.results[0]),
                             vars(expected.results[0]),
                             "Search result doesn't match with expected result!")
    def fetch_table_search_results(self,
                                   *,
                                   query_term: str,
                                   page_index: int = 0,
                                   index: str = '') -> SearchResult:
        """
        Serve the Amundsen UI 'Basic Search' through the Atlas Basic Search API.

        The search is expressed as a CONTAINS filter on the `qualifiedName` attribute
        (``self.ATLAS_QN_ATTRIBUTE``). Per the Atlas documentation any 'Referencable'
        entity can be looked up through that unique attribute.

        :param query_term: Search Query Term; a blank term yields an empty result
        :param page_index: Index of search page user is currently on (for pagination)
        :param index: Search Index (not used by this implementation)
        :return: SearchResult Object
        """
        if query_term:
            # @todo switch to search with 'query' not 'filters' once Atlas FreeTextSearchProcessor is fixed
            # https://reviews.apache.org/r/72440/
            qualified_name_filter = (self.ATLAS_QN_ATTRIBUTE, 'CONTAINS', query_term)

            # a single filter on qualifiedName, which already holds both dbName and tableName
            search_query = self._prepare_basic_search_query(self.page_size,
                                                            page_index,
                                                            filters=[qualified_name_filter],
                                                            operator='OR')

            found_tables, total_count = self._atlas_basic_search(search_query)
            return SearchResult(total_results=total_count, results=found_tables)

        # nothing to look for: skip the Atlas round trip entirely
        return SearchResult(total_results=0, results=[])
Beispiel #4
0
    def fetch_table_search_results(self,
                                   *,
                                   query_term: str,
                                   page_index: int = 0,
                                   index: str = '') -> SearchResult:
        """
        Look up tables in Atlas with a wildcard query and wrap them in a SearchResult.

        :param query_term: search query term; a blank term yields an empty result
        :param page_index: index of search page user is currently on
        :param index: search index (not used by this implementation)
        :return: SearchResult Object
        """
        if not query_term:
            # blank query term: nothing to search for
            return SearchResult(total_results=0, results=[])

        page_size = self.page_size
        search_request = {
            'typeName': 'Table',
            'excludeDeletedEntities': True,
            'limit': page_size,
            # pages are laid out back to back, so the offset is a whole number of pages
            'offset': page_index * page_size,
            # wrap the term in wildcards on both sides
            'query': f'*{query_term}*',
            'attributes': ['description', 'comment'],
        }

        found_tables, total_count = self._fetch_tables(search_request)
        return SearchResult(total_results=total_count, results=found_tables)
Beispiel #5
0
    def fetch_table_search_results_with_filter(
            self,
            *,
            query_term: str,
            search_request: dict,
            page_index: int = 0,
            index: str = '') -> SearchResult:
        """
        Run a filtered search against Elasticsearch and return the matching page.

        :param query_term: search term, handed to ``convert_query_json_to_query_dsl``
                           together with ``search_request``
        :param search_request: A json representation of search request
        :param page_index: index of search page user is currently on
        :param index: index to search; when empty, the value configured under
                      ``config.ELASTICSEARCH_INDEX_KEY`` is used
                      (falling back to ``DEFAULT_ES_INDEX``)
        :return: SearchResult Object; empty when ``search_request`` is empty or
                 cannot be converted into a query string
        """
        # an explicitly requested index wins over the configured one
        current_index = index or \
            current_app.config.get(config.ELASTICSEARCH_INDEX_KEY, DEFAULT_ES_INDEX)  # type: str

        if not search_request:
            # no search request given: nothing to search for
            return SearchResult(total_results=0, results=[])

        try:
            query_string = self.convert_query_json_to_query_dsl(
                search_request=search_request,
                query_term=query_term)  # type: str
        except Exception as e:
            # any conversion failure is logged and reported as "no results"
            LOGGING.exception(e)
            return SearchResult(total_results=0, results=[])

        search_client = Search(using=self.elasticsearch, index=current_index)

        text_query = {"query_string": {"query": query_string}}
        # score is scaled by the document's total_usage field (log2p modifier)
        usage_factor = {"field": "total_usage", "modifier": "log2p"}
        query_name = {
            "function_score": {
                "query": text_query,
                "field_value_factor": usage_factor,
            }
        }

        return self._search_helper(page_index=page_index,
                                   client=search_client,
                                   query_name=query_name,
                                   model=self.get_model_by_index(current_index))
    def _get_search_result(self, page_index: int,
                           client: Search) -> SearchResult:
        """
        Execute a prepared search for one page and convert the hits into Tables.

        :param page_index: zero-based page to fetch; each page holds ``self.page_size`` hits
        :param client: search object that is sliced to the page window and executed
        :return: SearchResult with the total hit count and the Tables of the page
        """
        # Translate the page number into a [first_hit, last_hit) window
        first_hit = page_index * self.page_size
        last_hit = first_hit + self.page_size
        response = client[first_hit:last_hit].execute()

        page_tables = [
            Table(name=hit.table_name,
                  key=hit.table_key,
                  description=hit.table_description,
                  cluster=hit.cluster,
                  database=hit.database,
                  schema_name=hit.schema_name,
                  column_names=hit.column_names,
                  tags=hit.tag_names,
                  last_updated_epoch=hit.table_last_updated_epoch)
            for hit in response
        ]

        return SearchResult(total_results=response.hits.total,
                            results=page_tables)
Beispiel #7
0
    def fetch_search_results_with_field(self,
                                        *,
                                        query_term: str,
                                        field_name: str,
                                        field_value: str,
                                        page_index: int = 0) -> SearchResult:
        """
        Query Atlas and return results as list of Table objects.
        Per field name we have a count query and a query for the tables.
        https://atlas.apache.org/Search-Advanced.html

        Recognised field names are 'tag', 'schema', 'table' and 'column'; any other
        name produces a query that matches nothing.

        :param query_term: search query term (not used by this implementation)
        :param field_name: field name to do the searching(e.g schema_name, tag_names)
        :param field_value: value for the field for filtering
        :param page_index: index of search page user is currently on
        :return: SearchResult Object; when Atlas rejects a query (BadRequest) the
                 error is logged and whatever was gathered so far is returned
        """
        # NOTE(review): field_value is interpolated verbatim into the DSL text below.
        # If it can come from untrusted input, quotes in it can alter the query -
        # confirm that callers validate or escape it.
        count_sql = None
        if field_name == 'tag':
            sql = f"from Table where Table is '{field_value}'"
        elif field_name == 'schema':
            sql = f"from Table where db.name like '{field_value}'"
        elif field_name == 'table':
            sql = f"from Table where name like '{field_value}'"
        elif field_name == 'column':
            sql = f"hive_column where name like '{field_value}' select table"
            # TODO nanne: count tables instead of columns
            count_sql = f"hive_column where name like '{field_value}' select count()"
        else:
            # unknown field: a query that can never match
            sql = "Table from Table where false"

        if count_sql is None:
            count_sql = f"{sql} select count()"

        LOGGER.debug(f"Used following sql query: {sql}")
        tables: List[Table] = []
        count_value = 0
        try:
            # count results
            count_params = {'query': count_sql}
            count_results = list(self.atlas.search_dsl(**count_params))[0]
            count_value = count_results._data['attributes']['values'][0][0]

            params = {
                'query':
                f"{sql} limit {self.page_size} offset {page_index * self.page_size}"
            }
            search_results = self.atlas.search_dsl(**params)
            if count_value > 0 and page_index * self.page_size <= count_value:
                # unpack all collections (usually just one collection though)
                for collection in search_results:
                    if hasattr(collection, 'entities'):
                        tables.extend(
                            self._parse_results(response=collection.entities))
        except BadRequest:
            # The %s placeholder is required: without it the logging module fails to
            # format the record and the offending query never reaches the log.
            LOGGER.error("Atlas Search DSL error with the following query: %s",
                         sql)

        return SearchResult(total_results=count_value, results=tables)
    def test_search_tag_table(self) -> None:
        """
        Field search by 'tag' and by 'table' should each return the single mocked table.

        ``search_basic.create`` is patched to return a collection holding ``entity1``
        with an approximate count of 1.
        """
        # The expectation does not depend on the field, so build it once
        expected = SearchResult(total_results=1,
                                results=[Table(name=self.entity1_name,
                                               key=f"{self.entity_type}://"
                                                   f"{self.cluster}.{self.db}/"
                                                   f"{self.entity1_name}",
                                               description=self.entity1_description,
                                               cluster=self.cluster,
                                               database=self.entity_type,
                                               schema_name=self.db,
                                               column_names=[],
                                               tags=[],
                                               last_updated_epoch=123)])

        for field in ('tag', 'table'):
            # subTest names the failing field in the report instead of a bare loop failure
            with self.subTest(field_name=field):
                entity1 = self.to_class(self.entity1)
                entity_collection = MagicMock()
                entity_collection.entities = [entity1]
                entity_collection._data = {'approximateCount': 1}

                result = MagicMock(return_value=entity_collection)

                with patch.object(self.proxy.atlas.search_basic, 'create', result):
                    resp = self.proxy.fetch_table_search_results_with_field(
                        query_term=field + "Table1",
                        field_name=field,
                        field_value="Table1"
                    )
                    # assertEqual reports the actual count on failure, unlike assertTrue(a == b)
                    self.assertEqual(resp.total_results, 1, "there should be 1 search result")
                    self.assertIsInstance(resp.results[0], Table,
                                          "Search result received is not of 'Table' type!")
                    self.assertDictEqual(vars(resp.results[0]), vars(expected.results[0]),
                                         "Search Result doesn't match with expected result!")
    def fetch_user_search_results(self,
                                  *,
                                  query_term: str,
                                  page_index: int = 0,
                                  index: str = '') -> SearchResult:
        """
        Search users in Elasticsearch by name and email.

        :param query_term: search query term; a blank term yields an empty result
        :param page_index: index of search page user is currently on
        :param index: index holding the user documents; must not be empty
        :return: SearchResult Object
        :raises Exception: when ``index`` is empty
        """
        if not index:
            raise Exception('Index cant be empty for user search')

        if not query_term:
            # blank query term: nothing to search for
            return SearchResult(total_results=0, results=[])

        search_client = Search(using=self.elasticsearch, index=index)

        # per-field boosts; the raw name carries the highest boost
        boosted_fields = [
            "name.raw^30", "name^5", "first_name.raw^5",
            "last_name.raw^5", "first_name^3", "last_name^3",
            "email^3",
        ]
        user_match = {"multi_match": {"query": query_term, "fields": boosted_fields}}

        # Don't use any weight(total_follow, total_own, total_use)
        query_name = {"function_score": {"query": user_match}}

        return self._search_helper(page_index=page_index,
                                   client=search_client,
                                   query_name=query_name,
                                   model=User)
Beispiel #10
0
    def test_search_with_one_user_result(self,
                                         mock_search: MagicMock) -> None:
        """
        A user search whose backend yields one hit should return exactly that User.

        The injected ``mock_search`` returns a response with one hit built from
        ``self.mock_result4`` and a hit total of 1.
        """
        mock_results = MagicMock()
        mock_results.hits.total = 1
        mock_results.__iter__.return_value = [Response(result=vars(self.mock_result4))]
        mock_search.return_value = mock_results

        expected = SearchResult(total_results=1,
                                results=[User(full_name='First Last',
                                              first_name='First',
                                              last_name='Last',
                                              team_name='Test team',
                                              email='*****@*****.**',
                                              github_username='******',
                                              manager_email='*****@*****.**',
                                              is_active=True,
                                              employee_type='FTE')])

        resp = self.es_proxy.fetch_user_search_results(query_term='test_query_term',
                                                       index='user_search_index')

        # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12)
        self.assertEqual(resp.total_results, expected.total_results,
                         "search result is not of length 1")
        # The failure message now names the type that is actually being checked
        self.assertIsInstance(resp.results[0],
                              User,
                              "Search result received is not of 'User' type!")
        self.assertDictEqual(vars(resp.results[0]), vars(expected.results[0]),
                             "Search Result doesn't match with expected result!")
    def test_search_normal(self) -> None:
        """
        A plain table search should return the single table served by the patched Atlas client.

        ``search_basic.create`` is patched to return a collection holding ``entity1``
        with an approximate count of 1.
        """
        # total_results matches the mocked approximateCount, so the expectation is self-consistent
        expected = SearchResult(total_results=1,
                                results=[
                                    Table(name=self.entity1_name,
                                          key=f"{self.entity_type}://"
                                          f"{self.cluster}.{self.db}/"
                                          f"{self.entity1_name}",
                                          description=self.entity1_description,
                                          cluster=self.cluster,
                                          database=self.entity_type,
                                          schema=self.db,
                                          column_names=[],
                                          tags=[Tag(tag_name='PII_DATA')],
                                          badges=[Tag(tag_name='PII_DATA')],
                                          last_updated_timestamp=123)
                                ])
        entity1 = self.to_class(self.entity1)
        entity_collection = MagicMock()
        entity_collection.entities = [entity1]
        entity_collection._data = {'approximateCount': 1}

        result = MagicMock(return_value=entity_collection)

        with patch.object(self.proxy.atlas.search_basic, 'create', result):
            resp = self.proxy.fetch_table_search_results(query_term="Table")
            # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12)
            self.assertEqual(resp.total_results, expected.total_results)
            self.assertIsInstance(
                resp.results[0], Table,
                "Search result received is not of 'Table' type!")
            self.assertDictEqual(
                vars(resp.results[0]), vars(expected.results[0]),
                "Search Result doesn't match with expected result!")
Beispiel #12
0
    def test_search_with_one_table_result(self,
                                          mock_search: MagicMock) -> None:
        """
        A table search whose backend yields one hit should return exactly that Table.

        The injected ``mock_search`` returns a response with one hit built from
        ``self.mock_result1`` and a hit total of 1.
        """
        mock_results = MagicMock()
        mock_results.hits.total = 1
        mock_results.__iter__.return_value = [Response(result=vars(self.mock_result1))]
        mock_search.return_value = mock_results

        expected = SearchResult(total_results=1,
                                results=[Table(name='test_table',
                                               key='test_key',
                                               description='test_description',
                                               cluster='gold',
                                               database='test_db',
                                               schema='test_schema',
                                               column_names=['test_col1', 'test_col2'],
                                               tags=[],
                                               badges=self.mock_empty_badge,
                                               last_updated_timestamp=1527283287)])

        resp = self.es_proxy.fetch_table_search_results(query_term='test_query_term')

        # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12)
        self.assertEqual(resp.total_results, expected.total_results,
                         "search result is not of length 1")
        self.assertIsInstance(resp.results[0],
                              Table,
                              "Search result received is not of 'Table' type!")
        self.assertDictEqual(vars(resp.results[0]), vars(expected.results[0]),
                             "Search Result doesn't match with expected result!")
Beispiel #13
0
    def test_search_table_filter(self, mock_search: MagicMock) -> None:
        """
        A filtered table search should return the single Table yielded by the backend.

        The injected ``mock_search`` returns a response with one hit built from
        ``self.mock_result1``; the request combines several filters with 'AND'.
        """
        mock_results = MagicMock()
        mock_results.hits.total = 1
        mock_results.__iter__.return_value = [Response(result=vars(self.mock_result1))]
        mock_search.return_value = mock_results

        expected = SearchResult(total_results=1,
                                results=[Table(name='test_table',
                                               key='test_key',
                                               description='test_description',
                                               cluster='gold',
                                               database='test_db',
                                               schema='test_schema',
                                               column_names=['test_col1', 'test_col2'],
                                               tags=self.mock_empty_tag,
                                               badges=self.mock_empty_badge,
                                               last_updated_timestamp=1527283287)])
        search_request = {
            'type': 'AND',
            'filters': {
                'database': ['hive', 'bigquery'],
                'schema': ['test-schema1', 'test-schema2'],
                'table': ['*amundsen*'],
                'column': ['*ds*'],
                'tag': ['test-tag'],
            }
        }
        resp = self.es_proxy.fetch_table_search_results_with_filter(search_request=search_request, query_term='test')

        # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12)
        self.assertEqual(resp.total_results, expected.total_results)
        self.assertIsInstance(resp.results[0], Table)
        self.assertDictEqual(vars(resp.results[0]), vars(expected.results[0]))
 def test_unknown_field(self) -> None:
     """
     Searching with the term "unknown:Table1" should produce an empty SearchResult.

     The DSL client is replaced so that count queries report 0 and table
     queries return no entities.
     """
     expected = SearchResult(total_results=0, results=[])
     self.proxy.atlas.search_dsl = self.dsl_inject([
         (lambda dsl: "select count()" in dsl, {
             "attributes": {
                 "name": ["count()"],
                 "values": [[0]]
             }
         }),
         (lambda dsl: any(x in dsl
                          for x in ["select table", "from Table"]), {
                              'entities': []
                          })
     ])
     self.proxy.atlas.entity_bulk = self.bulk_inject(
         [self.entity1, self.entity2, self.db_entity])
     resp = self.proxy.fetch_table_search_results(
         query_term="unknown:Table1")
     # assertEqual reports the actual count on failure, unlike assertTrue(a == b)
     self.assertEqual(resp.total_results, 0,
                      "there should be no search results")
     self.assertIsInstance(
         resp, SearchResult,
         "Search result received is not of 'SearchResult' type!")
     self.assertDictEqual(
         vars(resp), vars(expected),
         "Search Result doesn't match with expected result!")
    def fetch_table_search_results_with_filter(
            self,
            *,
            query_term: str,
            search_request: dict,
            page_index: int = 0,
            index: str = '') -> SearchResult:
        """
        Conduct an 'Advanced Search' to narrow down search results with a use of filters.

        Uses Atlas Basic Search. Attribute filters ('database', 'cluster', 'table') take
        precedence; classifications ('badges', 'tag') are only used when no attribute
        filter is present. Only the first value of each attribute filter is used.

        :param query_term: A Search Query Term (not used by this implementation)
        :param search_request: Values from Filters, read from its 'filters' entry
        :param page_index: Index of search page user is currently on (for pagination)
        :param index: Search Index (not used by this implementation)
        :return: SearchResult Object; empty when the request carries neither a
                 supported filter nor a classification
        """
        # Tolerate a missing/None request or 'filters' entry instead of raising
        _filters = (search_request or dict()).get('filters') or dict()

        db_filter_value = _filters.get('database')
        table_filter_value = _filters.get('table')
        cluster_filter_value = _filters.get('cluster')
        # `or list()` also covers keys that are present but set to None
        badges_filter_value = _filters.get('badges') or list()
        tags_filter_value = _filters.get('tag') or list()

        filters = list()

        # qualifiedName follows pattern ${db}.${table}@${cluster}
        if db_filter_value:
            filters.append((self.ATLAS_QN_ATTRIBUTE, 'STARTSWITH',
                            db_filter_value[0] + '.'))

        if cluster_filter_value:
            filters.append((self.ATLAS_QN_ATTRIBUTE, 'ENDSWITH',
                            '@' + cluster_filter_value[0]))

        if table_filter_value:
            filters.append(('name', 'CONTAINS', table_filter_value[0]))

        # badges and tags are both searched as classifications; de-duplicate them
        classifications: List[str] = list(set(list(badges_filter_value) +
                                              list(tags_filter_value)))

        # Currently Atlas doesn't allow mixing search by filters and classifications
        if filters:
            query_params = self._prepare_basic_search_query(self.page_size,
                                                            page_index,
                                                            filters=filters)
        elif classifications:
            query_params = self._prepare_basic_search_query(
                self.page_size, page_index, classifications=classifications)
        else:
            # Nothing to search by. Previously this path fell through and raised
            # UnboundLocalError because query_params was never assigned.
            return SearchResult(total_results=0, results=[])

        tables, approx_count = self._atlas_basic_search(query_params)

        return SearchResult(total_results=approx_count, results=tables)
Beispiel #16
0
    def fetch_table_search_results(self,
                                   *,
                                   query_term: str,
                                   page_index: int = 0,
                                   index: str = '') -> SearchResult:
        """
        Search Atlas tables by name or description using the Atlas DSL.
        https://atlas.apache.org/Search-Advanced.html

        Two DSL queries are issued: one counting all matches and one fetching
        the requested page.

        :param query_term: search query term; a blank term yields an empty result
        :param page_index: index of search page user is currently on
        :param index: search index (not used by this implementation)
        :return: SearchResult Object
        """
        if not query_term:
            # blank query term: nothing to search for
            return SearchResult(total_results=0, results=[])

        # shared base of both the count query and the page query
        sql = (f"Table from Table "
               f"where name like '*{query_term}*' or "
               f"description like '*{query_term}*' ")

        # how many tables match in total
        count_query = f"{sql} select count()"
        count_response = list(self.atlas.search_dsl(query=count_query))[0]
        count_value = count_response._data['attributes']['values'][0][0]

        # the requested page of tables
        offset = page_index * self.page_size
        page_query = f"{sql} limit {self.page_size} offset {offset}"
        search_results = self.atlas.search_dsl(query=page_query)

        # only unpack the response when there are matches and the page starts within them
        tables = []
        if count_value > 0 and count_value >= offset:
            for collection in search_results:
                tables += self._parse_results(response=collection.entities)

        return SearchResult(total_results=count_value, results=tables)
    def test_search_not_match_with_field(self, mock_search: MagicMock) -> None:
        """
        A field search whose backend reports no matches should return a total of 0.

        The injected ``mock_search`` is primed with an empty SearchResult.
        """
        mock_search.return_value = SearchResult(total_results=0, results=[])

        resp = self.es_proxy.fetch_search_results_with_field(
            query_term='test_query_term',
            field_name='tag_names',
            field_value='match')
        # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12)
        self.assertEqual(resp.total_results, 0)
 def test_search_normal(self) -> None:
     """
     A plain table search should return both tables served by the replaced DSL client.

     Count queries report 2 matches and table queries return ``entity1`` and
     ``entity2``; both results are compared attribute by attribute.
     """
     expected = SearchResult(
         # two tables are expected, matching the mocked count below
         total_results=2,
         results=[
             Table(
                 name=self._qualified('table', 'Table1'),
                 key=f"TEST_ENTITY://TEST_CLUSTER."
                 f"{self._qualified('db', 'TEST_DB')}/"
                 f"{self._qualified('table', 'Table1')}",
                 description='Dummy Description',
                 cluster='TEST_CLUSTER',
                 database='TEST_ENTITY',
                 schema_name=self._qualified('db', 'TEST_DB'),
                 column_names=[
                     # 'column@name'
                 ],
                 tags=['PII_DATA'],
                 last_updated_epoch=123),
             Table(
                 name='Table2',
                 key="TEST_ENTITY://./Table2",
                 description='Dummy Description',
                 cluster='',
                 database='TEST_ENTITY',
                 schema_name='',
                 column_names=[
                     # 'column@name'
                 ],
                 tags=[],
                 last_updated_epoch=234)
         ])
     self.proxy.atlas.search_dsl = self.dsl_inject([
         (lambda dsl: "select count()" in dsl and "Table" in dsl, {
             "attributes": {
                 "name": ["count()"],
                 "values": [[2]]
             }
         }),
         (lambda dsl: "Table" in dsl and any(
             x in dsl for x in ["select table", "from Table"]), {
                 'entities': [self.entity1, self.entity2]
             })
     ])
     self.proxy.atlas.entity_bulk = self.bulk_inject(
         [self.entity1, self.entity2, self.db_entity])
     resp = self.proxy.fetch_table_search_results(query_term="Table")
     # assertEqual reports the actual count on failure, unlike assertTrue(a == b)
     self.assertEqual(resp.total_results, expected.total_results,
                      "there should be 2 search results")
     self.assertIsInstance(
         resp.results[0], Table,
         "Search result received is not of 'Table' type!")
     self.assertDictEqual(
         vars(resp.results[0]), vars(expected.results[0]),
         "Search Result doesn't match with expected result!")
     self.assertDictEqual(
         vars(resp.results[1]), vars(expected.results[1]),
         "Search Result doesn't match with expected result!")
    def test_should_give_empty_result_when_there_are_no_results_from_proxy(
            self) -> None:
        """
        GET /search should return an empty JSON payload when the proxy finds nothing.
        """
        empty_result = SearchResult(total_results=0, results=[])
        self.mock_proxy.fetch_table_search_results.return_value = empty_result

        client = self.app.test_client()
        response = client.get('/search?query_term=searchterm')

        self.assertEqual(response.json, {"total_results": 0, "results": []})
Beispiel #20
0
    def test_search_with_empty_result(self,
                                      mock_search: MagicMock) -> None:
        """
        A table search whose backend reports zero hits should return an empty SearchResult.
        """
        # Backend response with a hit total of 0 and no configured hits
        no_hits_response = MagicMock()
        no_hits_response.hits.total = 0
        mock_search.return_value = no_hits_response

        result = self.es_proxy.fetch_table_search_results(query_term='test_query_term')

        expected = SearchResult(total_results=0, results=[])
        self.assertDictEqual(vars(result),
                             vars(expected),
                             "Received non-empty search results!")
Beispiel #21
0
    def test_should_get_default_response_values_when_values_not_in_proxy_response(self) -> None:
        """
        The field-search endpoint should answer with ``default_json_response()``
        for a proxy result that carries no values.
        """
        # One result without any values set
        valueless_result = SearchResult(total_results=1, results=[{}])
        self.mock_proxy.fetch_table_search_results_with_field.return_value = valueless_result

        client = self.app.test_client()
        response = client.get('/search/field/field_name/field_val/myvalue')

        expected_response = dict(total_results=1,
                                 results=[default_json_response()])
        self.assertEqual(response.json, expected_response)
Beispiel #22
0
    def test_search_with_empty_query_string(self, mock_search: MagicMock) -> None:
        """An empty query term gives an empty SearchResult and never calls the search mock."""
        expected = SearchResult(total_results=0, results=[])
        result = self.es_proxy.fetch_table_search_results(query_term='')

        # check the output was empty list
        self.assertDictEqual(vars(result), vars(expected),
                             "Received non-empty search results!")

        # ensure elasticsearch_dsl Search endpoint was not called
        # assert_not_called doesn't work. See here: http://engineroom.trackmaven.com/blog/mocking-mistakes/
        # Compare call_count with assertEqual so a failure reports the actual
        # number of calls instead of just "False is not true".
        self.assertEqual(mock_search.call_count, 0)
Beispiel #23
0
    def test_should_get_default_response_values_when_values_not_in_proxy_response(
            self) -> None:
        """An empty dashboard result from the proxy is rendered with the default JSON values."""
        # The proxy hands back a single result that carries no fields at all.
        sparse_result = SearchResult(total_results=1, results=[{}])
        self.mock_proxy.fetch_dashboard_search_results.return_value = sparse_result

        url = '/search_dashboard?query_term=searchterm'
        response = self.app.test_client().get(url)

        expected_payload = {
            "total_results": 1,
            "results": [default_json_response()],
        }
        self.assertEqual(response.json, expected_payload)
Beispiel #24
0
    def test_should_get_result_for_search(self) -> None:
        """The dashboard search endpoint returns the proxy result and queries page 0."""
        proxy_hit = mock_proxy_results()
        self.mock_proxy.fetch_dashboard_search_results.return_value = SearchResult(
            total_results=1,
            results=[proxy_hit])

        url = '/search_dashboard?query_term=searchterm'
        response = self.app.test_client().get(url)
        expected_payload = {
            "total_results": 1,
            "results": [mock_json_response()],
        }

        # Body first, then status, then the arguments forwarded to the proxy.
        self.assertEqual(response.json, expected_payload)
        self.assertEqual(response.status_code, HTTPStatus.OK)
        self.mock_proxy.fetch_dashboard_search_results.assert_called_with(
            query_term='searchterm',
            page_index=0,
            index='dashboard_search_index')
    def test_search_fields(self) -> None:
        """Search by each supported field name and check the single Table returned."""
        fields = ['tag', 'schema', 'table', 'column']
        for field in fields:

            expected = SearchResult(
                total_results=1,
                results=[
                    Table(
                        name=self._qualified('table', 'Table1'),
                        key=f"TEST_ENTITY://TEST_CLUSTER"
                        f".{self._qualified('db', 'TEST_DB')}/"
                        f"{self._qualified('table', 'Table1')}",
                        description='Dummy Description',
                        cluster='TEST_CLUSTER',
                        database='TEST_ENTITY',
                        schema_name=self._qualified('db', 'TEST_DB'),
                        column_names=[
                            # 'column@name'
                        ],
                        tags=['PII_DATA'],
                        last_updated_epoch=123)
                ])
            # Each pair is (predicate on the DSL string, canned response).
            # NOTE(review): presumably dsl_inject answers with the response of
            # the matching predicate - confirm against the helper.
            self.proxy.atlas.search_dsl = self.dsl_inject([
                (lambda dsl: "select count()" in dsl, {
                    "attributes": {
                        "name": ["count()"],
                        "values": [[1]]
                    }
                }),
                (lambda dsl: any(
                    x in dsl
                    for x in ["select table", "from Table", "hive_column"]), {
                        'entities': [self.entity1]
                    })
            ])
            self.proxy.atlas.entity_bulk = self.bulk_inject(
                [self.entity1, self.db_entity])
            resp = self.proxy.fetch_table_search_results_with_field(
                query_term=field + "Table1",
                field_name=field,
                field_value="Table1")
            # assertEqual (rather than assertTrue(a == b)) shows the actual
            # count in the failure output.
            self.assertEqual(resp.total_results, 1,
                             "there should be 1 search result")
            self.assertIsInstance(
                resp.results[0], Table,
                "Search result received is not of 'Table' type!")
            self.assertDictEqual(
                vars(resp.results[0]), vars(expected.results[0]),
                "Search Result doesn't match with expected result!")
    def test_search_schema_column(self) -> None:
        """Search by the 'schema' and 'column' fields and check the single Table returned."""
        fields = ['schema', 'column']
        for field in fields:

            expected = SearchResult(
                total_results=1,
                results=[
                    Table(name=self.entity1_name,
                          key=f"{self.entity_type}://"
                          f"{self.cluster}.{self.db}/"
                          f"{self.entity1_name}",
                          description=self.entity1_description,
                          cluster=self.cluster,
                          database=self.entity_type,
                          schema=self.db,
                          column_names=[],
                          tags=[Tag(tag_name='PII_DATA')],
                          badges=[],
                          last_updated_timestamp=123)
                ])
            # Each pair is (predicate on the DSL string, canned response).
            # NOTE(review): presumably dsl_inject answers with the response of
            # the matching predicate - confirm against the helper.
            self.proxy.atlas.search_dsl = self.dsl_inject([
                (lambda dsl: "select count()" in dsl, {
                    "attributes": {
                        "name": ["count()"],
                        "values": [[1]]
                    }
                }),
                (lambda dsl: any(
                    x in dsl
                    for x in ["select table", "from Table", "hive_column"]), {
                        'entities': [self.entity1]
                    })
            ])
            self.proxy.atlas.entity_bulk = self.bulk_inject(
                [self.entity1, self.db_entity])
            resp = self.proxy.fetch_table_search_results_with_field(
                query_term=field + "Table1",
                field_name=field,
                field_value="Table1")
            # assertEqual (rather than assertTrue(a == b)) shows the actual
            # count in the failure output.
            self.assertEqual(resp.total_results, 1,
                             "there should be 1 search result")
            self.assertIsInstance(
                resp.results[0], Table,
                "Search result received is not of 'Table' type!")
            self.assertDictEqual(
                vars(resp.results[0]), vars(expected.results[0]),
                "Search Result doesn't match with expected result!")
Beispiel #27
0
    def test_search_with_multiple_result(self, mock_search: MagicMock) -> None:
        """Two hits from the search mock are returned as two matching Table objects."""
        # The mocked response reports two hits and yields them when iterated.
        mock_results = MagicMock()
        mock_results.hits.total = 2
        mock_results.__iter__.return_value = [
            Response(result=vars(self.mock_result1)),
            Response(result=vars(self.mock_result2))
        ]
        mock_search.return_value = mock_results

        expected = SearchResult(
            total_results=2,
            results=[
                Table(name='test_table',
                      key='test_key',
                      description='test_description',
                      cluster='gold',
                      database='test_db',
                      schema_name='test_schema',
                      column_names=['test_col1', 'test_col2'],
                      tags=[],
                      last_updated_epoch=1527283287),
                Table(name='test_table2',
                      key='test_key2',
                      description='test_description2',
                      cluster='gold',
                      database='test_db2',
                      schema_name='test_schema2',
                      column_names=['test_col1', 'test_col2'],
                      tags=[],
                      last_updated_epoch=1527283287)
            ])

        resp = self.es_proxy.fetch_table_search_results(
            query_term='test_query_term')

        # assertEquals is a deprecated alias that newer Python versions no
        # longer provide; assertEqual is the supported spelling.
        self.assertEqual(resp.total_results, expected.total_results,
                         "search result is not of length 2")
        # Index into resp.results so a short result list still fails loudly.
        for i, expected_table in enumerate(expected.results):
            self.assertIsInstance(
                resp.results[i], Table,
                "Search result received is not of 'Table' type!")
            self.assertDictEqual(
                vars(resp.results[i]), vars(expected_table),
                "Search result doesn't match with expected result!")
Beispiel #28
0
    def fetch_table_search_results(self,
                                   *,
                                   query_term: str,
                                   page_index: int = 0,
                                   index: str = '') -> SearchResult:
        """
        Search the table index for ``query_term`` and return Table results.

        The term is matched against several boosted fields with a
        ``multi_match`` query, and the score is scaled by a
        ``field_value_factor`` on ``total_usage`` (``log2p`` modifier).

        :param query_term: search query term; a blank term gives an empty result
        :param page_index: index of search page user is currently on
        :param index: index to search; when empty, the configured index is used
        :return: SearchResult Object
        """
        if index:
            current_index = index
        else:
            current_index = current_app.config.get(
                config.ELASTICSEARCH_INDEX_KEY, DEFAULT_ES_INDEX)

        # A blank query term short-circuits before any Search is built.
        if not query_term:
            return SearchResult(total_results=0, results=[])

        search_client = Search(using=self.elasticsearch, index=current_index)

        # "<field>^<n>" carries the per-field boost.
        boosted_fields = [
            "display_name^1000", "name.raw^75", "name^5",
            "schema^3", "description^3", "column_names^2",
            "column_descriptions", "tags", "badges"
        ]
        text_match = {
            "multi_match": {
                "query": query_term,
                "fields": boosted_fields,
            }
        }
        usage_factor = {
            "field": "total_usage",
            "modifier": "log2p"
        }
        query_name = {
            "function_score": {
                "query": text_match,
                "field_value_factor": usage_factor
            }
        }

        return self._search_helper(page_index=page_index,
                                   client=search_client,
                                   query_name=query_name,
                                   model=Table)
Beispiel #29
0
    def fetch_dashboard_search_results(self,
                                       *,
                                       query_term: str,
                                       page_index: int = 0,
                                       index: str = '') -> SearchResult:
        """
        Search the dashboard index for ``query_term`` and return Dashboard results.

        The term is matched against several boosted fields with a
        ``multi_match`` query, and the score is scaled by a
        ``field_value_factor`` on ``total_usage`` (``log2p`` modifier).

        :param query_term: search query term; a blank term gives an empty result
        :param page_index: index of the result page to fetch
        :param index: index to search; when empty, the configured index is used
        :return: SearchResult Object
        """
        if index:
            current_index = index
        else:
            current_index = current_app.config.get(
                config.ELASTICSEARCH_INDEX_KEY, DEFAULT_ES_INDEX)

        # A blank query term short-circuits before any Search is built.
        if not query_term:
            return SearchResult(total_results=0, results=[])

        search_client = Search(using=self.elasticsearch, index=current_index)

        # "<field>^<n>" carries the per-field boost.
        boosted_fields = [
            "name.raw^75", "name^5", "group_name.raw^5",
            "description^3", "query_names^3"
        ]
        text_match = {
            "multi_match": {
                "query": query_term,
                "fields": boosted_fields
            }
        }
        usage_factor = {
            "field": "total_usage",
            "modifier": "log2p"
        }
        query_name = {
            "function_score": {
                "query": text_match,
                "field_value_factor": usage_factor
            }
        }

        return self._search_helper(page_index=page_index,
                                   client=search_client,
                                   query_name=query_name,
                                   model=Dashboard)
    def _get_search_result(self, page_index: int, client: Search,
                           model: Any) -> SearchResult:
        """
        Execute the prepared search and turn each hit into a ``model`` instance.

        Hits that cannot be converted are logged and left out of the results.

        :param page_index: zero-based page to fetch; -1 fetches every match
        :param client: the Search object to slice and execute
        :param model: class used to build each result; must provide ``get_attrs()``
        :return: SearchResult with the reported total and the converted hits
        :raises Exception: if ``model`` is None
        """
        if model is None:
            raise Exception('ES Doc model must be provided!')

        if page_index == -1:
            # -1 means "no paging": request every matching document
            window = slice(0, client.count())
        else:
            # otherwise select the page_size-wide window for this page
            offset = page_index * self.page_size
            window = slice(offset, offset + self.page_size)
        paged_client = client[window]

        response = paged_client.execute()

        converted = []
        for hit in response:
            try:
                # ES hit: {'_d_': {'key': xxx...}
                payload = hit.__dict__.get('_d_', {})
                if not payload:
                    raise Exception('The ES doc not contain required field')

                # keep only the fields the model declares
                kwargs = {
                    name: value
                    for name, value in payload.items()
                    if name in model.get_attrs()
                }
                converted.append(model(**kwargs))
            except Exception:
                LOGGING.exception('The record doesnt contain specified field.')

        return SearchResult(total_results=response.hits.total,
                            results=converted)