Beispiel #1
0
    def setUp(self) -> None:
        """
        Push an application context and build the fixtures used by the tests:
        an ElasticsearchProxy wrapping a mocked client, sample Tag objects, and
        four search results (two MockSearchResult, one Table, one MockKVSearchResult).
        """
        self.app = create_app(config_module_class='search_service.config.LocalConfig')
        self.app_context = self.app.app_context()
        self.app_context.push()

        # A MagicMock stands in for the Elasticsearch client handed to the proxy.
        self.es_proxy = ElasticsearchProxy(client=MagicMock())

        self.mock_badge = Tag(tag_name='name')
        self.mock_tag = Tag(tag_name='match')
        self.mock_empty_badge: List[Tag] = []
        self.mock_empty_tag: List[Tag] = []

        # Every table-like fixture carries the same last-updated timestamp.
        updated_at = 1527283287

        # Results 1 and 2 share the same (empty) tag and badge list objects.
        self.mock_result1 = MockSearchResult(
            name='test_table',
            key='test_key',
            description='test_description',
            cluster='gold',
            database='test_db',
            schema='test_schema',
            column_names=['test_col1', 'test_col2'],
            tags=self.mock_empty_tag,
            badges=self.mock_empty_badge,
            last_updated_timestamp=updated_at,
        )

        self.mock_result2 = MockSearchResult(
            name='test_table2',
            key='test_key2',
            description='test_description2',
            cluster='gold',
            database='test_db2',
            schema='test_schema2',
            column_names=['test_col1', 'test_col2'],
            tags=self.mock_empty_tag,
            badges=self.mock_empty_badge,
            last_updated_timestamp=updated_at,
        )

        # Result 3 is a real Table carrying one tag and one badge.
        self.mock_result3 = Table(
            name='test_table3',
            key='test_key3',
            description='test_description3',
            cluster='gold',
            database='test_db3',
            schema='test_schema3',
            column_names=['test_col1', 'test_col2'],
            tags=[self.mock_tag],
            badges=[self.mock_badge],
            last_updated_timestamp=updated_at,
        )

        # Result 4 is a key/value result with person-style attributes.
        self.mock_result4 = MockKVSearchResult(
            full_name='First Last',
            first_name='First',
            last_name='Last',
            team_name='Test team',
            email='*****@*****.**',
            github_username='******',
            manager_email='*****@*****.**',
            is_active=True,
            employee_type='FTE',
            new_attr='aaa',
        )
Beispiel #2
0
def mock_proxy_results():
    """Build and return a fixed sample Table with one tag and one badge."""
    sample_table = Table(
        name='hello',
        key='world',
        description='des1',
        cluster='clust',
        database='db',
        display_name=None,
        schema='schema',
        column_names=['col1', 'col2'],
        tags=[Tag(tag_name='tag')],
        badges=[Tag(tag_name='badge1')],
        last_updated_timestamp=1568324871,
    )
    return sample_table
    def _prepare_tables(self,
                        response: EntityCollection,
                        enhance_metadata: bool = False) -> List[Table]:
        """
        Based on an Atlas {response} with table entities, we render Table objects.

        :param response: Collection of Atlas Entities
        :param enhance_metadata: Should Atlas be queried to acquire complete entity definitions (search might not
        return all available attributes)
        :return: List of Table objects
        """
        # If the condition is satisfied then we query Atlas again to collect all available information regarding
        # each table along with relationship information. This is helpful when using Atlas DSL as returned entities
        # contain a minimal amount of attributes.
        if enhance_metadata:
            ids = [hit.guid for hit in response]
            entities = self._extract_entities(
                self.atlas.entity_bulk(guid=ids, ignoreRelationships=False))
        else:
            entities = response

        result: List[Table] = []

        for entity in entities:
            entity_attrs = entity.attributes

            qn = parse_table_qualified_name(
                qualified_name=entity_attrs.get(self.ATLAS_QN_ATTRIBUTE))

            # Prefer the table name parsed from the qualified name; fall back to the plain 'name' attribute.
            entity_name = qn.get('table_name') or entity_attrs.get('name')
            db_name = qn.get('db_name', '')
            db_cluster = qn.get('cluster_name', '')

            # classificationNames may be None, hence the `or []`.
            tags: List[Tag] = [Tag(tag_name=classification)
                               for classification in (entity.classificationNames or [])]

            # Badges carry the same entries as tags, but in their own list so that mutating
            # one field of the resulting Table does not silently change the other.
            badges: List[Tag] = list(tags)

            # Note: the entity type name is what gets stored as the Table's database,
            # while the parsed database name is stored as its schema.
            table = Table(
                name=entity_name,
                key=f"{entity.typeName}://{db_cluster}.{db_name}/{entity_name}",
                description=entity_attrs.get('description'),
                cluster=db_cluster,
                database=entity.typeName,
                schema=db_name,
                tags=tags,
                badges=badges,
                column_names=[],
                last_updated_timestamp=entity_attrs.get('updateTime'))

            result.append(table)

        return result
Beispiel #4
0
    def _parse_results(self, response: EntityCollection) -> List[Table]:
        """
        Map an Atlas {response} holding table entities onto Table objects.

        Only the guid of each hit is read from the response; the full entities are
        then fetched in bulk (without relationships) and converted one by one.

        :param response: collection of Atlas search hits
        :return: list of tables
        """
        guids = [hit.guid for hit in response]

        # Receive all entities, with attributes
        # FixMe: Can ask for the Description and Qualified Name
        # FixMe: in DSL query above, once it uses indexes
        entities = self._entities(
            self.atlas.entity_bulk(guid=guids, ignoreRelationships=True))

        parsed_tables = []
        for entity in entities:
            attrs = entity.attributes

            qualified = parse_table_qualified_name(
                qualified_name=attrs.get(self.QN_KEY))

            # Fall back to the plain 'name' attribute when the qualified name has no table part.
            name = qualified.get("table_name") or attrs.get('name')
            schema_name = qualified.get("db_name", '')
            cluster_name = qualified.get("cluster_name", '')

            # `or []` covers the case where 'classifications' is present but set to None.
            entity_tags: List[Tag] = [
                Tag(tag_name=classification.get('typeName'))
                for classification in (attrs.get("classifications") or [])
            ]

            # TODO need to populate these
            entity_badges: List[Tag] = []

            # TODO: Implement columns: Not sure if we need this for the search results.
            column_names: List[str] = []

            parsed_tables.append(Table(
                name=name,
                key=f"{entity.typeName}://{cluster_name}.{schema_name}/{name}",
                description=attrs.get('description'),
                cluster=cluster_name,
                database=entity.typeName,
                schema=schema_name,
                column_names=column_names,
                tags=entity_tags,
                badges=entity_badges,
                last_updated_timestamp=attrs.get('updateTime')))

        return parsed_tables
Beispiel #5
0
    def test_put_multiple_tables(self, get_proxy: MagicMock, RequestParser: MagicMock) -> None:
        """
        PUT two JSON-encoded tables and check that the response status is OK and that
        the proxy's update_document is called with the matching Table objects and index.
        """
        mock_proxy = Mock()
        get_proxy.return_value = mock_proxy

        first_table = {
            'id': 'table1',
            'key': 'table1',
            'cluster': 'cluster1',
            'database': 'database1',
            'name': 'name1',
            'schema': 'schema1',
            'last_updated_timestamp': 12345678,
            'tags': [{'tag_name': 'tag1'}, {'tag_name': 'tag2'}],
        }
        second_table = {
            'id': 'table2',
            'key': 'table2',
            'cluster': 'cluster2',
            'database': 'database2',
            'name': 'name2',
            'schema': 'schema2',
            'last_updated_timestamp': 12345678,
            'tags': [{'tag_name': 'tag3'}, {'tag_name': 'tag4'}],
        }

        # The parsed request arguments carry each table as a JSON string.
        RequestParser().parse_args.return_value = {
            'data': [json.dumps(first_table), json.dumps(second_table)],
            'index': 'fake_index',
        }

        expected_data = [
            Table(id='table1',
                  database='database1',
                  cluster='cluster1',
                  schema='schema1',
                  name='name1',
                  key='table1',
                  tags=[Tag(tag_name='tag1'), Tag(tag_name='tag2')],
                  last_updated_timestamp=12345678),
            Table(id='table2',
                  database='database2',
                  cluster='cluster2',
                  schema='schema2',
                  name='name2',
                  key='table2',
                  tags=[Tag(tag_name='tag3'), Tag(tag_name='tag4')],
                  last_updated_timestamp=12345678),
        ]

        response = DocumentTablesAPI().put()

        # The second element of the response is the HTTP status.
        status = list(response)[1]
        self.assertEqual(status, HTTPStatus.OK)
        mock_proxy.update_document.assert_called_with(data=expected_data, index='fake_index')
    def test_search_normal(self) -> None:
        """
        A basic search returning a single entity must yield one Table whose fields
        match the expected fixture, with total_results taken from the approximate count.
        """
        # The table's id and key are the same "<type>://<cluster>.<db>/<name>" string.
        table_uri = f"{self.entity_type}://{self.cluster}.{self.db}/{self.entity1_name}"

        # total_results matches the single entry in `results` and the mocked
        # 'approximateCount' below, so the fixture is self-consistent.
        expected = SearchTableResult(
            total_results=1,
            results=[
                Table(id=table_uri,
                      name=self.entity1_name,
                      key=table_uri,
                      description=self.entity1_description,
                      cluster=self.cluster,
                      database=self.entity_type,
                      schema=self.db,
                      column_names=[],
                      tags=[Tag(tag_name='PII_DATA')],
                      badges=[Tag(tag_name='PII_DATA')],
                      last_updated_timestamp=123)
            ])

        entity1 = self.to_class(self.entity1)
        entity_collection = MagicMock()
        entity_collection.entities = [entity1]
        entity_collection._data = {'approximateCount': 1}

        # Replace the basic-search 'create' call so it hands back the mocked collection.
        result = MagicMock(return_value=entity_collection)

        with patch.object(self.proxy.atlas.search_basic, 'create', result):
            resp = self.proxy.fetch_table_search_results(query_term="Table")
            self.assertEqual(resp.total_results, expected.total_results)
            self.assertIsInstance(
                resp.results[0], Table,
                "Search result received is not of 'Table' type!")
            self.assertDictEqual(
                vars(resp.results[0]), vars(expected.results[0]),
                "Search Result doesn't match with expected result!")
    def test_search_schema_column(self) -> None:
        """
        Searching by the 'schema' and by the 'column' field must each return exactly
        one Table whose fields match the expected fixture.
        """
        # The expected result does not depend on the field being searched, so build it once.
        expected = SearchResult(
            total_results=1,
            results=[
                Table(name=self.entity1_name,
                      key=f"{self.entity_type}://"
                      f"{self.cluster}.{self.db}/"
                      f"{self.entity1_name}",
                      description=self.entity1_description,
                      cluster=self.cluster,
                      database=self.entity_type,
                      schema=self.db,
                      column_names=[],
                      tags=[Tag(tag_name='PII_DATA')],
                      badges=[],
                      last_updated_timestamp=123)
            ])

        for field in ('schema', 'column'):
            # subTest reports which field failed and lets the other one still run.
            with self.subTest(field=field):
                # Count queries answer with 1; table/column queries answer with entity1.
                self.proxy.atlas.search_dsl = self.dsl_inject([
                    (lambda dsl: "select count()" in dsl, {
                        "attributes": {
                            "name": ["count()"],
                            "values": [[1]]
                        }
                    }),
                    (lambda dsl: any(
                        x in dsl
                        for x in ["select table", "from Table", "hive_column"]), {
                            'entities': [self.entity1]
                        })
                ])
                self.proxy.atlas.entity_bulk = self.bulk_inject(
                    [self.entity1, self.db_entity])

                resp = self.proxy.fetch_table_search_results_with_field(
                    query_term=field + "Table1",
                    field_name=field,
                    field_value="Table1")

                # assertEqual shows the actual count on failure, unlike assertTrue(a == b).
                self.assertEqual(resp.total_results, 1,
                                 "there should be 1 search result")
                self.assertIsInstance(
                    resp.results[0], Table,
                    "Search result received is not of 'Table' type!")
                self.assertDictEqual(
                    vars(resp.results[0]), vars(expected.results[0]),
                    "Search Result doesn't match with expected result!")
Beispiel #8
0
 def test__get_instance_badge(self) -> None:
     """_get_instance('badges', ...) must turn each given name into a Tag."""
     expected_badges = [Tag(tag_name='badge1')]
     actual = self.es_proxy._get_instance('badges', ['badge1'])
     self.assertEqual(expected_badges, actual)
Beispiel #9
0
 def test__get_instance_tag(self) -> None:
     """_get_instance('tags', ...) must turn each given value into a Tag."""
     expected_tags = [Tag(tag_name='value')]
     actual = self.es_proxy._get_instance('tags', ['value'])
     self.assertEqual(expected_tags, actual)
Beispiel #10
0
    def test_put_multiple_features(self, get_proxy: MagicMock,
                                   RequestParser: MagicMock) -> None:
        """
        PUT two JSON-encoded features (one with tags, one with badges) and check that
        the response status is OK and that the proxy's update_document is called with
        the matching Feature objects and index.
        """
        mock_proxy = Mock()
        get_proxy.return_value = mock_proxy

        tagged_feature = {
            'id': '123aaabbb',
            'feature_group': 'group1',
            'feature_name': 'name1',
            'version': '7',
            'key': 'group1/name1/7',
            'total_usage': 12,
            'description': 'friendly description of a feature',
            'last_updated_timestamp': 12345678,
            'tags': [{'tag_name': 'tag1'}, {'tag_name': 'tag2'}],
        }
        badged_feature = {
            'id': '456bbbccc',
            'feature_group': 'group1',
            'feature_name': 'name2',
            'version': 'v1.0.0',
            'key': 'group1/name2/v1.0.0',
            'total_usage': 0,
            'availability': ['postgres'],
            'last_updated_timestamp': 12345678,
            'badges': [{'tag_name': 'badge1'}, {'tag_name': 'badge2'}],
        }

        # The parsed request arguments carry each feature as a JSON string.
        RequestParser().parse_args.return_value = {
            'data': [json.dumps(tagged_feature), json.dumps(badged_feature)],
            'index': 'fake_index',
        }

        expected_data = [
            Feature(
                id='123aaabbb',
                feature_group='group1',
                feature_name='name1',
                version='7',
                key='group1/name1/7',
                total_usage=12,
                description='friendly description of a feature',
                last_updated_timestamp=12345678,
                tags=[Tag(tag_name='tag1'), Tag(tag_name='tag2')],
            ),
            Feature(
                id='456bbbccc',
                feature_group='group1',
                feature_name='name2',
                version='v1.0.0',
                key='group1/name2/v1.0.0',
                total_usage=0,
                availability=['postgres'],
                last_updated_timestamp=12345678,
                badges=[Tag(tag_name='badge1'), Tag(tag_name='badge2')],
            ),
        ]

        response = DocumentFeaturesAPI().put()

        # The second element of the response is the HTTP status.
        status = list(response)[1]
        self.assertEqual(status, HTTPStatus.OK)
        mock_proxy.update_document.assert_called_with(data=expected_data,
                                                      index='fake_index')