def setUp(self) -> None:
    """Build a Flask app context, a proxy with a mocked ES client, and shared fixtures."""
    self.app = create_app(config_module_class='search_service.config.LocalConfig')
    self.app_context = self.app.app_context()
    self.app_context.push()

    # The proxy under test talks to a stand-in Elasticsearch client;
    # no real cluster is contacted.
    mock_elasticsearch_client = MagicMock()
    self.es_proxy = ElasticsearchProxy(client=mock_elasticsearch_client)

    # Tag/badge fixtures reused across the result objects below.
    self.mock_badge = Tag(tag_name='name')
    self.mock_tag = Tag(tag_name='match')
    self.mock_empty_badge: List[Tag] = []
    self.mock_empty_tag: List[Tag] = []

    self.mock_result1 = MockSearchResult(
        name='test_table',
        key='test_key',
        description='test_description',
        cluster='gold',
        database='test_db',
        schema='test_schema',
        column_names=['test_col1', 'test_col2'],
        tags=self.mock_empty_tag,
        badges=self.mock_empty_badge,
        last_updated_timestamp=1527283287)

    self.mock_result2 = MockSearchResult(
        name='test_table2',
        key='test_key2',
        description='test_description2',
        cluster='gold',
        database='test_db2',
        schema='test_schema2',
        column_names=['test_col1', 'test_col2'],
        tags=self.mock_empty_tag,
        badges=self.mock_empty_badge,
        last_updated_timestamp=1527283287)

    # Unlike result 1/2, this one is a real Table and carries tags/badges.
    self.mock_result3 = Table(
        name='test_table3',
        key='test_key3',
        description='test_description3',
        cluster='gold',
        database='test_db3',
        schema='test_schema3',
        column_names=['test_col1', 'test_col2'],
        tags=[self.mock_tag],
        badges=[self.mock_badge],
        last_updated_timestamp=1527283287)

    self.mock_result4 = MockKVSearchResult(
        full_name='First Last',
        first_name='First',
        last_name='Last',
        team_name='Test team',
        email='*****@*****.**',
        github_username='******',
        manager_email='*****@*****.**',
        is_active=True,
        employee_type='FTE',
        new_attr='aaa')
def mock_proxy_results():
    """Return one canned Table, used as a stand-in for a proxy search hit."""
    canned_table = Table(
        name='hello',
        key='world',
        description='des1',
        cluster='clust',
        database='db',
        display_name=None,
        schema='schema',
        column_names=['col1', 'col2'],
        tags=[Tag(tag_name='tag')],
        badges=[Tag(tag_name='badge1')],
        last_updated_timestamp=1568324871)
    return canned_table
def _prepare_tables(self, response: EntityCollection, enhance_metadata: bool = False) -> List[Table]:
    """
    Render Table objects from an Atlas {response} containing table entities.

    :param response: Collection of Atlas Entities
    :param enhance_metadata: Should Atlas be queried to acquire complete entity
        definitions (search might not return all available attributes)
    :return: List of Table objects
    """
    # Atlas DSL hits carry only a minimal attribute set; when requested we
    # re-fetch every entity (with relationship info) so the rendered tables
    # contain all available information.
    if enhance_metadata:
        guids = [hit.guid for hit in response]
        entities = self._extract_entities(
            self.atlas.entity_bulk(guid=guids, ignoreRelationships=False))
    else:
        entities = response

    result: List[Table] = []
    for entity in entities:
        entity_attrs = entity.attributes
        parsed_qn = parse_table_qualified_name(
            qualified_name=entity_attrs.get(self.ATLAS_QN_ATTRIBUTE))

        entity_name = parsed_qn.get('table_name') or entity_attrs.get('name')
        db_name = parsed_qn.get('db_name', '')
        db_cluster = parsed_qn.get('cluster_name', '')

        # Every Atlas classification is surfaced both as a tag and as a badge.
        tags = [Tag(tag_name=classification)
                for classification in entity.classificationNames or list()]
        badges = tags

        result.append(Table(
            name=entity_name,
            key=f"{entity.typeName}://{db_cluster}.{db_name}/{entity_name}",
            description=entity_attrs.get('description'),
            cluster=db_cluster,
            database=entity.typeName,
            schema=db_name,
            tags=tags,
            badges=badges,
            column_names=[],
            last_updated_timestamp=entity_attrs.get('updateTime')))

    return result
def _parse_results(self, response: EntityCollection) -> List[Table]:
    """
    Map the required table information out of an atlas {response}.

    :return: list of tables
    """
    guids = [hit.guid for hit in response]

    # Receive all entities, with attributes.
    # FixMe: Can ask for the Description and Qualified Name
    # FixMe: in DSL query above, once it uses indexes
    entities = self._entities(
        self.atlas.entity_bulk(guid=guids, ignoreRelationships=True))

    table_results: List[Table] = []
    for entity in entities:
        attrs = entity.attributes
        qn = parse_table_qualified_name(qualified_name=attrs.get(self.QN_KEY))

        table_name = qn.get("table_name") or attrs.get('name')
        db_name = qn.get("db_name", '')
        db_cluster = qn.get("cluster_name", '')

        # 'classifications' can be present with a None value, hence the `or`.
        tags: List[Tag] = [Tag(tag_name=classification.get('typeName'))
                           for classification in attrs.get("classifications") or list()]

        # TODO need to populate these
        badges: List[Tag] = []

        # TODO: Implement columns: Not sure if we need this for the search results.
        columns: List[str] = []
        # for column in attrs.get('columns') or list():
        #     col_entity = entity.referredEntities[column['guid']]
        #     col_attrs = col_entity['attributes']
        #     columns.append(col_attrs.get(self.NAME_KEY))
        # table_name = attrs.get(self.NAME_ATTRIBUTE)

        table_results.append(Table(
            name=table_name,
            key=f"{entity.typeName}://{db_cluster}.{db_name}/{table_name}",
            description=attrs.get('description'),
            cluster=db_cluster,
            database=entity.typeName,
            schema=db_name,
            column_names=columns,
            tags=tags,
            badges=badges,
            last_updated_timestamp=attrs.get('updateTime')))

    return table_results
def test_put_multiple_tables(self, get_proxy: MagicMock, RequestParser: MagicMock) -> None:
    """PUT with two table documents should forward both, deserialized, to the proxy."""
    mock_proxy = get_proxy.return_value = Mock()

    payload = [
        json.dumps({
            'id': 'table1',
            'key': 'table1',
            'cluster': 'cluster1',
            'database': 'database1',
            'name': 'name1',
            'schema': 'schema1',
            'last_updated_timestamp': 12345678,
            'tags': [{'tag_name': 'tag1'}, {'tag_name': 'tag2'}]
        }),
        json.dumps({
            'id': 'table2',
            'key': 'table2',
            'cluster': 'cluster2',
            'database': 'database2',
            'name': 'name2',
            'schema': 'schema2',
            'last_updated_timestamp': 12345678,
            'tags': [{'tag_name': 'tag3'}, {'tag_name': 'tag4'}]
        }),
    ]
    RequestParser().parse_args.return_value = dict(data=payload, index='fake_index')

    expected_data = [
        Table(id='table1',
              database='database1',
              cluster='cluster1',
              schema='schema1',
              name='name1',
              key='table1',
              tags=[Tag(tag_name='tag1'), Tag(tag_name='tag2')],
              last_updated_timestamp=12345678),
        Table(id='table2',
              database='database2',
              cluster='cluster2',
              schema='schema2',
              name='name2',
              key='table2',
              tags=[Tag(tag_name='tag3'), Tag(tag_name='tag4')],
              last_updated_timestamp=12345678),
    ]

    response = DocumentTablesAPI().put()
    self.assertEqual(list(response)[1], HTTPStatus.OK)
    mock_proxy.update_document.assert_called_with(data=expected_data, index='fake_index')
def test_search_normal(self) -> None:
    """A basic-search hit should be rendered into a fully-populated Table.

    The mocked ES/Atlas response carries a single entity and
    ``approximateCount == 1``; the rendered result must match ``expected``
    field-for-field.
    """
    # NOTE: total_results must agree with the mocked approximateCount (1);
    # it was previously set to 2, which contradicted the assertion below.
    expected = SearchTableResult(
        total_results=1,
        results=[
            Table(id=f"{self.entity_type}://"
                     f"{self.cluster}.{self.db}/"
                     f"{self.entity1_name}",
                  name=self.entity1_name,
                  key=f"{self.entity_type}://"
                      f"{self.cluster}.{self.db}/"
                      f"{self.entity1_name}",
                  description=self.entity1_description,
                  cluster=self.cluster,
                  database=self.entity_type,
                  schema=self.db,
                  column_names=[],
                  tags=[Tag(tag_name='PII_DATA')],
                  badges=[Tag(tag_name='PII_DATA')],
                  last_updated_timestamp=123)
        ])
    entity1 = self.to_class(self.entity1)
    entity_collection = MagicMock()
    entity_collection.entities = [entity1]
    entity_collection._data = {'approximateCount': 1}

    result = MagicMock(return_value=entity_collection)

    with patch.object(self.proxy.atlas.search_basic, 'create', result):
        resp = self.proxy.fetch_table_search_results(query_term="Table")
        self.assertEqual(resp.total_results, 1)
        self.assertIsInstance(
            resp.results[0], Table,
            "Search result received is not of 'Table' type!")
        self.assertDictEqual(
            vars(resp.results[0]), vars(expected.results[0]),
            "Search Result doesn't match with expected result!")
def test_search_schema_column(self) -> None:
    """Field-scoped search by 'schema' and 'column' should each yield the expected Table."""
    for field in ('schema', 'column'):
        expected = SearchResult(
            total_results=1,
            results=[
                Table(name=self.entity1_name,
                      key=f"{self.entity_type}://"
                          f"{self.cluster}.{self.db}/"
                          f"{self.entity1_name}",
                      description=self.entity1_description,
                      cluster=self.cluster,
                      database=self.entity_type,
                      schema=self.db,
                      column_names=[],
                      tags=[Tag(tag_name='PII_DATA')],
                      badges=[],
                      last_updated_timestamp=123)
            ])

        # First injected DSL answers the count() query; the second answers
        # the actual table/column select with one entity.
        count_response = {
            "attributes": {
                "name": ["count()"],
                "values": [[1]]
            }
        }
        entity_response = {'entities': [self.entity1]}
        self.proxy.atlas.search_dsl = self.dsl_inject([
            (lambda dsl: "select count()" in dsl, count_response),
            (lambda dsl: any(x in dsl for x in ["select table", "from Table", "hive_column"]),
             entity_response),
        ])
        self.proxy.atlas.entity_bulk = self.bulk_inject([self.entity1, self.db_entity])

        resp = self.proxy.fetch_table_search_results_with_field(
            query_term=field + "Table1",
            field_name=field,
            field_value="Table1")

        self.assertTrue(resp.total_results == 1, "there should be 1 search result")
        self.assertIsInstance(
            resp.results[0], Table,
            "Search result received is not of 'Table' type!")
        self.assertDictEqual(
            vars(resp.results[0]), vars(expected.results[0]),
            "Search Result doesn't match with expected result!")
def test__get_instance_badge(self) -> None:
    """'badges' attribute values should be wrapped into Tag instances."""
    expected = [Tag(tag_name='badge1')]
    actual = self.es_proxy._get_instance('badges', ['badge1'])
    self.assertEqual(expected, actual)
def test__get_instance_tag(self) -> None:
    """'tags' attribute values should be wrapped into Tag instances."""
    expected = [Tag(tag_name='value')]
    actual = self.es_proxy._get_instance('tags', ['value'])
    self.assertEqual(expected, actual)
def test_put_multiple_features(self, get_proxy: MagicMock, RequestParser: MagicMock) -> None:
    """PUT with two feature documents should forward both, deserialized, to the proxy."""
    mock_proxy = get_proxy.return_value = Mock()

    payload = [
        json.dumps({
            'id': '123aaabbb',
            'feature_group': 'group1',
            'feature_name': 'name1',
            'version': '7',
            'key': 'group1/name1/7',
            'total_usage': 12,
            'description': 'friendly description of a feature',
            'last_updated_timestamp': 12345678,
            'tags': [{'tag_name': 'tag1'}, {'tag_name': 'tag2'}]
        }),
        json.dumps({
            'id': '456bbbccc',
            'feature_group': 'group1',
            'feature_name': 'name2',
            'version': 'v1.0.0',
            'key': 'group1/name2/v1.0.0',
            'total_usage': 0,
            'availability': ['postgres'],
            'last_updated_timestamp': 12345678,
            'badges': [{'tag_name': 'badge1'}, {'tag_name': 'badge2'}]
        }),
    ]
    RequestParser().parse_args.return_value = dict(data=payload, index='fake_index')

    expected_data = [
        Feature(id='123aaabbb',
                feature_group='group1',
                feature_name='name1',
                version='7',
                key='group1/name1/7',
                total_usage=12,
                description='friendly description of a feature',
                last_updated_timestamp=12345678,
                tags=[Tag(tag_name='tag1'), Tag(tag_name='tag2')]),
        Feature(id='456bbbccc',
                feature_group='group1',
                feature_name='name2',
                version='v1.0.0',
                key='group1/name2/v1.0.0',
                total_usage=0,
                availability=['postgres'],
                last_updated_timestamp=12345678,
                badges=[Tag(tag_name='badge1'), Tag(tag_name='badge2')]),
    ]

    response = DocumentFeaturesAPI().put()
    self.assertEqual(list(response)[1], HTTPStatus.OK)
    mock_proxy.update_document.assert_called_with(data=expected_data, index='fake_index')