def test_create_document(self, mock_uuid: MagicMock) -> None:
    """Bulk-indexing two tables into a fresh index returns the alias and
    sends exactly the expected action/document pairs to Elasticsearch."""
    es_client = self.es_proxy.elasticsearch
    fresh_index = 'tester_index_name'
    # create_document generates the new index name via uuid; pin it.
    mock_uuid.return_value = fresh_index
    es_client.indices.get_alias.return_value = {fresh_index: {}}

    start_data = [
        Table(id='snowflake://blue.test_schema/bank_accounts',
              cluster='blue',
              column_names=['1', '2'],
              database='snowflake',
              schema='test_schema',
              description='A table for something',
              key='snowflake://blue.test_schema/bank_accounts',
              last_updated_timestamp=0,
              name='bank_accounts',
              tags=[],
              badges=self.mock_empty_badge,
              column_descriptions=['desc'],
              schema_description='schema description 1'),
        Table(id='snowflake://blue.test_schema/bitcoin_wallets',
              cluster='blue',
              column_names=['5', '6'],
              database='snowflake',
              schema='test_schema',
              description='A table for lots of things!',
              key='snowflake://blue.test_schema/bitcoin_wallets',
              last_updated_timestamp=0,
              name='bitcoin_wallets',
              tags=[],
              badges=self.mock_empty_badge,
              schema_description='schema description 2',
              programmatic_descriptions=["test"]),
    ]

    # Bulk payload alternates an index action line with its document body.
    expected_data = [
        {'index': {'_index': fresh_index,
                   '_type': 'table',
                   '_id': 'snowflake://blue.test_schema/bank_accounts'}},
        {'id': 'snowflake://blue.test_schema/bank_accounts',
         'cluster': 'blue',
         'column_names': ['1', '2'],
         'column_descriptions': ['desc'],
         'database': 'snowflake',
         'schema': 'test_schema',
         'description': 'A table for something',
         'display_name': None,
         'key': 'snowflake://blue.test_schema/bank_accounts',
         'last_updated_timestamp': 0,
         'name': 'bank_accounts',
         'tags': [],
         'badges': [],
         'total_usage': 0,
         'programmatic_descriptions': None,
         'schema_description': 'schema description 1'},
        {'index': {'_index': fresh_index,
                   '_type': 'table',
                   '_id': 'snowflake://blue.test_schema/bitcoin_wallets'}},
        {'id': 'snowflake://blue.test_schema/bitcoin_wallets',
         'cluster': 'blue',
         'column_names': ['5', '6'],
         'column_descriptions': None,
         'database': 'snowflake',
         'schema': 'test_schema',
         'description': 'A table for lots of things!',
         'display_name': None,
         'key': 'snowflake://blue.test_schema/bitcoin_wallets',
         'last_updated_timestamp': 0,
         'name': 'bitcoin_wallets',
         'tags': [],
         'badges': [],
         'total_usage': 0,
         'schema_description': 'schema description 2',
         'programmatic_descriptions': ["test"]},
    ]

    es_client.bulk.return_value = {'errors': False}

    expected_alias = 'table_search_index'
    result = self.es_proxy.create_document(data=start_data, index=expected_alias)

    self.assertEqual(expected_alias, result)
    es_client.bulk.assert_called_with(expected_data)
def _prepare_tables(self, response: EntityCollection, enhance_metadata: bool = False) -> List[Table]:
    """
    Render Table objects from an Atlas {response} containing table entities.

    :param response: Collection of Atlas Entities
    :param enhance_metadata: Should Atlas be queried to acquire complete entity
    definitions (search might not return all available attributes)
    :return: List of Table objects
    """
    tables: List[Table] = []

    # When requested, re-fetch every entity (including relationship data)
    # from Atlas in one bulk call; DSL search results carry only a minimal
    # set of attributes.
    if enhance_metadata:
        guids = [hit.guid for hit in response]
        entities = self._extract_entities(
            self.atlas.entity_bulk(guid=guids, ignoreRelationships=False))
    else:
        entities = response

    for entity in entities:
        attrs = entity.attributes

        qn = parse_table_qualified_name(
            qualified_name=attrs.get(self.ATLAS_QN_ATTRIBUTE))

        table_name = qn.get('table_name') or attrs.get('name')
        schema_name = qn.get('db_name', '')
        cluster_name = qn.get('cluster_name', '')

        # Atlas classifications serve as both tags and badges here.
        tags: List[Tag] = [Tag(tag_name=classification)
                           for classification in entity.classificationNames or []]
        badges: List[Tag] = tags

        table_key = f"{entity.typeName}://{cluster_name}.{schema_name}/{table_name}"
        tables.append(Table(
            id=table_key,
            name=table_name,
            key=table_key,
            description=attrs.get('description'),
            cluster=cluster_name,
            database=entity.typeName,
            schema=schema_name,
            tags=tags,
            badges=badges,
            column_names=[],
            last_updated_timestamp=attrs.get('updateTime')))

    return tables
def setUp(self) -> None:
    """Create the Flask app context and the canned search fixtures shared by the tests."""
    self.app = create_app(config_module_class='search_service.config.LocalConfig')
    self.app_context = self.app.app_context()
    self.app_context.push()

    # The proxy is wired to a mocked Elasticsearch client, so no real
    # cluster is needed by any test.
    mock_elasticsearch_client = MagicMock()
    self.es_proxy = ElasticsearchProxy(client=mock_elasticsearch_client)

    self.mock_badge = Tag(tag_name='name')
    self.mock_tag = Tag(tag_name='match')
    self.mock_empty_badge: List[Tag] = []
    self.mock_empty_tag: List[Tag] = []

    self.mock_result1 = MockSearchResult(name='test_table',
                                         key='test_key',
                                         description='test_description',
                                         cluster='gold',
                                         database='test_db',
                                         schema='test_schema',
                                         column_names=['test_col1', 'test_col2'],
                                         tags=self.mock_empty_tag,
                                         badges=self.mock_empty_badge,
                                         last_updated_timestamp=1527283287,
                                         programmatic_descriptions=[])

    self.mock_result2 = MockSearchResult(name='test_table2',
                                         key='test_key2',
                                         description='test_description2',
                                         cluster='gold',
                                         database='test_db2',
                                         schema='test_schema2',
                                         column_names=['test_col1', 'test_col2'],
                                         tags=self.mock_empty_tag,
                                         badges=self.mock_empty_badge,
                                         last_updated_timestamp=1527283287)

    # Unlike the mock results above, this one is a real Table model with
    # a non-empty tag and badge.
    self.mock_result3 = Table(id='test_key3',
                              name='test_table3',
                              key='test_key3',
                              description='test_description3',
                              cluster='gold',
                              database='test_db3',
                              schema='test_schema3',
                              column_names=['test_col1', 'test_col2'],
                              tags=[self.mock_tag],
                              badges=[self.mock_badge],
                              last_updated_timestamp=1527283287)

    self.mock_result4 = MockUserSearchResult(full_name='First Last',
                                             first_name='First',
                                             last_name='Last',
                                             team_name='Test team',
                                             email='*****@*****.**',
                                             github_username='******',
                                             manager_email='*****@*****.**',
                                             is_active=True,
                                             employee_type='FTE',
                                             role_name='swe',
                                             new_attr='aaa')

    self.mock_dashboard_result = Dashboard(id='mode_dashboard',
                                           uri='dashboard_uri',
                                           cluster='gold',
                                           group_name='mode_dashboard_group',
                                           group_url='mode_dashboard_group_url',
                                           product='mode',
                                           name='mode_dashboard',
                                           url='mode_dashboard_url',
                                           description='test_dashboard',
                                           last_successful_run_timestamp=1000)
def _parse_results(self, response: EntityCollection) -> List[Table]:
    """
    based on an atlas {response} with table entities, we map the required information
    :return: list of tables
    """
    tables: List[Table] = []

    guids = [hit.guid for hit in response]

    # Fetch the complete entity definitions in one bulk request.
    entities = self._entities(self.atlas.entity_bulk(guid=guids))

    # Gather the guid of every related database so they can also be
    # fetched in a single bulk request.
    database_guids = []
    for entity in entities:
        db_relation = entity.relationshipAttributes.get(self.DB_ATTRIBUTE)
        if db_relation:
            database_guids.append(db_relation['guid'])

    db_entities = self._entities(
        self.atlas.entity_bulk(guid=database_guids)) if len(database_guids) > 0 else []
    databases_by_guid: Dict[str, Entity] = {db.guid: db for db in db_entities}

    for entity in entities:
        attrs = entity.attributes
        db_relation = entity.relationshipAttributes.get(self.DB_ATTRIBUTE)

        if db_relation and db_relation['guid'] in databases_by_guid:
            db_attrs = databases_by_guid[db_relation['guid']].attributes
            db_name = db_attrs.get(self.NAME_ATTRIBUTE)
            db_cluster = db_attrs.get("clusterName", "")
        else:
            db_name = ''
            db_cluster = ''

        # 'classifications' may be present with a None value, hence the `or`.
        tags = [classification.get('typeName')
                for classification in attrs.get("classifications") or []]

        # TODO: Implement columns
        columns: List[str] = []
        # for column in attrs.get('columns') or list():
        #     col_entity = entity.referredEntities[column['guid']]
        #     col_attrs = col_entity['attributes']
        #     columns.append(col_attrs.get(self.NAME_KEY))

        table_name = attrs.get(self.NAME_ATTRIBUTE)
        tables.append(Table(
            name=table_name,
            key=f"{entity.typeName}://{db_cluster}.{db_name}/{table_name}",
            description=attrs.get('description'),
            cluster=db_cluster,
            database=entity.typeName or 'Table',
            schema_name=db_name,
            column_names=columns,
            tags=tags,
            last_updated_epoch=attrs.get('updateTime')))

    return tables