예제 #1
0
    def test_create_document(self, mock_uuid: MagicMock) -> None:
        """
        Check create_document against a mocked Elasticsearch client.

        Two Table fixtures are handed to create_document; the test then asserts
        that the alias passed in is returned and that the client's bulk() was
        last called with alternating action / document entries targeting the
        new index name.

        :param mock_uuid: mock whose return value is set to the new index name
        (presumably injected by a patch decorator outside this view)
        """
        index_name = 'tester_index_name'
        alias = 'table_search_index'
        bank_accounts_key = 'snowflake://blue.test_schema/bank_accounts'
        bitcoin_wallets_key = 'snowflake://blue.test_schema/bitcoin_wallets'

        # Wire up the mocks: generated index name, alias lookup and a bulk
        # response that reports no errors.
        es_client = self.es_proxy.elasticsearch
        mock_uuid.return_value = index_name
        es_client.indices.get_alias.return_value = {index_name: {}}
        es_client.bulk.return_value = {'errors': False}

        def index_action(document_id: str) -> dict:
            # Action entry expected in front of each document in the bulk payload.
            return {
                'index': {
                    '_index': index_name,
                    '_type': 'table',
                    '_id': document_id
                }
            }

        bank_accounts = Table(
            id=bank_accounts_key,
            cluster='blue',
            column_names=['1', '2'],
            database='snowflake',
            schema='test_schema',
            description='A table for something',
            key=bank_accounts_key,
            last_updated_timestamp=0,
            name='bank_accounts',
            tags=[],
            badges=self.mock_empty_badge,
            column_descriptions=['desc'],
            schema_description='schema description 1')
        bitcoin_wallets = Table(
            id=bitcoin_wallets_key,
            cluster='blue',
            column_names=['5', '6'],
            database='snowflake',
            schema='test_schema',
            description='A table for lots of things!',
            key=bitcoin_wallets_key,
            last_updated_timestamp=0,
            name='bitcoin_wallets',
            tags=[],
            badges=self.mock_empty_badge,
            schema_description='schema description 2',
            programmatic_descriptions=["test"])

        # Fields not supplied to a Table above are expected as None
        # (or 0 for total_usage) in the serialized document.
        bank_accounts_doc = {
            'id': bank_accounts_key,
            'cluster': 'blue',
            'column_names': ['1', '2'],
            'column_descriptions': ['desc'],
            'database': 'snowflake',
            'schema': 'test_schema',
            'description': 'A table for something',
            'display_name': None,
            'key': bank_accounts_key,
            'last_updated_timestamp': 0,
            'name': 'bank_accounts',
            'tags': [],
            'badges': [],
            'total_usage': 0,
            'programmatic_descriptions': None,
            'schema_description': 'schema description 1',
        }
        bitcoin_wallets_doc = {
            'id': bitcoin_wallets_key,
            'cluster': 'blue',
            'column_names': ['5', '6'],
            'column_descriptions': None,
            'database': 'snowflake',
            'schema': 'test_schema',
            'description': 'A table for lots of things!',
            'display_name': None,
            'key': bitcoin_wallets_key,
            'last_updated_timestamp': 0,
            'name': 'bitcoin_wallets',
            'tags': [],
            'badges': [],
            'total_usage': 0,
            'schema_description': 'schema description 2',
            'programmatic_descriptions': ["test"]
        }
        expected_payload = [
            index_action(bank_accounts_key),
            bank_accounts_doc,
            index_action(bitcoin_wallets_key),
            bitcoin_wallets_doc,
        ]

        returned_alias = self.es_proxy.create_document(
            data=[bank_accounts, bitcoin_wallets],
            index=alias)

        self.assertEqual(alias, returned_alias)
        es_client.bulk.assert_called_with(expected_payload)
예제 #2
0
    def _prepare_tables(self,
                        response: EntityCollection,
                        enhance_metadata: bool = False) -> List[Table]:
        """
        Render Table objects from an Atlas {response} containing table entities.

        :param response: Collection of Atlas Entities
        :param enhance_metadata: When True, Atlas is queried a second time (bulk lookup by guid, relationships
        included) to acquire complete entity definitions, since search might not return all available attributes
        :return: List of Table objects
        """
        if enhance_metadata:
            # Entities returned by search (e.g. through Atlas DSL) carry a minimal set of attributes, so the
            # full definitions along with relationship information are fetched by guid.
            guids = [hit.guid for hit in response]
            entities = self._extract_entities(
                self.atlas.entity_bulk(guid=guids, ignoreRelationships=False))
        else:
            entities = response

        tables = []

        for entity in entities:
            attributes = entity.attributes

            qualified_name = parse_table_qualified_name(
                qualified_name=attributes.get(self.ATLAS_QN_ATTRIBUTE))

            # The name parsed from the qualified name wins; the plain 'name' attribute is the fallback.
            table_name = qualified_name.get('table_name') or attributes.get('name')
            schema = qualified_name.get('db_name', '')
            cluster = qualified_name.get('cluster_name', '')

            # id and key share the same "<type>://<cluster>.<schema>/<table>" value.
            table_key = f"{entity.typeName}://{cluster}.{schema}/{table_name}"

            # classificationNames may be None, hence the fallback to an empty list.
            tags: List[Tag] = [
                Tag(tag_name=classification)
                for classification in entity.classificationNames or list()
            ]

            tables.append(
                Table(
                    id=table_key,
                    name=table_name,
                    key=table_key,
                    description=attributes.get('description'),
                    cluster=cluster,
                    database=entity.typeName,
                    schema=schema,
                    tags=tags,
                    # badges receive the very same list object as tags
                    badges=tags,
                    column_names=[],
                    last_updated_timestamp=attributes.get('updateTime')))

        return tables
예제 #3
0
    def setUp(self) -> None:
        """
        Create the app context, the proxy under test and the fixtures stored on self.

        The app is created from LocalConfig and its context is pushed; the
        ElasticsearchProxy is built around a MagicMock client. The remaining
        attributes are tag/badge fixtures, mocked table and user search
        results, a Table and a Dashboard.
        """
        self.app = create_app(
            config_module_class='search_service.config.LocalConfig')
        self.app_context = self.app.app_context()
        self.app_context.push()

        self.es_proxy = ElasticsearchProxy(client=MagicMock())

        # Tag / badge fixtures; the two empty lists are distinct objects.
        self.mock_badge = Tag(tag_name='name')
        self.mock_tag = Tag(tag_name='match')
        self.mock_empty_badge: List[Tag] = []
        self.mock_empty_tag: List[Tag] = []

        updated_at = 1527283287
        cluster = 'gold'

        # Mocked table search results; only the first one sets programmatic_descriptions.
        self.mock_result1 = MockSearchResult(
            name='test_table',
            key='test_key',
            description='test_description',
            cluster=cluster,
            database='test_db',
            schema='test_schema',
            column_names=['test_col1', 'test_col2'],
            tags=self.mock_empty_tag,
            badges=self.mock_empty_badge,
            last_updated_timestamp=updated_at,
            programmatic_descriptions=[],
        )
        self.mock_result2 = MockSearchResult(
            name='test_table2',
            key='test_key2',
            description='test_description2',
            cluster=cluster,
            database='test_db2',
            schema='test_schema2',
            column_names=['test_col1', 'test_col2'],
            tags=self.mock_empty_tag,
            badges=self.mock_empty_badge,
            last_updated_timestamp=updated_at,
        )

        # A Table carrying one tag and one badge.
        self.mock_result3 = Table(
            id='test_key3',
            name='test_table3',
            key='test_key3',
            description='test_description3',
            cluster=cluster,
            database='test_db3',
            schema='test_schema3',
            column_names=['test_col1', 'test_col2'],
            tags=[self.mock_tag],
            badges=[self.mock_badge],
            last_updated_timestamp=updated_at,
        )

        # Mocked user search result, including the extra 'new_attr' field.
        user_fields = dict(
            full_name='First Last',
            first_name='First',
            last_name='Last',
            team_name='Test team',
            email='*****@*****.**',
            github_username='******',
            manager_email='*****@*****.**',
            is_active=True,
            employee_type='FTE',
            role_name='swe',
            new_attr='aaa',
        )
        self.mock_result4 = MockUserSearchResult(**user_fields)

        self.mock_dashboard_result = Dashboard(
            id='mode_dashboard',
            uri='dashboard_uri',
            cluster=cluster,
            group_name='mode_dashboard_group',
            group_url='mode_dashboard_group_url',
            product='mode',
            name='mode_dashboard',
            url='mode_dashboard_url',
            description='test_dashboard',
            last_successful_run_timestamp=1000,
        )
예제 #4
0
    def _parse_results(self, response: EntityCollection) -> List[Table]:
        """
        Map an atlas {response} with table entities onto Table objects.

        The tables are fetched in bulk by guid, then the databases they
        reference are fetched in a second bulk call so that every table can be
        given its database name and cluster.

        :param response: atlas search hits; only the guid of each hit is read
        :return: list of tables
        """
        guids = [hit.guid for hit in response]
        # receive all entities
        entities = self._entities(self.atlas.entity_bulk(guid=guids))

        # Collect the guid of every database referenced by a table.
        db_refs = (entity.relationshipAttributes.get(self.DB_ATTRIBUTE)
                   for entity in entities)
        db_guids = [db_ref['guid'] for db_ref in db_refs if db_ref]

        # request databases; the bulk call is skipped when no table references one
        if db_guids:
            db_entities = self._entities(self.atlas.entity_bulk(guid=db_guids))
        else:
            db_entities = []
        databases: Dict[str, Entity] = {db.guid: db for db in db_entities}

        tables = []
        for entity in entities:
            relations = entity.relationshipAttributes
            attrs = entity.attributes
            db_ref = relations.get(self.DB_ATTRIBUTE)

            # Tables without a resolvable database keep empty name / cluster.
            db_cluster = ''
            db_name = ''
            if db_ref and db_ref['guid'] in databases:
                db_attrs = databases[db_ref['guid']].attributes
                db_name = db_attrs.get(self.NAME_ATTRIBUTE)
                db_cluster = db_attrs.get("clusterName", "")

            # Using or in case the key 'classifications' is present with value None
            tags = [
                classification.get('typeName')
                for classification in attrs.get("classifications") or list()
            ]

            # TODO: Implement columns
            columns: List[str] = []
            # for column in attrs.get('columns') or list():
            #     col_entity = entity.referredEntities[column['guid']]
            #     col_attrs = col_entity['attributes']
            #     columns.append(col_attrs.get(self.NAME_KEY))

            table_name = attrs.get(self.NAME_ATTRIBUTE)
            tables.append(
                Table(
                    name=table_name,
                    key=f"{entity.typeName}://{db_cluster}.{db_name}/{table_name}",
                    description=attrs.get('description'),
                    cluster=db_cluster,
                    database=entity.typeName or 'Table',
                    schema_name=db_name,
                    column_names=columns,
                    tags=tags,
                    last_updated_epoch=attrs.get('updateTime')))

        return tables