Esempio n. 1
0
    def test_get_readers(self) -> None:
        # Stub the two driver entry points so that both the basic search and
        # the bulk entity call hand back the shared reader fixture.
        search_stub = MagicMock()
        search_stub.entities = self.reader_entities
        self.proxy._driver.search_basic.create = MagicMock(
            return_value=search_stub)

        bulk_stub = MagicMock()
        bulk_stub.entities = self.reader_entities
        self.proxy._driver.entity_bulk = MagicMock(return_value=[bulk_stub])

        actual = self.proxy._get_readers('dummy', 1)

        # Both fixture readers are expected back, in this order.
        expected: List[Reader] = [
            Reader(user=User(email='test_user_1', user_id='test_user_1'),
                   read_count=5),
            Reader(user=User(email='test_user_2', user_id='test_user_2'),
                   read_count=150),
        ]

        self.assertEqual(actual, expected)
    def _get_readers(self, entity: AtlasEntityWithExtInfo, top: Optional[int] = 15) -> List[Reader]:
        """
        Build the list of readers attached to an entity, most frequent first.

        Only reader relationships whose ``entityStatus`` and ``relationshipStatus``
        both equal ``Status.ACTIVE`` are looked up, and only readers whose ``count``
        attribute reaches ``POPULAR_TABLE_MINIMUM_READER_COUNT`` are kept.

        :param entity: entity payload; readers are read from
            ``relationshipAttributes.readers``
        :param top: maximum number of readers to return (slicing with ``None``
            returns all of them)
        :return: readers sorted by ``read_count`` in descending order
        """
        reader_refs = entity.get('relationshipAttributes', dict()).get('readers', list())

        active_guids = []
        for ref in reader_refs:
            if (ref.get('entityStatus', 'INACTIVE') == Status.ACTIVE
                    and ref.get('relationshipStatus', 'INACTIVE') == Status.ACTIVE):
                active_guids.append(ref.get('guid'))

        # Nothing active to look up, so skip the round trip to the client.
        if not active_guids:
            return []

        fetched = self.client.entity.get_entities_by_guids(guids=list(active_guids),
                                                           ignore_relationships=False)

        collected = []
        for reader_entity in fetched.entities or list():
            read_count = reader_entity.attributes['count']

            # Keep only readers that reach the configured minimum read count.
            if read_count >= int(app.config['POPULAR_TABLE_MINIMUM_READER_COUNT']):
                user_name = reader_entity.relationshipAttributes['user']['displayText']
                user_details = self._get_user_details(user_name)
                collected.append(Reader(user=User(**user_details), read_count=read_count))

        # Stable descending sort, then cap the result at ``top`` entries.
        collected.sort(key=attrgetter('read_count'), reverse=True)

        return collected[:top]
Esempio n. 3
0
    def test_get_readers(self) -> None:
        # The bulk entity lookup always answers with the shared reader fixture.
        bulk_stub = MagicMock()
        bulk_stub.entities = self.reader_entities
        self.proxy.client.entity.get_entities_by_guids = MagicMock(
            return_value=bulk_stub)

        def entity_with_active_reader() -> dict:
            # Fresh payload per call: a single reader relationship that is
            # ACTIVE on both status fields.
            return {
                'relationshipAttributes': {
                    'readers': [
                        {
                            'guid': 1,
                            'entityStatus': 'ACTIVE',
                            'relationshipStatus': 'ACTIVE',
                        },
                    ],
                },
            }

        # Requesting the Reader model with top=1.
        res = self.proxy._get_readers(entity_with_active_reader(), Reader, 1)
        expected_readers = [
            Reader(user=User(email='test_user_2', user_id='test_user_2'),
                   read_count=150)
        ]
        self.assertEqual(expected_readers, res)

        # Requesting the User model with top=1.
        res = self.proxy._get_readers(entity_with_active_reader(), User, 1)
        expected_users = [User(email='test_user_1', user_id='test_user_1')]
        self.assertEqual(expected_users, res)

        # A value that is not a supported model is expected to give an empty list.
        res = self.proxy._get_readers(entity_with_active_reader(), 'WRONG_MODEL', 1)
        expected = []  # type: ignore
        self.assertEqual(expected, res)
Esempio n. 4
0
    def _get_readers(self,
                     qualified_name: str,
                     top: Optional[int] = 15) -> List[Reader]:
        """
        Search for reader entities under a qualified name and turn them into Readers.

        A basic search is issued for entities of ``self.READER_TYPE`` whose
        ``self.QN_KEY`` starts with the part of ``qualified_name`` before the first
        ``@`` followed by a dot, and whose ``count`` is at least
        ``POPULAR_TABLE_MINIMUM_READER_COUNT``. The search is asked to sort by
        ``count`` in descending order and to return at most ``top`` entries. The
        matching guids are then fetched in bulk to build the Reader objects.

        :param qualified_name: qualified name whose prefix is used for the search
        :param top: value sent as the search ``limit``
        :return: one Reader per fetched entity; empty when the search finds nothing
        """
        qualified_name_filter = {
            'attributeName': self.QN_KEY,
            'operator': 'STARTSWITH',
            'attributeValue': qualified_name.split('@')[0] + '.'
        }
        minimum_count_filter = {
            'attributeName': 'count',
            'operator': 'gte',
            'attributeValue': f'{app.config["POPULAR_TABLE_MINIMUM_READER_COUNT"]}'
        }
        params = {
            'typeName': self.READER_TYPE,
            'offset': '0',
            'limit': top,
            'excludeDeletedEntities': True,
            'entityFilters': {
                'condition': 'AND',
                'criterion': [qualified_name_filter, minimum_count_filter]
            },
            'attributes': ['count', self.QN_KEY],
            'sortBy': 'count',
            'sortOrder': 'DESCENDING'
        }

        search_results = self._driver.search_basic.create(
            data=params, ignoreRelationships=False)

        guids = [record.guid for record in search_results.entities]

        # No matches: skip the bulk fetch altogether.
        if not guids:
            return []

        bulk_response = self._driver.entity_bulk(guid=guids,
                                                 ignoreRelationships=False)

        found = []
        for read_entity in extract_entities(bulk_response):
            user_name = read_entity.relationshipAttributes['user']['displayText']

            details = self.user_detail_method(user_name)
            if not details:
                # No details available: use the display text for both fields.
                details = {'email': user_name, 'user_id': user_name}

            found.append(
                Reader(user=User(**details),
                       read_count=read_entity.attributes['count']))

        return found
Esempio n. 5
0
    def test_get_readers(self) -> None:
        # Stub the driver's search and bulk calls with the shared reader fixture.
        search_response = MagicMock()
        search_response.entities = self.reader_entities
        self.proxy._driver.search_basic.create = MagicMock(return_value=search_response)

        bulk_response = MagicMock()
        bulk_response.entities = self.reader_entities
        self.proxy._driver.entity_bulk = MagicMock(return_value=[bulk_response])

        # Entity payload with one reader relationship, ACTIVE on both status fields.
        active_reader = {'guid': 1, 'entityStatus': 'ACTIVE', 'relationshipStatus': 'ACTIVE'}
        entity = {'relationshipAttributes': {'readers': [active_reader]}}

        actual = self.proxy._get_readers(entity, 1)

        expected = [Reader(user=User(email='test_user_2', user_id='test_user_2'), read_count=150)]

        self.assertEqual(expected, actual)
    def _exec_usage_query(self, table_uri: str) -> List[Reader]:
        """
        Run the usage query for a table and wrap each record in a Reader.

        The Cypher statement matches ``(User)-[READ]->(Table)`` for the given table
        key, orders by ``read_count`` descending and carries a ``LIMIT 5`` clause.

        :param table_uri: table key bound to ``$tbl_key`` in the query
        :return: one Reader per returned record, in the order the query yields them
        """
        usage_query = textwrap.dedent("""\
        MATCH (user:User)-[read:READ]->(table:Table {key: $tbl_key})
        RETURN user.email as email, read.read_count as read_count, table.name as table_name
        ORDER BY read.read_count DESC LIMIT 5;
        """)

        records = self._execute_cypher_query(statement=usage_query,
                                             param_dict={'tbl_key': table_uri})

        return [
            Reader(user=User(email=record['email']), read_count=record['read_count'])
            for record in records
        ]
Esempio n. 7
0
    def test_get_table(self, mock_rds_client: Any) -> None:
        # Checks that MySQLProxy.get_table turns mocked RDS model rows into the
        # expected Table object.
        # NOTE(review): mock_rds_client is presumably injected by a patch
        # decorator that sits outside this view — confirm.

        # Database -> cluster -> schema chain that the table is attached to.
        database = RDSDatabase(name='hive')
        cluster = RDSCluster(name='gold')
        schema = RDSSchema(name='foo_schema')
        schema.cluster = cluster
        cluster.database = database

        table = RDSTable(name='foo_table')
        table.schema = schema
        table.description = RDSTableDescription(description='foo description')

        # First column: description and one stat, no badges.
        col1 = RDSColumn(name='bar_id_1', type='varchar', sort_order=0)
        col1.description = RDSColumnDescription(
            description='bar col description')
        col1.stats = [
            RDSColumnStat(stat_type='avg',
                          start_epoch='1',
                          end_epoch='1',
                          stat_val='1')
        ]

        # Second column: description, one stat and a 'primary key' badge.
        col2 = RDSColumn(name='bar_id_2', type='bigint', sort_order=1)
        col2.description = RDSColumnDescription(
            description='bar col2 description')
        col2.stats = [
            RDSColumnStat(stat_type='avg',
                          start_epoch='2',
                          end_epoch='2',
                          stat_val='2')
        ]
        col2.badges = [RDSBadge(rk='primary key', category='column')]
        columns = [col1, col2]

        # One high and one low watermark; the kind only appears inside the rk.
        table.watermarks = [
            RDSTableWatermark(
                rk='hive://gold.test_schema/test_table/high_watermark/',
                partition_key='ds',
                partition_value='fake_value',
                create_time='fake_time'),
            RDSTableWatermark(
                rk='hive://gold.test_schema/test_table/low_watermark/',
                partition_key='ds',
                partition_value='fake_value',
                create_time='fake_time')
        ]

        # Remaining table-level relations: application, timestamp, owners,
        # tags, badges, source and programmatic descriptions.
        table.application = RDSApplication(
            application_url='airflow_host/admin/airflow/tree?dag_id=test_table',
            description='DAG generating a table',
            name='Airflow',
            id='dag/task_id')
        table.timestamp = RDSTableTimestamp(last_updated_timestamp=1)

        table.owners = [
            RDSUser(rk='*****@*****.**', email='*****@*****.**')
        ]
        table.tags = [RDSTag(rk='test', tag_type='default')]
        table.badges = [RDSBadge(rk='golden', category='table_status')]
        table.source = RDSTableSource(rk='some key',
                                      source_type='github',
                                      source='/source_file_loc')
        table.programmatic_descriptions = [
            RDSTableProgrammaticDescription(description_source='s3_crawler',
                                            description='Test Test Test'),
            RDSTableProgrammaticDescription(
                description_source='quality_report', description='Test Test')
        ]

        # Usage rows handed back by the mocked readers query below.
        readers = [RDSTableUsage(user_rk='*****@*****.**', read_count=5)]

        # Mock wiring: client -> create_session() -> context manager -> session.
        mock_client = MagicMock()
        mock_rds_client.return_value = mock_client

        mock_create_session = MagicMock()
        mock_client.create_session.return_value = mock_create_session

        mock_session = MagicMock()
        mock_create_session.__enter__.return_value = mock_session

        mock_session_query = MagicMock()
        mock_session.query.return_value = mock_session_query

        # query(...).filter(...).first() yields the table fixture.
        mock_session_query_filter = MagicMock()
        mock_session_query.filter.return_value = mock_session_query_filter
        mock_session_query_filter.first.return_value = table

        # query(...).filter(...).order_by(...).limit(...).all() yields the readers.
        mock_session_query_filter_orderby = MagicMock()
        mock_session_query_filter.order_by.return_value = mock_session_query_filter_orderby

        mock_session_query_filter_orderby_limit = MagicMock()
        mock_session_query_filter_orderby.limit.return_value = mock_session_query_filter_orderby_limit
        mock_session_query_filter_orderby_limit.all.return_value = readers

        # query(...).filter(...).options(...).all() yields the columns.
        mock_session_query_filter_options = MagicMock()
        mock_session_query_filter.options.return_value = mock_session_query_filter_options
        mock_session_query_filter_options.all.return_value = columns

        proxy = MySQLProxy()
        actual_table = proxy.get_table(table_uri='dummy_uri')

        # Expected result. Differences from the fixtures worth noting:
        #  - Stat epochs are ints here while the fixtures supply strings;
        #  - watermark_type carries the kind that the fixtures only have in rk;
        #  - programmatic descriptions are listed in the opposite order.
        # NOTE(review): presumably get_table converts, derives and sorts these
        # respectively — confirm against the proxy implementation.
        expected = Table(
            database='hive',
            cluster='gold',
            schema='foo_schema',
            name='foo_table',
            tags=[Tag(tag_name='test', tag_type='default')],
            badges=[Badge(badge_name='golden', category='table_status')],
            table_readers=[
                Reader(user=User(email='*****@*****.**'), read_count=5)
            ],
            description='foo description',
            watermarks=[
                Watermark(watermark_type='high_watermark',
                          partition_key='ds',
                          partition_value='fake_value',
                          create_time='fake_time'),
                Watermark(watermark_type='low_watermark',
                          partition_key='ds',
                          partition_value='fake_value',
                          create_time='fake_time')
            ],
            columns=[
                Column(name='bar_id_1',
                       description='bar col description',
                       col_type='varchar',
                       sort_order=0,
                       stats=[
                           Stat(start_epoch=1,
                                end_epoch=1,
                                stat_type='avg',
                                stat_val='1')
                       ],
                       badges=[]),
                Column(name='bar_id_2',
                       description='bar col2 description',
                       col_type='bigint',
                       sort_order=1,
                       stats=[
                           Stat(start_epoch=2,
                                end_epoch=2,
                                stat_type='avg',
                                stat_val='2')
                       ],
                       badges=[
                           Badge(badge_name='primary key', category='column')
                       ])
            ],
            owners=[User(email='*****@*****.**')],
            table_writer=Application(
                application_url=
                'airflow_host/admin/airflow/tree?dag_id=test_table',
                description='DAG generating a table',
                name='Airflow',
                id='dag/task_id'),
            last_updated_timestamp=1,
            source=Source(source='/source_file_loc', source_type='github'),
            is_view=False,
            programmatic_descriptions=[
                ProgrammaticDescription(source='quality_report',
                                        text='Test Test'),
                ProgrammaticDescription(source='s3_crawler',
                                        text='Test Test Test')
            ])

        # The comparison is on the string renderings, not on object equality.
        self.assertEqual(str(expected), str(actual_table))