def test_get_readers(self) -> None:
    """_get_readers should map the mocked reader entities into Reader models."""
    search_result_mock = MagicMock()
    search_result_mock.entities = self.reader_entities
    self.proxy._driver.search_basic.create = MagicMock(return_value=search_result_mock)

    bulk_result_mock = MagicMock()
    bulk_result_mock.entities = self.reader_entities
    self.proxy._driver.entity_bulk = MagicMock(return_value=[bulk_result_mock])

    actual = self.proxy._get_readers('dummy', 1)

    expected: List[Reader] = [
        Reader(user=User(email='test_user_1', user_id='test_user_1'), read_count=5),
        Reader(user=User(email='test_user_2', user_id='test_user_2'), read_count=150),
    ]
    self.assertEqual(actual, expected)
def _get_readers(self, entity: AtlasEntityWithExtInfo, top: Optional[int] = 15) -> List[Reader]:
    """Return the most frequent readers of a table entity.

    :param entity: Atlas entity (dict-like) whose ``relationshipAttributes``
        may contain a ``readers`` relationship list.
    :param top: Maximum number of readers to return. ``None`` means no limit
        (it is used as a slice bound).
    :return: Readers whose entity and relationship status are both ACTIVE and
        whose read count meets ``POPULAR_TABLE_MINIMUM_READER_COUNT``, sorted
        by ``read_count`` descending and truncated to ``top``.
    """
    _readers = entity.get('relationshipAttributes', dict()).get('readers', list())

    # Only follow reader relationships that are fully active on both ends.
    guids = [_reader.get('guid') for _reader in _readers
             if _reader.get('entityStatus', 'INACTIVE') == Status.ACTIVE
             and _reader.get('relationshipStatus', 'INACTIVE') == Status.ACTIVE]

    if not guids:
        return []

    # `guids` is already a list, so no extra conversion is needed here.
    readers = self.client.entity.get_entities_by_guids(guids=guids, ignore_relationships=False)

    # Hoist the config lookup/int conversion out of the loop — it is invariant.
    min_reader_count = int(app.config['POPULAR_TABLE_MINIMUM_READER_COUNT'])

    _result = []
    for _reader in readers.entities or list():
        read_count = _reader.attributes['count']
        if read_count >= min_reader_count:
            reader_qn = _reader.relationshipAttributes['user']['displayText']
            reader_details = self._get_user_details(reader_qn)
            _result.append(Reader(user=User(**reader_details), read_count=read_count))

    return sorted(_result, key=attrgetter('read_count'), reverse=True)[:top]
def test_get_readers(self) -> None:
    """Exercise _get_readers for each supported model argument."""
    bulk_result_mock = MagicMock()
    bulk_result_mock.entities = self.reader_entities
    self.proxy.client.entity.get_entities_by_guids = MagicMock(return_value=bulk_result_mock)

    def make_entity() -> dict:
        # Fresh dict per call so calls cannot observe each other's input.
        return dict(relationshipAttributes=dict(readers=[
            dict(guid=1, entityStatus='ACTIVE', relationshipStatus='ACTIVE')
        ]))

    actual = self.proxy._get_readers(make_entity(), Reader, 1)
    self.assertEqual(
        [Reader(user=User(email='test_user_2', user_id='test_user_2'), read_count=150)],
        actual)

    actual = self.proxy._get_readers(make_entity(), User, 1)
    self.assertEqual([User(email='test_user_1', user_id='test_user_1')], actual)

    actual = self.proxy._get_readers(make_entity(), 'WRONG_MODEL', 1)
    self.assertEqual([], actual)  # type: ignore
def _get_readers(self, qualified_name: str, top: Optional[int] = 15) -> List[Reader]:
    """Look up Reader models for a table via an Atlas basic search.

    :param qualified_name: Table qualified name; its pre-'@' part prefixes
        the reader entities' qualified names.
    :param top: Passed to the search as the result limit.
    :return: Readers above the configured minimum read count, in the
        search's descending-count order.
    """
    search_params = {
        'typeName': self.READER_TYPE,
        'offset': '0',
        'limit': top,
        'excludeDeletedEntities': True,
        'entityFilters': {
            'condition': 'AND',
            'criterion': [
                {
                    'attributeName': self.QN_KEY,
                    'operator': 'STARTSWITH',
                    'attributeValue': qualified_name.split('@')[0] + '.'
                },
                {
                    'attributeName': 'count',
                    'operator': 'gte',
                    'attributeValue': f'{app.config["POPULAR_TABLE_MINIMUM_READER_COUNT"]}'
                }
            ]
        },
        'attributes': ['count', self.QN_KEY],
        'sortBy': 'count',
        'sortOrder': 'DESCENDING'
    }

    search_results = self._driver.search_basic.create(data=search_params,
                                                      ignoreRelationships=False)
    reader_guids = [record.guid for record in search_results.entities]

    results: List[Reader] = []
    if reader_guids:
        read_entities = extract_entities(
            self._driver.entity_bulk(guid=reader_guids, ignoreRelationships=False))
        for read_entity in read_entities:
            reader_qn = read_entity.relationshipAttributes['user']['displayText']
            # Fall back to the qualified name itself when no user details exist.
            details = self.user_detail_method(reader_qn) or {
                'email': reader_qn,
                'user_id': reader_qn
            }
            results.append(Reader(user=User(**details),
                                  read_count=read_entity.attributes['count']))

    return results
def test_get_readers(self) -> None:
    """_get_readers should return the expected Reader for an active relationship."""
    search_result_mock = MagicMock()
    search_result_mock.entities = self.reader_entities
    self.proxy._driver.search_basic.create = MagicMock(return_value=search_result_mock)

    bulk_result_mock = MagicMock()
    bulk_result_mock.entities = self.reader_entities
    self.proxy._driver.entity_bulk = MagicMock(return_value=[bulk_result_mock])

    entity = dict(relationshipAttributes=dict(readers=[
        dict(guid=1, entityStatus='ACTIVE', relationshipStatus='ACTIVE')
    ]))
    actual = self.proxy._get_readers(entity, 1)

    expected = [Reader(user=User(email='test_user_2', user_id='test_user_2'), read_count=150)]
    self.assertEqual(expected, actual)
def _exec_usage_query(self, table_uri: str) -> List[Reader]:
    """Return up to five Reader records for a table, most-read first.

    :param table_uri: Key of the Table node in Neo4j.
    :return: Readers built from the usage query rows (email + read_count).
    """
    usage_query = textwrap.dedent("""\
    MATCH (user:User)-[read:READ]->(table:Table {key: $tbl_key})
    RETURN user.email as email, read.read_count as read_count, table.name as table_name
    ORDER BY read.read_count DESC LIMIT 5;
    """)

    records = self._execute_cypher_query(statement=usage_query,
                                         param_dict={'tbl_key': table_uri})

    return [Reader(user=User(email=record['email']),
                   read_count=record['read_count'])
            for record in records]
def test_get_table(self, mock_rds_client: Any) -> None:
    """get_table should assemble a complete Table model from the RDS records."""
    # --- RDS fixture graph the mocked session will return -------------------
    rds_database = RDSDatabase(name='hive')
    rds_cluster = RDSCluster(name='gold')
    rds_cluster.database = rds_database
    rds_schema = RDSSchema(name='foo_schema')
    rds_schema.cluster = rds_cluster

    rds_table = RDSTable(name='foo_table')
    rds_table.schema = rds_schema
    rds_table.description = RDSTableDescription(description='foo description')

    first_col = RDSColumn(name='bar_id_1', type='varchar', sort_order=0)
    first_col.description = RDSColumnDescription(description='bar col description')
    first_col.stats = [
        RDSColumnStat(stat_type='avg', start_epoch='1', end_epoch='1', stat_val='1')
    ]

    second_col = RDSColumn(name='bar_id_2', type='bigint', sort_order=1)
    second_col.description = RDSColumnDescription(description='bar col2 description')
    second_col.stats = [
        RDSColumnStat(stat_type='avg', start_epoch='2', end_epoch='2', stat_val='2')
    ]
    second_col.badges = [RDSBadge(rk='primary key', category='column')]

    rds_columns = [first_col, second_col]

    rds_table.watermarks = [
        RDSTableWatermark(
            rk='hive://gold.test_schema/test_table/high_watermark/',
            partition_key='ds',
            partition_value='fake_value',
            create_time='fake_time'),
        RDSTableWatermark(
            rk='hive://gold.test_schema/test_table/low_watermark/',
            partition_key='ds',
            partition_value='fake_value',
            create_time='fake_time'),
    ]
    rds_table.application = RDSApplication(
        application_url='airflow_host/admin/airflow/tree?dag_id=test_table',
        description='DAG generating a table',
        name='Airflow',
        id='dag/task_id')
    rds_table.timestamp = RDSTableTimestamp(last_updated_timestamp=1)
    rds_table.owners = [
        RDSUser(rk='*****@*****.**', email='*****@*****.**')
    ]
    rds_table.tags = [RDSTag(rk='test', tag_type='default')]
    rds_table.badges = [RDSBadge(rk='golden', category='table_status')]
    rds_table.source = RDSTableSource(rk='some key',
                                      source_type='github',
                                      source='/source_file_loc')
    rds_table.programmatic_descriptions = [
        RDSTableProgrammaticDescription(description_source='s3_crawler',
                                        description='Test Test Test'),
        RDSTableProgrammaticDescription(description_source='quality_report',
                                        description='Test Test'),
    ]

    usage_records = [RDSTableUsage(user_rk='*****@*****.**', read_count=5)]

    # --- Wire the session/query mock chain ----------------------------------
    client_mock = MagicMock()
    mock_rds_client.return_value = client_mock
    session_mock = MagicMock()
    client_mock.create_session.return_value.__enter__.return_value = session_mock

    filter_mock = session_mock.query.return_value.filter.return_value
    filter_mock.first.return_value = rds_table
    filter_mock.order_by.return_value.limit.return_value.all.return_value = usage_records
    filter_mock.options.return_value.all.return_value = rds_columns

    # --- Exercise and verify -------------------------------------------------
    proxy = MySQLProxy()
    actual_table = proxy.get_table(table_uri='dummy_uri')

    expected = Table(
        database='hive',
        cluster='gold',
        schema='foo_schema',
        name='foo_table',
        tags=[Tag(tag_name='test', tag_type='default')],
        badges=[Badge(badge_name='golden', category='table_status')],
        table_readers=[
            Reader(user=User(email='*****@*****.**'), read_count=5)
        ],
        description='foo description',
        watermarks=[
            Watermark(watermark_type='high_watermark',
                      partition_key='ds',
                      partition_value='fake_value',
                      create_time='fake_time'),
            Watermark(watermark_type='low_watermark',
                      partition_key='ds',
                      partition_value='fake_value',
                      create_time='fake_time'),
        ],
        columns=[
            Column(name='bar_id_1',
                   description='bar col description',
                   col_type='varchar',
                   sort_order=0,
                   stats=[Stat(start_epoch=1, end_epoch=1,
                               stat_type='avg', stat_val='1')],
                   badges=[]),
            Column(name='bar_id_2',
                   description='bar col2 description',
                   col_type='bigint',
                   sort_order=1,
                   stats=[Stat(start_epoch=2, end_epoch=2,
                               stat_type='avg', stat_val='2')],
                   badges=[Badge(badge_name='primary key', category='column')]),
        ],
        owners=[User(email='*****@*****.**')],
        table_writer=Application(
            application_url='airflow_host/admin/airflow/tree?dag_id=test_table',
            description='DAG generating a table',
            name='Airflow',
            id='dag/task_id'),
        last_updated_timestamp=1,
        source=Source(source='/source_file_loc', source_type='github'),
        is_view=False,
        programmatic_descriptions=[
            ProgrammaticDescription(source='quality_report', text='Test Test'),
            ProgrammaticDescription(source='s3_crawler', text='Test Test Test'),
        ])

    # Compare via str() to match the original assertion's semantics.
    self.assertEqual(str(expected), str(actual_table))