def test_get_readers(self) -> None:
    """_get_readers should return the requested model and [] for unknown models."""
    bulk_result = MagicMock()
    bulk_result.entities = self.reader_entities
    self.proxy.client.entity.get_entities_by_guids = MagicMock(
        return_value=bulk_result)

    def _payload() -> dict:
        # Fresh dict per call, mirroring the per-call literals of the original.
        return dict(relationshipAttributes=dict(readers=[
            dict(guid=1, entityStatus='ACTIVE', relationshipStatus='ACTIVE')]))

    # Reader model: expects the high-read-count user wrapped in a Reader.
    res = self.proxy._get_readers(_payload(), Reader, 1)
    expected_readers = [
        Reader(user=User(email='test_user_2', user_id='test_user_2'),
               read_count=150)
    ]
    self.assertEqual(expected_readers, res)

    # User model: expects the bare user.
    res = self.proxy._get_readers(_payload(), User, 1)
    expected_users = [User(email='test_user_1', user_id='test_user_1')]
    self.assertEqual(expected_users, res)

    # Unknown model: expects an empty result.
    res = self.proxy._get_readers(_payload(), 'WRONG_MODEL', 1)
    expected = []  # type: ignore
    self.assertEqual(expected, res)
def test_get_readers(self) -> None:
    """_get_readers should map search + bulk-fetch results onto Reader objects."""
    search_mock = MagicMock()
    search_mock.entities = self.reader_entities
    self.proxy._driver.search_basic.create = MagicMock(
        return_value=search_mock)
    bulk_mock = MagicMock()
    bulk_mock.entities = self.reader_entities
    self.proxy._driver.entity_bulk = MagicMock(return_value=[bulk_mock])

    res = self.proxy._get_readers('dummy', 1)

    expected: List[Reader] = [
        Reader(user=User(email='test_user_1', user_id='test_user_1'),
               read_count=5),
        Reader(user=User(email='test_user_2', user_id='test_user_2'),
               read_count=150),
    ]
    self.assertEqual(res, expected)
def _get_table(self, custom_stats_format: bool = False) -> None:
    """Shared assertion helper for get_table tests.

    Mocks the table-entity lookup, owners and report collections, calls
    ``proxy.get_table`` and compares the stringified result with an expected
    ``Table`` built from the fixtures.

    :param custom_stats_format: when True, use the formatted column-stat
        fixture instead of the raw one.
    """
    # Pick the column-stat fixture matching the requested format.
    if custom_stats_format:
        test_exp_col = self.test_exp_col_stats_formatted
    else:
        test_exp_col = self.test_exp_col_stats_raw
    ent_attrs = cast(dict, self.entity1['attributes'])
    self._mock_get_table_entity()
    self._create_mocked_report_entities_collection()
    # Owner resolution and report bulk-fetch are stubbed out.
    self.proxy._get_owners = MagicMock(
        return_value=[User(email=ent_attrs['owner'])])  # type: ignore
    self.proxy._driver.entity_bulk = MagicMock(
        return_value=self.report_entity_collection)
    response = self.proxy.get_table(table_uri=self.table_uri)
    classif_name = self.classification_entity['classifications'][0][
        'typeName']
    col_attrs = cast(dict, self.test_column['attributes'])
    # Build the expected per-column statistics from the chosen fixture.
    exp_col_stats = list()
    for stats in test_exp_col:
        exp_col_stats.append(
            Stat(
                stat_type=stats['attributes']['stat_name'],
                stat_val=stats['attributes']['stat_val'],
                start_epoch=stats['attributes']['start_epoch'],
                end_epoch=stats['attributes']['end_epoch'],
            ))
    exp_col = Column(name=col_attrs['name'],
                     description='column description',
                     col_type='Managed',
                     sort_order=col_attrs['position'],
                     stats=exp_col_stats)
    expected = Table(
        database=self.entity_type,
        cluster=self.cluster,
        schema=self.db,
        name=ent_attrs['name'],
        tags=[Tag(tag_name=classif_name, tag_type="default")],
        description=ent_attrs['description'],
        owners=[User(email=ent_attrs['owner'])],
        resource_reports=[
            ResourceReport(name='test_report', url='http://test'),
            ResourceReport(name='test_report3', url='http://test3')
        ],
        last_updated_timestamp=int(str(self.entity1['updateTime'])[:10]),
        # The same expected column is repeated once per active column.
        columns=[exp_col] * self.active_columns,
        programmatic_descriptions=[
            ProgrammaticDescription(source='test parameter key a',
                                    text='testParameterValueA'),
            ProgrammaticDescription(source='test parameter key b',
                                    text='testParameterValueB')
        ],
        is_view=False)
    # Compare string representations to sidestep model equality nuances.
    self.assertEqual(str(expected), str(response))
def _get_owners(self, data_owners: list, fallback_owner: str) -> List[User]:
    """Resolve ACTIVE owners to User objects, defaulting to the fallback.

    :param data_owners: raw relationship dicts with entity/relationship status
    :param fallback_owner: id used when no active owner is found
    :return: resolved owners, or a single fallback User
    """
    owners_detail: List[User] = []
    for item in data_owners:
        # Skip owners whose entity or relationship is not active.
        if item['entityStatus'] != Status.ACTIVE:
            continue
        if item['relationshipStatus'] != Status.ACTIVE:
            continue
        owner_details = self._get_user_details(item['displayText'])
        owners_detail.append(User(**owner_details))
    if owners_detail:
        return owners_detail
    return [User(email=fallback_owner, user_id=fallback_owner)]
def _get_readers(self, entity: AtlasEntityWithExtInfo, top: Optional[int] = 15) -> List[Reader]:
    """Return the top readers of an entity, ordered by read count.

    Only ACTIVE reader relationships are considered, and readers below the
    configured popularity threshold are dropped.
    """
    relationship_readers = entity.get('relationshipAttributes', dict()).get('readers', list())

    # Collect guids of readers whose entity and relationship are both active.
    active_guids = []
    for rel in relationship_readers:
        if (rel.get('entityStatus', 'INACTIVE') == Status.ACTIVE
                and rel.get('relationshipStatus', 'INACTIVE') == Status.ACTIVE):
            active_guids.append(rel.get('guid'))

    if not active_guids:
        return []

    fetched = self.client.entity.get_entities_by_guids(guids=list(active_guids),
                                                       ignore_relationships=False)

    minimum_count = int(app.config['POPULAR_TABLE_MINIMUM_READER_COUNT'])
    collected = []
    for item in fetched.entities or list():
        count = item.attributes['count']
        # Drop readers that do not meet the popularity threshold.
        if count < minimum_count:
            continue
        reader_qn = item.relationshipAttributes['user']['displayText']
        reader_details = self._get_user_details(reader_qn)
        collected.append(Reader(user=User(**reader_details), read_count=count))

    # Most-read first, truncated to the requested number of entries.
    collected.sort(key=attrgetter('read_count'), reverse=True)
    return collected[:top]
def _get_readers(self, qualified_name: str, top: Optional[int] = 15) -> List[Reader]:
    """Search reader entities for a table and resolve them into Reader models.

    Issues a basic search filtered by qualified-name prefix and minimum read
    count, then bulk-fetches the matching entities.
    """
    qn_prefix = qualified_name.split('@')[0] + '.'
    params = {
        'typeName': self.READER_TYPE,
        'offset': '0',
        'limit': top,
        'excludeDeletedEntities': True,
        'entityFilters': {
            'condition': 'AND',
            'criterion': [{
                'attributeName': self.QN_KEY,
                'operator': 'STARTSWITH',
                'attributeValue': qn_prefix
            }, {
                'attributeName': 'count',
                'operator': 'gte',
                'attributeValue': f'{app.config["POPULAR_TABLE_MINIMUM_READER_COUNT"]}'
            }]
        },
        'attributes': ['count', self.QN_KEY],
        'sortBy': 'count',
        'sortOrder': 'DESCENDING'
    }
    search_results = self._driver.search_basic.create(
        data=params, ignoreRelationships=False)

    reader_guids = [record.guid for record in search_results.entities]

    results: List[Reader] = []
    if not reader_guids:
        return results

    # Bulk-fetch the reader entities and map each onto a Reader model.
    read_entities = extract_entities(
        self._driver.entity_bulk(guid=reader_guids, ignoreRelationships=False))
    for read_entity in read_entities:
        reader_qn = read_entity.relationshipAttributes['user']['displayText']
        # Fall back to a minimal user record when detail lookup yields nothing.
        reader_details = self.user_detail_method(reader_qn) or {
            'email': reader_qn,
            'user_id': reader_qn
        }
        results.append(Reader(user=User(**reader_details),
                              read_count=read_entity.attributes['count']))
    return results
def test_get_table(self) -> None:
    """get_table should assemble a Table from the mocked entity fixtures."""
    self._mock_get_table_entity()
    response = self.proxy.get_table(table_uri=self.table_uri)
    classif_name = self.classification_entity['classifications'][0]['typeName']
    ent_attrs = cast(dict, self.entity1['attributes'])
    col_attrs = cast(dict, self.test_column['attributes'])
    col_metadata_attrs = cast(dict, self.column_metadata_entity['attributes'])
    # Build expected per-column statistics from the metadata fixture.
    exp_col_stats = list()
    for stats in col_metadata_attrs['statistics']:
        exp_col_stats.append(
            Statistics(
                stat_type=stats['attributes']['stat_name'],
                stat_val=stats['attributes']['stat_val'],
                start_epoch=stats['attributes']['start_epoch'],
                end_epoch=stats['attributes']['end_epoch'],
            )
        )
    exp_col = Column(name=col_attrs['name'],
                     description='column description',
                     col_type='Managed',
                     sort_order=col_attrs['position'],
                     stats=exp_col_stats)
    expected = Table(database=self.entity_type,
                     cluster=self.cluster,
                     schema=self.db,
                     name=ent_attrs['name'],
                     tags=[Tag(tag_name=classif_name, tag_type="default")],
                     description=ent_attrs['description'],
                     owners=[User(email=ent_attrs['owner'])],
                     columns=[exp_col],
                     last_updated_timestamp=cast(int, self.entity1['updateTime']))
    # Compare string representations to sidestep model equality nuances.
    self.assertEqual(str(expected), str(response))
def get_table(self, *, table_uri: str) -> Table:
    """
    Gathers all the information needed for the Table Detail Page.

    :param table_uri: uri of the table whose details are requested
    :return: A Table object with all the information available
    or gathered from different entities.
    :raises BadRequest: when a required attribute is missing on the entity
    """
    entity = self._get_table_entity(table_uri=table_uri)
    table_details = entity.entity

    try:
        attrs = table_details[self.ATTRS_KEY]
        programmatic_descriptions = self._get_programmatic_descriptions(
            attrs.get('parameters'))

        table_qn = parse_table_qualified_name(
            qualified_name=attrs.get(self.QN_KEY))

        tags = []
        # Using or in case, if the key 'classifications' is there with a None
        for classification in table_details.get(
                "classifications") or list():
            tags.append(
                Tag(tag_name=classification.get('typeName'),
                    tag_type="default"))

        columns = self._serialize_columns(entity=entity)

        # Reports are only referenced by guid here; resolved below.
        reports_guids = [
            report.get("guid") for report in attrs.get("reports") or list()
        ]

        table = Table(
            database=table_details.get('typeName'),
            cluster=table_qn.get('cluster_name', ''),
            schema=table_qn.get('db_name', ''),
            # Prefer the explicit name attribute, then the qualified name.
            name=attrs.get('name') or table_qn.get("table_name", ''),
            tags=tags,
            description=attrs.get('description') or attrs.get('comment'),
            owners=[User(email=attrs.get('owner'))],
            resource_reports=self._get_reports(guids=reports_guids),
            columns=columns,
            table_readers=self._get_readers(attrs.get(self.QN_KEY)),
            last_updated_timestamp=self._parse_date(
                table_details.get('updateTime')),
            programmatic_descriptions=programmatic_descriptions)

        return table
    except KeyError as ex:
        # A missing required attribute is surfaced as a client error.
        LOGGER.exception(
            'Error while accessing table information. {}'.format(str(ex)))
        raise BadRequest(
            'Some of the required attributes '
            'are missing in : ( {table_uri} )'.format(table_uri=table_uri))
def _get_owners(self, data_owners: list, fallback_owner: str = None) -> List[User]:
    """Resolve ACTIVE owners to User objects, optionally adding a fallback.

    :param data_owners: raw relationship dicts with entity/relationship status
    :param fallback_owner: extra owner id appended when not already present
    :return: resolved owner Users (possibly empty)
    """
    owners_detail = list()
    seen_owner_qns = list()
    for item in data_owners:
        # Only owners whose entity AND relationship are active count.
        if item['entityStatus'] != Status.ACTIVE:
            continue
        if item['relationshipStatus'] != Status.ACTIVE:
            continue
        owner_qn = item['displayText']
        owners_detail.append(User(**self._get_user_details(owner_qn)))
        seen_owner_qns.append(owner_qn)
    # To avoid duplication, append the fallback only when it is not already
    # among the active owners.
    if fallback_owner and fallback_owner not in seen_owner_qns:
        owners_detail.append(User(**self._get_user_details(fallback_owner)))
    return owners_detail
def test_get_owners_details_only_fallback(self) -> None:
    """With no data owners, _get_owners should return only the fallback user."""
    self.app.config['USER_DETAIL_METHOD'] = None
    user_id = "*****@*****.**"
    res = self.proxy._get_owners(data_owners=list(), fallback_owner=user_id)
    self.assertEqual(1, len(res))
    self.assertListEqual(res, [User(email=user_id, user_id=user_id)])
def test_get_table_view_only(self) -> None:
    """get_table should mark the table as a view when the usage rows say so."""
    # Copy the shared fixture and flip every table record to a view.
    col_usage_return_value = copy.deepcopy(self.col_usage_return_value)
    for col in col_usage_return_value:
        col['tbl']['is_view'] = True
    with patch.object(GraphDatabase, 'driver'), patch.object(Neo4jProxy, '_execute_cypher_query') as mock_execute:
        # Query order: column usage, usage readers (empty), table-level record.
        mock_execute.side_effect = [col_usage_return_value, [], self.table_level_return_value]
        neo4j_proxy = Neo4jProxy(host='DOES_NOT_MATTER', port=0000)
        table = neo4j_proxy.get_table(table_uri='dummy_uri')
        expected = Table(database='hive', cluster='gold', schema='foo_schema', name='foo_table',
                         tags=[Tag(tag_name='test', tag_type='default')],
                         badges=[Badge(badge_name='golden', category='table_status')],
                         table_readers=[], description='foo description',
                         watermarks=[Watermark(watermark_type='high_watermark',
                                               partition_key='ds',
                                               partition_value='fake_value',
                                               create_time='fake_time'),
                                     Watermark(watermark_type='low_watermark',
                                               partition_key='ds',
                                               partition_value='fake_value',
                                               create_time='fake_time')],
                         columns=[Column(name='bar_id_1', description='bar col description', col_type='varchar',
                                         sort_order=0, stats=[Stat(start_epoch=1,
                                                                   end_epoch=1,
                                                                   stat_type='avg',
                                                                   stat_val='1')], badges=[]),
                                  Column(name='bar_id_2', description='bar col2 description', col_type='bigint',
                                         sort_order=1, stats=[Stat(start_epoch=2,
                                                                   end_epoch=2,
                                                                   stat_type='avg',
                                                                   stat_val='2')],
                                         badges=[Badge(badge_name='primary key', category='column')])],
                         owners=[User(email='*****@*****.**')],
                         table_writer=Application(application_url=self.table_writer['application_url'],
                                                  description=self.table_writer['description'],
                                                  name=self.table_writer['name'],
                                                  id=self.table_writer['id']),
                         last_updated_timestamp=1,
                         source=Source(source='/source_file_loc', source_type='github'),
                         is_view=True,
                         programmatic_descriptions=[
                             ProgrammaticDescription(source='quality_report',
                                                     text='Test Test'),
                             ProgrammaticDescription(source='s3_crawler',
                                                     text='Test Test Test')
                         ])
        # Compare string representations to sidestep model equality nuances.
        self.assertEqual(str(expected), str(table))
def test_get_readers(self) -> None:
    """_get_readers should surface the popular reader from the bulk result."""
    search_result = MagicMock()
    search_result.entities = self.reader_entities
    self.proxy._driver.search_basic.create = MagicMock(return_value=search_result)
    bulk_result = MagicMock()
    bulk_result.entities = self.reader_entities
    self.proxy._driver.entity_bulk = MagicMock(return_value=[bulk_result])

    readers_payload = dict(relationshipAttributes=dict(readers=[
        dict(guid=1, entityStatus='ACTIVE', relationshipStatus='ACTIVE')]))
    res = self.proxy._get_readers(readers_payload, 1)

    expected = [Reader(user=User(email='test_user_2', user_id='test_user_2'),
                       read_count=150)]
    self.assertEqual(expected, res)
def _exec_usage_query(self, table_uri: str) -> List[Reader]:
    """Fetch the top-5 readers of a table, ordered by read count descending.

    :param table_uri: key of the table node to look up
    :return: Reader models built from the usage query rows
    """
    usage_query = textwrap.dedent("""\
    MATCH (user:User)-[read:READ]->(table:Table {key: $tbl_key})
    RETURN user.email as email, read.read_count as read_count, table.name as table_name
    ORDER BY read.read_count DESC LIMIT 5;
    """)

    usage_records = self._execute_cypher_query(statement=usage_query,
                                               param_dict={'tbl_key': table_uri})

    # One Reader per returned row.
    return [
        Reader(user=User(email=record['email']),
               read_count=record['read_count'])
        for record in usage_records
    ]
def test_get_dashboard(self) -> None: self.proxy.client.entity.get_entity_by_attribute = MagicMock( return_value=self.dashboard_data) # type: ignore self.proxy._get_dashboard_group = MagicMock( return_value=self.dashboard_group_data) # type: ignore self.proxy.client.entity.get_entities_by_guids = MagicMock( return_value=DottedDict({'entities': [DottedDict(self.entity1)]})) expected = DashboardDetail( uri='superset_dashboard://datalab.prod/1', cluster='datalab', group_name='prod superset', group_url='https://superset.prod', product='superset', name='Prod Usage', url='https://prod.superset/dashboards/1', description='Robs famous dashboard', created_timestamp=1619517099, updated_timestamp=1619626531, last_successful_run_timestamp=1619517099, last_run_timestamp=1619517150, last_run_state='failed', owners=[ User(user_id='lisa_salinas', email='lisa_salinas', first_name=None, last_name=None, full_name=None, display_name=None, is_active=True, github_username=None, team_name=None, slack_id=None, employee_type=None, manager_fullname=None, manager_email=None, manager_id=None, role_name=None, profile_url=None, other_key_values={}) ], frequent_users=[], chart_names=['Count Users by Time', 'Total Count'], query_names=['User Count By Time', 'Total Count'], queries=[ DashboardQuery( name='User Count By Time', url='https://prod.superset/dashboards/1/query/1', query_text='SELECT date, COUNT(1) FROM db.table GROUP BY 1' ), DashboardQuery( name='Total Count', url='https://prod.superset/dashboards/1/query/2', query_text='SELECT COUNT(1) FROM db.table') ], tables=[ PopularTable(database='hive_table', cluster='TEST_CLUSTER', schema='TEST_DB', name='Table1', description='Dummy Description') ], tags=[], badges=[], recent_view_count=0) result = self.proxy.get_dashboard( id='superset_dashboard://datalab.prod/1') self.assertEqual(expected, result)
def test_get_dashboard(self) -> None:
    """get_dashboard should map both a full and a minimal Cypher record.

    The mocked query yields two records in order: a fully-populated dashboard
    and a sparse one with optional fields absent/None.
    """
    with patch.object(GraphDatabase, 'driver'), patch.object(
            Neo4jProxy, '_execute_cypher_query') as mock_execute:
        mock_execute.return_value.single.side_effect = [{
            'cluster_name': 'cluster_name',
            'uri': 'foo_dashboard://gold.bar/dashboard_id',
            'url': 'http://www.foo.bar/dashboard_id',
            'product': 'foobar',
            'name': 'dashboard name',
            'created_timestamp': 123456789,
            'description': 'description',
            'group_name': 'group_name',
            'group_url': 'http://www.group_url.com',
            'last_successful_run_timestamp': 9876543210,
            'last_run_timestamp': 987654321,
            'last_run_state': 'good_state',
            'updated_timestamp': 123456654321,
            'recent_view_count': 100,
            'owners': [{
                'employee_type': 'teamMember',
                'full_name': 'test_full_name',
                'is_active': 'True',
                'github_username': '******',
                'slack_id': 'test_id',
                'last_name': 'test_last_name',
                'first_name': 'test_first_name',
                'team_name': 'test_team',
                'email': 'test_email',
            }, {
                'employee_type': 'teamMember',
                'full_name': 'test_full_name2',
                'is_active': 'True',
                'github_username': '******',
                'slack_id': 'test_id2',
                'last_name': 'test_last_name2',
                'first_name': 'test_first_name2',
                'team_name': 'test_team2',
                'email': 'test_email2',
            }],
            'tags': [{
                'key': 'tag_key1',
                'tag_type': 'tag_type1'
            }, {
                'key': 'tag_key2',
                'tag_type': 'tag_type2'
            }],
            'charts': [{
                'name': 'chart1'
            }, {
                'name': 'chart2'
            }],
            'queries': [{
                'name': 'query1'
            }, {
                'name': 'query2'
            }],
            'tables': [{
                'database': 'db1',
                'name': 'table1',
                'description': 'table description 1',
                'cluster': 'cluster1',
                'schema': 'schema1'
            }, {
                'database': 'db2',
                'name': 'table2',
                'description': None,
                'cluster': 'cluster2',
                'schema': 'schema2'
            }]
        }, {
            # Minimal record: optional fields are None/empty.
            'cluster_name': 'cluster_name',
            'uri': 'foo_dashboard://gold.bar/dashboard_id',
            'url': 'http://www.foo.bar/dashboard_id',
            'product': 'foobar',
            'name': 'dashboard name',
            'created_timestamp': 123456789,
            'description': None,
            'group_name': 'group_name',
            'group_url': 'http://www.group_url.com',
            'last_run_timestamp': None,
            'last_run_state': None,
            'updated_timestamp': None,
            'recent_view_count': 0,
            'owners': [],
            'tags': [],
            'charts': [],
            'queries': [],
            'tables': []
        }]
        neo4j_proxy = Neo4jProxy(host='DOES_NOT_MATTER', port=0000)
        dashboard = neo4j_proxy.get_dashboard(id='dashboard_id')
        expected = DashboardDetail(
            uri='foo_dashboard://gold.bar/dashboard_id',
            cluster='cluster_name',
            group_name='group_name',
            group_url='http://www.group_url.com',
            product='foobar',
            name='dashboard name',
            url='http://www.foo.bar/dashboard_id',
            description='description',
            created_timestamp=123456789,
            last_successful_run_timestamp=9876543210,
            updated_timestamp=123456654321,
            last_run_timestamp=987654321,
            last_run_state='good_state',
            owners=[
                User(email='test_email',
                     first_name='test_first_name',
                     last_name='test_last_name',
                     full_name='test_full_name',
                     is_active='True',
                     github_username='******',
                     team_name='test_team',
                     slack_id='test_id',
                     employee_type='teamMember',
                     manager_fullname=''),
                User(email='test_email2',
                     first_name='test_first_name2',
                     last_name='test_last_name2',
                     full_name='test_full_name2',
                     is_active='True',
                     github_username='******',
                     team_name='test_team2',
                     slack_id='test_id2',
                     employee_type='teamMember',
                     manager_fullname='')
            ],
            frequent_users=[],
            chart_names=['chart1', 'chart2'],
            query_names=['query1', 'query2'],
            tables=[
                PopularTable(database='db1',
                             name='table1',
                             description='table description 1',
                             cluster='cluster1',
                             schema='schema1'),
                PopularTable(database='db2',
                             name='table2',
                             cluster='cluster2',
                             schema='schema2'),
            ],
            tags=[
                Tag(tag_type='tag_type1', tag_name='tag_key1'),
                Tag(tag_type='tag_type2', tag_name='tag_key2')
            ],
            recent_view_count=100)
        self.assertEqual(expected, dashboard)
        # Second call consumes the minimal record.
        dashboard2 = neo4j_proxy.get_dashboard(id='dashboard_id')
        expected2 = DashboardDetail(
            uri='foo_dashboard://gold.bar/dashboard_id',
            cluster='cluster_name',
            group_name='group_name',
            group_url='http://www.group_url.com',
            product='foobar',
            name='dashboard name',
            url='http://www.foo.bar/dashboard_id',
            description=None,
            created_timestamp=123456789,
            updated_timestamp=None,
            last_run_timestamp=None,
            last_run_state=None,
            owners=[],
            frequent_users=[],
            chart_names=[],
            query_names=[],
            tables=[],
            tags=[],
            last_successful_run_timestamp=None,
            recent_view_count=0)
        self.assertEqual(expected2, dashboard2)
def test_get_table(self) -> None:
    """get_table should assemble a Table from the mocked Cypher records."""
    with patch.object(GraphDatabase, 'driver'), patch.object(
            Neo4jProxy, '_execute_cypher_query') as mock_execute:
        # Query order: column usage, usage readers (empty), table-level record.
        mock_execute.side_effect = [
            self.col_usage_return_value, [], self.table_level_return_value
        ]
        neo4j_proxy = Neo4jProxy(host='DOES_NOT_MATTER', port=0000)
        table = neo4j_proxy.get_table(table_uri='dummy_uri')
        expected = Table(
            database='hive',
            cluster='gold',
            schema='foo_schema',
            name='foo_table',
            tags=[Tag(tag_name='test', tag_type='default')],
            badges=[Tag(tag_name='golden', tag_type='badge')],
            table_readers=[],
            description='foo description',
            watermarks=[
                Watermark(watermark_type='high_watermark',
                          partition_key='ds',
                          partition_value='fake_value',
                          create_time='fake_time'),
                Watermark(watermark_type='low_watermark',
                          partition_key='ds',
                          partition_value='fake_value',
                          create_time='fake_time')
            ],
            columns=[
                Column(name='bar_id_1',
                       description='bar col description',
                       col_type='varchar',
                       sort_order=0,
                       stats=[
                           Statistics(start_epoch=1,
                                      end_epoch=1,
                                      stat_type='avg',
                                      stat_val='1')
                       ]),
                Column(name='bar_id_2',
                       description='bar col2 description',
                       col_type='bigint',
                       sort_order=1,
                       stats=[
                           Statistics(start_epoch=2,
                                      end_epoch=2,
                                      stat_type='avg',
                                      stat_val='2')
                       ])
            ],
            owners=[User(email='*****@*****.**')],
            table_writer=Application(
                application_url=self.table_writer['application_url'],
                description=self.table_writer['description'],
                name=self.table_writer['name'],
                id=self.table_writer['id']),
            last_updated_timestamp=1,
            source=Source(source='/source_file_loc', source_type='github'),
            is_view=False)
        # Compare string representations to sidestep model equality nuances.
        self.assertEqual(str(expected), str(table))
def test_get_dashboard(self, mock_rds_client: Any) -> None:
    """get_dashboard should assemble a DashboardDetail from RDS records."""
    # dashboard_metadata
    dashboard = RDSDashboard(
        rk='foo_dashboard://gold.bar/dashboard_id',
        name='dashboard name',
        dashboard_url='http://www.foo.bar/dashboard_id',
        created_timestamp=123456789)
    dashboard_group = RDSDashboardGroup(
        name='group_name',
        dashboard_group_url='http://www.group_url.com')
    dashboard_group.cluster = RDSCluster(name='cluster_name')
    dashboard.group = dashboard_group
    dashboard.description = RDSDashboardDescription(
        description='description')
    dashboard.execution = [
        RDSDashboardExecution(rk='dashboard_last_successful_execution',
                              timestamp=9876543210),
        RDSDashboardExecution(rk='dashboard_last_execution',
                              timestamp=987654321,
                              state='good_state')
    ]
    dashboard.timestamp = RDSDashboardTimestamp(timestamp=123456654321)
    dashboard.tags = [
        RDSTag(rk='tag_key1', tag_type='default'),
        RDSTag(rk='tag_key2', tag_type='default')
    ]
    dashboard.badges = [RDSBadge(rk='golden', category='table_status')]
    dashboard.owners = [
        RDSUser(email='test_email',
                first_name='test_first_name',
                last_name='test_last_name',
                full_name='test_full_name',
                is_active=True,
                github_username='******',
                team_name='test_team',
                slack_id='test_id',
                employee_type='teamMember'),
        RDSUser(email='test_email2',
                first_name='test_first_name2',
                last_name='test_last_name2',
                full_name='test_full_name2',
                is_active=True,
                github_username='******',
                team_name='test_team2',
                slack_id='test_id2',
                employee_type='teamMember')
    ]
    dashboard.usage = [RDSDashboardUsage(read_count=100)]
    # Wire the mocked session chain: client -> session -> query -> filter.
    mock_client = MagicMock()
    mock_rds_client.return_value = mock_client
    mock_create_session = MagicMock()
    mock_client.create_session.return_value = mock_create_session
    mock_session = MagicMock()
    mock_create_session.__enter__.return_value = mock_session
    mock_session_query = MagicMock()
    mock_session.query.return_value = mock_session_query
    mock_session_query_filter = MagicMock()
    mock_session_query.filter.return_value = mock_session_query_filter
    mock_session_query_filter.first.return_value = dashboard
    # queries
    query1 = RDSDashboardQuery(name='query1')
    query2 = RDSDashboardQuery(name='query2',
                               url='http://foo.bar/query',
                               query_text='SELECT * FROM foo.bar')
    query1.charts = [RDSDashboardChart(name='chart1')]
    query2.charts = [RDSDashboardChart(name='chart2')]
    queries = [query1, query2]
    # tables
    database1 = RDSDatabase(name='db1')
    database2 = RDSDatabase(name='db2')
    cluster1 = RDSCluster(name='cluster1')
    cluster2 = RDSCluster(name='cluster2')
    schema1 = RDSSchema(name='schema1')
    schema2 = RDSSchema(name='schema2')
    table1 = RDSTable(name='table1')
    table2 = RDSTable(name='table2')
    description1 = RDSTableDescription(description='table description 1')
    schema1.cluster = cluster1
    cluster1.database = database1
    schema2.cluster = cluster2
    cluster2.database = database2
    table1.schema = schema1
    table2.schema = schema2
    table1.description = description1
    tables = [table1, table2]
    # The `options().all()` chain is consumed twice: queries first, then tables.
    mock_session_query_filter_options = MagicMock()
    mock_session_query_filter.options.return_value = mock_session_query_filter_options
    mock_session_query_filter_options.all.side_effect = [queries, tables]
    expected = DashboardDetail(
        uri='foo_dashboard://gold.bar/dashboard_id',
        cluster='cluster_name',
        group_name='group_name',
        group_url='http://www.group_url.com',
        product='foo',
        name='dashboard name',
        url='http://www.foo.bar/dashboard_id',
        description='description',
        created_timestamp=123456789,
        last_successful_run_timestamp=9876543210,
        updated_timestamp=123456654321,
        last_run_timestamp=987654321,
        last_run_state='good_state',
        owners=[
            User(email='test_email',
                 first_name='test_first_name',
                 last_name='test_last_name',
                 full_name='test_full_name',
                 is_active=True,
                 github_username='******',
                 team_name='test_team',
                 slack_id='test_id',
                 employee_type='teamMember',
                 manager_fullname=''),
            User(email='test_email2',
                 first_name='test_first_name2',
                 last_name='test_last_name2',
                 full_name='test_full_name2',
                 is_active=True,
                 github_username='******',
                 team_name='test_team2',
                 slack_id='test_id2',
                 employee_type='teamMember',
                 manager_fullname='')
        ],
        frequent_users=[],
        chart_names=['chart1', 'chart2'],
        query_names=['query1', 'query2'],
        queries=[
            DashboardQuery(name='query1'),
            DashboardQuery(name='query2',
                           url='http://foo.bar/query',
                           query_text='SELECT * FROM foo.bar')
        ],
        tables=[
            PopularTable(database='db1',
                         name='table1',
                         description='table description 1',
                         cluster='cluster1',
                         schema='schema1'),
            PopularTable(database='db2',
                         name='table2',
                         cluster='cluster2',
                         schema='schema2'),
        ],
        tags=[
            Tag(tag_type='default', tag_name='tag_key1'),
            Tag(tag_type='default', tag_name='tag_key2')
        ],
        badges=[Badge(badge_name='golden', category='table_status')],
        recent_view_count=100)
    proxy = MySQLProxy()
    actual = proxy.get_dashboard(id='dashboard_id')
    self.assertEqual(expected, actual)
def test_get_table(self, mock_rds_client: Any) -> None:
    """get_table should assemble a Table from mocked RDS ORM records."""
    # Build the RDS object graph: db -> cluster -> schema -> table.
    database = RDSDatabase(name='hive')
    cluster = RDSCluster(name='gold')
    schema = RDSSchema(name='foo_schema')
    schema.cluster = cluster
    cluster.database = database
    table = RDSTable(name='foo_table')
    table.schema = schema
    table.description = RDSTableDescription(description='foo description')
    col1 = RDSColumn(name='bar_id_1', type='varchar', sort_order=0)
    col1.description = RDSColumnDescription(
        description='bar col description')
    col1.stats = [
        RDSColumnStat(stat_type='avg',
                      start_epoch='1',
                      end_epoch='1',
                      stat_val='1')
    ]
    col2 = RDSColumn(name='bar_id_2', type='bigint', sort_order=1)
    col2.description = RDSColumnDescription(
        description='bar col2 description')
    col2.stats = [
        RDSColumnStat(stat_type='avg',
                      start_epoch='2',
                      end_epoch='2',
                      stat_val='2')
    ]
    col2.badges = [RDSBadge(rk='primary key', category='column')]
    columns = [col1, col2]
    table.watermarks = [
        RDSTableWatermark(
            rk='hive://gold.test_schema/test_table/high_watermark/',
            partition_key='ds',
            partition_value='fake_value',
            create_time='fake_time'),
        RDSTableWatermark(
            rk='hive://gold.test_schema/test_table/low_watermark/',
            partition_key='ds',
            partition_value='fake_value',
            create_time='fake_time')
    ]
    table.application = RDSApplication(
        application_url='airflow_host/admin/airflow/tree?dag_id=test_table',
        description='DAG generating a table',
        name='Airflow',
        id='dag/task_id')
    table.timestamp = RDSTableTimestamp(last_updated_timestamp=1)
    table.owners = [
        RDSUser(rk='*****@*****.**', email='*****@*****.**')
    ]
    table.tags = [RDSTag(rk='test', tag_type='default')]
    table.badges = [RDSBadge(rk='golden', category='table_status')]
    table.source = RDSTableSource(rk='some key',
                                  source_type='github',
                                  source='/source_file_loc')
    table.programmatic_descriptions = [
        RDSTableProgrammaticDescription(description_source='s3_crawler',
                                        description='Test Test Test'),
        RDSTableProgrammaticDescription(
            description_source='quality_report',
            description='Test Test')
    ]
    readers = [RDSTableUsage(user_rk='*****@*****.**', read_count=5)]
    # Wire the mocked session chain: client -> session -> query -> filter.
    mock_client = MagicMock()
    mock_rds_client.return_value = mock_client
    mock_create_session = MagicMock()
    mock_client.create_session.return_value = mock_create_session
    mock_session = MagicMock()
    mock_create_session.__enter__.return_value = mock_session
    mock_session_query = MagicMock()
    mock_session.query.return_value = mock_session_query
    mock_session_query_filter = MagicMock()
    mock_session_query.filter.return_value = mock_session_query_filter
    mock_session_query_filter.first.return_value = table
    # order_by().limit().all() feeds the reader lookup.
    mock_session_query_filter_orderby = MagicMock()
    mock_session_query_filter.order_by.return_value = mock_session_query_filter_orderby
    mock_session_query_filter_orderby_limit = MagicMock()
    mock_session_query_filter_orderby.limit.return_value = mock_session_query_filter_orderby_limit
    mock_session_query_filter_orderby_limit.all.return_value = readers
    # options().all() feeds the column lookup.
    mock_session_query_filter_options = MagicMock()
    mock_session_query_filter.options.return_value = mock_session_query_filter_options
    mock_session_query_filter_options.all.return_value = columns
    proxy = MySQLProxy()
    actual_table = proxy.get_table(table_uri='dummy_uri')
    expected = Table(
        database='hive',
        cluster='gold',
        schema='foo_schema',
        name='foo_table',
        tags=[Tag(tag_name='test', tag_type='default')],
        badges=[Badge(badge_name='golden', category='table_status')],
        table_readers=[
            Reader(user=User(email='*****@*****.**'), read_count=5)
        ],
        description='foo description',
        watermarks=[
            Watermark(watermark_type='high_watermark',
                      partition_key='ds',
                      partition_value='fake_value',
                      create_time='fake_time'),
            Watermark(watermark_type='low_watermark',
                      partition_key='ds',
                      partition_value='fake_value',
                      create_time='fake_time')
        ],
        columns=[
            Column(name='bar_id_1',
                   description='bar col description',
                   col_type='varchar',
                   sort_order=0,
                   stats=[
                       Stat(start_epoch=1,
                            end_epoch=1,
                            stat_type='avg',
                            stat_val='1')
                   ],
                   badges=[]),
            Column(name='bar_id_2',
                   description='bar col2 description',
                   col_type='bigint',
                   sort_order=1,
                   stats=[
                       Stat(start_epoch=2,
                            end_epoch=2,
                            stat_type='avg',
                            stat_val='2')
                   ],
                   badges=[
                       Badge(badge_name='primary key', category='column')
                   ])
        ],
        owners=[User(email='*****@*****.**')],
        table_writer=Application(
            application_url=
            'airflow_host/admin/airflow/tree?dag_id=test_table',
            description='DAG generating a table',
            name='Airflow',
            id='dag/task_id'),
        last_updated_timestamp=1,
        source=Source(source='/source_file_loc', source_type='github'),
        is_view=False,
        programmatic_descriptions=[
            ProgrammaticDescription(source='quality_report',
                                    text='Test Test'),
            ProgrammaticDescription(source='s3_crawler',
                                    text='Test Test Test')
        ])
    # Compare string representations to sidestep model equality nuances.
    self.assertEqual(str(expected), str(actual_table))
def _exec_table_query(self, table_uri: str) -> Tuple:
    """
    Queries one Cypher record with watermark list, Application,
    timestamp, owner records, tag records, badge records, source and
    programmatic descriptions for a table.

    :param table_uri: key of the Table node to look up
    :return: (Watermark Results, Table Writer, Last Updated Timestamp,
        owner records, tag records, source, badge records,
        programmatic descriptions)
    """
    # Return Value: (Watermark Results, Table Writer, Last Updated Timestamp, owner records, tag records)
    table_level_query = textwrap.dedent("""\
    MATCH (tbl:Table {key: $tbl_key})
    OPTIONAL MATCH (wmk:Watermark)-[:BELONG_TO_TABLE]->(tbl)
    OPTIONAL MATCH (application:Application)-[:GENERATES]->(tbl)
    OPTIONAL MATCH (tbl)-[:LAST_UPDATED_AT]->(t:Timestamp)
    OPTIONAL MATCH (owner:User)<-[:OWNER]-(tbl)
    OPTIONAL MATCH (tbl)-[:TAGGED_BY]->(tag:Tag{tag_type: $tag_normal_type})
    OPTIONAL MATCH (tbl)-[:TAGGED_BY]->(badge:Tag{tag_type: $tag_badge_type})
    OPTIONAL MATCH (tbl)-[:SOURCE]->(src:Source)
    OPTIONAL MATCH (tbl)-[:DESCRIPTION]->(prog_descriptions:Programmatic_Description)
    RETURN collect(distinct wmk) as wmk_records,
    application,
    t.last_updated_timestamp as last_updated_timestamp,
    collect(distinct owner) as owner_records,
    collect(distinct tag) as tag_records,
    collect(distinct badge) as badge_records,
    src,
    collect(distinct prog_descriptions) as prog_descriptions
    """)

    table_records = self._execute_cypher_query(statement=table_level_query,
                                               param_dict={'tbl_key': table_uri,
                                                           'tag_normal_type': 'default',
                                                           'tag_badge_type': 'badge'})

    table_records = table_records.single()

    wmk_results = []
    table_writer = None

    wmk_records = table_records['wmk_records']

    for record in wmk_records:
        if record['key'] is not None:
            # The watermark type is the second-to-last path segment of the key.
            watermark_type = record['key'].split('/')[-2]
            wmk_result = Watermark(watermark_type=watermark_type,
                                   partition_key=record['partition_key'],
                                   partition_value=record['partition_value'],
                                   create_time=record['create_time'])
            wmk_results.append(wmk_result)

    tags = []
    if table_records.get('tag_records'):
        tag_records = table_records['tag_records']
        for record in tag_records:
            tag_result = Tag(tag_name=record['key'],
                             tag_type=record['tag_type'])
            tags.append(tag_result)

    # Badges are modelled as Tag objects with a badge tag_type.
    badges = []
    if table_records.get('badge_records'):
        badge_records = table_records['badge_records']
        for record in badge_records:
            badge_result = Tag(tag_name=record['key'],
                               tag_type=record['tag_type'])
            badges.append(badge_result)

    application_record = table_records['application']
    if application_record is not None:
        table_writer = Application(
            application_url=application_record['application_url'],
            description=application_record['description'],
            name=application_record['name'],
            id=application_record.get('id', '')
        )

    timestamp_value = table_records['last_updated_timestamp']

    owner_record = []

    for owner in table_records.get('owner_records', []):
        owner_record.append(User(email=owner['email']))

    src = None

    if table_records['src']:
        src = Source(source_type=table_records['src']['source_type'],
                     source=table_records['src']['source'])

    prog_descriptions = self._extract_programmatic_descriptions_from_query(
        table_records.get('prog_descriptions', [])
    )

    return wmk_results, table_writer, timestamp_value, owner_record, tags, src, badges, prog_descriptions