def test_get_table(self) -> None:
    self._mock_get_table_entity()
    response = self.proxy.get_table(table_uri=self.table_uri)

    classif_name = self.classification_entity['classifications'][0]['typeName']
    ent_attrs = cast(dict, self.entity1['attributes'])

    col_attrs = cast(dict, self.test_column['attributes'])
    col_metadata_attrs = cast(dict, self.column_metadata_entity['attributes'])
    exp_col_stats = list()

    for stats in col_metadata_attrs['statistics']:
        exp_col_stats.append(
            Statistics(
                stat_type=stats['attributes']['stat_name'],
                stat_val=stats['attributes']['stat_val'],
                start_epoch=stats['attributes']['start_epoch'],
                end_epoch=stats['attributes']['end_epoch'],
            )
        )
    exp_col = Column(name=col_attrs['name'],
                     description='column description',
                     col_type='Managed',
                     sort_order=col_attrs['position'],
                     stats=exp_col_stats)
    expected = Table(database=self.entity_type,
                     cluster=self.cluster,
                     schema=self.db,
                     name=ent_attrs['name'],
                     tags=[Tag(tag_name=classif_name, tag_type="default")],
                     description=ent_attrs['description'],
                     owners=[User(email=ent_attrs['owner'])],
                     columns=[exp_col],
                     last_updated_timestamp=cast(int, self.entity1['updateTime']))

    self.assertEqual(str(expected), str(response))
def get_table(self, *, table_uri: str) -> Table:
    """
    :param table_uri: Table URI
    :return: A Table object
    """
    cols, last_neo4j_record = self._exec_col_query(table_uri)

    readers = self._exec_usage_query(table_uri)

    wmk_results, table_writer, timestamp_value, owners, tags, source, badges, prog_descs = \
        self._exec_table_query(table_uri)

    table = Table(database=last_neo4j_record['db']['name'],
                  cluster=last_neo4j_record['clstr']['name'],
                  schema=last_neo4j_record['schema']['name'],
                  name=last_neo4j_record['tbl']['name'],
                  tags=tags,
                  badges=badges,
                  description=self._safe_get(last_neo4j_record, 'tbl_dscrpt', 'description'),
                  columns=cols,
                  owners=owners,
                  table_readers=readers,
                  watermarks=wmk_results,
                  table_writer=table_writer,
                  last_updated_timestamp=timestamp_value,
                  source=source,
                  is_view=self._safe_get(last_neo4j_record, 'tbl', 'is_view'),
                  programmatic_descriptions=prog_descs
                  )

    return table
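# A minimal sketch (an assumption, not the proxy's actual code) of a nested-lookup
# helper like the `_safe_get` used above. Its behavior is inferred only from the
# call sites, e.g. self._safe_get(last_neo4j_record, 'tbl', 'is_view'): walk the
# given keys into a dict-like record and return None instead of raising when a
# key is absent. The real method lives on the proxy; this standalone version is
# for illustration.
from typing import Any, Optional


def _safe_get(record: Any, *keys: str) -> Optional[Any]:
    # Descend one key at a time, bailing out with None as soon as a level is missing.
    current = record
    for key in keys:
        if current is None:
            return None
        current = current.get(key)
    return current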
def _get_table(self, custom_stats_format: bool = False) -> None:
    if custom_stats_format:
        test_exp_col = self.test_exp_col_stats_formatted
    else:
        test_exp_col = self.test_exp_col_stats_raw
    ent_attrs = cast(dict, self.entity1['attributes'])
    self._mock_get_table_entity()
    self._create_mocked_report_entities_collection()
    self.proxy._get_owners = MagicMock(
        return_value=[User(email=ent_attrs['owner'])])  # type: ignore
    self.proxy._driver.entity_bulk = MagicMock(
        return_value=self.report_entity_collection)
    response = self.proxy.get_table(table_uri=self.table_uri)

    classif_name = self.classification_entity['classifications'][0]['typeName']

    col_attrs = cast(dict, self.test_column['attributes'])
    exp_col_stats = list()

    for stats in test_exp_col:
        exp_col_stats.append(
            Stat(
                stat_type=stats['attributes']['stat_name'],
                stat_val=stats['attributes']['stat_val'],
                start_epoch=stats['attributes']['start_epoch'],
                end_epoch=stats['attributes']['end_epoch'],
            ))

    exp_col = Column(name=col_attrs['name'],
                     description='column description',
                     col_type='Managed',
                     sort_order=col_attrs['position'],
                     stats=exp_col_stats)
    expected = Table(
        database=self.entity_type,
        cluster=self.cluster,
        schema=self.db,
        name=ent_attrs['name'],
        tags=[Tag(tag_name=classif_name, tag_type="default")],
        description=ent_attrs['description'],
        owners=[User(email=ent_attrs['owner'])],
        resource_reports=[
            ResourceReport(name='test_report', url='http://test'),
            ResourceReport(name='test_report3', url='http://test3')
        ],
        last_updated_timestamp=int(str(self.entity1['updateTime'])[:10]),
        columns=[exp_col] * self.active_columns,
        programmatic_descriptions=[
            ProgrammaticDescription(source='test parameter key a',
                                    text='testParameterValueA'),
            ProgrammaticDescription(source='test parameter key b',
                                    text='testParameterValueB')
        ],
        is_view=False)

    self.assertEqual(str(expected), str(response))
def get_table(self, *, table_uri: str) -> Table:
    """
    Gathers all the information needed for the Table Detail Page.
    :param table_uri:
    :return: A Table object with all the information available
    or gathered from different entities.
    """
    entity = self._get_table_entity(table_uri=table_uri)
    table_details = entity.entity

    try:
        attrs = table_details[self.ATTRS_KEY]
        programmatic_descriptions = self._get_programmatic_descriptions(
            attrs.get('parameters'))

        table_qn = parse_table_qualified_name(
            qualified_name=attrs.get(self.QN_KEY))

        tags = []
        # Use `or` here in case the 'classifications' key exists but maps to None.
        for classification in table_details.get("classifications") or list():
            tags.append(
                Tag(tag_name=classification.get('typeName'),
                    tag_type="default"))

        columns = self._serialize_columns(entity=entity)

        reports_guids = [
            report.get("guid") for report in attrs.get("reports") or list()
        ]

        table = Table(
            database=table_details.get('typeName'),
            cluster=table_qn.get('cluster_name', ''),
            schema=table_qn.get('db_name', ''),
            name=attrs.get('name') or table_qn.get("table_name", ''),
            tags=tags,
            description=attrs.get('description') or attrs.get('comment'),
            owners=[User(email=attrs.get('owner'))],
            resource_reports=self._get_reports(guids=reports_guids),
            columns=columns,
            table_readers=self._get_readers(attrs.get(self.QN_KEY)),
            last_updated_timestamp=self._parse_date(table_details.get('updateTime')),
            programmatic_descriptions=programmatic_descriptions)

        return table
    except KeyError as ex:
        LOGGER.exception(
            'Error while accessing table information. {}'.format(str(ex)))
        raise BadRequest(
            'Some of the required attributes '
            'are missing in : ( {table_uri} )'.format(table_uri=table_uri))
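# Hedged sketch of what a helper like `parse_table_qualified_name` might return.
# It is inferred only from the keys the proxies above look up ('cluster_name',
# 'db_name', 'table_name') and the common Atlas qualified-name shape
# "db.table@cluster"; it is illustrative, not the library's implementation.
from typing import Dict


def parse_table_qualified_name(qualified_name: str) -> Dict[str, str]:
    # e.g. "foo_schema.foo_table@gold" -> db_name, table_name, cluster_name
    table_part, _, cluster_name = qualified_name.partition('@')
    db_name, _, table_name = table_part.partition('.')
    return {
        'db_name': db_name,
        'table_name': table_name,
        'cluster_name': cluster_name,
    }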
def test_get_table_view_only(self) -> None:
    col_usage_return_value = copy.deepcopy(self.col_usage_return_value)
    for col in col_usage_return_value:
        col['tbl']['is_view'] = True

    with patch.object(GraphDatabase, 'driver'), patch.object(Neo4jProxy, '_execute_cypher_query') as mock_execute:
        mock_execute.side_effect = [col_usage_return_value, [], self.table_level_return_value]

        neo4j_proxy = Neo4jProxy(host='DOES_NOT_MATTER', port=0000)
        table = neo4j_proxy.get_table(table_uri='dummy_uri')

        expected = Table(database='hive', cluster='gold', schema='foo_schema', name='foo_table',
                         tags=[Tag(tag_name='test', tag_type='default')],
                         badges=[Badge(badge_name='golden', category='table_status')],
                         table_readers=[],
                         description='foo description',
                         watermarks=[Watermark(watermark_type='high_watermark',
                                               partition_key='ds',
                                               partition_value='fake_value',
                                               create_time='fake_time'),
                                     Watermark(watermark_type='low_watermark',
                                               partition_key='ds',
                                               partition_value='fake_value',
                                               create_time='fake_time')],
                         columns=[Column(name='bar_id_1', description='bar col description',
                                         col_type='varchar', sort_order=0,
                                         stats=[Stat(start_epoch=1, end_epoch=1,
                                                     stat_type='avg', stat_val='1')],
                                         badges=[]),
                                  Column(name='bar_id_2', description='bar col2 description',
                                         col_type='bigint', sort_order=1,
                                         stats=[Stat(start_epoch=2, end_epoch=2,
                                                     stat_type='avg', stat_val='2')],
                                         badges=[Badge(badge_name='primary key', category='column')])],
                         owners=[User(email='*****@*****.**')],
                         table_writer=Application(application_url=self.table_writer['application_url'],
                                                  description=self.table_writer['description'],
                                                  name=self.table_writer['name'],
                                                  id=self.table_writer['id']),
                         last_updated_timestamp=1,
                         source=Source(source='/source_file_loc', source_type='github'),
                         is_view=True,
                         programmatic_descriptions=[
                             ProgrammaticDescription(source='quality_report',
                                                     text='Test Test'),
                             ProgrammaticDescription(source='s3_crawler',
                                                     text='Test Test Test')
                         ])

        self.assertEqual(str(expected), str(table))
def get_table(self, *, table_uri: str) -> Table:
    """
    Gathers all the information needed for the Table Detail Page.
    :param table_uri:
    :return: A Table object with all the information available
    or gathered from different entities.
    """
    entity = self._get_table_entity(table_uri=table_uri)
    table_details = entity.entity

    try:
        attrs = table_details[self.ATTRS_KEY]
        programmatic_descriptions = self._get_programmatic_descriptions(attrs.get('parameters', dict()))

        table_qn = parse_table_qualified_name(
            qualified_name=attrs.get(self.QN_KEY)
        )

        badges = []
        # Use `or` here in case the 'classifications' key exists but maps to None.
        for classification in table_details.get('classifications') or list():
            badges.append(
                Badge(
                    badge_name=classification.get('typeName'),
                    category="default"
                )
            )

        tags = []
        for term in table_details.get(self.REL_ATTRS_KEY).get("meanings") or list():
            if term.get('entityStatus') == Status.ACTIVE and \
                    term.get('relationshipStatus') == Status.ACTIVE:
                tags.append(
                    Tag(
                        tag_name=term.get("displayText"),
                        tag_type="default"
                    )
                )

        columns = self._serialize_columns(entity=entity)

        reports_guids = [report.get("guid") for report in attrs.get("reports") or list()]

        table_type = attrs.get('tableType') or 'table'
        is_view = 'view' in table_type.lower()

        readers = self._get_readers(table_details)

        table = Table(
            database=table_details.get('typeName'),
            cluster=table_qn.get('cluster_name', ''),
            schema=table_qn.get('db_name', ''),
            name=attrs.get('name') or table_qn.get("table_name", ''),
            badges=badges,
            tags=tags,
            description=attrs.get('description') or attrs.get('comment'),
            owners=self._get_owners(
                table_details[self.REL_ATTRS_KEY].get('ownedBy', []), attrs.get('owner')),
            resource_reports=self._get_reports(guids=reports_guids),
            columns=columns,
            is_view=is_view,
            table_readers=readers,
            last_updated_timestamp=self._parse_date(table_details.get('updateTime')),
            programmatic_descriptions=programmatic_descriptions,
            watermarks=self._get_table_watermarks(table_details))

        return table
    except KeyError as ex:
        LOGGER.exception('Error while accessing table information. {}'
                         .format(str(ex)))
        raise BadRequest('Some of the required attributes '
                         'are missing in : ( {table_uri} )'
                         .format(table_uri=table_uri))
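# Hedged sketch of a `_parse_date`-style helper used by the proxies above. The
# behavior is an assumption inferred from the test expectation
# int(str(self.entity1['updateTime'])[:10]), i.e. a millisecond epoch truncated
# to seconds; the actual proxy implementation may differ.
from typing import Optional


def _parse_date(timestamp_ms: Optional[int]) -> Optional[int]:
    # Atlas reports updateTime as epoch milliseconds; the Table model expects seconds.
    if timestamp_ms is None:
        return None
    return int(timestamp_ms) // 1000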
def test_get_table(self, mock_rds_client: Any) -> None:
    database = RDSDatabase(name='hive')
    cluster = RDSCluster(name='gold')
    schema = RDSSchema(name='foo_schema')
    schema.cluster = cluster
    cluster.database = database

    table = RDSTable(name='foo_table')
    table.schema = schema
    table.description = RDSTableDescription(description='foo description')

    col1 = RDSColumn(name='bar_id_1', type='varchar', sort_order=0)
    col1.description = RDSColumnDescription(description='bar col description')
    col1.stats = [
        RDSColumnStat(stat_type='avg', start_epoch='1', end_epoch='1', stat_val='1')
    ]
    col2 = RDSColumn(name='bar_id_2', type='bigint', sort_order=1)
    col2.description = RDSColumnDescription(description='bar col2 description')
    col2.stats = [
        RDSColumnStat(stat_type='avg', start_epoch='2', end_epoch='2', stat_val='2')
    ]
    col2.badges = [RDSBadge(rk='primary key', category='column')]
    columns = [col1, col2]

    table.watermarks = [
        RDSTableWatermark(
            rk='hive://gold.test_schema/test_table/high_watermark/',
            partition_key='ds',
            partition_value='fake_value',
            create_time='fake_time'),
        RDSTableWatermark(
            rk='hive://gold.test_schema/test_table/low_watermark/',
            partition_key='ds',
            partition_value='fake_value',
            create_time='fake_time')
    ]
    table.application = RDSApplication(
        application_url='airflow_host/admin/airflow/tree?dag_id=test_table',
        description='DAG generating a table',
        name='Airflow',
        id='dag/task_id')
    table.timestamp = RDSTableTimestamp(last_updated_timestamp=1)
    table.owners = [
        RDSUser(rk='*****@*****.**', email='*****@*****.**')
    ]
    table.tags = [RDSTag(rk='test', tag_type='default')]
    table.badges = [RDSBadge(rk='golden', category='table_status')]
    table.source = RDSTableSource(rk='some key',
                                  source_type='github',
                                  source='/source_file_loc')
    table.programmatic_descriptions = [
        RDSTableProgrammaticDescription(description_source='s3_crawler',
                                        description='Test Test Test'),
        RDSTableProgrammaticDescription(description_source='quality_report',
                                        description='Test Test')
    ]

    readers = [RDSTableUsage(user_rk='*****@*****.**', read_count=5)]

    mock_client = MagicMock()
    mock_rds_client.return_value = mock_client

    mock_create_session = MagicMock()
    mock_client.create_session.return_value = mock_create_session

    mock_session = MagicMock()
    mock_create_session.__enter__.return_value = mock_session

    mock_session_query = MagicMock()
    mock_session.query.return_value = mock_session_query

    mock_session_query_filter = MagicMock()
    mock_session_query.filter.return_value = mock_session_query_filter

    mock_session_query_filter.first.return_value = table

    mock_session_query_filter_orderby = MagicMock()
    mock_session_query_filter.order_by.return_value = mock_session_query_filter_orderby

    mock_session_query_filter_orderby_limit = MagicMock()
    mock_session_query_filter_orderby.limit.return_value = mock_session_query_filter_orderby_limit

    mock_session_query_filter_orderby_limit.all.return_value = readers

    mock_session_query_filter_options = MagicMock()
    mock_session_query_filter.options.return_value = mock_session_query_filter_options

    mock_session_query_filter_options.all.return_value = columns

    proxy = MySQLProxy()
    actual_table = proxy.get_table(table_uri='dummy_uri')

    expected = Table(
        database='hive',
        cluster='gold',
        schema='foo_schema',
        name='foo_table',
        tags=[Tag(tag_name='test', tag_type='default')],
        badges=[Badge(badge_name='golden', category='table_status')],
        table_readers=[
            Reader(user=User(email='*****@*****.**'), read_count=5)
        ],
        description='foo description',
        watermarks=[
            Watermark(watermark_type='high_watermark',
                      partition_key='ds',
                      partition_value='fake_value',
                      create_time='fake_time'),
            Watermark(watermark_type='low_watermark',
                      partition_key='ds',
                      partition_value='fake_value',
                      create_time='fake_time')
        ],
        columns=[
            Column(name='bar_id_1',
                   description='bar col description',
                   col_type='varchar',
                   sort_order=0,
                   stats=[
                       Stat(start_epoch=1, end_epoch=1, stat_type='avg', stat_val='1')
                   ],
                   badges=[]),
            Column(name='bar_id_2',
                   description='bar col2 description',
                   col_type='bigint',
                   sort_order=1,
                   stats=[
                       Stat(start_epoch=2, end_epoch=2, stat_type='avg', stat_val='2')
                   ],
                   badges=[
                       Badge(badge_name='primary key', category='column')
                   ])
        ],
        owners=[User(email='*****@*****.**')],
        table_writer=Application(
            application_url='airflow_host/admin/airflow/tree?dag_id=test_table',
            description='DAG generating a table',
            name='Airflow',
            id='dag/task_id'),
        last_updated_timestamp=1,
        source=Source(source='/source_file_loc', source_type='github'),
        is_view=False,
        programmatic_descriptions=[
            ProgrammaticDescription(source='quality_report', text='Test Test'),
            ProgrammaticDescription(source='s3_crawler', text='Test Test Test')
        ])

    self.assertEqual(str(expected), str(actual_table))
def test_get_table(self) -> None:
    with patch.object(GraphDatabase, 'driver'), patch.object(
            Neo4jProxy, '_execute_cypher_query') as mock_execute:
        mock_execute.side_effect = [
            self.col_usage_return_value, [], self.table_level_return_value
        ]

        neo4j_proxy = Neo4jProxy(host='DOES_NOT_MATTER', port=0000)
        table = neo4j_proxy.get_table(table_uri='dummy_uri')

        expected = Table(
            database='hive',
            cluster='gold',
            schema='foo_schema',
            name='foo_table',
            tags=[Tag(tag_name='test', tag_type='default')],
            badges=[Tag(tag_name='golden', tag_type='badge')],
            table_readers=[],
            description='foo description',
            watermarks=[
                Watermark(watermark_type='high_watermark',
                          partition_key='ds',
                          partition_value='fake_value',
                          create_time='fake_time'),
                Watermark(watermark_type='low_watermark',
                          partition_key='ds',
                          partition_value='fake_value',
                          create_time='fake_time')
            ],
            columns=[
                Column(name='bar_id_1',
                       description='bar col description',
                       col_type='varchar',
                       sort_order=0,
                       stats=[
                           Statistics(start_epoch=1,
                                      end_epoch=1,
                                      stat_type='avg',
                                      stat_val='1')
                       ]),
                Column(name='bar_id_2',
                       description='bar col2 description',
                       col_type='bigint',
                       sort_order=1,
                       stats=[
                           Statistics(start_epoch=2,
                                      end_epoch=2,
                                      stat_type='avg',
                                      stat_val='2')
                       ])
            ],
            owners=[User(email='*****@*****.**')],
            table_writer=Application(
                application_url=self.table_writer['application_url'],
                description=self.table_writer['description'],
                name=self.table_writer['name'],
                id=self.table_writer['id']),
            last_updated_timestamp=1,
            source=Source(source='/source_file_loc', source_type='github'),
            is_view=False)

        self.assertEqual(str(expected), str(table))