def test_get_table(self) -> None:
    """
    Check that ``Neo4jProxy.get_table`` produces the expected ``Table``.

    ``GraphDatabase.driver`` and ``Neo4jProxy._execute_cypher_query`` are
    patched for the duration of the test. The expected and actual tables are
    compared through their string representations.
    """
    with patch.object(GraphDatabase, 'driver'), \
            patch.object(Neo4jProxy, '_execute_cypher_query') as mock_execute:
        # side_effect given as a list: each call to the patched method hands
        # back the next item, in this order.
        mock_execute.side_effect = [
            self.col_usage_return_value,
            [],
            self.table_level_return_value,
        ]

        proxy = Neo4jProxy(endpoint='bogus')
        actual_table = proxy.get_table(table_uri='dummy_uri')

        expected_watermarks = [
            Watermark(watermark_type=wmk_type,
                      partition_key='ds',
                      partition_value='fake_value',
                      create_time='fake_time')
            for wmk_type in ('high_watermark', 'low_watermark')
        ]
        first_column = Column(
            name='bar_id_1',
            description='bar col description',
            col_type='varchar',
            sort_order=0,
            stats=[Statistics(start_epoch=1, end_epoch=1,
                              stat_type='avg', stat_val='1')])
        second_column = Column(
            name='bar_id_2',
            description='bar col2 description',
            col_type='bigint',
            sort_order=1,
            stats=[Statistics(start_epoch=2, end_epoch=2,
                              stat_type='avg', stat_val='2')])
        writer_fixture = self.table_writer
        expected_writer = Application(
            application_url=writer_fixture['application_url'],
            description=writer_fixture['description'],
            name=writer_fixture['name'],
            id=writer_fixture['id'])

        expected_table = Table(
            database='hive',
            cluster='gold',
            schema='foo_schema',
            name='foo_table',
            tags=[Tag(tag_name='test', tag_type='default')],
            table_readers=[],
            description='foo description',
            watermarks=expected_watermarks,
            columns=[first_column, second_column],
            owners=[User(email='*****@*****.**')],
            table_writer=expected_writer,
            last_updated_timestamp=1,
            source=Source(source='/source_file_loc', source_type='github'))

        self.assertEqual(str(expected_table), str(actual_table))
def test_get_table(self):
    """
    Check that ``self.proxy.get_table`` returns the ``Table`` described by the
    entity fixtures held on the test case.

    The expected table is rebuilt from ``self.entity1``, ``self.test_column``
    and ``self.classification_entity`` and compared with the response through
    their string representations.
    """
    self._mock_get_table_entity()
    actual_table = self.proxy.get_table(table_uri=self.table_uri)

    tag_name = self.classification_entity['classifications'][0]['typeName']
    table_attrs = self.entity1['attributes']
    column_attrs = self.test_column['attributes']

    # One expected Statistics object per entry in the column fixture's 'stats'.
    expected_stats = [
        Statistics(
            stat_type=stat_attrs['stat_name'],
            stat_val=stat_attrs['stat_val'],
            start_epoch=stat_attrs['start_epoch'],
            end_epoch=stat_attrs['end_epoch'],
        )
        for stat_attrs in (stat['attributes'] for stat in column_attrs['stats'])
    ]

    expected_column = Column(
        name=column_attrs['qualifiedName'],
        description='column description',
        col_type='Managed',
        sort_order=column_attrs['position'],
        stats=expected_stats)

    expected_table = Table(
        database=self.entity_type,
        cluster=self.cluster,
        schema=self.db,
        name=self.name,
        tags=[Tag(tag_name=tag_name, tag_type="default")],
        description=table_attrs['description'],
        owners=[User(email=table_attrs['owner'])],
        columns=[expected_column],
        last_updated_timestamp=self.entity1['updateTime'])

    self.assertEqual(str(expected_table), str(actual_table))
def get_table(self, *, table_id: str, table_info: Dict) -> Table:
    """
    Build the Table object needed by the Table Detail Page.

    :param table_id: Identifier passed to ``_get_table_entity`` to fetch the
        table entity; also echoed in the error message on failure.
    :param table_info: Additional table information (entity, db, cluster, name)
    :return: A Table built from the entity's attributes, its classifications
        (exposed as tags) and the columns found in its referred entities.
    :raises BadRequest: if a KeyError is raised while reading the entity or
        ``table_info``.
    """
    table_entity = self._get_table_entity(table_id=table_id)
    details = table_entity.entity

    try:
        attrs = details[self.ATTRS_KEY]
        rel_attrs = details[self.REL_ATTRS_KEY]

        # "or []" covers the case where the 'classifications' key is present
        # but holds None.
        tags = [
            Tag(tag_name=classification.get('typeName'), tag_type="default")
            for classification in details.get("classifications") or []
        ]

        columns = []
        for column_ref in rel_attrs.get('columns') or []:
            # The relationship entry only references the column; its
            # attributes are looked up in referredEntities by guid.
            referred = table_entity.referredEntities[column_ref['guid']]
            column_attrs = referred[self.ATTRS_KEY]
            column_type = column_attrs.get('type') or column_attrs.get('dataType')
            columns.append(
                Column(
                    name=column_attrs.get(self.NAME_ATTRIBUTE),
                    description=column_attrs.get('description'),
                    col_type=column_type,
                    sort_order=column_attrs.get('position'),
                )
            )

        return Table(
            database=table_info['entity'],
            cluster=table_info['cluster'],
            schema=table_info['db'],
            name=table_info['name'],
            tags=tags,
            description=attrs.get('description'),
            owners=[User(email=attrs.get('owner'))],
            columns=columns,
            last_updated_timestamp=details.get('updateTime'),
        )
    except KeyError as ex:
        log_message = 'Error while accessing table information. {}'.format(str(ex))
        LOGGER.exception(log_message)
        error_message = (
            'Some of the required attributes '
            'are missing in : ( {table_id} )'
        ).format(table_id=table_id)
        raise BadRequest(error_message)
def get_table(self, *, table_uri: str) -> Table:
    """
    Build the Table object needed by the Table Detail Page.

    :param table_uri: URI passed to ``_get_table_entity`` to fetch the table
        entity; also echoed in the error message on failure.
    :return: A Table built from the entity's attributes, its qualified name,
        its classifications (exposed as tags) and its serialized columns.
    :raises BadRequest: if a KeyError is raised while reading the entity.
    """
    # _get_table_entity returns a pair; only the entity is used here.
    entity, _ = self._get_table_entity(table_uri=table_uri)
    details = entity.entity

    try:
        attrs = details[self.ATTRS_KEY]

        qualified_name_parts = parse_table_qualified_name(
            qualified_name=attrs.get(self.QN_KEY)
        )

        # "or []" covers the case where the 'classifications' key is present
        # but holds None.
        tags = [
            Tag(tag_name=classification.get('typeName'), tag_type="default")
            for classification in details.get("classifications") or []
        ]

        columns = self._serialize_columns(entity=entity)

        # The explicit 'name' / 'description' attributes win; the qualified
        # name and 'comment' are only fallbacks.
        table_name = attrs.get('name') or qualified_name_parts.get("table_name", '')
        table_description = attrs.get('description') or attrs.get('comment')

        return Table(
            database=details.get('typeName'),
            cluster=qualified_name_parts.get('cluster_name', ''),
            schema=qualified_name_parts.get('db_name', ''),
            name=table_name,
            tags=tags,
            description=table_description,
            owners=[User(email=attrs.get('owner'))],
            columns=columns,
            last_updated_timestamp=details.get('updateTime'),
        )
    except KeyError as ex:
        log_message = 'Error while accessing table information. {}'.format(str(ex))
        LOGGER.exception(log_message)
        error_message = (
            'Some of the required attributes '
            'are missing in : ( {table_uri} )'
        ).format(table_uri=table_uri)
        raise BadRequest(error_message)
def test_get_resources_by_user_relation(self) -> None:
    """
    Check that ``get_table_by_user_relation`` returns one table, named
    ``foo_table``, for a ``follow`` relation.

    ``GraphDatabase.driver``, ``Neo4jProxy._execute_cypher_query`` and
    ``Neo4jProxy.get_table`` are patched; the patched query yields a single
    table record and the patched ``get_table`` yields a fixed ``Table``.
    """
    # NOTE(review): the test name says "resources" while the method under test
    # is get_table_by_user_relation - confirm the naming is intentional.
    with patch.object(GraphDatabase, 'driver'), \
            patch.object(Neo4jProxy, '_execute_cypher_query') as mock_execute, \
            patch.object(Neo4jProxy, 'get_table') as mock_get_table:
        query_record = {
            'table_records': [
                {'key': 'table_uri'},
            ]
        }
        mock_execute.return_value.single.return_value = query_record

        sample_watermarks = [
            Watermark(watermark_type=wmk_type,
                      partition_key='ds',
                      partition_value='fake_value',
                      create_time='fake_time')
            for wmk_type in ('high_watermark', 'low_watermark')
        ]
        first_column = Column(
            name='bar_id_1',
            description='bar col description',
            col_type='varchar',
            sort_order=0,
            stats=[Statistics(start_epoch=1, end_epoch=1,
                              stat_type='avg', stat_val='1')])
        second_column = Column(
            name='bar_id_2',
            description='bar col2 description',
            col_type='bigint',
            sort_order=1,
            stats=[Statistics(start_epoch=2, end_epoch=2,
                              stat_type='avg', stat_val='2')])
        writer_fixture = self.table_writer
        sample_writer = Application(
            application_url=writer_fixture['application_url'],
            description=writer_fixture['description'],
            name=writer_fixture['name'],
            id=writer_fixture['id'])

        mock_get_table.return_value = Table(
            database='hive',
            cluster='gold',
            schema='foo_schema',
            name='foo_table',
            tags=[Tag(tag_name='test', tag_type='default')],
            table_readers=[],
            description='foo description',
            watermarks=sample_watermarks,
            columns=[first_column, second_column],
            owners=[User(email='*****@*****.**')],
            table_writer=sample_writer,
            last_updated_timestamp=1,
            source=Source(source='/source_file_loc', source_type='github'))

        proxy = Neo4jProxy(host='DOES_NOT_MATTER', port=0000)
        result = proxy.get_table_by_user_relation(
            user_email='test_user',
            relation_type=UserResourceRel.follow)

        followed_tables = result['table']
        self.assertEqual(len(followed_tables), 1)
        self.assertEqual(followed_tables[0].name, 'foo_table')
def _exec_table_query(self, table_uri: str) -> Tuple:
    """
    Run the table-level Cypher query and unpack its single result record.

    :param table_uri: Key of the Table node; bound to ``$tbl_key`` in the query.
    :return: A 6-tuple ``(watermarks, table_writer, last_updated_timestamp,
        owners, tags, source)``. ``table_writer`` is None when the record has
        no application, and ``source`` is None when the record's ``src`` is
        empty.
    """
    table_level_query = textwrap.dedent("""\
    MATCH (tbl:Table {key: $tbl_key})
    OPTIONAL MATCH (wmk:Watermark)-[:BELONG_TO_TABLE]->(tbl)
    OPTIONAL MATCH (application:Application)-[:GENERATES]->(tbl)
    OPTIONAL MATCH (tbl)-[:LAST_UPDATED_AT]->(t:Timestamp)
    OPTIONAL MATCH (owner:User)-[:OWNER_OF]->(tbl)
    OPTIONAL MATCH (tbl)-[:TAGGED_BY]->(tag:Tag)
    OPTIONAL MATCH (tbl)-[:SOURCE]->(src:Source)
    RETURN collect(distinct wmk) as wmk_records,
    application,
    t.last_updated_timestamp as last_updated_timestamp,
    collect(distinct owner) as owner_records,
    collect(distinct tag) as tag_records,
    src
    """)

    query_result = self._execute_cypher_query(statement=table_level_query,
                                              param_dict={'tbl_key': table_uri})
    row = query_result.single()

    # Watermark records without a key are skipped. The watermark type is the
    # second-to-last '/'-separated segment of the key.
    watermarks = [
        Watermark(watermark_type=wmk['key'].split('/')[-2],
                  partition_key=wmk['partition_key'],
                  partition_value=wmk['partition_value'],
                  create_time=wmk['create_time'])
        for wmk in row['wmk_records']
        if wmk['key'] is not None
    ]

    tags = []
    if row.get('tag_records'):
        tags = [
            Tag(tag_name=tag['key'], tag_type=tag['tag_type'])
            for tag in row['tag_records']
        ]

    application = row['application']
    if application is None:
        table_writer = None
    else:
        table_writer = Application(
            application_url=application['application_url'],
            description=application['description'],
            name=application['name'],
            id=application.get('id', ''))

    last_updated_timestamp = row['last_updated_timestamp']

    owners = [
        User(email=owner['email'])
        for owner in row.get('owner_records', [])
    ]

    source_record = row['src']
    if source_record:
        source = Source(source_type=source_record['source_type'],
                        source=source_record['source'])
    else:
        source = None

    return watermarks, table_writer, last_updated_timestamp, owners, tags, source