def test_get_table(self) -> None:
    """
    get_table() should assemble a Table from the mocked Cypher results.

    The Neo4j driver and ``_execute_cypher_query`` are patched, so no
    database is contacted; the query helper hands back three canned
    results in call order.
    """
    canned_results = [
        self.col_usage_return_value,
        [],
        self.table_level_return_value,
    ]
    with patch.object(GraphDatabase, 'driver'), \
            patch.object(Neo4jProxy, '_execute_cypher_query') as mock_execute:
        mock_execute.side_effect = canned_results
        proxy = Neo4jProxy(endpoint='bogus')
        actual_table = proxy.get_table(table_uri='dummy_uri')

    # Both watermarks share everything but their type.
    expected_watermarks = [
        Watermark(watermark_type=wm_type,
                  partition_key='ds',
                  partition_value='fake_value',
                  create_time='fake_time')
        for wm_type in ('high_watermark', 'low_watermark')
    ]
    expected_columns = [
        Column(name='bar_id_1',
               description='bar col description',
               col_type='varchar',
               sort_order=0,
               stats=[Statistics(start_epoch=1, end_epoch=1,
                                 stat_type='avg', stat_val='1')]),
        Column(name='bar_id_2',
               description='bar col2 description',
               col_type='bigint',
               sort_order=1,
               stats=[Statistics(start_epoch=2, end_epoch=2,
                                 stat_type='avg', stat_val='2')]),
    ]
    writer = self.table_writer
    expected_writer = Application(application_url=writer['application_url'],
                                  description=writer['description'],
                                  name=writer['name'],
                                  id=writer['id'])
    expected_table = Table(
        database='hive',
        cluster='gold',
        schema='foo_schema',
        name='foo_table',
        tags=[Tag(tag_name='test', tag_type='default')],
        table_readers=[],
        description='foo description',
        watermarks=expected_watermarks,
        columns=expected_columns,
        owners=[User(email='*****@*****.**')],
        table_writer=expected_writer,
        last_updated_timestamp=1,
        source=Source(source='/source_file_loc', source_type='github'),
    )

    # The comparison is made on the string renderings of both objects.
    self.assertEqual(str(expected_table), str(actual_table))
def test_get_table(self):
    """
    get_table() should translate the mocked table entity into a Table.

    Expected values are derived from the same fixtures (``entity1``,
    ``test_column``, ``classification_entity``) that
    ``_mock_get_table_entity`` is set up with.
    """
    self._mock_get_table_entity()
    actual = self.proxy.get_table(table_uri=self.table_uri)

    tag_name = self.classification_entity['classifications'][0]['typeName']
    table_attrs = self.entity1['attributes']
    column_attrs = self.test_column['attributes']

    # One Statistics object per stats entry on the fixture column.
    expected_stats = [
        Statistics(
            stat_type=entry['attributes']['stat_name'],
            stat_val=entry['attributes']['stat_val'],
            start_epoch=entry['attributes']['start_epoch'],
            end_epoch=entry['attributes']['end_epoch'],
        )
        for entry in column_attrs['stats']
    ]
    expected_column = Column(
        name=column_attrs['qualifiedName'],
        description='column description',
        col_type='Managed',
        sort_order=column_attrs['position'],
        stats=expected_stats,
    )
    expected_table = Table(
        database=self.entity_type,
        cluster=self.cluster,
        schema=self.db,
        name=self.name,
        tags=[Tag(tag_name=tag_name, tag_type="default")],
        description=table_attrs['description'],
        owners=[User(email=table_attrs['owner'])],
        columns=[expected_column],
        last_updated_timestamp=self.entity1['updateTime'],
    )

    # The comparison is made on the string renderings of both objects.
    self.assertEqual(str(expected_table), str(actual))
def _exec_col_query(self, table_uri: str) -> Tuple:
    """
    Run the column-level Cypher query for a table and build Column objects.

    :param table_uri: value matched against the Table node's ``key``
    :return: (Columns, Last Processed Record) -- the columns ordered by
        ``sort_order`` and the last record yielded by the query
    :raises NotFoundException: if the query yields no column record
    """
    column_level_query = textwrap.dedent("""
    MATCH (db:Database)-[:CLUSTER]->(clstr:Cluster)-[:SCHEMA]->(schema:Schema)
    -[:TABLE]->(tbl:Table {key: $tbl_key})-[:COLUMN]->(col:Column)
    OPTIONAL MATCH (tbl)-[:DESCRIPTION]->(tbl_dscrpt:Description)
    OPTIONAL MATCH (col:Column)-[:DESCRIPTION]->(col_dscrpt:Description)
    OPTIONAL MATCH (col:Column)-[:STAT]->(stat:Stat)
    RETURN db, clstr, schema, tbl, tbl_dscrpt, col, col_dscrpt, collect(distinct stat) as col_stats
    ORDER BY col.sort_order;""")

    records = self._execute_cypher_query(statement=column_level_query,
                                         param_dict={'tbl_key': table_uri})

    columns = []
    final_record = None
    for record in records:
        stats = [
            Statistics(stat_type=raw_stat['stat_name'],
                       stat_val=raw_stat['stat_val'],
                       start_epoch=int(float(raw_stat['start_epoch'])),
                       end_epoch=int(float(raw_stat['end_epoch'])))
            for raw_stat in record['col_stats']
        ]

        # Remember the last record while iterating: random access on a
        # Neo4j result is an O(n) operation.
        final_record = record

        col_node = record['col']
        columns.append(
            Column(name=col_node['name'],
                   description=self._safe_get(record, 'col_dscrpt', 'description'),
                   col_type=col_node['type'],
                   sort_order=int(col_node['sort_order']),
                   stats=stats)
        )

    if not columns:
        raise NotFoundException(
            'Table URI( {table_uri} ) does not exist'.format(table_uri=table_uri))

    columns.sort(key=lambda column: column.sort_order)
    return columns, final_record
def _serialize_columns(self, *, entity: EntityUniqueAttribute) -> \
        Union[List[Column], List]:
    """
    Helper function to fetch the columns from entity and serialize them
    using the Column and Statistics models.

    For every column listed under the entity's relationship attributes, the
    full column entity is looked up in ``entity.referredEntities``. If the
    column references a ``metadata`` entity, that entity's ``statistics``
    are converted to Statistics objects. A metadata reference whose entity
    is not present in ``referredEntities`` is treated as "no statistics".

    :param entity: EntityUniqueAttribute object, along with
        relationshipAttributes
    :return: A list of Column objects sorted by ``sort_order``, if there
        are any columns available, else an empty list.
    :raises KeyError: if a listed column's guid is missing from
        ``entity.referredEntities``
    """
    columns = list()
    for column in entity.entity[self.REL_ATTRS_KEY].get('columns') or list():
        col_entity = entity.referredEntities[column['guid']]
        col_attrs = col_entity[self.ATTRS_KEY]
        col_rel_attrs = col_entity[self.REL_ATTRS_KEY]

        statistics = list()
        metadata_ref = col_rel_attrs.get('metadata')
        if metadata_ref:
            # .get() yields None when the referenced entity was not returned
            # alongside the table; guard so that case does not blow up with
            # "'NoneType' object is not subscriptable".
            col_metadata = entity.referredEntities.get(metadata_ref.get('guid'))
            if col_metadata:
                for stats in col_metadata['attributes'].get('statistics') or list():
                    stats_attrs = stats['attributes']
                    statistics.append(
                        Statistics(
                            stat_type=stats_attrs.get('stat_name'),
                            stat_val=stats_attrs.get('stat_val'),
                            start_epoch=stats_attrs.get('start_epoch'),
                            end_epoch=stats_attrs.get('end_epoch'),
                        )
                    )

        columns.append(
            Column(
                name=col_attrs.get('name'),
                # Fall back to the alternative attribute names when the
                # primary one is missing or empty.
                description=col_attrs.get('description') or col_attrs.get('comment'),
                col_type=col_attrs.get('type') or col_attrs.get('dataType'),
                sort_order=col_attrs.get('position'),
                stats=statistics,
            )
        )

    # NOTE(review): sort_order comes from .get('position') and may be None
    # when the attribute is absent; presumably Atlas always supplies it,
    # otherwise this sort could fail comparing None values -- confirm.
    return sorted(columns, key=lambda item: item.sort_order)
def test_get_resources_by_user_relation(self) -> None:
    """
    get_table_by_user_relation() should return the tables resolved through
    get_table() for each table record produced by the relation query.

    The Neo4j driver, ``_execute_cypher_query`` and ``get_table`` are all
    patched, so only the glue logic in the proxy is exercised.
    """
    # Both watermarks share everything but their type.
    watermarks = [
        Watermark(watermark_type=wm_type,
                  partition_key='ds',
                  partition_value='fake_value',
                  create_time='fake_time')
        for wm_type in ('high_watermark', 'low_watermark')
    ]
    columns = [
        Column(name='bar_id_1',
               description='bar col description',
               col_type='varchar',
               sort_order=0,
               stats=[Statistics(start_epoch=1, end_epoch=1,
                                 stat_type='avg', stat_val='1')]),
        Column(name='bar_id_2',
               description='bar col2 description',
               col_type='bigint',
               sort_order=1,
               stats=[Statistics(start_epoch=2, end_epoch=2,
                                 stat_type='avg', stat_val='2')]),
    ]
    writer = self.table_writer
    canned_table = Table(
        database='hive',
        cluster='gold',
        schema='foo_schema',
        name='foo_table',
        tags=[Tag(tag_name='test', tag_type='default')],
        table_readers=[],
        description='foo description',
        watermarks=watermarks,
        columns=columns,
        owners=[User(email='*****@*****.**')],
        table_writer=Application(application_url=writer['application_url'],
                                 description=writer['description'],
                                 name=writer['name'],
                                 id=writer['id']),
        last_updated_timestamp=1,
        source=Source(source='/source_file_loc', source_type='github'),
    )
    relation_query_result = {
        'table_records': [{
            'key': 'table_uri',
        }]
    }

    with patch.object(GraphDatabase, 'driver'), \
            patch.object(Neo4jProxy, '_execute_cypher_query') as mock_execute, \
            patch.object(Neo4jProxy, 'get_table') as mock_get_table:
        mock_execute.return_value.single.return_value = relation_query_result
        mock_get_table.return_value = canned_table

        proxy = Neo4jProxy(host='DOES_NOT_MATTER', port=0000)
        result = proxy.get_table_by_user_relation(
            user_email='test_user',
            relation_type=UserResourceRel.follow)

    tables = result['table']
    self.assertEqual(len(tables), 1)
    self.assertEqual(tables[0].name, 'foo_table')