Example #1
0
    def test_get_table(self) -> None:
        """
        Checks that get_table() renders the same Table as the one spelled out
        below when _execute_cypher_query is stubbed with three canned results.
        """
        with patch.object(GraphDatabase, 'driver'), \
                patch.object(Neo4jProxy, '_execute_cypher_query') as mock_execute:
            # One canned result per successive _execute_cypher_query call.
            mock_execute.side_effect = [
                self.col_usage_return_value,
                [],
                self.table_level_return_value,
            ]

            proxy = Neo4jProxy(endpoint='bogus')
            actual_table = proxy.get_table(table_uri='dummy_uri')

            # Both expected watermarks share every field except their type.
            expected_watermarks = [
                Watermark(watermark_type=wmk_type,
                          partition_key='ds',
                          partition_value='fake_value',
                          create_time='fake_time')
                for wmk_type in ('high_watermark', 'low_watermark')
            ]

            first_column = Column(
                name='bar_id_1',
                description='bar col description',
                col_type='varchar',
                sort_order=0,
                stats=[Statistics(start_epoch=1,
                                  end_epoch=1,
                                  stat_type='avg',
                                  stat_val='1')])
            second_column = Column(
                name='bar_id_2',
                description='bar col2 description',
                col_type='bigint',
                sort_order=1,
                stats=[Statistics(start_epoch=2,
                                  end_epoch=2,
                                  stat_type='avg',
                                  stat_val='2')])

            # The expected writer mirrors the table_writer fixture field by field.
            writer_fixture = self.table_writer
            expected_writer = Application(
                application_url=writer_fixture['application_url'],
                description=writer_fixture['description'],
                name=writer_fixture['name'],
                id=writer_fixture['id'])

            expected_table = Table(
                database='hive',
                cluster='gold',
                schema='foo_schema',
                name='foo_table',
                tags=[Tag(tag_name='test', tag_type='default')],
                table_readers=[],
                description='foo description',
                watermarks=expected_watermarks,
                columns=[first_column, second_column],
                owners=[User(email='*****@*****.**')],
                table_writer=expected_writer,
                last_updated_timestamp=1,
                source=Source(source='/source_file_loc', source_type='github'))

            # The comparison is made on the string renderings of both tables.
            self.assertEqual(str(expected_table), str(actual_table))
    def test_get_resources_by_user_relation(self) -> None:
        """
        Checks that get_table_by_user_relation() returns exactly one table,
        named 'foo_table', under the 'table' key when the stubbed query yields
        a single table record and get_table() is stubbed to return that table.
        """
        with patch.object(GraphDatabase, 'driver'), \
                patch.object(Neo4jProxy, '_execute_cypher_query') as mock_execute, \
                patch.object(Neo4jProxy, 'get_table') as mock_get_table:

            # The stubbed query result exposes one record through .single().
            single_row = {'table_records': [{'key': 'table_uri'}]}
            mock_execute.return_value.single.return_value = single_row

            # Both stub watermarks share every field except their type.
            stub_watermarks = [
                Watermark(watermark_type=wmk_type,
                          partition_key='ds',
                          partition_value='fake_value',
                          create_time='fake_time')
                for wmk_type in ('high_watermark', 'low_watermark')
            ]

            first_column = Column(
                name='bar_id_1',
                description='bar col description',
                col_type='varchar',
                sort_order=0,
                stats=[Statistics(start_epoch=1,
                                  end_epoch=1,
                                  stat_type='avg',
                                  stat_val='1')])
            second_column = Column(
                name='bar_id_2',
                description='bar col2 description',
                col_type='bigint',
                sort_order=1,
                stats=[Statistics(start_epoch=2,
                                  end_epoch=2,
                                  stat_type='avg',
                                  stat_val='2')])

            # The stub writer mirrors the table_writer fixture field by field.
            writer_fixture = self.table_writer
            stub_writer = Application(
                application_url=writer_fixture['application_url'],
                description=writer_fixture['description'],
                name=writer_fixture['name'],
                id=writer_fixture['id'])

            mock_get_table.return_value = Table(
                database='hive',
                cluster='gold',
                schema='foo_schema',
                name='foo_table',
                tags=[Tag(tag_name='test', tag_type='default')],
                table_readers=[],
                description='foo description',
                watermarks=stub_watermarks,
                columns=[first_column, second_column],
                owners=[User(email='*****@*****.**')],
                table_writer=stub_writer,
                last_updated_timestamp=1,
                source=Source(source='/source_file_loc', source_type='github'))

            proxy = Neo4jProxy(host='DOES_NOT_MATTER', port=0000)
            followed = proxy.get_table_by_user_relation(
                user_email='test_user',
                relation_type=UserResourceRel.follow)

            tables = followed['table']
            self.assertEqual(len(tables), 1)
            self.assertEqual(followed['table'][0].name, 'foo_table')
Example #3
0
    def _exec_table_query(self, table_uri: str) -> Tuple:
        """
        Runs one Cypher query for the table matched by ``table_uri`` and
        unpacks the single returned record.

        :param table_uri: value bound to the ``$tbl_key`` query parameter
        :return: a 6-tuple of (watermarks, table writer, last updated
            timestamp, owners, tags, source). The table writer is None when
            the record has no application, and the source is None when the
            record's ``src`` value is empty.
        """

        table_level_query = textwrap.dedent("""\
        MATCH (tbl:Table {key: $tbl_key})
        OPTIONAL MATCH (wmk:Watermark)-[:BELONG_TO_TABLE]->(tbl)
        OPTIONAL MATCH (application:Application)-[:GENERATES]->(tbl)
        OPTIONAL MATCH (tbl)-[:LAST_UPDATED_AT]->(t:Timestamp)
        OPTIONAL MATCH (owner:User)-[:OWNER_OF]->(tbl)
        OPTIONAL MATCH (tbl)-[:TAGGED_BY]->(tag:Tag)
        OPTIONAL MATCH (tbl)-[:SOURCE]->(src:Source)
        RETURN collect(distinct wmk) as wmk_records,
        application,
        t.last_updated_timestamp as last_updated_timestamp,
        collect(distinct owner) as owner_records,
        collect(distinct tag) as tag_records,
        src
        """)

        query_result = self._execute_cypher_query(
            statement=table_level_query, param_dict={'tbl_key': table_uri})
        row = query_result.single()

        # Watermark records without a key are skipped; the watermark type is
        # the second-to-last '/'-separated segment of the key.
        watermarks = [
            Watermark(watermark_type=wmk['key'].split('/')[-2],
                      partition_key=wmk['partition_key'],
                      partition_value=wmk['partition_value'],
                      create_time=wmk['create_time'])
            for wmk in row['wmk_records']
            if wmk['key'] is not None
        ]

        # A missing or empty tag list yields no tags.
        tags = (
            [Tag(tag_name=tag['key'], tag_type=tag['tag_type'])
             for tag in row['tag_records']]
            if row.get('tag_records') else []
        )

        # The application id is optional and defaults to an empty string.
        app = row['application']
        writer = None if app is None else Application(
            application_url=app['application_url'],
            description=app['description'],
            name=app['name'],
            id=app.get('id', ''))

        last_updated = row['last_updated_timestamp']

        owners = [User(email=owner['email'])
                  for owner in row.get('owner_records', [])]

        source = None
        if row['src']:
            source = Source(source_type=row['src']['source_type'],
                            source=row['src']['source'])

        return watermarks, writer, last_updated, owners, tags, source