Beispiel #1
0
    def test_get_table(self) -> None:
        with patch.object(GraphDatabase, 'driver'), patch.object(
                Neo4jProxy, '_execute_cypher_query') as mock_execute:
            mock_execute.side_effect = [
                self.col_usage_return_value, [], self.table_level_return_value
            ]

            neo4j_proxy = Neo4jProxy(endpoint='bogus')
            table = neo4j_proxy.get_table(table_uri='dummy_uri')

            expected = Table(
                database='hive',
                cluster='gold',
                schema='foo_schema',
                name='foo_table',
                tags=[Tag(tag_name='test', tag_type='default')],
                table_readers=[],
                description='foo description',
                watermarks=[
                    Watermark(watermark_type='high_watermark',
                              partition_key='ds',
                              partition_value='fake_value',
                              create_time='fake_time'),
                    Watermark(watermark_type='low_watermark',
                              partition_key='ds',
                              partition_value='fake_value',
                              create_time='fake_time')
                ],
                columns=[
                    Column(name='bar_id_1',
                           description='bar col description',
                           col_type='varchar',
                           sort_order=0,
                           stats=[
                               Statistics(start_epoch=1,
                                          end_epoch=1,
                                          stat_type='avg',
                                          stat_val='1')
                           ]),
                    Column(name='bar_id_2',
                           description='bar col2 description',
                           col_type='bigint',
                           sort_order=1,
                           stats=[
                               Statistics(start_epoch=2,
                                          end_epoch=2,
                                          stat_type='avg',
                                          stat_val='2')
                           ])
                ],
                owners=[User(email='*****@*****.**')],
                table_writer=Application(
                    application_url=self.table_writer['application_url'],
                    description=self.table_writer['description'],
                    name=self.table_writer['name'],
                    id=self.table_writer['id']),
                last_updated_timestamp=1,
                source=Source(source='/source_file_loc', source_type='github'))

            self.assertEqual(str(expected), str(table))
    def test_get_table(self):
        self._mock_get_table_entity()
        response = self.proxy.get_table(table_uri=self.table_uri)

        classif_name = self.classification_entity['classifications'][0]['typeName']
        ent_attrs = self.entity1['attributes']

        col_attrs = self.test_column['attributes']
        exp_col_stats = list()

        for stats in col_attrs['stats']:
            exp_col_stats.append(
                Statistics(
                    stat_type=stats['attributes']['stat_name'],
                    stat_val=stats['attributes']['stat_val'],
                    start_epoch=stats['attributes']['start_epoch'],
                    end_epoch=stats['attributes']['end_epoch'],
                )
            )
        exp_col = Column(name=col_attrs['qualifiedName'],
                         description='column description',
                         col_type='Managed',
                         sort_order=col_attrs['position'],
                         stats=exp_col_stats)
        expected = Table(database=self.entity_type,
                         cluster=self.cluster,
                         schema=self.db,
                         name=self.name,
                         tags=[Tag(tag_name=classif_name, tag_type="default")],
                         description=ent_attrs['description'],
                         owners=[User(email=ent_attrs['owner'])],
                         columns=[exp_col],
                         last_updated_timestamp=self.entity1['updateTime'])
        self.assertEqual(str(expected), str(response))
Beispiel #3
0
    def get_table(self, *, table_id: str, table_info: Dict) -> Table:
        """
        Gathers all the information needed for the Table Detail Page.
        :param table_id:
        :param table_info: Additional table information (entity, db, cluster, name)
        :return: A Table object with all the information available
        or gathered from different entities.
        """

        table_entity = self._get_table_entity(table_id=table_id)
        table_details = table_entity.entity

        try:
            attrs = table_details[self.ATTRS_KEY]
            rel_attrs = table_details[self.REL_ATTRS_KEY]

            tags = []
            # Using or in case, if the key 'classifications' is there with a None
            for classification in table_details.get(
                    "classifications") or list():
                tags.append(
                    Tag(tag_name=classification.get('typeName'),
                        tag_type="default"))

            columns = []
            for column in rel_attrs.get('columns') or list():
                col_entity = table_entity.referredEntities[column['guid']]
                col_attrs = col_entity[self.ATTRS_KEY]
                columns.append(
                    Column(
                        name=col_attrs.get(self.NAME_ATTRIBUTE),
                        description=col_attrs.get('description'),
                        col_type=col_attrs.get('type')
                        or col_attrs.get('dataType'),
                        sort_order=col_attrs.get('position'),
                    ))

            table = Table(
                database=table_info['entity'],
                cluster=table_info['cluster'],
                schema=table_info['db'],
                name=table_info['name'],
                tags=tags,
                description=attrs.get('description'),
                owners=[User(email=attrs.get('owner'))],
                columns=columns,
                last_updated_timestamp=table_details.get('updateTime'))

            return table
        except KeyError as ex:
            LOGGER.exception(
                'Error while accessing table information. {}'.format(str(ex)))
            raise BadRequest(
                'Some of the required attributes '
                'are missing in : ( {table_id} )'.format(table_id=table_id))
Beispiel #4
0
    def get_table(self, *, table_uri: str) -> Table:
        """
        Gathers all the information needed for the Table Detail Page.
        :param table_uri:
        :return: A Table object with all the information available
        or gathered from different entities.
        """
        entity, table_info = self._get_table_entity(table_uri=table_uri)
        table_details = entity.entity

        try:
            attrs = table_details[self.ATTRS_KEY]

            table_qn = parse_table_qualified_name(
                qualified_name=attrs.get(self.QN_KEY)
            )

            tags = []
            # Using or in case, if the key 'classifications' is there with a None
            for classification in table_details.get("classifications") or list():
                tags.append(
                    Tag(
                        tag_name=classification.get('typeName'),
                        tag_type="default"
                    )
                )

            columns = self._serialize_columns(entity=entity)

            table = Table(
                database=table_details.get('typeName'),
                cluster=table_qn.get('cluster_name', ''),
                schema=table_qn.get('db_name', ''),
                name=attrs.get('name') or table_qn.get("table_name", ''),
                tags=tags,
                description=attrs.get('description') or attrs.get('comment'),
                owners=[User(email=attrs.get('owner'))],
                columns=columns,
                last_updated_timestamp=table_details.get('updateTime'))

            return table
        except KeyError as ex:
            LOGGER.exception('Error while accessing table information. {}'
                             .format(str(ex)))
            raise BadRequest('Some of the required attributes '
                             'are missing in : ( {table_uri} )'
                             .format(table_uri=table_uri))
Beispiel #5
0
    def _exec_usage_query(self, table_uri: str) -> List[Reader]:
        # Return Value: List[Reader]

        usage_query = textwrap.dedent("""\
        MATCH (user:User)-[read:READ]->(table:Table {key: $tbl_key})
        RETURN user.email as email, read.read_count as read_count, table.name as table_name
        ORDER BY read.read_count DESC LIMIT 5;
        """)

        usage_neo4j_records = self._execute_cypher_query(
            statement=usage_query, param_dict={'tbl_key': table_uri})
        readers = []  # type: List[Reader]
        for usage_neo4j_record in usage_neo4j_records:
            reader = Reader(user=User(email=usage_neo4j_record['email']),
                            read_count=usage_neo4j_record['read_count'])
            readers.append(reader)

        return readers
    def test_get_resources_by_user_relation(self) -> None:
        with patch.object(GraphDatabase, 'driver'), \
            patch.object(Neo4jProxy, '_execute_cypher_query') as mock_execute, \
                patch.object(Neo4jProxy, 'get_table') as mock_get_table:

            mock_execute.return_value.single.return_value = {
                'table_records': [{
                    'key': 'table_uri',
                }]
            }
            mock_get_table.return_value = Table(
                database='hive',
                cluster='gold',
                schema='foo_schema',
                name='foo_table',
                tags=[Tag(tag_name='test', tag_type='default')],
                table_readers=[],
                description='foo description',
                watermarks=[
                    Watermark(watermark_type='high_watermark',
                              partition_key='ds',
                              partition_value='fake_value',
                              create_time='fake_time'),
                    Watermark(watermark_type='low_watermark',
                              partition_key='ds',
                              partition_value='fake_value',
                              create_time='fake_time')
                ],
                columns=[
                    Column(name='bar_id_1',
                           description='bar col description',
                           col_type='varchar',
                           sort_order=0,
                           stats=[
                               Statistics(start_epoch=1,
                                          end_epoch=1,
                                          stat_type='avg',
                                          stat_val='1')
                           ]),
                    Column(name='bar_id_2',
                           description='bar col2 description',
                           col_type='bigint',
                           sort_order=1,
                           stats=[
                               Statistics(start_epoch=2,
                                          end_epoch=2,
                                          stat_type='avg',
                                          stat_val='2')
                           ])
                ],
                owners=[User(email='*****@*****.**')],
                table_writer=Application(
                    application_url=self.table_writer['application_url'],
                    description=self.table_writer['description'],
                    name=self.table_writer['name'],
                    id=self.table_writer['id']),
                last_updated_timestamp=1,
                source=Source(source='/source_file_loc', source_type='github'))

            neo4j_proxy = Neo4jProxy(host='DOES_NOT_MATTER', port=0000)
            result = neo4j_proxy.get_table_by_user_relation(
                user_email='test_user', relation_type=UserResourceRel.follow)
            self.assertEqual(len(result['table']), 1)
            self.assertEqual(result['table'][0].name, 'foo_table')
Beispiel #7
0
    def _exec_table_query(self, table_uri: str) -> Tuple:
        """
        Queries one Cypher record with watermark list, Application,
        ,timestamp, owner records and tag records.
        """

        # Return Value: (Watermark Results, Table Writer, Last Updated Timestamp, owner records, tag records)

        table_level_query = textwrap.dedent("""\
        MATCH (tbl:Table {key: $tbl_key})
        OPTIONAL MATCH (wmk:Watermark)-[:BELONG_TO_TABLE]->(tbl)
        OPTIONAL MATCH (application:Application)-[:GENERATES]->(tbl)
        OPTIONAL MATCH (tbl)-[:LAST_UPDATED_AT]->(t:Timestamp)
        OPTIONAL MATCH (owner:User)-[:OWNER_OF]->(tbl)
        OPTIONAL MATCH (tbl)-[:TAGGED_BY]->(tag:Tag)
        OPTIONAL MATCH (tbl)-[:SOURCE]->(src:Source)
        RETURN collect(distinct wmk) as wmk_records,
        application,
        t.last_updated_timestamp as last_updated_timestamp,
        collect(distinct owner) as owner_records,
        collect(distinct tag) as tag_records,
        src
        """)

        table_records = self._execute_cypher_query(
            statement=table_level_query, param_dict={'tbl_key': table_uri})

        table_records = table_records.single()

        wmk_results = []
        table_writer = None

        wmk_records = table_records['wmk_records']

        for record in wmk_records:
            if record['key'] is not None:
                watermark_type = record['key'].split('/')[-2]
                wmk_result = Watermark(
                    watermark_type=watermark_type,
                    partition_key=record['partition_key'],
                    partition_value=record['partition_value'],
                    create_time=record['create_time'])
                wmk_results.append(wmk_result)

        tags = []
        if table_records.get('tag_records'):
            tag_records = table_records['tag_records']
            for record in tag_records:
                tag_result = Tag(tag_name=record['key'],
                                 tag_type=record['tag_type'])
                tags.append(tag_result)

        application_record = table_records['application']
        if application_record is not None:
            table_writer = Application(
                application_url=application_record['application_url'],
                description=application_record['description'],
                name=application_record['name'],
                id=application_record.get('id', ''))

        timestamp_value = table_records['last_updated_timestamp']

        owner_record = []

        for owner in table_records.get('owner_records', []):
            owner_record.append(User(email=owner['email']))

        src = None

        if table_records['src']:
            src = Source(source_type=table_records['src']['source_type'],
                         source=table_records['src']['source'])

        return wmk_results, table_writer, timestamp_value, owner_record, tags, src