コード例 #1
0
    def test_get_table(self) -> None:
        """
        Check that ``Neo4jProxy.get_table`` builds the expected ``Table``.

        The Neo4j driver and ``_execute_cypher_query`` are both patched, so
        the patched query method hands back three canned results, one per
        successive call. The result is compared with the expected model
        through ``str()``.
        """
        with patch.object(GraphDatabase, 'driver'), \
                patch.object(Neo4jProxy, '_execute_cypher_query') as mock_execute:
            # One canned result per successive _execute_cypher_query call.
            mock_execute.side_effect = [
                self.col_usage_return_value,
                [],
                self.table_level_return_value,
            ]

            proxy = Neo4jProxy(endpoint='bogus')
            actual = proxy.get_table(table_uri='dummy_uri')

            # Both watermarks share every field except their type.
            expected_watermarks = [
                Watermark(watermark_type=watermark_type,
                          partition_key='ds',
                          partition_value='fake_value',
                          create_time='fake_time')
                for watermark_type in ('high_watermark', 'low_watermark')
            ]

            first_column = Column(
                name='bar_id_1',
                description='bar col description',
                col_type='varchar',
                sort_order=0,
                stats=[Statistics(start_epoch=1, end_epoch=1,
                                  stat_type='avg', stat_val='1')])
            second_column = Column(
                name='bar_id_2',
                description='bar col2 description',
                col_type='bigint',
                sort_order=1,
                stats=[Statistics(start_epoch=2, end_epoch=2,
                                  stat_type='avg', stat_val='2')])

            writer = self.table_writer
            expected_writer = Application(
                application_url=writer['application_url'],
                description=writer['description'],
                name=writer['name'],
                id=writer['id'])

            expected = Table(
                database='hive',
                cluster='gold',
                schema='foo_schema',
                name='foo_table',
                tags=[Tag(tag_name='test', tag_type='default')],
                table_readers=[],
                description='foo description',
                watermarks=expected_watermarks,
                columns=[first_column, second_column],
                owners=[User(email='*****@*****.**')],
                table_writer=expected_writer,
                last_updated_timestamp=1,
                source=Source(source='/source_file_loc', source_type='github'))

            self.assertEqual(str(expected), str(actual))
コード例 #2
0
    def test_get_table(self):
        """
        Check that ``self.proxy.get_table`` returns the expected ``Table``.

        The expected model is assembled from the fixtures held on the test
        case (``classification_entity``, ``entity1``, ``test_column``) and is
        compared with the response through ``str()``.
        """
        self._mock_get_table_entity()
        response = self.proxy.get_table(table_uri=self.table_uri)

        first_classification = self.classification_entity['classifications'][0]
        tag_name = first_classification['typeName']
        table_attrs = self.entity1['attributes']
        column_attrs = self.test_column['attributes']

        # One Statistics model per stats entry on the column fixture.
        expected_stats = [
            Statistics(
                stat_type=entry['attributes']['stat_name'],
                stat_val=entry['attributes']['stat_val'],
                start_epoch=entry['attributes']['start_epoch'],
                end_epoch=entry['attributes']['end_epoch'],
            )
            for entry in column_attrs['stats']
        ]

        expected_column = Column(
            name=column_attrs['qualifiedName'],
            description='column description',
            col_type='Managed',
            sort_order=column_attrs['position'],
            stats=expected_stats,
        )

        expected = Table(
            database=self.entity_type,
            cluster=self.cluster,
            schema=self.db,
            name=self.name,
            tags=[Tag(tag_name=tag_name, tag_type="default")],
            description=table_attrs['description'],
            owners=[User(email=table_attrs['owner'])],
            columns=[expected_column],
            last_updated_timestamp=self.entity1['updateTime'],
        )

        self.assertEqual(str(expected), str(response))
コード例 #3
0
    def get_table(self, *, table_id: str, table_info: Dict) -> Table:
        """
        Build the Table model used by the Table Detail Page.

        :param table_id: Identifier passed on to ``_get_table_entity``.
        :param table_info: Additional table information; the keys 'entity',
        'cluster', 'db' and 'name' are read from it.
        :return: A Table object populated from the table entity, its
        classifications and the column entities it refers to.
        :raises BadRequest: if a required key is missing while reading the
        entity or ``table_info`` (the underlying KeyError is logged first).
        """
        entity_with_refs = self._get_table_entity(table_id=table_id)
        details = entity_with_refs.entity

        try:
            attributes = details[self.ATTRS_KEY]
            relationships = details[self.REL_ATTRS_KEY]

            # The `or []` fallback covers a 'classifications' key that is
            # present but holds None.
            tags = [
                Tag(tag_name=item.get('typeName'), tag_type="default")
                for item in details.get("classifications") or []
            ]

            columns = []
            for reference in relationships.get('columns') or []:
                # Column details live in the referred entities, keyed by guid.
                referred = entity_with_refs.referredEntities[reference['guid']]
                column_attrs = referred[self.ATTRS_KEY]
                column = Column(
                    name=column_attrs.get(self.NAME_ATTRIBUTE),
                    description=column_attrs.get('description'),
                    col_type=(column_attrs.get('type')
                              or column_attrs.get('dataType')),
                    sort_order=column_attrs.get('position'),
                )
                columns.append(column)

            return Table(
                database=table_info['entity'],
                cluster=table_info['cluster'],
                schema=table_info['db'],
                name=table_info['name'],
                tags=tags,
                description=attributes.get('description'),
                owners=[User(email=attributes.get('owner'))],
                columns=columns,
                last_updated_timestamp=details.get('updateTime'),
            )
        except KeyError as ex:
            LOGGER.exception(
                'Error while accessing table information. {}'.format(str(ex)))
            raise BadRequest(
                'Some of the required attributes '
                'are missing in : ( {table_id} )'.format(table_id=table_id))
コード例 #4
0
    def _exec_col_query(self, table_uri: str) -> Tuple:
        """
        Run the column-level Cypher query for a table and build its columns.

        :param table_uri: Table key bound to the ``$tbl_key`` query parameter.
        :return: A tuple of (columns sorted by ``sort_order``, last record
        yielded by the query).
        :raises NotFoundException: if the query produced no columns.
        """
        column_level_query = textwrap.dedent("""
        MATCH (db:Database)-[:CLUSTER]->(clstr:Cluster)-[:SCHEMA]->(schema:Schema)
        -[:TABLE]->(tbl:Table {key: $tbl_key})-[:COLUMN]->(col:Column)
        OPTIONAL MATCH (tbl)-[:DESCRIPTION]->(tbl_dscrpt:Description)
        OPTIONAL MATCH (col:Column)-[:DESCRIPTION]->(col_dscrpt:Description)
        OPTIONAL MATCH (col:Column)-[:STAT]->(stat:Stat)
        RETURN db, clstr, schema, tbl, tbl_dscrpt, col, col_dscrpt, collect(distinct stat) as col_stats
        ORDER BY col.sort_order;""")

        records = self._execute_cypher_query(
            statement=column_level_query,
            param_dict={'tbl_key': table_uri})

        columns = []
        # The last record is captured while iterating because random access
        # into a Neo4j result is an O(n) operation.
        last_record = None
        for record in records:
            stats = [
                Statistics(stat_type=raw_stat['stat_name'],
                           stat_val=raw_stat['stat_val'],
                           start_epoch=int(float(raw_stat['start_epoch'])),
                           end_epoch=int(float(raw_stat['end_epoch'])))
                for raw_stat in record['col_stats']
            ]
            last_record = record

            col_node = record['col']
            columns.append(
                Column(name=col_node['name'],
                       description=self._safe_get(record, 'col_dscrpt',
                                                  'description'),
                       col_type=col_node['type'],
                       sort_order=int(col_node['sort_order']),
                       stats=stats))

        if not columns:
            raise NotFoundException(
                'Table URI( {table_uri} ) does not exist'.format(
                    table_uri=table_uri))

        columns.sort(key=lambda item: item.sort_order)
        return columns, last_record
コード例 #5
0
    def _serialize_columns(self, *, entity: EntityUniqueAttribute) -> \
            Union[List[Column], List]:
        """
        Helper function to fetch the columns from entity and serialize them
        using Column and Statistics model.

        Columns are read from the entity's relationship attributes and
        resolved through ``entity.referredEntities``. When a column has a
        'metadata' relationship whose entity is present in
        ``referredEntities``, its 'statistics' entries are serialized as
        Statistics models; a metadata reference that cannot be resolved
        simply yields no statistics.

        :param entity: EntityUniqueAttribute object,
        along with relationshipAttributes
        :return: A list of Column objects ordered by ``sort_order`` (columns
        without a position are placed last, keeping their relative order),
        or an empty list if there are no columns.
        """
        columns = []
        for column in entity.entity[self.REL_ATTRS_KEY].get('columns') or []:
            col_entity = entity.referredEntities[column['guid']]
            col_attrs = col_entity[self.ATTRS_KEY]
            col_rel_attrs = col_entity[self.REL_ATTRS_KEY]

            statistics = []
            metadata_ref = col_rel_attrs.get('metadata')
            metadata_entity = None
            if metadata_ref:
                # .get() returns None when the metadata entity was not
                # included in referredEntities; that case must not be
                # subscripted.
                metadata_entity = entity.referredEntities.get(
                    metadata_ref.get('guid'))

            if metadata_entity is not None:
                for stats in metadata_entity['attributes'].get('statistics') or []:
                    stats_attrs = stats['attributes']
                    statistics.append(
                        Statistics(
                            stat_type=stats_attrs.get('stat_name'),
                            stat_val=stats_attrs.get('stat_val'),
                            start_epoch=stats_attrs.get('start_epoch'),
                            end_epoch=stats_attrs.get('end_epoch'),
                        )
                    )

            columns.append(
                Column(
                    name=col_attrs.get('name'),
                    description=col_attrs.get('description') or col_attrs.get('comment'),
                    col_type=col_attrs.get('type') or col_attrs.get('dataType'),
                    sort_order=col_attrs.get('position'),
                    stats=statistics,
                )
            )

        # 'position' is optional, so sort_order may be None. Comparing None
        # with None or with an int raises TypeError, hence the two-part key:
        # positioned columns first (by position), unpositioned ones last.
        return sorted(
            columns,
            key=lambda item: (item.sort_order is None,
                              0 if item.sort_order is None else item.sort_order))
コード例 #6
0
    def test_get_resources_by_user_relation(self) -> None:
        """
        Check that ``get_table_by_user_relation`` returns the related table.

        The Neo4j driver, ``_execute_cypher_query`` and ``get_table`` are all
        patched: the query's ``single()`` result lists one table key, and
        ``get_table`` returns a fixed ``Table``. The test asserts that exactly
        one table named 'foo_table' comes back under the 'table' key.
        """
        with patch.object(GraphDatabase, 'driver'), \
                patch.object(Neo4jProxy, '_execute_cypher_query') as mock_execute, \
                patch.object(Neo4jProxy, 'get_table') as mock_get_table:

            # The patched query yields a single record listing one table key.
            query_record = {'table_records': [{'key': 'table_uri'}]}
            mock_execute.return_value.single.return_value = query_record

            # Both watermarks share every field except their type.
            watermarks = [
                Watermark(watermark_type=watermark_type,
                          partition_key='ds',
                          partition_value='fake_value',
                          create_time='fake_time')
                for watermark_type in ('high_watermark', 'low_watermark')
            ]

            first_column = Column(
                name='bar_id_1',
                description='bar col description',
                col_type='varchar',
                sort_order=0,
                stats=[Statistics(start_epoch=1, end_epoch=1,
                                  stat_type='avg', stat_val='1')])
            second_column = Column(
                name='bar_id_2',
                description='bar col2 description',
                col_type='bigint',
                sort_order=1,
                stats=[Statistics(start_epoch=2, end_epoch=2,
                                  stat_type='avg', stat_val='2')])

            writer = self.table_writer
            table_writer = Application(
                application_url=writer['application_url'],
                description=writer['description'],
                name=writer['name'],
                id=writer['id'])

            mock_get_table.return_value = Table(
                database='hive',
                cluster='gold',
                schema='foo_schema',
                name='foo_table',
                tags=[Tag(tag_name='test', tag_type='default')],
                table_readers=[],
                description='foo description',
                watermarks=watermarks,
                columns=[first_column, second_column],
                owners=[User(email='*****@*****.**')],
                table_writer=table_writer,
                last_updated_timestamp=1,
                source=Source(source='/source_file_loc', source_type='github'))

            proxy = Neo4jProxy(host='DOES_NOT_MATTER', port=0000)
            result = proxy.get_table_by_user_relation(
                user_email='test_user',
                relation_type=UserResourceRel.follow)

            tables = result['table']
            self.assertEqual(len(tables), 1)
            self.assertEqual(tables[0].name, 'foo_table')