def test_get_popular_tables(self) -> None:
        """
        Verify popular-table retrieval against a mocked Neo4j backend.

        The first part checks that repeated ``_get_popular_tables_uris`` calls with the
        same argument execute the Cypher query only once. The second part checks that
        ``get_popular_tables`` turns the returned records into ``PopularTable`` instances.
        """
        # Cache hit: three identical lookups must result in a single query execution.
        with patch.object(GraphDatabase, 'driver'), patch.object(Neo4jProxy, '_execute_cypher_query') as mock_execute:
            mock_execute.return_value = [{'table_key': 'foo'}, {'table_key': 'bar'}]

            neo4j_proxy = Neo4jProxy(host='DOES_NOT_MATTER', port=0000)
            for _ in range(3):
                self.assertEqual(neo4j_proxy._get_popular_tables_uris(2), ['foo', 'bar'])

            # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
            self.assertEqual(mock_execute.call_count, 1)

        # Record conversion: the second record has no 'table_description' key.
        with patch.object(GraphDatabase, 'driver'), patch.object(Neo4jProxy, '_execute_cypher_query') as mock_execute:
            mock_execute.return_value = [
                {'database_name': 'db', 'cluster_name': 'clstr', 'schema_name': 'sch', 'table_name': 'foo',
                 'table_description': 'test description'},
                {'database_name': 'db', 'cluster_name': 'clstr', 'schema_name': 'sch', 'table_name': 'bar'}
            ]

            neo4j_proxy = Neo4jProxy(host='DOES_NOT_MATTER', port=0000)
            actual = neo4j_proxy.get_popular_tables(num_entries=2)

            expected = [
                PopularTable(database='db', cluster='clstr', schema='sch', name='foo', description='test description'),
                PopularTable(database='db', cluster='clstr', schema='sch', name='bar'),
            ]

            # Compared through repr, as the original test does.
            self.assertEqual(actual.__repr__(), expected.__repr__())
    def test_get_popular_tables_without_db(self):
        """
        Check ``get_popular_tables`` when the parent entity's qualified name is ``'meta@cluster'``.

        The expected tables carry empty ``cluster`` and ``schema`` values.
        """
        import copy

        # Deep copies are required: the nested 'qualifiedName' is overwritten below, and a
        # shallow ``dict.copy()`` would write that change through to the shared fixtures.
        meta1 = copy.deepcopy(self.metadata1)
        meta2 = copy.deepcopy(self.metadata2)

        for meta in [meta1, meta2]:
            meta['relationshipAttributes']['parentEntity']['attributes']['qualifiedName'] = 'meta@cluster'

        metadata1 = self.to_class(meta1)
        metadata2 = self.to_class(meta2)
        self.proxy._get_flat_values_from_dsl = MagicMock(return_value=[])

        metadata_collection = MagicMock()
        metadata_collection.entities_with_relationships = MagicMock(return_value=[metadata1, metadata2])

        self.proxy._driver.entity_bulk = MagicMock(return_value=[metadata_collection])
        response = self.proxy.get_popular_tables(num_entries=2)
        ent1_attrs = self.entity1['attributes']
        ent2_attrs = self.entity2['attributes']

        # NOTE(review): the second expectation takes its description from entity1;
        # kept as in the original test - confirm this is intended.
        expected = [
            PopularTable(database=self.entity_type, cluster='', schema='',
                         name=ent1_attrs['qualifiedName'], description=ent1_attrs['description']),
            PopularTable(database=self.entity_type, cluster='', schema='',
                         name=ent2_attrs['qualifiedName'], description=ent1_attrs['description']),
        ]

        self.assertEqual(expected.__repr__(), response.__repr__())
    def test_get_popular_tables(self) -> None:
        """
        Exercise ``get_popular_tables`` with mocked basic-search and bulk-entity calls.

        Asserts that five consecutive calls reach ``entity_bulk`` exactly once and that
        the first response matches the expected ``PopularTable`` list.
        """
        first_meta: Dict = copy.deepcopy(self.metadata1)
        second_meta: Dict = copy.deepcopy(self.metadata2)
        first_meta['attributes']['table'] = self.entity1
        second_meta['attributes']['table'] = self.entity2

        search_collection = MagicMock()
        search_collection.entities = [self.to_class(first_meta), self.to_class(second_meta)]
        search_create = MagicMock(return_value=search_collection)

        with patch.object(self.proxy._driver.search_basic, 'create', search_create):
            bulk_collection = MagicMock()
            bulk_collection.entities = [self.to_class(raw) for raw in (self.entity1, self.entity2)]
            self.proxy._driver.entity_bulk = MagicMock(return_value=[bulk_collection])

            response = self.proxy.get_popular_tables(num_entries=2)

            # Four more calls for the cache test; the count below must stay at one.
            for _ in range(4):
                self.proxy.get_popular_tables(num_entries=2)

            self.assertEqual(self.proxy._driver.entity_bulk.call_count, 1)

            first_attrs = cast(dict, self.entity1['attributes'])
            second_attrs = cast(dict, self.entity2['attributes'])

            # Fields shared by both expected tables.
            common = dict(database=self.entity_type, cluster=self.cluster, schema=self.db)
            expected = [
                PopularTable(name=first_attrs['name'],
                             description=first_attrs['description'],
                             **common),
                PopularTable(name=second_attrs['name'],
                             description=first_attrs['description'],
                             **common),
            ]

            self.assertEqual(expected.__repr__(), response.__repr__())
    def get_frequently_used_tables(self, *, user_email: str) -> Dict[str, Any]:
        """
        Fetch the tables a user is linked to through a READ relationship.

        :param user_email: the email of the user, bound to the ``$query_key`` parameter
        :return: a dict whose 'table' entry lists one PopularTable per returned record
        :raises NotFoundException: if the query returns no records
        """
        statement = textwrap.dedent("""
        MATCH (user:User {key: $query_key})-[r:READ]->(tbl:Table)
        WHERE EXISTS(r.published_tag) AND r.published_tag IS NOT NULL
        WITH user, r, tbl ORDER BY r.published_tag DESC, r.total_reads DESC LIMIT 50
        MATCH (tbl:Table)<-[:TABLE]-(schema:Schema)<-[:SCHEMA]-(clstr:Cluster)<-[:CLUSTER]-(db:Database)
        OPTIONAL MATCH (tbl)-[:DESCRIPTION]->(tbl_dscrpt:Description)
        RETURN db, clstr, schema, tbl, tbl_dscrpt
        """)

        rows = self._execute_cypher_query(statement=statement, param_dict={'query_key': user_email})
        if not rows:
            raise NotFoundException('User {user_id} does not READ any resources'.format(user_id=user_email))

        # The description comes from an OPTIONAL MATCH, hence the _safe_get lookup.
        tables = [
            PopularTable(
                database=row['db']['name'],
                cluster=row['clstr']['name'],
                schema=row['schema']['name'],
                name=row['tbl']['name'],
                description=self._safe_get(row, 'tbl_dscrpt', 'description'))
            for row in rows
        ]
        return {'table': tables}
    def get_table_by_user_relation(self, *, user_email: str, relation_type: UserResourceRel) -> Dict[str, Any]:
        """
        Fetch the tables connected to a user through the given relation.

        Only table resources are handled here.

        :param user_email: the email of the user
        :param relation_type: the relation between the user and the resource
        :return: a dict whose 'table' entry lists one PopularTable per returned record
        :raises NotFoundException: if the query returns no records
        """
        rel_clause: str = self._get_user_table_relationship_clause(
            relation_type=relation_type,
            tbl_key='',
            user_key=user_email,
        )
        statement = textwrap.dedent(f"""
        MATCH {rel_clause}<-[:TABLE]-(schema:Schema)<-[:SCHEMA]-(clstr:Cluster)<-[:CLUSTER]-(db:Database)
        WITH db, clstr, schema, tbl
        OPTIONAL MATCH (tbl)-[:DESCRIPTION]->(tbl_dscrpt:Description)
        RETURN db, clstr, schema, tbl, tbl_dscrpt""")

        rows = self._execute_cypher_query(statement=statement, param_dict={'query_key': user_email})
        if not rows:
            raise NotFoundException('User {user_id} does not {relation} any resources'.format(user_id=user_email,
                                                                                              relation=relation_type))

        # The description comes from an OPTIONAL MATCH, hence the _safe_get lookup.
        tables = [
            PopularTable(
                database=row['db']['name'],
                cluster=row['clstr']['name'],
                schema=row['schema']['name'],
                name=row['tbl']['name'],
                description=self._safe_get(row, 'tbl_dscrpt', 'description'))
            for row in rows
        ]
        return {'table': tables}
    def get_popular_tables(self, *,
                           num_entries: int = 10) -> List[PopularTable]:
        """
        Return every table entity found by the basic search as a PopularTable.

        FixMe: For now it simply returns ALL the tables available,
        Need to generate the formula for popular tables only.

        :param num_entries: not used by the current implementation
        :return: list with one PopularTable per entity in the bulk response
        """
        search_params = {'typeName': self.TABLE_ENTITY, 'excludeDeletedEntities': True}
        table_guids = self._get_ids_from_basic_search(params=search_params)

        tables = []
        for collection in self._driver.entity_bulk(guid=table_guids):
            for table in collection.entities:
                table_attrs = table.attributes
                # ToDo (Verdan): Investigate why db is not in referredEntities
                db_ref = table_attrs.get(self.DB_KEY)

                # Empty strings are the fallback when the table has no database reference.
                schema_name = ''
                cluster_name = ''
                if db_ref:
                    db_attrs = self._driver.entity_guid(db_ref['guid']).entity['attributes']
                    schema_name = db_attrs.get(self.NAME_KEY)
                    cluster_name = db_attrs.get('clusterName')

                tables.append(PopularTable(database=table.typeName,
                                           cluster=cluster_name,
                                           schema=schema_name,
                                           name=table_attrs.get(self.NAME_KEY),
                                           description=table_attrs.get('description')))
        return tables
Beispiel #7
0
    def get_popular_tables(self, *, num_entries: int) -> List[PopularTable]:
        """
        Build PopularTable instances from 'table_metadata' entities sorted by popularityScore.

        :param num_entries: Number of popular tables to fetch (sent as the search 'limit')
        :return: A list with one PopularTable per entity returned by _get_metadata_entities
        """
        search_params = {'typeName': 'table_metadata',
                         'sortBy': 'popularityScore',
                         'sortOrder': 'DESCENDING',
                         'excludeDeletedEntities': True,
                         'limit': num_entries,
                         'attributes': ['table']}

        results = []
        for entity in self._get_metadata_entities(search_params):
            attrs = entity.attributes
            parsed_qn = parse_table_qualified_name(qualified_name=attrs.get(self.QN_KEY))

            # Prefer the name parsed from the qualified name; fall back to the 'name' attribute.
            name = parsed_qn.get("table_name") or attrs.get('name')
            schema = parsed_qn.get("db_name", '')
            cluster = parsed_qn.get("cluster_name", '')

            results.append(PopularTable(
                database=entity.typeName,
                cluster=cluster,
                schema=schema,
                name=name,
                description=attrs.get('description') or attrs.get('comment')))

        return results
Beispiel #8
0
    def get_popular_tables(self, *, num_entries: int) -> List[PopularTable]:
        """
        Build PopularTable instances from the parent entities of the most popular metadata.

        :param num_entries: Number of popular tables to fetch (used as the DSL LIMIT)
        :return: A list with one PopularTable per entity returned by _get_metadata_entities
        """
        dsl_query = {
            'query': ('FROM Table SELECT metadata.__guid '
                      'ORDERBY popularityScore desc '
                      f'LIMIT {num_entries}')
        }

        results = []
        for metadata in self._get_metadata_entities(dsl_query):
            parent = metadata.relationshipAttributes.get("parentEntity")
            parent_attrs = parent.get(self.ATTRS_KEY)
            parsed_qn = parse_table_qualified_name(qualified_name=parent_attrs.get(self.QN_KEY))

            # Prefer the name parsed from the qualified name; fall back to the 'name' attribute.
            name = parsed_qn.get("table_name") or parent_attrs.get('name')
            schema = parsed_qn.get("db_name", '')
            cluster = parsed_qn.get("cluster_name", '')

            results.append(PopularTable(
                database=parent.get("typeName"),
                cluster=cluster,
                schema=schema,
                name=name,
                description=parent_attrs.get('description')))

        return results
    def get_popular_tables(self, *, num_entries: int) -> List[PopularTable]:
        """
        Retrieve popular tables.

        The table keys come from _get_popular_tables_uris; a second query then loads the
        database, cluster, schema, name and optional description for those keys.

        :param num_entries: forwarded to _get_popular_tables_uris
        :return: list of PopularTable; empty when no table keys are returned
        """
        uris = self._get_popular_tables_uris(num_entries)
        if not uris:
            return []

        statement = textwrap.dedent("""
        MATCH (db:Database)-[:CLUSTER]->(clstr:Cluster)-[:SCHEMA]->(schema:Schema)-[:TABLE]->(tbl:Table)
        WHERE tbl.key IN $table_uris
        WITH db.name as database_name, clstr.name as cluster_name, schema.name as schema_name, tbl
        OPTIONAL MATCH (tbl)-[:DESCRIPTION]->(dscrpt:Description)
        RETURN database_name, cluster_name, schema_name, tbl.name as table_name,
        dscrpt.description as table_description;
        """)

        rows = self._execute_cypher_query(statement=statement,
                                          param_dict={'table_uris': uris})

        # The description comes from an OPTIONAL MATCH, hence the _safe_get lookup.
        return [
            PopularTable(database=row['database_name'],
                         cluster=row['cluster_name'],
                         schema=row['schema_name'],
                         name=row['table_name'],
                         description=self._safe_get(row, 'table_description'))
            for row in rows
        ]
    def test_get_popular_tables(self):
        """
        Check ``get_popular_tables`` with a mocked basic search and a mocked database lookup dict.
        """
        def as_entity(raw):
            # Mock exposing only the attributes this test assigns.
            entity = MagicMock()
            entity.typeName = raw['typeName']
            entity.attributes = raw['attributes']
            return entity

        search_result = MagicMock()
        search_result.entities = [as_entity(self.entity1), as_entity(self.entity2)]
        self.proxy._driver.search_basic.create = MagicMock(
            return_value=search_result)

        database = MagicMock()
        database.attributes = {
            'qualifiedName': self.db,
            'clusterName': self.cluster
        }

        # The database mock is keyed by the guid referenced from entity1.
        db_guid = self.entity1['attributes']['db']['guid']
        self.proxy._get_rel_attributes_dict = MagicMock(return_value={db_guid: database})

        response = self.proxy.get_popular_tables(num_entries=2)

        first_attrs = self.entity1['attributes']
        second_attrs = self.entity2['attributes']
        common = dict(database=self.entity_type, cluster=self.cluster, schema=self.db)

        expected = [
            PopularTable(name=first_attrs['qualifiedName'],
                         description=first_attrs['description'],
                         **common),
            PopularTable(name=second_attrs['qualifiedName'],
                         description=first_attrs['description'],
                         **common),
        ]

        self.assertEqual(expected.__repr__(), response.__repr__())
    def test_get_popular_tables(self, mock_basic_search):
        """
        Check ``get_popular_tables`` with mocked ``entity_bulk`` and ``entity_guid`` driver calls.

        :param mock_basic_search: not used in the body; presumably injected by a patch
            decorator outside this view - TODO confirm
        """
        def as_entity(raw):
            # Mock exposing only the attributes this test assigns.
            entity = MagicMock()
            entity.typeName = raw['typeName']
            entity.attributes = raw['attributes']
            return entity

        bulk_result = MagicMock()
        bulk_result.entities = [as_entity(self.entity1), as_entity(self.entity2)]
        self.proxy._driver.entity_bulk = MagicMock(
            return_value=[bulk_result])

        database = MagicMock()
        database.entity = {
            'attributes': {
                'qualifiedName': self.db,
                'clusterName': self.cluster
            }
        }
        self.proxy._driver.entity_guid = MagicMock(return_value=database)

        response = self.proxy.get_popular_tables(num_entries=2)

        first_attrs = self.entity1['attributes']
        second_attrs = self.entity2['attributes']
        common = dict(database=self.entity_type, cluster=self.cluster, schema=self.db)

        expected = [
            PopularTable(name=first_attrs['qualifiedName'],
                         description=first_attrs['description'],
                         **common),
            PopularTable(name=second_attrs['qualifiedName'],
                         description=first_attrs['description'],
                         **common),
        ]

        self.assertEqual(response.__repr__(), expected.__repr__())
Beispiel #12
0
    def get_popular_tables(self,
                           *,
                           num_entries: int = 10) -> List[PopularTable]:
        """
        Return every table found by the basic search as a PopularTable.

        FixMe: For now it simply returns ALL the tables available,
        Need to generate the formula for popular tables only.

        :param num_entries: not used by the current implementation
        :return: list with one PopularTable per entity in the search result
        :raises BadRequest: if the basic search raises BadRequest
        """
        popular_tables = list()
        params = {
            'typeName': self.TABLE_ENTITY,
            'excludeDeletedEntities': True,
            self.ATTRS_KEY: [self.DB_ATTRIBUTE]
        }
        try:
            # Fetch all the Popular Tables
            _table_collection = self._driver.search_basic.create(data=params)
            # Inflate the table entities
            table_entities = _table_collection.entities
        except BadRequest as ex:
            # Interpolate the configured entity type; the message previously printed
            # the literal text "self.TABLE_ENTITY".
            LOGGER.exception(
                f'Please make sure you have assigned the appropriate '
                f'{self.TABLE_ENTITY} entity to your atlas tables. {ex}')
            raise BadRequest('Unable to fetch popular tables. '
                             'Please check your configurations.') from ex

        # Make a dictionary of Database Entities to avoid multiple DB calls
        dbs_dict = self._get_rel_attributes_dict(entities=table_entities,
                                                 attribute=self.DB_ATTRIBUTE)

        # Make instances of PopularTable
        for entity in table_entities:
            attrs = entity.attributes

            # DB would be available in attributes
            # because it is in the request parameter.
            # `or {}` also covers the attribute being present but set to None.
            db_ref = attrs.get(self.DB_ATTRIBUTE) or {}
            db_entity = dbs_dict.get(db_ref.get('guid'))

            if db_entity:
                db_attrs = db_entity.attributes
                db_name = db_attrs.get(self.NAME_ATTRIBUTE)
                db_cluster = db_attrs.get('clusterName')
            else:
                db_name = ''
                db_cluster = ''

            popular_table = PopularTable(database=entity.typeName,
                                         cluster=db_cluster,
                                         schema=db_name,
                                         name=attrs.get(self.NAME_ATTRIBUTE),
                                         description=attrs.get('description'))
            popular_tables.append(popular_table)
        return popular_tables
    def test_get_popular_tables_without_db(self):
        """
        Check ``get_popular_tables`` for table entities that carry no 'db' attribute.

        The expected tables have empty ``cluster`` and ``schema`` values.
        """
        def entity_without_db(raw):
            # Work on a copy: popping 'db' from the fixture's own dict would remove it
            # for every later use of the shared fixture.
            attrs = dict(raw['attributes'])
            attrs.pop('db', None)
            entity = MagicMock()
            entity.typeName = raw['typeName']
            entity.attributes = attrs
            return entity

        entity1 = entity_without_db(self.entity1)
        entity2 = entity_without_db(self.entity2)

        basic_search_collection = MagicMock()
        basic_search_collection.entities = [entity1, entity2]

        self.proxy._driver.search_basic.create = MagicMock(
            return_value=basic_search_collection)
        self.proxy._get_rel_attributes_dict = MagicMock(return_value=dict())

        response = self.proxy.get_popular_tables(num_entries=2)

        ent1_attrs = self.entity1['attributes']
        ent2_attrs = self.entity2['attributes']

        # NOTE(review): the second expectation takes its description from entity1;
        # kept as in the original test - confirm this is intended.
        expected = [
            PopularTable(database=self.entity_type,
                         cluster='',
                         schema='',
                         name=ent1_attrs['qualifiedName'],
                         description=ent1_attrs['description']),
            PopularTable(database=self.entity_type,
                         cluster='',
                         schema='',
                         name=ent2_attrs['qualifiedName'],
                         description=ent1_attrs['description']),
        ]

        self.assertEqual(expected.__repr__(), response.__repr__())
    def test_get_popular_tables(self):
        """
        Exercise ``get_popular_tables`` with a mocked DSL lookup and bulk-entity call.

        Asserts that five consecutive calls reach ``entity_bulk`` exactly once and that
        the first response matches the expected ``PopularTable`` list.
        """
        self.proxy._get_flat_values_from_dsl = MagicMock(return_value=[])

        bulk_result = MagicMock()
        bulk_result.entities_with_relationships = MagicMock(
            return_value=[self.to_class(self.metadata1), self.to_class(self.metadata2)])
        self.proxy._driver.entity_bulk = MagicMock(
            return_value=[bulk_result])

        response = self.proxy.get_popular_tables(num_entries=2)

        # Four more calls for the cache test; the count below must stay at one.
        for _ in range(4):
            self.proxy.get_popular_tables(num_entries=2)

        self.assertEqual(self.proxy._driver.entity_bulk.call_count, 1)

        first_attrs = self.entity1['attributes']
        second_attrs = self.entity2['attributes']
        common = dict(database=self.entity_type, cluster=self.cluster, schema=self.db)

        expected = [
            PopularTable(name=first_attrs['name'],
                         description=first_attrs['description'],
                         **common),
            PopularTable(name=second_attrs['name'],
                         description=first_attrs['description'],
                         **common),
        ]

        self.assertEqual(expected.__repr__(), response.__repr__())
    def get_popular_tables(self,
                           *,
                           num_entries: int = 10) -> List[PopularTable]:
        """
        Return every table entity found by the basic search as a PopularTable.

        FixMe: For now it simply returns ALL the tables available,
        Need to generate the formula for popular tables only.

        :param num_entries: not used by the current implementation
        :return: list with one PopularTable per entity in the bulk response
        :raises BadRequest: if the id search or the bulk fetch raises BadRequest
        """
        popular_tables = list()
        params = {
            'typeName': self.TABLE_ENTITY,
            'excludeDeletedEntities': True
        }
        try:
            guids = self._get_ids_from_basic_search(params=params)

            entity_collection = self._driver.entity_bulk(guid=guids)
        except BadRequest as ex:
            # Interpolate the configured entity type; the message previously printed
            # the literal text "self.TABLE_ENTITY".
            LOGGER.exception(
                f'Please make sure you have assigned the appropriate '
                f'{self.TABLE_ENTITY} entity to your atlas tables. {ex}')
            raise BadRequest('Unable to fetch popular tables. '
                             'Please check your configurations.') from ex

        for _collection in entity_collection:
            for entity in _collection.entities:
                attrs = entity.attributes
                # ToDo (Verdan): Investigate why db is not in referredEntities
                database = attrs.get(self.DB_KEY)
                if database:
                    # One driver call per table that references a database.
                    db_entity = self._driver.entity_guid(database['guid'])
                    db_attrs = db_entity.entity['attributes']
                    db_name = db_attrs.get(self.NAME_KEY)
                    db_cluster = db_attrs.get('clusterName')
                else:
                    # Empty strings are the fallback when there is no database reference.
                    db_name = ''
                    db_cluster = ''

                popular_table = PopularTable(
                    database=entity.typeName,
                    cluster=db_cluster,
                    schema=db_name,
                    name=attrs.get(self.NAME_KEY),
                    description=attrs.get('description'))
                popular_tables.append(popular_table)
        return popular_tables
    def get_popular_tables(self, *, num_entries: int) -> List[PopularTable]:
        """
        Build PopularTable instances from the parent entities of the most popular metadata.

        :param num_entries: Number of popular tables to fetch (used as the DSL LIMIT)
        :return: A List of popular tables instances
        :raises BadRequest: if a KeyError occurs while fetching the metadata entities
        :raises NotFoundException: if the bulk fetch returns nothing
        """
        try:
            # Fetch the metadata entities based on popularity score
            dsl_query = {'query': 'FROM Table SELECT metadata.__guid '
                                  'ORDERBY popularityScore desc '
                                  f'LIMIT {num_entries}'}
            guids = self._get_flat_values_from_dsl(dsl_param=dsl_query)
            collections = self._driver.entity_bulk(guid=guids)
        except KeyError as ex:
            LOGGER.exception(f'DSL Search query failed: {ex}')
            raise BadRequest('Unable to fetch popular tables. '
                             'Please check your configurations.')

        if not collections:
            raise NotFoundException('Unable to fetch popular tables. '
                                    'Please check your configurations.')

        results = []
        for collection in collections:
            for metadata in collection.entities_with_relationships(attributes=["parentEntity"]):
                parent = metadata.relationshipAttributes.get("parentEntity")
                parent_attrs = parent.get(self.ATTRS_KEY)

                qn_match = self.TABLE_QN_REGEX.match(parent_attrs.get(self.QN_KEY))
                if qn_match:
                    qn_parts = qn_match.groupdict()
                else:
                    qn_parts = dict()

                # Hardcoded empty strings as default, because these values are not optional
                name = parent_attrs.get(self.NAME_ATTRIBUTE) or qn_parts.get("table_name", '')
                schema = qn_parts.get("db_name", '')
                cluster = qn_parts.get("cluster_name", '')

                results.append(PopularTable(database=parent.get("typeName"),
                                            cluster=cluster,
                                            schema=schema,
                                            name=name,
                                            description=parent_attrs.get('description')))

        return results
    def test_get_table_by_user_relation(self) -> None:
        """
        Check that ``get_table_by_user_relation`` turns one reader entity into one PopularTable.
        """
        search_result = MagicMock()
        search_result.entities = [self.to_class(copy.deepcopy(self.reader_entity1))]
        self.proxy._driver.search_basic.create = MagicMock(
            return_value=search_result)

        actual = self.proxy.get_table_by_user_relation(
            user_email='test_user_id', relation_type=UserResourceRel.follow)

        expected_table = PopularTable(database=Data.entity_type,
                                      cluster=Data.cluster,
                                      schema=Data.db,
                                      name=Data.name,
                                      description=None)

        self.assertEqual(actual, {'table': [expected_table]})
Beispiel #18
0
    def get_table_by_user_relation(
            self, *, user_email: str,
            relation_type: UserResourceRel) -> Dict[str, Any]:
        """
        Retrieve all the resources a user is connected to through the given relation.
        We start with table resources only, then add dashboard.

        :param user_email: the email of the user
        :param relation_type: the relation between the user and the resource
        :return: a dict whose 'table' entry lists one PopularTable per related table
        :raises NotFoundException: if the query yields no result
        """
        relation, _ = self._get_relation_by_type(relation_type)

        # The relationship type can't be parameterized, so it is formatted into the
        # statement. The user key is bound as a query parameter instead of being
        # interpolated, so the email value cannot change the query structure.
        query = textwrap.dedent("""
        MATCH (user:User {{key: $query_key}})-[:{relation}]->(tbl:Table)
        RETURN COLLECT(DISTINCT tbl) as table_records
        """).format(relation=relation)

        record = self._execute_cypher_query(statement=query,
                                            param_dict={'query_key': user_email})

        if not record:
            raise NotFoundException('User {user_id} does not {relation} '
                                    'any resources'.format(user_id=user_email,
                                                           relation=relation))
        results = []
        table_records = record.single().get('table_records', [])

        # Each related table is expanded through _exec_col_query, whose last record
        # carries the db / cluster / schema / table / description fields read below.
        for table_record in table_records:
            _, last_neo4j_record = self._exec_col_query(table_record['key'])
            results.append(
                PopularTable(database=last_neo4j_record['db']['name'],
                             cluster=last_neo4j_record['clstr']['name'],
                             schema=last_neo4j_record['schema']['name'],
                             name=last_neo4j_record['tbl']['name'],
                             description=self._safe_get(
                                 last_neo4j_record, 'tbl_dscrpt',
                                 'description')))
        return {'table': results}
Beispiel #19
0
    def get_table_by_user_relation(self, *, user_email: str, relation_type: UserResourceRel) -> Dict[str, Any]:
        """
        Search reader entities for the given user and map them to PopularTable instances.

        :param user_email: value the reader's qualified name must contain
        :param relation_type: not used by the current implementation
        :return: a dict whose 'table' entry lists one PopularTable per reader entity found
        """
        # Both criteria must hold: the qualified name contains the email
        # and the bookmark attribute equals 'true'.
        criteria = [
            {
                'attributeName': self.QN_KEY,
                'operator': 'contains',
                'attributeValue': user_email
            },
            {
                'attributeName': self.BKMARKS_KEY,
                'operator': 'eq',
                'attributeValue': 'true'
            }
        ]
        params = {
            'typeName': self.READER_TYPE,
            'offset': '0',
            'limit': '1000',
            'entityFilters': {
                'condition': 'AND',
                'criterion': criteria
            },
            'attributes': ['count', self.QN_KEY, self.ENTITY_URI_KEY]
        }

        # Fetches the reader entities based on filters
        readers = self._driver.search_basic.create(data=params).entities

        tables = []
        for reader in readers:
            reader_attrs = reader.attributes
            uri_info = self._extract_info_from_uri(table_uri=reader_attrs[self.ENTITY_URI_KEY])
            qn_info = self._parse_reader_qn(reader_attrs[self.QN_KEY])
            tables.append(PopularTable(
                database=uri_info['entity'],
                cluster=qn_info['cluster'],
                schema=qn_info['db'],
                name=qn_info['table']))

        return {'table': tables}
    def test_get_popular_tables_without_db(self) -> None:
        """
        Check ``get_popular_tables`` when each table's qualified name is just its name.

        The expected tables carry ``'default'`` for both ``cluster`` and ``schema``, and
        five consecutive calls must reach ``entity_bulk`` exactly once.
        """
        first_meta: Dict = copy.deepcopy(self.metadata1)
        second_meta: Dict = copy.deepcopy(self.metadata2)
        first_meta['attributes']['table'] = self.entity1
        second_meta['attributes']['table'] = self.entity2

        search_collection = MagicMock()
        search_collection.entities = [self.to_class(first_meta), self.to_class(second_meta)]
        search_create = MagicMock(return_value=search_collection)

        with patch.object(self.proxy._driver.search_basic, 'create', search_create):
            first_entity: Dict = copy.deepcopy(self.entity1)
            second_entity: Dict = copy.deepcopy(self.entity2)

            # Overwrite the qualified name with the plain table name.
            for raw in (first_entity, second_entity):
                raw_attrs = raw['attributes']
                raw_attrs['qualifiedName'] = raw_attrs['name']

            bulk_collection = MagicMock()
            bulk_collection.entities = [self.to_class(first_entity), self.to_class(second_entity)]

            # Invalidate the cache to test the cache functionality
            cached_query_params = {
                'typeName': 'table_metadata',
                'sortBy': 'popularityScore',
                'sortOrder': 'DESCENDING',
                'excludeDeletedEntities': True,
                'limit': 2,
                'attributes': ['table']
            }
            self.proxy._CACHE.region_invalidate(
                self.proxy._get_metadata_entities, None,
                '_get_metadata_entities', cached_query_params)

            self.proxy._driver.entity_bulk = MagicMock(
                return_value=[bulk_collection])
            response = self.proxy.get_popular_tables(num_entries=2)

            # Four more calls for the cache test; the count below must stay at one.
            for _ in range(4):
                self.proxy.get_popular_tables(num_entries=2)

            self.assertEqual(1, self.proxy._driver.entity_bulk.call_count)

            first_attrs = cast(dict, self.entity1['attributes'])
            second_attrs = cast(dict, self.entity2['attributes'])
            common = dict(database=self.entity_type, cluster='default', schema='default')

            expected = [
                PopularTable(name=first_attrs['name'],
                             description=first_attrs['description'],
                             **common),
                PopularTable(name=second_attrs['name'],
                             description=first_attrs['description'],
                             **common),
            ]

            self.assertEqual(expected.__repr__(), response.__repr__())