def _get_bookmark_entity(self, entity_uri: str, user_id: str) -> EntityUniqueAttribute:
        """
        Fetch a Bookmark entity from parsing table uri and user id.
        If Bookmark is not present, create one for the user.
        :param table_uri:
        :param user_id: Qualified Name of a user
        :return:
        """
        table_info = self._extract_info_from_uri(table_uri=entity_uri)
        bookmark_qn = '{}.{}.{}.{}.bookmark@{}'.format(table_info.get('db'),
                                                       table_info.get('name'),
                                                       table_info.get('entity'),
                                                       user_id,
                                                       table_info.get('cluster'))

        try:
            bookmark_entity = self._driver.entity_unique_attribute(self.BOOKMARK_TYPE, qualifiedName=bookmark_qn)

            if not bookmark_entity.entity:
                table_entity = self._get_table_entity(table_uri=entity_uri)
                # Fetch user entity from user_id for relation
                user_entity = self._get_user_entity(user_id)
                # Create bookmark entity with the user relation.
                self._create_bookmark(table_entity,
                                      user_entity.entity[self.GUID_KEY], bookmark_qn, entity_uri)
                # Fetch bookmark entity after creating it.
                bookmark_entity = self._driver.entity_unique_attribute(self.BOOKMARK_TYPE, qualifiedName=bookmark_qn)

            return bookmark_entity

        except Exception as ex:
            LOGGER.exception(f'Bookmark not found. {str(ex)}')
            raise NotFoundException('Bookmark( {bookmark_qn} ) does not exist'
                                    .format(bookmark_qn=bookmark_qn))
    def get_table_by_user_relation(self, *, user_email: str, relation_type: UserResourceRel) \
            -> Dict[str, List[PopularTable]]:
        """
        Retrive all follow the Table per user based on the relation.

        :param user_email: the email of the user
        :param relation_type: the relation between the user and the resource
        :return:
        """
        rel_clause: str = self._get_user_resource_relationship_clause(relation_type=relation_type,
                                                                      id='',
                                                                      resource_type=ResourceType.Table,
                                                                      user_key=user_email)

        query = textwrap.dedent(f"""
            MATCH {rel_clause}<-[:TABLE]-(schema:Schema)<-[:SCHEMA]-(clstr:Cluster)<-[:CLUSTER]-(db:Database)
            WITH db, clstr, schema, resource
            OPTIONAL MATCH (resource)-[:DESCRIPTION]->(tbl_dscrpt:Description)
            RETURN db, clstr, schema, resource, tbl_dscrpt""")

        table_records = self._execute_cypher_query(statement=query, param_dict={'user_key': user_email})

        if not table_records:
            raise NotFoundException('User {user_id} does not {relation} any resources'.format(user_id=user_email,
                                                                                              relation=relation_type))
        results = []
        for record in table_records:
            results.append(PopularTable(
                database=record['db']['name'],
                cluster=record['clstr']['name'],
                schema=record['schema']['name'],
                name=record['resource']['name'],
                description=self._safe_get(record, 'tbl_dscrpt', 'description')))
        return {ResourceType.Table.name.lower(): results}
Example #3
0
    def _get_metadata_entities(self, popular_query_params: dict) -> List:
        try:
            popular_tables_guids = list()

            # Fetch the metadata entities based on popularity score
            search_results = self._driver.search_basic.create(
                data=popular_query_params)
            for metadata in search_results.entities:
                table_guid = metadata.attributes.get("parentEntity").get(
                    "guid")
                popular_tables_guids.append(table_guid)

            table_collection = self._driver.entity_bulk(
                guid=popular_tables_guids, ignoreRelationships=True)

            table_entities: List = list()
            for _collection in table_collection:
                table_entities.extend(_collection.entities)

            return table_entities

        except (KeyError, TypeError) as ex:
            LOGGER.exception(f'_get_metadata_entities Failed : {ex}')
            raise NotFoundException('Unable to fetch popular tables. '
                                    'Please check your configurations.')
    def get_frequently_used_tables(self, *, user_email: str) -> Dict[str, Any]:
        """
        Retrieves all Table the resources per user on READ relation.

        :param user_email: the email of the user
        :return:
        """

        query = textwrap.dedent("""
        MATCH (user:User {key: $query_key})-[r:READ]->(tbl:Table)
        WHERE EXISTS(r.published_tag) AND r.published_tag IS NOT NULL
        WITH user, r, tbl ORDER BY r.published_tag DESC, r.read_count DESC LIMIT 50
        MATCH (tbl:Table)<-[:TABLE]-(schema:Schema)<-[:SCHEMA]-(clstr:Cluster)<-[:CLUSTER]-(db:Database)
        OPTIONAL MATCH (tbl)-[:DESCRIPTION]->(tbl_dscrpt:Description)
        RETURN db, clstr, schema, tbl, tbl_dscrpt
        """)

        table_records = self._execute_cypher_query(statement=query, param_dict={'query_key': user_email})

        if not table_records:
            raise NotFoundException('User {user_id} does not READ any resources'.format(user_id=user_email))
        results = []

        for record in table_records:
            results.append(PopularTable(
                database=record['db']['name'],
                cluster=record['clstr']['name'],
                schema=record['schema']['name'],
                name=record['tbl']['name'],
                description=self._safe_get(record, 'tbl_dscrpt', 'description')))
        return {'table': results}
    def test_should_fail_to_update_column_description_when_table_does_not_exist(self) -> None:
        self.mock_proxy.put_column_description.side_effect = NotFoundException(message="table does not exist")

        response = self.app.test_client().put(f'/table/{TABLE_NAME}/column/{COLUMN_NAME}/description',
                                              json={"description": DESCRIPTION})

        self.assertEqual(response.status_code, HTTPStatus.NOT_FOUND)
Example #6
0
    def test_should_fail_to_update_tag_when_table_not_found(self) -> None:
        self.mock_proxy.add_tag.side_effect = NotFoundException(
            message='cannot find table')

        response = self.app.test_client().put(f'/table/{TABLE_URI}/tag/{TAG}')

        self.assertEqual(response.status_code, HTTPStatus.NOT_FOUND)
Example #7
0
    def get_table_by_user_relation(
            self, *, user_email: str,
            relation_type: UserResourceRel) -> Dict[str, Any]:
        """
        Retrive all follow the resources per user based on the relation.
        We start with table resources only, then add dashboard.

        :param user_email: the email of the user
        :param relation_type: the relation between the user and the resource
        :return:
        """
        relation, _ = self._get_relation_by_type(relation_type)
        # relationship can't be parameterized
        query_key = 'key: "{user_id}"'.format(user_id=user_email)

        query = textwrap.dedent("""
        MATCH (user:User {{{key}}})-[:{relation}]->(tbl:Table)
        RETURN COLLECT(DISTINCT tbl) as table_records
        """).format(key=query_key, relation=relation)

        record = self._execute_cypher_query(statement=query, param_dict={})

        if not record:
            raise NotFoundException('User {user_id} does not {relation} '
                                    'any resources'.format(user_id=user_email,
                                                           relation=relation))
        results = []
        table_records = record.single().get('table_records', [])

        for record in table_records:
            # todo: decide whether we want to return a list of table entities or just table_uri
            results.append(self.get_table(table_uri=record['key']))
        return {'table': results}
    def get_table_by_user_relation(self, *, user_email: str, relation_type: UserResourceRel) -> Dict[str, Any]:
        """
        Retrive all follow the resources per user based on the relation.
        We start with table resources only, then add dashboard.

        :param user_email: the email of the user
        :param relation_type: the relation between the user and the resource
        :return:
        """
        relation, _ = self._get_relation_by_type(relation_type)

        query = textwrap.dedent("""
MATCH (user:User {{key: $query_key}})-[:{relation}]->(tbl:Table)-[:TABLE_OF]->
(schema:Schema)-[:SCHEMA_OF]->(clstr:Cluster)-[:CLUSTER_OF]->(db:Database)
WITH db, clstr, schema, tbl
OPTIONAL MATCH (tbl)-[:DESCRIPTION]->(tbl_dscrpt:Description)
RETURN db, clstr, schema, tbl, tbl_dscrpt""").format(relation=relation)

        table_records = self._execute_cypher_query(statement=query, param_dict={'query_key': user_email})

        if not table_records:
            raise NotFoundException('User {user_id} does not {relation} any resources'.format(user_id=user_email,
                                                                                              relation=relation))
        results = []

        for record in table_records:
            results.append(PopularTable(
                database=record['db']['name'],
                cluster=record['clstr']['name'],
                schema=record['schema']['name'],
                name=record['tbl']['name'],
                description=self._safe_get(record, 'tbl_dscrpt', 'description')))
        return {'table': results}
    def test_should_fail_to_delete_tag_when_table_not_found(self) -> None:
        self.mock_proxy.delete_tag.side_effect = NotFoundException(
            message='foo')

        response = self.app.test_client().delete(f'/dashboard/{ID}/tag/{TAG}')

        self.assertEqual(response.status_code, HTTPStatus.NOT_FOUND)
    def _get_resources_owned_by_user(self, user_id: str, resource_type: str) \
            -> List[Union[PopularTable, DashboardSummary, Any]]:
        """
        ToDo (Verdan): Dashboard still needs to be implemented.
        Helper function to get the resource, table, dashboard etc owned by a user.
        :param user_id: User ID of a user
        :param resource_type: Type of a resource that returns, could be table, dashboard etc.
        :return: A list of PopularTable, DashboardSummary or any other resource.
        """
        resources = list()
        user_entity = self._driver.entity_unique_attribute(
            self.USER_TYPE, qualifiedName=user_id).entity

        if not user_entity:
            LOGGER.exception(f'User ({user_id}) not found in Atlas')
            raise NotFoundException(f'User {user_id} not found.')

        resource_guids = list()
        for item in user_entity[self.REL_ATTRS_KEY].get('ownerOf') or list():
            if (item['entityStatus'] == Status.ACTIVE
                    and item['relationshipStatus'] == Status.ACTIVE
                    and item['typeName'] == resource_type):
                resource_guids.append(item[self.GUID_KEY])

        entities = extract_entities(
            self._driver.entity_bulk(guid=resource_guids,
                                     ignoreRelationships=True))
        if resource_type == self.TABLE_ENTITY:
            resources = self._serialize_popular_tables(entities)

        return resources
Example #11
0
    def _get_reader_entity(self, table_uri: str, user_id: str) -> EntityUniqueAttribute:
        """
        Fetch a Reader entity from parsing table uri and user id.
        If Reader is not present, create one for the user.
        :param table_uri:
        :param user_id: Qualified Name of a user
        :return:
        """
        table_info = self._extract_info_from_uri(table_uri=table_uri)
        reader_qn = '{}.{}.metadata.{}.reader@{}'.format(table_info.get('db'),
                                                         table_info.get('name'),
                                                         user_id,
                                                         table_info.get('cluster'))

        try:
            reader_entity = self._driver.entity_unique_attribute(
                self.READER_TYPE, qualifiedName=reader_qn)
            if not reader_entity.entity:
                # Fetch the table entity from the uri for obtaining metadata guid.
                table_entity, table_info = self._get_table_entity(table_uri=table_uri)
                # Fetch user entity from user_id for relation
                user_entity = self._get_user_entity(user_id)
                # Create reader entity with the metadata and user relation.
                self._create_reader(table_entity.entity[self.ATTRS_KEY][self.METADATA_KEY][self.GUID_KEY],
                                    user_entity.entity[self.GUID_KEY], reader_qn, table_uri)
                # Fetch reader entity after creating it.
                reader_entity = self._driver.entity_unique_attribute(self.READER_TYPE, qualifiedName=reader_qn)
            return reader_entity

        except Exception as ex:
            LOGGER.exception(f'Reader not found. {str(ex)}')
            raise NotFoundException('Reader( {reader_qn} ) does not exist'
                                    .format(reader_qn=reader_qn))
    def get_user(self, *, id: str) -> Union[UserEntity, None]:
        """
        Retrieve user detail based on user_id(email).

        :param user_id: the email for the given user
        :return:
        """

        query = textwrap.dedent("""
        MATCH (user:User {key: $user_id})
        OPTIONAL MATCH (user)-[:MANAGE_BY]->(manager:User)
        RETURN user as user_record, manager as manager_record
        """)

        record = self._execute_cypher_query(statement=query,
                                            param_dict={'user_id': id})
        single_result = record.single()

        if not single_result:
            raise NotFoundException('User {user_id} '
                                    'not found in the graph'.format(user_id=id))

        record = single_result.get('user_record', {})
        manager_record = single_result.get('manager_record', {})
        if manager_record:
            manager_name = manager_record.get('full_name', '')
        else:
            manager_name = ''

        return self._build_user_from_record(record=record, manager_name=manager_name)
Example #13
0
    def test_should_fail_when_table_doesnt_exist(self) -> None:
        self.mock_proxy.get_lineage.side_effect = NotFoundException(
            message='table not found')

        response = self.app.test_client().get(f'/table/{TABLE_URI}/lineage')

        self.assertEqual(response.status_code, HTTPStatus.NOT_FOUND)
Example #14
0
    def add_tag(self,
                *,
                table_uri: str,
                tag: str,
                tag_type: str = 'default') -> None:
        """
        Add new tag
        1. Create the node with type Tag if the node doesn't exist.
        2. Create the relation between tag and table if the relation doesn't exist.

        :param table_uri:
        :param tag:
        :param tag_type
        :return: None
        """
        LOGGER.info('New tag {} for table_uri {} with type {}'.format(
            tag, table_uri, tag_type))

        table_validation_query = 'MATCH (t:Table {key: $tbl_key}) return t'

        upsert_tag_query = textwrap.dedent("""
        MERGE (u:Tag {key: $tag})
        on CREATE SET u={tag_type: $tag_type, key: $tag}
        on MATCH SET u={tag_type: $tag_type, key: $tag}
        """)

        upsert_tag_relation_query = textwrap.dedent("""
        MATCH (n1:Tag {key: $tag, tag_type: $tag_type}), (n2:Table {key: $tbl_key})
        MERGE (n1)-[r1:TAG]->(n2)-[r2:TAGGED_BY]->(n1)
        RETURN n1.key, n2.key
        """)

        try:
            tx = self._driver.session().begin_transaction()
            tbl_result = tx.run(table_validation_query, {'tbl_key': table_uri})
            if not tbl_result.single():
                raise NotFoundException(
                    'table_uri {} does not exist'.format(table_uri))

            # upsert the node. Currently the type for all the tags is default. We could change it later per UI.
            tx.run(upsert_tag_query, {'tag': tag, 'tag_type': tag_type})
            result = tx.run(upsert_tag_relation_query, {
                'tag': tag,
                'tbl_key': table_uri,
                'tag_type': tag_type
            })
            if not result.single():
                raise RuntimeError('Failed to create relation between '
                                   'tag {tag} and table {tbl}'.format(
                                       tag=tag, tbl=table_uri))
            tx.commit()
        except Exception as e:
            if not tx.closed():
                tx.rollback()
            # propagate the exception back to api
            raise e
        finally:
            if not tx.closed():
                tx.close()
    def get_dashboard(
        self,
        id: str,
    ) -> DashboardDetailEntity:

        get_dashboard_detail_query = textwrap.dedent(u"""
        MATCH (d:Dashboard {key: $query_key})-[:DASHBOARD_OF]->(dg:Dashboardgroup)-[:DASHBOARD_GROUP_OF]->(c:Cluster)
        OPTIONAL MATCH (d)-[:DESCRIPTION]->(description:Description)
        OPTIONAL MATCH (d)-[:EXECUTED]->(last_exec:Execution) WHERE split(last_exec.key, '/')[5] = '_last_execution'
        OPTIONAL MATCH (d)-[:LAST_UPDATED_AT]->(t:Timestamp)
        OPTIONAL MATCH (d)-[:OWNER]->(owner:User)
        OPTIONAL MATCH (d)-[:TAG]->(tag:Tag)
        RETURN
        c.name as cluster_name,
        d.key as uri,
        d.dashboard_url as url,
        d.name as name,
        toInteger(d.created_timestamp) as created_timestamp,
        description.description as description,
        dg.name as group_name,
        dg.dashboard_group_url as group_url,
        toInteger(last_exec.timestamp) as last_run_timestamp,
        last_exec.state as last_run_state,
        toInteger(t.timestamp) as updated_timestamp,
        collect(owner) as owners,
        collect(tag) as tags;
        """)
        record = self._execute_cypher_query(
            statement=get_dashboard_detail_query, param_dict={
                'query_key': id
            }).single()

        if not record:
            raise NotFoundException(
                'No dashboard exist with URI: {}'.format(id))

        owners = [
            self._build_user_from_record(record=owner)
            for owner in record['owners']
        ]
        tags = [
            Tag(tag_type=tag['tag_type'], tag_name=tag['key'])
            for tag in record['tags']
        ]

        return DashboardDetailEntity(
            uri=record['uri'],
            cluster=record['cluster_name'],
            url=record['url'],
            name=record['name'],
            created_timestamp=record['created_timestamp'],
            description=self._safe_get(record, 'description'),
            group_name=self._safe_get(record, 'group_name'),
            group_url=self._safe_get(record, 'group_url'),
            last_run_timestamp=self._safe_get(record, 'last_run_timestamp'),
            last_run_state=self._safe_get(record, 'last_run_state'),
            updated_timestamp=self._safe_get(record, 'updated_timestamp'),
            owners=owners,
            tags=tags)
    def test_should_fail_when_cannot_find_table(self) -> None:
        self.mock_proxy.get_table_description.side_effect = NotFoundException(
            message='cannot find table')

        response = self.app.test_client().get(
            f'/table/{TABLE_URI}/description')

        self.assertEqual(response.status_code, HTTPStatus.NOT_FOUND)
Example #17
0
    def test_should_fail_to_delete_tag_when_feature_not_found(self) -> None:
        self.mock_proxy.delete_tag.side_effect = NotFoundException(
            message='cannot find feature')

        response = self.app.test_client().delete(
            f'/feature/{FEATURE_URI}/tag/{TAG}')

        self.assertEqual(response.status_code, HTTPStatus.NOT_FOUND)
    def test_should_fail_to_get_column_details_when_table_not_foubd(
            self) -> None:
        self.mock_proxy.get_table.side_effect = NotFoundException(
            message='table not found')

        response = self.app.test_client().get(f'/table/{TABLE_URI}')

        self.assertEqual(response.status_code, HTTPStatus.NOT_FOUND)
    def _get_resources_owned_by_user(self, user_id: str, resource_type: str) \
            -> List[Union[PopularTable, DashboardSummary, Any]]:
        """
        ToDo (Verdan): Dashboard still needs to be implemented.
        Helper function to get the resource, table, dashboard etc owned by a user.
        :param user_id: User ID of a user
        :param resource_type: Type of a resource that returns, could be table, dashboard etc.
        :return: A list of PopularTable, DashboardSummary or any other resource.
        """
        resources = list()
        if resource_type == ResourceType.Table.name:
            type_regex = "(.*)_table$"
        # elif resource_type == ResourceType.Dashboard.name:
        #     type_regex = "Dashboard"
        else:
            LOGGER.exception(f'Resource Type ({resource_type}) is not yet implemented')
            raise NotImplemented

        user_entity = self._driver.entity_unique_attribute(self.USER_TYPE, qualifiedName=user_id).entity

        if not user_entity:
            LOGGER.exception(f'User ({user_id}) not found in Atlas')
            raise NotFoundException(f'User {user_id} not found.')

        resource_guids = set()
        for item in user_entity[self.REL_ATTRS_KEY].get('owns') or list():
            if (item['entityStatus'] == Status.ACTIVE and
                    item['relationshipStatus'] == Status.ACTIVE and
                    re.compile(type_regex).match(item['typeName'])):
                resource_guids.add(item[self.GUID_KEY])

        params = {
            'typeName': self.TABLE_ENTITY,
            'excludeDeletedEntities': True,
            'entityFilters': {
                'condition': 'AND',
                'criterion': [
                    {
                        'attributeName': 'owner',
                        'operator': 'startsWith',
                        'attributeValue': user_id.lower()
                    }
                ]
            },
            'attributes': [self.GUID_KEY]
        }
        table_entities = self._driver.search_basic.create(data=params)
        for table in table_entities.entities:
            resource_guids.add(table.guid)

        if resource_guids:
            entities = extract_entities(self._driver.entity_bulk(guid=list(resource_guids), ignoreRelationships=True))
            if resource_type == ResourceType.Table.name:
                resources = self._serialize_popular_tables(entities)
        else:
            LOGGER.info(f'User ({user_id}) does not own any "{resource_type}"')

        return resources
Example #20
0
    def _get_resources_owned_by_user(self, user_id: str, resource_type: str) \
            -> List[Union[PopularTable, DashboardSummary, Any]]:
        """
        ToDo (Verdan): Dashboard still needs to be implemented.
        Helper function to get the resource, table, dashboard etc owned by a user.
        :param user_id: User ID of a user
        :param resource_type: Type of a resource that returns, could be table, dashboard etc.
        :return: A list of PopularTable, DashboardSummary or any other resource.
        """
        resources = list()

        if resource_type == ResourceType.Table.name:
            type_regex = "(.*)_table$"
            entity_type = 'Table'
        # elif resource_type == ResourceType.Dashboard.name:
        #     type_regex = "Dashboard"
        #     entity_type = 'Dashboard'
        else:
            LOGGER.exception(
                f'Resource Type ({resource_type}) is not yet implemented')
            raise NotImplemented

        user_entity = self.client.entity.get_entity_by_attribute(
            type_name=self.USER_TYPE,
            uniq_attributes=[(self.QN_KEY, user_id)]).entity

        if not user_entity:
            LOGGER.exception(f'User ({user_id}) not found in Atlas')
            raise NotFoundException(f'User {user_id} not found.')

        resource_guids = set()
        for item in user_entity[self.REL_ATTRS_KEY].get('owns') or list():
            if (item['entityStatus'] == Status.ACTIVE
                    and item['relationshipStatus'] == Status.ACTIVE
                    and re.compile(type_regex).match(item['typeName'])):
                resource_guids.add(item[self.GUID_KEY])

        owned_tables_query = f'{entity_type} where owner like "{user_id.lower()}*" and __state = "ACTIVE"'
        table_entities = self.client.discovery.dsl_search(owned_tables_query)

        for table in table_entities.entities or list():
            resource_guids.add(table.guid)

        if resource_guids:
            resource_guids_chunks = AtlasProxy.split_list_to_chunks(
                list(resource_guids), 100)

            for chunk in resource_guids_chunks:
                entities = self.client.entity.get_entities_by_guids(
                    guids=list(chunk), ignore_relationships=True)
                if resource_type == ResourceType.Table.name:
                    resources += self._serialize_popular_tables(
                        entities.entities)
        else:
            LOGGER.info(f'User ({user_id}) does not own any "{resource_type}"')

        return resources
Example #21
0
    def test_should_fail_to_get_column_description_when_table_is_not_found(
            self) -> None:
        self.mock_proxy.get_column_description.side_effect = NotFoundException(
            message="table does not exist")

        response = self.app.test_client().get(
            f'/table/{TABLE_NAME}/column/{COLUMN_NAME}/description')

        self.assertEqual(response.status_code, HTTPStatus.NOT_FOUND)
    def get_users(self) -> List[UserEntity]:
        statement = "MATCH (usr:User) WHERE usr.is_active = true RETURN collect(usr) as users"

        record = self._execute_cypher_query(statement=statement, param_dict={})
        result = record.single()
        if not result or not result.get('users'):
            raise NotFoundException('Error getting users')

        return [self._build_user_from_record(record=rec) for rec in result['users']]
    def test_should_fail_to_update_description_when_table_not_found(
            self) -> None:
        self.mock_proxy.put_table_description.side_effect = NotFoundException(
            message='cannot find table')

        response = self.app.test_client().put(
            f'/table/{TABLE_URI}/description',
            json={'description': DESCRIPTION})

        self.assertEqual(response.status_code, HTTPStatus.NOT_FOUND)
    def _get_column(self, *, table_uri: str, column_name: str) -> Dict:
        """
        Fetch the column information from referredEntities of the table entity
        :param table_uri:
        :param column_name:
        :return: A dictionary containing the column details
        """
        try:
            table_entity = self._get_table_entity(table_uri=table_uri)
            columns = table_entity.entity[self.REL_ATTRS_KEY].get('columns')
            for column in columns or list():
                col_details = table_entity.referredEntities[column[self.GUID_KEY]]
                if column_name == col_details[self.ATTRS_KEY]['name']:
                    return col_details

            raise NotFoundException(f'Column not found: {column_name}')

        except KeyError as ex:
            LOGGER.exception(f'Column not found: {str(ex)}')
            raise NotFoundException(f'Column not found: {column_name}')
    def _get_column(self, *, table_uri: str, column_name: str) -> Dict:
        """
        Fetch the column information from referredEntities of the table entity
        :param table_uri:
        :param column_name:
        :return: A dictionary containing the column details
        """
        try:
            table_entity, _ = self._get_table_entity(table_uri=table_uri)
            columns = table_entity.entity['attributes'].get('columns', list())
            for column in columns:
                col_details = table_entity.referredEntities[column['guid']]
                if column_name == col_details['attributes']['qualifiedName']:
                    return col_details

            raise NotFoundException(f'Column not found: {column_name}')

        except KeyError as ex:
            LOGGER.exception(f'Column not found: {str(ex)}')
            raise NotFoundException(f'Column not found: {column_name}')
 def _get_user_entity(self, user_id: str) -> AtlasEntityWithExtInfo:
     """
     Fetches an user entity from an id
     :param user_id: User ID
     :return: A User entity matching the user_id
     """
     try:
         return self.client.entity.get_entity_by_attribute(type_name=self.USER_TYPE,
                                                           uniq_attributes=[(self.QN_KEY, user_id)])
     except Exception as ex:
         raise NotFoundException('(User {user_id}) does not exist'
                                 .format(user_id=user_id))
 def _get_user_entity(self, user_id: str) -> EntityUniqueAttribute:
     """
     Fetches an user entity from an id
     :param user_id:
     :return:
     """
     try:
         return self._driver.entity_unique_attribute("User",
                                                     qualifiedName=user_id)
     except Exception as ex:
         raise NotFoundException(
             '(User {user_id}) does not exist'.format(user_id=user_id))
Example #28
0
    def add_owner(self, *, table_uri: str, owner: str) -> None:
        """
        Query on Atlas User entity to find if the entity exist for the
        owner string in parameter, if not create one. And then use that User
        entity's GUID and add a relationship between Table and User, on ownedBy field.
        :param table_uri:
        :param owner: Email address of the owner
        :return: None, as it simply adds the owner.
        """
        owner_info = self._get_user_details(owner)

        if not owner_info:
            raise NotFoundException(f'User "{owner}" does not exist.')

        user_dict = type_coerce(
            {
                "entity": {
                    "typeName": "User",
                    "attributes": {
                        "qualifiedName": owner
                    },
                }
            }, AtlasEntityWithExtInfo)

        # Get or Create a User
        user_entity = self.client.entity.create_entity(user_dict)
        user_guid = next(iter(user_entity.guidAssignments.values()))

        table = self._get_table_entity(table_uri=table_uri)

        entity_def = {
            "typeName": "DataSet_Users_Owner",
            "end1": {
                "guid": table.entity.get("guid"),
                "typeName": "Table",
            },
            "end2": {
                "guid": user_guid,
                "typeName": "User",
            },
        }
        try:
            relationship = type_coerce(entity_def, AtlasRelationship)
            self.client.relationship.create_relationship(
                relationship=relationship)

        except Exception as ex:
            LOGGER.exception(
                'Error while adding the owner information. {}'.format(str(ex)))
            raise BadRequest(
                f'User {owner} is already added as a data owner for '
                f'table {table_uri}.')
Example #29
0
    def _get_column(self, *, column_id: str) -> Entity:
        """
        Fetch the column information from referredEntities of the table entity
        :param column_id:
        :return: A dictionary containing the column details
        """

        try:
            return self._driver.entity_guid(column_id)

        except Exception as ex:
            LOGGER.exception(f'Column not found: {str(ex)}')
            raise NotFoundException(f'Column not found: {column_id}')
    def get_dashboard_by_user_relation(self, *, user_email: str, relation_type: UserResourceRel) \
            -> Dict[str, List[DashboardSummary]]:
        """
        Retrieve all follow the Dashboard per user based on the relation.

        :param user_email: the email of the user
        :param relation_type: the relation between the user and the resource
        :return:
        """
        rel_clause: str = self._get_user_resource_relationship_clause(relation_type=relation_type,
                                                                      id='',
                                                                      resource_type=ResourceType.Dashboard,
                                                                      user_key=user_email)

        # FYI, to extract last_successful_execution, it searches for its execution ID which is always
        # _last_successful_execution
        # https://github.com/lyft/amundsendatabuilder/blob/master/databuilder/models/dashboard/dashboard_execution.py#L18
        # https://github.com/lyft/amundsendatabuilder/blob/master/databuilder/models/dashboard/dashboard_execution.py#L24

        query = textwrap.dedent(f"""
        MATCH {rel_clause}<-[:DASHBOARD]-(dg:Dashboardgroup)<-[:DASHBOARD_GROUP]-(clstr:Cluster)
        OPTIONAL MATCH (resource)-[:DESCRIPTION]->(dscrpt:Description)
        OPTIONAL MATCH (resource)-[:EXECUTED]->(last_exec:Execution)
        WHERE split(last_exec.key, '/')[5] = '_last_successful_execution'
        RETURN clstr.name as cluster_name, dg.name as dg_name, dg.dashboard_group_url as dg_url,
        resource.key as uri, resource.name as name, resource.dashboard_url as url,
        dscrpt.description as description, last_exec.timestamp as last_successful_run_timestamp""")

        records = self._execute_cypher_query(statement=query, param_dict={'user_key': user_email})

        if not records:
            raise NotFoundException('User {user_id} does not {relation} on {resource_type} resources'.format(
                user_id=user_email,
                relation=relation_type,
                resource_type=ResourceType.Dashboard.name))

        results = []
        for record in records:
            results.append(DashboardSummary(
                uri=record['uri'],
                cluster=record['cluster_name'],
                group_name=record['dg_name'],
                group_url=record['dg_url'],
                name=record['name'],
                url=record['url'],
                description=record['description'],
                last_successful_run_timestamp=record['last_successful_run_timestamp'],
            ))

        return {ResourceType.Dashboard.name.lower(): results}