Beispiel #1
0
    def delete_owner(self, *, table_uri: str, owner: str) -> None:
        """
        Delete the active ownership relationship between a table and an owner.

        The table entity is looked up by ``table_uri`` and its ``ownedBy``
        relationship attributes are searched for an entry whose
        ``relationshipStatus`` equals ``Status.ACTIVE`` and whose
        ``displayText`` equals ``owner``. The first matching relationship is
        deleted through the driver. If the table has no ``ownedBy`` entries
        at all, nothing happens.

        :param table_uri: URI used to look up the table entity
        :param owner: Display text of the owner relationship to remove
        :return: None
        :raises BadRequest: if the table has owners, but none of them is an
            active relationship matching ``owner``
        """
        table = self._get_table_entity(table_uri=table_uri)
        owned_by = table.entity[self.REL_ATTRS_KEY].get("ownedBy")

        # Nothing to remove when the table has no owners.
        if not owned_by:
            return

        try:
            # Materialise the matches once. A lazy ``filter`` object is
            # exhausted by the emptiness check, so a later ``next()`` on it
            # would raise StopIteration instead of yielding the owner.
            active_owners = [
                item for item in owned_by
                if item['relationshipStatus'] == Status.ACTIVE
                and item['displayText'] == owner
            ]
            if not active_owners:
                raise BadRequest('You can not delete this owner.')

            self._driver.relationship_guid(
                active_owners[0].get('relationshipGuid')).delete()
        except NotFound as ex:
            # A NotFound raised here is logged and not re-raised.
            LOGGER.exception(
                'Error while removing table data owner. {}'.format(
                    str(ex)))
    def get_table(self, *, table_uri: str) -> Table:
        """
        Assemble the Table model backing the Table Detail Page.

        :param table_uri: URI used to look up the table entity
        :return: A Table populated from the entity's attributes,
        classifications, columns, reports and readers.
        :raises BadRequest: when a KeyError occurs while reading the entity
        """
        table_entity = self._get_table_entity(table_uri=table_uri)
        details = table_entity.entity

        try:
            attributes = details[self.ATTRS_KEY]

            descriptions = self._get_programmatic_descriptions(
                attributes.get('parameters'))

            qn_parts = parse_table_qualified_name(
                qualified_name=attributes.get(self.QN_KEY))

            # ``or []`` covers a 'classifications' key that holds None
            tags = [
                Tag(tag_name=item.get('typeName'), tag_type="default")
                for item in details.get("classifications") or []
            ]

            columns = self._serialize_columns(entity=table_entity)

            report_guids = []
            for report in attributes.get("reports") or []:
                report_guids.append(report.get("guid"))

            # Field values are computed one by one, in the order the Table
            # constructor receives them.
            database = details.get('typeName')
            cluster = qn_parts.get('cluster_name', '')
            schema = qn_parts.get('db_name', '')
            name = attributes.get('name') or qn_parts.get("table_name", '')
            description = (attributes.get('description')
                           or attributes.get('comment'))
            owners = [User(email=attributes.get('owner'))]
            reports = self._get_reports(guids=report_guids)
            readers = self._get_readers(attributes.get(self.QN_KEY))
            last_updated = self._parse_date(details.get('updateTime'))

            return Table(
                database=database,
                cluster=cluster,
                schema=schema,
                name=name,
                tags=tags,
                description=description,
                owners=owners,
                resource_reports=reports,
                columns=columns,
                table_readers=readers,
                last_updated_timestamp=last_updated,
                programmatic_descriptions=descriptions)
        except KeyError as error:
            LOGGER.exception(
                'Error while accessing table information. {}'.format(
                    str(error)))
            raise BadRequest(
                'Some of the required attributes '
                'are missing in : ( {table_uri} )'.format(table_uri=table_uri))
Beispiel #3
0
    def get_popular_tables(self,
                           *,
                           num_entries: int = 10) -> List[PopularTable]:
        """
        Return the tables found by a basic search as PopularTable instances.

        FixMe: For now it simply returns ALL the tables available,
        Need to generate the formula for popular tables only.

        :param num_entries: Currently unused; every table returned by the
            search is included in the result.
        :return: A list of PopularTable instances, one per table entity
        :raises BadRequest: if the basic search raises BadRequest
        """
        popular_tables = list()
        params = {
            'typeName': self.TABLE_ENTITY,
            'excludeDeletedEntities': True,
            self.ATTRS_KEY: [self.DB_ATTRIBUTE]
        }
        try:
            # Fetch all the Popular Tables
            _table_collection = self._driver.search_basic.create(data=params)
            # Inflate the table entities
            table_entities = _table_collection.entities
        except BadRequest as ex:
            LOGGER.exception(
                f'Please make sure you have assigned the appropriate '
                f'self.TABLE_ENTITY entity to your atlas tables. {ex}')
            # Chain the original error so its traceback is kept as the cause.
            raise BadRequest('Unable to fetch popular tables. '
                             'Please check your configurations.') from ex

        # Make a dictionary of Database Entities to avoid multiple DB calls
        dbs_dict = self._get_rel_attributes_dict(entities=table_entities,
                                                 attribute=self.DB_ATTRIBUTE)

        # Make instances of PopularTable
        for entity in table_entities:
            attrs = entity.attributes

            # DB is requested in the search parameters, but the attribute can
            # still be missing or None; ``or {}`` handles both cases instead
            # of failing on ``None.get``.
            db_id = (attrs.get(self.DB_ATTRIBUTE) or {}).get('guid')
            db_entity = dbs_dict.get(db_id)

            if db_entity:
                db_attrs = db_entity.attributes
                db_name = db_attrs.get(self.NAME_ATTRIBUTE)
                db_cluster = db_attrs.get('clusterName')
            else:
                # Fall back to empty strings when no database entity is known
                db_name = ''
                db_cluster = ''

            popular_table = PopularTable(database=entity.typeName,
                                         cluster=db_cluster,
                                         schema=db_name,
                                         name=attrs.get(self.NAME_ATTRIBUTE),
                                         description=attrs.get('description'))
            popular_tables.append(popular_table)
        return popular_tables
Beispiel #4
0
    def get_table(self, *, table_id: str, table_info: Dict) -> Table:
        """
        Assemble the Table model backing the Table Detail Page.

        :param table_id: Identifier used to look up the table entity
        :param table_info: Additional table information; the keys 'entity',
        'cluster', 'db' and 'name' are read from it
        :return: A Table built from ``table_info`` and the entity's
        attributes, classifications and columns.
        :raises BadRequest: when a KeyError occurs while reading the data
        """
        entity = self._get_table_entity(table_id=table_id)
        details = entity.entity

        def _build_column(reference):
            # Column details are read from the referred entities, by guid
            referred = entity.referredEntities[reference['guid']]
            column_attrs = referred[self.ATTRS_KEY]
            return Column(
                name=column_attrs.get(self.NAME_ATTRIBUTE),
                description=column_attrs.get('description'),
                col_type=(column_attrs.get('type')
                          or column_attrs.get('dataType')),
                sort_order=column_attrs.get('position'),
            )

        try:
            attributes = details[self.ATTRS_KEY]
            relationships = details[self.REL_ATTRS_KEY]

            # ``or []`` covers a 'classifications' key that holds None
            tags = [
                Tag(tag_name=item.get('typeName'), tag_type="default")
                for item in details.get("classifications") or []
            ]

            columns = [
                _build_column(reference)
                for reference in relationships.get('columns') or []
            ]

            return Table(
                database=table_info['entity'],
                cluster=table_info['cluster'],
                schema=table_info['db'],
                name=table_info['name'],
                tags=tags,
                description=attributes.get('description'),
                owners=[User(email=attributes.get('owner'))],
                columns=columns,
                last_updated_timestamp=details.get('updateTime'))
        except KeyError as error:
            LOGGER.exception(
                'Error while accessing table information. {}'.format(
                    str(error)))
            raise BadRequest(
                'Some of the required attributes '
                'are missing in : ( {table_id} )'.format(table_id=table_id))
    def add_owner(self, *, table_uri: str, owner: str) -> None:
        """
        Register ``owner`` as a data owner of the table at ``table_uri``.

        The owner is first checked through ``_get_user_details``. A User
        entity with the owner as its qualified name is then posted to Atlas,
        and a ``DataSet_Users_Owner`` relationship is created between the
        table and that user.

        :param table_uri: URI used to look up the table entity
        :param owner: Email address of the owner
        :return: None, as it simply adds the owner.
        :raises NotFoundException: when no details are found for the owner
        :raises BadRequest: when creating the relationship raises Conflict
        """
        # Validate that the user is known before touching Atlas
        if not self._get_user_details(owner, fallback=owner):
            raise NotFoundException(f'User "{owner}" does not exist.')

        # Get or Create a User
        user_payload = {
            "entity": {
                "typeName": "User",
                "attributes": {"qualifiedName": owner},
            }
        }
        created_user = self._driver.entity_post.create(data=user_payload)
        guid_assignments = created_user.get("guidAssignments")
        user_guid = next(iter(guid_assignments.values()))

        table = self._get_table_entity(table_uri=table_uri)

        table_end = {"guid": table.entity.get("guid"), "typeName": "Table"}
        user_end = {"guid": user_guid, "typeName": "User"}
        relationship = {
            "typeName": "DataSet_Users_Owner",
            "end1": table_end,
            "end2": user_end,
        }

        try:
            self._driver.relationship.create(data=relationship)
        except Conflict as error:
            LOGGER.exception(
                'Error while adding the owner information. {}'.format(
                    str(error)))
            raise BadRequest(
                f'User {owner} is already added as a data owner for '
                f'table {table_uri}.')
    def get_popular_tables(self,
                           *,
                           num_entries: int = 10) -> List[PopularTable]:
        """
        Return the tables found by a basic search as PopularTable instances.

        FixMe: For now it simply returns ALL the tables available,
        Need to generate the formula for popular tables only.

        :param num_entries: Not used by the current implementation
        :return: A list of PopularTable instances
        :raises BadRequest: if fetching the entities raises BadRequest
        """
        search_params = {
            'typeName': self.TABLE_ENTITY,
            'excludeDeletedEntities': True
        }
        try:
            table_guids = self._get_ids_from_basic_search(params=search_params)
            collections = self._driver.entity_bulk(guid=table_guids)
        except BadRequest as error:
            LOGGER.exception(
                f'Please make sure you have assigned the appropriate '
                f'self.TABLE_ENTITY entity to your atlas tables. {error}')
            raise BadRequest('Unable to fetch popular tables. '
                             'Please check your configurations.')

        results = []
        for batch in collections:
            for table_entity in batch.entities:
                table_attrs = table_entity.attributes

                # Empty strings are used when the table has no database
                db_name = ''
                db_cluster = ''

                # ToDo (Verdan): Investigate why db is not in referredEntities
                db_reference = table_attrs.get(self.DB_KEY)
                if db_reference:
                    db_entity = self._driver.entity_guid(db_reference['guid'])
                    db_attrs = db_entity.entity['attributes']
                    db_name = db_attrs.get(self.NAME_KEY)
                    db_cluster = db_attrs.get('clusterName')

                results.append(
                    PopularTable(
                        database=table_entity.typeName,
                        cluster=db_cluster,
                        schema=db_name,
                        name=table_attrs.get(self.NAME_KEY),
                        description=table_attrs.get('description')))
        return results
    def get_popular_tables(self, *, num_entries: int) -> List[PopularTable]:
        """
        Fetch popular tables through a DSL query ordered by popularityScore.

        :param num_entries: Number of popular tables to fetch
        :return: A List of popular tables instances
        :raises BadRequest: when a KeyError occurs while querying
        :raises NotFoundException: when the bulk lookup returns nothing
        """
        try:
            # Fetch the metadata entities based on popularity score
            dsl_query = {
                'query': ('FROM Table SELECT metadata.__guid '
                          'ORDERBY popularityScore desc '
                          f'LIMIT {num_entries}')
            }
            guids = self._get_flat_values_from_dsl(dsl_param=dsl_query)
            collections = self._driver.entity_bulk(guid=guids)
        except KeyError as error:
            LOGGER.exception(f'DSL Search query failed: {error}')
            raise BadRequest('Unable to fetch popular tables. '
                             'Please check your configurations.')

        if not collections:
            raise NotFoundException('Unable to fetch popular tables. '
                                    'Please check your configurations.')

        results = []
        for batch in collections:
            for metadata in batch.entities_with_relationships(
                    attributes=["parentEntity"]):
                parent = metadata.relationshipAttributes.get("parentEntity")
                parent_attrs = parent.get(self.ATTRS_KEY)

                # Split the qualified name into its named groups, if it matches
                match = self.TABLE_QN_REGEX.match(
                    parent_attrs.get(self.QN_KEY))
                if match:
                    qn_parts = match.groupdict()
                else:
                    qn_parts = {}

                # Hardcoded empty strings as default, because these values
                # are not optional
                name = (parent_attrs.get(self.NAME_ATTRIBUTE)
                        or qn_parts.get("table_name", ''))
                schema = qn_parts.get("db_name", '')
                cluster = qn_parts.get("cluster_name", '')

                results.append(
                    PopularTable(database=parent.get("typeName"),
                                 cluster=cluster,
                                 schema=schema,
                                 name=name,
                                 description=parent_attrs.get('description')))

        return results
    def get_table(self, *, table_uri: str) -> Table:
        """
        Assemble the Table model backing the Table Detail Page.

        :param table_uri: URI used to look up the table entity
        :return: A Table built from the looked-up table info and the
        entity's attributes, classifications and columns.
        :raises BadRequest: when a KeyError occurs while reading the data
        """
        table_entity, info = self._get_table_entity(table_uri=table_uri)
        details = table_entity.entity

        try:
            attributes = details[self.ATTRS_KEY]

            # ``or []`` covers a 'classifications' key that holds None
            tags = [
                Tag(tag_name=item.get('typeName'), tag_type="default")
                for item in details.get("classifications") or []
            ]

            columns = self._serialize_columns(entity=table_entity)

            return Table(
                database=info['entity'],
                cluster=info['cluster'],
                schema=info['db'],
                name=info['name'],
                tags=tags,
                description=attributes.get('description'),
                owners=[User(email=attributes.get('owner'))],
                columns=columns,
                last_updated_timestamp=details.get('updateTime'),
            )
        except KeyError as error:
            LOGGER.exception(
                'Error while accessing table information. {}'.format(
                    str(error)))
            raise BadRequest(
                'Some of the required attributes '
                'are missing in : ( {table_uri} )'.format(table_uri=table_uri))
Beispiel #9
0
    def update(self, data):
        """
        Update entities in bulk.

        Each item of ``data`` is expanded as keyword arguments into the
        model's data class and converted with ``to_dict``. The resulting
        list is sent with a PUT request and the response is loaded.

        :param data: A list of mappings, one per entity to update
        :return: The models held by this collection after loading
        :raises BadRequest: when ``data`` is not a list
        """
        LOG.debug(
            f"Trying to update {self.__class__.__name__} with the data {data}")

        if not isinstance(data, list):
            raise BadRequest(
                url=self.model_class.path,
                method="PUT",
                message=(
                    f'Data should be a list of '
                    f'"{self.model_class.data_class}"'),
            )

        # noinspection PyProtectedMember
        payload = [
            self.model_class.data_class(**entry).to_dict(
                data_key=self.model_class.data_key, ignore_falsy=True)
            for entry in data
        ]

        self.load(self.client.put(self.url, data=payload))
        return self._models
 def test_get_popular_tables_search_exception(self):
     """
     Expect BadRequest when the driver's ``search_basic`` is replaced by a
     mock configured to raise BadRequest.
     """
     with self.assertRaises(BadRequest):
         failing_search = MagicMock(side_effect=BadRequest('Boom!'))
         self.proxy._driver.search_basic = failing_search
         self.proxy.get_popular_tables(num_entries=2)