Esempio n. 1
0
    def _get_readers(self,
                     qualified_name: str,
                     top: Optional[int] = 15) -> List[Reader]:
        """
        Look up the reader entities recorded for a table and wrap them as Reader objects.

        A basic search is issued for entities of type ``self.READER_TYPE`` whose
        ``self.QN_KEY`` attribute starts with the part of ``qualified_name`` before
        the first '@' (plus a trailing dot) and whose ``count`` is at least the
        configured ``POPULAR_TABLE_MINIMUM_READER_COUNT``. The search asks for the
        results sorted by ``count`` in descending order. The matching entities are
        then loaded in bulk, relationships included, to resolve the related user.

        :param qualified_name: Qualified name of the table; only the text before
            the first '@' is used to build the search prefix.
        :param top: Sent as the ``limit`` of the search request.
        :return: One Reader per entity returned by the bulk fetch, or an empty
            list when the search matched nothing.
        """
        qn_prefix = qualified_name.split('@')[0] + '.'
        minimum_count = f'{app.config["POPULAR_TABLE_MINIMUM_READER_COUNT"]}'

        prefix_criterion = {
            'attributeName': self.QN_KEY,
            'operator': 'STARTSWITH',
            'attributeValue': qn_prefix
        }
        count_criterion = {
            'attributeName': 'count',
            'operator': 'gte',
            'attributeValue': minimum_count
        }
        query = {
            'typeName': self.READER_TYPE,
            'offset': '0',
            'limit': top,
            'excludeDeletedEntities': True,
            'entityFilters': {
                'condition': 'AND',
                'criterion': [prefix_criterion, count_criterion]
            },
            'attributes': ['count', self.QN_KEY],
            'sortBy': 'count',
            'sortOrder': 'DESCENDING'
        }

        found = self._driver.search_basic.create(data=query,
                                                 ignoreRelationships=False)
        reader_guids = [hit.guid for hit in found.entities]

        # Nothing matched the search, so there is nothing to load in bulk.
        if not reader_guids:
            return []

        bulk_response = self._driver.entity_bulk(guid=reader_guids,
                                                 ignoreRelationships=False)

        collected = []
        for reader_entity in extract_entities(bulk_response):
            user_name = reader_entity.relationshipAttributes['user']['displayText']

            # When no user details are returned, the display text doubles as
            # both the e-mail and the user id.
            details = self.user_detail_method(user_name)
            if not details:
                details = {'email': user_name, 'user_id': user_name}

            collected.append(
                Reader(user=User(**details),
                       read_count=reader_entity.attributes['count']))

        return collected
    def _get_resources_owned_by_user(self, user_id: str, resource_type: str) \
            -> List[Union[PopularTable, DashboardSummary, Any]]:
        """
        ToDo (Verdan): Dashboard still needs to be implemented.
        Helper function to get the resource, table, dashboard etc owned by a user.
        :param user_id: User ID of a user
        :param resource_type: Type of a resource that returns, could be table, dashboard etc.
        :return: A list of PopularTable, DashboardSummary or any other resource.
        """
        resources = list()
        user_entity = self._driver.entity_unique_attribute(
            self.USER_TYPE, qualifiedName=user_id).entity

        if not user_entity:
            LOGGER.exception(f'User ({user_id}) not found in Atlas')
            raise NotFoundException(f'User {user_id} not found.')

        resource_guids = list()
        for item in user_entity[self.REL_ATTRS_KEY].get('ownerOf') or list():
            if (item['entityStatus'] == Status.ACTIVE
                    and item['relationshipStatus'] == Status.ACTIVE
                    and item['typeName'] == resource_type):
                resource_guids.append(item[self.GUID_KEY])

        entities = extract_entities(
            self._driver.entity_bulk(guid=resource_guids,
                                     ignoreRelationships=True))
        if resource_type == self.TABLE_ENTITY:
            resources = self._serialize_popular_tables(entities)

        return resources
    def _get_readers(self, entity: EntityUniqueAttribute, top: Optional[int] = 15) -> List[Reader]:
        _readers = entity.get('relationshipAttributes', dict()).get('readers', list())

        guids = [_reader.get('guid') for _reader in _readers
                 if _reader.get('entityStatus', 'INACTIVE') == Status.ACTIVE
                 and _reader.get('relationshipStatus', 'INACTIVE') == Status.ACTIVE]

        if not guids:
            return []

        readers = extract_entities(self._driver.entity_bulk(guid=guids, ignoreRelationships=False))

        _result = []

        for _reader in readers:
            read_count = _reader.attributes['count']

            if read_count >= int(app.config['POPULAR_TABLE_MINIMUM_READER_COUNT']):
                reader_qn = _reader.relationshipAttributes['user']['displayText']
                reader_details = self._get_user_details(reader_qn)
                reader = Reader(user=User(**reader_details), read_count=read_count)

                _result.append(reader)

        result = sorted(_result, key=attrgetter('read_count'), reverse=True)[:top]

        return result
    def _get_resources_owned_by_user(self, user_id: str, resource_type: str) \
            -> List[Union[PopularTable, DashboardSummary, Any]]:
        """
        ToDo (Verdan): Dashboard still needs to be implemented.
        Helper function to get the resource, table, dashboard etc owned by a user.

        Owned resources are collected from two places and de-duplicated by GUID:
        the user's active ``owns`` relationships whose type name matches the
        resource type, and a basic search for ``self.TABLE_ENTITY`` entities whose
        ``owner`` attribute starts with the lower-cased user id.

        :param user_id: User ID of a user
        :param resource_type: Type of a resource that returns, could be table, dashboard etc.
        :return: A list of PopularTable, DashboardSummary or any other resource.
        :raises NotImplementedError: When ``resource_type`` is not the table resource type.
        :raises NotFoundException: When no user entity is returned for ``user_id``.
        """
        resources = list()

        if resource_type == ResourceType.Table.name:
            # Compiled once here instead of once per owned item.
            type_pattern = re.compile(r"(.*)_table$")
        else:
            # Dashboards are not supported yet (see the ToDo in the docstring).
            # NotImplementedError is the exception; raising the NotImplemented
            # constant would fail with a TypeError instead.
            message = f'Resource Type ({resource_type}) is not yet implemented'
            LOGGER.error(message)
            raise NotImplementedError(message)

        user_entity = self._driver.entity_unique_attribute(self.USER_TYPE, qualifiedName=user_id).entity

        if not user_entity:
            # No exception is being handled here, so log with error() rather
            # than exception() (which would append an empty traceback).
            LOGGER.error(f'User ({user_id}) not found in Atlas')
            raise NotFoundException(f'User {user_id} not found.')

        # 'owns' may be absent or None; treat both as "owns nothing".
        owned_items = user_entity[self.REL_ATTRS_KEY].get('owns') or []
        resource_guids = {
            item[self.GUID_KEY] for item in owned_items
            if (item['entityStatus'] == Status.ACTIVE
                and item['relationshipStatus'] == Status.ACTIVE
                and type_pattern.match(item['typeName']))
        }

        # Second source: tables whose 'owner' attribute starts with the user id.
        params = {
            'typeName': self.TABLE_ENTITY,
            'excludeDeletedEntities': True,
            'entityFilters': {
                'condition': 'AND',
                'criterion': [
                    {
                        'attributeName': 'owner',
                        'operator': 'startsWith',
                        'attributeValue': user_id.lower()
                    }
                ]
            },
            'attributes': [self.GUID_KEY]
        }
        table_entities = self._driver.search_basic.create(data=params)
        resource_guids.update(table.guid for table in table_entities.entities)

        if not resource_guids:
            LOGGER.info(f'User ({user_id}) does not own any "{resource_type}"')
            return resources

        entities = extract_entities(self._driver.entity_bulk(guid=list(resource_guids), ignoreRelationships=True))
        if resource_type == ResourceType.Table.name:
            resources = self._serialize_popular_tables(entities)

        return resources
    def _get_reports(self, guids: List[str]) -> List[ResourceReport]:
        """
        Load the report entities for the given GUIDs and convert them to ResourceReport objects.

        Only entities whose status equals ``self.ENTITY_ACTIVE_STATUS`` are kept.
        An entity that is missing an expected attribute or key is logged and skipped.
        If ``RESOURCE_REPORT_CLIENT`` is configured, the collected reports are
        passed through it before being returned.

        :param guids: GUIDs of the report entities to fetch; may be empty.
        :return: The collected reports, or whatever the configured report client
            returns for them.
        """
        collected = []

        if guids:
            bulk_response = self._driver.entity_bulk(guid=guids)
            for entity in extract_entities(bulk_response):
                try:
                    is_active = entity.status == self.ENTITY_ACTIVE_STATUS
                    if is_active:
                        attrs = entity.attributes
                        report = ResourceReport(name=attrs['name'],
                                                url=attrs['url'])
                        collected.append(report)
                except (KeyError, AttributeError) as ex:
                    # A malformed report must not prevent the others from loading.
                    LOGGER.exception(
                        'Error while accessing table report: {}. {}'.format(
                            str(entity), str(ex)))

        report_client = app.config['RESOURCE_REPORT_CLIENT']
        if report_client:
            return report_client(collected)

        return collected
    def get_frequently_used_tables(
            self, *, user_email: str) -> Dict[str, List[PopularTable]]:
        """
        Return the tables a user has read, ordered by read count.

        The user's ``entityReads`` relationships that are active (both entity and
        relationship status) are loaded in bulk. Each reader entity with a
        non-zero ``count`` is turned into a PopularTable built from the parts of
        its entity URI.

        :param user_email: Qualified name used to look up the user entity.
        :return: A dict with a single ``'table'`` key holding the tables sorted
            by read count in ascending order. Tables sharing the same count are
            all kept, in the order they were returned by the bulk fetch.
        """
        user = self._driver.entity_unique_attribute(
            self.USER_TYPE, qualifiedName=user_email).entity

        # 'entityReads' may be absent or None; treat both as "no reads".
        entity_reads = user['relationshipAttributes'].get('entityReads') or []
        readers_guids = [
            user_reads['guid'] for user_reads in entity_reads
            if (user_reads['entityStatus'] == 'ACTIVE'
                and user_reads['relationshipStatus'] == 'ACTIVE')
        ]

        # Avoid a bulk request with an empty GUID list.
        if not readers_guids:
            return {'table': []}

        readers = extract_entities(
            self._driver.entity_bulk(guid=readers_guids,
                                     ignoreRelationships=True))

        # Collected as (count, table fields) pairs rather than a dict keyed by
        # count, so that two tables with the same read count do not overwrite
        # each other.
        counted_tables = []
        for reader in readers:
            count = reader.attributes.get('count')
            if not count:
                continue

            entity_uri = reader.attributes.get(self.ENTITY_URI_KEY)
            details = self._extract_info_from_uri(table_uri=entity_uri)

            counted_tables.append((count,
                                   dict(cluster=details.get('cluster'),
                                        name=details.get('name'),
                                        schema=details.get('db'),
                                        database=details.get('entity'))))

        # Sort on the count only; the sort is stable, so ties keep fetch order.
        counted_tables.sort(key=lambda pair: pair[0])

        results = [PopularTable(**data) for _, data in counted_tables]

        return {'table': results}
Esempio n. 7
0
    def _get_table_watermarks(
            self, entity: EntityUniqueAttribute) -> List[Watermark]:
        partition_value_format = '%Y-%m-%d %H:%M:%S'

        _partitions = entity.get('relationshipAttributes',
                                 dict()).get('partitions', list())

        guids = [
            _partition.get('guid') for _partition in _partitions
            if _partition.get('entityStatus') == Status.ACTIVE
            and _partition.get('relationshipStatus') == Status.ACTIVE
        ]

        if not guids:
            return []

        partition_key = AtlasProxy._render_partition_key_name(entity)

        full_partitions = extract_entities(
            self._driver.entity_bulk(guid=list(guids),
                                     ignoreRelationships=True))
        watermark_date_format = AtlasProxy._select_watermark_format(
            [p.attributes.get('name') for p in full_partitions])

        partitions = {}

        for partition in full_partitions:
            partition_name = partition.attributes.get('name')

            if partition_name and watermark_date_format:
                partition_date, _ = AtlasProxy._validate_date(
                    partition_name, watermark_date_format)

                if partition_date:
                    _partition_create_time = self._parse_date(
                        partition.createTime) or 0.0

                    partition_create_time = datetime.datetime.fromtimestamp(
                        _partition_create_time).strftime(
                            partition_value_format)

                    common_values = {
                        'partition_value':
                        datetime.datetime.strftime(partition_date,
                                                   partition_value_format),
                        'create_time':
                        partition_create_time,
                        'partition_key':
                        partition_key
                    }

                    partitions[partition_date] = common_values

        if partitions:
            low_watermark_date = min(partitions.keys())
            high_watermark_date = max(partitions.keys())

            low_watermark = Watermark(watermark_type='low_watermark',
                                      **partitions.get(low_watermark_date))
            high_watermark = Watermark(watermark_type='high_watermark',
                                       **partitions.get(high_watermark_date))

            return [low_watermark, high_watermark]
        else:
            return []