def _get_readers(self, entity: EntityUniqueAttribute, top: Optional[int] = 15) -> List[Reader]:
        _readers = entity.get('relationshipAttributes', dict()).get('readers', list())

        guids = [_reader.get('guid') for _reader in _readers
                 if _reader.get('entityStatus', 'INACTIVE') == Status.ACTIVE
                 and _reader.get('relationshipStatus', 'INACTIVE') == Status.ACTIVE]

        if not guids:
            return []

        readers = extract_entities(self._driver.entity_bulk(guid=guids, ignoreRelationships=False))

        _result = []

        for _reader in readers:
            read_count = _reader.attributes['count']

            if read_count >= int(app.config['POPULAR_TABLE_MINIMUM_READER_COUNT']):
                reader_qn = _reader.relationshipAttributes['user']['displayText']
                reader_details = self._get_user_details(reader_qn)
                reader = Reader(user=User(**reader_details), read_count=read_count)

                _result.append(reader)

        result = sorted(_result, key=attrgetter('read_count'), reverse=True)[:top]

        return result
    def _render_partition_key_name(entity: EntityUniqueAttribute) -> Optional[str]:
        _partition_keys = []

        for partition_key in entity.get('attributes', dict()).get('partitionKeys', []):
            partition_key_column_name = partition_key.get('displayName')

            if partition_key_column_name:
                _partition_keys.append(partition_key_column_name)

        partition_key = ' '.join(_partition_keys).strip()

        return partition_key
    def _get_table_watermarks(
            self, entity: EntityUniqueAttribute) -> List[Watermark]:
        partition_value_format = '%Y-%m-%d %H:%M:%S'

        _partitions = entity.get('relationshipAttributes',
                                 dict()).get('partitions', list())

        names = [
            _partition.get('displayText') for _partition in _partitions
            if _partition.get('entityStatus') == Status.ACTIVE
            and _partition.get('relationshipStatus') == Status.ACTIVE
        ]

        if not names:
            return []

        partition_key = AtlasProxy._render_partition_key_name(entity)
        watermark_date_format = AtlasProxy._select_watermark_format(names)

        partitions = {}

        for _partition in _partitions:
            partition_name = _partition.get('displayText')
            if partition_name and watermark_date_format:
                partition_date, _ = AtlasProxy._validate_date(
                    partition_name, watermark_date_format)

                if partition_date:
                    common_values = {
                        'partition_value':
                        datetime.datetime.strftime(partition_date,
                                                   partition_value_format),
                        'create_time':
                        0,
                        'partition_key':
                        partition_key
                    }

                    partitions[partition_date] = common_values

        if partitions:
            low_watermark_date = min(partitions.keys())
            high_watermark_date = max(partitions.keys())

            low_watermark = Watermark(watermark_type='low_watermark',
                                      **partitions.get(low_watermark_date))
            high_watermark = Watermark(watermark_type='high_watermark',
                                       **partitions.get(high_watermark_date))

            return [low_watermark, high_watermark]
        else:
            return []
Exemple #4
0
    def _get_table_watermarks(
            self, entity: EntityUniqueAttribute) -> List[Watermark]:
        partition_value_format = '%Y-%m-%d %H:%M:%S'

        _partitions = entity.get('relationshipAttributes',
                                 dict()).get('partitions', list())

        guids = [
            _partition.get('guid') for _partition in _partitions
            if _partition.get('entityStatus') == Status.ACTIVE
            and _partition.get('relationshipStatus') == Status.ACTIVE
        ]

        if not guids:
            return []

        partition_key = AtlasProxy._render_partition_key_name(entity)

        full_partitions = extract_entities(
            self._driver.entity_bulk(guid=list(guids),
                                     ignoreRelationships=True))
        watermark_date_format = AtlasProxy._select_watermark_format(
            [p.attributes.get('name') for p in full_partitions])

        partitions = {}

        for partition in full_partitions:
            partition_name = partition.attributes.get('name')

            if partition_name and watermark_date_format:
                partition_date, _ = AtlasProxy._validate_date(
                    partition_name, watermark_date_format)

                if partition_date:
                    _partition_create_time = self._parse_date(
                        partition.createTime) or 0.0

                    partition_create_time = datetime.datetime.fromtimestamp(
                        _partition_create_time).strftime(
                            partition_value_format)

                    common_values = {
                        'partition_value':
                        datetime.datetime.strftime(partition_date,
                                                   partition_value_format),
                        'create_time':
                        partition_create_time,
                        'partition_key':
                        partition_key
                    }

                    partitions[partition_date] = common_values

        if partitions:
            low_watermark_date = min(partitions.keys())
            high_watermark_date = max(partitions.keys())

            low_watermark = Watermark(watermark_type='low_watermark',
                                      **partitions.get(low_watermark_date))
            high_watermark = Watermark(watermark_type='high_watermark',
                                       **partitions.get(high_watermark_date))

            return [low_watermark, high_watermark]
        else:
            return []