def _get_readers(self, entity: EntityUniqueAttribute, top: Optional[int] = 15) -> List[Reader]: _readers = entity.get('relationshipAttributes', dict()).get('readers', list()) guids = [_reader.get('guid') for _reader in _readers if _reader.get('entityStatus', 'INACTIVE') == Status.ACTIVE and _reader.get('relationshipStatus', 'INACTIVE') == Status.ACTIVE] if not guids: return [] readers = extract_entities(self._driver.entity_bulk(guid=guids, ignoreRelationships=False)) _result = [] for _reader in readers: read_count = _reader.attributes['count'] if read_count >= int(app.config['POPULAR_TABLE_MINIMUM_READER_COUNT']): reader_qn = _reader.relationshipAttributes['user']['displayText'] reader_details = self._get_user_details(reader_qn) reader = Reader(user=User(**reader_details), read_count=read_count) _result.append(reader) result = sorted(_result, key=attrgetter('read_count'), reverse=True)[:top] return result
def _render_partition_key_name(entity: EntityUniqueAttribute) -> Optional[str]: _partition_keys = [] for partition_key in entity.get('attributes', dict()).get('partitionKeys', []): partition_key_column_name = partition_key.get('displayName') if partition_key_column_name: _partition_keys.append(partition_key_column_name) partition_key = ' '.join(_partition_keys).strip() return partition_key
def _get_table_watermarks( self, entity: EntityUniqueAttribute) -> List[Watermark]: partition_value_format = '%Y-%m-%d %H:%M:%S' _partitions = entity.get('relationshipAttributes', dict()).get('partitions', list()) names = [ _partition.get('displayText') for _partition in _partitions if _partition.get('entityStatus') == Status.ACTIVE and _partition.get('relationshipStatus') == Status.ACTIVE ] if not names: return [] partition_key = AtlasProxy._render_partition_key_name(entity) watermark_date_format = AtlasProxy._select_watermark_format(names) partitions = {} for _partition in _partitions: partition_name = _partition.get('displayText') if partition_name and watermark_date_format: partition_date, _ = AtlasProxy._validate_date( partition_name, watermark_date_format) if partition_date: common_values = { 'partition_value': datetime.datetime.strftime(partition_date, partition_value_format), 'create_time': 0, 'partition_key': partition_key } partitions[partition_date] = common_values if partitions: low_watermark_date = min(partitions.keys()) high_watermark_date = max(partitions.keys()) low_watermark = Watermark(watermark_type='low_watermark', **partitions.get(low_watermark_date)) high_watermark = Watermark(watermark_type='high_watermark', **partitions.get(high_watermark_date)) return [low_watermark, high_watermark] else: return []
def _get_table_watermarks( self, entity: EntityUniqueAttribute) -> List[Watermark]: partition_value_format = '%Y-%m-%d %H:%M:%S' _partitions = entity.get('relationshipAttributes', dict()).get('partitions', list()) guids = [ _partition.get('guid') for _partition in _partitions if _partition.get('entityStatus') == Status.ACTIVE and _partition.get('relationshipStatus') == Status.ACTIVE ] if not guids: return [] partition_key = AtlasProxy._render_partition_key_name(entity) full_partitions = extract_entities( self._driver.entity_bulk(guid=list(guids), ignoreRelationships=True)) watermark_date_format = AtlasProxy._select_watermark_format( [p.attributes.get('name') for p in full_partitions]) partitions = {} for partition in full_partitions: partition_name = partition.attributes.get('name') if partition_name and watermark_date_format: partition_date, _ = AtlasProxy._validate_date( partition_name, watermark_date_format) if partition_date: _partition_create_time = self._parse_date( partition.createTime) or 0.0 partition_create_time = datetime.datetime.fromtimestamp( _partition_create_time).strftime( partition_value_format) common_values = { 'partition_value': datetime.datetime.strftime(partition_date, partition_value_format), 'create_time': partition_create_time, 'partition_key': partition_key } partitions[partition_date] = common_values if partitions: low_watermark_date = min(partitions.keys()) high_watermark_date = max(partitions.keys()) low_watermark = Watermark(watermark_type='low_watermark', **partitions.get(low_watermark_date)) high_watermark = Watermark(watermark_type='high_watermark', **partitions.get(high_watermark_date)) return [low_watermark, high_watermark] else: return []