Example #1
0
def data_records_to_delete(nuvla: Nuvla, obj_num, filter, size_bytes_after, page_size, thresholdLow_station):
    """Collect pages of data records whose removal brings usage down to the watermark.

    Records are requested from the 'data-record' collection ordered by
    ascending 'gnss:timestamp', one page at a time. The 'bytes' value of
    each collected record is subtracted from ``size_bytes_after`` and
    collection stops as soon as the remaining size is at or below
    ``thresholdLow_station``, or when all ``obj_num`` records were paged.

    :param nuvla: client exposing ``search(resource, **params)`` whose result
        has a ``data`` mapping with a 'resources' list.
    :param obj_num: total number of records matching ``filter``; upper bound
        for the paging window.
    :param filter: filter expression passed through to the search.
    :param size_bytes_after: current size in bytes; decreased locally as
        records are collected (the caller's value is not modified).
    :param page_size: maximum number of records requested per search call.
    :param thresholdLow_station: low watermark in bytes.
    :return: list of pages; each page is the list of record dicts (fields
        'id', 'bucket', 'object', 'bytes' are selected) collected from one
        search call.
    """
    log.info('filter: {}'.format(filter))
    orderby = 'gnss:timestamp:asc'
    log.info('orderby: {}'.format(orderby))
    aggrs = 'sum:bytes'
    log.info('aggregations: {}'.format(aggrs))
    select = 'id,bucket,object,bytes'
    log.info('select: {}'.format(select))

    data_records = []
    # Pagination: 'first' and 'last' are 1-based, inclusive bounds.
    first = 1
    while size_bytes_after > thresholdLow_station and first <= obj_num:
        # Clamp every window to obj_num so that the trailing partial page is
        # still requested instead of being skipped.
        last = min(first + page_size - 1, obj_num)
        log.info("Collecting data-records with paging: {0} {1} {2} {3}"
                 .format(size_bytes_after, thresholdLow_station, first, last))
        res = nuvla.search('data-record',
                           filter=filter,
                           orderby=orderby,
                           aggregation=aggrs,
                           first=first,
                           last=last,
                           select=select)
        resources = res.data['resources']
        if not resources:
            # Fewer records exist than obj_num announced; later pages would
            # be empty as well.
            break
        drs = []
        for dr in resources:
            drs.append(dr)
            size_bytes_after -= dr['bytes']
            # '<=' matches both the outer loop condition and the log message:
            # reaching the watermark exactly is enough to stop collecting.
            if size_bytes_after <= thresholdLow_station:
                log.info("size_bytes_after {0} below or equal watermark {1}. Ready to delete."
                         .format(size_bytes_after, thresholdLow_station))
                break
        data_records.append(drs)
        first = last + 1
    return data_records
 time_start = drf['time-start']
 time_end = drf['time-end']
 drs_filter_mask = "(timestamp>='{0}' and timestamp<='{1}') and {2}"
 data_records_filter = drs_filter_mask.format(time_start, time_end,
                                              dr_filter)
 print('Initial data records filter: {}'.format(data_records_filter))
 cycle_count = 1
 ndrs = 0
 last_timestamp = None
 collected_total = 0
 while True:
     t_s = time.time()
     print('::: Page: {}'.format(cycle_count))
     res = api.search('data-record',
                      filter=data_records_filter,
                      select=['bucket', 'object', 'timestamp'],
                      aggregation='value_count:id',
                      orderby='timestamp:asc')
     print('Time to get records: {:.3f} sec'.format(time.time() - t_s))
     if len(res.data['resources']) <= 0:
         print('No more data records to collect.')
         break
     if cycle_count == 1:
         ndrs = res.data['aggregations']['value_count:id']['value']
         print('Number of data records to collect: {}'.format(ndrs))
     for dr in res.data['resources']:
         obj = dr['object']
         bucket = dr['bucket']
         if bucket in records:
             records[bucket].append(obj)
         else: