Example #1
0
 def _get_trunc_func(
     self, subject: str, period: str
 ) -> Tuple[Union[TruncHour, TruncDay, TruncWeek, TruncMonth], str]:
     """
     Return the ORM truncation expression and raw-SQL bucket fields for *period*.

     :param subject: name of the datetime field/expression to truncate.
     :param period: one of "Hour", "Day", "Week" or "Month" (case-sensitive).
     :return: a ``(Trunc* expression, SQL fragment)`` tuple. The SQL fragment
         selects two bucket indices via PostgreSQL ``DATE_PART``:
         ``first_date`` (offset of ``first_date`` from a ``%s`` placeholder the
         caller must bind) and ``date`` (offset of ``event_date`` from
         ``first_date``), both expressed in units of *period*.
     :raises ValidationError: when *period* is not one of the four supported values.
     """
     if period == "Hour":
         # Hour buckets: day difference * 24 plus the remaining hour part.
         # NOTE: this fragment contains two %s placeholders (caller binds both).
         fields = """
         FLOOR(DATE_PART('day', first_date - %s) * 24 + DATE_PART('hour', first_date - %s)) AS first_date,
         FLOOR(DATE_PART('day', event_date - first_date) * 24 + DATE_PART('hour', event_date - first_date)) AS date,
         """
         return TruncHour(subject), fields
     elif period == "Day":
         # Day buckets: whole days between the dates.
         fields = """
         FLOOR(DATE_PART('day', first_date - %s)) AS first_date,
         FLOOR(DATE_PART('day', event_date - first_date)) AS date,
         """
         return TruncDay(subject), fields
     elif period == "Week":
         # Week buckets: whole days divided by 7.
         fields = """
         FLOOR(DATE_PART('day', first_date - %s) / 7) AS first_date,
         FLOOR(DATE_PART('day', event_date - first_date) / 7) AS date,
         """
         return TruncWeek(subject), fields
     elif period == "Month":
         # Month buckets: year difference * 12 plus the month difference.
         # NOTE: this fragment contains two %s placeholders (caller binds both).
         fields = """
         FLOOR((DATE_PART('year', first_date) - DATE_PART('year', %s)) * 12 + DATE_PART('month', first_date) - DATE_PART('month', %s)) AS first_date,
         FLOOR((DATE_PART('year', event_date) - DATE_PART('year', first_date)) * 12 + DATE_PART('month', event_date) - DATE_PART('month', first_date)) AS date,
         """
         return TruncMonth(subject), fields
     else:
         raise ValidationError(f"Period {period} is unsupported.")
Example #2
0
def _leads_period_unit_expr(period):
    """
    Return a ``Trunc*`` database expression truncating the ``created``
    field to the given period.

    :param period: one of 'hour', 'day', 'week', 'month', 'year'; any
        unrecognised value falls back to yearly truncation.
    :return: a Django date-truncation expression over ``created``.

    >>> _leads_period_unit_expr('hour')
    TruncHour(F(created))
    >>> _leads_period_unit_expr('day')
    TruncDay(F(created))
    >>> _leads_period_unit_expr('week')
    Trunc(F(created))
    >>> _leads_period_unit_expr('month')
    TruncMonth(F(created))
    >>> _leads_period_unit_expr('year')
    TruncYear(F(created))
    """
    if period == 'hour':
        return TruncHour('created')
    if period == 'day':
        return TruncDay('created')
    if period == 'week':
        # No dedicated TruncWeek in scope here; use the generic Trunc.
        return Trunc('created', 'week')
    if period == 'month':
        return TruncMonth('created')
    # Anything else (documented as 'year') truncates to the year.
    return TruncYear('created')
Example #3
0
 def _determineTrunc(
         subject: str, period: str
 ) -> Union[TruncHour, TruncDay, TruncWeek, TruncMonth]:
     """
     Build the date-truncation expression over *subject* for *period*.

     :param subject: name of the datetime field/expression to truncate.
     :param period: "Hour", "Day", "Week" or "Month" (case-sensitive).
     :raises ValueError: for any other period value.
     """
     trunc_classes = {
         "Hour": TruncHour,
         "Day": TruncDay,
         "Week": TruncWeek,
         "Month": TruncMonth,
     }
     trunc_class = trunc_classes.get(period)
     if trunc_class is None:
         raise ValueError(f"Period {period} is unsupported.")
     return trunc_class(subject)
Example #4
0
 def trunc_func(
         self, field_name: str
 ) -> Union[TruncHour, TruncDay, TruncWeek, TruncMonth]:
     """
     Return the truncation expression for *field_name* matching ``self.interval``.

     :param field_name: name of the datetime field/expression to truncate.
     :raises ValidationError: when ``self.interval`` is not one of
         "hour", "day", "week" or "month".
     """
     interval_to_trunc = {
         "hour": TruncHour,
         "day": TruncDay,
         "week": TruncWeek,
         "month": TruncMonth,
     }
     trunc_class = interval_to_trunc.get(self.interval)
     if trunc_class is None:
         raise ValidationError(f"{self.interval} not supported")
     return trunc_class(field_name)
Example #5
0
def apply_data_retention():
    """
    When data retention is enabled, discard all data applicable for retention.

    Runs only during night hours, cleans at most MAX_HOURS_CLEANUP hour
    buckets per invocation, and keeps the first and last data point of each
    cleaned hour (so an hour is only touched when it holds more than two items).
    """
    settings = RetentionSettings.get_solo()

    if settings.data_retention_in_hours is None:
        # No retention enabled at all (default behaviour).
        return

    current_hour = timezone.now().hour

    # Only cleanup during nights. Allow from midnight to six a.m.
    # NOTE(review): the check is `> 6`, so the 06:00-06:59 hour also passes —
    # confirm whether "to six a.m." is meant to be inclusive.
    if current_hour > 6:
        return

    # Each run should be capped, for obvious performance reasons.
    MAX_HOURS_CLEANUP = 24

    # These models should be rotated with retention. Dict value is the datetime field used.
    MODELS_TO_CLEANUP = {
        DsmrReading.objects.processed(): 'timestamp',
        ElectricityConsumption.objects.all(): 'read_at',
        GasConsumption.objects.all(): 'read_at',
    }

    # Everything strictly older than this moment is a cleanup candidate.
    retention_date = timezone.now() - timezone.timedelta(
        hours=settings.data_retention_in_hours)

    # We need to force UTC here, to avoid AmbiguousTimeError's on DST changes.
    # NOTE(review): activate() is not wrapped in try/finally, so an exception
    # below leaves UTC active for the current thread — verify acceptable.
    timezone.activate(pytz.UTC)

    for base_queryset, datetime_field in MODELS_TO_CLEANUP.items():
        # Select (up to MAX_HOURS_CLEANUP) hour buckets older than the
        # retention date that still hold more than two rows, oldest first.
        hours_to_cleanup = base_queryset.filter(**{
            '{}__lt'.format(datetime_field):
            retention_date
        }).annotate(
            item_hour=TruncHour(datetime_field)).values('item_hour').annotate(
                item_count=Count('id')).order_by().filter(
                    item_count__gt=2).order_by('item_hour').values_list(
                        'item_hour', flat=True)[:MAX_HOURS_CLEANUP]

        hours_to_cleanup = list(hours_to_cleanup)  # Force evaluation.

        if not hours_to_cleanup:
            continue

        for current_hour in hours_to_cleanup:

            # Fetch all data per hour.
            data_set = base_queryset.filter(
                **{
                    '{}__gte'.format(datetime_field):
                    current_hour,
                    '{}__lt'.format(datetime_field):
                    current_hour + timezone.timedelta(hours=1),
                })

            # Extract the first/last item, so we can exclude it.
            # NOTE: Want to alter this? Please update "item_count__gt=2" above as well!
            keeper_pks = [
                data_set.order_by(datetime_field)[0].pk,
                data_set.order_by('-{}'.format(datetime_field))[0].pk
            ]

            # Now drop all others.
            print('Retention | Cleaning up: {} ({})'.format(
                current_hour, data_set[0].__class__.__name__))
            data_set.exclude(pk__in=keeper_pks).delete()

    timezone.deactivate()
Example #6
0
def run(scheduled_process):
    """
    Apply data retention for the given scheduled process.

    Disables the process when retention is off; otherwise cleans up to
    DSMRREADER_RETENTION_MAX_CLEANUP_HOURS_PER_RUN hour buckets per model,
    keeping the first and last item of each cleaned hour. When nothing was
    cleaned, the process is delayed by 12 hours.
    """
    retention_settings = RetentionSettings.get_solo()

    if retention_settings.data_retention_in_hours == RetentionSettings.RETENTION_NONE:
        return scheduled_process.disable(
        )  # Changing the retention settings in the admin will re-activate it again.

    # These models should be rotated with retention. Dict value is the datetime field used.
    ITEM_COUNT_PER_HOUR = 2
    MODELS_TO_CLEANUP = {
        DsmrReading.objects.processed(): 'timestamp',
        ElectricityConsumption.objects.all(): 'read_at',
        GasConsumption.objects.all(): 'read_at',
    }

    # Everything strictly older than this moment is a cleanup candidate.
    retention_date = timezone.now() - timezone.timedelta(
        hours=retention_settings.data_retention_in_hours)
    data_to_clean_up = False

    # We need to force UTC here, to avoid AmbiguousTimeError's on DST changes.
    # NOTE(review): activate() is not wrapped in try/finally, so an exception
    # below leaves UTC active for the current thread — verify acceptable.
    timezone.activate(pytz.UTC)

    for base_queryset, datetime_field in MODELS_TO_CLEANUP.items():
        # Select (capped) hour buckets older than the retention date that
        # still hold more than ITEM_COUNT_PER_HOUR rows, oldest first.
        hours_to_cleanup = base_queryset.filter(**{
            '{}__lt'.format(datetime_field):
            retention_date
        }).annotate(
            item_hour=TruncHour(datetime_field)).values('item_hour').annotate(
                item_count=Count('id')).order_by().filter(
                    item_count__gt=ITEM_COUNT_PER_HOUR
                ).order_by('item_hour').values_list(
                    'item_hour', flat=True
                )[:settings.DSMRREADER_RETENTION_MAX_CLEANUP_HOURS_PER_RUN]

        hours_to_cleanup = list(hours_to_cleanup)  # Force evaluation.

        if not hours_to_cleanup:
            continue

        data_to_clean_up = True

        for current_hour in hours_to_cleanup:
            # Fetch all data per hour.
            data_set = base_queryset.filter(
                **{
                    '{}__gte'.format(datetime_field):
                    current_hour,
                    '{}__lt'.format(datetime_field):
                    current_hour + timezone.timedelta(hours=1),
                })

            # Extract the first/last item, so we can exclude it.
            # NOTE: Want to alter this? Please update ITEM_COUNT_PER_HOUR above as well!
            keeper_pks = [
                data_set.order_by(datetime_field)[0].pk,
                data_set.order_by('-{}'.format(datetime_field))[0].pk
            ]

            # Now drop all others.
            logger.debug('Retention: Cleaning up: %s (%s)', current_hour,
                         data_set[0].__class__.__name__)
            data_set.exclude(pk__in=keeper_pks).delete()

    timezone.deactivate()

    # Delay for a bit, as there is nothing to do.
    if not data_to_clean_up:
        scheduled_process.delay(timezone.timedelta(hours=12))