# Standard-library and third-party imports used by the code below; the
# project-specific names (logger, settings, the models, and the various
# time/aggregation helpers) are assumed to be imported from elsewhere in
# the project.
import random
import sys
import time
from datetime import timedelta

import iso8601
import pandas as pd
import requests
from django.core.management.base import BaseCommand
from django.core.serializers import serialize
from django.db import transaction


def aggregate_data_to_db():
    logger.info(f"{settings.TERM_LBL} Starting to aggregate event data ...")

    while True:
        start_time = time.time()
        recent_hour = get_most_recent_hour()
        look_back_until = recent_hour - timedelta(
            hours=7 * 24)  # look back up to one week for yet-unaggregated data

        # Aggregate every yet-unaggregated hour/day up to the preceding one,
        # plus the currently active one (which is re-aggregated every cycle).
        with transaction.atomic():
            for hour in get_unaggregated_hours(
                    look_back_until,
                    recent_hour - timedelta(hours=1)) + [recent_hour]:
                seen_by_hour = aggregate_hour(hour)
                seen_by_hour.save()
                logger.info(f"Saved {seen_by_hour}")

            for day in get_unaggregated_days(
                    look_back_until, recent_hour - timedelta(hours=24)) + [
                        recent_hour.replace(hour=0)
                    ]:
                seen_by_day = aggregate_day(day)
                seen_by_day.save()
                logger.info(f"Saved {seen_by_day}")

        sleep_until_interval_is_complete(start_time,
                                         settings.UPLOAD_INTERVAL_IN_SECONDS)

        print()
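
# The helper functions used above (get_most_recent_hour,
# get_unaggregated_hours/days, aggregate_hour/day,
# sleep_until_interval_is_complete) live elsewhere in the project. Two minimal
# sketches under assumed semantics - the _sketch names and bodies are
# illustrations, not the project's actual implementations:
def get_most_recent_hour_sketch():
    """Start of the currently active hour (assuming timezone-aware UTC)."""
    from datetime import datetime, timezone
    return datetime.now(timezone.utc).replace(minute=0, second=0, microsecond=0)


def sleep_until_interval_is_complete_sketch(start_time, interval_in_seconds):
    """Sleep for whatever remains of the interval that began at start_time."""
    elapsed = time.time() - start_time
    if elapsed < interval_in_seconds:
        time.sleep(interval_in_seconds - elapsed)
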
def upload_latest_aggregations():
    """Upload latest aggregations."""
    box_settings = BoxSettings.objects.first()
    if box_settings is None:
        logger.error("No BoxSettings found - cannot upload.")
        return
    # build queries
    hour_query = SeenByHour.objects.filter(box_id=box_settings.box_id)
    day_query = SeenByDay.objects.filter(box_id=box_settings.box_id)
    latest_upload_time = box_settings.aggregations_uploaded_until
    if latest_upload_time is not None:
        hour_query = hour_query.filter(hour_start__gt=latest_upload_time)
        day_query = day_query.filter(day_start__gte=as_day(
            latest_upload_time.astimezone(get_timezone())))
    # query
    seen_by_hour = hour_query.order_by(
        "hour_start").all()[:settings.UPLOAD_MAX_NUMBER_PER_REQUEST]
    seen_by_day = day_query.order_by(
        "day_start").all()[:settings.UPLOAD_MAX_NUMBER_PER_REQUEST]

    if len(seen_by_hour) == 0:
        logger.info("No aggregations found. Nothing to send.")
        return

    # Find the latest fully finished hour in this batch - the currently active
    # hour must stay out, so it gets uploaded again once it is complete.
    current_hour_start = get_most_recent_hour()
    finished_hours = [
        sbh.hour_start for sbh in seen_by_hour
        if sbh.hour_start < current_hour_start
    ]
    latest_aggregation_time = max(finished_hours, default=None)

    logger.info(
        f"I collected {len(seen_by_hour)} hour aggregation(s) and {len(seen_by_day)} day aggregation(s) to send,"
        f" starting at {seen_by_hour.first().hour_start}.")
    if latest_aggregation_time is not None:
        logger.info(
            f"Next run will skip finished hours up to and including {latest_aggregation_time}."
        )

    payload = dict(
        seen_by_hour=serialize("json", seen_by_hour),
        seen_by_day=serialize("json", seen_by_day),
    )

    response = requests.post(
        f"{box_settings.server_url}/api/postAggregations/{box_settings.box_id}/",
        data=payload,
        headers={"Authorization": box_settings.upload_token},
    )

    if response.status_code == 200:
        if latest_aggregation_time is not None:
            logger.info(
                f"Marking {latest_aggregation_time} as uploaded - finished"
                " hours up to this time will be skipped next run."
            )
            box_settings.aggregations_uploaded_until = latest_aggregation_time
            box_settings.save()
    else:
        logger.error(
            f"Server responded with code {response.status_code} ({response.text})."
        )
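
# as_day() and get_timezone() are project helpers that are not shown here. A
# plausible sketch, assuming as_day() truncates a datetime to the start of its
# day (the _sketch name marks this as an illustration):
def as_day_sketch(dt):
    """Truncate a (timezone-aware) datetime to the start of its day."""
    return dt.replace(hour=0, minute=0, second=0, microsecond=0)


# Hypothetical wiring: the upload could run on the same cadence as the
# aggregation loop above (this function and its loop are assumptions, not
# project code).
def upload_data_to_server_sketch():
    while True:
        start_time = time.time()
        upload_latest_aggregations()
        sleep_until_interval_is_complete_sketch(
            start_time, settings.UPLOAD_INTERVAL_IN_SECONDS)
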
class Command(BaseCommand):  # Django management command: create_static_data
    def handle(self, *args, **options):
        # figure out start and end
        if options["start"] is None:
            options["start"] = get_most_recent_hour() - timedelta(days=7)
        else:
            options["start"] = iso8601.parse_date(options["start"]).astimezone(
                get_timezone())
        if options["end"] is None:
            options["end"] = get_most_recent_hour()
        else:
            options["end"] = iso8601.parse_date(options["end"]).astimezone(
                get_timezone())
        if options["start"] >= options["end"]:
            print("Start cannot be before end.")
            sys.exit(2)

        aileen_box = make_new_aileenbox_or_none(options)

        if "peak_time" not in options or options["peak_time"] not in (
                "morning",
                "afternoon",
        ):
            print("peak_time needs to be either morning or afternoon.")
            sys.exit(2)

        logger.info(
            "create_static_data command was called. Start: %s, End: %s, Box: %s ..."
            % (options["start"], options["end"], options["box_id"]))

        hourly = []
        daily = []

        time_slots = pd.date_range(options["start"], options["end"], freq="1H")
        for dt in time_slots:
            x = makeup_number_observables(dt, options)
            hourly.append(
                SeenByHour(
                    box_id=options["box_id"],
                    hour_start=dt,
                    seen=int(x),
                    seen_also_in_preceding_hour=int(
                        x / random.choice([1.5, 2.5, 4])),
                ))
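            # At each midnight, roll the preceding 24 hourly counts into one
            # day record (the hourly sum is halved before saving).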
            if dt.hour == 0 and len(hourly) >= 24:
                sum_by_hours = sum([sbh.seen for sbh in hourly[-24:]])
                seen_this_day = int(sum_by_hours / 2.0)
                daily.append(
                    SeenByDay(
                        box_id=options["box_id"],
                        day_start=dt - timedelta(hours=24),
                        seen=int(seen_this_day),
                        seen_also_on_preceding_day=int(
                            seen_this_day / random.choice([1.5, 2.5, 4])),
                        seen_also_a_week_earlier=int(
                            seen_this_day / random.choice([1.5, 2.5, 4])),
                    ))

        # Now save to the database
        with transaction.atomic():
            if aileen_box is not None:
                aileen_box.save()
            for aggregation in hourly + daily:
                print("Saving %s ..." % aggregation)
                aggregation.save()
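
# makeup_number_observables() is referenced in handle() but not shown. A
# minimal sketch, assuming it shapes fake counts around the requested
# peak_time option (the _sketch name marks this as an illustration):
def makeup_number_observables_sketch(dt, options):
    """Return a made-up observation count peaking in the morning or afternoon."""
    peak_hour = 9 if options["peak_time"] == "morning" else 15
    distance = abs(dt.hour - peak_hour)
    base = max(0, 12 - distance)  # highest near the peak hour, zero far away
    return base * random.randint(5, 15)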