# Aggregation loop. The time, timedelta and transaction imports are shown here;
# the logger, settings and the aggregation helpers are assumed to be imported
# at module level.
import time
from datetime import timedelta

from django.db import transaction


def aggregate_data_to_db():
    logger.info(f"{settings.TERM_LBL} Starting to aggregate event data ...")
    while True:
        start_time = time.time()
        recent_hour = get_most_recent_hour()
        # catch up on up to a week of not-yet-aggregated data
        look_back_until = recent_hour - timedelta(hours=7 * 24)
        # This aggregates all unaggregated hours/days up until the preceding
        # one, plus the currently active one.
        with transaction.atomic():
            for hour in get_unaggregated_hours(
                    look_back_until,
                    recent_hour - timedelta(hours=1)) + [get_most_recent_hour()]:
                seen_by_hour = aggregate_hour(hour)
                seen_by_hour.save()
                logger.info(f"Saved {seen_by_hour}")
            for day in get_unaggregated_days(
                    look_back_until, recent_hour - timedelta(hours=24)) + [
                        get_most_recent_hour().replace(hour=0)
                    ]:
                seen_by_day = aggregate_day(day)
                seen_by_day.save()
                logger.info(f"Saved {seen_by_day}")
        sleep_until_interval_is_complete(start_time,
                                         settings.UPLOAD_INTERVAL_IN_SECONDS)
        print()  # blank line between cycles in the terminal output
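# The pacing helper used above is defined elsewhere in this repo. Below is a
# minimal, hypothetical sketch of what it presumably does (sleep away whatever
# remains of the configured interval); an illustrative assumption, not the
# actual implementation.
def _sleep_until_interval_is_complete_sketch(start_time: float,
                                             interval_in_seconds: float):
    remaining = interval_in_seconds - (time.time() - start_time)
    if remaining > 0:
        time.sleep(remaining)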
# Upload function. Third-party imports are shown here; BoxSettings, SeenByHour,
# SeenByDay, the logger, settings and the time helpers are assumed to be
# imported at module level.
import requests
from django.core.serializers import serialize


def upload_latest_aggregations():
    """Upload the latest hour and day aggregations to the server."""
    box_settings = BoxSettings.objects.first()

    # Build queries, restricted to what has not been uploaded yet.
    hour_query = SeenByHour.objects.filter(box_id=box_settings.box_id)
    day_query = SeenByDay.objects.filter(box_id=box_settings.box_id)
    latest_upload_time = box_settings.aggregations_uploaded_until
    if latest_upload_time is not None:
        hour_query = hour_query.filter(hour_start__gt=latest_upload_time)
        day_query = day_query.filter(day_start__gte=as_day(
            latest_upload_time.astimezone(get_timezone())))

    # Query, capped at the configured batch size.
    seen_by_hour = hour_query.order_by(
        "hour_start").all()[:settings.UPLOAD_MAX_NUMBER_PER_REQUEST]
    seen_by_day = day_query.order_by(
        "day_start").all()[:settings.UPLOAD_MAX_NUMBER_PER_REQUEST]
    if len(seen_by_hour) == 0:
        logger.info("No aggregations found. Nothing to send.")
        return

    # Determine up to which aggregation time we will not upload again next
    # time. The currently active hour has to stay out, as it is still changing.
    current_hour_start = get_most_recent_hour()
    latest_aggregation_time = None
    for aggregation in [
            sbh for sbh in seen_by_hour if sbh.hour_start < current_hour_start
    ]:
        latest_aggregation_time = aggregation.hour_start

    logger.info(
        f"I collected {len(seen_by_hour)} hour aggregation(s) and"
        f" {len(seen_by_day)} day aggregation(s) to send,"
        f" starting at {seen_by_hour.first().hour_start}.")
    if latest_aggregation_time is not None:
        logger.info(
            f" - next time I will not upload finished times up until"
            f" {latest_aggregation_time}.")

    payload = dict(
        seen_by_hour=serialize("json", seen_by_hour),
        seen_by_day=serialize("json", seen_by_day),
    )
    response = requests.post(
        f"{box_settings.server_url}/api/postAggregations/{box_settings.box_id}/",
        data=payload,
        headers={"Authorization": box_settings.upload_token},
    )
    if response.status_code == 200:
        if latest_aggregation_time is not None:
            logger.info(
                f"Marking {latest_aggregation_time} as the last aggregation"
                f" time we will not upload next time.")
            box_settings.aggregations_uploaded_until = latest_aggregation_time
            box_settings.save()
    else:
        logger.error(
            f"Server responded with code {response.status_code}"
            f" ({response.text}).")
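# For illustration, a hypothetical sketch of the receiving side: Django's
# serializers.deserialize is the natural counterpart to serialize() above and
# yields wrapped model instances with a .save() method. The function name and
# the assumption that the view hands over the POSTed form fields are mine,
# not taken from this repo.
from django.core.serializers import deserialize


def _handle_post_aggregations_sketch(post_data):
    """Unpack and store a payload as built by upload_latest_aggregations()."""
    for wrapped in deserialize("json", post_data["seen_by_hour"]):
        wrapped.save()  # wrapped.object is a SeenByHour instance
    for wrapped in deserialize("json", post_data["seen_by_day"]):
        wrapped.save()  # wrapped.object is a SeenByDay instance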
# handle() of a Django management command ("create_static_data"). Stdlib and
# third-party imports are shown here; the models, logger and project helpers
# are assumed to be imported at module level.
import random
import sys
from datetime import timedelta

import iso8601
import pandas as pd
from django.db import transaction


def handle(self, *args, **options):
    # Figure out start and end times (default: the last seven days).
    if options["start"] is None:
        options["start"] = get_most_recent_hour() - timedelta(days=7)
    else:
        options["start"] = iso8601.parse_date(
            options["start"]).astimezone(get_timezone())
    if options["end"] is None:
        options["end"] = get_most_recent_hour()
    else:
        options["end"] = iso8601.parse_date(
            options["end"]).astimezone(get_timezone())
    if options["start"] >= options["end"]:
        print("Start must be before end.")
        sys.exit(2)

    aileen_box = make_new_aileenbox_or_none(options)

    if "peak_time" not in options or options["peak_time"] not in (
            "morning",
            "afternoon",
    ):
        print("peak_time needs to be either morning or afternoon.")
        sys.exit(2)

    logger.info(
        "create_static_data command was called. Start: %s, End: %s, Box: %s ..."
        % (options["start"], options["end"], options["box_id"]))

    # Make up an hourly count per time slot; whenever a day has been completed,
    # also make up a daily count from the last 24 hourly ones.
    hourly = []
    daily = []
    time_slots = pd.date_range(options["start"], options["end"], freq="1H")
    for dt in time_slots:
        x = makeup_number_observables(dt, options)
        hourly.append(
            SeenByHour(
                box_id=options["box_id"],
                hour_start=dt,
                seen=int(x),
                seen_also_in_preceding_hour=int(
                    x / random.choice([1.5, 2.5, 4])),
            ))
        if dt.hour == 0 and len(hourly) >= 24:
            sum_by_hours = sum([sbh.seen for sbh in hourly[-24:]])
            seen_this_day = int(sum_by_hours / 2.0)
            daily.append(
                SeenByDay(
                    box_id=options["box_id"],
                    day_start=dt - timedelta(hours=24),
                    seen=int(seen_this_day),
                    seen_also_on_preceding_day=int(
                        seen_this_day / random.choice([1.5, 2.5, 4])),
                    seen_also_a_week_earlier=int(
                        seen_this_day / random.choice([1.5, 2.5, 4])),
                ))

    # Now save to the database.
    with transaction.atomic():
        if aileen_box is not None:
            aileen_box.save()
        for aggregation in hourly + daily:
            print("Saving %s ..." % aggregation)
            aggregation.save()
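# makeup_number_observables() is defined elsewhere in this repo. As a
# hypothetical sketch of the behavior implied by the peak_time option, a
# synthetic count could peak mid-morning or mid-afternoon with some noise;
# the peak hours and the linear falloff below are illustrative assumptions.
def _makeup_number_observables_sketch(dt, options) -> float:
    peak_hour = 10 if options.get("peak_time") == "morning" else 15
    distance = abs(dt.hour - peak_hour)
    base = max(0.0, 60.0 - 6.0 * distance)  # linear falloff from the peak hour
    return base * random.uniform(0.8, 1.2)  # plus some noise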