def range_tuple(self):
    """
    :return: a 2-tuple (min, max) of my two range values if I have ranges, or None o/w
    """
    # one query instead of the original `.count()` + `list()` pair, and guard against a
    # malformed single-row range (the original raised IndexError on `ranges_list[1]`)
    ranges_list = list(self.ranges.all())
    if len(ranges_list) < 2:
        return None

    range0, range1 = ranges_list[0], ranges_list[1]
    # each range row stores its value in exactly one of value_i/value_f
    range0_val = PointPrediction.first_non_none_value(range0.value_i, range0.value_f, None, None, None)
    range1_val = PointPrediction.first_non_none_value(range1.value_i, range1.value_f, None, None, None)
    return min(range0_val, range1_val), max(range0_val, range1_val)
def _tz_loc_targ_pk_lwr_to_pred_val(forecast_model):
    """
    Returns prediction data for all forecasts in forecast_model as a dict:
    [timezero_pk][unit_pk][target_pk][cat_value] -> predicted_value

    Only returns rows whose targets match numeric_targets().
    """
    targets = forecast_model.project.numeric_targets()
    bin_dist_qs = BinDistribution.objects \
        .filter(forecast__forecast_model=forecast_model, target__in=targets) \
        .order_by('forecast__time_zero__id', 'unit__id', 'target__id') \
        .values_list('forecast__time_zero__id', 'unit__id', 'target__id', 'prob',
                     'cat_i', 'cat_f', 'cat_t', 'cat_d', 'cat_b')  # only one of cat_* is non-None

    # build {timezero_pk: {unit_pk: {target_id: {cat_value: predicted_value}}}} by walking the ordered
    # rows directly; setdefault() replaces the original nested groupby() calls and yields the same
    # structure (including the defaultdict(dict) at the target level)
    tzltpk_to_forec_st_to_pred_val = {}
    for time_zero_id, unit_id, target_id, pred_value, cat_i, cat_f, cat_t, cat_d, cat_b in bin_dist_qs:
        unit_dict = tzltpk_to_forec_st_to_pred_val.setdefault(time_zero_id, {})
        target_dict = unit_dict.setdefault(unit_id, defaultdict(dict))
        cat_value = PointPrediction.first_non_none_value(cat_i, cat_f, cat_t, cat_d, cat_b)
        target_dict[target_id][cat_value] = pred_value
    return tzltpk_to_forec_st_to_pred_val
def csv_response_for_project_truth_data(project):
    """
    Similar to json_response_for_forecast(), but returns a response with project's truth data formatted as CSV.

    NB: The returned response will contain only those rows that actually loaded from the original CSV file passed
    to Project.load_truth_data(), which will contain fewer rows if some were invalid. For that reason we change
    the filename to hopefully hint at what's going on.
    """
    response = HttpResponse(content_type='text/csv')

    # download filename: insert '-validated' before a '.csv' suffix when present, otherwise
    # append a full '-validated.csv' to whatever the original name was
    csv_filename_path = Path(project.truth_csv_filename)
    if csv_filename_path.suffix.lower() == '.csv':
        csv_filename = csv_filename_path.stem + '-validated' + csv_filename_path.suffix
    else:
        csv_filename = csv_filename_path.name + '-validated.csv'
    response['Content-Disposition'] = 'attachment; filename="{}"'.format(str(csv_filename))

    writer = csv.writer(response)
    writer.writerow(TRUTH_CSV_HEADER)
    for timezero_date, unit_name, target_name, value_i, value_f, value_t, value_d, value_b \
            in project.get_truth_data_rows():
        # exactly one of value_* is non-None per row
        truth_value = PointPrediction.first_non_none_value(value_i, value_f, value_t, value_d, value_b)
        writer.writerow([timezero_date.strftime(YYYY_MM_DD_DATE_FORMAT), unit_name, target_name, truth_value])
    return response
def _tz_unit_targ_pks_to_truth_values(project):
    """
    Similar to Project.unit_target_name_tz_date_to_truth(), returns project's truth values as a nested dict
    that's organized for easy access using these keys: [timezero_pk][unit_pk][target_id] -> truth_values (a list).
    """
    truth_data_qs = project.truth_data_qs() \
        .order_by('time_zero__id', 'unit__id', 'target__id') \
        .values_list('time_zero__id', 'unit__id', 'target__id',
                     'value_i', 'value_f', 'value_t', 'value_d', 'value_b')

    # build {timezero_pk: {unit_pk: {target_id: [truth_value, ...]}}} row-by-row with setdefault()
    # instead of the original nested groupby() calls; the rows are ordered, so insertion order (and
    # the defaultdict(list) at the target level) comes out identical
    tz_unit_targ_pks_to_truth_vals = {}
    for time_zero_id, unit_id, target_id, value_i, value_f, value_t, value_d, value_b in truth_data_qs:
        unit_dict = tz_unit_targ_pks_to_truth_vals.setdefault(time_zero_id, {})
        target_dict = unit_dict.setdefault(unit_id, defaultdict(list))
        value = PointPrediction.first_non_none_value(value_i, value_f, value_t, value_d, value_b)
        target_dict[target_id].append(value)
    return tz_unit_targ_pks_to_truth_vals
def _model_id_to_unit_timezero_points(project, season_name, step_ahead_targets):
    """
    Similar to Project.unit_target_name_tz_date_to_truth(), returns forecast point values as a nested dict
    organized for easy access: [forecast_model][unit][timezero_date] -> point_values (a list).

    Note that some project TimeZeros have no predictions.
    """
    # query notes:
    # - ORDER BY ensures groupby() will work
    # - targets are needed only for ordering: target__step_ahead_increment makes the point values within each
    #   timezero come out in a deterministic order
    season_start_date, season_end_date = project.start_end_dates_for_season(season_name)
    forecast_point_predictions_qs = PointPrediction.objects \
        .filter(forecast__forecast_model__project=project,
                target__in=step_ahead_targets,
                forecast__time_zero__timezero_date__gte=season_start_date,
                forecast__time_zero__timezero_date__lte=season_end_date) \
        .order_by('forecast__forecast_model__id', 'unit__id', 'forecast__time_zero__timezero_date',
                  'target__step_ahead_increment') \
        .values_list('forecast__forecast_model__id', 'unit__name', 'forecast__time_zero__timezero_date',
                     'value_i', 'value_f', 'value_t', 'value_d', 'value_b')  # only one of value_* is non-None

    # build the dict via three nested groupby() passes: model -> unit -> timezero date
    model_to_unit_timezero_points = {}
    for model_pk, model_rows in groupby(forecast_point_predictions_qs, key=lambda row: row[0]):
        unit_to_timezero_points = {}
        for unit_name, unit_rows in groupby(model_rows, key=lambda row: row[1]):
            timezero_to_points = {}
            for timezero_date, tz_rows in groupby(unit_rows, key=lambda row: row[2]):
                timezero_to_points[timezero_date] = [
                    PointPrediction.first_non_none_value(row[3], row[4], row[5], row[6], row[7])
                    for row in tz_rows]
            unit_to_timezero_points[unit_name] = timezero_to_points
        model_to_unit_timezero_points[ForecastModel.objects.get(pk=model_pk)] = unit_to_timezero_points

    # the query returns no rows for models that have no data for season_name and step_ahead_targets, so add
    # empty entries for those models for callers
    for forecast_model in project.models.all():
        model_to_unit_timezero_points.setdefault(forecast_model, {})
    return model_to_unit_timezero_points
def unit_target_name_tz_date_to_truth(self, season_name=None):
    """
    Returns my truth values as a dict that's organized for easy access, as in:
    unit_target_name_tz_date_to_truth[unit_name][target_name][timezero_date] -> truth values (a list).
    Only includes data from season_name, which is None if I have no seasons.

    :param season_name: optional season name; when passed, truth is limited to that season's date range
    :return: nested dict keyed by unit name, then target name, then timezero date
    """
    from forecast_app.models import PointPrediction  # avoid circular imports

    logger.debug(
        f"unit_target_name_tz_date_to_truth(): entered. project={self}, season_name={season_name}"
    )
    loc_target_tz_date_to_truth = {}
    # NB: ordering by target__id is arbitrary. it could be target__name, but it doesn't matter as long it's grouped
    # at all for the second groupby() call below.
    # NOTE(review): the query sorts by unit__name/target__name but the groupby() keys below are the *id* columns
    # (row[0], row[1]) - that is safe because rows sharing an id also share a name and are therefore contiguous
    truth_data_qs = self.truth_data_qs() \
        .order_by('unit__name', 'target__name') \
        .values_list('unit__id', 'target__id', 'time_zero__timezero_date',
                     'value_i', 'value_f', 'value_t', 'value_d', 'value_b')
    if season_name:
        # restrict to the (inclusive) date range of the named season
        season_start_date, season_end_date = self.start_end_dates_for_season(season_name)
        truth_data_qs = truth_data_qs.filter(time_zero__timezero_date__gte=season_start_date,
                                             time_zero__timezero_date__lte=season_end_date)

    # id -> name lookups so the returned dict is keyed by names, not pks
    unit_pks_to_names = {unit.id: unit.name for unit in self.units.all()}
    target_pks_to_names = {target.id: target.name for target in self.targets.all()}
    for unit_id, loc_target_tz_grouper in groupby(truth_data_qs, key=lambda _: _[0]):
        if unit_id not in unit_pks_to_names:  # skip truth rows for units no longer in the project
            continue

        target_tz_date_to_truth = {}
        loc_target_tz_date_to_truth[unit_pks_to_names[unit_id]] = target_tz_date_to_truth
        for target_id, target_tz_grouper in groupby(loc_target_tz_grouper, key=lambda _: _[1]):
            if target_id not in target_pks_to_names:  # skip truth rows for targets no longer in the project
                continue

            tz_date_to_truth = defaultdict(list)
            target_tz_date_to_truth[target_pks_to_names[target_id]] = tz_date_to_truth
            for _, _, tz_date, value_i, value_f, value_t, value_d, value_b in target_tz_grouper:
                # exactly one of value_* is non-None per truth row
                value = PointPrediction.first_non_none_value(value_i, value_f, value_t, value_d, value_b)
                tz_date_to_truth[tz_date].append(value)
    logger.debug(
        f"unit_target_name_tz_date_to_truth(): done ({len(loc_target_tz_date_to_truth)}). "
        f"project={self}, season_name={season_name}")
    return loc_target_tz_date_to_truth
def get_truth_data_preview(self):
    """
    :return: view helper function that returns a preview of my truth data in the form of a table that's
        represented as a nested list of rows. each row: [timezero_date, unit_name, target_name, truth_value]
    """
    from forecast_app.models import PointPrediction  # avoid circular imports

    # only the first ten truth rows are needed for the preview
    rows = self.truth_data_qs().values_list('time_zero__timezero_date', 'unit__name', 'target__name',
                                            'value_i', 'value_f', 'value_t', 'value_d', 'value_b')[:10]
    preview_rows = []
    for timezero_date, unit_name, target_name, value_i, value_f, value_t, value_d, value_b in rows:
        truth_value = PointPrediction.first_non_none_value(value_i, value_f, value_t, value_d, value_b)
        preview_rows.append([timezero_date, unit_name, target_name, truth_value])
    return preview_rows
def _calculate_error_score_values(score, forecast_model, is_absolute_error):
    """
    Implements the 'error' and 'abs_error' scores. Creates ScoreValue instances for the passed args, saving them
    into the passed score. The score is simply `true_value - predicted_value` (optionally passed to abs() based on
    is_absolute_error) for each combination of Unit + Target in forecast_model's project. Runs in the calling
    thread and therefore blocks.

    Note that this implementation uses a naive approach to calculating scores, iterating over truth and forecast
    tables instead of caching.

    :param score: a Score
    :param forecast_model: a ForecastModel
    :param is_absolute_error: True if abs() should be called
    """
    from forecast_app.scores.bin_utils import _insert_score_values  # avoid circular imports
    from forecast_app.scores.definitions import _validate_score_targets_and_data

    try:
        targets = _validate_score_targets_and_data(forecast_model)
    except RuntimeError as rte:
        logger.warning(f"_calculate_error_score_values(): _validate_score_targets_and_data() failed. "
                       f"rte={rte!r}, score={score}, forecast_model={forecast_model}")
        return

    # step 1/2: build tz_unit_targ_pk_to_pt_pred_value: [timezero_id][unit_id][target_id] -> point_value
    tz_unit_targ_pk_to_pt_pred_value = {}
    point_predictions_qs = PointPrediction.objects \
        .filter(forecast__forecast_model=forecast_model, target__in=targets) \
        .order_by('forecast__time_zero__id', 'unit__id', 'target__id') \
        .values_list('forecast__time_zero__id', 'unit__id', 'target__id',
                     'value_i', 'value_f', 'value_t', 'value_d', 'value_b')  # only one of value_* is non-None
    for timezero_id, unit_target_val_grouper in groupby(point_predictions_qs, key=lambda _: _[0]):
        tz_unit_targ_pk_to_pt_pred_value[timezero_id] = {}
        for unit_id, target_val_grouper in groupby(unit_target_val_grouper, key=lambda _: _[1]):
            tz_unit_targ_pk_to_pt_pred_value[timezero_id][unit_id] = {}
            for _, _, target_id, value_i, value_f, value_t, value_d, value_b in target_val_grouper:
                # NOTE(review): value_t and value_b are fetched but deliberately passed as None here -
                # presumably because text/bool point values can't take part in the subtraction below.
                # TODO confirm against Target type rules
                value = PointPrediction.first_non_none_value(value_i, value_f, None, value_d, None)
                tz_unit_targ_pk_to_pt_pred_value[timezero_id][unit_id][target_id] = value

    # step 2/2: iterate over truths, calculating scores. it is convenient to iterate over truths to get all
    # timezero/unit/target combinations. this will omit forecasts with no truth, but that's OK b/c without truth, a
    # forecast makes no contribution to the score. note that we collect all ScoreValue rows and then bulk insert
    # them as an optimization, rather than create separate ORM instances
    score_values = []  # list of 5-tuples: (score.pk, forecast.pk, unit.pk, target.pk, score_value)
    timezero_id_to_forecast_id = {forecast.time_zero.pk: forecast.pk for forecast in forecast_model.forecasts.all()}
    truth_data_qs = forecast_model.project.truth_data_qs() \
        .filter(target__in=targets) \
        .values_list('time_zero__id', 'unit__id', 'target__id',
                     'value_i', 'value_f', 'value_t', 'value_d', 'value_b')  # only one of value_* is non-None
    num_warnings = 0
    for timezero_id, unit_id, target_id, value_i, value_f, value_t, value_d, value_b in truth_data_qs:
        truth_value = PointPrediction.first_non_none_value(value_i, value_f, value_t, value_d, value_b)
        if truth_value is None:
            num_warnings += 1
            continue  # skip this timezero's contribution to the score

        try:
            predicted_value = tz_unit_targ_pk_to_pt_pred_value[timezero_id][unit_id][target_id]
            score_value = abs(truth_value - predicted_value) if is_absolute_error \
                else truth_value - predicted_value
            score_values.append((score.pk, timezero_id_to_forecast_id[timezero_id], unit_id, target_id,
                                 score_value))
        except KeyError:  # no predicted value for one of timezero_id, unit_id, target_id
            num_warnings += 1
            continue  # skip this timezero's contribution to the score

    # insert the ScoreValues!
    _insert_score_values(score_values)

    # print warning count (logged at warning level on purpose so a non-zero count is visible)
    logger.warning(f"_calculate_error_score_values(): done. score={score}, forecast_model={forecast_model}, "
                   f"num_warnings={num_warnings}")
def _calculate_interval_score_values(score, forecast_model, alpha):
    """
    Implements an interval score as inspired by "Strictly Proper Scoring Rules, Prediction, and Estimation" by
    Tilmann Gneiting & Adrian E Raftery. Only calculates ScoreValues for QuantileDistribution data in
    forecast_model.

    :param score: a Score
    :param forecast_model: a ForecastModel
    :param alpha: the interval's alpha level, e.g. alpha=0.1 scores the central 90% interval
    """
    from forecast_app.scores.definitions import _validate_score_targets_and_data  # avoid circular imports
    from forecast_app.scores.bin_utils import _insert_score_values

    try:
        targets = _validate_score_targets_and_data(forecast_model)
    except RuntimeError as rte:
        logger.warning(f"_calculate_interval_score_values(): _validate_score_targets_and_data() failed. "
                       f"rte={rte!r}, score={score}, forecast_model={forecast_model}")
        return

    lower_interval_quantile = alpha / 2
    upper_interval_quantile = 1 - (alpha / 2)

    # step 1/2: build dict tz_unit_targ_pk_to_l_u_vals:
    # [timezero_id][unit_id][target_id] -> (lower_interval_value, upper_interval_value):
    tz_unit_targ_pk_to_l_u_vals = {}
    quantile_predictions_qs = QuantileDistribution.objects \
        .filter(Q(forecast__forecast_model=forecast_model),  # AND
                Q(target__in=targets),  # AND
                (Q(quantile=lower_interval_quantile) | Q(quantile=upper_interval_quantile))) \
        .order_by('forecast__time_zero__id', 'unit__id', 'target__id', 'quantile') \
        .values_list('forecast__time_zero__id', 'unit__id', 'target__id', 'quantile',
                     'value_i', 'value_f', 'value_d')  # only one of value_* is non-None
    for timezero_id, unit_target_val_grouper in groupby(quantile_predictions_qs, key=lambda _: _[0]):
        tz_unit_targ_pk_to_l_u_vals[timezero_id] = {}
        for unit_id, target_val_grouper in groupby(unit_target_val_grouper, key=lambda _: _[1]):
            tz_unit_targ_pk_to_l_u_vals[timezero_id][unit_id] = defaultdict(list)
            for _, _, target_id, quantile, value_i, value_f, value_d in target_val_grouper:
                value = PointPrediction.first_non_none_value(value_i, value_f, None, value_d, None)
                tz_unit_targ_pk_to_l_u_vals[timezero_id][unit_id][target_id].append(value)

    # step 2/2: iterate over truths, calculating scores. it is convenient to iterate over truths to get all
    # timezero/unit/target combinations. this will omit forecasts with no truth, but that's OK b/c without truth, a
    # forecast makes no contribution to the score. note that we collect all ScoreValue rows and then bulk insert
    # them as an optimization, rather than create separate ORM instances
    score_values = []  # list of 5-tuples: (score.pk, forecast.pk, unit.pk, target.pk, score_value)
    timezero_id_to_forecast_id = {forecast.time_zero.pk: forecast.pk for forecast in forecast_model.forecasts.all()}
    truth_data_qs = forecast_model.project.truth_data_qs() \
        .filter(target__in=targets) \
        .values_list('time_zero__id', 'unit__id', 'target__id',
                     'value_i', 'value_f', 'value_t', 'value_d', 'value_b')  # only one of value_* is non-None
    num_warnings = 0
    for timezero_id, unit_id, target_id, value_i, value_f, value_t, value_d, value_b in truth_data_qs:
        truth_value = PointPrediction.first_non_none_value(value_i, value_f, value_t, value_d, value_b)
        if truth_value is None:
            # BUG FIX: previously a None truth value fell through to the interval arithmetic below and raised
            # TypeError. skip it instead, matching _calculate_error_score_values()'s handling
            num_warnings += 1
            continue  # skip this forecast's contribution to the score

        try:
            lower_upper_interval_values = tz_unit_targ_pk_to_l_u_vals[timezero_id][unit_id][target_id]
            if not lower_upper_interval_values:  # defaultdict(list) -> [] result if match [timezero_id][unit_id] but not target_id
                num_warnings += 1
                continue  # skip this forecast's contribution to the score
            elif len(lower_upper_interval_values) == 1:  # median quantile (alpha = 1.0) has same lower and upper
                lower_interval_value = upper_interval_value = lower_upper_interval_values[0]
            elif len(lower_upper_interval_values) == 2:  # the usual case: distinct lower and upper quantiles
                lower_interval_value, upper_interval_value = lower_upper_interval_values
            else:  # should never happen (?) given `_validate_quantile_predictions()` catches "quantile`s must be unique"
                raise RuntimeError(f">2 lower_upper_interval_values: {lower_upper_interval_values}. "
                                   f"timezero_id={timezero_id}, unit_id={unit_id}, target_id={target_id}")

            interval_width = upper_interval_value - lower_interval_value
            penalty_l = (2 / alpha) * max(lower_interval_value - truth_value, 0)
            penalty_u = (2 / alpha) * max(truth_value - upper_interval_value, 0)
            score_value = interval_width + penalty_l + penalty_u
            score_values.append((score.pk, timezero_id_to_forecast_id[timezero_id], unit_id, target_id,
                                 score_value))
        except KeyError:  # no lower/upper values for one of timezero_id, unit_id, target_id
            num_warnings += 1
            continue  # skip this forecast's contribution to the score

    # insert the ScoreValues!
    _insert_score_values(score_values)

    # print warning count
    logger.warning(f"_calculate_interval_score_values(): done. score={score}, forecast_model={forecast_model}, "
                   f"num_warnings={num_warnings}")
def query_forecasts_for_project(project, query, max_num_rows=MAX_NUM_QUERY_ROWS):
    """
    Top-level function for querying forecasts within project. Runs in the calling thread and therefore blocks.
    A generator that yields rows in a Zoltar-specific CSV row format, starting with the header. The columns are
    defined in FORECAST_CSV_HEADER. Note that the csv is 'sparse': not every row uses all columns, and unused
    ones are empty (''). However, the first four columns are always non-empty, i.e., every prediction has them.

    The 'class' of each row is named to be the same as Zoltar's utils.forecast.PREDICTION_CLASS_TO_JSON_IO_DICT_CLASS
    variable. Column ordering is FORECAST_CSV_HEADER.

    `query` is documented at https://docs.zoltardata.com/, but briefly, it is a dict of up to six keys, five of
    which are lists of strings:

    - 'models': optional list of ForecastModel.abbreviation strings
    - 'units': "" Unit.name strings
    - 'targets': "" Target.name strings
    - 'timezeros': "" TimeZero.timezero_date strings in YYYY_MM_DD_DATE_FORMAT
    - 'types': optional list of type strings as defined in PREDICTION_CLASS_TO_JSON_IO_DICT_CLASS.values()

    The sixth key allows searching based on `Forecast.issue_date`:
    - 'as_of': optional inclusive issue_date in YYYY_MM_DD_DATE_FORMAT to limit the search to. the default behavior
      if not passed is to use the newest forecast for each TimeZero.

    Note that _strings_ are passed to refer to object *contents*, not database IDs, which means validation will
    fail if the referred-to objects are not found. NB: If multiple objects are found with the same name then the
    program will arbitrarily choose one.

    :param project: a Project
    :param query: a dict specifying the query parameters. see https://docs.zoltardata.com/ for documentation, and
        above for a summary. NB: assumes it has passed validation via `validate_forecasts_query()`
    :param max_num_rows: the number of rows at which this function raises a RuntimeError
    :return: yields CSV rows including the header
    """
    from utils.forecast import PREDICTION_CLASS_TO_JSON_IO_DICT_CLASS  # avoid circular imports

    # validate query
    logger.debug(
        f"query_forecasts_for_project(): 1/4 validating query. query={query}, project={project}"
    )
    error_messages, (model_ids, unit_ids, target_ids, timezero_ids, types) = validate_forecasts_query(project, query)

    # get which types to include. an empty/missing 'types' key means include all five prediction classes
    is_include_bin = (not types) or (PREDICTION_CLASS_TO_JSON_IO_DICT_CLASS[BinDistribution] in types)
    is_include_named = (not types) or (PREDICTION_CLASS_TO_JSON_IO_DICT_CLASS[NamedDistribution] in types)
    is_include_point = (not types) or (PREDICTION_CLASS_TO_JSON_IO_DICT_CLASS[PointPrediction] in types)
    is_include_sample = (not types) or (PREDICTION_CLASS_TO_JSON_IO_DICT_CLASS[SampleDistribution] in types)
    is_include_quantile = (not types) or (PREDICTION_CLASS_TO_JSON_IO_DICT_CLASS[QuantileDistribution] in types)

    # get Forecasts to be included, applying query's constraints
    forecast_ids = latest_forecast_ids_for_project(project, True, model_ids=model_ids, timezero_ids=timezero_ids,
                                                   as_of=query.get('as_of', None))

    # create queries for each prediction type, but don't execute them yet. first check # rows and limit if
    # necessary. note that not all will be executed, depending on the 'types' key
    # todo no unit_ids or target_ids -> do not pass '__in'
    if not unit_ids:
        unit_ids = project.units.all().values_list('id', flat=True)  # "" Units ""
    if not target_ids:
        target_ids = project.targets.all().values_list('id', flat=True)  # "" Targets ""
    bin_qs = BinDistribution.objects.filter(forecast__id__in=list(forecast_ids),
                                            unit__id__in=list(unit_ids),
                                            target__id__in=list(target_ids)) \
        .values_list('forecast__forecast_model__id', 'forecast__time_zero__id', 'unit__name', 'target__name',
                     'prob', 'cat_i', 'cat_f', 'cat_t', 'cat_d', 'cat_b')
    named_qs = NamedDistribution.objects.filter(forecast__id__in=list(forecast_ids),
                                                unit__id__in=list(unit_ids),
                                                target__id__in=list(target_ids)) \
        .values_list('forecast__forecast_model__id', 'forecast__time_zero__id', 'unit__name', 'target__name',
                     'family', 'param1', 'param2', 'param3')
    point_qs = PointPrediction.objects.filter(forecast__id__in=list(forecast_ids),
                                              unit__id__in=list(unit_ids),
                                              target__id__in=list(target_ids)) \
        .values_list('forecast__forecast_model__id', 'forecast__time_zero__id', 'unit__name', 'target__name',
                     'value_i', 'value_f', 'value_t', 'value_d', 'value_b')
    sample_qs = SampleDistribution.objects.filter(forecast__id__in=list(forecast_ids),
                                                  unit__id__in=list(unit_ids),
                                                  target__id__in=list(target_ids)) \
        .values_list('forecast__forecast_model__id', 'forecast__time_zero__id', 'unit__name', 'target__name',
                     'sample_i', 'sample_f', 'sample_t', 'sample_d', 'sample_b')
    quantile_qs = QuantileDistribution.objects.filter(forecast__id__in=list(forecast_ids),
                                                      unit__id__in=list(unit_ids),
                                                      target__id__in=list(target_ids)) \
        .values_list('forecast__forecast_model__id', 'forecast__time_zero__id', 'unit__name', 'target__name',
                     'quantile', 'value_i', 'value_f', 'value_d')

    # count number of rows to query, and error if too many
    logger.debug(
        f"query_forecasts_for_project(): 2/4 getting counts. query={query}, project={project}"
    )
    is_include_query_set_pred_types = [
        (is_include_bin, bin_qs, 'bin'), (is_include_named, named_qs, 'named'),
        (is_include_point, point_qs, 'point'), (is_include_sample, sample_qs, 'sample'),
        (is_include_quantile, quantile_qs, 'quantile')
    ]
    pred_type_counts = []  # filled next. NB: we do not use a list comprehension b/c we want logging for each pred_type
    # string.ascii_letters[idx] labels the sub-steps '2a', '2b', ... in the log output
    for idx, (is_include, query_set, pred_type) in enumerate(is_include_query_set_pred_types):
        if is_include:
            logger.debug(
                f"query_forecasts_for_project(): 2{string.ascii_letters[idx]}/4 getting counts: {pred_type!r}"
            )
            pred_type_counts.append((pred_type, query_set.count()))
    num_rows = sum([_[1] for _ in pred_type_counts])
    logger.debug(
        f"query_forecasts_for_project(): 3/4 preparing to query. pred_type_counts={pred_type_counts}. total "
        f"num_rows={num_rows}. query={query}, project={project}")
    if num_rows > max_num_rows:
        raise RuntimeError(
            f"number of rows exceeded maximum. num_rows={num_rows}, max_num_rows={max_num_rows}"
        )

    # output rows for each Prediction subclass
    yield FORECAST_CSV_HEADER

    # id -> object lookups so each row can be rendered without extra per-row queries
    forecast_model_id_to_obj = {forecast_model.pk: forecast_model for forecast_model in project.models.all()}
    timezero_id_to_obj = {timezero.pk: timezero for timezero in project.timezeros.all()}
    timezero_to_season_name = project.timezero_to_season_name()

    # add BinDistributions
    if is_include_bin:
        logger.debug(f"query_forecasts_for_project(): 3a/4 getting BinDistributions")
        # class-specific columns all default to empty:
        value, cat, prob, sample, quantile, family, param1, param2, param3 = '', '', '', '', '', '', '', '', ''
        for forecast_model_id, timezero_id, unit_name, target_name, prob, cat_i, cat_f, cat_t, cat_d, cat_b in bin_qs:
            model_str, timezero_str, season, class_str = _model_tz_season_class_strs(
                forecast_model_id_to_obj[forecast_model_id], timezero_id_to_obj[timezero_id],
                timezero_to_season_name, BinDistribution)
            cat = PointPrediction.first_non_none_value(cat_i, cat_f, cat_t, cat_d, cat_b)
            # dates are serialized in the project's canonical YYYY_MM_DD format
            cat = cat.strftime(YYYY_MM_DD_DATE_FORMAT) if isinstance(cat, datetime.date) else cat
            yield [model_str, timezero_str, season, unit_name, target_name, class_str,
                   value, cat, prob, sample, quantile, family, param1, param2, param3]

    # add NamedDistributions
    if is_include_named:
        logger.debug(f"query_forecasts_for_project(): 3b/4 getting NamedDistributions")
        # class-specific columns all default to empty:
        value, cat, prob, sample, quantile, family, param1, param2, param3 = '', '', '', '', '', '', '', '', ''
        for forecast_model_id, timezero_id, unit_name, target_name, family, param1, param2, param3 in named_qs:
            model_str, timezero_str, season, class_str = _model_tz_season_class_strs(
                forecast_model_id_to_obj[forecast_model_id], timezero_id_to_obj[timezero_id],
                timezero_to_season_name, NamedDistribution)
            # the stored family is a choice code; export its abbreviation instead
            family = NamedDistribution.FAMILY_CHOICE_TO_ABBREVIATION[family]
            yield [model_str, timezero_str, season, unit_name, target_name, class_str,
                   value, cat, prob, sample, quantile, family, param1, param2, param3]

    # add PointPredictions
    if is_include_point:
        logger.debug(f"query_forecasts_for_project(): 3c/4 getting PointPredictions")
        # class-specific columns all default to empty:
        value, cat, prob, sample, quantile, family, param1, param2, param3 = '', '', '', '', '', '', '', '', ''
        for forecast_model_id, timezero_id, unit_name, target_name, value_i, value_f, value_t, value_d, value_b \
                in point_qs:
            model_str, timezero_str, season, class_str = _model_tz_season_class_strs(
                forecast_model_id_to_obj[forecast_model_id], timezero_id_to_obj[timezero_id],
                timezero_to_season_name, PointPrediction)
            value = PointPrediction.first_non_none_value(value_i, value_f, value_t, value_d, value_b)
            value = value.strftime(YYYY_MM_DD_DATE_FORMAT) if isinstance(value, datetime.date) else value
            yield [model_str, timezero_str, season, unit_name, target_name, class_str,
                   value, cat, prob, sample, quantile, family, param1, param2, param3]

    # add SampleDistribution
    if is_include_sample:
        logger.debug(f"query_forecasts_for_project(): 3d/4 getting SampleDistributions")
        # class-specific columns all default to empty:
        value, cat, prob, sample, quantile, family, param1, param2, param3 = '', '', '', '', '', '', '', '', ''
        for forecast_model_id, timezero_id, unit_name, target_name, \
                sample_i, sample_f, sample_t, sample_d, sample_b in sample_qs:
            model_str, timezero_str, season, class_str = _model_tz_season_class_strs(
                forecast_model_id_to_obj[forecast_model_id], timezero_id_to_obj[timezero_id],
                timezero_to_season_name, SampleDistribution)
            sample = PointPrediction.first_non_none_value(sample_i, sample_f, sample_t, sample_d, sample_b)
            sample = sample.strftime(YYYY_MM_DD_DATE_FORMAT) if isinstance(sample, datetime.date) else sample
            yield [model_str, timezero_str, season, unit_name, target_name, class_str,
                   value, cat, prob, sample, quantile, family, param1, param2, param3]

    # add QuantileDistribution
    if is_include_quantile:
        logger.debug(
            f"query_forecasts_for_project(): 3e/4 getting QuantileDistributions"
        )
        # class-specific columns all default to empty:
        value, cat, prob, sample, quantile, family, param1, param2, param3 = '', '', '', '', '', '', '', '', ''
        for forecast_model_id, timezero_id, unit_name, target_name, quantile, value_i, value_f, value_d in quantile_qs:
            model_str, timezero_str, season, class_str = _model_tz_season_class_strs(
                forecast_model_id_to_obj[forecast_model_id], timezero_id_to_obj[timezero_id],
                timezero_to_season_name, QuantileDistribution)
            # quantile rows have no text/bool columns, hence the two Nones
            value = PointPrediction.first_non_none_value(value_i, value_f, None, value_d, None)
            value = value.strftime(YYYY_MM_DD_DATE_FORMAT) if isinstance(value, datetime.date) else value
            yield [model_str, timezero_str, season, unit_name, target_name, class_str,
                   value, cat, prob, sample, quantile, family, param1, param2, param3]

    # NB: we do not sort b/c it's expensive
    logger.debug(
        f"query_forecasts_for_project(): 4/4 done. num_rows={num_rows}, query={query}, project={project}"
    )