Example #1
    def range_tuple(self):
        """
        :return: a 2-tuple (min_value, max_value) if I have ranges, or None otherwise
        """
        ranges_qs = self.ranges.all()
        if not ranges_qs.count():
            return None

        # NB: assumes exactly two ranges when any are present
        ranges_list = list(ranges_qs)
        ranges0 = ranges_list[0]
        ranges1 = ranges_list[1]
        ranges0_val = PointPrediction.first_non_none_value(
            ranges0.value_i, ranges0.value_f, None, None, None)
        ranges1_val = PointPrediction.first_non_none_value(
            ranges1.value_i, ranges1.value_f, None, None, None)
        return min(ranges0_val, ranges1_val), max(ranges0_val, ranges1_val)
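PointPrediction.first_non_none_value() appears in nearly every example below. As a rough stand-in, assuming the Zoltar helper simply returns the first of its arguments that is not None (the name and five-argument call pattern come from the examples; this body is an assumption, not the library's code):

def first_non_none_value(*values):
    # return the first argument that is not None, or None if all are None (assumed behavior)
    return next((value for value in values if value is not None), None)

# picking the one populated column out of (value_i, value_f, value_t, value_d, value_b):
assert first_non_none_value(None, 3.5, None, None, None) == 3.5
assert first_non_none_value(None, None, None, None, None) is None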
Example #2
def _tz_loc_targ_pk_lwr_to_pred_val(forecast_model):
    """
    Returns prediction data for all forecasts in forecast_model as a dict:

        [timezero_pk][unit_pk][target_pk][cat_value] -> predicted_value

    Only returns rows whose targets match numeric_targets().
    """
    targets = forecast_model.project.numeric_targets()
    bin_dist_qs = BinDistribution.objects \
        .filter(forecast__forecast_model=forecast_model,
                target__in=targets) \
        .order_by('forecast__time_zero__id', 'unit__id', 'target__id') \
        .values_list('forecast__time_zero__id', 'unit__id', 'target__id', 'prob',
                     'cat_i', 'cat_f', 'cat_t', 'cat_d', 'cat_b')  # only one of cat_* is non-None

    # build the dict: {timezero_pk: {unit_pk: {target_id: {lwr_1: predicted_value_1, ...}}}}:
    tzltpk_to_forec_st_to_pred_val = {}
    for time_zero_id, unit_target_val_grouper in groupby(bin_dist_qs, key=lambda _: _[0]):
        ltpk_to_forec_start_to_pred_val = {}  # {unit_pk: {target_id: {lwr_1: predicted_value_1, ...}}}
        tzltpk_to_forec_st_to_pred_val[time_zero_id] = ltpk_to_forec_start_to_pred_val
        for unit_id, target_val_grouper in groupby(unit_target_val_grouper, key=lambda _: _[1]):
            # {target_id: {lwr_1: predicted_value_1, ...}}:
            tpk_to_forec_start_to_pred_val = defaultdict(dict)
            ltpk_to_forec_start_to_pred_val[unit_id] = tpk_to_forec_start_to_pred_val
            for _, _, target_id, pred_value, cat_i, cat_f, cat_t, cat_d, cat_b in target_val_grouper:
                cat_value = PointPrediction.first_non_none_value(cat_i, cat_f, cat_t, cat_d, cat_b)
                tpk_to_forec_start_to_pred_val[target_id][cat_value] = pred_value

    return tzltpk_to_forec_st_to_pred_val
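The two-level groupby() pattern above recurs throughout these examples. A self-contained sketch with made-up rows, assuming only that the tuples are pre-sorted by the grouping keys (the job of the queryset's ORDER BY):

from itertools import groupby

# (timezero_pk, unit_pk, target_pk, predicted_value), sorted by the first two columns:
rows = [(1, 10, 100, 0.2),
        (1, 10, 101, 0.8),
        (1, 11, 100, 0.5),
        (2, 10, 100, 0.9)]

nested = {}
for tz_pk, unit_grouper in groupby(rows, key=lambda row: row[0]):
    nested[tz_pk] = {}
    for unit_pk, target_rows in groupby(unit_grouper, key=lambda row: row[1]):
        nested[tz_pk][unit_pk] = {target_pk: value for _, _, target_pk, value in target_rows}

assert nested == {1: {10: {100: 0.2, 101: 0.8}, 11: {100: 0.5}},
                  2: {10: {100: 0.9}}}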
Example #3
def csv_response_for_project_truth_data(project):
    """
    Similar to json_response_for_forecast(), but returns a response with project's truth data formatted as CSV.
    NB: The returned response will contain only those rows that actually loaded from the original CSV file passed
    to Project.load_truth_data(), which can be fewer than the original if some rows were invalid. For that reason
    we add '-validated' to the filename to hint at what's going on.
    """
    response = HttpResponse(content_type='text/csv')

    # two cases for deciding the filename to put in the download response:
    # 1) original ends with .csv: orig-name.csv -> orig-name-validated.csv
    # 2) original does not end with .csv: orig-name.csv.foo -> orig-name.csv.foo-validated.csv
    csv_filename_path = Path(project.truth_csv_filename)
    if csv_filename_path.suffix.lower() == '.csv':
        csv_filename = csv_filename_path.stem + '-validated' + csv_filename_path.suffix
    else:
        csv_filename = csv_filename_path.name + '-validated.csv'
    response['Content-Disposition'] = 'attachment; filename="{}"'.format(
        str(csv_filename))

    writer = csv.writer(response)
    writer.writerow(TRUTH_CSV_HEADER)
    for timezero_date, unit_name, target_name, \
        value_i, value_f, value_t, value_d, value_b in project.get_truth_data_rows():
        timezero_date = timezero_date.strftime(YYYY_MM_DD_DATE_FORMAT)
        truth_value = PointPrediction.first_non_none_value(
            value_i, value_f, value_t, value_d, value_b)
        writer.writerow([timezero_date, unit_name, target_name, truth_value])

    return response
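The '-validated' filename logic is easy to exercise on its own. A standard-library-only sketch of the two cases (validated_filename is a hypothetical helper name):

from pathlib import Path

def validated_filename(truth_csv_filename):
    # case 1: insert '-validated' before a .csv suffix; case 2: append '-validated.csv'
    path = Path(truth_csv_filename)
    if path.suffix.lower() == '.csv':
        return path.stem + '-validated' + path.suffix
    return path.name + '-validated.csv'

assert validated_filename('truths.csv') == 'truths-validated.csv'
assert validated_filename('truths.csv.gz') == 'truths.csv.gz-validated.csv'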
Example #4
def _tz_unit_targ_pks_to_truth_values(project):
    """
    Similar to Project.unit_target_name_tz_date_to_truth(), returns project's truth values as a nested dict
    that's organized for easy access using these keys: [timezero_pk][unit_pk][target_id] -> truth_values (a list).
    """
    truth_data_qs = project.truth_data_qs() \
        .order_by('time_zero__id', 'unit__id', 'target__id') \
        .values_list('time_zero__id', 'unit__id', 'target__id',
                     'value_i', 'value_f', 'value_t', 'value_d', 'value_b')

    tz_unit_targ_pks_to_truth_vals = {
    }  # {timezero_pk: {unit_pk: {target_id: truth_value}}}
    for time_zero_id, unit_target_val_grouper in groupby(truth_data_qs,
                                                         key=lambda _: _[0]):
        unit_targ_pks_to_truth = {}  # {unit_pk: {target_id: truth_value}}
        tz_unit_targ_pks_to_truth_vals[time_zero_id] = unit_targ_pks_to_truth
        for unit_id, target_val_grouper in groupby(unit_target_val_grouper,
                                                   key=lambda _: _[1]):
            target_pk_to_truth = defaultdict(list)  # {target_id: truth_value}
            unit_targ_pks_to_truth[unit_id] = target_pk_to_truth
            for _, _, target_id, value_i, value_f, value_t, value_d, value_b in target_val_grouper:
                value = PointPrediction.first_non_none_value(
                    value_i, value_f, value_t, value_d, value_b)
                target_pk_to_truth[target_id].append(value)

    return tz_unit_targ_pks_to_truth_vals
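Because the innermost level is a defaultdict(list), each target maps to a list of truth values (the loop appends, so a timezero/unit/target combination can accumulate more than one). A small usage sketch with hypothetical pks, assuming the dict shape documented above:

from collections import defaultdict

# shape: {timezero_pk: {unit_pk: {target_id: [truth_value, ...]}}}
truth_vals = {5: {7: defaultdict(list, {9: [17.2]})}}

# chained .get() calls tolerate missing timezero/unit/target keys:
assert truth_vals.get(5, {}).get(7, {}).get(9, []) == [17.2]
assert truth_vals.get(6, {}).get(7, {}).get(9, []) == []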
Example #5
def _model_id_to_unit_timezero_points(project, season_name,
                                      step_ahead_targets):
    """
    Similar to Project.unit_target_name_tz_date_to_truth(), returns forecast_model's truth values as a nested dict
    that's organized for easy access using these keys:

        [forecast_model][unit][timezero_date] -> point_values (a list)

    Note that some project TimeZeros have no predictions.
    """
    # get the rows, ordered so we can groupby()
    # note that some project timezeros might not be returned by _flusight_point_value_rows_for_models():
    # query notes:
    # - ORDER BY ensures groupby() will work
    # - we don't need to select targets b/c forecast ids have a 1:1 correspondence to TimeZeros, and b/c targets
    #   are needed only for ordering
    # - ORDER BY target__step_ahead_increment ensures values are sorted by target deterministically
    season_start_date, season_end_date = project.start_end_dates_for_season(
        season_name)
    forecast_point_predictions_qs = PointPrediction.objects \
        .filter(forecast__forecast_model__project=project,
                target__in=step_ahead_targets,
                forecast__time_zero__timezero_date__gte=season_start_date,
                forecast__time_zero__timezero_date__lte=season_end_date) \
        .order_by('forecast__forecast_model__id', 'unit__id', 'forecast__time_zero__timezero_date',
                  'target__step_ahead_increment') \
        .values_list('forecast__forecast_model__id', 'unit__name', 'forecast__time_zero__timezero_date',
                     'value_i', 'value_f', 'value_t', 'value_d', 'value_b')  # only one of value_* is non-None

    # build the dict
    model_to_unit_timezero_points = {}  # return value. filled next
    for model_pk, loc_tz_val_grouper in groupby(forecast_point_predictions_qs,
                                                key=lambda _: _[0]):
        unit_to_timezero_points_dict = {}
        for unit, timezero_values_grouper in groupby(loc_tz_val_grouper,
                                                     key=lambda _: _[1]):
            timezero_to_points_dict = {}
            for timezero_date, values_grouper in groupby(
                    timezero_values_grouper, key=lambda _: _[2]):
                point_values = [
                    PointPrediction.first_non_none_value(
                        _[3], _[4], _[5], _[6], _[7])
                    for _ in list(values_grouper)
                ]
                timezero_to_points_dict[timezero_date] = point_values
            unit_to_timezero_points_dict[unit] = timezero_to_points_dict
        forecast_model = ForecastModel.objects.get(pk=model_pk)
        model_to_unit_timezero_points[
            forecast_model] = unit_to_timezero_points_dict

    # b/c _flusight_point_value_rows_for_models() does not return any rows for models that don't have data for
    # season_name and step_ahead_targets, we need to add empty model entries for callers
    for forecast_model in project.models.all():
        if forecast_model not in model_to_unit_timezero_points:
            model_to_unit_timezero_points[forecast_model] = {}

    return model_to_unit_timezero_points
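The "ORDER BY ensures groupby() will work" notes above are worth making concrete: itertools.groupby() only groups consecutive equal keys, so unsorted rows silently split a group. A minimal demonstration:

from itertools import groupby

# unsorted input yields the key 1 twice, as two separate groups:
assert [key for key, _ in groupby([1, 2, 1])] == [1, 2, 1]
# sorted input yields each key exactly once:
assert [key for key, _ in groupby([1, 1, 2])] == [1, 2]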
Example #6
    def unit_target_name_tz_date_to_truth(self, season_name=None):
        """
        Returns my truth values as a dict that's organized for easy access, as in:
        unit_target_name_tz_date_to_truth[unit_name][target_name][timezero_date]. Only includes data from
        season_name, which is None if I have no seasons.
        """
        from forecast_app.models import PointPrediction  # avoid circular imports

        logger.debug(
            f"unit_target_name_tz_date_to_truth(): entered. project={self}, season_name={season_name}"
        )
        loc_target_tz_date_to_truth = {}
        # NB: ordering by target__name is arbitrary. it could be target__id, but it doesn't matter as long as rows
        # are grouped at all for the second groupby() call below
        truth_data_qs = self.truth_data_qs() \
            .order_by('unit__name', 'target__name') \
            .values_list('unit__id', 'target__id', 'time_zero__timezero_date',
                         'value_i', 'value_f', 'value_t', 'value_d', 'value_b')
        if season_name:
            season_start_date, season_end_date = self.start_end_dates_for_season(
                season_name)
            truth_data_qs = truth_data_qs.filter(
                time_zero__timezero_date__gte=season_start_date,
                time_zero__timezero_date__lte=season_end_date)

        unit_pks_to_names = {unit.id: unit.name for unit in self.units.all()}
        target_pks_to_names = {
            target.id: target.name
            for target in self.targets.all()
        }
        for unit_id, loc_target_tz_grouper in groupby(truth_data_qs,
                                                      key=lambda _: _[0]):
            if unit_id not in unit_pks_to_names:
                continue

            target_tz_date_to_truth = {}
            loc_target_tz_date_to_truth[
                unit_pks_to_names[unit_id]] = target_tz_date_to_truth
            for target_id, target_tz_grouper in groupby(loc_target_tz_grouper,
                                                        key=lambda _: _[1]):
                if target_id not in target_pks_to_names:
                    continue

                tz_date_to_truth = defaultdict(list)
                target_tz_date_to_truth[
                    target_pks_to_names[target_id]] = tz_date_to_truth
                for _, _, tz_date, value_i, value_f, value_t, value_d, value_b in target_tz_grouper:
                    value = PointPrediction.first_non_none_value(
                        value_i, value_f, value_t, value_d, value_b)
                    tz_date_to_truth[tz_date].append(value)
        logger.debug(
            f"unit_target_name_tz_date_to_truth(): done ({len(loc_target_tz_date_to_truth)}). "
            f"project={self}, season_name={season_name}")
        return loc_target_tz_date_to_truth
Example #7
    def get_truth_data_preview(self):
        """
        :return: a preview of my truth data in the form of a table that's represented as a nested list of rows.
            each row: [timezero_date, unit_name, target_name, truth_value]
        """
        from forecast_app.models import PointPrediction  # avoid circular imports

        rows = self.truth_data_qs().values_list('time_zero__timezero_date',
                                                'unit__name', 'target__name',
                                                'value_i', 'value_f',
                                                'value_t', 'value_d',
                                                'value_b')[:10]
        return [[
            timezero_date, unit_name, target_name,
            PointPrediction.first_non_none_value(value_i, value_f, value_t,
                                                 value_d, value_b)
        ] for timezero_date, unit_name, target_name, value_i, value_f, value_t,
                value_d, value_b in rows]
Example #8
def _calculate_error_score_values(score, forecast_model, is_absolute_error):
    """
    Implements the 'error' and 'abs_error' scores. Creates ScoreValue instances for the passed args, saving them into
    the passed score. The score is simply `true_value - predicted_value` (optionally passed to abs() based on
    is_absolute_error) for each combination of Unit + Target in forecast_model's project. Runs in the calling thread
    and therefore blocks. Note that this implementation uses a naive approach to calculating scores, iterating over
    truth and forecast tables instead of caching.

    :param score: a Score
    :param forecast_model: a ForecastModel
    :param is_absolute_error: True if abs() should be called
    """
    from forecast_app.scores.bin_utils import _insert_score_values  # avoid circular imports
    from forecast_app.scores.definitions import _validate_score_targets_and_data


    try:
        targets = _validate_score_targets_and_data(forecast_model)
    except RuntimeError as rte:
        logger.warning(f"_calculate_error_score_values(): _validate_score_targets_and_data() failed. "
                       f"rte={rte!r}, score={score}, forecast_model={forecast_model}")
        return

    # step 1/2: build tz_unit_targ_pk_to_pt_pred_value: [timezero_id][unit_id][target_id] -> point_value
    tz_unit_targ_pk_to_pt_pred_value = {}
    point_predictions_qs = PointPrediction.objects \
        .filter(forecast__forecast_model=forecast_model, target__in=targets) \
        .order_by('forecast__time_zero__id', 'unit__id', 'target__id') \
        .values_list('forecast__time_zero__id', 'unit__id', 'target__id',
                     'value_i', 'value_f', 'value_t', 'value_d', 'value_b')  # only one of value_* is non-None
    for timezero_id, unit_target_val_grouper in groupby(point_predictions_qs, key=lambda _: _[0]):
        tz_unit_targ_pk_to_pt_pred_value[timezero_id] = {}
        for unit_id, target_val_grouper in groupby(unit_target_val_grouper, key=lambda _: _[1]):
            tz_unit_targ_pk_to_pt_pred_value[timezero_id][unit_id] = {}
            for _, _, target_id, value_i, value_f, value_t, value_d, value_b in target_val_grouper:
                value = PointPrediction.first_non_none_value(value_i, value_f, None, value_d, None)  # exclude text/bool values
                tz_unit_targ_pk_to_pt_pred_value[timezero_id][unit_id][target_id] = value

    # step 2/2: iterate over truths, calculating scores. it is convenient to iterate over truths to get all
    # timezero/unit/target combinations. this will omit forecasts with no truth, but that's OK b/c without truth, a
    # forecast makes no contribution to the score. note that we collect all ScoreValue rows and then bulk insert them as
    # an optimization, rather than create separate ORM instances
    score_values = []  # list of 5-tuples: (score.pk, forecast.pk, unit.pk, target.pk, score_value)
    timezero_id_to_forecast_id = {forecast.time_zero.pk: forecast.pk for forecast in forecast_model.forecasts.all()}
    truth_data_qs = forecast_model.project.truth_data_qs() \
        .filter(target__in=targets) \
        .values_list('time_zero__id', 'unit__id', 'target__id',
                     'value_i', 'value_f', 'value_t', 'value_d', 'value_b')  # only one of value_* is non-None
    num_warnings = 0
    for timezero_id, unit_id, target_id, value_i, value_f, value_t, value_d, value_b in truth_data_qs:
        truth_value = PointPrediction.first_non_none_value(value_i, value_f, value_t, value_d, value_b)
        if truth_value is None:
            num_warnings += 1
            continue  # skip this timezero's contribution to the score
        try:
            predicted_value = tz_unit_targ_pk_to_pt_pred_value[timezero_id][unit_id][target_id]
            score_value = abs(truth_value - predicted_value) if is_absolute_error else truth_value - predicted_value
            score_values.append((score.pk, timezero_id_to_forecast_id[timezero_id], unit_id, target_id, score_value))
        except KeyError:  # no predicted value for one of timezero_id, unit_id, target_id
            num_warnings += 1
            continue  # skip this timezero's contribution to the score

    # insert the ScoreValues!
    _insert_score_values(score_values)

    # print warning count
    logger.warning(f"_calculate_error_score_values(): done. score={score}, forecast_model={forecast_model}, "
                   f"num_warnings={num_warnings}")
Example #9
def _calculate_interval_score_values(score, forecast_model, alpha):
    """
    Implements an interval score as inspired by "Strictly Proper Scoring Rules, Prediction, and Estimation" by
    Tilmann Gneiting & Adrian E Raftery. Only calculates ScoreValues for QuantileDistribution data in forecast_model.
    """
    from forecast_app.scores.definitions import _validate_score_targets_and_data  # avoid circular imports
    from forecast_app.scores.bin_utils import _insert_score_values


    try:
        targets = _validate_score_targets_and_data(forecast_model)
    except RuntimeError as rte:
        logger.warning(f"_calculate_interval_score_values(): _validate_score_targets_and_data() failed. "
                       f"rte={rte!r}, score={score}, forecast_model={forecast_model}")
        return

    lower_interval_quantile = alpha / 2
    upper_interval_quantile = 1 - (alpha / 2)

    # step 1/2: build dict tz_unit_targ_pk_to_l_u_vals:
    #   [timezero_id][unit_id][target_id] -> (lower_interval_value, upper_interval_value):
    tz_unit_targ_pk_to_l_u_vals = {}
    quantile_predictions_qs = QuantileDistribution.objects \
        .filter(Q(forecast__forecast_model=forecast_model),  # AND
                Q(target__in=targets),  # AND
                (Q(quantile=lower_interval_quantile) | Q(quantile=upper_interval_quantile))) \
        .order_by('forecast__time_zero__id', 'unit__id', 'target__id', 'quantile') \
        .values_list('forecast__time_zero__id', 'unit__id', 'target__id', 'quantile',
                     'value_i', 'value_f', 'value_d')  # only one of value_* is non-None
    for timezero_id, unit_target_val_grouper in groupby(quantile_predictions_qs, key=lambda _: _[0]):
        tz_unit_targ_pk_to_l_u_vals[timezero_id] = {}
        for unit_id, target_val_grouper in groupby(unit_target_val_grouper, key=lambda _: _[1]):
            tz_unit_targ_pk_to_l_u_vals[timezero_id][unit_id] = defaultdict(list)
            for _, _, target_id, quantile, value_i, value_f, value_d in target_val_grouper:
                value = PointPrediction.first_non_none_value(value_i, value_f, None, value_d, None)
                tz_unit_targ_pk_to_l_u_vals[timezero_id][unit_id][target_id].append(value)

    # step 2/2: iterate over truths, calculating scores. it is convenient to iterate over truths to get all
    # timezero/unit/target combinations. this will omit forecasts with no truth, but that's OK b/c without truth, a
    # forecast makes no contribution to the score. note that we collect all ScoreValue rows and then bulk insert them as
    # an optimization, rather than create separate ORM instances
    score_values = []  # list of 5-tuples: (score.pk, forecast.pk, unit.pk, target.pk, score_value)
    timezero_id_to_forecast_id = {forecast.time_zero.pk: forecast.pk for forecast in forecast_model.forecasts.all()}
    truth_data_qs = forecast_model.project.truth_data_qs() \
        .filter(target__in=targets) \
        .values_list('time_zero__id', 'unit__id', 'target__id', 'value_i', 'value_f', 'value_t', 'value_d',
                     'value_b')  # only one of value_* is non-None
    num_warnings = 0
    for timezero_id, unit_id, target_id, value_i, value_f, value_t, value_d, value_b in truth_data_qs:
        truth_value = PointPrediction.first_non_none_value(value_i, value_f, value_t, value_d, value_b)
        try:
            lower_upper_interval_values = tz_unit_targ_pk_to_l_u_vals[timezero_id][unit_id][target_id]
            if not lower_upper_interval_values:
                # defaultdict(list) -> [] result if match [timezero_id][unit_id] but not target_id
                num_warnings += 1
                continue  # skip this forecast's contribution to the score
            elif len(lower_upper_interval_values) == 1:  # median quantile (alpha = 1.0) has same lower and upper
                lower_interval_value = upper_interval_value = lower_upper_interval_values[0]
            elif len(lower_upper_interval_values) == 2:  # distinct lower and upper quantile values
                lower_interval_value, upper_interval_value = lower_upper_interval_values
            else:
                # should never happen (?) given `_validate_quantile_predictions()` catches "quantiles must be unique"
                raise RuntimeError(f">2 lower_upper_interval_values: {lower_upper_interval_values}. "
                                   f"timezero_id={timezero_id}, unit_id={unit_id}, target_id={target_id}")

            interval_width = upper_interval_value - lower_interval_value
            penalty_l = (2 / alpha) * max(lower_interval_value - truth_value, 0)
            penalty_u = (2 / alpha) * max(truth_value - upper_interval_value, 0)
            score_value = interval_width + penalty_l + penalty_u
            score_values.append((score.pk, timezero_id_to_forecast_id[timezero_id], unit_id, target_id, score_value))
        except KeyError:  # no lower/upper values for one of timezero_id, unit_id, target_id
            num_warnings += 1
            continue  # skip this forecast's contribution to the score

    # insert the ScoreValues!
    _insert_score_values(score_values)

    # print warning count
    logger.warning(f"_calculate_interval_score_values(): done. score={score}, forecast_model={forecast_model}, "
                   f"num_warnings={num_warnings}")
Example #10
def query_forecasts_for_project(project,
                                query,
                                max_num_rows=MAX_NUM_QUERY_ROWS):
    """
    Top-level function for querying forecasts within project. Runs in the calling thread and therefore blocks.

    Yields rows in a Zoltar-specific CSV row format (this function is a generator). The columns are defined in
    FORECAST_CSV_HEADER. Note that the csv is 'sparse': not every row uses all columns, and unused ones are empty
    (''). However, the first four columns are always non-empty, i.e., every prediction has them.

    The 'class' of each row is named to be the same as Zoltar's utils.forecast.PREDICTION_CLASS_TO_JSON_IO_DICT_CLASS
    variable. Column ordering is FORECAST_CSV_HEADER.

    `query` is documented at https://docs.zoltardata.com/, but briefly, it is a dict of up to six keys, five of which
    are lists of strings:

    - 'models': optional list of ForecastModel.abbreviation strings
    - 'units': optional list of Unit.name strings
    - 'targets': optional list of Target.name strings
    - 'timezeros': optional list of TimeZero.timezero_date strings in YYYY_MM_DD_DATE_FORMAT
    - 'types': optional list of type strings as defined in PREDICTION_CLASS_TO_JSON_IO_DICT_CLASS.values()

    The sixth key allows searching based on `Forecast.issue_date`:
    - 'as_of': optional inclusive issue_date in YYYY_MM_DD_DATE_FORMAT to limit the search to. the default behavior if
               not passed is to use the newest forecast for each TimeZero.

    Note that _strings_ are passed to refer to object *contents*, not database IDs, which means validation will fail if
    the referred-to objects are not found. NB: If multiple objects are found with the same name then the program will
    arbitrarily choose one.

    :param project: a Project
    :param query: a dict specifying the query parameters. see https://docs.zoltardata.com/ for documentation, and above
        for a summary. NB: assumes it has passed validation via `validate_forecasts_query()`
    :param max_num_rows: the number of rows at which this function raises a RuntimeError
    :return: a generator of CSV rows, the first of which is the header
    """
    from utils.forecast import PREDICTION_CLASS_TO_JSON_IO_DICT_CLASS  # avoid circular imports

    # validate query
    logger.debug(
        f"query_forecasts_for_project(): 1/4 validating query. query={query}, project={project}"
    )
    error_messages, (model_ids, unit_ids, target_ids, timezero_ids,
                     types) = validate_forecasts_query(project, query)

    # get which types to include
    is_include_bin = (not types) or (
        PREDICTION_CLASS_TO_JSON_IO_DICT_CLASS[BinDistribution] in types)
    is_include_named = (not types) or (
        PREDICTION_CLASS_TO_JSON_IO_DICT_CLASS[NamedDistribution] in types)
    is_include_point = (not types) or (
        PREDICTION_CLASS_TO_JSON_IO_DICT_CLASS[PointPrediction] in types)
    is_include_sample = (not types) or (
        PREDICTION_CLASS_TO_JSON_IO_DICT_CLASS[SampleDistribution] in types)
    is_include_quantile = (not types) or (
        PREDICTION_CLASS_TO_JSON_IO_DICT_CLASS[QuantileDistribution] in types)

    # get Forecasts to be included, applying query's constraints
    forecast_ids = latest_forecast_ids_for_project(project,
                                                   True,
                                                   model_ids=model_ids,
                                                   timezero_ids=timezero_ids,
                                                   as_of=query.get(
                                                       'as_of', None))

    # create queries for each prediction type, but don't execute them yet. first check # rows and limit if necessary.
    # note that not all will be executed, depending on the 'types' key

    # todo if there are no unit_ids or target_ids, do not pass '__in' at all
    if not unit_ids:
        unit_ids = project.units.all().values_list('id', flat=True)  # all of the project's Units
    if not target_ids:
        target_ids = project.targets.all().values_list('id', flat=True)  # all of the project's Targets

    bin_qs = BinDistribution.objects.filter(forecast__id__in=list(forecast_ids),
                                            unit__id__in=list(unit_ids),
                                            target__id__in=list(target_ids)) \
        .values_list('forecast__forecast_model__id', 'forecast__time_zero__id', 'unit__name', 'target__name',
                     'prob', 'cat_i', 'cat_f', 'cat_t', 'cat_d', 'cat_b')
    named_qs = NamedDistribution.objects.filter(forecast__id__in=list(forecast_ids),
                                                unit__id__in=list(unit_ids),
                                                target__id__in=list(target_ids)) \
        .values_list('forecast__forecast_model__id', 'forecast__time_zero__id', 'unit__name', 'target__name',
                     'family', 'param1', 'param2', 'param3')
    point_qs = PointPrediction.objects.filter(forecast__id__in=list(forecast_ids),
                                              unit__id__in=list(unit_ids),
                                              target__id__in=list(target_ids)) \
        .values_list('forecast__forecast_model__id', 'forecast__time_zero__id', 'unit__name', 'target__name',
                     'value_i', 'value_f', 'value_t', 'value_d', 'value_b')
    sample_qs = SampleDistribution.objects.filter(forecast__id__in=list(forecast_ids),
                                                  unit__id__in=list(unit_ids),
                                                  target__id__in=list(target_ids)) \
        .values_list('forecast__forecast_model__id', 'forecast__time_zero__id', 'unit__name', 'target__name',
                     'sample_i', 'sample_f', 'sample_t', 'sample_d', 'sample_b')
    quantile_qs = QuantileDistribution.objects.filter(forecast__id__in=list(forecast_ids),
                                                      unit__id__in=list(unit_ids),
                                                      target__id__in=list(target_ids)) \
        .values_list('forecast__forecast_model__id', 'forecast__time_zero__id', 'unit__name', 'target__name',
                     'quantile', 'value_i', 'value_f', 'value_d')

    # count number of rows to query, and error if too many
    logger.debug(
        f"query_forecasts_for_project(): 2/4 getting counts. query={query}, project={project}"
    )
    is_include_query_set_pred_types = [
        (is_include_bin, bin_qs, 'bin'), (is_include_named, named_qs, 'named'),
        (is_include_point, point_qs, 'point'),
        (is_include_sample, sample_qs, 'sample'),
        (is_include_quantile, quantile_qs, 'quantile')
    ]

    pred_type_counts = [
    ]  # filled next. NB: we do not use a list comprehension b/c we want logging for each pred_type
    for idx, (is_include, query_set,
              pred_type) in enumerate(is_include_query_set_pred_types):
        if is_include:
            logger.debug(
                f"query_forecasts_for_project(): 2{string.ascii_letters[idx]}/4 getting counts: {pred_type!r}"
            )
            pred_type_counts.append((pred_type, query_set.count()))

    num_rows = sum([_[1] for _ in pred_type_counts])
    logger.debug(
        f"query_forecasts_for_project(): 3/4 preparing to query. pred_type_counts={pred_type_counts}. total "
        f"num_rows={num_rows}. query={query}, project={project}")
    if num_rows > max_num_rows:
        raise RuntimeError(
            f"number of rows exceeded maximum. num_rows={num_rows}, max_num_rows={max_num_rows}"
        )

    # output rows for each Prediction subclass
    yield FORECAST_CSV_HEADER

    forecast_model_id_to_obj = {
        forecast_model.pk: forecast_model
        for forecast_model in project.models.all()
    }
    timezero_id_to_obj = {
        timezero.pk: timezero
        for timezero in project.timezeros.all()
    }
    timezero_to_season_name = project.timezero_to_season_name()

    # add BinDistributions
    if is_include_bin:
        logger.debug(
            f"query_forecasts_for_project(): 3a/4 getting BinDistributions")
        # class-specific columns all default to empty:
        value, cat, prob, sample, quantile, family, param1, param2, param3 = '', '', '', '', '', '', '', '', ''
        for forecast_model_id, timezero_id, unit_name, target_name, prob, cat_i, cat_f, cat_t, cat_d, cat_b in bin_qs:
            model_str, timezero_str, season, class_str = _model_tz_season_class_strs(
                forecast_model_id_to_obj[forecast_model_id],
                timezero_id_to_obj[timezero_id], timezero_to_season_name,
                BinDistribution)
            cat = PointPrediction.first_non_none_value(cat_i, cat_f, cat_t,
                                                       cat_d, cat_b)
            cat = cat.strftime(YYYY_MM_DD_DATE_FORMAT) if isinstance(
                cat, datetime.date) else cat
            yield [
                model_str, timezero_str, season, unit_name, target_name,
                class_str, value, cat, prob, sample, quantile, family, param1,
                param2, param3
            ]

    # add NamedDistributions
    if is_include_named:
        logger.debug(
            f"query_forecasts_for_project(): 3b/4 getting NamedDistributions")
        # class-specific columns all default to empty:
        value, cat, prob, sample, quantile, family, param1, param2, param3 = '', '', '', '', '', '', '', '', ''
        for forecast_model_id, timezero_id, unit_name, target_name, family, param1, param2, param3 in named_qs:
            model_str, timezero_str, season, class_str = _model_tz_season_class_strs(
                forecast_model_id_to_obj[forecast_model_id],
                timezero_id_to_obj[timezero_id], timezero_to_season_name,
                NamedDistribution)
            family = NamedDistribution.FAMILY_CHOICE_TO_ABBREVIATION[family]
            yield [
                model_str, timezero_str, season, unit_name, target_name,
                class_str, value, cat, prob, sample, quantile, family, param1,
                param2, param3
            ]

    # add PointPredictions
    if is_include_point:
        logger.debug(
            f"query_forecasts_for_project(): 3c/4 getting PointPredictions")
        # class-specific columns all default to empty:
        value, cat, prob, sample, quantile, family, param1, param2, param3 = '', '', '', '', '', '', '', '', ''
        for forecast_model_id, timezero_id, unit_name, target_name, value_i, value_f, value_t, value_d, value_b \
                in point_qs:
            model_str, timezero_str, season, class_str = _model_tz_season_class_strs(
                forecast_model_id_to_obj[forecast_model_id],
                timezero_id_to_obj[timezero_id], timezero_to_season_name,
                PointPrediction)
            value = PointPrediction.first_non_none_value(
                value_i, value_f, value_t, value_d, value_b)
            value = value.strftime(YYYY_MM_DD_DATE_FORMAT) if isinstance(
                value, datetime.date) else value
            yield [
                model_str, timezero_str, season, unit_name, target_name,
                class_str, value, cat, prob, sample, quantile, family, param1,
                param2, param3
            ]

    # add SampleDistributions
    if is_include_sample:
        logger.debug(
            f"query_forecasts_for_project(): 3d/4 getting SampleDistributions")
        # class-specific columns all default to empty:
        value, cat, prob, sample, quantile, family, param1, param2, param3 = '', '', '', '', '', '', '', '', ''
        for forecast_model_id, timezero_id, unit_name, target_name, \
            sample_i, sample_f, sample_t, sample_d, sample_b in sample_qs:
            model_str, timezero_str, season, class_str = _model_tz_season_class_strs(
                forecast_model_id_to_obj[forecast_model_id],
                timezero_id_to_obj[timezero_id], timezero_to_season_name,
                SampleDistribution)
            sample = PointPrediction.first_non_none_value(
                sample_i, sample_f, sample_t, sample_d, sample_b)
            sample = sample.strftime(YYYY_MM_DD_DATE_FORMAT) if isinstance(
                sample, datetime.date) else sample
            yield [
                model_str, timezero_str, season, unit_name, target_name,
                class_str, value, cat, prob, sample, quantile, family, param1,
                param2, param3
            ]

    # add QuantileDistributions
    if is_include_quantile:
        logger.debug(
            f"query_forecasts_for_project(): 3e/4 getting QuantileDistributions"
        )
        # class-specific columns all default to empty:
        value, cat, prob, sample, quantile, family, param1, param2, param3 = '', '', '', '', '', '', '', '', ''
        for forecast_model_id, timezero_id, unit_name, target_name, quantile, value_i, value_f, value_d in quantile_qs:
            model_str, timezero_str, season, class_str = _model_tz_season_class_strs(
                forecast_model_id_to_obj[forecast_model_id],
                timezero_id_to_obj[timezero_id], timezero_to_season_name,
                QuantileDistribution)
            value = PointPrediction.first_non_none_value(
                value_i, value_f, None, value_d, None)
            value = value.strftime(YYYY_MM_DD_DATE_FORMAT) if isinstance(
                value, datetime.date) else value
            yield [
                model_str, timezero_str, season, unit_name, target_name,
                class_str, value, cat, prob, sample, quantile, family, param1,
                param2, param3
            ]

    # NB: we do not sort b/c it's expensive
    logger.debug(
        f"query_forecasts_for_project(): 4/4 done. num_rows={num_rows}, query={query}, project={project}"
    )
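A hedged usage sketch of the query dict documented above (the project variable and the specific model/unit/target names are hypothetical; the function and key names come from the source). Since query_forecasts_for_project() is a generator whose first row is the header, rows can be streamed straight into a csv.writer:

import csv
import sys

query = {'models': ['my_model_abbrev'],   # hypothetical ForecastModel.abbreviation
         'units': ['US National'],        # hypothetical Unit.name
         'targets': ['1 wk ahead'],       # hypothetical Target.name
         'types': ['point']}              # point predictions only

writer = csv.writer(sys.stdout)
for row in query_forecasts_for_project(project, query):  # header first, then data rows
    writer.writerow(row)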