def compute_pmdl_weight(df, methods, outcome, target_day, c0=1, mu=0.5):
    """
    Compute one pmdl weight entry per model from its recent 3-day-ahead
    predictions.
    Input:
        df: pd.DataFrame
            each entry of df[outcome] is a per-row sequence; only its last
            7 values are compared against predictions
        methods: list of dictionary
            each dictionary needs 'model_type' and may carry
            'demographic_vars'
        outcome: str
        target_day:
            currently unused by this function (known FIXME)
        c0, mu:
            forwarded unchanged to pmdl_weight
    Output:
        dict mapping the position of each model in `methods` to the value
        returned by pmdl_weight for that model
    """
    n_days = 7    # number of trailing observations that are re-predicted
    horizon = 3   # every re-prediction is made this many days ahead
    n_rows = len(df)

    observed = np.array([history[-n_days:] for history in df[outcome].values])

    weights = {}
    for idx, model in enumerate(methods):
        demographic_vars = (model['demographic_vars']
                            if 'demographic_vars' in model else [])

        backtest = np.zeros(observed.shape)
        for t in range(1, n_days + 1):
            # Drop (t + horizon - 1) days, refit, and predict `horizon` days
            # ahead; presumably this lands on the t-th most recent
            # observation, which is column (n_days - t) of `observed`.
            truncated = exponential_modeling.leave_t_day_out(
                df, t + horizon - 1)
            fitted = fit_and_predict.fit_and_predict(
                truncated,
                outcome=outcome,
                method=model['model_type'],
                mode='predict_future',
                target_day=np.array([horizon]),
                output_key='y_preds',
                demographic_vars=demographic_vars)

            per_row_preds = fitted['y_preds'].values
            backtest[:, n_days - t] = np.array(
                [per_row_preds[i][-1] for i in range(n_rows)])

        # Weights are computed on a square-root scale, with negative
        # predictions clipped to zero first. Earlier variants used
        # log(1 + x), the raw values, and the fourth root instead.
        weights[idx] = pmdl_weight(np.sqrt(observed),
                                   np.sqrt(np.maximum(backtest, 0)),
                                   c0=c0,
                                   mu=mu)

    return weights
# Example #2
# 0
def add_all_preds(df_county):
    """
    Add single-predictor predictions for the past {ndays} days.

    Reads the module-level names `methods`, `ndays`, `today`, `outcome`,
    `horizon` and `very_important_vars`.
    Input:
        df_county: pd.DataFrame
    Output:
        df_county, modified in place, with one column
        'all_{outcome}_pred_{month}_{day}_{method}_{horizon}' added per
        (method, day) pair
    """
    for method in methods:
        for t in tqdm(range(1, ndays + 1)):
            d = today - timedelta(t)
            before_cutoff = d < date(2020, 3, 16)
            # The 'demographic' predictor is skipped before 2020-03-16.
            if before_cutoff and method == 'demographic':
                continue

            snapshot = exponential_modeling.leave_t_day_out(df_county, 0 + t)
            pred_key = f'predicted_{outcome}_{method}_{horizon}'

            # 'ensemble' is not fitted here: its prediction column is read
            # from the truncated frame as-is (presumably already present in
            # df_county -- verify against caller).
            if method != 'ensemble':
                fit_kwargs = {
                    'target_day': np.arange(1, horizon + 1),
                    'outcome': outcome,
                    'mode': 'predict_future',
                    'output_key': pred_key,
                }
                if method == 'demographic':
                    # 'demographic' is a shared exponential fit with
                    # demographic covariates.
                    fit_kwargs['method'] = 'shared_exponential'
                    fit_kwargs['demographic_vars'] = very_important_vars
                else:
                    fit_kwargs['method'] = method
                snapshot = fit_and_predict.fit_and_predict(snapshot,
                                                           **fit_kwargs)

            column = (
                f'all_{outcome}_pred_{d.month}_{d.day}_{method}_{horizon}')
            df_county[column] = snapshot[pred_key]

    return df_county
# Example #3
# 0
def previous_prediction_errors(df,
                               target_day: np.ndarray = np.array([1]),
                               outcome: str = 'deaths',
                               methods: list = [advanced_model, linear],
                               look_back_day: int = 5,
                               output_key: str = None):
    """
    Calculate the errors that ensemble predictions made on previous days
    Input:
        df: pd.DataFrame
        target_day: np.ndarray
            prediction horizons (in days) to evaluate
        outcome: str
        methods: list
            forwarded to fit_and_predict_ensemble
        look_back_day: int
            number of past predictions evaluated per horizon
        output_key: str
            column of df that receives the result; it is used as given, no
            default name is derived when it is None
    Output:
        df, modified in place: df[output_key] holds one dictionary per row,
        whose keys are the days in target_day and whose values are lists of
        length {look_back_day} of signed relative errors,
        actual / max(prediction, 1) - 1
    """
    # Group the horizons by how many days have to be held out so that the
    # predicted day is one of the last {look_back_day} observed days. One
    # ensemble fit per hold-out length then serves every horizon in it.
    horizons_by_cutoff = defaultdict(list)
    for horizon in target_day:
        for offset in range(look_back_day):
            horizons_by_cutoff[horizon + offset].append(horizon)

    n_rows = len(df)
    errors_per_row = [defaultdict(list) for _ in range(n_rows)]

    for cutoff, horizons in horizons_by_cutoff.items():
        truncated = exponential_modeling.leave_t_day_out(df, cutoff)

        # Re-run the prediction models on the truncated history.
        refit = fit_and_predict_ensemble(
            truncated,
            target_day=np.array(horizons),
            outcome=outcome,
            methods=methods,
            mode='predict_future',
            output_key='old_predictions',
        )
        old_predictions = refit['old_predictions'].values

        for i in range(n_rows):
            observed = df[outcome].iloc[i]
            for j, horizon in enumerate(horizons):
                predicted = old_predictions[i][j]
                # horizon - cutoff is in (-look_back_day, 0], so this is a
                # negative index counted from the end of the series.
                actual = observed[horizon - cutoff - 1]
                relative_error = actual / max(predicted, 1) - 1
                errors_per_row[i][horizon].append(relative_error)

    df[output_key] = errors_per_row

    return df
# Example #4
# 0
def fit_and_predict_ensemble(df,
                             target_day: np.ndarray = np.array([1]),
                             outcome: str = 'deaths',
                             methods: list = [shared_exponential, linear],
                             mode: str = 'predict_future',
                             output_key: str = None,
                             verbose: bool = False,
                             weight_c0: int = 1,
                             weight_mu: float = 0.5,
                             debug: bool = False,
                             expanded_shared_time_truncation=None):
    """
    Function for ensemble prediction
    Input:
        df: pd.DataFrame
        target_day: array
            prediction horizons; the last entry is used in the default
            output key and as the hold-out length when mode is not
            'predict_future'
        outcome: str
        methods: list of dictionary
            each dictionary specify the type and parameters of the model
            ('model_type', optionally 'demographic_vars')
        mode: str
        output_key: str
            column that receives the ensemble prediction; defaults to
            'predicted_{outcome}_ensemble_{target_day[-1]}'
        verbose: bool
            forwarded to fit_and_predict; also prints the average
            normalized weight of every model
        weight_c0, weight_mu:
            forwarded to compute_pmdl_weight as c0 and mu
        debug: bool
            print progress messages
        expanded_shared_time_truncation:
            forwarded unchanged to fit_and_predict
    Output:
        df with ensemble prediction (df is modified in place); each entry of
        df[output_key] is a list with one non-decreasing value per target
        day
    """
    if output_key is None:
        output_key = f'predicted_{outcome}_ensemble_{target_day[-1]}'

    # Per-model predictions, keyed by the model's position in `methods`.
    predictions = {}
    for (i, model) in enumerate(methods):
        if debug:
            print(f"[DEBUG] fit_and_predict_ensemble:{i}, {model}")

        if 'demographic_vars' in model:
            demographic_vars = model['demographic_vars']
        else:
            demographic_vars = []

        predictions[i] = fit_and_predict(
            df,
            outcome=outcome,
            method=model['model_type'],
            mode=mode,
            target_day=target_day,
            output_key=f'y_preds_{i}',
            demographic_vars=demographic_vars,
            verbose=verbose,
            expanded_shared_time_truncation=expanded_shared_time_truncation
        )[f'y_preds_{i}'].values

    # Outside 'predict_future' mode the weights are computed on the frame
    # with the last target_day[-1] days held out.
    if mode == 'predict_future':
        use_df = df
    else:
        use_df = exponential_modeling.leave_t_day_out(df, target_day[-1])
    if debug:
        print("[DEBUG] fit_and_predict_ensemble: compute weights.")
    weights = pmdl_weight.compute_pmdl_weight(use_df,
                                              methods=methods,
                                              outcome=outcome,
                                              target_day=target_day,
                                              c0=weight_c0,
                                              mu=weight_mu)

    # Per-row normalizer: sum of the weights of all models.
    sum_weights = np.zeros(len(use_df))
    for model_index in weights:
        sum_weights = sum_weights + np.array(weights[model_index])

    # Weighted average of the model predictions, row by row.
    weighted_preds = [np.zeros(len(target_day)) for _ in range(len(use_df))]
    for i in range(len(df)):
        for model_index in weights:
            weighted_preds[i] += np.array(
                predictions[model_index]
                [i]) * weights[model_index][i] / sum_weights[i]

    # print out the relative contribution of each model
    if verbose:
        print('--- Model Contributions ---')
        model_weight_counter = Counter()
        for model_index in weights:
            m_weights = 0
            for i in range(len(use_df)):
                m_weights += weights[model_index][i] / sum_weights[i]
            m_weights = m_weights / len(use_df)
            model_weight_counter[model_index] = m_weights
        for model_index, weight in model_weight_counter.most_common():
            print(str(methods[model_index]) + ': ' + str(weight))

    # Make sure predictions are non-decreasing
    if debug:
        print("[DEBUG] fit_and_predict_ensemble: monotonicity constraint.")
    monotonic_weighted_preds = []
    for preds in weighted_preds:
        new_preds = []
        for value in preds:
            # Compare against the already-adjusted previous value (a running
            # maximum). Comparing against the raw previous value would turn
            # [5, 3, 4] into [5, 5, 4], which still decreases.
            if new_preds:
                value = max(value, new_preds[-1])
            new_preds.append(value)
        monotonic_weighted_preds.append(new_preds)
    weighted_preds = monotonic_weighted_preds
    df[output_key] = weighted_preds
    return df