def compute_pmdl_weight(df, methods, outcome, target_day, c0=1, mu=0.5):
    """
    Compute one pmdl weight entry per model in `methods`.

    For every model, seven hold-out fits are run: for t = 1..7 the last
    (t + 2) days are removed with `exponential_modeling.leave_t_day_out`,
    the model is refit and asked for a 3-day-ahead prediction, and the last
    value of that prediction is stored in column (7 - t). The resulting
    predictions are compared with the last seven observed values of
    `outcome` (both on a square-root scale) by `pmdl_weight`.

    Input:
        df: pd.DataFrame; each entry of df[outcome] is a sequence
            (assumed to hold at least 7 values -- TODO confirm)
        methods: list of dictionaries with key 'model_type' and, optionally,
            'demographic_vars'
        outcome: str, name of the column holding the observed sequences
        target_day: accepted for interface compatibility, currently unused
        c0, mu: forwarded unchanged to `pmdl_weight`
    Output:
        dict mapping the position of each model in `methods` to whatever
        `pmdl_weight` returns for that model
    """
    # FIXME: currently target_day is not used in this function
    window = 7   # number of trailing observations that are scored
    horizon = 3  # every hold-out fit predicts this many days ahead

    observed = np.array(
        [df[outcome].values[row][-window:] for row in range(len(df))])

    weights = {}
    for model_index, model in enumerate(methods):
        demographic_vars = (model['demographic_vars']
                            if 'demographic_vars' in model else [])

        held_out_preds = np.zeros(observed.shape)
        for t in range(1, window + 1):
            # hide enough trailing days that the horizon-day-ahead forecast
            # lands on the t-th most recent observation
            truncated = exponential_modeling.leave_t_day_out(
                df, t + horizon - 1)
            fitted = fit_and_predict.fit_and_predict(
                truncated,
                outcome=outcome,
                method=model['model_type'],
                mode='predict_future',
                target_day=np.array([horizon]),
                output_key='y_preds',
                demographic_vars=demographic_vars)
            held_out_preds[:, (window - t)] = np.array(
                [fitted['y_preds'].values[row][-1]
                 for row in range(len(df))])

        # Square-root scale, with negative predictions clipped to zero.
        # (Alternatives tried earlier: log(1 + y), the raw scale, and the
        # fourth root.)
        weights[model_index] = pmdl_weight(
            np.sqrt(observed),
            np.sqrt(np.maximum(held_out_preds, 0)),
            c0=c0,
            mu=mu)
    return weights
def add_all_preds(df_county):
    """
    add single predictor predictions for the past {ndays} days

    For every entry of the surrounding `methods` and every t in 1..ndays,
    the last t days are hidden with `exponential_modeling.leave_t_day_out`,
    the predictor is refit for horizons 1..horizon, and its prediction
    column is copied into `df_county` under
    'all_{outcome}_pred_{month}_{day}_{method}_{horizon}', where the date is
    `today - t days`.

    The 'demographic' method is run as 'shared_exponential' with
    `very_important_vars` as demographic variables and is skipped for dates
    before 2020-03-16. For 'ensemble' no model is fit here; the prediction
    column is read directly from the frame returned by `leave_t_day_out`
    (presumably it already exists there -- verify against caller).

    Input:
        df_county: pd.DataFrame
    Output:
        df_county with the additional prediction columns
    """
    for method in methods:
        is_demographic = method == 'demographic'
        predicted_key = f'predicted_{outcome}_{method}_{horizon}'

        for t in tqdm(range(1, ndays + 1)):
            d = today - timedelta(t)
            if d < date(2020, 3, 16) and is_demographic:
                continue

            use_df = exponential_modeling.leave_t_day_out(df_county, 0 + t)

            if is_demographic or method != 'ensemble':
                # only the demographic predictor takes extra covariates
                extra_kwargs = (
                    {'demographic_vars': very_important_vars}
                    if is_demographic else {})
                use_df = fit_and_predict.fit_and_predict(
                    use_df,
                    target_day=np.arange(1, horizon + 1),
                    outcome=outcome,
                    method='shared_exponential' if is_demographic else method,
                    mode='predict_future',
                    output_key=predicted_key,
                    **extra_kwargs)

            result_key = (
                f'all_{outcome}_pred_{d.month}_{d.day}_{method}_{horizon}')
            df_county[result_key] = use_df[predicted_key]
    return df_county
def previous_prediction_errors(df,
                               target_day: np.ndarray = np.array([1]),
                               outcome: str = 'deaths',
                               methods: list = [advanced_model, linear],
                               look_back_day: int = 5,
                               output_key: str = None):
    """
    Calculating prediction errors of previous days

    Input:
        df: pd.DataFrame
        target_day: np.ndarray, horizons (days ahead) to evaluate
        outcome: str, column of df holding the observed sequences
        methods: list of model specifications passed to
            fit_and_predict_ensemble
        look_back_day: int, number of past start days to replay per horizon
        output_key: str, column of df that receives the result
    Output:
        df with an extra column `output_key` containing, for each row, a
        dictionary whose keys are the days in target_day and whose values
        are lists of (normalized) errors, actual / max(pred, 1) - 1, of
        length {look_back_day}
    """
    # Group the horizons by how many trailing days have to be hidden, so
    # that each truncated data set is fit only once.
    horizons_by_cutoff = defaultdict(list)
    for horizon in target_day:
        for offset in range(look_back_day):
            horizons_by_cutoff[horizon + offset].append(horizon)

    errors_per_row = [defaultdict(list) for _ in range(len(df))]

    for cutoff, horizons in horizons_by_cutoff.items():
        # replay the ensemble as it would have been fit `cutoff` days ago
        truncated = exponential_modeling.leave_t_day_out(df, cutoff)
        refit = fit_and_predict_ensemble(
            truncated,
            target_day=np.array(horizons),
            outcome=outcome,
            methods=methods,
            mode='predict_future',
            output_key='old_predictions',
        )
        old_preds = refit['old_predictions'].values

        for row, row_errors in enumerate(errors_per_row):
            observed = df[outcome].iloc[row]
            for col, horizon in enumerate(horizons):
                predicted = old_preds[row][col]
                # horizon - cutoff - 1 is negative: it counts back from the
                # end of the observed sequence to the day that was predicted
                actual = observed[horizon - cutoff - 1]
                row_errors[horizon].append(actual / max(predicted, 1) - 1)

    df[output_key] = errors_per_row
    return df
def _enforce_non_decreasing(preds):
    """Return `preds` as a list where no entry is below the entry before it."""
    adjusted = []
    for value in preds:
        if adjusted:
            # compare against the already-adjusted predecessor (a running
            # maximum); comparing against the raw predecessor would let
            # [5, 3, 1] become [5, 5, 3], which still decreases
            value = max(value, adjusted[-1])
        adjusted.append(value)
    return adjusted


def fit_and_predict_ensemble(df,
                             target_day: np.ndarray = np.array([1]),
                             outcome: str = 'deaths',
                             methods: list = [shared_exponential, linear],
                             mode: str = 'predict_future',
                             output_key: str = None,
                             verbose: bool = False,
                             weight_c0: int = 1,
                             weight_mu: float = 0.5,
                             debug: bool = False,
                             expanded_shared_time_truncation=None):
    """
    Function for ensemble prediction

    Every model in `methods` is fit with `fit_and_predict`; the individual
    predictions are then averaged per row with weights obtained from
    `pmdl_weight.compute_pmdl_weight` (normalized to sum to one), and the
    averaged sequence is forced to be non-decreasing over the target days.

    Input:
        df: pd.DataFrame
        target_day: array of horizons (days ahead) to predict
        outcome: str
        methods: list of dictionary
            each dictionary specify the type and parameters of the model
            (key 'model_type', optional key 'demographic_vars')
        mode: str
            for 'predict_future' the weights are computed on df itself,
            otherwise on df with the last target_day[-1] days left out
        output_key: str
            column receiving the result; defaults to
            'predicted_{outcome}_ensemble_{target_day[-1]}'
        verbose: bool, print the average normalized weight of each model
            (also forwarded to fit_and_predict)
        weight_c0, weight_mu: forwarded to compute_pmdl_weight as c0 and mu
        debug: bool, print progress messages
        expanded_shared_time_truncation: forwarded to fit_and_predict
    Output:
        df with ensemble prediction stored in column `output_key`
        (one list of len(target_day) values per row)
    """
    if output_key is None:
        output_key = f'predicted_{outcome}_ensemble_{target_day[-1]}'

    # individual predictions of every model, keyed by position in `methods`
    predictions = {}
    for (i, model) in enumerate(methods):
        if debug:
            print(f"[DEBUG] fit_and_predict_ensemble:{i}, {model}")
        if 'demographic_vars' in model:
            demographic_vars = model['demographic_vars']
        else:
            demographic_vars = []

        predictions[i] = fit_and_predict(
            df,
            outcome=outcome,
            method=model['model_type'],
            mode=mode,
            target_day=target_day,
            output_key=f'y_preds_{i}',
            demographic_vars=demographic_vars,
            verbose=verbose,
            expanded_shared_time_truncation=expanded_shared_time_truncation
        )[f'y_preds_{i}'].values

    if mode == 'predict_future':
        use_df = df
    else:
        use_df = exponential_modeling.leave_t_day_out(df, target_day[-1])

    if debug:
        print("[DEBUG] fit_and_predict_ensemble: compute weights.")
    weights = pmdl_weight.compute_pmdl_weight(use_df,
                                              methods=methods,
                                              outcome=outcome,
                                              target_day=target_day,
                                              c0=weight_c0,
                                              mu=weight_mu)

    # per-row normalizing constant of the model weights
    sum_weights = np.zeros(len(use_df))
    for model_index in weights:
        sum_weights = sum_weights + np.array(weights[model_index])

    # NOTE(review): predictions are indexed by the rows of df while the
    # weights come from use_df; this assumes both have the same number of
    # rows -- confirm for modes other than 'predict_future'.
    weighted_preds = [np.zeros(len(target_day)) for _ in range(len(use_df))]
    for i in range(len(df)):
        for model_index in weights:
            weighted_preds[i] += np.array(
                predictions[model_index][i]
            ) * weights[model_index][i] / sum_weights[i]

    # print out the relative contribution of each model
    if verbose:
        print('--- Model Contributions ---')
        model_weight_counter = Counter()
        for model_index in weights:
            m_weights = 0
            for i in range(len(use_df)):
                m_weights += weights[model_index][i] / sum_weights[i]
            m_weights = m_weights / len(use_df)
            model_weight_counter[model_index] = m_weights
        for model_index, weight in model_weight_counter.most_common():
            print(str(methods[model_index]) + ': ' + str(weight))

    # Make sure predictions are non-decreasing
    if debug:
        print("[DEBUG] fit_and_predict_ensemble: monotonicity constraint.")
    weighted_preds = [_enforce_non_decreasing(preds)
                      for preds in weighted_preds]

    df[output_key] = weighted_preds
    return df