def plot_uncertainty(generator, prediction_times, sharex, sharey, draw_space, plot_obs, plot_draws=False): """ Plot the draws from a model generator at some prediction times. Args: generator: (curvefit.model_generator.ModelPipeline) that has some draws prediction_times: (np.array) of prediction times sharex: (bool) fix the x axes sharey: (bool) fix the y axes draw_space: (callable) which curvefit.functions space to plot the draws in plot_obs: (str) column of observations to plot, plot_draws: (bool) whether to plot all of the draws or just the summaries """ fig, ax = plt.subplots(len(generator.groups), 1, figsize=(8, 4 * len(generator.groups)), sharex=sharex, sharey=sharey) for i, group in enumerate(generator.groups): draws = generator.draws[group].copy() draws = data_translator(data=draws, input_space=generator.predict_space, output_space=draw_space) mean_fit = generator.mean_predictions[group].copy() mean_fit = data_translator(data=mean_fit, input_space=generator.predict_space, output_space=draw_space) mean = draws.mean(axis=0) lower = np.quantile(draws, axis=0, q=0.025) upper = np.quantile(draws, axis=0, q=0.975) ax[i].plot(prediction_times, mean, c='red', linestyle=':') ax[i].plot(prediction_times, lower, c='red', linestyle=':') ax[i].plot(prediction_times, upper, c='red', linestyle=':') ax[i].plot(prediction_times, mean_fit, c='black') df_data = generator.all_data.loc[generator.all_data[ generator.col_group] == group].copy() ax[i].scatter(df_data[generator.col_t], df_data[plot_obs]) ax[i].set_title(f"{group} predictions")
def simulate(self, mp, num_simulations, prediction_times, group, epsilon=1e-2, theta=1): """ Simulate the residuals based on the mean and standard deviation of predicting into the future. Args: mp: (curvefit.model_generator.ModelPipeline) model pipeline prediction_times: (np.array) times to create predictions at num_simulations: number of simulations group: (str) the group to make the simulations for epsilon: (epsilon) the floor for standard deviation moving out into the future theta: (theta) scaling of residuals to do relative to prediction magnitude Returns: List[pd.DataFrame] list of data frames for each simulation """ data = mp.all_data.loc[mp.all_data[mp.col_group] == group].copy() max_t = int(np.round(data[mp.col_t].max())) num_obs = data.loc[~data[mp.col_obs_compare].isnull()][ mp.col_group].count() predictions = mp.mean_predictions[group] add_noise = prediction_times > max_t no_noise = prediction_times <= max_t forecast_out_times = prediction_times[add_noise] - max_t residuals = self.predict(far_out=forecast_out_times, num_data=np.array([num_obs])) std_residual = residuals['residual_std'].apply( lambda x: max(x, epsilon)).values no_error = np.zeros(shape=(num_simulations, sum(no_noise))) error = np.random.normal(0, scale=std_residual, size=(num_simulations, sum(add_noise))) all_error = np.hstack([no_error, error]) noisy_forecast = predictions - (predictions**theta) * all_error noisy_forecast = data_translator(data=noisy_forecast, input_space=mp.predict_space, output_space=mp.predict_space) return noisy_forecast
def simulate(self, mp, far_out, num_simulations, group, epsilon=1e-2, theta=1): """ Simulate the residuals based on the mean and standard deviation of predicting into the future. Args: mp: (curvefit.model_generator.ModelPipeline) model pipeline far_out: (int) how far out into the future to predict num_simulations: number of simulations group: (str) the group to make the simulations for epsilon: (epsilon) the floor for standard deviation moving out into the future theta: (theta) scaling of residuals to do relative to prediction magnitude Returns: List[pd.DataFrame] list of data frames for each simulation """ data = mp.all_data.loc[mp.all_data[mp.col_group] == group].copy() max_t = data[mp.col_t].max() num_obs = data.loc[~data[mp.col_obs_compare].isnull()][ mp.col_group].count() num_out = np.array(range(far_out)) + 1 forecast_times = max_t + num_out observations = np.asarray(data[mp.col_obs_compare]) obs_times = np.asarray(data[mp.col_t]) all_times = np.append(obs_times, forecast_times) mean_pred = mp.predict(times=forecast_times, predict_space=mp.predict_space, predict_group=group) residuals = self.predict(far_out=num_out, num_data=np.array([num_obs])) mean_residual = residuals['residual_mean'].values std_residual = residuals['residual_std'].apply( lambda x: max(x, epsilon)).values error = np.random.normal(loc=mean_residual, scale=std_residual, size=(num_simulations, far_out)) forecast_data = mean_pred + (mean_pred**theta) * error simulated_flag = np.append(np.repeat(0, len(observations)), np.repeat(1, far_out)) cov_dict = {} for cov in mp.all_cov_names: covariate = data[cov].unique() assert len( covariate ) == 1, f"There is not a unique covariate value for {cov} group {group}" cov_dict[cov] = covariate[0] dfs = [] for i in range(num_simulations): new_observations = np.append(observations, forecast_data[i, :]) # translate into new space with data translator fit_space_new_observations = data_translator( data=new_observations, input_space=mp.predict_space, output_space=mp.fun) df = pd.DataFrame({ mp.col_t: all_times, mp.col_obs: fit_space_new_observations, mp.col_obs_compare: new_observations, mp.col_group: group, 'simulated': simulated_flag, 'intercept': 1 }) for k, v in cov_dict.items(): df[k] = v if mp.obs_se_func is not None: df[mp.col_obs_se] = df[mp.col_t].apply(mp.obs_se_func) dfs.append(df) return dfs