[M, C] = gpr.gpmodel(cc, rr, data_year, data_mort, data_var, data_category, prior_year, prior_mort, mse, best_scale, best_amp2x, predictionyears)

## find mean and standard error, drawing from M and C
draws = 1000
mort_draws = np.zeros((draws, len(predictionyears)))
gpr_seeds = [x + 123456 for x in range(1, 1001)]

for draw in range(draws):
    np.random.seed(gpr_seeds[draw])
    mort_draws[draw, :] = Realization(M, C)(predictionyears)

# collapse across draws
# note: space transformations need to be performed at the draw level
logit_est = gpr.collapse_sims(mort_draws)
unlogit_est = gpr.collapse_sims(gpr.inv_logit(mort_draws))

if hivsims == 0:
    os.chdir('FILEPATH')
    all_est = []
    for i in range(len(predictionyears)):
        all_est.append((cc, predictionyears[i], unlogit_est['med'][i], unlogit_est['lower'][i], unlogit_est['upper'][i]))
    all_est = pl.array(all_est, [('ihme_loc_id', '|S32'), ('year', '<f8'), ('med', '<f8'), ('lower', '<f8'), ('upper', '<f8')])
    pl.rec2csv(all_est, 'gpr_%s.txt' % cc)

# save the sims
all_sim = []
for i in range(len(predictionyears)):
        mse=mse)
else:  # data model
    [M, C] = gpr.gpmodel(ihme_loc_id, region_name, data_year, data_mort, data_var, data_category, prior_year, prior_mort, mse, best_scale, best_amp2x, predictionyears)

## find mean and standard error, drawing from M and C
draws = 1000
mort_draws = np.zeros((draws, len(predictionyears)))
gpr_seeds = [x + 123456 for x in range(1, 1001)]

for draw in range(draws):
    np.random.seed(gpr_seeds[draw])
    mort_draws[draw, :] = Realization(M, C)(predictionyears)

# collapse across draws
# note: space transformations need to be performed at the draw level
logit_est = gpr.collapse_sims(mort_draws)
unlogit_est = gpr.collapse_sims(gpr.inv_logit(mort_draws))

# save the sims
all_sim = []
for i in range(len(predictionyears)):
    for s in range(draws):
        all_sim.append((ihme_loc_id, predictionyears[i], s, gpr.inv_logit(mort_draws[s][i])))
all_sim = pl.array(all_sim, [('ihme_loc_id', '|S32'), ('year', '<f8'), ('sim', '<f8'), ('mort', '<f8')])
pl.rec2csv(all_sim, "FILEPATH")
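# --- Illustrative sketch (not from the original gpr module) ------------------
# gpr.inv_logit and gpr.collapse_sims are defined elsewhere and are not shown
# in this excerpt. The helpers below are only an assumption about what they do,
# inferred from how their outputs ('med', 'lower', 'upper', and, in the holdout
# script, 'std') are indexed; whether 'med' is a mean or a median and the exact
# interval width are assumptions.
import numpy as np

def inv_logit_sketch(x):
    # back-transform from logit space to mortality-rate space
    return 1.0 / (1.0 + np.exp(-x))

def collapse_sims_sketch(mort_draws):
    # collapse a (draws x years) matrix into per-year summaries
    return {
        'med': np.mean(mort_draws, axis=0),
        'lower': np.percentile(mort_draws, 2.5, axis=0),
        'upper': np.percentile(mort_draws, 97.5, axis=0),
        'std': np.std(mort_draws, axis=0),
    }
# -----------------------------------------------------------------------------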
print("data model") [M,C] = gpr.gpmodel(ihme_loc_id,region_name,data_year,data_mort,data_var,data_category,prior_year,prior_mort,mse,best_scale,best_amp2x,predictionyears) ## find mean and standard error, drawing from M and C draws = 1000 mort_draws = np.zeros((draws, len(predictionyears))) gpr_seeds = [x+123456 for x in range(1,1001)] for draw in range(draws): np.random.seed(gpr_seeds[draw]) mort_draws[draw,:] = Realization(M, C)(predictionyears) # collapse across draws # note: space transformations need to be performed at the draw level print("collapse across draws...") logit_est = gpr.collapse_sims(mort_draws) unlogit_est = gpr.collapse_sims(gpr.inv_logit(mort_draws)) # the difference of the mean of the antilogited draws from the antilogit of the mean of the draws mean_diff = np.subtract(unlogit_est['med'],gpr.inv_logit(logit_est['med'])) all_est = [] for i in range(len(predictionyears)): all_est.append((ihme_loc_id,ss, predictionyears[i], unlogit_est['med'][i] - mean_diff[i], unlogit_est['lower'][i] - mean_diff[i], unlogit_est['upper'][i] - mean_diff[i])) labels = ['ihme_loc_id','sex','year','mort_med','mort_lower', 'mort_upper'] all_est_df = pd.DataFrame.from_records(all_est, columns=labels) est_file = "{}/gpr_{}_{}_not_scaled.csv".format(output_dir, ihme_loc_id,ss) # all_est_df['sex'] = ss all_est_df.to_csv(est_file, index = False) # save the sims all_sim = []
    [M, C] = gpr.gpmodel_nodata(pyear=prior_year, pmort=prior_mort, scale=best_scale, predictionyears=predictionyears, sim=1000, amp2x=best_amp2x, mse=mse)
else:  # data model
    [M, C] = gpr.gpmodel(cc, rr, data_year, data_mort, data_var, data_category, prior_year, prior_mort, mse, best_scale, best_amp2x, predictionyears)

## find mean and standard error, drawing from M and C
draws = 1000
mort_draws = np.zeros((draws, len(predictionyears)))
gpr_seeds = [x + 123456 for x in range(1, 1001)]

for draw in range(draws):
    np.random.seed(gpr_seeds[draw])
    mort_draws[draw, :] = Realization(M, C)(predictionyears)

# collapse across draws
logit_est = gpr.collapse_sims(mort_draws)
mort_draws = gpr.inv_logit(mort_draws)

mort_draws = pd.DataFrame(mort_draws)
mort_draws.columns = predictionyears
mort_draws['ihme_loc_id'] = cc
mort_draws['sim'] = list(range(1000))
mort_draws = pd.melt(mort_draws, id_vars=['ihme_loc_id', 'sim'], var_name='year', value_name='mort')

# Unscale backtransformed draws
upper_logit_bound = bounds[bounds['age'] == int(age)]['upper_bound'].iloc[0]
lower_logit_bound = bounds[bounds['age'] == int(age)]['lower_bound'].iloc[0]
mort_draws['mort'] = mort_draws['mort'] * (upper_logit_bound - lower_logit_bound) + lower_logit_bound
mort_draws = mort_draws.rename(index=str, columns={"mort": "val"})

# Collapse unscaled and backtransformed draws
meandf = mort_draws[['ihme_loc_id', 'year', 'val']].groupby(by=['ihme_loc_id', 'year']).mean()
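# --- Illustrative sketch (not part of the original script) -------------------
# The "unscale" step above implies that the modelled quantity was first mapped
# into (0, 1) with age-specific bounds and then logit-transformed, i.e. the
# assumed forward transform is logit((y - lower) / (upper - lower)). The round
# trip below is a sketch under that assumption; the bound values are made up.
import numpy as np

def to_bounded_logit(y, lower, upper):
    p = (y - lower) / (upper - lower)   # rescale into (0, 1)
    return np.log(p / (1.0 - p))        # logit

def from_bounded_logit(x, lower, upper):
    p = 1.0 / (1.0 + np.exp(-x))        # inv_logit, as applied to the draws above
    return p * (upper - lower) + lower  # unscale, matching the draw-level line above

y = 0.035
x = to_bounded_logit(y, lower=0.0, upper=0.5)
assert np.isclose(from_bounded_logit(x, lower=0.0, upper=0.5), y)
# -----------------------------------------------------------------------------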
else:  # data model
    [M, C] = gpr.gpmodel(cc, rr, train_year, train_mort, train_var, train_category, prior_year, prior_mort, mse, scale, amp2x, predictionyears)

## find mean and standard error, drawing from M and C
draws = 1000  # not setting seed here because the holdouts are random anyway
mort_draws = np.zeros((draws, len(predictionyears)))
for draw in range(draws):
    mort_draws[draw, :] = Realization(M, C)(predictionyears)

# collapse across draws
# note: space transformations need to be performed at the draw level
logit_est = gpr.collapse_sims(mort_draws)
unlogit_est = gpr.collapse_sims(gpr.inv_logit(mort_draws))

## save the predictions
for i in range(len(predictionyears)):
    all_est.append((rr, cc, ho, scale, amp2x, mse * amp2x, predictionyears[i], unlogit_est['med'][i], unlogit_est['std'][i]))

## calculate error and save this too
for year, mort, var in zip(test_year, test_mort, test_var):
    pred_index = (predictionyears == year)
    re = (gpr.inv_logit(mort) - unlogit_est['med'][pred_index]) / (gpr.inv_logit(mort))
    total_var = var + logit_est['std'][pred_index]**2
    coverage = int((logit_est['med'][pred_index] -