def analyze_final_output(path_inv, path_sq, HORIZON=60, city=None):
    """Read CEPAC intervention and status-quo outputs, build a plotting-
    friendly structure, save heatmaps, and export the percentage-reduction
    table as CSV.

    Parameters
    ----------
    path_inv : str
        Path to the intervention CEPAC output files (read via ``link``).
    path_sq : str
        Path to the status-quo CEPAC output files; only its 'SQ' entry
        is used.
    HORIZON : int, optional
        Time horizon; used both by ``create_plot_df`` and in the names
        of the result folders.
    city : optional
        Passed through to ``create_plot_df``.
    """

    def _make_result_dir(folder_name):
        # All result folders live two levels above the intervention path.
        # exist_ok avoids the check-then-create race of os.path.exists +
        # os.makedirs that the previous version had four copies of.
        save_path = os.path.join(path_inv, '..', '..', folder_name)
        os.makedirs(save_path, exist_ok=True)
        return save_path

    # extract cepac out
    cepac_out = link.import_all_cepac_out_files(path_inv, module='regression')
    # read SQ out
    cepac_out['SQ'] = (link.import_all_cepac_out_files(
        path_sq, module='regression'))['SQ']

    # create a plotting friendly df
    plot_dict = create_plot_df(cepac_out, HORIZON, city)

    # plot results and save images
    # lineplot (plotting currently disabled; folder still created, as before)
    save_path = _make_result_dir('Line plots for CEPAC output_' + str(HORIZON))
    #save_line_plots(plot_dict, save_path)

    # heatmap
    save_path = _make_result_dir('Heatmaps for CEPAC output_' + str(HORIZON))
    save_heatmaps(plot_dict, save_path, 'Percentage reduction in incidence rate')
    save_heatmaps(plot_dict, save_path, 'Infections averted')
    save_heatmaps(plot_dict, save_path, 'Infections averted (%)')
    save_heatmaps(plot_dict, save_path, 'Transmissions averted')

    # scatter (plotting currently disabled; folder still created, as before)
    save_path = _make_result_dir('Scatter plots for CEPAC output_' + str(HORIZON))
    #save_scatter_plots(plot_dict['percentage reduction'], save_path)

    # save results
    save_path = _make_result_dir('Excel files for results_' + str(HORIZON))
    plot_dict['percentage reduction'].to_csv(
        os.path.join(save_path, 'outcomes.csv'))

    return
def select_output_subset(path):
    """Collect CEPAC regression outputs from every run folder under *path*.

    Each immediate subfolder of *path* is expected to contain a
    ``results`` directory readable by ``link.import_all_cepac_out_files``.
    The aggregate 'popstats' entry is dropped and the remaining keys
    (numeric strings) are exposed as ``int``.

    Parameters
    ----------
    path : str
        Directory whose subfolders each hold one set of run results.

    Returns
    -------
    dict
        {int(run_key): run_output} merged over all subfolders.
    """
    data_dict = {}
    # iterate over every run folder in the given directory
    for folder in os.listdir(path):
        new_pairs = link.import_all_cepac_out_files(
            os.path.join(path, folder, "results"), module="regression")
        # popstats is aggregate bookkeeping, not a run output; tolerate
        # its absence instead of raising KeyError
        new_pairs.pop("popstats", None)
        # keys are numeric strings; convert once while merging (the old
        # version buffered everything in a second dict and re-walked it)
        for key, value in new_pairs.items():
            data_dict[int(key)] = value
    return data_dict
def __init__(self, path, sensitivity_module=False, transmission_module=False):
    """Load structured CEPAC output for every run folder under *path*.

    Parameters
    ----------
    path : str
        Directory whose immediate subfolders each contain a ``results``
        directory of CEPAC output files.
    sensitivity_module : bool, optional
        Parse output with the 'sensitivity_module' reader.
    transmission_module : bool, optional
        Parse output with the 'regression' reader.

    Raises
    ------
    ValueError
        If neither flag is set; previously this surfaced as a cryptic
        ``IndexError`` from indexing an empty list.
    """
    # Sensitivity takes precedence when both flags are set, matching the
    # original list-concatenation selection ([...][0]).
    if sensitivity_module:
        which_module = 'sensitivity_module'
    elif transmission_module:
        which_module = 'regression'
    else:
        raise ValueError(
            'Either sensitivity_module or transmission_module must be True.')

    d = {}
    # each run folder keeps its CEPAC output under 'results'
    for folder in os.listdir(path):
        float_path = os.path.join(path, folder, 'results')
        d[folder] = self.get_structured_data(
            link.import_all_cepac_out_files(float_path, which_module),
            sensitivity_module, transmission_module)

    # point the d dictionary to the relevant attribute
    self.output_data = d

    # if the transmission module is on, we'll need to read input files too
    if transmission_module:
        print('chan')  # TODO(review): placeholder — input reading not implemented
    return
def get_in_out_data(path):
    """Import CEPAC input and output files for each scenario in *path*.

    Parameters
    ----------
    path : dict
        {scenario_name: scenario_directory}; each directory must hold a
        ``results`` subfolder with CEPAC output files.

    Returns
    -------
    (dict, dict)
        ``cepac_in`` with each run's input reduced to the transmission
        multiplier variable, and the full ``cepac_out`` per scenario.
    """
    cepac_out, cepac_in = {}, {}
    # import required input and output for every scenario
    for k in path:
        cepac_out[k] = link.import_all_cepac_out_files(
            os.path.join(path[k], r'results'), module='regression')
        cepac_in[k] = link.import_all_cepac_in_files(path[k])

    # keep only the transmission multiplier variable from each input file
    var_name = ["DynamicTransmissionNumTransmissionsHRG"]
    for sce in cepac_in:
        for run in cepac_in[sce]:
            cepac_in[sce][run] = link.get_subset_of_in_file(
                cepac_in[sce][run], var_name)

    return cepac_in, cepac_out
# collect SQ output if multisq: try: c_op.collect_output(os.path.join(sqpath, 'SQ_'+var)) except: pass # read output # SQ if multisq: try: c_op.collect_output(os.path.join(sqpath, 'SQ_'+var, 'results')) except: pass cepac_outputs['status quo'] = link.import_all_cepac_out_files(os.path.join(sqpath, 'SQ_'+var, 'results'), module = 'regression') else: cepac_outputs['status quo'] = link.import_all_cepac_out_files(os.path.join(sqpath, 'SQ', 'results'), module = 'regression')['SQ'] # INT cepac_outputs[var] = link.import_all_cepac_out_files(os.path.join(readpaths[var], 'results'), module = 'regression') # send to community benefit function for file in cepac_outputs[var]: # calculate community benefit if multisq: per_red, coeff = community_benefit(cepac_outputs['status quo']['SQ_'+file], cepac_outputs[var][file], HORIZON) else: per_red, coeff = community_benefit(cepac_outputs['status quo'], cepac_outputs[var][file], HORIZON) coeff = max(coeff, 10**-8)
def write_final_runs(value_grid, path_dict, STOP_TIME = 60):
    """Build the final CEPAC run files from the ABC percentage-decline runs.

    Imports the intervention/status-quo outputs named in ``path_dict``,
    computes a percentage decline per (coverage, duration) cell, plots a
    heatmap, exports results to excel, and writes one modified .in file
    per intervention run into path_dict['output']['final runs'].

    Parameters:
        value_grid (dict): grids of 'PrEPCoverage' and 'PrEPDuration' values.
        path_dict (dict): 'input', and 'output' sub-dict with keys
            'intervention', 'status quo', 'final runs'.
        STOP_TIME (int): incidence-reduction stop time written into the files.
    """
    # aux function for creating final run files
    def get_reduction_coeff(percentage_red, stop_time):
        # reduction coefficient will be as follows
        if ((0.01*percentage_red) < 1) and (percentage_red > 0):
            red_coeff = -1*stop_time/(np.log(1 - (0.01*percentage_red)))
        # NOTE(review): the two sides of this `or` are identical
        # (percentage_red <= 0 vs <= 0.0) — one is redundant
        elif (percentage_red <= 0) or (percentage_red <= 0.0):
            red_coeff = 10000
        else:
            # this says percentage reduction is >= 100%
            red_coeff = -1*stop_time/np.log(10**-100)
        return red_coeff

    # check if both inputs are dictionaries
    if not (isinstance(value_grid, dict) or isinstance(path_dict, dict)):
        raise TypeError("Input needs to be in dictionary format.")
        # NOTE(review): unreachable — raise above always exits
        return
    #
    val_to_replace = value_grid
    del value_grid
    # import out files
    #out_path = os.path.join(path_dict['output']['intervention'], r'results')
    cepac_out = {}
    for k in path_dict['output']:
        cepac_out[k] = link.import_all_cepac_out_files(os.path.join(path_dict['output'][k], r'results'), module = "regression")
    # set stop time and PrEP duration
    #SQ_DF = link.import_all_cepac_in_files(path_dict['output']['status quo'])['SQ']
    #STOP_TIME = int(t_op.read_values('HIVIncidReductionStopTime', SQ_DF))
    #del SQ_DF
    # create sets of runs ABC: group RunB/RunC files by the numeric key
    # extracted from their file names
    out = {}
    name_map = {}
    for k in cepac_out['intervention']:
        if k == 'popstats':
            continue
        key_name = aux.get_digit_for_file_name(k, val_to_replace)
        if not key_name in out.keys():
            out[key_name] = {}
            name_map[key_name] = k
        if "RunB" in k:
            out[key_name]['B'] = cepac_out['intervention'][k]
        elif "RunC" in k:
            out[key_name]['C'] = cepac_out['intervention'][k]
        else:
            continue
    # TODO: following value will change according to city, need to take care of this
    inp = {}
    for k in out:
        # run A is always the shared status quo output
        out[k]['A'] = cepac_out['status quo']['SQ']#['A']
        inp[k] = {'PrEPCoverage': 0, 'prep_efficacy': 0.96, 'CohortSize': 10000000, 'PrEPDuration': 0, 'HIVmthIncidMale': 0.00357692085607886, 'prep_usage_at_initialization': 'n'}
    row_idx = 0
    percentage_decline = []
    # one decline value per (coverage i, duration j) grid cell, in row order
    for i in val_to_replace['PrEPCoverage']:
        for j in val_to_replace['PrEPDuration']:
            if i == 0.0:
                # zero coverage: A = B = C = status quo for this cell
                j_idx = np.where(val_to_replace['PrEPDuration'] == j)[0][0]
                out[j_idx] = {}
                out[j_idx]["A"] = cepac_out["status quo"]["SQ"]
                out[j_idx]["B"] = cepac_out["status quo"]["SQ"]
                out[j_idx]["C"] = cepac_out["status quo"]["SQ"]
                # NOTE(review): this aliases inp[1] (no copy) — the writes
                # below then mutate the shared dict; confirm intended
                inp[j_idx] = inp[1]
            inp[row_idx]['PrEPCoverage'] = i
            inp[row_idx]['PrEPDuration'] = STOP_TIME #60 #600
            percentage_decline.append(tx_algo.get_percentage_decline(out[row_idx], inp[row_idx]))
            row_idx += 1
    # plot results
    plot_heatmap(val_to_replace, percentage_decline, path_dict)
    # write results to excel file
    export_abc_out_to_excel(cepac_out, out, val_to_replace, path_dict)
    # create directory to write the final run files
    if not os.path.exists(path_dict['output']['final runs']):
        os.makedirs(path_dict['output']['final runs'])
    final_path = path_dict['output']['final runs']
    # read one .in file (B, as that is intervention file) and alter following values
    # 1. reduction coefficient
    # 2. disable dynamic transmission module
    # import base file for B
    base_int = link.import_all_cepac_in_files(path_dict['input'])
    base_int = base_int['B']
    # first we'll find the indices of all the required variables
    idx = {}
    var_list = ["UseHIVIncidReduction", "HIVIncidReductionStopTime", "HIVIncidReductionCoefficient", "UseDynamicTransmission", "PrEPCoverage", "PrEPDuration"]
    for k in var_list:
        # rows whose first column equals the variable name
        idx[k] = base_int.loc[base_int.loc[:, 0] == k, :].index.values
    # replace required values, one output file per grouped run
    for run in name_map:
        # deepcopy so each run edits its own frame
        float_int = deepcopy(base_int)
        # following value of stop time might not be correct
        float_int.loc[idx["HIVIncidReductionStopTime"], 1] = STOP_TIME #480#120
        coeff = get_reduction_coeff(percentage_decline[run], float_int.loc[idx["HIVIncidReductionStopTime"], 1].values[0])
        if coeff <= 0:
            # disable incidence reduction
            float_int.loc[idx["UseHIVIncidReduction"], 1] = 0
            #float_int.loc[idx["HIVIncidReductionCoefficient"], 1] = coeff
        else:
            # enable incidence reduction
            float_int.loc[idx["UseHIVIncidReduction"], 1] = 1
            float_int.loc[idx["HIVIncidReductionCoefficient"], 1] = coeff
        # disable dynamic transmission module
        # TODO: we need to take care that the values for risk multiplier are correct
        # NOTE(review): despite the comment above, this sets the flag to 1
        # (enabled) — confirm which is intended
        float_int.loc[idx["UseDynamicTransmission"], 1] = 1
        # replace coverage and coverage time (values parsed from file name)
        float_int.loc[idx["PrEPCoverage"], 1:2] = 0.01*(aux.get_coverage_level_from_file_name(name_map[run]))
        float_int.loc[idx["PrEPDuration"], 1:2] = aux.get_coverage_time_from_file_name(name_map[run])
        # write the file
        save_path = os.path.join(final_path, name_map[run].split('_')[1]) + '.in'
        link.write_cepac_in_file(save_path, float_int)
def create_target_tensor(path, device, SEQ_LEN=60,
                         para_path=(r'/Users/vijetadeshpande/Documents/GitHub/'
                                    r'meta-environment/Data and results/CEPAC RUNS/'
                                    r'regression model input/output_mean_and_sd.csv')):
    """Build raw and standardized target tensors from CEPAC outputs.

    Parameters
    ----------
    path : str
        Result folder of the set of runs to build targets for.
    device : torch.device
        Device the resulting tensors are moved to.
    SEQ_LEN : int, optional
        Number of monthly steps kept per output feature.
    para_path : str, optional
        CSV with per-feature 'mean' and 'sd' rows used to standardize the
        targets. Defaults to the previously hard-coded location so
        existing callers are unaffected.

    Returns
    -------
    (dict, dict)
        ``targets`` and ``targets_std``, keyed by (coverage, cov_time);
        each value is a float tensor of shape (SEQ_LEN + 1, n_features)
        whose first row is zeros.
    """
    # import all results
    cepac_outputs = link.import_all_cepac_out_files(path, module='regression')
    targets, targets_std = {}, {}
    # import output distribution parameters (feature-wise mean/sd)
    para = pd.read_csv(para_path).set_index('Unnamed: 0')

    # iterate over each file
    for file in cepac_outputs:
        # RunC outputs are not modeled targets
        if 'RunC' in file:
            continue
        # get coverage and coverage time from the file name, for use as
        # the dict key; SQ files map to (0, 0)
        try:
            coverage, cov_time = int(file.split('=')[1][:2]), int(
                file.split('=')[2])
        except (IndexError, ValueError):
            if 'SQ' in file:
                coverage, cov_time = 0, 0
            else:
                coverage, cov_time = 'NA', 'NA'
        key = (coverage, cov_time)
        # the multiplier series is not a target feature; it may be absent
        try:
            cepac_outputs[file].pop('multiplier')
        except KeyError:
            pass
        # truncate every feature to the first SEQ_LEN steps
        for feature in cepac_outputs[file]:
            cepac_outputs[file][feature] = cepac_outputs[file][
                feature].to_list()[:SEQ_LEN]
        # store as matrix; row 0 stays zero (initial state)
        targets[key] = pd.DataFrame(0, index=np.arange(SEQ_LEN + 1),
                                    columns=cepac_outputs[file].keys())
        targets[key].iloc[1:, :] = pd.DataFrame(cepac_outputs[file]).values
        # standardize targets feature-wise with the imported mean/sd
        targets_std[key] = pd.DataFrame(0, index=targets[key].index,
                                        columns=targets[key].columns)
        for feature in cepac_outputs[file]:
            targets_std[key].loc[1:, feature] = (
                targets[key].loc[1:, feature]
                - para.loc['mean', feature]) / para.loc['sd', feature]
        # convert to torch tensors on the requested device
        targets[key] = torch.tensor(
            np.array(targets[key])).float().to(device)
        targets_std[key] = torch.tensor(
            np.array(targets_std[key])).float().to(device)
    return targets, targets_std
# Fragment: script-level section (relies on `base`, `strategy`, `c_op`,
# `link`, `HORIZON` defined outside this view). Resolves the read/write
# paths for one strategy and computes infections averted per run file.
basepath = os.path.join(base, strategy)
sqpath = os.path.join(base, 'Common runs', 'SQ', 'results')
readbase = os.path.join(
    basepath, 'Final runs',
    'Final runs_var1=PrEPAdherence_var2=PrEPDroputPostThreshold',
    'results')
writebase = os.path.join(base, 'Results')
# collect output
try:
    c_op.collect_output(os.path.join(readbase, '..'))
# NOTE(review): bare except silently swallows all errors (best-effort
# collection) — consider narrowing
except:
    pass
# import all cepac out files
cepac_outputs = link.import_all_cepac_out_files(readbase, module='regression')
cepac_outputs['status quo'] = link.import_all_cepac_out_files(
    sqpath, module='regression')['SQ']
#
idx = -1
for file in cepac_outputs:
    # the baseline entry is not an intervention run
    if file == 'status quo':
        continue
    #
    idx += 1
    # calculate averted infections over the first HORIZON steps
    # (status-quo minus intervention, summed)
    averted_inf = (cepac_outputs['status quo']['infections'] -
                   cepac_outputs[file]['infections'])[0:HORIZON].sum()
# Fragment: script-level section (relies on `dir_list`, `path`, `link`,
# `dtm`, `deepcopy` defined outside this view). Collects outputs per
# coverage / coverage-time folder, computes percentage decline, then
# prepares adjusted input files for the final runs.
cepac_out = {}
percentage_decline = {}
for p_coverage in dir_list["coverage"]:
    # create dictionary
    cepac_out[p_coverage] = {}
    percentage_decline[p_coverage] = {}
    # get all the subfolders
    dir_list["coverage time"] = os.listdir(os.path.join(path["output"], p_coverage))
    for p_coverage_time in dir_list["coverage time"]:
        p_res = os.path.join(path["output"], p_coverage, p_coverage_time, "results")
        # extract results
        cepac_out[p_coverage][p_coverage_time] = link.import_all_cepac_out_files(p_res, module = "regression")
        # calculate percentage decline
        percentage_decline[p_coverage][p_coverage_time], _ = dtm.get_community_benefit(cepac_out[p_coverage][p_coverage_time], os.path.join(p_res, ".."))

#%% create a set of final runs
for p_c in percentage_decline:
    for var in percentage_decline[p_c]:
        cepac_in = link.import_all_cepac_in_files(os.path.join(path["output"], p_c, var))
        float_in = deepcopy(cepac_in)
        # adjustments: drop C runs, rename A runs to the base case
        for run in cepac_in:
            # NOTE(review): `'C' in run` already subsumes `'RunC' in run`
            # (and likewise 'A' below); any key containing the letter
            # matches — confirm this is intended
            if 'RunC' in run or 'C' in run:
                float_in.pop(run)
            elif 'RunA' in run or 'A' in run:
                # NOTE(review): 'beasecase' looks like a typo for
                # 'basecase' — verify against downstream consumers
                # before changing
                float_in['beasecase'] = float_in.pop(run)
# if os.path.exists(os.path.join(path_rnn)): # load data data_rnn[batch] = h_fun1.load_all_json(path_rnn) # check if we have CEPAC output #if not 'rnn_target' in data_rnn: if not 'CEPAC_output' in data_rnn[batch]: # collect the output which was parallelized for the cluster if os.path.exists(os.path.join(path_cepac, str(0))): c_op.collect_output(path_cepac) # import raw data data_cepac[batch] = link.import_all_cepac_out_files( os.path.join(path_cepac, 'results'), module='regression') for example in data_cepac[batch]: data_cepac[batch][example].pop('multiplier') for feature in data_cepac[batch][example]: data_cepac[batch][example][feature] = data_cepac[batch][ example][feature].to_list()[:SEQ_LEN] # save raw data to json file filename = os.path.join(path_rnn, r'CEPAC_output.json') h_fun1.dump_json(data_cepac[batch], filename) else: data_cepac[batch] = data_rnn[batch].pop('CEPAC_output') # coverting keys from str to int