def select_input_subset(path, var=list_of_input_var):
    """Collect a subset of variables from every CEPAC ``.in`` file under *path*.

    Every entry returned by ``os.listdir(path)`` is handed to
    ``link.import_all_cepac_in_files``; each imported file is then reduced
    with ``link.get_subset_of_in_file``.

    Parameters
    ----------
    path : str
        Directory whose entries are scanned.
    var : list
        Variable names forwarded to ``link.get_subset_of_in_file``.

    Returns
    -------
    dict
        Maps the integer run number parsed from each file name (the name
        with ``cepac_run`` and a trailing ``.in`` removed) to the subset
        returned by ``link.get_subset_of_in_file``.

    Raises
    ------
    ValueError
        If what remains of a file name is not an integer literal.
    """
    # dictionary to collect all the subsetted data
    data_dict = {}

    # iterate over every entry of the folder
    # NOTE(review): os.listdir also returns plain files; presumably *path*
    # only contains run folders -- confirm against callers.
    for folder in os.listdir(path):
        float_dict = link.import_all_cepac_in_files(os.path.join(path, folder))
        for k in float_dict:
            float_name = os.path.basename(os.path.normpath(k))
            # Strip only a literal trailing ".in". The previous pattern
            # '.in' was unescaped and unanchored, so it removed ANY
            # character followed by "in" anywhere in the name.
            float_name = re.sub(r'\.in$', "", float_name)
            float_name = re.sub('cepac_run', "", float_name)
            data_dict[int(float_name)] = link.get_subset_of_in_file(
                float_dict[k], var)

    return data_dict
def __init__(self, path, sensitivity_module = False, transmission_module = False):
    """Load the independent variables and run the selected module.

    Parameters
    ----------
    path : str
        Project directory. The sensitivity branch reads its
        ``Input files`` sub-directory.
    sensitivity_module : bool
        When truthy, read the perturbation file and then, for every entry
        of ``Input files``, import the CEPAC input files and call
        ``self.get_sensitivity_analysis_files``. Takes precedence over
        *transmission_module*.
    transmission_module : bool
        When truthy (and *sensitivity_module* is falsy), initialise the
        transmission module through ``dtm.initialize_tx_module``.
    """
    # independent variable names and values
    self.independent_var, self.independent_var_val = link.extract_ind_var_and_val(path)

    if not sensitivity_module:
        if transmission_module:
            # run the transmission module
            self.transmission_rate_multiplier = dtm.initialize_tx_module(path)
        else:
            print('You have not activated any module among sensitivity and transmissions')
        return

    # sensitivity module: read the perturbation file first
    input_root = os.path.join(path, r'Input files')
    self.perturbation = link.import_perturbation_file(input_root)

    # Both attributes are re-assigned on every iteration, so after the
    # loop they hold the values for the last entry listed.
    for entry in os.listdir(input_root):
        entry_dir = os.path.join(input_root, entry)
        self.base_files = link.import_all_cepac_in_files(entry_dir)
        self.sensitivity_files = self.get_sensitivity_analysis_files(entry_dir)
def get_in_out_data(path):
    """Import CEPAC inputs and regression outputs for every scenario.

    Parameters
    ----------
    path : dict
        Maps a scenario name to its directory. Outputs are read from the
        ``results`` sub-directory, inputs from the directory itself.

    Returns
    -------
    tuple
        ``(cepac_in, cepac_out)``, both keyed by scenario name. Every
        imported input file in ``cepac_in`` is replaced by the subset
        ``link.get_subset_of_in_file`` returns for
        ``DynamicTransmissionNumTransmissionsHRG``.
    """
    inputs_by_scenario = {}
    outputs_by_scenario = {}

    # import required output and input, scenario by scenario
    for scenario in path:
        scenario_dir = path[scenario]
        results_dir = os.path.join(scenario_dir, r'results')
        outputs_by_scenario[scenario] = link.import_all_cepac_out_files(
            results_dir, module='regression')
        inputs_by_scenario[scenario] = link.import_all_cepac_in_files(scenario_dir)

    # keep only the transmission multiplier from each input file
    var_name = ["DynamicTransmissionNumTransmissionsHRG"]
    for runs in inputs_by_scenario.values():
        for run in runs:
            runs[run] = link.get_subset_of_in_file(runs[run], var_name)

    return inputs_by_scenario, outputs_by_scenario
def write_final_runs(value_grid, path_dict, STOP_TIME = 60):
    """Compute percentage declines from the A/B/C runs and write final ``.in`` files.

    Steps, in order:

    1. import the regression outputs of every folder in ``path_dict['output']``;
    2. group the intervention outputs into A/B/C sets;
    3. compute one percentage decline per (coverage, duration) pair with
       ``tx_algo.get_percentage_decline``;
    4. plot a heat map and export the results to Excel;
    5. write one final-run ``.in`` file per intervention set into
       ``path_dict['output']['final runs']``.

    Parameters
    ----------
    value_grid : dict
        Must provide ``'PrEPCoverage'`` and ``'PrEPDuration'``.
        NOTE(review): ``np.where(... == j)`` below suggests numpy arrays
        -- confirm against the caller.
    path_dict : dict
        ``path_dict['output']`` maps a run name to a folder holding a
        ``results`` sub-folder; the keys ``'intervention'``,
        ``'status quo'`` and ``'final runs'`` are read explicitly.
        ``path_dict['input']`` holds the base ``.in`` files (``'B'`` is used).
    STOP_TIME : int, optional
        Value written to ``HIVIncidReductionStopTime`` and used as the
        ``PrEPDuration`` input of the percentage-decline calculation.

    Raises
    ------
    TypeError
        If *value_grid* or *path_dict* is not a dictionary.
    """

    # aux function for creating final run files
    def get_reduction_coeff(percentage_red, stop_time):
        """Translate a percentage reduction into a reduction coefficient."""
        if ((0.01*percentage_red) < 1) and (percentage_red > 0):
            red_coeff = -1*stop_time/(np.log(1 - (0.01*percentage_red)))
        elif percentage_red <= 0:
            # no reduction at all
            red_coeff = 10000
        else:
            # this says percentage reduction is >= 100%
            red_coeff = -1*stop_time/np.log(10**-100)
        return red_coeff

    # Both inputs have to be dictionaries. The previous guard used
    # ``not (A or B)`` and therefore only fired when NEITHER was a dict.
    if not (isinstance(value_grid, dict) and isinstance(path_dict, dict)):
        raise TypeError("Input needs to be in dictionary format.")

    val_to_replace = value_grid

    # import out files
    cepac_out = {}
    for k in path_dict['output']:
        cepac_out[k] = link.import_all_cepac_out_files(
            os.path.join(path_dict['output'][k], r'results'), module = "regression")

    # create sets of runs ABC
    out = {}
    name_map = {}
    for k in cepac_out['intervention']:
        if k == 'popstats':
            continue
        key_name = aux.get_digit_for_file_name(k, val_to_replace)
        if key_name not in out:
            out[key_name] = {}
            name_map[key_name] = k
        if "RunB" in k:
            out[key_name]['B'] = cepac_out['intervention'][k]
        elif "RunC" in k:
            out[key_name]['C'] = cepac_out['intervention'][k]

    # TODO: following value will change according to city, need to take care of this
    inp = {}
    for k in out:
        out[k]['A'] = cepac_out['status quo']['SQ']
        inp[k] = {'PrEPCoverage': 0, 'prep_efficacy': 0.96, 'CohortSize': 10000000,
                  'PrEPDuration': 0, 'HIVmthIncidMale': 0.00357692085607886,
                  'prep_usage_at_initialization': 'n'}

    row_idx = 0
    percentage_decline = []
    for i in val_to_replace['PrEPCoverage']:
        for j in val_to_replace['PrEPDuration']:
            if i == 0.0:
                # zero coverage: use the status quo output for A, B and C
                j_idx = np.where(val_to_replace['PrEPDuration'] == j)[0][0]
                out[j_idx] = {}
                out[j_idx]["A"] = cepac_out["status quo"]["SQ"]
                out[j_idx]["B"] = cepac_out["status quo"]["SQ"]
                out[j_idx]["C"] = cepac_out["status quo"]["SQ"]
                # NOTE(review): this aliases the dict stored under key 1
                # (no copy), so the assignments below also modify inp[1]
                # -- confirm this is intended.
                inp[j_idx] = inp[1]
            inp[row_idx]['PrEPCoverage'] = i
            inp[row_idx]['PrEPDuration'] = STOP_TIME
            percentage_decline.append(
                tx_algo.get_percentage_decline(out[row_idx], inp[row_idx]))
            row_idx += 1

    # plot results
    plot_heatmap(val_to_replace, percentage_decline, path_dict)

    # write results to excel file
    export_abc_out_to_excel(cepac_out, out, val_to_replace, path_dict)

    # create directory to write the final run files
    final_path = path_dict['output']['final runs']
    os.makedirs(final_path, exist_ok=True)

    # read one .in file (B, as that is intervention file) and alter following values
    # 1. reduction coefficient
    # 2. dynamic transmission module switch
    base_int = link.import_all_cepac_in_files(path_dict['input'])
    base_int = base_int['B']

    # first we'll find the row labels of all the required variables
    idx = {}
    var_list = ["UseHIVIncidReduction", "HIVIncidReductionStopTime",
                "HIVIncidReductionCoefficient", "UseDynamicTransmission",
                "PrEPCoverage", "PrEPDuration"]
    for k in var_list:
        idx[k] = base_int.loc[base_int.loc[:, 0] == k, :].index.values

    # replace required values
    for run in name_map:
        float_int = deepcopy(base_int)

        # following value of stop time might not be correct
        float_int.loc[idx["HIVIncidReductionStopTime"], 1] = STOP_TIME
        # NOTE(review): percentage_decline is a list indexed here with the
        # key from aux.get_digit_for_file_name -- presumably an integer
        # position matching the loop order above; confirm.
        coeff = get_reduction_coeff(
            percentage_decline[run],
            float_int.loc[idx["HIVIncidReductionStopTime"], 1].values[0])
        if coeff <= 0:
            # disable incidence reduction
            float_int.loc[idx["UseHIVIncidReduction"], 1] = 0
        else:
            # enable incidence reduction
            float_int.loc[idx["UseHIVIncidReduction"], 1] = 1
            float_int.loc[idx["HIVIncidReductionCoefficient"], 1] = coeff

        # NOTE(review): the original comment said "disable dynamic
        # transmission module" but the value written is 1; left unchanged.
        # TODO: we need to take care that the values for risk multiplier are correct
        float_int.loc[idx["UseDynamicTransmission"], 1] = 1

        # replace coverage and coverage time
        float_int.loc[idx["PrEPCoverage"], 1:2] = 0.01*(aux.get_coverage_level_from_file_name(name_map[run]))
        float_int.loc[idx["PrEPDuration"], 1:2] = aux.get_coverage_time_from_file_name(name_map[run])

        # write the file
        save_path = os.path.join(final_path, name_map[run].split('_')[1]) + '.in'
        link.write_cepac_in_file(save_path, float_int)
def write_abc(value_map, path_dict):
    """Write the B and C run ``.in`` files for every coverage/duration pair.

    The base files are imported from ``path_dict['input']``. For every
    combination of ``PrEPCoverage`` and ``PrEPDuration`` with non-zero
    coverage, the values are written into the ``'B'`` and ``'C'`` base
    files, which are saved in ``path_dict['output']['intervention']``.
    The ``'SQ'`` base file is finally saved as ``SQ.in`` in
    ``path_dict['output']['status quo']``.

    Parameters
    ----------
    value_map : dict
        Values to sweep. Only the keys ``'PrEPCoverage'`` and
        ``'PrEPDuration'`` are allowed, and both are read.
    path_dict : dict
        Reads ``path_dict['input']``, ``path_dict['output']['intervention']``
        and ``path_dict['output']['status quo']``.

    Raises
    ------
    TypeError
        If *value_map* or *path_dict* is not a dictionary.
    ValueError
        If *value_map* has a key other than the two allowed ones.
    """
    # Both inputs have to be dictionaries. The previous guard used
    # ``not (A or B)`` and therefore only fired when NEITHER was a dict.
    if not (isinstance(value_map, dict) and isinstance(path_dict, dict)):
        raise TypeError("Input needs to be in dictionary format.")

    # check keys in the input dictionary
    allowed_keys = ("PrEPCoverage", "PrEPDuration")
    for k in value_map:
        if k not in allowed_keys:
            raise ValueError("Keys of the input dictionary should only contain PrEPCoverage and PrEPDuration")

    # the rest of the code was written with a different name
    var_to_replace = value_map

    # create mesh grid for values
    val_to_replace = {}
    val_to_replace['PrEPCoverage'], val_to_replace['PrEPDuration'] = np.meshgrid(
        var_to_replace['PrEPCoverage'], var_to_replace['PrEPDuration'])

    # import the base files
    cepac_in = link.import_all_cepac_in_files(path_dict['input'])
    float_b = cepac_in['B']
    float_c = cepac_in['C']

    # Row labels of the required variables, looked up separately in each
    # file: previously the labels found in B were also used to index C.
    idx_b, idx_c = {}, {}
    for k in var_to_replace:
        idx_b[k] = float_b.loc[float_b.loc[:, 0] == k, :].index.values
        idx_c[k] = float_c.loc[float_c.loc[:, 0] == k, :].index.values

    out_dir = path_dict['output']['intervention']
    n_rows, n_cols = val_to_replace['PrEPCoverage'].shape
    for row in range(n_rows):
        for col in range(n_cols):
            coverage = val_to_replace["PrEPCoverage"][(row, col)]
            duration = val_to_replace["PrEPDuration"][(row, col)]

            # don't need to create files for zero coverage, just use SQ results
            if float(coverage) == 0:
                continue

            # float_b / float_c are the imported frames themselves (no
            # copy); each file is written right after being updated.
            for var in var_to_replace:
                float_b.loc[idx_b[var], 1:2] = val_to_replace[var][(row, col)]
                float_c.loc[idx_c[var], 1:2] = val_to_replace[var][(row, col)]

            # make new dir (only once there is something to write)
            os.makedirs(out_dir, exist_ok=True)

            # "%d" truncates, and e.g. 100 * 0.29 == 28.999999999999996
            # would be named "Coverage=28". Rounding to 6 decimals removes
            # the floating-point artefact while keeping the truncation of
            # genuinely fractional percentages.
            coverage_pct = int(round(100 * float(coverage), 6))
            suffix = "Coverage=%d, Duration=%d"%(coverage_pct, duration) + r".in"

            # B
            link.write_cepac_in_file(os.path.join(out_dir, "RunB_" + suffix), float_b)
            # C
            link.write_cepac_in_file(os.path.join(out_dir, "RunC_" + suffix), float_c)

    # create a folder for status quo and write the status quo .in file there
    sq_dir = path_dict['output']['status quo']
    os.makedirs(sq_dir, exist_ok=True)
    link.write_cepac_in_file(os.path.join(sq_dir, 'SQ.in'), cepac_in['SQ'])

    return
# collect samples samples, sample_bounds, var_list, parameters = ipar.get_samples(EXAMPLES) filepath = r'/Users/vijetadeshpande/Documents/GitHub/meta-environment/Data and results/basefile' savepath = r'/Users/vijetadeshpande/Documents/GitHub/meta-environment/Data and results/CEPAC RUNS/NEW BATCH' # create folder to save the files savepath_cepac = os.path.join(savepath, 'Files for CEPAC') savepath_rnn = os.path.join(savepath, 'Files for RNN') for i in [savepath_cepac, savepath_rnn]: if not os.path.exists(i): os.makedirs(i) if (not os.path.exists(os.path.join(savepath, 'results'))) and (not os.path.exists(os.path.join(savepath, str(0), 'results'))): # import base .in file basefile = link.import_all_cepac_in_files(filepath)['rio'] # start timer start = timeit.default_timer() # loop over samples one by one to create .in files #samples['Gaussian solution'] = [] feature_tensor = [] CEPAC_input_condensed = pd.DataFrame(-10, index = np.arange(EXAMPLES), columns = var_list) for run in range(0, EXAMPLES): float_df = deepcopy(basefile) # loop over all the variables for var in var_list: # store input value if var != 'InitAge':
#%% plot longitidinal histories long_data = {} folder_list = os.listdir(path) for i in folder_list: float_path = os.path.join(path, i, 'results') if not os.path.exists(float_path): print('Results folder is missing in following path:') print(float_path) else: long_data[i] = link.import_all_cepac_out_files(path = float_path, extensions = [r'\*.txt']) """ #%% create a set of final runs for var in percentage_decline: cepac_in = link.import_all_cepac_in_files(os.path.join(path, var)) float_in = deepcopy(cepac_in) # adjustments for run in cepac_in: if 'RunC' in run or 'C' in run: float_in.pop(run) elif 'RunA' in run or 'A' in run: float_in['beasecase'] = float_in.pop(run) elif 'RunB' in run or 'B' in run: float_in['intervention'] = float_in.pop(run) del cepac_in cepac_in = float_in del float_in # here we want to change the values percenatg decline
'incidence': np.array([2.45, 1]), 'index positive': 3926, 'index negative': 41728, 'on ART': 0.71 }, 'manaus': {'viral load distribution': np.array([0.1268, 0.0612 , 0.0621, 0.1034, 0.2898, 0.3566, 0.0000, 0.0000]), 'incidence': np.array([1.4, 1]), 'index positive': 2828, 'index negative': 45937, 'on ART': 0.70 } } # import all .in files filepath = r'/Users/vijetadeshpande/Downloads/MPEC/Brazil/SA on control levers' in_file = link.import_all_cepac_in_files(filepath) # create folder for new in files filepath_new = os.path.join(filepath, 'SA on testing rate') #'SA_percentage on ' if not os.path.exists(filepath_new): os.makedirs(filepath_new) def SA_attia(factors): # loop over scale down scenarios replace_var = ['DynamicTransmissionNumTransmissionsHRG', 'TransmissionRiskMultiplier_T1', 'TransmissionRiskMultiplier_T2', 'TransmissionRiskMultiplier_T3', 'TransmissionRateOnART', 'TransmissionRateOffART'] replace_val = {} for city in ['rio', 'salvador', 'manaus']: for k in factors:# this us scale down factor for Attia rates # multiply to attia
# aux functions
def find_and_replace(df, var, val):
    """Return a deep copy of *df* with the value of variable *var* set to *val*.

    The row is the first one whose column ``0`` equals *var*. The cell
    overwritten is the second non-null cell of that row, i.e. the first
    populated cell after the variable name.

    Raises
    ------
    IndexError
        If no row matches *var*, or the matching row has fewer than two
        non-null cells.
    """
    updated = deepcopy(df)

    # locate the row holding the variable
    is_var_row = updated.loc[:, 0] == var
    row_label = updated.loc[is_var_row, :].index[0]

    # locate the first populated cell after the variable name
    populated_cells = updated.loc[row_label, :].dropna()
    col_label = populated_cells.index[1]

    # replace
    updated.loc[row_label, col_label] = val

    return updated


if False:
    # import base A, B and C
    path = {"base_abc": r"C:\Users\Vijeta\Documents\Projects\Brazil PrEP\Transmission runs\Input files"}
    cepac_in = link.import_all_cepac_in_files(path["base_abc"])

    # which variables to change
    replace_val = {"PrEPCoverage": [0.1, 0.2, 0.3], "PrEPDuration": [24, 36, 48]}

    # replace values in b and c only
    for var1 in replace_val["PrEPCoverage"]:
        # get b and c run data
        run_b = deepcopy(cepac_in["B"])
        run_c = deepcopy(cepac_in["C"])

        # replace coverage
        run_b = find_and_replace(run_b, "PrEPCoverage", var1)
        run_c = find_and_replace(run_c, "PrEPCoverage", var1)

        # create directory
prep_cov_t = [6, 60] sample_float = sp.random.uniform(prep_cov_t[0], prep_cov_t[1], size=sample_mod) sample['PrEPDuration'] = sample_float # sampling PrEP coverage shape prep_cov_s = [0.1, 5] sample_float = sp.random.uniform(prep_cov_s[0], prep_cov_s[1], size=sample_mod) / 10 sample['PrEPShape'] = sample_float # we'll replace values in cepac input sheet one by one path = { "Input": r"C:\Users\Vijeta\Documents\Projects\Brazil PrEP\NN\RNN experiment" } cepac_in_dict = link.import_all_cepac_in_files(path["Input"]) # list of variables to replace in cepac in file var_list = [ 'PrEPCoverage', 'PrEPDuration', 'PrEPShape', 'HIVIncidReductionCoefficient', 'HIVmthIncidMale', 'DynamicTransmissionNumHIVPosHRG', 'DynamicTransmissionNumHIVNegHRG', 'TransmissionRiskMultiplier_T3' ] # required vars efficacy = 0.65 adherence = 0.739 stop_time = 60
BATCH_SIZE = pd.DataFrame(data_rnn[batch]['CEPAC_input']).shape[0] START_INDEX, END_INDEX = int(batch) * 1000, int(batch) * 1000 + BATCH_SIZE # check if we have cepac input tensor and collect if not 'CEPAC_input' in data_rnn[batch]: # get list of variables and dictionary to store values samples, sample_bounds, var_list, parameters = ipar.get_samples(1) # cepac_batch_input = pd.DataFrame(-10, index=np.arange(len( data_cepac[batch])), columns=var_list) # read all cepac .in files CEPAC_input_files = link.import_all_cepac_in_files( os.path.join(PATH_RAW, batch, 'Files for CEPAC')) # save location of each variable var_loc = {} for file in CEPAC_input_files: for var in var_list: if not var in var_loc: try: loc = t_op.search_var(var, CEPAC_input_files[file]) except: loc = {} # store location of the variable var_loc[var] = loc break # store value of each sample