def analyze_final_output(path_inv, path_sq, HORIZON=60, city=None):
    """Import intervention and status-quo CEPAC outputs, build plotting
    dataframes, save heatmaps, and export the percentage-reduction table.

    path_inv: folder with the intervention CEPAC .out files
    path_sq: folder with the status-quo CEPAC .out files (must contain 'SQ')
    HORIZON: number of months used when building the plot dataframes; also
        appended to every output folder name
    city: passed through to create_plot_df (optional)
    """

    def _ensure_save_dir(folder_name):
        # All artifacts are written two levels above the intervention folder.
        save_path = os.path.join(path_inv, '..', '..', folder_name)
        os.makedirs(save_path, exist_ok=True)
        return save_path

    # extract intervention output
    cepac_out = link.import_all_cepac_out_files(path_inv, module='regression')

    # read SQ output and keep only the 'SQ' entry
    cepac_out['SQ'] = (link.import_all_cepac_out_files(
        path_sq, module='regression'))['SQ']

    # create a plotting friendly df
    plot_dict = create_plot_df(cepac_out, HORIZON, city)

    # lineplot (currently disabled)
    save_path = _ensure_save_dir('Line plots for CEPAC output_' + str(HORIZON))
    #save_line_plots(plot_dict, save_path)

    # heatmaps, one per outcome measure
    save_path = _ensure_save_dir('Heatmaps for CEPAC output_' + str(HORIZON))
    for measure in ('Percentage reduction in incidence rate',
                    'Infections averted',
                    'Infections averted (%)',
                    'Transmissions averted'):
        save_heatmaps(plot_dict, save_path, measure)

    # scatter (currently disabled)
    save_path = _ensure_save_dir('Scatter plots for CEPAC output_' + str(HORIZON))
    #save_scatter_plots(plot_dict['percentage reduction'], save_path)

    # save numeric results
    save_path = _ensure_save_dir('Excel files for results_' + str(HORIZON))
    plot_dict['percentage reduction'].to_csv(
        os.path.join(save_path, 'outcomes.csv'))
def select_output_subset(path):
    """Collect CEPAC regression outputs from every sub-folder of *path*.

    Each sub-folder is expected to contain a 'results' directory; the
    per-run outputs from all sub-folders are merged into one dict whose
    keys are converted to int.

    Returns: dict mapping int(run key) -> run output.
    """
    merged = {}

    # iterate over every folder and merge its per-run outputs
    for folder in os.listdir(path):
        new_pairs = link.import_all_cepac_out_files(
            os.path.join(path, folder, "results"), module="regression")
        # popstats is an aggregate entry, not a per-run output; drop it if
        # present (original raised KeyError when it was missing)
        new_pairs.pop("popstats", None)
        merged.update(new_pairs)

    # convert keys into int type
    return {int(k): v for k, v in merged.items()}
    def __init__(self,
                 path,
                 sensitivity_module=False,
                 transmission_module=False):
        """Read structured CEPAC output for every sub-folder of *path*.

        path: directory whose sub-folders each contain a 'results' folder
        sensitivity_module: read outputs with the sensitivity parser
        transmission_module: read outputs with the regression parser

        Exactly one module flag should be True; if both are set,
        sensitivity takes precedence (matches the original ordering).
        Raises ValueError when neither flag is set (original raised an
        opaque IndexError in that case).
        """
        # decide which link-module parser to use
        if sensitivity_module:
            which_module = 'sensitivity_module'
        elif transmission_module:
            which_module = 'regression'
        else:
            raise ValueError(
                "Either sensitivity_module or transmission_module must be True.")

        # import and structure the data for each sub-folder
        d = {}
        for folder in os.listdir(path):
            float_path = os.path.join(path, folder, 'results')
            d[folder] = self.get_structured_data(
                link.import_all_cepac_out_files(float_path, which_module),
                sensitivity_module, transmission_module)

        # point the d dictionary to the relevant attribute
        self.output_data = d

        # if the transmission module is on, we'll need to read input files too
        # NOTE(review): placeholder left by the original author
        if transmission_module:
            print('chan')
예제 #4
0
def get_in_out_data(path):
    """Read CEPAC outputs and matching input files for every scenario.

    path: dict mapping scenario name -> base folder (outputs are read from
        its 'results' sub-folder, inputs from the folder itself).
    Returns: (cepac_in, cepac_out) dicts keyed by scenario name; each input
        file is reduced to the dynamic-transmission multiplier row only.
    """
    cepac_out = {}
    cepac_in = {}

    # import required input and output per scenario
    for scenario, base_dir in path.items():
        results_dir = os.path.join(base_dir, r'results')
        cepac_out[scenario] = link.import_all_cepac_out_files(
            results_dir, module='regression')
        cepac_in[scenario] = link.import_all_cepac_in_files(base_dir)

    # keep only the transmission multiplier row of each input file
    var_name = ["DynamicTransmissionNumTransmissionsHRG"]
    for sce in cepac_in:
        for run in cepac_in[sce]:
            reduced = link.get_subset_of_in_file(cepac_in[sce][run], var_name)
            cepac_in[sce][run] = reduced

    return cepac_in, cepac_out
예제 #5
0
 # NOTE(review): fragment of a larger routine — `multisq`, `sqpath`, `var`,
 # `readpaths`, `cepac_outputs`, `HORIZON`, `c_op`, `link` and
 # `community_benefit` come from the enclosing (unseen) scope.

 # collect SQ output: merge cluster-parallelized results into one folder
 # (best-effort; ignore failures if already collected)
 if multisq:
     try:
         c_op.collect_output(os.path.join(sqpath, 'SQ_'+var))
     except:
         pass

 # read output
 # SQ
 if multisq:
     try:
         # NOTE(review): collect_output is invoked again here on the
         # 'results' sub-folder — confirm both calls are really needed
         c_op.collect_output(os.path.join(sqpath, 'SQ_'+var, 'results'))
     except:
         pass

     # one SQ output per variable value
     cepac_outputs['status quo'] = link.import_all_cepac_out_files(os.path.join(sqpath, 'SQ_'+var, 'results'), module = 'regression')
 else:
     # single shared SQ run: keep only the 'SQ' entry
     cepac_outputs['status quo'] = link.import_all_cepac_out_files(os.path.join(sqpath, 'SQ', 'results'), module = 'regression')['SQ']

 # INT (intervention runs for this variable)
 cepac_outputs[var] = link.import_all_cepac_out_files(os.path.join(readpaths[var], 'results'), module = 'regression')

 # send to community benefit function
 for file in cepac_outputs[var]:
     # calculate community benefit (percentage reduction + coefficient)
     if multisq:
         per_red, coeff = community_benefit(cepac_outputs['status quo']['SQ_'+file], cepac_outputs[var][file], HORIZON)
     else:
         per_red, coeff = community_benefit(cepac_outputs['status quo'], cepac_outputs[var][file], HORIZON)
     # floor the coefficient to avoid zero/negative values downstream
     coeff = max(coeff, 10**-8)
예제 #6
0
def write_final_runs(value_grid, path_dict, STOP_TIME = 60):
    """Create the final CEPAC .in run files from the ABC run outputs.

    value_grid: dict of variable name -> grid of values swept
        (must contain 'PrEPCoverage' and 'PrEPDuration')
    path_dict: dict with 'input' (base .in files) and 'output'
        ('intervention', 'status quo', 'final runs') path entries
    STOP_TIME: incidence-reduction stop time (months) written to each run

    Side effects: plots a heatmap, exports an Excel summary, and writes one
    .in file per run under path_dict['output']['final runs'].
    """

    # aux function for creating final run files
    def get_reduction_coeff(percentage_red, stop_time):
        # Map a percentage decline in incidence onto CEPAC's exponential
        # reduction coefficient.
        if percentage_red <= 0:
            # no decline: huge coefficient => effectively no reduction
            return 10000
        if (0.01 * percentage_red) < 1:
            return -1 * stop_time / (np.log(1 - (0.01 * percentage_red)))
        # percentage reduction >= 100%: cap via an extremely small log arg
        return -1 * stop_time / np.log(10**-100)

    # check if both inputs are dictionaries
    # (BUG FIX: original used `or`, so it only raised when BOTH arguments
    # were non-dicts; `and` enforces that each one is a dict)
    if not (isinstance(value_grid, dict) and isinstance(path_dict, dict)):
        raise TypeError("Input needs to be in dictionary format.")

    val_to_replace = value_grid
    del value_grid

    # import out files for every output scenario
    cepac_out = {}
    for k in path_dict['output']:
        cepac_out[k] = link.import_all_cepac_out_files(
            os.path.join(path_dict['output'][k], r'results'),
            module="regression")

    # create sets of runs ABC: group intervention outputs by the grid index
    # encoded in each file name
    out = {}
    name_map = {}
    for k in cepac_out['intervention']:
        if k == 'popstats':
            continue
        key_name = aux.get_digit_for_file_name(k, val_to_replace)
        if key_name not in out:
            out[key_name] = {}
        name_map[key_name] = k
        if "RunB" in k:
            out[key_name]['B'] = cepac_out['intervention'][k]
        elif "RunC" in k:
            out[key_name]['C'] = cepac_out['intervention'][k]
        else:
            continue

    # TODO: following value will change according to city, need to take care of this
    inp = {}
    for k in out:
        # run A is always the status-quo output
        out[k]['A'] = cepac_out['status quo']['SQ']
        inp[k] = {'PrEPCoverage': 0, 'prep_efficacy': 0.96,
                  'CohortSize': 10000000, 'PrEPDuration': 0,
                  'HIVmthIncidMale': 0.00357692085607886,
                  'prep_usage_at_initialization': 'n'}

    # percentage decline in incidence for every (coverage, duration) cell
    row_idx = 0
    percentage_decline = []
    for i in val_to_replace['PrEPCoverage']:
        for j in val_to_replace['PrEPDuration']:
            if i == 0.0:
                # zero coverage: all three runs equal status quo
                j_idx = np.where(val_to_replace['PrEPDuration'] == j)[0][0]
                out[j_idx] = {}
                out[j_idx]["A"] = cepac_out["status quo"]["SQ"]
                out[j_idx]["B"] = cepac_out["status quo"]["SQ"]
                out[j_idx]["C"] = cepac_out["status quo"]["SQ"]
                inp[j_idx] = inp[1]
            inp[row_idx]['PrEPCoverage'] = i
            inp[row_idx]['PrEPDuration'] = STOP_TIME
            percentage_decline.append(
                tx_algo.get_percentage_decline(out[row_idx], inp[row_idx]))
            row_idx += 1

    # plot results
    plot_heatmap(val_to_replace, percentage_decline, path_dict)

    # write results to excel file
    export_abc_out_to_excel(cepac_out, out, val_to_replace, path_dict)

    # create directory to write the final run files
    if not os.path.exists(path_dict['output']['final runs']):
        os.makedirs(path_dict['output']['final runs'])
    final_path = path_dict['output']['final runs']

    # read one .in file (B, as that is the intervention file) and alter:
    # 1. reduction coefficient
    # 2. dynamic transmission module settings
    base_int = link.import_all_cepac_in_files(path_dict['input'])
    base_int = base_int['B']

    # first find the row indices of all the required variables
    idx = {}
    var_list = ["UseHIVIncidReduction", "HIVIncidReductionStopTime",
                "HIVIncidReductionCoefficient", "UseDynamicTransmission",
                "PrEPCoverage", "PrEPDuration"]
    for k in var_list:
        idx[k] = base_int.loc[base_int.loc[:, 0] == k, :].index.values

    # replace required values and write one .in file per run
    for run in name_map:
        # deepcopy so each run starts from the untouched base file
        float_int = deepcopy(base_int)

        # NOTE(review): following value of stop time might not be correct
        float_int.loc[idx["HIVIncidReductionStopTime"], 1] = STOP_TIME
        coeff = get_reduction_coeff(
            percentage_decline[run],
            float_int.loc[idx["HIVIncidReductionStopTime"], 1].values[0])
        if coeff <= 0:
            # disable incidence reduction
            float_int.loc[idx["UseHIVIncidReduction"], 1] = 0
        else:
            # enable incidence reduction
            float_int.loc[idx["UseHIVIncidReduction"], 1] = 1
            float_int.loc[idx["HIVIncidReductionCoefficient"], 1] = coeff

        # TODO: we need to take care that the values for risk multiplier are correct
        float_int.loc[idx["UseDynamicTransmission"], 1] = 1

        # replace coverage and coverage time parsed from the run file name
        float_int.loc[idx["PrEPCoverage"], 1:2] = 0.01 * (
            aux.get_coverage_level_from_file_name(name_map[run]))
        float_int.loc[idx["PrEPDuration"], 1:2] = \
            aux.get_coverage_time_from_file_name(name_map[run])

        # write the file
        save_path = os.path.join(final_path, name_map[run].split('_')[1]) + '.in'
        link.write_cepac_in_file(save_path, float_int)
예제 #7
0
def create_target_tensor(path, device, SEQ_LEN=60, para_path=None):
    """Build raw and standardized target tensors from a folder of CEPAC runs.

    path: results folder of the set of runs to convert
    device: torch device the tensors are moved to
    SEQ_LEN: number of months kept per trajectory (row 0 stays zeros)
    para_path: CSV with per-feature 'mean'/'sd' rows used for
        standardization; defaults to the original hard-coded location

    Returns: (targets, targets_std) dicts keyed by (coverage, cov_time);
        each value is a float tensor of shape (SEQ_LEN + 1, n_features).
    """
    if para_path is None:
        # original hard-coded location of the standardization parameters
        para_path = r'/Users/vijetadeshpande/Documents/GitHub/meta-environment/Data and results/CEPAC RUNS/regression model input/output_mean_and_sd.csv'

    # import all results
    cepac_outputs = link.import_all_cepac_out_files(path, module='regression')
    targets, targets_std = {}, {}

    # import output distribution parameters (rows 'mean' and 'sd')
    para = pd.read_csv(para_path).set_index('Unnamed: 0')

    # iterate over each file
    for file in cepac_outputs:
        # skip run_c
        if 'RunC' in file:
            continue

        # coverage and coverage time are encoded in the file name as
        # '...=XX...=YY'; SQ files get (0, 0)
        try:
            coverage, cov_time = int(file.split('=')[1][:2]), int(
                file.split('=')[2])
        except (IndexError, ValueError):
            if 'SQ' in file:
                coverage, cov_time = 0, 0
            else:
                coverage, cov_time = 'NA', 'NA'

        # remove the multiplier array if present
        try:
            cepac_outputs[file].pop('multiplier')
        except KeyError:
            pass

        # truncate every feature series to SEQ_LEN values
        for feature in cepac_outputs[file]:
            cepac_outputs[file][feature] = cepac_outputs[file][
                feature].to_list()[:SEQ_LEN]

        # store as matrix: row 0 stays zeros, rows 1..SEQ_LEN hold the data
        targets[(coverage,
                 cov_time)] = pd.DataFrame(0,
                                           index=np.arange(SEQ_LEN + 1),
                                           columns=cepac_outputs[file].keys())
        targets[(coverage, cov_time)].iloc[1:, :] = pd.DataFrame(
            cepac_outputs[file]).values

        # standardize targets with the stored per-feature mean/sd
        targets_std[(coverage, cov_time)] = pd.DataFrame(
            0,
            index=targets[(coverage, cov_time)].index,
            columns=targets[(coverage, cov_time)].columns)
        for feature in cepac_outputs[file]:
            targets_std[(coverage, cov_time)].loc[1:, feature] = (
                targets[(coverage, cov_time)].loc[1:, feature] -
                para.loc['mean', feature]) / para.loc['sd', feature]

        # convert to torch tensors on the requested device
        targets[(coverage, cov_time)] = torch.tensor(
            np.array(targets[(coverage, cov_time)])).float().to(device)
        targets_std[(coverage, cov_time)] = torch.tensor(
            np.array(targets_std[(coverage, cov_time)])).float().to(device)

    return targets, targets_std
    # NOTE(review): tail fragment of a larger routine — `base`, `strategy`,
    # `HORIZON`, `c_op` and `link` come from the enclosing (unseen) scope.

    # build the read/write paths for this strategy
    basepath = os.path.join(base, strategy)
    sqpath = os.path.join(base, 'Common runs', 'SQ', 'results')
    readbase = os.path.join(
        basepath, 'Final runs',
        'Final runs_var1=PrEPAdherence_var2=PrEPDroputPostThreshold',
        'results')
    writebase = os.path.join(base, 'Results')

    # collect output scattered by the cluster; best-effort, ignore failures
    try:
        c_op.collect_output(os.path.join(readbase, '..'))
    except:
        pass

    # import all cepac out files (interventions), then add status quo
    cepac_outputs = link.import_all_cepac_out_files(readbase,
                                                    module='regression')
    cepac_outputs['status quo'] = link.import_all_cepac_out_files(
        sqpath, module='regression')['SQ']

    # compare each intervention file against status quo
    idx = -1
    for file in cepac_outputs:
        if file == 'status quo':
            continue

        # running index over intervention files only
        idx += 1

        # infections averted over the first HORIZON months vs status quo
        averted_inf = (cepac_outputs['status quo']['infections'] -
                       cepac_outputs[file]['infections'])[0:HORIZON].sum()
예제 #9
0
# NOTE(review): script-level fragment — `dir_list`, `path`, `dtm`, `link`
# and `deepcopy` come from earlier (unseen) parts of the file.
cepac_out = {}
percentage_decline = {}
for p_coverage in dir_list["coverage"]:
    # one nested dict per coverage level
    cepac_out[p_coverage] = {}
    percentage_decline[p_coverage] = {}
    
    # get all the coverage-time subfolders for this coverage level
    dir_list["coverage time"] = os.listdir(os.path.join(path["output"], p_coverage))
    
    for p_coverage_time in dir_list["coverage time"]:
        p_res = os.path.join(path["output"], p_coverage, p_coverage_time, "results")
        
        # extract results
        cepac_out[p_coverage][p_coverage_time] = link.import_all_cepac_out_files(p_res, module = "regression")
        
        # calculate percentage decline in incidence for this cell
        percentage_decline[p_coverage][p_coverage_time], _ = dtm.get_community_benefit(cepac_out[p_coverage][p_coverage_time], os.path.join(p_res, ".."))

#%% create a set of final runs
for p_c in percentage_decline:
    for var in percentage_decline[p_c]:
        cepac_in = link.import_all_cepac_in_files(os.path.join(path["output"], p_c, var))
        float_in = deepcopy(cepac_in)
        # adjustments: drop run C, rename run A to the base case
        for run in cepac_in:
            # NOTE(review): `'C' in run` already matches any 'RunC' name, so
            # the first clause of each test is redundant; also 'beasecase'
            # looks like a typo for 'basecase' — confirm what downstream
            # consumers of this key expect before changing it.
            if 'RunC' in run or 'C' in run:
                float_in.pop(run)
            elif 'RunA' in run or 'A' in run:
                float_in['beasecase'] = float_in.pop(run)
예제 #10
0
    # NOTE(review): loop-body fragment — `batch`, `path_rnn`, `path_cepac`,
    # `data_rnn`, `data_cepac`, `SEQ_LEN`, `h_fun1`, `c_op` and `link` come
    # from the enclosing (unseen) scope.
    if os.path.exists(os.path.join(path_rnn)):
        # load previously saved RNN-side data for this batch
        data_rnn[batch] = h_fun1.load_all_json(path_rnn)

        # check if we already have the CEPAC output cached for this batch
        if not 'CEPAC_output' in data_rnn[batch]:

            # collect the output which was parallelized for the cluster
            # (presence of a '0' sub-folder indicates uncollected output)
            if os.path.exists(os.path.join(path_cepac, str(0))):
                c_op.collect_output(path_cepac)

            # import raw data
            data_cepac[batch] = link.import_all_cepac_out_files(
                os.path.join(path_cepac, 'results'), module='regression')

            # drop the multiplier series and truncate features to SEQ_LEN
            for example in data_cepac[batch]:
                data_cepac[batch][example].pop('multiplier')
                for feature in data_cepac[batch][example]:
                    data_cepac[batch][example][feature] = data_cepac[batch][
                        example][feature].to_list()[:SEQ_LEN]

            # save raw data to json file so the next run hits the cache
            filename = os.path.join(path_rnn, r'CEPAC_output.json')
            h_fun1.dump_json(data_cepac[batch], filename)

        else:
            # reuse the cached CEPAC output
            data_cepac[batch] = data_rnn[batch].pop('CEPAC_output')

# converting keys from str to int