def compare_trajectories(csv_file_1, csv_file_2, legends, figure_location):

    # clean the directory
    IO.delete_files('.png', figure_location)

    df1 = TrajsDataFrame(csv_file_1)
    df2 = TrajsDataFrame(csv_file_2)

    # for all trajectories
    for i, (key, trajs_one_outcome) in enumerate(df1.allTrajs.items()):

        fig, ax = plt.subplots(figsize=(5, 5))  # figure size
        ax.set(title=key)  # title and labels

        # first (base)
        for traj in trajs_one_outcome.trajs:
            ax.plot(traj.times, traj.obss, 'g', alpha=1, zorder=2)

        # second (intervention)
        for traj in df2.allTrajs[key].trajs:
            ax.plot(traj.times, traj.obss, 'b', alpha=1, zorder=1)

        plt.legend(legends)  # use legends[::-1] to reverse the order if needed

        file_name = figure_location + '/' + str(i) + ' ' + key
        output_figure(plt, file_name)
def print_trace(self, filename, directory='Trace', delete_existing_files=True):
    """ prints the trace messages into a text file with the specified filename.
    It creates a subdirectory where the Python script is located.
    :param filename: filename of the text file where trace messages should be exported to
    :param directory: directory (relative to the current root) where the trace files should be located
    :param delete_existing_files: set to True to delete the existing trace files in the directory
    """

    if not self._on:
        return

    # create the directory if it does not exist
    if not os.path.exists(directory):
        os.makedirs(directory)

    # delete existing files
    if delete_existing_files:
        io.delete_files(extension='.txt', path=os.path.join(os.getcwd(), directory))

    # create a new file and write the trace messages
    filename = os.path.join(directory, filename)
    with open(filename, 'w') as file:
        for message in self._messages:
            file.write('%s\n' % message)
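A minimal usage sketch for the method above, assuming it sits on a Trace-like class that collects messages in self._messages; the Trace constructor and the add_message call are assumptions for illustration, not part of the source.

trace = Trace()                          # hypothetical constructor
trace.add_message('simulation started')  # hypothetical; appends to self._messages
trace.print_trace(filename='Trace.txt', directory='Trace')
# writes ./Trace/Trace.txt with one message per line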
def clear_txt_files(path='..'):
    """ removes all .txt files inside the specified directory
    :param path: path (relative to the current root) where the .txt files are located
        (the folder should already exist)
    """
    io.delete_files('.txt', path)
def __init__(self, csv_file_name, wtp_range,
             csv_file_name_proj_thresholds='ProjectedOptimalThreshold.csv'):

    # read the (wtp, on-threshold, off-threshold) columns
    self.cols = io.read_csv_cols(
        file_name=csv_file_name,
        n_cols=3,
        if_ignore_first_row=True,
        if_convert_float=True)

    self.wtps = self.cols[0]
    self.OnTs = self.cols[1]
    self.OffTs = self.cols[2]

    # fit exponential regressions to the on and off thresholds
    self.RegToOn = Reg.ExpRegression(x=self.wtps, y=self.OnTs, if_c0_zero=True)
    print(self.RegToOn.get_coeffs())
    self.RegToOff = Reg.ExpRegression(x=self.wtps, y=self.OffTs, if_c0_zero=True)
    print(self.RegToOff.get_coeffs())

    # project the thresholds over the wtp range and export them
    wtps = np.linspace(wtp_range[0], wtp_range[1], 13)
    to_on_ts = self.RegToOn.get_predicted_y(wtps)
    to_off_ts = self.RegToOff.get_predicted_y(wtps)

    rows = []
    for i in range(len(wtps)):
        rows.append([wtps[i], to_on_ts[i], to_off_ts[i]])

    io.write_csv(rows=rows, file_name=csv_file_name_proj_thresholds)
def sample_posterior(self, n_samples):
    """ samples the posterior distribution of the mortality probability
    :param n_samples: number of samples from the posterior distribution
    """

    # specify the seed of the numpy random number generator
    np.random.seed(1)

    # cohort ids
    self.cohortIDs = range(n_samples)

    # find values of mortality probability at which the posterior should be evaluated
    # (assumes n_samples equals Sets.PRIOR_N so that cohort ids and samples align)
    self.mortalitySamples = np.random.uniform(
        low=Sets.PRIOR_L, high=Sets.PRIOR_U, size=Sets.PRIOR_N)

    # create a multi-cohort
    multi_cohort = SurvivalCls.MultiCohort(
        ids=self.cohortIDs,
        mortality_probs=self.mortalitySamples,
        pop_sizes=[Sets.SIM_POP_SIZE] * Sets.PRIOR_N)

    # simulate the multi-cohort
    multi_cohort.simulate(n_time_steps=Sets.TIME_STEPS)

    # calculate the likelihood of each simulated cohort
    weights = []
    for cohort_id in self.cohortIDs:

        # get the average survival time for this cohort
        mean = multi_cohort.multiCohortOutcomes.meanSurvivalTimes[cohort_id]

        # construct a normal likelihood
        # with mean calculated from the simulated data and standard deviation from the clinical study.
        # evaluate this pdf (probability density function) at the mean reported in the clinical study.
        weight = stat.norm.pdf(x=Sets.OBS_MEAN, loc=mean, scale=Sets.OBS_STDEV)

        # store the weight
        weights.append(weight)

    # normalize the likelihood weights
    sum_weights = sum(weights)
    self.normalizedWeights = np.divide(weights, sum_weights)

    # produce the list to report the results
    csv_rows = [['Cohort ID', 'Likelihood Weights', 'Mortality Prob']]
    for i in range(len(self.mortalitySamples)):
        csv_rows.append(
            [self.cohortIDs[i], self.normalizedWeights[i], self.mortalitySamples[i]])

    # write the calibration results into a csv file
    IO.write_csv(file_name='CalibrationResults.csv', rows=csv_rows)
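A usage sketch for the calibration routine above, assuming it belongs to a Calibration class (the class name is an assumption) and that n_samples equals Sets.PRIOR_N as the method requires.

calib = Calibration()                           # hypothetical class name
calib.sample_posterior(n_samples=Sets.PRIOR_N)
# calib.normalizedWeights now holds the normalized likelihood weights, and
# 'CalibrationResults.csv' contains one (cohort id, weight, mortality prob) row per sample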
def export_to_csv(self, file_name='PartialRank.csv', decimal=3):

    formatted_results = [['Parameter', 'Coefficient', 'P-Value']]
    for row in self.results:
        name = row[0]
        coef = F.format_number(number=row[1], deci=decimal)
        p = F.format_number(number=row[2], deci=decimal)
        formatted_results.append([name, coef, p])

    IO.write_csv(file_name=file_name, rows=formatted_results)
def plot_histograms(self, par_names=None, csv_file_name_prior=None,
                    posterior_fig_loc='figures_national'):
    """ creates histograms of the specified parameters
    :param par_names: (list) of parameter names to plot
    :param csv_file_name_prior: (string) filename where parameter prior ranges are located
    :param posterior_fig_loc: (string) location where posterior figures should be saved
    """

    raise ValueError('Needs to be debugged.')

    # clean the directory
    IO.delete_files('.png', posterior_fig_loc)

    # read prior distributions
    dict_of_priors = None
    if csv_file_name_prior is not None:
        dict_of_priors = self.get_dict_of_priors(prior_info_csv_file=csv_file_name_prior)

    if par_names is None:
        par_names = self.get_all_parameter_names()

    # for all parameters, read sampled parameter values and create the histogram
    for par_name in par_names:

        # get values for this parameter
        par_values = self.dictOfParamValues[par_name]

        # get info of this parameter
        title, multiplier, x_range = self.get_title_multiplier_x_range_decimal_format(
            par_name=par_name, dict_of_priors=dict_of_priors)

        # adjust parameter values
        par_values = [v * multiplier for v in par_values]

        # find the filename the histogram should be saved as
        file_name = posterior_fig_loc + '/Par-' + par_name + ' ' + F.proper_file_name(par_name)

        # plot histogram
        Fig.plot_histogram(
            data=par_values,
            title=title.replace('!', '\n'),
            x_range=x_range,
            figure_size=HISTOGRAM_FIG_SIZE,
            file_name=file_name)
def __init__(self, csv_file_name):
    """
    :param csv_file_name: csv file where the parameter samples are located.
        Assumes that the first row of this csv file contains the parameter names
        to be used as the keys of the dictionary of parameters it creates.
    """

    # create a dictionary of parameter samples
    self.dictOfParams = IO.read_csv_cols_to_dictionary(
        file_name=csv_file_name, delimiter=',', if_convert_float=True)
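A sketch of the csv layout this constructor assumes and the dictionary it produces; the file name and values are illustrative, not from the source.

# hypothetical params.csv:
#   age, mortality
#   50,  0.10
#   60,  0.20
# read_csv_cols_to_dictionary returns the columns keyed by the header names:
#   {'age': [50.0, 60.0], 'mortality': [0.1, 0.2]}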
def export_means_and_intervals(self,
                               poster_file='ParameterEstimates.csv',
                               significance_level=0.05,
                               sig_digits=3,
                               param_names=None,
                               prior_info_csv_file=None):
    """ calculates the means and credible intervals of the specified parameters
    :param poster_file: csv file where the posterior ranges should be stored
    :param significance_level: significance level (e.g. 0.05 for 95% credible intervals)
    :param sig_digits: number of significant digits
    :param param_names: (list) of parameter names
    :param prior_info_csv_file: (string) filename where parameter prior ranges are located
    """

    results = self.get_means_and_intervals(
        significance_level=significance_level,
        sig_digits=sig_digits,
        param_names=param_names,
        prior_info_csv_file=prior_info_csv_file)

    # write parameter estimates and credible intervals
    IO.write_csv(rows=results, file_name=poster_file)
def export_to_csv(self, corrs='r', file_name='Correlations.csv', decimal=3, delimiter=','):
    """ formats the correlation coefficients and p-values to the specified decimal point
        and exports them to a csv file
    :param corrs: (string) or (list of strings) from 'r' for Pearson's,
        'rho' for Spearman's rank correlation, 'p' for partial correlation,
        and 'pr' for partial rank correlation
    :param file_name: file name
    :param decimal: decimal points to round the estimates to
    :param delimiter: to separate by comma, use ',' and by tab, use '\t'
    """

    if not isinstance(corrs, list):
        corrs = [corrs]

    # make the header
    title_row = ['Parameter']
    for corr in corrs:
        title_row.append(self._full_name(corr=corr))
        title_row.append('P-value')

    # add the header
    formatted_results = [title_row]

    # add the names of parameters (first column)
    for name in self.dicParameterValues:
        formatted_results.append([name])

    # calculate all forms of correlation requested
    for corr in corrs:
        results, text = self._get_results_text(corr=corr)
        i = 1
        for par, values in results.items():
            coef = F.format_number(number=values[0], deci=decimal)
            p = F.format_number(number=values[1], deci=decimal)
            formatted_results[i].extend([coef, p])
            i += 1

    IO.write_csv(file_name=file_name, rows=formatted_results, delimiter=delimiter)
def get_dict_of_priors(prior_info_csv_file):

    dict_priors = {}
    rows_priors = IO.read_csv_rows(
        file_name=prior_info_csv_file,
        if_ignore_first_row=True,
        delimiter=',',
        if_convert_float=True)

    # key each prior row by the parameter name column
    for row in rows_priors:
        dict_priors[row[ColumnsPriorDistCSV.NAME.value]] = row

    return dict_priors
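To make the expected layout concrete, a small sketch of the prior-info csv; the column order is inferred from the ColumnsPriorDistCSV enum and the values are illustrative, not from the source.

# hypothetical priors.csv (first row ignored as a header):
#   Name,      Min,  Max
#   mortality, 0.05, 0.25
# get_dict_of_priors would then return the rows keyed by parameter name, e.g.
#   {'mortality': ['mortality', 0.05, 0.25]}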
def __init__(self, csvfile_param_values=None, columns_to_be_deleted=()):
    """
    :param csvfile_param_values: (string) csv file where the parameter values are located.
        Assumes that the first row of this csv file contains the parameter names
        and each column contains the parameter values.
    :param columns_to_be_deleted: (list of string) list of column names to be deleted from analysis
    """

    # dictionary of parameter samples with parameter names as keys
    self.dictOfParamValues = {}
    if csvfile_param_values is not None:
        self.dictOfParamValues = IO.read_csv_cols_to_dictionary(
            file_name=csvfile_param_values, if_convert_float=True)
        for name in columns_to_be_deleted:
            del self.dictOfParamValues[name]
def __init__(self, csv_file_name):
    """ extracts cohort ids, mortality probabilities, and the associated likelihood weights
        from the csv file where the calibration results are stored
    :param csv_file_name: name of the csv file where the calibration results are stored
    """

    # read the columns of the csv file containing the calibration results
    cols = InOutSupport.read_csv_cols(
        file_name=csv_file_name,
        n_cols=3,
        if_ignore_first_row=True,
        if_convert_float=True)

    # store cohort ids, likelihood weights, and sampled mortality probabilities
    self.cohortIDs = cols[CalibrationColIndex.ID.value].astype(int)
    self.weights = cols[CalibrationColIndex.W.value]
    self.mortalityProbs = cols[CalibrationColIndex.MORT_PROB.value]
    self.multiCohorts = None  # multi-cohort
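For reference, a sketch of the calibration csv this reader expects; the layout is consistent with the CalibrationResults.csv written by sample_posterior earlier in this collection, and the values below are illustrative.

# CalibrationResults.csv (first row ignored as a header):
#   Cohort ID, Likelihood Weights, Mortality Prob
#   0,         0.0123,             0.13
#   1,         0.0087,             0.17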
from apace import VisOptimClasses as Vis
import SimPy.InOutFunctions as IO

# delete existing figures
IO.delete_files('.png', '../tests/VisualizeOptimization/optimization_figures/')

Vis.plot_all_opt_itrs(
    csv_directory='../tests/VisualizeOptimization/optimization_csvfiles/',
    save_plots_directory='../tests/VisualizeOptimization/optimization_figures/',
    # f_range=[0, 5*10e7],
    # x_ranges=[[0, 0.3], [0, 0.2]],
    # y_axis_labels=[r'$f(\tau, \theta)$', r'$\tau$', r'$\theta$']
)
import SimPy.InOutFunctions as IO
from covid19 import Support as Support

ICU_CAPACITY = 0.89 / 10000 * 2
MAX_I = 0.1

# MAX = 30 * ICU_CAPACITY
# MIN = 0
MAX = MAX_I / 5
MIN = 0.0005
N = 20

IO.write_csv(rows=Support.generate_triangular_scenarios(MIN, MAX, N),
             file_name='../csv_files/ICUPolicies.csv')
import SimPy.InOutFunctions as IO
import SimPy.Plots.Histogram as Hist
import SimPy.Plots.ProbDist as Plot
import SimPy.RandomVariateGenerators as RVGs
import SimPy.Statistics as Stat

# read weekly number of drinks
cols = IO.read_csv_cols(file_name='dataNumOfDrinks.csv',
                        n_cols=1,
                        if_ignore_first_row=True,
                        if_convert_float=True)

# make a histogram
Hist.plot_histogram(data=cols[0], title='Weekly Number of Drinks', bin_width=1)

# mean and standard deviation
stat = Stat.SummaryStat(name='Weekly number of drinks', data=cols[0])
print('Mean = ', stat.get_mean())
print('StDev = ', stat.get_stdev())

# fit a Poisson distribution
fit_results = RVGs.Poisson.fit_ml(data=cols[0])
print('Fitting a Poisson distribution:', fit_results)

# plot the fitted Poisson distribution
Plot.plot_poisson_fit(data=cols[0], fit_results=fit_results,
                      x_label='Weekly number of drinks',
                      x_range=(0, 40), bin_width=1)
import SimPy.InOutFunctions as IO
from apace import ScenariosClasses as Sce

scenario_keys = [
    'Base',
    '75% PTFU | No >1 FU | Drop % | No IPT',
    '75% PTFU | With >1 FU | Drop 15% | No IPT',
    '75% PTFU | No >1 FU | Drop % | With IPT',
    '75% PTFU | With >1 FU | Drop 15% | With IPT'
]

# data frame for scenario analysis
scenario_df = Sce.ScenarioDataFrame('csv_files/TBScenarios.csv')

# read parameter samples
parameter_values = IO.read_csv_cols_to_dictionary(
    file_name='csv_files/SampledParams.csv', delimiter=',', if_convert_float=True)

# create dictionaries of DALYs and cost
dict_DALY = {}
dict_cost = {}
for key in scenario_keys:
    dict_DALY[key] = scenario_df.scenarios[key].outcomes['DALY']
    dict_cost[key] = scenario_df.scenarios[key].outcomes['Total Cost']

# create dictionaries for dDALYs and dCost
# (the original snippet is truncated here; the completion below assumes the
# intended quantities are DALYs averted and additional cost relative to 'Base')
dict_dDALY = {}
dict_dCost = {}
for key in scenario_keys:
    if key != 'Base':
        dict_dDALY[key] = scenario_df.scenarios['Base'].outcomes['DALY'] \
                          - scenario_df.scenarios[key].outcomes['DALY']
        dict_dCost[key] = scenario_df.scenarios[key].outcomes['Total Cost'] \
                          - scenario_df.scenarios['Base'].outcomes['Total Cost']
import SimPy.InOutFunctions as IO
from covid19 import Support as Support

R_On = [1, 3]
R_Off = [0.3, 1]
I = [100, 500]
N = 3

RPolicies = Support.generate_square_policies(R_On, R_Off, N)
IPolicies = Support.generate_square_policies(I, I, N)

# combine each R policy with each I policy
RIPolicies = []
for r_policy in RPolicies:
    for i_policy in IPolicies:
        ri = r_policy.copy()
        ri.extend(i_policy)
        RIPolicies.append(ri)

IO.write_csv(rows=RIPolicies, file_name='../csv_files/ThresholdsRtI.csv')
import SimPy.Plots.EffectiveSampleSize as P
import SimPy.InOutFunctions as IO

# read likelihoods
data = IO.read_csv_cols(file_name='csv_files/TBLikelihoods.csv',
                        n_cols=1,
                        if_ignore_first_row=True,
                        if_convert_float=True)

P.plot_eff_sample_size(likelihood_weights=data[0],
                       if_randomize=True,
                       y_range=(0, 10),
                       file_name='results/EffSampleSize.png')
def write_to_csv(self, file_name, directory):
    io.write_csv(rows=self.wtpAndThresholds, file_name=file_name, directory=directory)
from SimPy import InOutFunctions as io

# ---------------
# first run TestCSVWritter.py to produce the csv file
# ---------------

# test reading by rows
rows = io.read_csv_rows('CSVFolder/myCSV.csv',
                        if_ignore_first_row=True,
                        if_convert_float=True)
print('Testing reading by rows:')
for row in rows:
    print(sum(row[1:]))

# test reading by columns
cols = io.read_csv_cols('CSVFolder/myCSV.csv',
                        n_cols=4,
                        if_ignore_first_row=True,
                        if_convert_float=True)
print('Testing reading by columns:')
for j in range(1, 4):
    print(sum(cols[j]))

# test reading by columns into a dictionary
dict_cols = io.read_csv_cols_to_dictionary('CSVFolder/myCSV.csv', if_convert_float=True)
print('Testing reading by columns into a dictionary:')
print(dict_cols)
def resample_param_values(self, csvfile_param_values_and_weights,
                          n, weight_col, csvfile_resampled_params,
                          sample_by_weight=True, columns_to_be_deleted=(), seed=0):
    """
    :param csvfile_param_values_and_weights: (string) csv file where the values of parameters
        along with their weights are provided. It assumes that
        1) the first row contains the names of all parameters,
        2) rows contain the weights and the parameter values, and
        3) rows are in decreasing order of parameter weights.
    :param n: (int) number of parameter values to resample
    :param weight_col: (int) index of the column where the weights of parameter values are located
    :param csvfile_resampled_params: (string) csv file where the resampled parameter values
        will be stored. The first row will be the names of parameters.
    :param sample_by_weight: (bool) set to True to sample parameters by weight.
        If set to False, the first n parameters will be selected.
    :param columns_to_be_deleted: (list of string) list of column names to be deleted from analysis
    :param seed: (int) seed of the random number generator to resample parameters
    """

    # read parameter weights and values
    rows_of_weights_and_parameter_values = IO.read_csv_rows(
        file_name=csvfile_param_values_and_weights,
        if_ignore_first_row=False,
        if_convert_float=True)

    # store the header
    rows_of_selected_param_values = [rows_of_weights_and_parameter_values[0]]

    if not sample_by_weight:
        # choose the first n parameter values
        for i, row in enumerate(rows_of_weights_and_parameter_values):
            if i > n:  # the first row is the header
                break
            elif i > 0 and row[weight_col] > 0:
                rows_of_selected_param_values.append(row)
    else:
        # collect the weights
        weights = []
        for row in rows_of_weights_and_parameter_values:
            weights.append(row[weight_col])
        del weights[0]  # header

        # sample rows in proportion to their weights
        rng = np.random.RandomState(seed=seed)
        sampled_indices = rng.choice(a=range(len(weights)), size=n, p=weights)

        # build sampled rows
        for i in sampled_indices:
            rows_of_selected_param_values.append(
                rows_of_weights_and_parameter_values[i + 1])

    IO.write_csv(rows=rows_of_selected_param_values,
                 file_name=csvfile_resampled_params)

    self.dictOfParamValues = IO.read_csv_cols_to_dictionary(
        file_name=csvfile_resampled_params, if_convert_float=True)

    # check that parameters have values
    for key, values in self.dictOfParamValues.items():
        if len(values) == 0:
            raise ValueError(
                'Parameter values are not provided in ' + csvfile_resampled_params + '.')

    for name in columns_to_be_deleted:
        del self.dictOfParamValues[name]
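A usage sketch for the resampling step above, assuming the method belongs to the parameter-analysis class built around dictOfParamValues; the class name and file names are illustrative assumptions.

analysis = ParameterAnalysis()  # hypothetical class name
analysis.resample_param_values(
    csvfile_param_values_and_weights='csv_files/ParamsAndWeights.csv',  # illustrative
    n=500,
    weight_col=0,
    csvfile_resampled_params='csv_files/ResampledParams.csv',           # illustrative
    sample_by_weight=True,
    seed=0)
# analysis.dictOfParamValues now maps each parameter name to its 500 resampled values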
from SimPy import InOutFunctions as OutSupport

myList = [['Col1', 'Col2', 'Col3']]
for i in range(1, 10):
    myList.append([i, 2 * i, 3 * i])

OutSupport.write_csv(file_name='myCSV.csv', rows=myList)
from apace import VisOptimClasses as Vis
import SimPy.InOutFunctions as IO

# delete existing figures
IO.delete_files('.png', 'figures/optimization_figs/')

Vis.plot_all_opt_itrs(
    csv_directory='csv_files/optimization_csvs/',
    n_vars=2,
    save_plots_directory='figures/optimization_figs/',
    show_titles=False,
    f_range=[-7500, -5000],
    # var_ranges=[[0, 5], [-1, 0], [0, 0.5], [0, 1000], [-1, 0], [0, 1]],
    y_axis_labels=[
        r'$f$', r'$\tau_1$', r'$\tau_2$',
        r'$R: \rho$', r'$\%I: \tau_0$', r'$\%I: \tau_1$', r'$\%I: \rho$'
    ],  # , r'$\rho_1$'
    window=50)
def export_results(self, directory):

    rows = [[self.seed, self.sum]]
    name = 'Seed ' + str(self.seed) + '.csv'
    IO.write_csv(rows=rows, file_name=name, directory=directory)
from SimPy import InOutFunctions as OutSupport

myList = [['Col1', 'Col2', 'Col3', 'Col4']]
# myList.append(['s1', 's2', 's3'])
# myList.append(['A', '-100', '1,000'])
for i in range(1, 10):
    myList.append(['row' + str(i), i, 2 * i, 3 * i])

OutSupport.write_csv(file_name='myCSV.csv', rows=myList, directory='CSVFolder')
import SimPy.InOutFunctions as IO
from covid19 import Support as Support

Incidence = [1500, 14000]
DeltaIncidence = [50, 3000]
N = 10

IO.write_csv(rows=Support.generate_square_policies(Incidence, DeltaIncidence, N),
             file_name='../csv_files/IncidencePolicies.csv')
from SimPy import InOutFunctions as InOutSupport

# test reading by rows
rows = InOutSupport.read_csv_rows('myCSV.csv',
                                  if_ignore_first_row=True,
                                  if_convert_float=True)
print('Testing reading by rows:')
for row in rows:
    print(sum(row))

# test reading by columns
cols = InOutSupport.read_csv_cols('myCSV.csv',
                                  n_cols=3,
                                  if_ignore_first_row=True,
                                  if_convert_float=True)
print('Testing reading by columns:')
for j in range(0, 3):
    print(sum(cols[j]))

# test reading by columns into a dictionary
dict_cols = InOutSupport.read_csv_cols_to_dictionary('myCSV.csv', if_convert_float=True)
print('Testing reading by columns into a dictionary:')
print(dict_cols)
def write_to_csv(self, file_name, directory):
    io.write_csv(rows=self.wtpToffTon, file_name=file_name, directory=directory)
import SimPy.InOutFunctions as IO
from covid19 import Support as Support

R_On = [1, 3]
R_Off = [0.2, 1]
N = 10

IO.write_csv(rows=Support.generate_square_policies(R_On, R_Off, N),
             file_name='../csv_files/ThresholdsRt.csv')