def get_dict_of_priors(prior_info_csv_file):
    """Build a lookup of prior-information rows keyed by parameter name.

    :param prior_info_csv_file: (string) csv file holding the prior information;
        its first row is skipped when reading.
    :return: (dict) maps the entry found in the NAME column of each row
        (``ColumnsPriorDistCSV.NAME.value``) to the complete row.
        If a name appears more than once, the last row with that name wins.
    """

    prior_rows = IO.read_csv_rows(
        file_name=prior_info_csv_file,
        if_ignore_first_row=True,
        delimiter=',',
        if_convert_float=True
    )

    return {
        record[ColumnsPriorDistCSV.NAME.value]: record
        for record in prior_rows
    }
Beispiel #2
0
from SimPy import InOutFunctions as InOutSupport

# read the csv file row by row and print the sum of each row
rows = InOutSupport.read_csv_rows(
    'myCSV',
    if_del_first_row=True,
    if_convert_float=True)
print('Testing reading by rows:')
for record in rows:
    print(sum(record))

# read the csv file column by column and print the sum of each of the 3 columns
cols = InOutSupport.read_csv_cols(
    'myCSV',
    n_cols=3,
    if_ignore_first_row=True,
    if_convert_float=True)
print('Testing reading by columns:')
for col_index in range(3):
    print(sum(cols[col_index]))

# test reading by columns into a dictionary
dict_cols = InOutSupport.read_csv_cols_to_dictionary(
    'myCSV',
    if_convert_float=True)
print('Testing reading by columns into a dictionary:')
print(dict_cols)
Beispiel #3
0
from SimPy import InOutFunctions as io

# ---------------
# prerequisite: run TestCSVWritter.py first so that the csv file exists
# ---------------

# read the csv file row by row; the first entry of each row is left out of the sum
rows = io.read_csv_rows(
    'CSVFolder/myCSV.csv',
    if_ignore_first_row=True,
    if_convert_float=True)
print('Testing reading by rows:')
for record in rows:
    print(sum(record[1:]))

# read 4 columns; the first column (index 0) is left out when summing
cols = io.read_csv_cols(
    'CSVFolder/myCSV.csv',
    n_cols=4,
    if_ignore_first_row=True,
    if_convert_float=True)
print('Testing reading by columns:')
for col_index in range(1, 4):
    print(sum(cols[col_index]))

# test reading by columns into a dictionary
dict_cols = io.read_csv_cols_to_dictionary(
    'CSVFolder/myCSV.csv',
    if_convert_float=True)

print('Testing reading by columns into a dictionary:')
print(dict_cols)
    def resample_param_values(self, csvfile_param_values_and_weights,
                              n, weight_col, csvfile_resampled_params,
                              sample_by_weight=True, columns_to_be_deleted=(), seed=0):
        """
        Selects n rows of parameter values (either the first n or a weighted random sample),
        writes them to a csv file, and loads that file into self.dictOfParamValues.

        :param csvfile_param_values_and_weights: (string) csv file where the values of parameters
            along with their weights are provided.
            It assumes that
                1) the first row contains the name of all parameters
                2) rows contains the weight and the parameter values
                3) rows are in decreasing order of parameter weights
        :param n: (int) number of parameter values to resample
        :param weight_col: (int) index of the column where the weights of parameter values are located.
        :param csvfile_resampled_params: (string) csv file where the resampled parameter values will be stored.
            The first row will be the names of parameters.
        :param sample_by_weight: (bool) set to True to sample rows (with the weights used as the
            selection probabilities passed to numpy's RandomState.choice).
            If set to False, the first n rows are scanned and those with a positive weight are kept.
        :param columns_to_be_deleted: (list of string) names of columns to remove from
            self.dictOfParamValues after loading
        :param seed: (int) seed of the random number generator to resample parameters
        :raises ValueError: if a column of the resampled file has no values
        """

        # load header + data rows
        all_rows = IO.read_csv_rows(
            file_name=csvfile_param_values_and_weights,
            if_ignore_first_row=False,
            if_convert_float=True)

        # the output always starts with the header row
        selected_rows = [all_rows[0]]

        if sample_by_weight:
            # collect the weight column and drop the header entry
            weights = [record[weight_col] for record in all_rows]
            weights.pop(0)

            # draw n row indices using the weights as probabilities
            rng = np.random.RandomState(seed=seed)
            sampled_indices = rng.choice(a=range(len(weights)), size=n, p=weights)

            # shift by one because index 0 of all_rows is the header
            selected_rows.extend(all_rows[idx + 1] for idx in sampled_indices)
        else:
            # walk through the first n data rows, keeping those with a positive weight
            for position, record in enumerate(all_rows[1:], start=1):
                if position > n:
                    break
                if record[weight_col] > 0:
                    selected_rows.append(record)

        IO.write_csv(rows=selected_rows, file_name=csvfile_resampled_params)

        # read the resampled file back, one entry per column
        self.dictOfParamValues = IO.read_csv_cols_to_dictionary(
            file_name=csvfile_resampled_params,
            if_convert_float=True)

        # every column must contain at least one value
        for values in self.dictOfParamValues.values():
            if len(values) == 0:
                raise ValueError('Parameter values are not provided in '+csvfile_resampled_params+'.')

        # drop the columns that should not be part of the analysis
        for name in columns_to_be_deleted:
            del self.dictOfParamValues[name]
    def plot_histograms(self,
                        ids=None,
                        csv_file_name_prior=None,
                        posterior_fig_loc='figures_national'):
        """ creates histograms of parameters specified by ids
        :param ids: (list) list of parameter ids (if None, a histogram is created for every parameter)
        :param csv_file_name_prior: (string) filename where parameter prior ranges are located.
            If None, the parameter name is used as the title, no multiplier is applied,
            and no x-axis range is passed to the plotting function.
        :param posterior_fig_loc: (string) location where posterior figures_national should be located
        """

        # clean the directory
        IO.delete_files('.png', posterior_fig_loc)

        # read prior distributions (stays None when no prior file is provided)
        priors = None
        if csv_file_name_prior is not None:
            priors = IO.read_csv_rows(file_name=csv_file_name_prior,
                                      if_ignore_first_row=True,
                                      delimiter=',',
                                      if_convert_float=True)

        # for all parameters, read sampled parameter values and create the histogram
        par_id = 0
        for key, par_values in self.dictOfParams.items():

            # skip these columns
            if key in ['Simulation Replication', 'Random Seed']:
                continue

            # create the histogram if requested for this parameter
            if ids is None or par_id in ids:

                # defaults used when no prior information is available
                x_range = None
                title = key
                multiplier = 1

                if priors is not None:
                    prior = priors[par_id]

                    # find prior range
                    try:
                        x_range = [
                            float(prior[Column.LB.value]),
                            float(prior[Column.UB.value])
                        ]
                    except (IndexError, TypeError, ValueError):
                        print(
                            'Could not convert string to float to find the prior distribution of parameter:',
                            par_id)

                    # find title (fall back to the name when no title is given)
                    if prior[Column.TITLE.value] in ('', None):
                        title = prior[Column.NAME.value]
                    else:
                        title = prior[Column.TITLE.value]

                    # find multiplier (1 when not specified)
                    if prior[Column.MULTIPLIER.value] not in ('', None):
                        multiplier = float(prior[Column.MULTIPLIER.value])

                # find the filename the histogram should be saved as
                # (the backslash is escaped so the resulting string is unchanged
                # but no longer relies on an invalid escape sequence)
                file_name = posterior_fig_loc + '\\Par-' + str(
                    par_id) + ' ' + F.proper_file_name(key)

                # scale the range (when known) and the sampled values
                if x_range is not None:
                    x_range = [x * multiplier for x in x_range]
                par_values = [v * multiplier for v in par_values]

                # plot histogram
                Fig.plot_histogram(data=par_values,
                                   title=title.replace('!', '\n'),
                                   x_range=x_range,
                                   figure_size=HISTOGRAM_FIG_SIZE,
                                   file_name=file_name)

            # move to the next parameter
            par_id += 1
    def calculate_means_and_intervals(self,
                                      poster_file='ParameterEstimates.csv',
                                      significance_level=0.05,
                                      deci=3,
                                      ids=None,
                                      csv_file_name_prior=None):
        """ calculate the mean and credible intervals of parameters specified by ids
        and write them to a csv file
        :param poster_file: csv file where the posterior ranges should be stored
        :param significance_level: significance level passed to SummaryStat.get_PI
            to calculate the interval
        :param deci: number of decimals used to format the results
            when no prior file is provided
        :param ids: (list) list of parameter ids (if None, all parameters are included)
        :param csv_file_name_prior: (string) filename where parameter prior ranges are located.
            When provided, the number of decimals, the format, and the multiplier
            of each parameter are read from this file.
        :return: None (rows of [parameter id, name, mean, interval] are written to poster_file)
        """

        # read prior distributions
        priors = None
        if csv_file_name_prior is not None:
            priors = IO.read_csv_rows(file_name=csv_file_name_prior,
                                      if_ignore_first_row=True,
                                      delimiter=',',
                                      if_convert_float=True)

        results = []  # list of parameter estimates and credible intervals

        par_id = 0
        for key, value in self.dictOfParams.items():

            # skip these columns
            if key in ['Simulation Replication', 'Random Seed']:
                continue

            # record the calculated estimate and credible interval
            # if requested for this parameter
            if ids is None or par_id in ids:

                if priors is None:
                    decimal = deci
                    form = ''
                    multip = 1
                else:
                    prior = priors[par_id]
                    decimal = prior[Column.DECI.value]
                    # a missing or empty cell means no decimals
                    decimal = 0 if decimal in ('', None) else decimal
                    form = prior[Column.FORMAT.value]
                    multip = prior[Column.MULTIPLIER.value]

                # an empty multiplier cell is treated like a missing one
                # (consistent with how the plotting methods read this column)
                if multip in ('', None):
                    data = value
                else:
                    multip = float(multip)
                    data = [multip * x for x in value]

                sum_stat = Stat.SummaryStat(name=key, data=data)
                mean_text = Format.format_number(number=sum_stat.get_mean(),
                                                 deci=decimal,
                                                 format=form)
                PI_text = Format.format_interval(
                    interval=sum_stat.get_PI(significance_level),
                    deci=decimal,
                    format=form)

                results.append([par_id, key, mean_text, PI_text])

            # next parameter
            par_id += 1

        # write parameter estimates and credible intervals
        IO.write_csv(rows=results, file_name=poster_file)
    def plot_pairwise(self,
                      ids=None,
                      csv_file_name_prior=None,
                      fig_filename='pairwise_correlation.png',
                      figure_size=(10, 10)):
        """ creates pairwise correlation between parameters specified by ids
        (histograms on the diagonal, scatter plots with a fitted line and
        the Pearson correlation coefficient elsewhere)
        :param ids: (list) list of parameter ids (if None, all parameters are included)
        :param csv_file_name_prior: (string) filename where parameter prior ranges are located.
            If None, the parameter name is used as the label, no multiplier is applied,
            and the axis ranges are left unset.
        :param fig_filename: (string) filename to save the figure as
        :param figure_size: (tuple) figure size
        """

        # read prior distributions
        priors = None
        if csv_file_name_prior is not None:
            priors = IO.read_csv_rows(file_name=csv_file_name_prior,
                                      if_ignore_first_row=True,
                                      delimiter=',',
                                      if_convert_float=True)

        # find the info of parameters to include in the analysis
        info_of_params_to_include = []

        par_id = 0
        for key, par_values in self.dictOfParams.items():

            # skip these columns
            if key in ['Simulation Replication', 'Random Seed']:
                continue

            # include this parameter if requested
            if ids is None or par_id in ids:

                # defaults used when no prior information is available
                x_range = None
                label = key
                multiplier = 1

                if priors is not None:
                    prior = priors[par_id]

                    # find prior range
                    try:
                        x_range = [
                            float(prior[Column.LB.value]),
                            float(prior[Column.UB.value])
                        ]
                    except (IndexError, TypeError, ValueError):
                        print(
                            'Could not convert string to float to find the prior distribution of parameter:',
                            par_id)

                    # find label (fall back to the name when no title is given)
                    if prior[Column.TITLE.value] in ('', None):
                        label = prior[Column.NAME.value]
                    else:
                        label = prior[Column.TITLE.value]

                    # find multiplier (1 when not specified)
                    if prior[Column.MULTIPLIER.value] not in ('', None):
                        multiplier = float(prior[Column.MULTIPLIER.value])

                # scale the range (when known) and the sampled values
                if x_range is not None:
                    x_range = [x * multiplier for x in x_range]
                par_values = [v * multiplier for v in par_values]

                # append the info for this parameter
                info_of_params_to_include.append(
                    ParamInfo(idx=par_id,
                              name=key,
                              label=label.replace('!', '\n'),
                              values=par_values,
                              range=x_range))

            # move to the next parameter
            par_id += 1

        # plot pairwise
        # set default properties of the figure
        plt.rc('font', size=6)  # fontsize of texts
        plt.rc('axes', titlesize=6)  # fontsize of the figure title
        plt.rc('axes',
               titleweight='semibold')  # fontweight of the figure title

        # plot each panel
        n = len(info_of_params_to_include)
        # squeeze=False keeps axarr 2-dimensional even when n == 1,
        # so that axarr[i, j] and axarr[:, 0] below are always valid
        f, axarr = plt.subplots(nrows=n, ncols=n, figsize=figure_size,
                                squeeze=False)

        for i in range(n):
            row_info = info_of_params_to_include[i]  # parameter on the y-axis
            for j in range(n):
                col_info = info_of_params_to_include[j]  # parameter on the x-axis

                # get the current axis
                ax = axarr[i, j]

                # label only the first column and the last row
                if j == 0:
                    ax.set_ylabel(row_info.label)
                if i == n - 1:
                    ax.set_xlabel(col_info.label)

                if i == j:
                    # plot histogram
                    Fig.add_histogram_to_ax(
                        ax=ax,
                        data=row_info.values,
                        x_range=row_info.range)
                    ax.set_yticklabels([])
                    ax.set_yticks([])

                else:
                    ax.scatter(col_info.values,
                               row_info.values,
                               alpha=0.5,
                               s=2)
                    ax.set_xlim(col_info.range)
                    ax.set_ylim(row_info.range)
                    # correlation line
                    b, m = polyfit(col_info.values,
                                   row_info.values, 1)
                    ax.plot(col_info.values,
                            b + m * col_info.values,
                            '-',
                            c='black')
                    corr, p = pearsonr(col_info.values,
                                       row_info.values)
                    ax.text(0.95,
                            0.95,
                            '{0:.2f}'.format(corr),
                            transform=ax.transAxes,
                            fontsize=6,
                            va='top',
                            ha='right')

        f.align_ylabels(axarr[:, 0])
        f.tight_layout()
        f.savefig(fig_filename, bbox_inches='tight', dpi=300)