history_thin=1,
        model_name='earm_smallest_dreamzs_5chain2',
        verbose=True)

    # Save sampling output (sampled parameter values and their corresponding logps).
    for chain in range(len(sampled_params)):
        np.save(
            'earm_smallest_dreamzs_5chain_sampled_params_chain_' + str(chain) +
            '_' + str(total_iterations), sampled_params[chain])
        np.save(
            'earm_smallest_dreamzs_5chain_logps_chain_' + str(chain) + '_' +
            str(total_iterations), log_ps[chain])

    # Check convergence and continue sampling if not converged

    GR = Gelman_Rubin(sampled_params)
    print('At iteration: ', total_iterations, ' GR = ', GR)
    np.savetxt(
        'earm_smallest_dreamzs_5chain_GelmanRubin_iteration_' +
        str(total_iterations) + '.txt', GR)

    old_samples = sampled_params
    if np.any(GR > 1.2):
        starts = [sampled_params[chain][-1, :] for chain in range(nchains)]
        while not converged:
            total_iterations += niterations
            # Options below mirror the initial run_dream call above; restart
            # resumes sampling from the saved history.
            sampled_params, log_ps = run_dream(
                parameters=sampled_parameter_names,
                likelihood=likelihood,
                niterations=niterations,
                nchains=nchains,
                start=starts,
                history_thin=1,
                model_name='earm_smallest_dreamzs_5chain2',
                verbose=True,
                restart=True)
Example #2
def DREAM_fit(model, priors_list, posterior, start_params,
              sampled_param_names, niterations, nchains, sim_name,
              save_dir, custom_params=None, GR_cutoff=1.2, iteration_cutoff=1E7,
              verbose=True, plot_posteriors=True):
    """
    The DREAM fitting algorithm as implemented in run_dream(), plus decorations
    for saving run parameters, checking convergence, and post-fitting analysis.
    """
    converged = False
    total_iterations = niterations
    np.save(os.path.join(save_dir, 'param_names.npy'), sampled_param_names)
    with open(os.path.join(save_dir, 'init_params.pkl'), 'wb') as f:
        pickle.dump(dict(model.parameters), f)

    # Run DREAM sampling.  Documentation of DREAM options is in Dream.py.
    sampled_params, log_ps = run_dream(priors_list, posterior,
                                       start=start_params,
                                       niterations=niterations,
                                       nchains=nchains,
                                       multitry=True, parallel=True,
                                       gamma_levels=4, adapt_gamma=True,
                                       history_thin=1, model_name=sim_name,
                                       verbose=verbose)

    # Save sampling output (sampled param values and their corresponding logps)
    for chain in range(len(sampled_params)):
        np.save(os.path.join(save_dir, sim_name + '_' + str(chain) + '_' + str(total_iterations)), sampled_params[chain])
        np.save(os.path.join(save_dir, sim_name + '_' + str(chain) + '_logPs_' + str(total_iterations)), log_ps[chain])

    # Check convergence and continue sampling if not converged

    GR = Gelman_Rubin(sampled_params)
    print('At iteration: ', total_iterations, ' GR = ', GR)
    np.savetxt(os.path.join(save_dir, sim_name + '_' + str(total_iterations) + '.txt'), GR)

    old_samples = sampled_params
    if np.any(GR > GR_cutoff):
        starts = [sampled_params[chain][-1, :] for chain in range(nchains)]
        while not converged:
            total_iterations += niterations

            sampled_params, log_ps = run_dream(priors_list, posterior,
                                               start=starts,
                                               niterations=niterations,
                                               nchains=nchains,
                                               multitry=True, parallel=True,
                                               gamma_levels=4,
                                               adapt_gamma=True,
                                               history_thin=1,
                                               model_name=sim_name,
                                               verbose=verbose, restart=True)

            for chain in range(len(sampled_params)):
                np.save(os.path.join(save_dir, sim_name + '_' + str(chain) + '_' + str(total_iterations)), sampled_params[chain])
                np.save(os.path.join(save_dir, sim_name + '_' + str(chain) + '_logPs_' + str(total_iterations)), log_ps[chain])

            old_samples = [np.concatenate((old_samples[chain], sampled_params[chain])) for chain in range(nchains)]
            GR = Gelman_Rubin(old_samples)
            print('At iteration: ', total_iterations, ' GR = ', GR)
            np.savetxt(os.path.join(save_dir, sim_name + '_' + str(total_iterations)+'.txt'), GR)

            if np.all(GR < GR_cutoff) or total_iterations >= iteration_cutoff:
                converged = True

    log_ps = np.array(log_ps)
    sampled_params = np.array(sampled_params)

    try:
        # Maximum posterior model:
        max_in_each_chain = [np.argmax(chain) for chain in log_ps]
        global_max_chain_idx = np.argmax([log_ps[chain][max_idx] for chain, max_idx in enumerate(max_in_each_chain)])
        ml_params = sampled_params[global_max_chain_idx, max_in_each_chain[global_max_chain_idx]]
        ml_model = {pname: 10 ** pvalue for pname, pvalue in zip(sampled_param_names, ml_params)}
        with open(os.path.join(save_dir, sim_name + '_ML_params.txt'), 'w') as f:
            print(ml_model, file=f)
        # Maximum posterior for each chain
        ml_samples = [{pname: 10 ** pvalue for pname, pvalue in zip(sampled_param_names, sampled_params[chain_idx, max_in_each_chain[chain_idx]])} for chain_idx in range(nchains)]
        with open(os.path.join(save_dir, sim_name + '_ML_samples.txt'), 'w') as f:
            print(ml_samples, file=f)

    except IndexError:
        print("IndexError finding maximum posterior parameters")

    try:
        # Compute burn-in
        total_iterations = len(old_samples[0])
        burnin = int(total_iterations / 2)
        samples = np.concatenate([chain[burnin:, :] for chain in old_samples])
        np.save(os.path.join(save_dir, sim_name+'_samples'), samples)

        # Basic statistics
        mean_parameters = np.mean(samples, axis=0)
        median_parameters = np.median(samples, axis=0)
        np.save(os.path.join(save_dir, 'mean_parameters'), mean_parameters)
        np.save(os.path.join(save_dir, 'median_parameters'), median_parameters)
        df = pd.DataFrame(samples, columns=sampled_param_names)
        df.describe().to_csv(os.path.join(save_dir,
                             'descriptive_statistics.csv'))

        if plot_posteriors:
            # Prepare plot canvas
            ndims = len(old_samples[0][0])
            colors = sns.color_palette(n_colors=ndims)
            priors_dict = dict(list(zip(sampled_param_names, priors_list)))
            # Compute the divisors of ndims and take the middle pair so the
            # subplot grid is as near square as possible (sorted, since a set
            # has no defined order; for perfect squares the middle divisors
            # coincide):
            f1 = sorted(set(reduce(list.__add__, ([i, ndims//i] for i in range(1, int(ndims**0.5) + 1) if ndims % i == 0))))
            mid = len(f1) // 2
            ncols = f1[mid - 1] if len(f1) % 2 == 0 else f1[mid]
            nrows = f1[mid]
            fig, axes = plt.subplots(nrows=nrows, ncols=ncols)

            # Plot posterior distributions
            for dim, ax in enumerate(fig.axes):
                p = sampled_param_names[dim]
                sns.histplot(samples[:, dim], color=colors[dim], ax=ax, kde=True, stat='density')
                xrange = np.arange(priors_dict[p][0] - 3 * priors_dict[p][1],
                                   priors_dict[p][0] + 3 * priors_dict[p][1], 0.01)
                yrange = norm.pdf(xrange, priors_dict[p][0], priors_dict[p][1])
                ax.plot(xrange, yrange, 'k--')
                ax.set_xlabel(p)
                ax.set_ylabel(None)
                ax.spines['right'].set_visible(False)
                ax.spines['top'].set_visible(False)
            plt.tight_layout()
            plt.savefig(os.path.join(save_dir, sim_name + '_posteriors.pdf'))
            plt.close()

            # Create pairplot
            g = sns.pairplot(df)
            for i, j in zip(*np.triu_indices_from(g.axes, 1)):
                g.axes[i, j].set_visible(False)
            g.savefig(os.path.join(save_dir, 'corner_plot.png'))

    except (ImportError, OSError, AttributeError, TypeError):
        pass

    # Clean up stray PyDREAM history/crossover files.  shutil.move does not
    # expand wildcards, so expand the pattern with glob (requires `import glob`).
    for stray in glob.glob(os.path.join(os.getcwd(), '*_DREAM_chain_*.*')):
        shutil.move(stray, save_dir)
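
For orientation, here is a minimal sketch of how the DREAM_fit above might be driven. The toy model, log-posterior, starting points, and file names are hypothetical stand-ins, and plot_posteriors is left off because the plotting branch indexes each prior as a (loc, scale) pair rather than a SampledParam object.

import os
import numpy as np
from scipy.stats import norm
from pydream.parameters import SampledParam

class ToyModel:
    # Stand-in for a PySB-style model exposing a .parameters mapping.
    parameters = {'kf': 1e-6, 'kr': 1e-3}

def toy_posterior(params):
    # Toy log-posterior over log10-scaled parameters.
    return float(norm.logpdf(params, loc=-4.0, scale=2.0).sum())

names = list(ToyModel.parameters)
priors = [SampledParam(norm, loc=np.log10(v), scale=2.0)
          for v in ToyModel.parameters.values()]
starts = [np.log10(list(ToyModel.parameters.values())) for _ in range(3)]

os.makedirs('toy_run', exist_ok=True)
DREAM_fit(ToyModel(), priors, toy_posterior, starts, names,
          niterations=1000, nchains=3, sim_name='toy',
          save_dir='toy_run', plot_posteriors=False)
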
def fit_with_DREAM(sim_name, parameter_dict, likelihood):
    original_params = list(parameter_dict.values())

    priors_list = []
    for p in original_params:
        priors_list.append(SampledParam(norm, loc=np.log(p), scale=1.0))
    # Set simulation parameters
    niterations = 10000
    converged = False
    total_iterations = niterations
    nchains = 5

    # Make save directory
    today = datetime.now()
    save_dir = "PyDREAM_" + today.strftime('%d-%m-%Y') + "_" + str(niterations)
    os.makedirs(os.path.join(os.getcwd(), save_dir), exist_ok=True)

    # Run DREAM sampling.  Documentation of DREAM options is in Dream.py.
    sampled_params, log_ps = run_dream(priors_list, likelihood, start=np.log(original_params),
                                       niterations=niterations, nchains=nchains, multitry=False,
                                       gamma_levels=4, adapt_gamma=True, history_thin=1, model_name=sim_name,
                                       verbose=True)

    # Save sampling output (sampled parameter values and their corresponding logps).
    for chain in range(len(sampled_params)):
        np.save(os.path.join(save_dir, sim_name + '_' + str(chain) + '_' + str(total_iterations)), sampled_params[chain])
        np.save(os.path.join(save_dir, sim_name + '_' + str(chain) + '_logps_' + str(total_iterations)), log_ps[chain])

    # Check convergence and continue sampling if not converged

    GR = Gelman_Rubin(sampled_params)
    print('At iteration: ', total_iterations, ' GR = ', GR)
    np.savetxt(os.path.join(save_dir, sim_name + '_' + str(total_iterations) + '.txt'), GR)

    old_samples = sampled_params
    if np.any(GR > 1.2):
        starts = [sampled_params[chain][-1, :] for chain in range(nchains)]
        while not converged:
            total_iterations += niterations

            sampled_params, log_ps = run_dream(priors_list, likelihood, start=starts, niterations=niterations,
                                               nchains=nchains, multitry=False, gamma_levels=4, adapt_gamma=True,
                                               history_thin=1, model_name=sim_name, verbose=True, restart=True)

            for chain in range(len(sampled_params)):
                np.save(os.path.join(save_dir, sim_name + '_' + str(chain) + '_' + str(total_iterations)),
                        sampled_params[chain])
                np.save(os.path.join(save_dir, sim_name + '_' + str(chain) + '_logps_' + str(total_iterations)),
                        log_ps[chain])

            old_samples = [np.concatenate((old_samples[chain], sampled_params[chain])) for chain in range(nchains)]
            GR = Gelman_Rubin(old_samples)
            print('At iteration: ', total_iterations, ' GR = ', GR)
            np.savetxt(os.path.join(save_dir, sim_name + '_' + str(total_iterations) + '.txt'), GR)

            if np.all(GR < 1.2):
                converged = True
    try:
        # Plot output
        total_iterations = len(old_samples[0])
        burnin = int(total_iterations / 2)
        samples = np.concatenate([chain[burnin:, :] for chain in old_samples])
        np.save(os.path.join(save_dir, sim_name+'_samples'), samples)
        ndims = len(old_samples[0][0])
        colors = sns.color_palette(n_colors=ndims)
        for dim in range(ndims):
            fig = plt.figure()
            # sns.distplot has been removed from seaborn; histplot(kde=True) replaces it
            sns.histplot(samples[:, dim], color=colors[dim], kde=True, stat='density')
            fig.savefig(os.path.join(save_dir, sim_name + '_dimension_' + str(dim) + '_' + list(parameter_dict.keys())[dim] + '.pdf'))
            plt.close(fig)

        # Convert to dataframe
        df = pd.DataFrame(samples, columns=parameter_dict.keys())
        g = sns.pairplot(df)
        for i, j in zip(*np.triu_indices_from(g.axes, 1)):
            g.axes[i,j].set_visible(False)
        g.savefig(os.path.join(save_dir, 'corner_plot.pdf'))

        # Basic statistics
        mean_parameters = np.mean(samples, axis=0)
        median_parameters = np.median(samples, axis=0)
        np.save(os.path.join(save_dir, 'mean_parameters'), mean_parameters)
        np.save(os.path.join(save_dir, 'median_parameters'), median_parameters)
        df.describe().to_csv(os.path.join(save_dir, 'descriptive_statistics.csv'))

    except ImportError:
        pass
    return 0
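
Similarly, a minimal sketch of calling fit_with_DREAM. The rate dictionary and Gaussian log-likelihood are hypothetical; note that fit_with_DREAM samples in natural-log space, so the likelihood receives log-parameters.

import numpy as np
from scipy.stats import norm

rates = {'k_on': 1e-3, 'k_off': 5e-2}  # hypothetical rate constants to fit

def toy_likelihood(log_params):
    # Toy Gaussian log-likelihood centred on the logs of the "true" rates.
    return float(norm.logpdf(log_params, loc=np.log([1e-3, 5e-2]), scale=1.0).sum())

fit_with_DREAM('toy_sim', rates, toy_likelihood)
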
Example #4
converged = False
if __name__ == '__main__':
    sampled_params, log_ps = run_dream(
        parameters=sampled_params_list,
        likelihood=likelihood,
        niterations=niterations,
        nchains=nchains,
        multitry=False,
        gamma_levels=6,
        nCR=6,
        snooker=0.4,
        adapt_gamma=False,
        history_thin=1,
        model_name='dreamzs_5chain_NEv2_Sage_test_NM',
        verbose=True)
    total_iterations = niterations
    # Save sampling output (sampled parameter values and their corresponding logps).
    for chain in range(len(sampled_params)):
        np.save(
            'dreamzs_5chain_NEv2_Sage_test_NM_sampled_params_chain_' +
            str(chain) + '_' + str(total_iterations), sampled_params[chain])
        np.save(
            'dreamzs_5chain_NEv2_Sage_test_NM_logps_chain_' + str(chain) +
            '_' + str(total_iterations), log_ps[chain])

    GR = Gelman_Rubin(sampled_params)
    print('At iteration: ', total_iterations, ' GR = ', GR)
    np.savetxt(
        'dreamzs_5chain_NEv2_Sage_test_NM_GelmanRubin_iteration_' +
        str(total_iterations) + '.txt', GR)
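
All of these examples gate convergence on PyDREAM's Gelman_Rubin helper, which takes the list of per-chain sample arrays and returns one potential scale reduction factor per parameter; values below roughly 1.2 are treated as converged. A toy sketch of the shape convention it expects (the random chains here are purely illustrative):

import numpy as np
from pydream.convergence import Gelman_Rubin

# Three fake chains of 1000 draws over 2 parameters, shaped like the
# sampled_params list that run_dream returns.
rng = np.random.default_rng(0)
fake_chains = [rng.normal(size=(1000, 2)) for _ in range(3)]

GR = Gelman_Rubin(fake_chains)   # one value per parameter column
print(GR, np.all(GR < 1.2))      # same-distribution chains => GR near 1
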
Example #5
def DREAM_fit(model, priors_list, posterior, start_params,
              sampled_param_names, niterations, nchains, sim_name,
              save_dir, custom_params=None, GR_cutoff=1.2):
    """
    The DREAM fitting algorithm as implemented in run_dream(), plus decorations
    for saving run parameters, checking convergence, and post-fitting analysis.
    """
    converged = False
    total_iterations = niterations

    # Run DREAM sampling.  Documentation of DREAM options is in Dream.py.
    sampled_params, log_ps = run_dream(priors_list, posterior,
                                       start=start_params,
                                       niterations=niterations,
                                       nchains=nchains,
                                       multitry=False,
                                       gamma_levels=4, adapt_gamma=True,
                                       history_thin=1, model_name=sim_name,
                                       verbose=True)

    # Save sampling output (sampled param values and their corresponding logps)
    for chain in range(len(sampled_params)):
        np.save(os.path.join(save_dir, sim_name + '_' + str(chain) + '_' +
                             str(total_iterations)), sampled_params[chain])
        np.save(os.path.join(save_dir, sim_name + '_' + str(chain) + '_logPs_' +
                             str(total_iterations)), log_ps[chain])

    # Check convergence and continue sampling if not converged

    GR = Gelman_Rubin(sampled_params)
    print('At iteration: ', total_iterations, ' GR = ', GR)
    np.savetxt(os.path.join(save_dir, sim_name + '_' + str(total_iterations) +
                            '.txt'), GR)

    old_samples = sampled_params
    if np.any(GR > GR_cutoff):
        starts = [sampled_params[chain][-1, :] for chain in range(nchains)]
        while not converged:
            total_iterations += niterations

            sampled_params, log_ps = run_dream(priors_list, posterior,
                                               start=starts,
                                               niterations=niterations,
                                               nchains=nchains, multitry=False,
                                               gamma_levels=4,
                                               adapt_gamma=True,
                                               history_thin=1,
                                               model_name=sim_name,
                                               verbose=True, restart=True)

            for chain in range(len(sampled_params)):
                np.save(os.path.join(save_dir, sim_name + '_' + str(chain) +
                                     '_' + str(total_iterations)),
                        sampled_params[chain])
                np.save(os.path.join(save_dir, sim_name + '_' + str(chain) +
                                     '_logPs_' + str(total_iterations)),
                        log_ps[chain])

            old_samples = [np.concatenate((old_samples[chain],
                           sampled_params[chain])) for chain in range(nchains)]
            GR = Gelman_Rubin(old_samples)
            print('At iteration: ', total_iterations, ' GR = ', GR)
            np.savetxt(os.path.join(save_dir, sim_name + '_' +
                                    str(total_iterations)+'.txt'), GR)

            if np.all(GR < GR_cutoff):
                converged = True

    log_ps = np.array(log_ps)
    sampled_params = np.array(sampled_params)

    try:
        # Maximum posterior model:
        max_in_each_chain = [np.argmax(chain) for chain in log_ps]
        global_max_chain_idx = np.argmax([log_ps[chain][max_idx] for
                                          chain, max_idx in
                                          enumerate(max_in_each_chain)])
        ml_params = sampled_params[global_max_chain_idx,
                                   max_in_each_chain[global_max_chain_idx]]
        ml_model = {pname: 10 ** pvalue for pname, pvalue in
                    zip(sampled_param_names, ml_params)}
        with open(os.path.join(save_dir, sim_name + '_ML_params.txt'),
                  'w') as f:
            print(ml_model, file=f)

    except IndexError:
        print("IndexError finding maximum posterior parameters")

    try:
        # Plot output
        total_iterations = len(old_samples[0])
        burnin = int(total_iterations / 2)
        samples = np.concatenate([chain[burnin:, :] for chain in old_samples])
        np.save(os.path.join(save_dir, sim_name+'_samples'), samples)
        ndims = len(old_samples[0][0])
        colors = sns.color_palette(n_colors=ndims)
        for dim in range(ndims):
            fig = plt.figure()  # new figure per dimension, or plots pile up on one axes
            # sns.distplot has been removed from seaborn; histplot(kde=True) replaces it
            sns.histplot(samples[:, dim], color=colors[dim], kde=True,
                         stat='density')
            fig.savefig(os.path.join(save_dir, sim_name + '_dimension_' +
                                     str(dim) + '_' +
                                     sampled_param_names[dim] + '.pdf'))
            plt.close(fig)

        # Convert to dataframe
        df = pd.DataFrame(samples, columns=sampled_param_names)
        g = sns.pairplot(df)
        for i, j in zip(*np.triu_indices_from(g.axes, 1)):
            g.axes[i, j].set_visible(False)
        g.savefig(os.path.join(save_dir, 'corner_plot.pdf'))

        # Basic statistics
        mean_parameters = np.mean(samples, axis=0)
        median_parameters = np.median(samples, axis=0)
        np.save(os.path.join(save_dir, 'mean_parameters'), mean_parameters)
        np.save(os.path.join(save_dir, 'median_parameters'), median_parameters)
        df.describe().to_csv(os.path.join(save_dir,
                             'descriptive_statistics.csv'))

    except (ImportError, OSError):
        pass

    # Clean up stray PyDREAM history/crossover files.  shutil.move does not
    # expand wildcards, so expand the pattern with glob (requires `import glob`).
    for stray in glob.glob(os.path.join(os.getcwd(), '*_DREAM_chain_*.*')):
        shutil.move(stray, save_dir)
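
Once a run finishes, the arrays written above can be reloaded for downstream analysis. A small sketch assuming the file layout produced by the DREAM_fit in Example #2; the save_dir, sim_name, chain count, and iteration count are hypothetical placeholders for whatever the run actually used.

import os
import numpy as np

save_dir, sim_name, nchains, total_iterations = 'toy_run', 'toy', 3, 1000

param_names = np.load(os.path.join(save_dir, 'param_names.npy'))
chains = [np.load(os.path.join(save_dir, sim_name + '_' + str(chain) + '_' +
                               str(total_iterations) + '.npy'))
          for chain in range(nchains)]
samples = np.load(os.path.join(save_dir, sim_name + '_samples.npy'))  # post burn-in draws
print(param_names, samples.shape)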