history_thin=1, model_name='earm_smallest_dreamzs_5chain2', verbose=True) # Save sampling output (sampled parameter values and their corresponding logps). for chain in range(len(sampled_params)): np.save( 'earm_smallest_dreamzs_5chain_sampled_params_chain_' + str(chain) + '_' + str(total_iterations), sampled_params[chain]) np.save( 'earm_smallest_dreamzs_5chain_logps_chain_' + str(chain) + '_' + str(total_iterations), log_ps[chain]) #Check convergence and continue sampling if not converged GR = Gelman_Rubin(sampled_params) print('At iteration: ', total_iterations, ' GR = ', GR) np.savetxt( 'earm_smallest_dreamzs_5chain_GelmanRubin_iteration_' + str(total_iterations) + '.txt', GR) old_samples = sampled_params if np.any(GR > 1.2): starts = [sampled_params[chain][-1, :] for chain in range(nchains)] while not converged: total_iterations += niterations sampled_params, log_ps = run_dream( parameters=sampled_parameter_names, likelihood=likelihood, niterations=niterations, nchains=nchains,
def DREAM_fit(model, priors_list, posterior, start_params, sampled_param_names, niterations,
              nchains, sim_name, save_dir, custom_params=None, GR_cutoff=1.2,
              iteration_cutoff=1E7, verbose=True, plot_posteriors=True):
    """
    The DREAM fitting algorithm as implemented in run_dream(), plus decorations for saving
    run parameters, checking convergence, and post fitting analysis.

    Parameters
    ----------
    model : object exposing a ``parameters`` mapping; the initial values are pickled to
        ``save_dir/init_params.pkl`` for provenance.
    priors_list : list of prior objects, forwarded to run_dream().
    posterior : callable log-posterior, forwarded to run_dream().
    start_params : starting chain positions, forwarded to run_dream(start=...).
    sampled_param_names : list of str, one name per sampled dimension; saved to disk and
        used to label outputs.
    niterations : int, iterations per run_dream() round.
    nchains : int, number of DREAM chains.
    sim_name : str, prefix for every output file.
    save_dir : str, directory receiving all output (assumed to exist).
    custom_params : dict or None, reserved for callers; not read in this body.
    GR_cutoff : float, Gelman-Rubin threshold for declaring convergence.
    iteration_cutoff : float, hard stop on total iterations even if unconverged.
    verbose : bool, forwarded to run_dream().
    plot_posteriors : bool, whether to draw per-dimension posteriors and a corner plot.
    """
    # Avoid the shared-mutable-default pitfall; None is the public "no overrides" value.
    if custom_params is None:
        custom_params = {}
    converged = False
    total_iterations = niterations

    # Record run metadata so results can be interpreted later.
    np.save(save_dir + os.sep + 'param_names.npy', sampled_param_names)
    with open(save_dir + os.sep + 'init_params.pkl', 'wb') as f:
        pickle.dump(dict(model.parameters), f)

    # Run DREAM sampling. Documentation of DREAM options is in Dream.py.
    sampled_params, log_ps = run_dream(priors_list, posterior, start=start_params,
                                       niterations=niterations, nchains=nchains, multitry=True,
                                       parallel=True, gamma_levels=4, adapt_gamma=True,
                                       history_thin=1, model_name=sim_name, verbose=verbose)

    # Save sampling output (sampled param values and their corresponding logps).
    for chain in range(len(sampled_params)):
        np.save(os.path.join(save_dir, sim_name + str(chain) + '_' + str(total_iterations)),
                sampled_params[chain])
        np.save(os.path.join(save_dir, sim_name + str(chain) + '_logPs_' + str(total_iterations)),
                log_ps[chain])

    # Check convergence and continue sampling if not converged.
    GR = Gelman_Rubin(sampled_params)
    print('At iteration: ', total_iterations, ' GR = ', GR)
    np.savetxt(os.path.join(save_dir, sim_name + '_' + str(total_iterations) + '.txt'), GR)

    old_samples = sampled_params
    if np.any(GR > GR_cutoff):
        # Restart each chain from its last accepted position.
        starts = [sampled_params[chain][-1, :] for chain in range(nchains)]
        while not converged:
            total_iterations += niterations
            sampled_params, log_ps = run_dream(priors_list, posterior, start=starts,
                                               niterations=niterations, nchains=nchains,
                                               multitry=True, parallel=True, gamma_levels=4,
                                               adapt_gamma=True, history_thin=1,
                                               model_name=sim_name, verbose=verbose,
                                               restart=True)
            for chain in range(len(sampled_params)):
                np.save(os.path.join(save_dir, sim_name + '_' + str(chain) + '_' +
                                     str(total_iterations)), sampled_params[chain])
                np.save(os.path.join(save_dir, sim_name + '_' + str(chain) + '_logPs_' +
                                     str(total_iterations)), log_ps[chain])
            # Convergence is assessed over the full accumulated history, not just the
            # newest round.
            old_samples = [np.concatenate((old_samples[chain], sampled_params[chain]))
                           for chain in range(nchains)]
            GR = Gelman_Rubin(old_samples)
            print('At iteration: ', total_iterations, ' GR = ', GR)
            np.savetxt(os.path.join(save_dir, sim_name + '_' + str(total_iterations) + '.txt'),
                       GR)
            if np.all(GR < GR_cutoff) or total_iterations >= iteration_cutoff:
                converged = True

    log_ps = np.array(log_ps)
    sampled_params = np.array(sampled_params)
    try:
        # Maximum posterior model: best sample across all chains of the last round.
        max_in_each_chain = [np.argmax(chain) for chain in log_ps]
        global_max_chain_idx = np.argmax([log_ps[chain][max_idx]
                                          for chain, max_idx in enumerate(max_in_each_chain)])
        ml_params = sampled_params[global_max_chain_idx, max_in_each_chain[global_max_chain_idx]]
        # Sampling appears to happen in log10 space; convert back to linear values.
        ml_model = {pname: 10 ** pvalue for pname, pvalue in zip(sampled_param_names, ml_params)}
        # Context managers ensure the output files are closed (the original leaked the
        # handle from print(..., file=open(...))).
        with open(os.path.join(save_dir, sim_name + '_ML_params.txt'), 'w') as fh:
            print(ml_model, file=fh)
        # Maximum posterior for each chain.
        ml_samples = [{pname: 10 ** pvalue
                       for pname, pvalue in zip(
                           sampled_param_names,
                           sampled_params[chain_idx, max_in_each_chain[chain_idx]])}
                      for chain_idx in range(nchains)]
        with open(os.path.join(save_dir, sim_name + '_ML_samples.txt'), 'w') as fh:
            print(ml_samples, file=fh)
    except IndexError:
        print("IndexError finding maximum posterior parameters")
    try:
        # Compute burn-in: discard the first half of each chain's history.
        total_iterations = len(old_samples[0])
        burnin = int(total_iterations / 2)
        samples = np.concatenate([old_samples[i][burnin:, :] for i in range(len(old_samples))])
        np.save(os.path.join(save_dir, sim_name + '_samples'), samples)
        # Basic statistics.
        mean_parameters = np.mean(samples, axis=0)
        median_parameters = np.median(samples, axis=0)
        np.save(os.path.join(save_dir, 'mean_parameters'), mean_parameters)
        np.save(os.path.join(save_dir, 'median_parameters'), median_parameters)
        df = pd.DataFrame(samples, columns=sampled_param_names)
        df.describe().to_csv(os.path.join(save_dir, 'descriptive_statistics.csv'))
        if plot_posteriors:
            # Prepare plot canvas.
            ndims = len(old_samples[0][0])
            colors = sns.color_palette(n_colors=ndims)
            priors_dict = dict(zip(sampled_param_names, priors_list))
            # Compute the factors of ndims to pick a near-square subplot grid.
            # BUG FIX: the original used list(set(...)), whose ordering is unspecified,
            # then indexed the middle elements; sort so the middle factor pair is
            # actually selected.
            f1 = sorted(set(reduce(list.__add__,
                                   ([i, ndims // i] for i in range(1, int(ndims ** 0.5) + 1)
                                    if ndims % i == 0))))
            ncols = f1[int(len(f1) / 2 - 1)]
            nrows = f1[int(len(f1) / 2)]
            fig, axes = plt.subplots(nrows=nrows, ncols=ncols)
            # Plot posterior distributions with the prior curve overlaid.
            for dim, ax in enumerate(fig.axes):
                p = sampled_param_names[dim]
                sns.histplot(samples[:, dim], color=colors[dim], ax=ax, kde=True,
                             stat='density')
                # NOTE(review): assumes priors_dict[p] is indexable as (loc, scale) of a
                # normal prior -- confirm against how priors_list is constructed.
                xrange = np.arange(priors_dict[p][0] - 3 * priors_dict[p][1],
                                   priors_dict[p][0] + 3 * priors_dict[p][1], 0.01)
                yrange = norm.pdf(xrange, priors_dict[p][0], priors_dict[p][1])
                ax.plot(xrange, yrange, 'k--')
                ax.set_xlabel(p)
                ax.set_ylabel(None)
                ax.spines['right'].set_visible(False)
                ax.spines['top'].set_visible(False)
            plt.tight_layout()
            plt.savefig(os.path.join(save_dir, sim_name + 'posteriors.pdf'))
            plt.close()
            # Create pairplot with the redundant upper triangle hidden.
            g = sns.pairplot(df)
            for i, j in zip(*np.triu_indices_from(g.axes, 1)):
                g.axes[i, j].set_visible(False)
            g.savefig(os.path.join(save_dir, 'corner_plot.png'))
    except (ImportError, OSError, AttributeError, TypeError):
        pass
    # Clean up stray files: the sampler drops *_DREAM_chain_* files into the CWD.
    # BUG FIX: the original passed the glob pattern (plus a stray os.sep component)
    # directly to shutil.move, which never matches an actual file; expand the pattern
    # explicitly and move each match.
    import glob
    for stray in glob.glob(os.path.join(os.getcwd(), '*_DREAM_chain_*.*')):
        try:
            shutil.move(stray, save_dir)
        except (FileNotFoundError, shutil.Error):
            # Best-effort cleanup; skip files that vanished or already exist at the target.
            pass
def fit_with_DREAM(sim_name, parameter_dict, likelihood):
    """Sample the posterior of the parameters in *parameter_dict* with PyDREAM.

    Runs DREAM sampling with normal priors centred on the natural log of each initial
    value, continues in 10000-iteration rounds until the Gelman-Rubin statistic drops
    below 1.2, then saves samples, summary statistics, and plots to a dated directory.

    Parameters
    ----------
    sim_name : str, prefix used for every output file name.
    parameter_dict : mapping of parameter name -> initial (linear-space) value.
    likelihood : callable log-likelihood, forwarded to run_dream().

    Returns
    -------
    int : 0 on completion (all results are written to disk as side effects).
    """
    original_params = list(parameter_dict.values())
    # Normal priors in natural-log space, centred on the log of each initial value.
    priors_list = [SampledParam(norm, loc=np.log(p), scale=1.0) for p in original_params]

    # Set simulation parameters.
    niterations = 10000
    converged = False
    total_iterations = niterations
    nchains = 5

    # Make save directory.
    today = datetime.now()
    save_dir = "PyDREAM_" + today.strftime('%d-%m-%Y') + "_" + str(niterations)
    os.makedirs(os.path.join(os.getcwd(), save_dir), exist_ok=True)

    # Run DREAM sampling. Documentation of DREAM options is in Dream.py.
    sampled_params, log_ps = run_dream(priors_list, likelihood, start=np.log(original_params),
                                       niterations=niterations, nchains=nchains, multitry=False,
                                       gamma_levels=4, adapt_gamma=True, history_thin=1,
                                       model_name=sim_name, verbose=True)

    # Save sampling output (sampled parameter values and their corresponding logps).
    # BUG FIX: the original wrote both arrays to the SAME filename, so the log-ps
    # silently overwrote the sampled parameters; tag the log-p files distinctly.
    for chain in range(len(sampled_params)):
        np.save(os.path.join(save_dir, sim_name + str(chain) + '_' + str(total_iterations)),
                sampled_params[chain])
        np.save(os.path.join(save_dir, sim_name + str(chain) + '_logps_' +
                             str(total_iterations)), log_ps[chain])

    # Check convergence and continue sampling if not converged.
    GR = Gelman_Rubin(sampled_params)
    print('At iteration: ', total_iterations, ' GR = ', GR)
    np.savetxt(os.path.join(save_dir, sim_name + str(total_iterations) + '.txt'), GR)

    old_samples = sampled_params
    if np.any(GR > 1.2):
        # Restart each chain from its last accepted position.
        starts = [sampled_params[chain][-1, :] for chain in range(nchains)]
        # NOTE(review): there is no iteration cutoff here, so a non-converging model
        # loops forever -- consider mirroring DREAM_fit's iteration_cutoff guard.
        while not converged:
            total_iterations += niterations
            sampled_params, log_ps = run_dream(priors_list, likelihood, start=starts,
                                               niterations=niterations, nchains=nchains,
                                               multitry=False, gamma_levels=4,
                                               adapt_gamma=True, history_thin=1,
                                               model_name=sim_name, verbose=True,
                                               restart=True)
            for chain in range(len(sampled_params)):
                np.save(os.path.join(save_dir, sim_name + '_' + str(chain) + '_' +
                                     str(total_iterations)), sampled_params[chain])
                # BUG FIX: distinct filename for the log-ps (originally identical to the
                # sampled-parameter file, clobbering it).
                np.save(os.path.join(save_dir, sim_name + '_' + str(chain) + '_logps_' +
                                     str(total_iterations)), log_ps[chain])
            # Convergence is assessed over the full accumulated history.
            old_samples = [np.concatenate((old_samples[chain], sampled_params[chain]))
                           for chain in range(nchains)]
            GR = Gelman_Rubin(old_samples)
            print('At iteration: ', total_iterations, ' GR = ', GR)
            np.savetxt(os.path.join(save_dir, sim_name + '_' + str(total_iterations) + '.txt'),
                       GR)
            if np.all(GR < 1.2):
                converged = True

    try:
        # Plot output: drop the first half of each chain's history as burn-in.
        total_iterations = len(old_samples[0])
        burnin = int(total_iterations / 2)
        samples = np.concatenate([old_samples[i][burnin:, :] for i in range(len(old_samples))])
        np.save(os.path.join(save_dir, sim_name + '_samples'), samples)

        ndims = len(old_samples[0][0])
        colors = sns.color_palette(n_colors=ndims)
        for dim in range(ndims):
            fig = plt.figure()
            # NOTE(review): sns.distplot is deprecated/removed in recent seaborn;
            # migrate to sns.histplot(..., kde=True) when upgrading.
            sns.distplot(samples[:, dim], color=colors[dim])
            fig.savefig(os.path.join(save_dir, sim_name + '_dimension_' + str(dim) + '_' +
                                     list(parameter_dict.keys())[dim] + '.pdf'))

        # Convert to dataframe and draw a corner plot (upper triangle hidden).
        df = pd.DataFrame(samples, columns=parameter_dict.keys())
        g = sns.pairplot(df)
        for i, j in zip(*np.triu_indices_from(g.axes, 1)):
            g.axes[i, j].set_visible(False)
        g.savefig(os.path.join(save_dir, 'corner_plot.pdf'))

        # Basic statistics.
        mean_parameters = np.mean(samples, axis=0)
        median_parameters = np.median(samples, axis=0)
        np.save(os.path.join(save_dir, 'mean_parameters'), mean_parameters)
        np.save(os.path.join(save_dir, 'median_parameters'), median_parameters)
        df.describe().to_csv(os.path.join(save_dir, 'descriptive_statistics.csv'))
    except ImportError:
        pass
    return 0
converged = False

if __name__ == '__main__':
    # Single DREAM sampling round for the NEv2 Sage test model; chain output and the
    # Gelman-Rubin diagnostic are written to the current working directory.
    model_name = 'dreamzs_5chain_NEv2_Sage_test_NM'
    # NOTE(review): the 'snooker_' keyword (trailing underscore) looks unusual --
    # verify it matches run_dream()'s actual signature.
    sampled_params, log_ps = run_dream(parameters=sampled_params_list,
                                       likelihood=likelihood,
                                       niterations=niterations,
                                       nchains=nchains,
                                       multitry=False,
                                       gamma_levels=6,
                                       nCR=6,
                                       snooker_=0.4,
                                       adapt_gamma=False,
                                       history_thin=1,
                                       model_name=model_name,
                                       verbose=True)
    total_iterations = niterations

    # Persist each chain's sampled parameter values and their log-posteriors.
    for chain_idx, chain_samples in enumerate(sampled_params):
        suffix = str(chain_idx) + '_' + str(total_iterations)
        np.save(model_name + '_sampled_params_chain_' + suffix, chain_samples)
        np.save(model_name + '_logps_chain_' + suffix, log_ps[chain_idx])

    # Gelman-Rubin convergence diagnostic for this round.
    GR = Gelman_Rubin(sampled_params)
    print('At iteration: ', total_iterations, ' GR = ', GR)
    np.savetxt(model_name + '_GelmanRubin_iteration_' + str(total_iterations) + '.txt', GR)
def DREAM_fit(model, priors_list, posterior, start_params, sampled_param_names, niterations,
              nchains, sim_name, save_dir, custom_params=None, GR_cutoff=1.2,
              iteration_cutoff=1E7):
    """
    The DREAM fitting algorithm as implemented in run_dream(), plus decorations for saving
    run parameters, checking convergence, and post fitting analysis.

    Parameters
    ----------
    model : unused in this body; kept for interface compatibility with callers.
    priors_list : list of prior objects, forwarded to run_dream().
    posterior : callable log-posterior, forwarded to run_dream().
    start_params : starting chain positions, forwarded to run_dream(start=...).
    sampled_param_names : list of str, one name per sampled dimension, used to label output.
    niterations : int, iterations per run_dream() round.
    nchains : int, number of DREAM chains.
    sim_name : str, prefix for every output file.
    save_dir : str, directory receiving all output (assumed to exist).
    custom_params : dict or None, reserved for callers; not read in this body.
    GR_cutoff : float, Gelman-Rubin threshold for declaring convergence.
    iteration_cutoff : float, hard stop on total iterations even if unconverged (new,
        defaulted parameter; prevents the previously unbounded convergence loop and
        matches the other DREAM_fit variant in this codebase).
    """
    # Avoid the shared-mutable-default pitfall; None is the public "no overrides" value.
    if custom_params is None:
        custom_params = {}
    converged = False
    total_iterations = niterations

    # Run DREAM sampling. Documentation of DREAM options is in Dream.py.
    sampled_params, log_ps = run_dream(priors_list, posterior, start=start_params,
                                       niterations=niterations, nchains=nchains,
                                       multitry=False, gamma_levels=4, adapt_gamma=True,
                                       history_thin=1, model_name=sim_name, verbose=True)

    # Save sampling output (sampled param values and their corresponding logps).
    # BUG FIX: the original wrote both arrays to the SAME filename, so the log-ps
    # silently overwrote the sampled parameters; tag the log-p files distinctly.
    for chain in range(len(sampled_params)):
        np.save(os.path.join(save_dir, sim_name + str(chain) + '_' + str(total_iterations)),
                sampled_params[chain])
        np.save(os.path.join(save_dir, sim_name + str(chain) + '_logPs_' +
                             str(total_iterations)), log_ps[chain])

    # Check convergence and continue sampling if not converged.
    GR = Gelman_Rubin(sampled_params)
    print('At iteration: ', total_iterations, ' GR = ', GR)
    np.savetxt(os.path.join(save_dir, sim_name + str(total_iterations) + '.txt'), GR)

    old_samples = sampled_params
    if np.any(GR > GR_cutoff):
        # Restart each chain from its last accepted position.
        starts = [sampled_params[chain][-1, :] for chain in range(nchains)]
        while not converged:
            total_iterations += niterations
            sampled_params, log_ps = run_dream(priors_list, posterior, start=starts,
                                               niterations=niterations, nchains=nchains,
                                               multitry=False, gamma_levels=4,
                                               adapt_gamma=True, history_thin=1,
                                               model_name=sim_name, verbose=True,
                                               restart=True)
            for chain in range(len(sampled_params)):
                np.save(os.path.join(save_dir, sim_name + '_' + str(chain) + '_' +
                                     str(total_iterations)), sampled_params[chain])
                # BUG FIX: distinct filename for the log-ps (originally identical to the
                # sampled-parameter file, clobbering it).
                np.save(os.path.join(save_dir, sim_name + '_' + str(chain) + '_logPs_' +
                                     str(total_iterations)), log_ps[chain])
            # Convergence is assessed over the full accumulated history.
            old_samples = [np.concatenate((old_samples[chain], sampled_params[chain]))
                           for chain in range(nchains)]
            GR = Gelman_Rubin(old_samples)
            print('At iteration: ', total_iterations, ' GR = ', GR)
            np.savetxt(os.path.join(save_dir, sim_name + '_' + str(total_iterations) + '.txt'),
                       GR)
            # BUG FIX: bound the loop so a non-converging model cannot run forever.
            if np.all(GR < GR_cutoff) or total_iterations >= iteration_cutoff:
                converged = True

    log_ps = np.array(log_ps)
    sampled_params = np.array(sampled_params)
    try:
        # Maximum posterior model: best sample across all chains of the last round.
        max_in_each_chain = [np.argmax(chain) for chain in log_ps]
        global_max_chain_idx = np.argmax([log_ps[chain][max_idx]
                                          for chain, max_idx in enumerate(max_in_each_chain)])
        ml_params = sampled_params[global_max_chain_idx, max_in_each_chain[global_max_chain_idx]]
        # Sampling appears to happen in log10 space; convert back to linear values.
        ml_model = {pname: 10 ** pvalue for pname, pvalue in zip(sampled_param_names, ml_params)}
        # Context manager ensures the output file is closed (the original leaked the
        # handle from print(..., file=open(...))).
        with open(os.path.join(save_dir, sim_name + '_ML_params.txt'), 'w') as fh:
            print(ml_model, file=fh)
    except IndexError:
        print("IndexError finding maximum posterior parameters")
    try:
        # Plot output: drop the first half of each chain's history as burn-in.
        total_iterations = len(old_samples[0])
        burnin = int(total_iterations / 2)
        samples = np.concatenate([old_samples[i][burnin:, :] for i in range(len(old_samples))])
        np.save(os.path.join(save_dir, sim_name + '_samples'), samples)

        ndims = len(old_samples[0][0])
        colors = sns.color_palette(n_colors=ndims)
        for dim in range(ndims):
            # BUG FIX: start a fresh figure per dimension; the original reused one
            # implicit figure, so each saved PDF accumulated every previous dimension's
            # distribution on top of its own.
            plt.figure()
            # NOTE(review): sns.distplot is deprecated/removed in recent seaborn;
            # migrate to sns.histplot(..., kde=True) when upgrading.
            sns.distplot(samples[:, dim], color=colors[dim])
            plt.savefig(os.path.join(save_dir, sim_name + '_dimension_' + str(dim) + '_' +
                                     sampled_param_names[dim] + '.pdf'))
            plt.close()

        # Convert to dataframe and draw a corner plot (upper triangle hidden).
        df = pd.DataFrame(samples, columns=sampled_param_names)
        g = sns.pairplot(df)
        for i, j in zip(*np.triu_indices_from(g.axes, 1)):
            g.axes[i, j].set_visible(False)
        g.savefig(os.path.join(save_dir, 'corner_plot.pdf'))

        # Basic statistics.
        mean_parameters = np.mean(samples, axis=0)
        median_parameters = np.median(samples, axis=0)
        np.save(os.path.join(save_dir, 'mean_parameters'), mean_parameters)
        np.save(os.path.join(save_dir, 'median_parameters'), median_parameters)
        df.describe().to_csv(os.path.join(save_dir, 'descriptive_statistics.csv'))
    except (ImportError, OSError):
        pass
    # Clean up stray files: the sampler drops *_DREAM_chain_* files into the CWD.
    # BUG FIX: the original passed the glob pattern directly to shutil.move, which
    # never matches an actual file; expand the pattern explicitly and move each match.
    import glob
    for stray in glob.glob(os.path.join(os.getcwd(), '*_DREAM_chain_*.*')):
        try:
            shutil.move(stray, save_dir)
        except (FileNotFoundError, shutil.Error):
            # Best-effort cleanup; skip files that vanished or already exist at the target.
            pass