## kill replicas master.terminate_replicas() else: from rexfw.replicas import Replica from rexfw.slaves import Slave from rexfw.proposers.re import REProposer from isd2.samplers.gibbs import GibbsSampler from ensemble_hic.setup_functions import make_posterior, make_subsamplers from ensemble_hic.setup_functions import setup_initial_state from ensemble_hic.replica import CompatibleReplica posterior = make_posterior(settings) for replica_parameter in schedule: posterior[replica_parameter].set(schedule[replica_parameter][rank - 1]) initial_state = setup_initial_state(settings['initial_state'], posterior) if not 'norm' in initial_state.variables: posterior['norm'].set( np.max(posterior.likelihoods['ensemble_contacts'].error_model.data) / float(settings['general']['n_structures'])) initial_state.update_variables( structures=np.load(cont_folder + 'init_states.npy')[rank - 1]) initial_state.update_variables(norm=np.load(cont_folder + 'init_norms.npy')[rank - 1]) settings['structures_hmc'].update( timestep=np.load(cont_folder + 'timesteps.npy')[rank - 1]) subsamplers = make_subsamplers(posterior, initial_state.variables,
def calculate_DOS(config_file, n_samples, subsamples_fraction, burnin,
                  n_iter=100000, tol=1e-10, save_output=True,
                  output_suffix=''):
    """Calculates the density of states (DOS) using non-parametric
    histogram reweighting (WHAM).

    :param config_file: Configuration file
    :type config_file: str

    :param n_samples: number of samples the simulation ran; capped at the
                      ``n_samples`` value found in the configuration file
    :type n_samples: int

    :param subsamples_fraction: fraction of samples (after burnin) to be
                                analyzed; set this to, e.g., 10 to use one
                                tenth of n_samples to decrease computation
                                time
    :type subsamples_fraction: int

    :param burnin: number of samples to be thrown away as part
                   of the burn-in period
    :type burnin: int

    :param n_iter: number of WHAM iterations
    :type n_iter: int

    :param tol: threshold up to which the negative log-likelihood being
                minimized in WHAM can change before iteration stops
    :type tol: float

    :param save_output: save resulting DOS object, parameters used during
                        calculation and indices of randomly chosen samples
                        in the ``analysis/`` subfolder of the simulation
                        output folder
    :type save_output: bool

    :param output_suffix: string inserted before ``.pickle`` in the names
                          of the files written when ``save_output`` is set
    :type output_suffix: str

    :returns: DOS object
    :rtype: DOS
    """
    from ensemble_hic.wham import PyWHAM as WHAM, DOS
    from ensemble_hic.setup_functions import parse_config_file, make_posterior
    from ensemble_hic.analysis_functions import load_sr_samples

    settings = parse_config_file(config_file)
    n_replicas = int(settings['replica']['n_replicas'])

    # The parameters are stored exactly as requested by the caller so that
    # the pickled record documents the call, not the derived values.
    params = {'n_samples': n_samples,
              'burnin': burnin,
              'subsamples_fraction': subsamples_fraction,
              'niter': n_iter,
              'tol': tol}

    # Never try to read more samples than the simulation was configured for.
    n_samples = min(params['n_samples'], int(settings['replica']['n_samples']))
    dump_interval = int(settings['replica']['samples_dump_interval'])

    output_folder = settings['general']['output_folder']
    if output_folder[-1] != '/':
        output_folder += '/'

    # NOTE(review): np.load on a pickle file relies on NumPy's pickle
    # fallback; recent NumPy versions require allow_pickle=True - confirm
    # against the NumPy version in use.
    schedule = np.load(output_folder + 'schedule.pickle')

    p = make_posterior(settings)
    L = p.likelihoods['ensemble_contacts']
    P = p.priors['nonbonded_prior']

    # An energy term only enters the reweighting if its replica parameter
    # is part of the schedule; otherwise its column is identically zero.
    use_likelihood = 'lammda' in schedule
    use_prior = 'beta' in schedule

    energies = []
    sels = []
    for i in range(n_replicas):
        samples = load_sr_samples(output_folder + 'samples/', i + 1,
                                  n_samples + 1, dump_interval,
                                  burnin=params['burnin'])
        # Random subsample (without replacement) to reduce computation time.
        sel = np.random.choice(len(samples),
                               int(len(samples) / float(subsamples_fraction)),
                               replace=False)
        samples = samples[sel]
        sels.append(sel)
        energies.append([
            [-L.log_prob(**x.variables) if use_likelihood else 0,
             -P.log_prob(structures=x.variables['structures'])
             if use_prior else 0]
            for x in samples])
        # i is zero-based; report the number of replicas actually finished.
        print("Calculated energies for {}/{} replicas...".format(i + 1,
                                                                 n_replicas))

    energies = np.array(energies)
    # Merge the (replica, sample) axes: one row per sample, two energy terms.
    energies_flat = energies.reshape(-1, 2)

    # A parameter missing from the schedule multiplies an all-zero energy
    # column, so any placeholder of the right length leaves q unchanged.
    sched = np.array([
        schedule['lammda'] if use_likelihood else np.zeros(n_replicas),
        schedule['beta'] if use_prior else np.zeros(n_replicas),
    ])
    q = np.array([[(energy * replica_params).sum()
                   for energy in energies_flat]
                  for replica_params in sched.T])

    wham = WHAM(len(energies_flat), n_replicas)
    # Every replica contributes the same number of samples; floor division
    # keeps this an integer count under both Python 2 and Python 3.
    wham.N[:] = len(energies_flat) // n_replicas
    wham.run(q, niter=params['niter'], tol=params['tol'], verbose=100)

    dos = DOS(energies_flat, wham.s, sort_energies=False)

    if save_output:
        import os
        try:
            from cPickle import dump
        except ImportError:
            from pickle import dump

        ana_path = output_folder + 'analysis/'
        if not os.path.exists(ana_path):
            os.makedirs(ana_path)

        outputs = (('dos', dos),
                   ('wham_params', params),
                   ('wham_sels', np.array(sels)))
        for stem, obj in outputs:
            # Pickle streams are binary data, hence 'wb'.
            file_name = ana_path + '{}{}.pickle'.format(stem, output_suffix)
            with open(file_name, 'wb') as opf:
                dump(obj, opf)

    return dos
from ensemble_hic.setup_functions import parse_config_file, make_posterior
from ensemble_hic.analysis_functions import load_samples_from_cfg
from ensemble_hic.analysis_functions import write_ensemble

# simlist lives outside the package; make it importable first.
sys.path.append(os.path.expanduser('~/projects/ensemble_hic/scripts/misc/'))
from simlist import simulations

sims = simulations['1pga_1shf_fwm_poisson_new_fixed_it3']
cpath = sims['common_path']
n_structures = sims['n_structures']
odirs = sims['output_dirs']

# For every simulation, pick the sample with the lowest negative
# log-posterior (the MAP sample) and keep its structures.
MAP_samples = []
for (n, odir) in zip(n_structures, odirs):
    cfg_file = cpath + odir + '/config.cfg'
    p = make_posterior(parse_config_file(cfg_file))
    samples = load_samples_from_cfg(cfg_file, burnin=60000)
    # A real list (not a lazy map object) so that np.argmin sees a
    # sequence under both Python 2 and Python 3.
    Es = [-p.log_prob(**sample.variables) for sample in samples]
    MAP_sample = samples[np.argmin(Es)]
    # One (56, 3) coordinate array per structure in the ensemble.
    MAP_samples.append(MAP_sample.variables['structures'].reshape(n, 56, 3))

# Flatten the per-simulation ensembles into one array of single structures.
MAP_samples_flat = np.array([structure
                             for ensemble in MAP_samples
                             for structure in ensemble])


def invar_rmsd(x, y):
    """Return the smaller of rmsd(x, y) and rmsd(x, -y).

    NOTE(review): comparing against the coordinate-inverted copy -y
    presumably makes the measure insensitive to handedness - confirm.
    """
    return min(rmsd(x, y), rmsd(x, -y))


# Reference C-alpha coordinates of the two proteins.
tmp = StructureParser(
    os.path.expanduser('~/projects/ensemble_hic/data/proteins/1pga.pdb'))
ref_1pga = tmp.parse().get_coordinates(['CA'])
tmp = StructureParser(
    os.path.expanduser('~/projects/ensemble_hic/data/proteins/1shf.pdb'))
ref_1shf = tmp.parse().get_coordinates(['CA'])

rmsds_to_1pga = [invar_rmsd(ref_1pga, structure)
                 for structure in MAP_samples_flat]
def plot_correlations(ax, show_inset=False):
    """Plot the correlation between back-calculated and experimental data
    as a function of the number of states.

    For every simulation listed in the module-level ``sims`` dictionary,
    the forward model is evaluated for each posterior sample and the
    Pearson correlation coefficient with the third column of the forward
    model's data points is computed. Only data points whose first two
    columns differ by more than 8 are used. Mean and standard deviation
    over the samples are drawn as error bars.

    :param ax: axes to draw into
    :type ax: matplotlib axes object

    :param show_inset: additionally draw an inset zooming into the mean
                       correlations of the simulations with indices 2 to 5;
                       off by default, matching the previously hard-disabled
                       inset code
    :type show_inset: bool
    """
    # Imported once up front instead of on every loop iteration.
    from ensemble_hic.analysis_functions import load_samples_from_cfg_auto

    n_structures = sims['n_structures']
    cpath = sims['common_path']
    opdirs = sims['output_dirs']

    correlations = []
    for i, _ in enumerate(n_structures):
        config_file = cpath + opdirs[i] + '/' + 'config.cfg'
        settings = parse_config_file(config_file)
        samples = load_samples_from_cfg_auto(config_file, burnin=30000)
        fwm = make_posterior(settings).likelihoods['ensemble_contacts'].forward_model
        dps = fwm.data_points
        # NOTE(review): columns 0 and 1 are presumably bead indices, so this
        # keeps only contacts more than 8 positions apart - confirm.
        inds = np.where(np.abs(dps[:, 0] - dps[:, 1]) > 8)
        # The selected reference data do not depend on the sample.
        reference = dps[:, 2][inds]
        corrs = [np.corrcoef(fwm(**sample.variables)[inds], reference)[0, 1]
                 for sample in samples]
        correlations.append([np.mean(corrs), np.std(corrs)])
    # Column 0: mean correlation, column 1: standard deviation.
    correlations = np.array(correlations)

    ax.errorbar(n_structures, correlations[:, 0], correlations[:, 1],
                marker='o', ls='--', c='k')
    ax.set_xticks(n_structures)
    ax.set_xlabel('number of states $n$')
    ax.set_ylabel(r'$\rho$(back-calculated data, experimental data)')
    for spine in ['top', 'right']:
        ax.spines[spine].set_visible(False)

    if show_inset:
        from mpl_toolkits.axes_grid1.inset_locator import inset_axes
        inset_ax = inset_axes(ax, width='40%', height='25%', loc=10)
        # Only the mean column is plotted; passing the full (mean, std)
        # array would draw the standard deviations as a second line.
        inset_ax.plot(n_structures[2:6], correlations[2:6, 0], ls='--',
                      marker='o', color='black')
        inset_ax.set_xticks(n_structures[2:6])
        inset_ax.spines['top'].set_visible(False)
        inset_ax.spines['right'].set_visible(False)
        inset_ax.set_ylim((0.985, 1.005))
        inset_ax.set_yticks((0.99, 1.0))
        inset_ax.set_xlim((7, 105))
# For every simulation output directory, estimate the log-evidence from the
# stored density of states and the mean data term from the stored samples.
logZs = []
data_terms = []
for x in output_dirs:
    # NOTE(review): np.load on pickle files relies on NumPy's pickle
    # fallback; recent NumPy versions require allow_pickle=True - confirm.
    dos = np.load(x + '/analysis/dos.pickle')
    # Difference of the log-sum-exp over both energy columns and the
    # log-sum-exp over the second energy column alone.
    logZs.append(log_sum_exp(-dos.E.sum(1) + dos.s) -
                 log_sum_exp(-dos.E[:, 1] + dos.s))

    # The number of replicas is encoded in the directory name as
    # "..._<n>replicas"; search for the underscore within the four
    # characters preceding "replicas".
    a = x.find('replicas')
    b = x[a-4:].find('_')
    n_replicas = int(x[a-4+b+1:a])

    wham_params = np.load(x + '/analysis/wham_params.pickle')
    c = parse_config_file(x + '/config.cfg')
    s = load_sr_samples(x + '/samples/', n_replicas,
                        wham_params['n_samples'] + 1,
                        int(c['replica']['samples_dump_interval']),
                        wham_params['burnin'])
    # Restrict to the samples of the last replica that were used in WHAM.
    sels = np.load(x + '/analysis/wham_sels.pickle')
    s = s[sels[-1]]

    p = make_posterior(c)
    L = p.likelihoods['ensemble_contacts']
    d = L.forward_model.data_points[:, 2]
    # Sum of log(d!) over all data points.
    f = gammaln(d + 1).sum()

    # The comprehension variable is deliberately not called x: in Python 2
    # it would leak and overwrite the directory name used below.
    print("mean log-posterior: {}".format(
        np.mean([p.log_prob(**sample.variables) for sample in s])))

    # The log(len(d)) correction is applied to every directory whose name
    # does not contain '1pga'.
    logZs[-1] -= f + np.log(len(d)) * (not '1pga' in x)
    data_terms.append(
        np.array([-L.log_prob(**sample.variables) for sample in s]).mean() + f)
    print("evidence: {}".format(logZs[-1]))

data_terms = np.array(data_terms)

# Pickle streams are binary data, hence 'wb'.
with open(out_file, "wb") as opf:
    dump((n_structures, logZs, data_terms), opf)