def extract_perses_repex_to_local(from_dir, to_dir, phases=('complex', 'solvent')):
    """
    Extract perses replica-exchange data from a nonlocal directory into a local one.

    The hybrid-factory archive is copied into `to_dir`, `extract_sys_top` is run
    on `to_dir` for the requested phases plus 'vacuum', and for every phase and
    every endstate the positions and box vectors are written to
    `{to_dir}/{lig}_{phase}.positions.npz` (arrays `positions` and `box_vectors`).

    arguments
        from_dir : str
            full path (including `lig{i}to{j}`) from which to extract perses results
        to_dir : str
            full path (including `lig{i}to{j}`) to which to extract perses results;
            it is created with `os.mkdir`, so it must not exist yet and its
            parent must exist
        phases : iterable of str, default ('complex', 'solvent')
            phases whose `out-{phase}.nc` / `out-{phase}_checkpoint.nc` files are read
    """
    import os
    import shutil

    import numpy as np
    from qmlify.executables import extract_sys_top
    from perses.analysis.utils import open_netcdf

    # Accept any iterable and never share a mutable default between calls.
    phases = list(phases)

    os.mkdir(to_dir)
    factory_npz = os.path.join(from_dir, 'outhybrid_factory.npy.npz')
    # shutil.copy instead of a shell `cp` string: safe for paths containing
    # spaces or shell metacharacters, and a failed copy raises instead of
    # being silently ignored.
    shutil.copy(factory_npz, os.path.join(to_dir, 'outhybrid_factory.npy.npz'))
    extract_sys_top(to_dir, phases=phases + ['vacuum'])

    # NOTE(review): allow_pickle=True unpickles arbitrary objects; only point
    # this function at trusted perses output.
    npz = np.load(factory_npz, allow_pickle=True)
    htf = npz['arr_0'].item()

    # topology proposal
    top_proposal_filename = os.path.join(from_dir, "out_topology_proposals.pkl")
    TPs = np.load(top_proposal_filename, allow_pickle=True)

    for phase in phases:
        nc_checkpoint_filename = os.path.join(from_dir, f"out-{phase}_checkpoint.nc")
        nc_checkpoint = open_netcdf(nc_checkpoint_filename)
        checkpoint_interval = nc_checkpoint.CheckpointInterval
        all_positions = nc_checkpoint.variables['positions']
        bv = nc_checkpoint.variables['box_vectors']
        n_iter, n_replicas, _, _ = np.shape(all_positions)

        nc_out_filename = os.path.join(from_dir, f"out-{phase}.nc")
        nc = open_netcdf(nc_out_filename)
        states = nc.variables['states']

        # (output label, topology-proposal side, state index to follow)
        endstates = [
            ('ligandAlambda0', 'old', 0),
            ('ligandBlambda1', 'new', n_replicas - 1),
        ]
        for lig, state, replica in endstates:
            topology = getattr(TPs[f'{phase}_topology_proposal'], f'{state}_topology')
            n_atoms = topology.getNumAtoms()
            h_to_state = getattr(htf[f"{phase}"], f'_hybrid_to_{state}_map')

            # The hybrid -> endstate index mapping is the same for every
            # iteration, so build the index arrays once.
            hybrid_indices = np.fromiter(h_to_state.keys(), dtype=int)
            state_indices = np.fromiter(h_to_state.values(), dtype=int)

            positions = np.zeros(shape=(n_iter, n_atoms, 3))
            bv_frames = []
            for i in range(n_iter):
                # Checkpoint frame i is matched with row i * checkpoint_interval
                # of 'states'; presumably that row gives each replica's state
                # index at that iteration -- TODO confirm.
                replica_id = np.where(states[i * checkpoint_interval] == replica)[0]
                pos = all_positions[i, replica_id, :, :][0]
                positions[i, state_indices, :] = pos[hybrid_indices]
                bv_frames.append(bv[i, replica_id][0])

            bv_frames = np.array(bv_frames)
            np.savez(os.path.join(to_dir, f"{lig}_{phase}.positions.npz"),
                     positions=positions, box_vectors=bv_frames)
def historic_fes(self, stepsize=100):
    """
    Re-run the multistate free-energy analysis on growing prefixes of each simulation.

    Every `.nc` file in `self.directory` whose name does not contain
    'checkpoint' is examined. For each of the keywords 'vacuum', 'solvent' and
    'complex' found in the file name, the analysis is repeated with
    `max_n_iterations` set to `stepsize, 2*stepsize, ...` (stopping before the
    file's `last_iteration`) and the results are appended to the matching
    history lists on `self` (prefix `vac`, `sol` or `com`):

    * `{prefix}dg_history` / `{prefix}ddg_history` receive `f_ij[1, -2]` / `df_ij[1, -2]`
    * `{prefix}dg_history_es` / `{prefix}ddg_history_es` receive `f_ij[0, -1]` / `df_ij[0, -1]`

    Values are stored exactly as returned by `get_free_energy()`; no unit
    conversion is applied here. Can be slow if `stepsize` is small.

    Parameters
    ----------
    stepsize : int, optional, default=100
        number of iterations between successive analyses

    Returns
    -------
    None
    """
    import os

    from perses.analysis import utils
    from openmmtools.multistate import MultiStateReporter, MultiStateSamplerAnalyzer

    # file-name keyword -> prefix of the history attributes on self
    phase_prefixes = (('vacuum', 'vac'), ('solvent', 'sol'), ('complex', 'com'))

    # find the output files
    output = [
        x for x in os.listdir(self.directory)
        if x.endswith('.nc') and 'checkpoint' not in x
    ]

    for out in output:
        path = f'{self.directory}/{out}'
        # Independent checks (not elif): a name containing several keywords is
        # processed once per keyword, as before.
        for keyword, prefix in phase_prefixes:
            if keyword not in out:
                continue
            reporter = MultiStateReporter(path)
            ncfile = utils.open_netcdf(path)
            n_iterations = ncfile.variables['last_iteration'][0]
            for step in range(stepsize, n_iterations, stepsize):
                analyzer = MultiStateSamplerAnalyzer(reporter, max_n_iterations=step)
                f_ij, df_ij = analyzer.get_free_energy()
                # NOTE(review): [1, -2] vs [0, -1] presumably distinguishes the
                # sampled endstates from additional outer ("es") states -- confirm.
                getattr(self, f'{prefix}dg_history').append(f_ij[1, -2])
                getattr(self, f'{prefix}ddg_history').append(df_ij[1, -2])
                getattr(self, f'{prefix}dg_history_es').append(f_ij[0, -1])
                getattr(self, f'{prefix}ddg_history_es').append(df_ij[0, -1])
    return
def get_positions(file):
    """
    Collect, for every entry of the 'positions' variable, its first sub-array.

    Parameters
    ----------
    file : str
        path handed to `open_netcdf`

    Returns
    -------
    list
        one item per entry of `ncfile.variables['positions']`; each item is
        element 0 of that entry converted to nested Python lists
        (presumably the coordinates of replica 0 at each iteration -- TODO confirm
        the axis layout against the writer of the file)
    """
    ncfile = open_netcdf(file)
    all_positions = ncfile.variables['positions']
    # Only element 0 of each frame is kept, so convert just that slice instead
    # of turning the whole frame into Python lists first.
    return [frame[0].tolist() for frame in all_positions]
def historic_fes(self, stepsize=100):
    """
    Post-process the simulations by running the free-energy analysis at intervals.

    For each non-checkpoint `.nc` file in `self.directory`, and for each of
    the keywords 'vacuum', 'solvent', 'complex' present in its name, the
    analysis is repeated with a growing `max_n_iterations`. The `[0, -1]`
    element of the free-energy matrix and of its uncertainty matrix is
    multiplied by the analyzer's `kT`, converted to kcal/mol and appended to
    `self._{vac,sol,com}dg_history` and `self._{vac,sol,com}ddg_history`
    respectively. Can be slow if stepsize is small.

    Parameters
    ----------
    stepsize : int, optional, default=100
        number of iterations between successive analyses

    Returns
    -------
    None
    """
    from perses.analysis import utils
    from simtk import unit
    import os
    from openmmtools.multistate import MultiStateReporter, MultiStateSamplerAnalyzer

    kcal_per_mol = unit.kilocalories_per_mole
    # (keyword looked for in the file name, stem of the history attributes)
    phase_table = (
        ('vacuum', '_vac'),
        ('solvent', '_sol'),
        ('complex', '_com'),
    )

    # find the output files
    trajectory_names = [
        name for name in os.listdir(self.directory)
        if name.endswith('.nc') and 'checkpoint' not in name
    ]

    for name in trajectory_names:
        for keyword, stem in phase_table:
            if keyword not in name:
                continue
            reporter = MultiStateReporter(f'{self.directory}/{name}')
            ncfile = utils.open_netcdf(f'{self.directory}/{name}')
            last_iteration = ncfile.variables['last_iteration'][0]
            for n_used in range(stepsize, last_iteration, stepsize):
                analyzer = MultiStateSamplerAnalyzer(
                    reporter, max_n_iterations=n_used)
                f_ij, df_ij = analyzer.get_free_energy()

                # dimensionless estimate -> energy -> kcal/mol
                free_energy = f_ij[0, -1] * analyzer.kT
                getattr(self, f'{stem}dg_history').append(
                    free_energy.in_units_of(kcal_per_mol))

                uncertainty = df_ij[0, -1] * analyzer.kT
                getattr(self, f'{stem}ddg_history').append(
                    uncertainty.in_units_of(kcal_per_mol))
    return
import matplotlib.pyplot as plt
import os
import sys
from glob import glob
from perses.analysis import utils

if __name__ == '__main__':
    # Usage: <script> DIRECTORY
    # Plots, for every non-checkpoint .nc file in DIRECTORY, the online-analysis
    # logZ history (column 0) and the 'states' variable (column 1), one row per file.
    directory = sys.argv[1]
    files = sorted(glob(os.path.join(os.getcwd(), directory, '*.nc')))
    # Filter on the file name only, so a parent directory that happens to
    # contain 'checkpoint' does not hide every file.
    files = [x for x in files if 'checkpoint' not in os.path.basename(x)]

    # Two rows as before, growing when more than two files are present
    # (a third file used to raise IndexError on axarr[2, 0]).
    n_rows = max(2, len(files))
    f, axarr = plt.subplots(n_rows, 3, sharex=False, sharey=False,
                            figsize=(16, 4 * n_rows))

    for i, filename in enumerate(files):
        # Derive the phase from the file name alone: str.rstrip('.nc') strips a
        # *set of characters* rather than the suffix, and splitting the full
        # path on '-' breaks when any parent directory contains a dash.
        stem = os.path.splitext(os.path.basename(filename))[0]
        pieces = stem.split('-')
        phase = pieces[1] if len(pieces) > 1 else stem

        ncfile = utils.open_netcdf(filename)
        online = ncfile.groups['online_analysis']
        t0 = online.variables['t0']
        logZ = online.variables['logZ_history']
        _, n_states = logZ.shape

        logz_ax = axarr[i, 0]
        logz_ax.plot(logZ, '.')
        logz_ax.set_xlabel('iteration')
        logz_ax.set_ylabel('logZ / kT')
        logz_ax.set_title('%s_%s' % (phase, directory))
        ymin, ymax = logz_ax.get_ylim()
        # dashed grey marker at t0
        logz_ax.vlines(t0, ymin, ymax, linestyles='--', color='grey')

        states = ncfile.variables['states']
        n_iterations, _ = states.shape

        states_ax = axarr[i, 1]
        states_ax.plot(states, '.')
        states_ax.set_xlabel('iteration')
        states_ax.set_ylabel('thermodynamic state')
        states_ax.axis([0, n_iterations, 0, n_states])
        ymin, ymax = states_ax.get_ylim()
        states_ax.vlines(t0, ymin, ymax, linestyles='--', color='grey')