def minimize(thermodynamic_state: states.ThermodynamicState,
             sampler_state: states.SamplerState,
             max_iterations: int = 20) -> states.SamplerState:
    """
    Energy-minimize a configuration through an ``MCMCSampler``.

    A throw-away ``MCMCSampler`` is built around a default
    ``LangevinSplittingDynamicsMove`` purely to get access to the
    sampler's ``minimize`` method; no dynamics are run here.

    Parameters
    ----------
    thermodynamic_state : openmmtools.states.ThermodynamicState
        The thermodynamic state at which the system is minimized.
    sampler_state : openmmtools.states.SamplerState
        The starting configuration for the minimization.
    max_iterations : int, optional, default 20
        Upper bound on the number of minimization iterations.

    Returns
    -------
    sampler_state : openmmtools.states.SamplerState
        The ``sampler_state`` attribute of the sampler after minimization,
        i.e. the positions and accompanying state following minimization.
        Callers should use this return value.
    """
    sampler = mcmc.MCMCSampler(
        thermodynamic_state,
        sampler_state,
        mcmc.LangevinSplittingDynamicsMove(),
    )
    sampler.minimize(max_iterations=max_iterations)
    return sampler.sampler_state
def create_hss(pkl, suffix, selection, checkpoint_interval, n_states):
    """
    Build a replica-exchange sampler from a pickled hybrid topology factory.

    Parameters
    ----------
    pkl : str
        Path of the pickle file holding the hybrid topology factory. The
        reporter file name is derived from it by dropping its last three
        characters and appending ``suffix`` and ``'.nc'``.
    suffix : str
        Text inserted between the truncated pickle path and ``'.nc'``.
    selection : str
        Selection string handed to ``hybrid_topology.select`` to choose the
        analysis particle indices.
    checkpoint_interval : int
        Checkpoint interval passed to the ``MultiStateReporter``.
    n_states : int
        Number of states passed to the sampler's ``setup``.

    Returns
    -------
    hss : HybridRepexSampler
        The sampler, already set up at 300 K with ``endstates=False``.
    reporter : MultiStateReporter
        The reporter used as the sampler's storage.
    """
    # NOTE: unpickling executes arbitrary code; only load trusted files.
    with open(pkl, 'rb') as handle:
        htf = pickle.load(handle)

    protocol = LambdaProtocol(functions='default')

    reporter = MultiStateReporter(
        pkl[:-3] + suffix + '.nc',
        analysis_particle_indices=htf.hybrid_topology.select(selection),
        checkpoint_interval=checkpoint_interval)

    move_settings = {
        'timestep': 4.0 * unit.femtoseconds,
        'collision_rate': 5.0 / unit.picosecond,
        'n_steps': 250,
        'reassign_velocities': False,
        'n_restart_attempts': 20,
        'splitting': "V R R R O R R R V",
        'constraint_tolerance': 1e-06,
    }
    hss = HybridRepexSampler(
        mcmc_moves=mcmc.LangevinSplittingDynamicsMove(**move_settings),
        hybrid_factory=htf,
        online_analysis_interval=10)

    hss.setup(n_states=n_states,
              temperature=300 * unit.kelvin,
              storage_file=reporter,
              lambda_protocol=protocol,
              endstates=False)
    return hss, reporter
def create_hss(reporter_name,
               hybrid_factory,
               selection_string='all',
               checkpoint_interval=1,
               n_states=13):
    """
    Build a replica-exchange sampler for an in-memory hybrid topology factory.

    Parameters
    ----------
    reporter_name : str
        Storage name handed to ``MultiStateReporter``.
    hybrid_factory : object
        Hybrid topology factory; its ``hybrid_topology.select`` is used to
        pick the analysis particles and it is passed to the sampler as
        ``hybrid_factory``.
    selection_string : str, optional, default 'all'
        Selection string for the analysis particle indices.
    checkpoint_interval : int, optional, default 1
        Checkpoint interval passed to the reporter.
    n_states : int, optional, default 13
        Number of states passed to the sampler's ``setup``.

    Returns
    -------
    hss : HybridRepexSampler
        The sampler, already set up at 300 K with ``endstates=False``.
    reporter : MultiStateReporter
        The reporter used as the sampler's storage.
    """
    protocol = LambdaProtocol(functions='default')

    analysis_indices = hybrid_factory.hybrid_topology.select(selection_string)
    reporter = MultiStateReporter(reporter_name,
                                  analysis_particle_indices=analysis_indices,
                                  checkpoint_interval=checkpoint_interval)

    langevin_move = mcmc.LangevinSplittingDynamicsMove(
        timestep=4.0 * unit.femtoseconds,
        collision_rate=5.0 / unit.picosecond,
        n_steps=250,
        reassign_velocities=False,
        n_restart_attempts=20,
        splitting="V R R R O R R R V",
        constraint_tolerance=1e-06)

    hss = HybridRepexSampler(mcmc_moves=langevin_move,
                             hybrid_factory=hybrid_factory,
                             online_analysis_interval=10)
    hss.setup(n_states=n_states,
              temperature=300 * unit.kelvin,
              storage_file=reporter,
              lambda_protocol=protocol,
              endstates=False)
    return hss, reporter
def create_integrator(htf, constraint_tol):
    """
    Build the alchemical, thermodynamic and sampler states for a hybrid
    system together with a single-step Langevin move.

    The alchemical state is created from ``htf.hybrid_system`` and its
    parameters are set with a value of 0.0 under the default
    ``LambdaProtocol``.

    Note that ``temperature`` is not a parameter of this function: it is a
    free variable that must be defined in an enclosing (e.g. module) scope
    when the function is called.

    Parameters
    ----------
    htf : object
        Hybrid topology factory providing ``hybrid_system`` and
        ``_hybrid_positions``.
    constraint_tol : float
        Constraint tolerance forwarded to the Langevin move.

    Returns
    -------
    mcmc_moves : openmmtools.mcmc.LangevinSplittingDynamicsMove
        A "V R O R V" move with ``n_steps=1`` and a 4 fs timestep.
    fast_thermodynamic_state : CompoundThermodynamicState
        Thermodynamic state composed with the alchemical state.
    fast_sampler_state : SamplerState
        Sampler state holding the hybrid positions and the system's default
        periodic box vectors.
    """
    hybrid_system = htf.hybrid_system

    alchemical_state = RelativeAlchemicalState.from_system(hybrid_system)
    alchemical_state.set_alchemical_parameters(
        0.0, LambdaProtocol(functions='default'))

    base_state = ThermodynamicState(hybrid_system, temperature=temperature)
    compound_state = CompoundThermodynamicState(
        base_state, composable_states=[alchemical_state])

    initial_sampler_state = SamplerState(
        positions=htf._hybrid_positions,
        box_vectors=hybrid_system.getDefaultPeriodicBoxVectors())

    langevin_move = mcmc.LangevinSplittingDynamicsMove(
        timestep=4.0 * unit.femtoseconds,
        collision_rate=5.0 / unit.picosecond,
        n_steps=1,
        reassign_velocities=False,
        n_restart_attempts=20,
        splitting="V R O R V",
        constraint_tolerance=constraint_tol)

    return langevin_move, compound_state, initial_sampler_state
def _attach_default_unit(value, default_unit):
    """Multiply a bare number by ``default_unit``; return anything else unchanged."""
    # YAML parses `300` as int and `300.0` as float; both mean "300 in the
    # default unit". bool is an int subclass but never a physical magnitude.
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        return value * default_unit
    return value


def run_setup(setup_options, serialize_systems=True, build_samplers=True):
    """
    Run the setup pipeline and return the relevant setup objects based on a yaml input file.

    Parameters
    ----------
    setup_options : dict
        Result of loading the yaml input file. Bare numbers (int or float)
        given for 'timestep', 'pressure', 'temperature', 'solvent_padding'
        and 'ionic_strength' are interpreted in femtoseconds, atmospheres,
        kelvin, angstroms and molar respectively; any other value (e.g. an
        existing Quantity or None) is used as given.
    serialize_systems : bool, default True
        If True, the hybrid, old and new systems of every phase are written
        to ``<trajectory_directory>/xml/`` (equilibrium branches only).
    build_samplers : bool, default True
        If False, the hybrid topology factories are built but no sampler
        objects are constructed.

    Returns
    -------
    setup_dict : dict
        - fe_type == 'neq':
          ``{'topology_proposals': top_prop, 'ne_fep': ne_fep}``
        - fe_type == 'fah' (only reached when ``build_samplers`` is True):
          ``{'topology_proposals': top_prop, 'hybrid_topology_factories': htf}``
        - otherwise:
          ``{'topology_proposals': top_prop, 'hybrid_topology_factories': htf,
          'hybrid_samplers': hss}``

    Raises
    ------
    AssertionError
        For an unknown phase, a non-bool 'use_given_geometries', or a
        receptor entry that is present but None.
    KeyError
        When a required option is missing.
    """
    phases = setup_options['phases']
    known_phases = ['complex', 'solvent', 'vacuum']
    for phase in phases:
        assert (
            phase in known_phases
        ), f"Unknown phase, {phase} provided. run_setup() can be used with {known_phases}"

    if 'use_given_geometries' not in setup_options:
        use_given_geometries = False
    else:
        assert isinstance(setup_options['use_given_geometries'], bool)
        use_given_geometries = setup_options['use_given_geometries']

    if 'complex' in phases:
        _logger.info(f"\tPulling receptor (as pdb or mol2)...")
        # We'll need the protein PDB file (without missing atoms)
        try:
            protein_pdb_filename = setup_options['protein_pdb']
            assert protein_pdb_filename is not None
            receptor_mol2 = None
        except KeyError:
            try:
                receptor_mol2 = setup_options['receptor_mol2']
                assert receptor_mol2 is not None
                protein_pdb_filename = None
            except KeyError as e:
                print(
                    "Either protein_pdb or receptor_mol2 must be specified if running a complex simulation"
                )
                raise e
    else:
        protein_pdb_filename = None
        receptor_mol2 = None

    # And a ligand file containing the pair of ligands between which we will transform
    ligand_file = setup_options['ligand_file']
    _logger.info(f"\tdetected ligand file: {ligand_file}")

    # get the indices of ligands out of the file:
    old_ligand_index = setup_options['old_ligand_index']
    new_ligand_index = setup_options['new_ligand_index']
    _logger.info(
        f"\told ligand index: {old_ligand_index}; new ligand index: {new_ligand_index}"
    )

    _logger.info(f"\tsetting up forcefield files...")
    forcefield_files = setup_options['forcefield_files']

    if "timestep" in setup_options:
        timestep = _attach_default_unit(setup_options['timestep'],
                                        unit.femtoseconds)
        _logger.info(f"\ttimestep: {timestep}.")
    else:
        timestep = 1.0 * unit.femtoseconds
        _logger.info(f"\tno timestep detected: setting default as 1.0fs.")

    if "neq_splitting" in setup_options:
        neq_splitting = setup_options['neq_splitting']
        _logger.info(f"\tneq_splitting: {neq_splitting}")
        try:
            eq_splitting = setup_options['eq_splitting']
            _logger.info(f"\teq_splitting: {eq_splitting}")
        except KeyError as e:
            print(
                "If you specify a nonequilibrium splitting string, you must also specify an equilibrium one."
            )
            raise e
    else:
        eq_splitting = "V R O R V"
        neq_splitting = "V R O R V"
        _logger.info(
            f"\tno splitting strings specified: defaulting to neq: {neq_splitting}, eq: {eq_splitting}."
        )

    # NOTE(review): measure_shadow_work is parsed and logged but not passed
    # to anything below.
    if "measure_shadow_work" in setup_options:
        measure_shadow_work = setup_options['measure_shadow_work']
        _logger.info(f"\tmeasuring shadow work: {measure_shadow_work}.")
    else:
        measure_shadow_work = False
        _logger.info(
            f"\tno measure_shadow_work specified: defaulting to False.")

    pressure = _attach_default_unit(setup_options['pressure'],
                                    unit.atmosphere)
    temperature = _attach_default_unit(setup_options['temperature'],
                                       unit.kelvin)
    solvent_padding_angstroms = _attach_default_unit(
        setup_options['solvent_padding'], unit.angstrom)
    ionic_strength = _attach_default_unit(setup_options['ionic_strength'],
                                          unit.molar)

    _logger.info(f"\tsetting pressure: {pressure}.")
    _logger.info(f"\tsetting temperature: {temperature}.")
    _logger.info(f"\tsetting solvent padding: {solvent_padding_angstroms}A.")
    _logger.info(f"\tsetting ionic strength: {ionic_strength}M.")

    setup_pickle_file = setup_options.get('save_setup_pickle_as')
    _logger.info(f"\tsetup pickle file: {setup_pickle_file}")
    trajectory_directory = setup_options['trajectory_directory']
    _logger.info(f"\ttrajectory directory: {trajectory_directory}")

    # The atom map is optional and loading it is best-effort: a file that
    # cannot be read or parsed still falls back to None, but the failure is
    # now reported instead of being logged as "no atom map specified".
    atom_map_file = setup_options.get('atom_map')
    if atom_map_file is None:
        atom_map = None
        _logger.info(f"\tno atom map specified: default to None.")
    else:
        try:
            with open(atom_map_file, 'r') as f:
                atom_map = {
                    int(fields[0]): int(fields[1])
                    for fields in (line.split() for line in f)
                }
            _logger.info(f"\tsucceeded parsing atom map.")
        except Exception as e:
            atom_map = None
            _logger.warning(
                f"\tfailed to load atom map from {atom_map_file} ({e!r}): default to None."
            )

    if setup_options.get('topology_proposal') is None:
        _logger.info(
            f"\tno topology_proposal specified; proceeding to RelativeFEPSetup...\n\n\n"
        )
        # NOTE(review): the 'set_solvent_box_dims_to_complex' option was
        # previously read into a local that nothing consumed; it is not
        # forwarded to RelativeFEPSetup here.

        _logger.info(
            f'Box dimensions: {setup_options["complex_box_dimensions"]} and {setup_options["solvent_box_dimensions"]}'
        )
        fe_setup = RelativeFEPSetup(
            ligand_file,
            old_ligand_index,
            new_ligand_index,
            forcefield_files,
            phases=phases,
            protein_pdb_filename=protein_pdb_filename,
            receptor_mol2_filename=receptor_mol2,
            pressure=pressure,
            temperature=temperature,
            solvent_padding=solvent_padding_angstroms,
            spectator_filenames=setup_options['spectators'],
            map_strength=setup_options['map_strength'],
            atom_expr=setup_options['atom_expr'],
            bond_expr=setup_options['bond_expr'],
            atom_map=atom_map,
            neglect_angles=setup_options['neglect_angles'],
            anneal_14s=setup_options['anneal_1,4s'],
            small_molecule_forcefield=setup_options[
                'small_molecule_forcefield'],
            small_molecule_parameters_cache=setup_options[
                'small_molecule_parameters_cache'],
            trajectory_directory=trajectory_directory,
            trajectory_prefix=setup_options['trajectory_prefix'],
            nonbonded_method=setup_options['nonbonded_method'],
            complex_box_dimensions=setup_options['complex_box_dimensions'],
            solvent_box_dimensions=setup_options['solvent_box_dimensions'],
            ionic_strength=ionic_strength,
            remove_constraints=setup_options['remove_constraints'],
            use_given_geometries=use_given_geometries)

        _logger.info(f"\twriting pickle output...")
        if setup_pickle_file is not None:
            pickle_path = os.path.join(os.getcwd(), trajectory_directory,
                                       setup_pickle_file)
            with open(pickle_path, 'wb') as f:
                # Pickling the setup object is best-effort; a failure is
                # reported but does not abort the setup.
                try:
                    pickle.dump(fe_setup, f)
                    _logger.info(f"\tsuccessfully dumped pickle.")
                except Exception as e:
                    print(e)
                    print("\tUnable to save setup object as a pickle")
            _logger.info(
                f"\tsetup is complete. Writing proposals and positions for each phase to top_prop dict..."
            )
        else:
            _logger.info(
                f"\tsetup is complete. Omitted writing proposals and positions for each phase to top_prop dict..."
            )

        # (key suffix in top_prop, attribute-name prefix on fe_setup), in
        # the order the entries are inserted.
        per_phase_entries = (
            ('topology_proposal', ''),
            ('geometry_engine', '_'),
            ('old_positions', ''),
            ('new_positions', ''),
            ('added_valence_energy', '_'),
            ('subtracted_valence_energy', '_'),
            ('logp_proposal', '_'),
            ('logp_reverse', '_'),
            ('forward_neglected_angles', '_'),
            ('reverse_neglected_angles', '_'),
        )
        top_prop = dict()
        for phase in phases:
            for entry, attr_prefix in per_phase_entries:
                top_prop[f'{phase}_{entry}'] = getattr(
                    fe_setup, f'{attr_prefix}{phase}_{entry}')

        top_prop['ligand_oemol_old'] = fe_setup._ligand_oemol_old
        top_prop['ligand_oemol_new'] = fe_setup._ligand_oemol_new
        top_prop[
            'non_offset_new_to_old_atom_map'] = fe_setup.non_offset_new_to_old_atom_map

        _logger.info(f"\twriting atom_mapping.png")
        atom_map_outfile = os.path.join(os.getcwd(), trajectory_directory,
                                        'atom_mapping.png')

        if setup_options.get('render_atom_map'):
            render_atom_mapping(atom_map_outfile, fe_setup._ligand_oemol_old,
                                fe_setup._ligand_oemol_new,
                                fe_setup.non_offset_new_to_old_atom_map)

    else:
        _logger.info(f"\tloading topology proposal from yaml setup options...")
        # The file holds a pickled dict inside a 0-d object array, which
        # NumPy refuses to load unless allow_pickle=True is given.
        # NOTE: unpickling executes arbitrary code; only load trusted files.
        top_prop = np.load(setup_options['topology_proposal'],
                           allow_pickle=True).item()

    n_steps_per_move_application = setup_options[
        'n_steps_per_move_application']
    _logger.info(
        f"\t steps per move application: {n_steps_per_move_application}")
    trajectory_directory = setup_options['trajectory_directory']

    trajectory_prefix = setup_options['trajectory_prefix']
    _logger.info(f"\ttrajectory prefix: {trajectory_prefix}")

    if 'atom_selection' in setup_options:
        atom_selection = setup_options['atom_selection']
        _logger.info(f"\tatom selection detected: {atom_selection}")
    else:
        _logger.info(f"\tno atom selection detected: default to all.")
        atom_selection = 'all'

    if setup_options['fe_type'] == 'neq':
        _logger.info(f"\tInstantiating nonequilibrium switching FEP")
        # These two options are required keys (a KeyError is raised when
        # they are missing) even though their values are not used below.
        n_equilibrium_steps_per_iteration = setup_options[
            'n_equilibrium_steps_per_iteration']
        ncmc_save_interval = setup_options['ncmc_save_interval']
        write_ncmc_configuration = setup_options['write_ncmc_configuration']
        if setup_options['LSF']:
            _internal_parallelism = {
                'library': ('dask', 'LSF'),
                'num_processes': setup_options['processes']
            }
        else:
            _internal_parallelism = None

        ne_fep = dict()
        for phase in phases:
            _logger.info(f"\t\tphase: {phase}")
            hybrid_factory = HybridTopologyFactory(
                top_prop['%s_topology_proposal' % phase],
                top_prop['%s_old_positions' % phase],
                top_prop['%s_new_positions' % phase],
                neglected_new_angle_terms=top_prop[
                    f"{phase}_forward_neglected_angles"],
                neglected_old_angle_terms=top_prop[
                    f"{phase}_reverse_neglected_angles"],
                softcore_LJ_v2=setup_options['softcore_v2'],
                interpolate_old_and_new_14s=setup_options['anneal_1,4s'])

            if build_samplers:
                ne_fep[phase] = SequentialMonteCarlo(
                    factory=hybrid_factory,
                    lambda_protocol=setup_options['lambda_protocol'],
                    temperature=temperature,
                    trajectory_directory=trajectory_directory,
                    trajectory_prefix=f"{trajectory_prefix}_{phase}",
                    atom_selection=atom_selection,
                    timestep=timestep,
                    eq_splitting_string=eq_splitting,
                    neq_splitting_string=neq_splitting,
                    collision_rate=setup_options['ncmc_collision_rate_ps'],
                    ncmc_save_interval=ncmc_save_interval,
                    internal_parallelism=_internal_parallelism)

        print("Nonequilibrium switching driver class constructed")

        return {'topology_proposals': top_prop, 'ne_fep': ne_fep}

    else:
        _logger.info(f"\tno nonequilibrium detected.")
        htf = dict()
        hss = dict()
        _logger.info(f"\tcataloging HybridTopologyFactories...")

        for phase in phases:
            _logger.info(f"\t\tphase: {phase}:")
            #TODO write a SAMSFEP class that mirrors NonequilibriumSwitchingFEP
            _logger.info(
                f"\t\twriting HybridTopologyFactory for phase {phase}...")
            htf[phase] = HybridTopologyFactory(
                top_prop['%s_topology_proposal' % phase],
                top_prop['%s_old_positions' % phase],
                top_prop['%s_new_positions' % phase],
                neglected_new_angle_terms=top_prop[
                    f"{phase}_forward_neglected_angles"],
                neglected_old_angle_terms=top_prop[
                    f"{phase}_reverse_neglected_angles"],
                softcore_LJ_v2=setup_options['softcore_v2'],
                interpolate_old_and_new_14s=setup_options['anneal_1,4s'])

        for phase in phases:
            # Define necessary vars to check energy bookkeeping
            _top_prop = top_prop['%s_topology_proposal' % phase]
            _htf = htf[phase]
            _forward_added_valence_energy = top_prop['%s_added_valence_energy'
                                                     % phase]
            _reverse_subtracted_valence_energy = top_prop[
                '%s_subtracted_valence_energy' % phase]

            if not use_given_geometries:
                zero_state_error, one_state_error = validate_endstate_energies(
                    _top_prop,
                    _htf,
                    _forward_added_valence_energy,
                    _reverse_subtracted_valence_energy,
                    beta=1.0 / (kB * temperature),
                    ENERGY_THRESHOLD=ENERGY_THRESHOLD)
                _logger.info(f"\t\terror in zero state: {zero_state_error}")
                _logger.info(f"\t\terror in one state: {one_state_error}")
            else:
                _logger.info(
                    f"'use_given_geometries' was passed to setup; skipping endstate validation"
                )

            #TODO expose more of these options in input
            if build_samplers:
                n_states = setup_options['n_states']
                _logger.info(f"\tn_states: {n_states}")
                if 'n_replicas' not in setup_options:
                    n_replicas = n_states
                else:
                    n_replicas = setup_options['n_replicas']

                checkpoint_interval = setup_options['checkpoint_interval']

                # generating lambda protocol
                lambda_protocol = LambdaProtocol(
                    functions=setup_options['protocol-type'])
                _logger.info(
                    f'Using lambda protocol : {setup_options["protocol-type"]}'
                )

                if atom_selection:
                    selection_indices = htf[phase].hybrid_topology.select(
                        atom_selection)
                else:
                    selection_indices = None

                storage_name = str(trajectory_directory) + '/' + str(
                    trajectory_prefix) + '-' + str(phase) + '.nc'
                _logger.info(f'\tstorage_name: {storage_name}')
                _logger.info(f'\tselection_indices {selection_indices}')
                _logger.info(f'\tcheckpoint interval {checkpoint_interval}')
                reporter = MultiStateReporter(
                    storage_name,
                    analysis_particle_indices=selection_indices,
                    checkpoint_interval=checkpoint_interval)

                # Endstates are requested for every phase except vacuum.
                endstates = phase != 'vacuum'

                if setup_options['fe_type'] == 'fah':
                    _logger.info('SETUP FOR FAH DONE')
                    return {
                        'topology_proposals': top_prop,
                        'hybrid_topology_factories': htf
                    }

                if setup_options['fe_type'] == 'sams':
                    hss[phase] = HybridSAMSSampler(
                        mcmc_moves=mcmc.LangevinSplittingDynamicsMove(
                            timestep=timestep,
                            collision_rate=1.0 / unit.picosecond,
                            n_steps=n_steps_per_move_application,
                            reassign_velocities=False,
                            n_restart_attempts=20,
                            constraint_tolerance=1e-06),
                        hybrid_factory=htf[phase],
                        online_analysis_interval=setup_options['offline-freq'],
                        online_analysis_minimum_iterations=10,
                        flatness_criteria=setup_options['flatness-criteria'],
                        gamma0=setup_options['gamma0'])
                    hss[phase].setup(n_states=n_states,
                                     n_replicas=n_replicas,
                                     temperature=temperature,
                                     storage_file=reporter,
                                     lambda_protocol=lambda_protocol,
                                     endstates=endstates)
                elif setup_options['fe_type'] == 'repex':
                    hss[phase] = HybridRepexSampler(
                        mcmc_moves=mcmc.LangevinSplittingDynamicsMove(
                            timestep=timestep,
                            collision_rate=1.0 / unit.picosecond,
                            n_steps=n_steps_per_move_application,
                            reassign_velocities=False,
                            n_restart_attempts=20,
                            constraint_tolerance=1e-06),
                        hybrid_factory=htf[phase],
                        online_analysis_interval=setup_options['offline-freq'])
                    hss[phase].setup(n_states=n_states,
                                     temperature=temperature,
                                     storage_file=reporter,
                                     lambda_protocol=lambda_protocol,
                                     endstates=endstates)
            else:
                _logger.info(f"omitting sampler construction")

            if serialize_systems:
                # save the systems and the states
                _logger.info('WRITING OUT XML FILES')
                xml_directory = f'{setup_options["trajectory_directory"]}/xml/'
                # exist_ok avoids the check-then-create race.
                os.makedirs(xml_directory, exist_ok=True)
                from perses.utils import data
                _logger.info(f'Saving the hybrid, old and new system to disk')
                data.serialize(htf[phase].hybrid_system,
                               f'{xml_directory}{phase}-hybrid-system.gz')
                data.serialize(htf[phase]._old_system,
                               f'{xml_directory}{phase}-old-system.gz')
                data.serialize(htf[phase]._new_system,
                               f'{xml_directory}{phase}-new-system.gz')

        return {
            'topology_proposals': top_prop,
            'hybrid_topology_factories': htf,
            'hybrid_samplers': hss
        }
def run_equilibrium(task):
    """
    Apply an equilibrium Langevin move ``n_iterations`` times, collecting a
    reduced potential and a frame after every application.

    Frames are buffered in memory; whenever the buffer grows beyond
    ``max_size`` bytes it is written to a numbered ``.h5`` file (if a
    trajectory filename was given) and cleared. Whatever remains buffered
    at the end is written as a final file.

    Arguments
    ---------
    task : EquilibriumFEPTask namedtuple
        Must provide ``sampler_state`` and ``inputs``. ``inputs`` is a dict
        with at least the following keys and values:
        {
         thermodynamic_state: (<openmmtools.states.CompoundThermodynamicState>; compound thermodynamic state comprising state at lambda = 0 (1)),
         nsteps_equil: (<int>; The number of equilibrium steps that a move should make when apply is called),
         topology: (<mdtraj.Topology>; an MDTraj topology object used to construct the trajectory),
         n_iterations: (<int>; The number of times to apply the move. Note that this is not the number of steps of dynamics),
         splitting: (<str>; The splitting string for the dynamics),
         atom_indices_to_save: (<list of int, default None>; list of indices to save when excluding waters, for instance. If None or empty, all indices are saved.),
         trajectory_filename: (<str, optional, default None>; Full filepath of trajectory files. Its last two characters are replaced by a four-digit file index and '.h5'. If None, trajectory files are not written.),
         max_size: (<float>; maximum size in bytes of the buffered positions until they are written to disk),
         timer: (<bool, default False>; whether to time all parts of the equilibrium run),
         _minimize: (<bool, default False>; whether to minimize the sampler_state before conducting equilibration),
         file_iterator: (<int, default 0>; which index to begin writing files),
         timestep: (<unit.Quantity=float*unit.femtoseconds>; dynamical timestep)
         }

    Returns
    -------
    out_task : EquilibriumFEPTask namedtuple
        New task carrying the final sampler state, the unchanged inputs and
        ``outputs = {'reduced_potentials': [...], 'files': [(filename,
        n_frames), ...], 'timers': {...}}``. ``timers`` is empty unless
        ``inputs['timer']`` is true.
    """
    inputs = task.inputs
    timer = inputs['timer']  # bool
    timers = {}
    file_numsnapshots = []
    file_iterator = inputs['file_iterator']
    trajectory_filename = inputs['trajectory_filename']

    def numbered_filename(index):
        # Replace the last two characters of the base name by the
        # zero-padded file index and the .h5 extension.
        return trajectory_filename[:-2] + f'{index:04}' + '.h5'

    # copy the thermodynamic state in case computation is parallelized
    if timer:
        start = time.time()
    thermodynamic_state = copy.deepcopy(inputs['thermodynamic_state'])
    sampler_state = task.sampler_state
    if timer:
        timers['copy_state'] = time.time() - start

    if inputs['_minimize']:
        _logger.debug(f"conducting minimization")
        if timer:
            start = time.time()
        # minimize() may hand back a new sampler state rather than updating
        # its argument in place; use the returned state when there is one so
        # that the minimization is not silently discarded.
        minimized_state = minimize(thermodynamic_state, sampler_state)
        if minimized_state is not None:
            sampler_state = minimized_state
        if timer:
            timers['minimize'] = time.time() - start

    #get the atom indices we need to subset the topology and positions
    if timer:
        start = time.time()
    if not inputs['atom_indices_to_save']:
        atom_indices = list(range(inputs['topology'].n_atoms))
        subset_topology = inputs['topology']
    else:
        atom_indices = inputs['atom_indices_to_save']
        subset_topology = inputs['topology'].subset(atom_indices)
    if timer:
        timers['define_topology'] = time.time() - start

    #construct the MCMove:
    mc_move = mcmc.LangevinSplittingDynamicsMove(
        n_steps=inputs['nsteps_equil'],
        splitting=inputs['splitting'],
        timestep=inputs['timestep'])
    mc_move.n_restart_attempts = 10

    #buffers for the frames that have not been written to disk yet
    trajectory_positions = []
    trajectory_box_lengths = []
    trajectory_box_angles = []
    buffered_bytes = 0  # running size of trajectory_positions
    reduced_potentials = []

    def buffered_trajectory():
        return md.Trajectory(
            np.array(trajectory_positions),
            subset_topology,
            unitcell_lengths=np.array(trajectory_box_lengths),
            unitcell_angles=np.array(trajectory_box_angles))

    #loop through iterations and apply MCMove, then collect positions
    _logger.debug(f"conducting {inputs['n_iterations']} of production")
    if timer:
        eq_times = []

    init_file_iterator = inputs['file_iterator']
    for iteration in tqdm.trange(inputs['n_iterations']):
        if timer:
            start = time.time()
        _logger.debug(f"\tconducting iteration {iteration}")
        mc_move.apply(thermodynamic_state, sampler_state)

        reduced_potentials.append(
            thermodynamic_state.reduced_potential(sampler_state))

        frame = sampler_state.positions[atom_indices, :].value_in_unit_system(
            unit.md_unit_system)
        trajectory_positions.append(frame)
        # Track the buffer size incrementally instead of re-stacking every
        # buffered frame into a new array on each iteration.
        buffered_bytes += np.asarray(frame).nbytes

        #get the box lengths and angles
        a, b, c, alpha, beta, gamma = mdtrajutils.unitcell.box_vectors_to_lengths_and_angles(
            *sampler_state.box_vectors)
        trajectory_box_lengths.append([a, b, c])
        trajectory_box_angles.append([alpha, beta, gamma])

        #if the buffered positions are too large, write them to disk and start fresh
        if buffered_bytes > inputs['max_size']:
            if trajectory_filename is not None:
                new_filename = numbered_filename(file_iterator)
                file_numsnapshots.append(
                    (new_filename, len(trajectory_positions)))
                file_iterator += 1
                write_equilibrium_trajectory(buffered_trajectory(),
                                             new_filename)

            # Clear the buffers. Without a filename the frames are never
            # written or returned, so dropping them only frees memory.
            trajectory_positions = []
            trajectory_box_lengths = []
            trajectory_box_angles = []
            buffered_bytes = 0

        if timer:
            eq_times.append(time.time() - start)

    if timer:
        timers['run_eq'] = eq_times
    _logger.debug(f"production done")

    #If there is a trajectory filename passed, write out the remaining frames.
    #An empty buffer means the last iteration already flushed it to disk.
    if timer:
        start = time.time()
    if trajectory_filename is not None and trajectory_positions:
        if file_iterator == init_file_iterator:
            #no files have been written yet
            final_index = file_iterator
        else:
            # NOTE(review): file_iterator was already advanced past the last
            # file written in the loop, so the +1 leaves one index unused.
            # Kept as-is because the file names are reported in 'files'.
            final_index = file_iterator + 1
        new_filename = numbered_filename(final_index)
        file_numsnapshots.append((new_filename, len(trajectory_positions)))
        write_equilibrium_trajectory(buffered_trajectory(), new_filename)
    if timer:
        timers['write_traj'] = time.time() - start

    out_task = EquilibriumFEPTask(sampler_state=sampler_state,
                                  inputs=task.inputs,
                                  outputs={
                                      'reduced_potentials': reduced_potentials,
                                      'files': file_numsnapshots,
                                      'timers': timers
                                  })
    return out_task
def run_equilibrium(
        equilibrium_result: EquilibriumResult,
        thermodynamic_state: states.ThermodynamicState,
        nsteps_equil: int,
        topology: md.Topology,
        n_iterations: int,
        atom_indices_to_save: List[int] = None,
        trajectory_filename: str = None,
        splitting: str = "V R O R V",
        timestep: unit.Quantity = 1.0 * unit.femtoseconds
) -> EquilibriumResult:
    """
    Run nsteps of equilibrium sampling at the specified thermodynamic state and return the final sampler state
    as well as a trajectory of the positions after each application of an MCMove. This means that if the MCMove
    is configured to run 1000 steps of dynamics, and n_iterations is 100, there will be 100 frames in the resulting
    trajectory; these are the result of 100,000 steps (1000*100) of dynamics.

    Parameters
    ----------
    equilibrium_result : EquilibriumResult
        EquilibriumResult namedtuple containing the information necessary to resume;
        only its ``sampler_state`` is read here.
    thermodynamic_state : openmmtools.states.ThermodynamicState
        The thermodynamic state (including context parameters) that should be used
    nsteps_equil : int
        The number of equilibrium steps that a move should make when apply is called
    topology : mdtraj.Topology
        an MDTraj topology object used to construct the trajectory
    n_iterations : int
        The number of times to apply the move. Note that this is not the number of steps of dynamics; it is
        n_iterations*n_steps (which is set in the MCMove).
    atom_indices_to_save : list of int, default None
        list of indices to save (when excluding waters, for instance). If None, all indices are saved.
    trajectory_filename : str, optional, default None
        Full filepath of trajectory files. If None, trajectory files are not written.
    splitting : str, default "V R O R V"
        The splitting string for the dynamics
    timestep : unit.Quantity, default 1.0 * unit.femtoseconds
        The timestep of the Langevin dynamics move

    Returns
    -------
    equilibrium_result : EquilibriumResult
        New EquilibriumResult built from the final SamplerState (for resuming) and the reduced potential
        of the final frame. The trajectory itself is only written to disk, not returned.
    """
    sampler_state = equilibrium_result.sampler_state

    #get the atom indices we need to subset the topology and positions
    if atom_indices_to_save is None:
        atom_indices = list(range(topology.n_atoms))
        subset_topology = topology
    else:
        subset_topology = topology.subset(atom_indices_to_save)
        atom_indices = atom_indices_to_save

    n_atoms = subset_topology.n_atoms

    #construct the MCMove; the caller's timestep is forwarded so that the
    #parameter actually takes effect
    mc_move = mcmc.LangevinSplittingDynamicsMove(n_steps=nsteps_equil,
                                                 splitting=splitting,
                                                 timestep=timestep)
    mc_move.n_restart_attempts = 10

    #create numpy arrays for the trajectory, one row per move application
    trajectory_positions = np.zeros([n_iterations, n_atoms, 3])
    trajectory_box_lengths = np.zeros([n_iterations, 3])
    trajectory_box_angles = np.zeros([n_iterations, 3])

    #loop through iterations and apply MCMove, then collect positions into numpy array
    for iteration in range(n_iterations):
        mc_move.apply(thermodynamic_state, sampler_state)

        trajectory_positions[iteration, :] = sampler_state.positions[
            atom_indices, :].value_in_unit_system(unit.md_unit_system)

        #get the box lengths and angles
        a, b, c, alpha, beta, gamma = mdtrajutils.unitcell.box_vectors_to_lengths_and_angles(
            *sampler_state.box_vectors)
        trajectory_box_lengths[iteration, :] = [a, b, c]
        trajectory_box_angles[iteration, :] = [alpha, beta, gamma]

    #construct trajectory object:
    trajectory = md.Trajectory(trajectory_positions,
                               subset_topology,
                               unitcell_lengths=trajectory_box_lengths,
                               unitcell_angles=trajectory_box_angles)

    #get the reduced potential from the final frame for endpoint perturbations
    reduced_potential_final_frame = thermodynamic_state.reduced_potential(
        sampler_state)

    #construct equilibrium result object
    equilibrium_result = EquilibriumResult(sampler_state,
                                           reduced_potential_final_frame)

    #If there is a trajectory filename passed, write out the results here:
    if trajectory_filename is not None:
        write_equilibrium_trajectory(equilibrium_result, trajectory,
                                     trajectory_filename)

    return equilibrium_result
def __init__(self,
             molecules: List[str],
             output_filename: str,
             ncmc_switching_times: Dict[str, int],
             equilibrium_steps: Dict[str, int],
             timestep: unit.Quantity,
             initial_molecule: str=None,
             geometry_options: Dict=None):
    """
    Build explicit-solvent and vacuum SAMS samplers over a set of small molecules.

    Parameters
    ----------
    molecules : list of str
        SMILES strings of the molecules to sample over. Each one is
        canonicalized before being handed to the proposal engines.
    output_filename : str
        Path passed to ``NetCDFStorage``.
    ncmc_switching_times : dict of str: int
        Indexed by environment name (``'explicit'`` / ``'vacuum'``). The value is
        used both as the ``'nsteps'`` option and as ``ncmc_write_interval`` of
        the expanded-ensemble sampler for that environment.
    equilibrium_steps : dict of str: int
        Stored as ``self._n_equilibrium_steps``; not otherwise used in this
        constructor.
    timestep : unit.Quantity
        Timestep passed to ``mcmc.LangevinSplittingDynamicsMove``.
    initial_molecule : str, optional
        SMILES string of the starting molecule. If not given (or empty), one
        entry of ``molecules`` is chosen at random with ``np.random.choice``.
    geometry_options : dict, optional
        If given, must contain the keys ``'n_torsion_divsions'`` (sic) and
        ``'use_sterics'``, each indexable by environment name. If not given,
        180 torsion divisions and ``use_sterics=False`` are used.
    """
    self._molecules = [SmallMoleculeSetProposalEngine.canonicalize_smiles(molecule) for molecule in molecules]
    environments = ['explicit', 'vacuum']
    temperature = 298.15 * unit.kelvin
    pressure = 1.0 * unit.atmospheres
    constraints = app.HBonds
    self._storage = NetCDFStorage(output_filename)
    self._ncmc_switching_times = ncmc_switching_times
    self._n_equilibrium_steps = equilibrium_steps
    self._geometry_options = geometry_options

    # Create a system generator for our desired forcefields.
    from perses.rjmc.topology_proposal import SystemGenerator
    system_generators = dict()
    from pkg_resources import resource_filename
    gaff_xml_filename = resource_filename('perses', 'data/gaff.xml')
    barostat = openmm.MonteCarloBarostat(pressure, temperature)
    # FIX: the original was missing the comma between
    # `periodic_forcefield_kwargs` and `barostat`, which is a syntax error.
    system_generators['explicit'] = SystemGenerator(
        [gaff_xml_filename, 'tip3p.xml'],
        forcefield_kwargs={
            'nonbondedCutoff': 9.0 * unit.angstrom,
            'implicitSolvent': None,
            'constraints': constraints,
            'ewaldErrorTolerance': 1e-5,
            'hydrogenMass': 3.0*unit.amu,
        },
        periodic_forcefield_kwargs={'nonbondedMethod': app.PME},
        barostat=barostat)
    system_generators['vacuum'] = SystemGenerator(
        [gaff_xml_filename],
        forcefield_kwargs={
            'implicitSolvent': None,
            'constraints': constraints,
            'hydrogenMass': 3.0*unit.amu,
        },
        nonperiodic_forcefield_kwargs={'nonbondedMethod': app.NoCutoff})

    #
    # Create topologies and positions
    #
    topologies = dict()
    positions = dict()

    from openmoltools import forcefield_generators
    forcefield = app.ForceField(gaff_xml_filename, 'tip3p.xml')
    forcefield.registerTemplateGenerator(forcefield_generators.gaffTemplateGenerator)

    # Create molecule in vacuum.
    from perses.utils.openeye import extractPositionsFromOEMol
    from openmoltools.openeye import smiles_to_oemol, generate_conformers
    if initial_molecule:
        smiles = initial_molecule
    else:
        # Drawn from the caller-supplied (non-canonicalized) list.
        smiles = np.random.choice(molecules)
    molecule = smiles_to_oemol(smiles)
    molecule = generate_conformers(molecule, max_confs=1)
    topologies['vacuum'] = forcefield_generators.generateTopologyFromOEMol(molecule)
    positions['vacuum'] = extractPositionsFromOEMol(molecule)

    # Create molecule in solvent.
    modeller = app.Modeller(topologies['vacuum'], positions['vacuum'])
    modeller.addSolvent(forcefield, model='tip3p', padding=9.0 * unit.angstrom)
    topologies['explicit'] = modeller.getTopology()
    positions['explicit'] = modeller.getPositions()

    # Set up the proposal engines.
    proposal_engines = dict()
    for environment in environments:
        proposal_engines[environment] = SmallMoleculeSetProposalEngine(self._molecules, system_generators[environment])

    # Generate systems
    systems = dict()
    for environment in environments:
        systems[environment] = system_generators[environment].build_system(topologies[environment])

    # Define thermodynamic state of interest.
    # Only the explicit-solvent state is given a pressure.
    thermodynamic_states = dict()
    thermodynamic_states['explicit'] = states.ThermodynamicState(system=systems['explicit'], temperature=temperature, pressure=pressure)
    thermodynamic_states['vacuum'] = states.ThermodynamicState(system=systems['vacuum'], temperature=temperature)

    # Create SAMS samplers
    from perses.samplers.samplers import ExpandedEnsembleSampler, SAMSSampler
    mcmc_samplers = dict()
    exen_samplers = dict()
    sams_samplers = dict()
    for environment in environments:
        storage = NetCDFStorageView(self._storage, envname=environment)

        if self._geometry_options:
            # NOTE: the key is spelled 'n_torsion_divsions' (missing 'i');
            # it is kept as-is because callers must supply exactly this key.
            n_torsion_divisions = self._geometry_options['n_torsion_divsions'][environment]
            use_sterics = self._geometry_options['use_sterics'][environment]
        else:
            n_torsion_divisions = 180
            use_sterics = False

        geometry_engine = geometry.FFAllAngleGeometryEngine(storage=storage, n_torsion_divisions=n_torsion_divisions, use_sterics=use_sterics)
        move = mcmc.LangevinSplittingDynamicsMove(timestep=timestep, splitting="V R O R V", n_restart_attempts=10)
        chemical_state_key = proposal_engines[environment].compute_state_key(topologies[environment])

        # Only the explicit-solvent sampler state carries box vectors.
        if environment == 'explicit':
            sampler_state = states.SamplerState(positions=positions[environment], box_vectors=systems[environment].getDefaultPeriodicBoxVectors())
        else:
            sampler_state = states.SamplerState(positions=positions[environment])

        mcmc_samplers[environment] = mcmc.MCMCSampler(thermodynamic_states[environment], sampler_state, move)
        exen_samplers[environment] = ExpandedEnsembleSampler(
            mcmc_samplers[environment],
            topologies[environment],
            chemical_state_key,
            proposal_engines[environment],
            geometry_engine,
            options={'nsteps': self._ncmc_switching_times[environment]},
            storage=storage,
            ncmc_write_interval=self._ncmc_switching_times[environment])
        exen_samplers[environment].verbose = True
        sams_samplers[environment] = SAMSSampler(exen_samplers[environment], storage=storage)
        sams_samplers[environment].verbose = True

    # Create test MultiTargetDesign sampler.
    # The explicit sampler is weighted +1.0 and the vacuum sampler -1.0.
    from perses.samplers.samplers import MultiTargetDesign
    target_samplers = {sams_samplers['explicit']: 1.0, sams_samplers['vacuum']: -1.0}
    designer = MultiTargetDesign(target_samplers, storage=self._storage)

    # Store things.
    self.molecules = molecules
    self.environments = environments
    self.topologies = topologies
    self.positions = positions
    self.system_generators = system_generators
    self.proposal_engines = proposal_engines
    self.thermodynamic_states = thermodynamic_states
    self.mcmc_samplers = mcmc_samplers
    self.exen_samplers = exen_samplers
    self.sams_samplers = sams_samplers
    self.designer = designer
def __init__(self,
             topology_proposal,
             pos_old,
             new_positions,
             use_dispersion_correction=False,
             forward_functions=None,
             ncmc_nsteps=100,
             nsteps_per_iteration=1,
             concurrency=4,
             platform_name="OpenCL",
             temperature=300.0 * unit.kelvin,
             trajectory_directory=None,
             trajectory_prefix=None):
    """
    Set up the hybrid system, thermodynamic states, integrators and moves
    for nonequilibrium switching between lambda = 0 and lambda = 1.

    Parameters
    ----------
    topology_proposal
        Passed to ``HybridTopologyFactory``; its ``n_atoms_old``,
        ``n_atoms_new``, ``old_system`` and ``new_system`` attributes are
        also read here.
    pos_old
        Passed to ``HybridTopologyFactory`` as the second positional argument.
    new_positions
        Passed to ``HybridTopologyFactory`` as the third positional argument.
    use_dispersion_correction : bool, default False
        Forwarded to ``HybridTopologyFactory``.
    forward_functions : dict of str: str, optional
        Maps parameter names to formula strings in ``lambda``. If None,
        ``self.default_forward_functions`` is used. The reverse protocol is
        derived by substituting ``(1-lambda)`` for ``lambda``.
    ncmc_nsteps : int, default 100
        Passed as ``nsteps_neq`` to both nonequilibrium integrators.
    nsteps_per_iteration : int, default 1
        Passed to each ``NonequilibriumSwitchingMove``.
    concurrency : int, default 4
        Stored as ``self._concurrency``.
    platform_name : str, default "OpenCL"
        Accepted for interface compatibility; not used in this constructor.
    temperature : unit.Quantity, default 300 K
        Temperature for all thermodynamic states and integrators.
    trajectory_directory : str, optional
        Stored as ``self._trajectory_directory``.
    trajectory_prefix : str, optional
        Stored as ``self._trajectory_prefix``.
    """
    #construct the hybrid topology factory object
    self._factory = HybridTopologyFactory(
        topology_proposal,
        pos_old,
        new_positions,
        use_dispersion_correction=use_dispersion_correction)

    #use default functions if none specified
    if forward_functions is None:
        self._forward_functions = self.default_forward_functions
    else:
        self._forward_functions = forward_functions

    #reverse functions to get a symmetric protocol
    self._reverse_functions = {
        param: param_formula.replace("lambda", "(1-lambda)")
        for param, param_formula in self._forward_functions.items()
    }

    #set up some class attributes
    self._hybrid_system = self._factory.hybrid_system
    self._initial_hybrid_positions = self._factory.hybrid_positions
    self._concurrency = concurrency
    self._ncmc_nsteps = ncmc_nsteps
    self._nsteps_per_iteration = nsteps_per_iteration
    self._trajectory_prefix = trajectory_prefix
    self._trajectory_directory = trajectory_directory
    self._zero_endpoint_n_atoms = topology_proposal.n_atoms_old
    self._one_endpoint_n_atoms = topology_proposal.n_atoms_new

    #initialize lists for results
    self._forward_nonequilibrium_trajectories = []
    self._reverse_nonequilibrium_trajectories = []
    self._forward_nonequilibrium_cumulative_works = []
    self._reverse_nonequilibrium_cumulative_works = []
    self._forward_nonequilibrium_results = []
    self._reverse_nonequilibrium_results = []
    self._forward_total_work = []
    self._reverse_total_work = []
    self._lambda_zero_reduced_potentials = []
    self._lambda_one_reduced_potentials = []
    self._nonalchemical_zero_endpt_reduced_potentials = []
    self._nonalchemical_one_endpt_reduced_potentials = []
    self._nonalchemical_zero_results = []
    self._nonalchemical_one_results = []

    #Set the number of times that the nonequilibrium move will have to be
    #run in order to complete a protocol:
    if self._ncmc_nsteps % self._nsteps_per_iteration != 0:
        logging.warning(
            "The number of ncmc steps is not divisible by the number of steps per iteration. You may not have a full protocol."
        )
    self._n_iterations_per_call = self._ncmc_nsteps // self._nsteps_per_iteration

    #create the thermodynamic state
    lambda_zero_alchemical_state = alchemy.AlchemicalState.from_system(
        self._hybrid_system)
    lambda_one_alchemical_state = copy.deepcopy(
        lambda_zero_alchemical_state)

    #ensure their states are set appropriately
    #FIX: the lambda-one endpoint was previously also set to 0.0, which made
    #both endpoint states identical.
    lambda_zero_alchemical_state.set_alchemical_parameters(0.0)
    lambda_one_alchemical_state.set_alchemical_parameters(1.0)

    #create the base thermodynamic state with the hybrid system
    self._thermodynamic_state = ThermodynamicState(self._hybrid_system,
                                                   temperature=temperature)

    #Create thermodynamic states for the nonalchemical endpoints
    self._nonalchemical_zero_thermodynamic_state = ThermodynamicState(
        topology_proposal.old_system, temperature=temperature)
    self._nonalchemical_one_thermodynamic_state = ThermodynamicState(
        topology_proposal.new_system, temperature=temperature)

    #Now create the compound states with different alchemical states
    self._lambda_zero_thermodynamic_state = CompoundThermodynamicState(
        self._thermodynamic_state,
        composable_states=[lambda_zero_alchemical_state])
    self._lambda_one_thermodynamic_state = CompoundThermodynamicState(
        self._thermodynamic_state,
        composable_states=[lambda_one_alchemical_state])

    #create the forward and reverse integrators
    self._forward_integrator = AlchemicalNonequilibriumLangevinIntegrator(
        alchemical_functions=self._forward_functions,
        nsteps_neq=ncmc_nsteps,
        temperature=temperature)
    self._reverse_integrator = AlchemicalNonequilibriumLangevinIntegrator(
        alchemical_functions=self._reverse_functions,
        nsteps_neq=ncmc_nsteps,
        temperature=temperature)

    #create the forward and reverse MCMoves
    self._forward_ne_mc_move = NonequilibriumSwitchingMove(
        self._forward_integrator, self._nsteps_per_iteration)
    self._reverse_ne_mc_move = NonequilibriumSwitchingMove(
        self._reverse_integrator, self._nsteps_per_iteration)

    #create the equilibrium MCMove
    self._equilibrium_mc_move = mcmc.LangevinSplittingDynamicsMove()

    #set the SamplerState for the lambda 0 and 1 equilibrium simulations
    self._lambda_one_sampler_state = SamplerState(
        self._initial_hybrid_positions,
        box_vectors=self._hybrid_system.getDefaultPeriodicBoxVectors())
    self._lambda_zero_sampler_state = copy.deepcopy(
        self._lambda_one_sampler_state)

    #initialize by minimizing
    self.minimize()

    #initialize the trajectories for the lambda 0 and 1 equilibrium simulations
    a_0, b_0, c_0, alpha_0, beta_0, gamma_0 = mdtrajutils.unitcell.box_vectors_to_lengths_and_angles(
        *self._lambda_zero_sampler_state.box_vectors)
    a_1, b_1, c_1, alpha_1, beta_1, gamma_1 = mdtrajutils.unitcell.box_vectors_to_lengths_and_angles(
        *self._lambda_one_sampler_state.box_vectors)

    self._lambda_zero_traj = md.Trajectory(
        np.array(self._lambda_zero_sampler_state.positions),
        self._factory.hybrid_topology,
        unitcell_lengths=[a_0, b_0, c_0],
        unitcell_angles=[alpha_0, beta_0, gamma_0])
    self._lambda_one_traj = md.Trajectory(
        np.array(self._lambda_one_sampler_state.positions),
        self._factory.hybrid_topology,
        unitcell_lengths=[a_1, b_1, c_1],
        unitcell_angles=[alpha_1, beta_1, gamma_1])