def __init__(self, ff_type=None, system_file=None, **kwargs):
    """Set up the system from forcefield-specific files or an OpenMM XML file.

    Two pathways can be used: starting from FF-specific files (``ff_type``)
    or from an OpenMM XML system (``system_file``). At most one of the two
    may be given; this is enforced by the assertion at the top of the body.
    Additional kwargs are variously used depending on the forcefield /
    pathway that was chosen.

    supported ff_type
    -----------------
    amber  :: give a prmtop and an inpcrd
    openmm :: give an XML file for the system
              (handled here through ``system_file``; only "amber" is
              matched against ``ff_type`` in this body)

    supported kwargs
    ----------------
    topology    :: system-specific or not depending on FF
    coordinates :: source of coordinates for initial state

    NOTE(review): the nesting of the branches below was reconstructed from
    a whitespace-collapsed source -- confirm against the original file.
    """
    assert (ff_type is None) or (system_file is None)
    # This dict will store the API calls
    # along with atom groups and force
    # parameters needed to generate all
    # the given restraints
    self._restraints = dict()
    self._topology = None
    topofile = kwargs.get("topology", None)
    coordfile = kwargs.get("coordinates", None)
    if ff_type is not None:
        # ff_type values other than "amber" fall through without
        # creating a system or a topology.
        if ff_type.lower() == "amber":
            prmtop = AmberPrmtopFile(topofile)
            inpcrd = AmberInpcrdFile(coordfile)
            self.system = prmtop.createSystem(
                nonbondedMethod=NoCutoff
            )  #CutoffNonPeriodic - according to Ada, this would be good bc its what amber does - preliminary tests show that this hurts small/medium proteins
            self._topology = Topology.from_openmm(prmtop.topology)
            # The whole inpcrd file object is stored, not just its positions.
            self._positions = inpcrd
    elif system_file is not None:
        self.load_xml(system_file)
        # A topology is only attached on this pathway when a PDB file
        # was passed through the "topology" kwarg.
        if topofile:
            if topofile.endswith(".pdb"):
                # this line is a bit silly but Topology class
                # doesn't seem to directly load PDB so keeps
                # the imports clean
                self._topology = Topology.from_openmm(
                    PDBFile(topofile).topology)
    else:
        # Inspect and set ff_type
        # TODO ff_type as instance attribute
        pass
def _topology_from_arrays(AtomID, AtomNames, ChainID, ResidueID, ResidueNames):
    """Build a single-chain topology from the per-atom arrays of a legacy file.

    Parameters
    ----------
    AtomID : array-like
        Per-atom identifiers; atoms are added in ascending ``AtomID`` order.
    AtomNames : array-like of str or bytes
        Per-atom names; bytes entries are decoded. The first character left
        after stripping leading digits is used as the element symbol.
    ChainID : iterable of str or bytes
        Per-atom chain identifiers; every entry must be empty.
    ResidueID : array-like
        Per-atom residue identifiers; one residue is created per distinct
        value, in ascending order.
    ResidueNames : array-like of str or bytes
        Per-atom residue names; bytes entries are decoded.

    Returns
    -------
    topology : Topology
        Topology with one chain and standard bonds created.

    Raises
    ------
    NotImplementedError
        If any chain identifier is non-empty.
    """
    def _as_text(value):
        # Entries may arrive as bytes; normalise to str.
        return value if isinstance(value, str) else value.decode()

    topology = Topology()

    # The ChainID is expected to be an array of empty strings, which appears
    # to be the case in our test systems for this legacy format.
    # The previous check used ``np.all(<generator>)``, which wraps the
    # generator object itself (always truthy) and so could never fail.
    if any(_as_text(chainid) != '' for chainid in ChainID):
        raise NotImplementedError('Im not prepared to parse multiple chains')
    chain0 = topology.add_chain()

    # register the residues
    registered_residues = {}
    for i in np.argsort(ResidueID):
        residue_name = _as_text(ResidueNames[i])
        if ResidueID[i] not in registered_residues:
            res = topology.add_residue(residue_name, chain0)
            registered_residues[ResidueID[i]] = res

    # register the atoms
    for i in np.argsort(AtomID):
        atom_name = _as_text(AtomNames[i])
        element_symbol = atom_name.lstrip('0123456789')[0]
        element = mdtraj.pdb.element.get_by_symbol(element_symbol)
        topology.add_atom(atom_name, element,
                          registered_residues[ResidueID[i]])

    topology.create_standard_bonds()
    return topology
def _topology_from_arrays(AtomID, AtomNames, ChainID, ResidueID, ResidueNames):
    """Build topology object from the arrays stored in the lh5 file

    Parameters
    ----------
    AtomID : array-like
        Per-atom identifiers; atoms are added in ascending ``AtomID`` order.
    AtomNames : array-like of str or bytes
        Per-atom names; non-string entries are decoded. The first character
        left after stripping leading digits is used as the element symbol.
    ChainID : iterable of str or bytes
        Per-atom chain identifiers; every entry must be empty.
    ResidueID : array-like
        Per-atom residue identifiers; one residue is created per distinct
        value, in ascending order.
    ResidueNames : array-like of str or bytes
        Per-atom residue names; non-string entries are decoded.

    Returns
    -------
    topology : mdtraj.Topology
        Topology with one chain and standard bonds created.

    Raises
    ------
    NotImplementedError
        If any chain identifier is non-empty.
    """
    # Delayed import due to wacky recursive imports in compatibilty
    from mdtraj import Topology

    def _as_text(value):
        # Entries may arrive as bytes; normalise to text.
        return value if isinstance(value, basestring) else value.decode()

    topology = Topology()

    # The ChainID is expected to be an array of empty strings, which appears
    # to be the case in our test systems for this legacy format.
    # The previous check used ``np.all(<generator>)``, which wraps the
    # generator object itself (always truthy) and so could never fail.
    if any(_as_text(chainid) != '' for chainid in ChainID):
        raise NotImplementedError('Im not prepared to parse multiple chains')
    chain0 = topology.add_chain()

    # register the residues
    registered_residues = {}
    for i in np.argsort(ResidueID):
        residue_name = _as_text(ResidueNames[i])
        if ResidueID[i] not in registered_residues:
            res = topology.add_residue(residue_name, chain0)
            registered_residues[ResidueID[i]] = res

    # register the atoms
    for i in np.argsort(AtomID):
        atom_name = _as_text(AtomNames[i])
        element_symbol = atom_name.lstrip('0123456789')[0]
        element = mdtraj.pdb.element.get_by_symbol(element_symbol)
        topology.add_atom(atom_name, element,
                          registered_residues[ResidueID[i]])

    topology.create_standard_bonds()
    return topology
def show_conformations(self, centers_indices=None, rotations_indices=None,
                       nodes_labels=None, least_rmsd_fit='receptor',
                       center_rmsd_fit='receptor'):
    """Return a viewer widget for the selected conformations.

    The conformations are fetched with ``self.get_conformations`` using
    ``centers_indices``, ``rotations_indices`` and ``nodes_labels``, then
    converted to an MDTraj trajectory (positions expressed in nanometers)
    and handed to the viewer factory.

    ``least_rmsd_fit`` and ``center_rmsd_fit`` are accepted but not used
    by this body.
    """
    molcomplex = self.get_conformations(centers_indices, rotations_indices,
                                        nodes_labels)
    md_topology = _mdtraj_topology.from_openmm(molcomplex.topology)
    # Strip the units: the trajectory constructor receives plain numbers.
    positions_nm = molcomplex.positions/unit.nanometer
    md_trajectory = _mdtraj_trajectory(positions_nm, md_topology)
    return _nv_show_mdtraj(md_trajectory)
def subset(self, selector):
    """
    Returns a list of atom indices corresponding to a MDTraj DSL query.

    Parameters
    ----------
    selector : str, list or tuple
        A list/tuple of numbers is coerced element-wise to ``int`` and
        returned as a new list. Anything else is first looked up in
        ``SELECTORS`` (falling back to the value itself) and then passed
        to the MDTraj topology ``select`` method.

    Returns
    -------
    list of int for list/tuple input; otherwise whatever ``select``
    returns for the query.
    """
    if isinstance(selector, (list, tuple)):
        # A real list: ``map`` would hand back a one-shot iterator on
        # Python 3, contradicting the documented return type.
        return [int(index) for index in selector]
    selector = SELECTORS.get(selector, selector)
    mdtop = MDTrajTopology.from_openmm(self.handler.topology)
    return mdtop.select(selector)
def _map_topology(self):
    """ Create CG topology from given topology and mapping

    Resets the atom-to-bead mapping, the CG topology and the solvent
    counter, then maps every residue of the atomistic topology: residues
    named like the configured solvent go through ``_map_solvent_top``, all
    others through ``_map_nonsolvent_top``.

    Raises
    ------
    OutOfOrderError
        If no atomistic trajectory has been loaded yet.
    """
    # Ensure that a trajectory has been loaded. ``getattr`` is used because
    # the attribute may not exist at all before a trajectory is loaded, in
    # which case a plain attribute access would raise AttributeError
    # instead of the intended error.
    if getattr(self, "_aa_traj", None) is None:
        raise OutOfOrderError("An atomistic trajectory has not "
                              "been loaded into this Mapper yet.")

    self._atom_bead_mapping = dict()
    self._cg_top = Topology()
    self._solvent_counter = 0

    # Loop over all residues
    for residue in self._aa_top.residues:
        if residue.name == self._solvent_name:
            self._map_solvent_top(residue)
        else:
            self._map_nonsolvent_top(residue)
def add_mol_to_topology(self, coords: np.ndarray, types: np.ndarray,
                        topology: mdtraj.Topology):
    """Append one molecule to ``topology`` as a new chain.

    A new chain is added, and every atom is placed in its own residue
    named ``mol_<i>`` where ``i`` is the atom's position in ``types``.

    Parameters
    ----------
    coords : np.ndarray
        2-D array with one row per atom; only its shape is checked here.
    types : np.ndarray
        Either a 2-D array whose row-wise argmax is translated through
        ``self.sequential_to_atomic_number()``, or a 1-D array of keys of
        ``atomic_number_to_symbol()``.
    topology : mdtraj.Topology
        Topology that is extended in place.

    Raises
    ------
    ValueError
        If ``types`` is neither 1-D nor 2-D.
    """
    assert coords.shape[0] == types.shape[0]
    assert coords.ndim == 2
    chain = topology.add_chain()

    # Convert types to symbols. The lookup tables are fetched once instead
    # of once per atom.
    if types.ndim == 2:
        sequential_to_number = self.sequential_to_atomic_number()
        number_to_symbol = atomic_number_to_symbol()
        seqs = np.argmax(types, axis=1)
        atms = [sequential_to_number[t] for t in seqs]
        syms = [number_to_symbol[t] for t in atms]
    elif types.ndim == 1:
        number_to_symbol = atomic_number_to_symbol()
        syms = [number_to_symbol[t] for t in types]
    else:
        raise ValueError(
            "Types must either be one hot vectors with ndim==2 XOR numbers with ndim==1."
        )

    for i, s in enumerate(syms):
        res = topology.add_residue("mol_{}".format(i), chain)
        topology.add_atom(s, mdtraj.element.get_by_symbol(s), res)
def _traj_from_xyza(xyz, atomic_numbers, units='nm'):
    """
    Build a minimal single-frame trajectory from coordinates and atomic
    numbers. All atoms are placed in one residue named 'XXX' in one chain.

    Parameters
    ----------
    xyz : np.array, float, shape( num_atom, 3)
        array of x,y,z coordinates. The input array is never modified.

    atomic_numbers : np.array, int
        the atomic numbers of each of the atoms, indexed once per row of
        `xyz`; each entry is used as a key into `periodic_table`.

    Optional Parameters
    -------------------
    units : str
        if units == 'nm' then nothing happens. if units == 'ang' then we
        convert them to nm. Any other value is treated like 'nm'.

    Returns
    -------
    structure : mdtraj.trajectory
        A meta-data minimal mdtraj instance
    """
    if units == 'ang':
        # Out-of-place division: `xyz /= 10.` would silently rescale the
        # caller's array and fails outright on integer arrays.
        xyz = xyz / 10.

    top = Topology()
    chain = top.add_chain()
    residue = top.add_residue('XXX', chain)

    for i in range(xyz.shape[0]):
        element_symb = periodic_table[atomic_numbers[i]][1]  # should give symbol
        element = Element.getBySymbol(element_symb)
        name = '%s' % element_symb
        top.add_atom(name, element, residue)

    structure = Trajectory(xyz=xyz, topology=top)
    return structure
def _topology_from_arrays(AtomID, AtomNames, ChainID, ResidueID, ResidueNames):
    """Build topology object from the arrays stored in the lh5 file

    Parameters
    ----------
    AtomID : array-like
        Per-atom identifiers; atoms are added in ascending ``AtomID`` order.
    AtomNames : array-like of str or bytes
        Per-atom names; non-string entries are decoded. The first character
        left after stripping leading digits is used as the element symbol.
    ChainID : iterable of str or bytes
        Per-atom chain identifiers; every entry must be empty.
    ResidueID : array-like
        Per-atom residue identifiers; one residue is created per distinct
        value, in ascending order.
    ResidueNames : array-like of str or bytes
        Per-atom residue names; non-string entries are decoded.

    Returns
    -------
    topology : mdtraj.Topology
        Topology with one chain and standard bonds created. Atoms whose
        element symbol is unknown (or whose name has no non-digit
        character) get ``element=None``.

    Raises
    ------
    NotImplementedError
        If any chain identifier is non-empty.
    """
    # Delayed import due to wacky recursive imports in compatibilty
    from mdtraj import Topology

    def _as_text(value):
        # Entries may arrive as bytes; normalise to text.
        return value if isinstance(value, basestring) else value.decode()

    topology = Topology()

    # The ChainID is expected to be an array of empty strings, which appears
    # to be the case in our test systems for this legacy format.
    # The previous check used ``np.all(<generator>)``, which wraps the
    # generator object itself (always truthy) and so could never fail.
    if any(_as_text(chainid) != '' for chainid in ChainID):
        raise NotImplementedError('Im not prepared to parse multiple chains')
    chain0 = topology.add_chain()

    # register the residues
    registered_residues = {}
    for i in np.argsort(ResidueID):
        residue_name = _as_text(ResidueNames[i])
        if ResidueID[i] not in registered_residues:
            res = topology.add_residue(residue_name, chain0)
            registered_residues[ResidueID[i]] = res

    # register the atoms
    for i in np.argsort(AtomID):
        atom_name = _as_text(AtomNames[i])
        stripped_name = atom_name.lstrip('0123456789')
        element = None
        # A name made only of digits (or empty) has no symbol to look up;
        # treat it like any other unknown element instead of failing.
        if stripped_name:
            try:
                element = elem.get_by_symbol(stripped_name[0])
            except KeyError:
                element = None
        topology.add_atom(atom_name, element,
                          registered_residues[ResidueID[i]])

    topology.create_standard_bonds()
    return topology
def test_3nch_serial_resSeq():
    """Round-trip 3nch through a dataframe and check serial-indexed lookups.

    The topology is converted to a dataframe and back, compared with the
    original, and then the atom table is indexed by the PDB ``serial``
    column to check names, residue name and residue number against the
    values in the file.
    """
    # If you use zero-based indexing, this PDB has quite large gaps in residue and atom numbering, so it's a good test case.  See #528
    # Gold standard values obtained via
    # cat 3nch.pdb |grep ATM|tail -n 5
    # HETATM19787  S   SO4 D 804      -4.788  -9.395  22.515  1.00121.87           S
    # HETATM19788  O1  SO4 D 804      -3.815  -9.511  21.425  1.00105.97           O
    # HETATM19789  O2  SO4 D 804      -5.989  -8.733  21.999  1.00116.13           O
    # HETATM19790  O3  SO4 D 804      -5.130 -10.726  23.043  1.00108.74           O
    # HETATM19791  O4  SO4 D 804      -4.210  -8.560  23.575  1.00112.54           O

    t1 = load_pdb(get_fn('3nch.pdb.gz'))
    top, bonds = t1.top.to_dataframe()

    top2 = Topology.from_dataframe(top, bonds)
    eq(t1.top, top2)

    top = top.set_index('serial')  # Index by the actual data in the PDB
    # Label-based lookups on the serial index: ``.loc`` replaces the
    # ``.ix`` indexer, which no longer exists in current pandas.
    eq(str(top.loc[19791]["name"]), "O4")
    eq(str(top.loc[19787]["name"]), "S")
    eq(str(top.loc[19787]["resName"]), "SO4")
    eq(int(top.loc[19787]["resSeq"]), 804)
def to_mdtraj_Topology(item, atom_indices='all', check=True):
    """Convert a ``molsysmt.Topology`` item into an ``mdtraj.Topology``.

    Atoms are read row by row from ``item.atoms_dataframe``. A new chain
    is opened whenever ``chain_index`` changes from the previous row and a
    new residue whenever ``group_index`` changes. Bonds are then added from
    ``item.bonds_dataframe``.

    Parameters
    ----------
    item : molsysmt.Topology
        Source topology providing ``atoms_dataframe`` and
        ``bonds_dataframe``.
    atom_indices : 'all' or indices, default 'all'
        Digested when ``check`` is true.
        NOTE(review): the value is not used to filter atoms in this body;
        every row of the atoms dataframe is converted.
    check : bool, default True
        Whether to run the input digestion helpers first.

    Returns
    -------
    mdtraj.Topology

    Raises
    ------
    LibraryNotFound
        If mdtraj cannot be imported.
    """
    if check:
        digest_item(item, 'molsysmt.Topology')
        atom_indices = digest_atom_indices(atom_indices)

    try:
        from mdtraj import Topology
        from mdtraj.core import element
    except ImportError:
        # Only a failed import means the library is missing; a bare
        # ``except`` would also hide KeyboardInterrupt and real bugs.
        raise LibraryNotFound('mdtraj')

    atoms = item.atoms_dataframe
    bonds = item.bonds_dataframe

    per_atom_columns = zip(
        atoms["atom_name"].to_numpy(),
        atoms["atom_type"].to_numpy(),
        atoms["group_index"].to_numpy(),
        atoms["group_name"].to_numpy(),
        atoms["group_id"].to_numpy(),
        atoms["chain_index"].to_numpy(),
    )

    tmp_item = Topology()

    former_group_index = -1
    former_chain_index = -1

    list_new_atoms = []

    for (atom_name, atom_type, group_index, group_name, group_id,
         chain_index) in per_atom_columns:

        if former_chain_index != chain_index:
            chain = tmp_item.add_chain()
            former_chain_index = chain_index

        if former_group_index != group_index:
            residue = tmp_item.add_residue(group_name, chain,
                                           resSeq=str(group_id))
            former_group_index = group_index

        elem = element.get_by_symbol(atom_type)
        atom = tmp_item.add_atom(atom_name, elem, residue)

        list_new_atoms.append(atom)

    # Bond endpoints are used as positions in ``list_new_atoms``.
    # NOTE(review): this presumably relies on atom indices matching the
    # row order of the atoms dataframe -- confirm.
    for atom_1, atom_2 in zip(bonds["atom1_index"].to_numpy(),
                              bonds["atom2_index"].to_numpy()):
        # bond type and bond order are still missing
        tmp_item.add_bond(list_new_atoms[atom_1], list_new_atoms[atom_2])

    return tmp_item
def cg_by_index(trj, atom_indices_list, bead_label_list, chain_list=None, segment_id_list=None, resSeq_list=None, inplace=False, bonds=None, mapping_function="com"):
    """Create a coarse grained (CG) trajectory from subsets of atoms by
    mapping each selected set of atoms onto one CG site.

    Parameters
    ----------
    trj : md.Trajectory
        Atomistic trajectory to coarse grain.
    atom_indices_list : list of array-like, dtype=int, shape=(n_beads,n_atoms)
        List of indices of atoms to combine into CG sites
    bead_label_list : list of maximum 4-letter strings to label CG sites
        Labels are upper-cased before use.
    chain_list : optional list of chain id's to split resulting beads into
        separate chains. Defaults to chain 1 for every bead.
    resSeq_list : optional list of residue sequence id's to assign cg
        residues. Defaults to ``1..n_beads``.
    segment_id_list : optional list of segment id's to assign cg residues
    inplace : bool, default=False
        If ``True``, the operation is done inplace, modifying ``trj``.
        Otherwise, a copy is returned with the sliced atoms, and
        ``trj`` is not modified.
    bonds : array-like,dtype=int, shape=(n_bonds,2), default=None
        If specified, sets these bonds in new topology
    mapping_function: string, default='com'
        How to map xyz coordinates; must be one of the keys of the
        module-level ``mapping_options`` table.

    Note - If repeated resSeq values are used, as for a repeated motiff
    in a CG polymer, those sections most be broken into separate
    chains or an incorrect topology will result

    Returns
    -------
    traj : md.Trajectory
        The return value is either ``trj``, or the new trajectory,
        depending on the value of ``inplace``. Mapped forces are attached
        only to a newly created trajectory.

    Raises
    ------
    ValueError
        If the per-bead lists differ in length, a bead label is not a
        string of 1 to 4 characters, or ``mapping_function`` is unknown.
    """
    # The docstring above is a plain literal: the previous
    # ``"""...""" % mapping_options.keys()`` form was an ordinary expression,
    # which left the function without a __doc__ and re-ran the formatting
    # on every call.
    if len(atom_indices_list) != len(bead_label_list):
        raise ValueError("Must supply a list of bead labels of the same length as a list of selected atom indices")

    for bead_label in bead_label_list:
        if not isinstance(bead_label, str) or len(bead_label)>4 or len(bead_label)<1:
            raise ValueError("Specified bead label '%s' is not valid, must be a string between 1 and 4 characters"%bead_label)

    bead_label_list = [ bead_label.upper() for bead_label in bead_label_list ]

    if mapping_function not in mapping_options:
        raise ValueError("Must select a mapping function from: %s"%mapping_options.keys())
    map_coords = mapping_options[mapping_function]

    if chain_list is None:
        chain_list = np.ones(len(atom_indices_list),dtype=int)
    elif len(chain_list)!=len(atom_indices_list):
        raise ValueError("Supplied chain_list must be of the same length as a list of selected atom indices")

    if segment_id_list is not None and len(segment_id_list)!=len(atom_indices_list):
        raise ValueError("Supplied segment_id_list must be of the same length as a list of selected atom indices")

    if resSeq_list is not None and len(resSeq_list)!=len(atom_indices_list):
        raise ValueError("Supplied resSeq_list must be of the same length as a list of selected atom indices")

    n_beads = len(atom_indices_list)
    n_frames = trj.xyz.shape[0]
    n_dim = trj.xyz.shape[2]

    xyz = np.zeros((n_frames,n_beads,n_dim),dtype=trj.xyz.dtype,order='C')
    forces = np.zeros((n_frames,n_beads,n_dim),dtype=np.double,order='C')
    columns = ["serial","name","element","resSeq","resName","chainID"]

    # Walk the topology once, then pick each bead's masses with a set
    # lookup. The selection rule is unchanged: an atom contributes once if
    # its index is a member of the bead, and masses come out in topology
    # order.
    # NOTE(review): the mapping function presumably pairs these masses
    # positionally with ``atom_indices``; that only lines up when the
    # indices are ascending and unique -- confirm against its contract.
    index_mass_pairs = [(a.index, a.mass) for a in trj.top.atoms]
    bead_atom_masses = []
    for atom_indices in atom_indices_list:
        members = set(atom_indices)
        bead_atom_masses.append(np.array(
            [mass for index, mass in index_mass_pairs if index in members],
            dtype=np.float64))
    masses = np.array([np.sum(m) for m in bead_atom_masses],dtype=np.float64)

    topology_labels = []
    element_label_dict = {}

    # Scratch buffer that the mapping function fills for each bead.
    xyz_i = np.zeros((n_frames,n_dim),dtype=trj.xyz.dtype,order='C')

    for i in range(n_beads):
        atom_indices = atom_indices_list[i]
        bead_label = bead_label_list[i]

        map_coords(xyz_i,trj.xyz,atom_indices,bead_atom_masses[i],unitcell_lengths=trj.unitcell_lengths)
        xyz[:,i,:] = xyz_i

        if "forces" in trj.__dict__ and len(trj.forces)>0:
            forces_i = map_forces(trj,atom_indices)
            forces[:,i,:] = forces_i

        if resSeq_list is not None:
            resSeq = resSeq_list[i]
        else:
            resSeq = i + 1

        # One pseudo-element label per distinct bead label: B0..B9, reused
        # cyclically.
        if bead_label not in element_label_dict:
            element_label='%2s'%('B%i'%(len(element_label_dict)%10))
            element_label_dict[bead_label] = element_label
        else:
            element_label = element_label_dict[bead_label]

        # Create the pseudo-element if no such symbol is known yet.
        if element_label.strip().upper() not in element.Element._elements_by_symbol:
            element.Element(1000+resSeq, element_label, element_label, masses[i], 1.0)

        topology_labels.append( [i,bead_label,element_label,resSeq,'%3s'%bead_label,chain_list[i]] )

    df = pd.DataFrame(topology_labels,columns=columns)
    topology = Topology.from_dataframe(df,bonds=bonds)

    if segment_id_list is not None:
        for beadidx,bead in enumerate(topology.atoms):
            bead.residue.segment_id = segment_id_list[beadidx]

    if inplace:
        if trj._topology is not None:
            trj._topology = topology
        trj._xyz = xyz
        return trj

    unitcell_lengths = unitcell_angles = None
    if trj._have_unitcell:
        unitcell_lengths = trj._unitcell_lengths.copy()
        unitcell_angles = trj._unitcell_angles.copy()

    time = trj._time.copy()

    new_trj = Trajectory(xyz=xyz, topology=topology, time=time,
                         unitcell_lengths=unitcell_lengths,
                         unitcell_angles=unitcell_angles)
    new_trj.forces = forces
    return new_trj
def run_protocol(equilibrium_result: EquilibriumResult,
                 thermodynamic_state: states.ThermodynamicState,
                 alchemical_functions: dict,
                 nstep_neq: int,
                 topology: md.Topology,
                 work_save_interval: int,
                 splitting: str = "V R O H R V",
                 atom_indices_to_save: List[int] = None,
                 trajectory_filename: str = None,
                 write_configuration: bool = False,
                 timestep: unit.Quantity = 1.0 * unit.femtoseconds,
                 measure_shadow_work: bool = False) -> NonequilibriumResult:
    """
    Perform a nonequilibrium switching protocol and return the nonequilibrium protocol work. Note that it is expected
    that this will perform an entire protocol, that is, switching lambda completely from 0 to 1, in increments specified
    by the ne_mc_move.

    Parameters
    ----------
    equilibrium_result : EquilibriumResult namedtuple
        The result of an equilibrium simulation; its sampler state is the starting point
    thermodynamic_state : openmmtools.states.ThermodynamicState
        The thermodynamic state at which to run the protocol
    alchemical_functions : dict
        The alchemical functions to use for switching
    nstep_neq : int
        The number of nonequilibrium steps in the protocol
    topology : mdtraj.Topology
        An MDtraj topology for the system to generate trajectories
    work_save_interval : int
        How often to write the work and, if requested, configurations
    splitting : str, default "V R O H R V"
        The splitting string to use for the Langevin integration
    atom_indices_to_save : list of int, default None
        list of indices to save (when excluding waters, for instance). If None, all indices are saved.
    trajectory_filename : str, default None
        Full filepath of output trajectory, if desired. If None, no trajectory or work file is written.
        Work arrays are saved next to it with the extension replaced by ``.cw.npy`` (cumulative work),
        ``.pw.npy`` (protocol work) and, when shadow work is measured, ``.sw.npy``.
    write_configuration : bool, default False
        Whether to also write configurations of the trajectory at the requested interval.
    timestep : unit.Quantity, default 1 fs
        The timestep to use in the integrator
    measure_shadow_work : bool, default False
        Whether to collect shadow work from the move; when False the shadow work in the result is all zeros.

    Returns
    -------
    nonequilibrium_result : NonequilibriumResult
        result object built from the cumulative work, the protocol work and the shadow work.
    """
    # Local import keeps this block self-contained.
    import os

    #get the sampler state needed for the simulation
    sampler_state = equilibrium_result.sampler_state

    temperature = thermodynamic_state.temperature

    #get the atom indices we need to subset the topology and positions
    if atom_indices_to_save is None:
        atom_indices = list(range(topology.n_atoms))
        subset_topology = topology
    else:
        subset_topology = topology.subset(atom_indices_to_save)
        atom_indices = atom_indices_to_save

    ne_mc_move = NonequilibriumSwitchingMove(
        alchemical_functions,
        splitting,
        temperature,
        nstep_neq,
        timestep,
        work_save_interval,
        subset_topology,
        atom_indices,
        save_configuration=write_configuration,
        measure_shadow_work=measure_shadow_work)

    ne_mc_move.reset()

    #apply the nonequilibrium move
    ne_mc_move.apply(thermodynamic_state, sampler_state)

    #get the cumulative work
    cumulative_work = ne_mc_move.cumulative_work

    #get the protocol work
    protocol_work = ne_mc_move.protocol_work

    #if we're measuring shadow work, get that. Otherwise just fill in zeros:
    if measure_shadow_work:
        shadow_work = ne_mc_move.shadow_work
    else:
        shadow_work = np.zeros_like(protocol_work)

    #create a result object and return that
    nonequilibrium_result = NonequilibriumResult(cumulative_work,
                                                 protocol_work, shadow_work)

    #if desired, write nonequilibrium trajectories:
    if trajectory_filename is not None:
        #to get the filename for cumulative work, replace the extension of the trajectory file with .cw.npy
        #splitext only looks at the final path component, so a filename
        #without an extension or a directory name containing dots no longer
        #yields a mangled output path.
        base_path = os.path.splitext(trajectory_filename)[0]
        cum_work_filepath = base_path + ".cw.npy"
        prot_work_filepath = base_path + ".pw.npy"

        #if writing configurations was requested, get the trajectory
        if write_configuration:
            try:
                trajectory = ne_mc_move.trajectory
                write_nonequilibrium_trajectory(nonequilibrium_result,
                                                trajectory,
                                                trajectory_filename)
            except NoTrajectoryException:
                #best effort: the work arrays below are still written
                pass

        np.save(cum_work_filepath, nonequilibrium_result.cumulative_work)
        np.save(prot_work_filepath, nonequilibrium_result.protocol_work)

        if measure_shadow_work:
            np.save(base_path + ".sw.npy", shadow_work)

    return nonequilibrium_result
def run_equilibrium(
        equilibrium_result: EquilibriumResult,
        thermodynamic_state: states.ThermodynamicState,
        nsteps_equil: int,
        topology: md.Topology,
        n_iterations: int,
        atom_indices_to_save: List[int] = None,
        trajectory_filename: str = None,
        splitting: str = "V R O R V",
        timestep: unit.Quantity = 1.0 * unit.femtoseconds
) -> EquilibriumResult:
    """
    Run nsteps of equilibrium sampling at the specified thermodynamic state and return the final sampler state
    as well as the reduced potential of the final frame. The positions after each application of an MCMove are
    collected into a trajectory, which is written out when a filename is given. This means that if the MCMove
    is configured to run 1000 steps of dynamics, and n_iterations is 100, there will be 100 frames in the
    resulting trajectory; these are the result of 100,000 steps (1000*100) of dynamics.

    Parameters
    ----------
    equilibrium_result : EquilibriumResult
        EquilibriumResult namedtuple containing the information necessary to resume
    thermodynamic_state : openmmtools.states.ThermodynamicState
        The thermodynamic state (including context parameters) that should be used
    nsteps_equil : int
        The number of equilibrium steps that a move should make when apply is called
    topology : mdtraj.Topology
        an MDTraj topology object used to construct the trajectory
    n_iterations : int
        The number of times to apply the move. Note that this is not the number of steps of dynamics; it is
        n_iterations*n_steps (which is set in the MCMove).
    atom_indices_to_save : list of int, default None
        list of indices to save (when excluding waters, for instance). If None, all indices are saved.
    trajectory_filename : str, optional, default None
        Full filepath of trajectory files. If none, trajectory files are not written.
    splitting : str, default "V R O R V"
        The splitting string for the dynamics
    timestep : unit.Quantity, default 1 fs
        The timestep used by the Langevin move

    Returns
    -------
    equilibrium_result : EquilibriumResult
        Container namedtuple built from the SamplerState for resuming and the reduced potential of the
        final frame.
    """
    sampler_state = equilibrium_result.sampler_state

    #get the atom indices we need to subset the topology and positions
    if atom_indices_to_save is None:
        atom_indices = list(range(topology.n_atoms))
        subset_topology = topology
    else:
        subset_topology = topology.subset(atom_indices_to_save)
        atom_indices = atom_indices_to_save

    n_atoms = subset_topology.n_atoms

    #construct the MCMove; the requested timestep is forwarded so that it
    #is no longer silently ignored in favour of the move's own default
    mc_move = mcmc.LangevinSplittingDynamicsMove(timestep=timestep,
                                                 n_steps=nsteps_equil,
                                                 splitting=splitting)
    mc_move.n_restart_attempts = 10

    #create a numpy array for the trajectory
    trajectory_positions = np.zeros([n_iterations, n_atoms, 3])
    trajectory_box_lengths = np.zeros([n_iterations, 3])
    trajectory_box_angles = np.zeros([n_iterations, 3])

    #loop through iterations and apply MCMove, then collect positions into numpy array
    for iteration in range(n_iterations):
        mc_move.apply(thermodynamic_state, sampler_state)

        trajectory_positions[iteration, :] = sampler_state.positions[
            atom_indices, :].value_in_unit_system(unit.md_unit_system)

        #get the box lengths and angles
        a, b, c, alpha, beta, gamma = mdtrajutils.unitcell.box_vectors_to_lengths_and_angles(
            *sampler_state.box_vectors)
        trajectory_box_lengths[iteration, :] = [a, b, c]
        trajectory_box_angles[iteration, :] = [alpha, beta, gamma]

    #construct trajectory object:
    trajectory = md.Trajectory(trajectory_positions,
                               subset_topology,
                               unitcell_lengths=trajectory_box_lengths,
                               unitcell_angles=trajectory_box_angles)

    #get the reduced potential from the final frame for endpoint perturbations
    reduced_potential_final_frame = thermodynamic_state.reduced_potential(
        sampler_state)

    #construct equilibrium result object
    equilibrium_result = EquilibriumResult(sampler_state,
                                           reduced_potential_final_frame)

    #If there is a trajectory filename passed, write out the results here:
    if trajectory_filename is not None:
        write_equilibrium_trajectory(equilibrium_result, trajectory,
                                     trajectory_filename)

    return equilibrium_result
def cg_by_index(trj,
                atom_indices_list,
                bead_label_list,
                chain_list=None,
                segment_id_list=None,
                resSeq_list=None,
                inplace=False,
                bonds=None,
                mapping_function="com"):
    """Create a coarse grained (CG) trajectory from subsets of atoms by
    mapping each selected set of atoms onto one CG site.

    Parameters
    ----------
    trj : md.Trajectory
        Atomistic trajectory to coarse grain.
    atom_indices_list : list of array-like, dtype=int, shape=(n_beads,n_atoms)
        List of indices of atoms to combine into CG sites
    bead_label_list : list of maximum 4-letter strings to label CG sites
        Labels are upper-cased before use.
    chain_list : optional list of chain id's to split resulting beads into
        separate chains. Defaults to chain 1 for every bead.
    resSeq_list : optional list of residue sequence id's to assign cg
        residues. Defaults to ``1..n_beads``.
    segment_id_list : optional list of segment id's to assign cg residues
    inplace : bool, default=False
        If ``True``, the operation is done inplace, modifying ``trj``.
        Otherwise, a copy is returned with the sliced atoms, and
        ``trj`` is not modified.
    bonds : array-like,dtype=int, shape=(n_bonds,2), default=None
        If specified, sets these bonds in new topology
    mapping_function: string, default='com'
        How to map xyz coordinates; must be one of the keys of the
        module-level ``mapping_options`` table.

    Note - If repeated resSeq values are used, as for a repeated motiff
    in a CG polymer, those sections most be broken into separate
    chains or an incorrect topology will result

    Returns
    -------
    traj : md.Trajectory
        The return value is either ``trj``, or the new trajectory,
        depending on the value of ``inplace``. Mapped forces are attached
        only to a newly created trajectory.

    Raises
    ------
    ValueError
        If the per-bead lists differ in length, a bead label is not a
        string of 1 to 4 characters, or ``mapping_function`` is unknown.
    """
    # The docstring above is a plain literal: the previous
    # ``"""...""" % mapping_options.keys()`` form was an ordinary expression,
    # which left the function without a __doc__ and re-ran the formatting
    # on every call.
    if len(atom_indices_list) != len(bead_label_list):
        raise ValueError(
            "Must supply a list of bead labels of the same length as a list of selected atom indices"
        )

    for bead_label in bead_label_list:
        if not isinstance(bead_label, str) or len(bead_label) > 4 or len(
                bead_label) < 1:
            raise ValueError(
                "Specified bead label '%s' is not valid, must be a string between 1 and 4 characters"
                % bead_label)

    bead_label_list = [bead_label.upper() for bead_label in bead_label_list]

    if mapping_function not in mapping_options:
        raise ValueError("Must select a mapping function from: %s" %
                         mapping_options.keys())
    map_coords = mapping_options[mapping_function]

    if chain_list is None:
        chain_list = np.ones(len(atom_indices_list), dtype=int)
    elif len(chain_list) != len(atom_indices_list):
        raise ValueError(
            "Supplied chain_list must be of the same length as a list of selected atom indices"
        )

    if segment_id_list is not None and len(segment_id_list) != len(
            atom_indices_list):
        raise ValueError(
            "Supplied segment_id_list must be of the same length as a list of selected atom indices"
        )

    if resSeq_list is not None and len(resSeq_list) != len(atom_indices_list):
        raise ValueError(
            "Supplied resSeq_list must be of the same length as a list of selected atom indices"
        )

    n_beads = len(atom_indices_list)
    n_frames = trj.xyz.shape[0]
    n_dim = trj.xyz.shape[2]

    xyz = np.zeros((n_frames, n_beads, n_dim),
                   dtype=trj.xyz.dtype,
                   order='C')
    forces = np.zeros((n_frames, n_beads, n_dim),
                      dtype=np.double,
                      order='C')
    columns = ["serial", "name", "element", "resSeq", "resName", "chainID"]

    # Walk the topology once, then pick each bead's masses with a set
    # lookup. The selection rule is unchanged: an atom contributes once if
    # its index is a member of the bead, and masses come out in topology
    # order.
    # NOTE(review): the mapping function presumably pairs these masses
    # positionally with ``atom_indices``; that only lines up when the
    # indices are ascending and unique -- confirm against its contract.
    index_mass_pairs = [(a.index, a.mass) for a in trj.top.atoms]
    bead_atom_masses = []
    for atom_indices in atom_indices_list:
        members = set(atom_indices)
        bead_atom_masses.append(
            np.array([
                mass for index, mass in index_mass_pairs if index in members
            ],
                     dtype=np.float64))
    masses = np.array([np.sum(m) for m in bead_atom_masses],
                      dtype=np.float64)

    topology_labels = []
    element_label_dict = {}

    # Scratch buffer that the mapping function fills for each bead.
    xyz_i = np.zeros((n_frames, n_dim), dtype=trj.xyz.dtype, order='C')

    for i in range(n_beads):
        atom_indices = atom_indices_list[i]
        bead_label = bead_label_list[i]

        map_coords(xyz_i,
                   trj.xyz,
                   atom_indices,
                   bead_atom_masses[i],
                   unitcell_lengths=trj.unitcell_lengths)
        xyz[:, i, :] = xyz_i

        if "forces" in trj.__dict__ and len(trj.forces) > 0:
            forces_i = map_forces(trj, atom_indices)
            forces[:, i, :] = forces_i

        if resSeq_list is not None:
            resSeq = resSeq_list[i]
        else:
            resSeq = i + 1

        # One pseudo-element label per distinct bead label: B0..B9, reused
        # cyclically.
        if bead_label not in element_label_dict:
            element_label = '%2s' % ('B%i' % (len(element_label_dict) % 10))
            element_label_dict[bead_label] = element_label
        else:
            element_label = element_label_dict[bead_label]

        # Create the pseudo-element if no such symbol is known yet.
        if element_label.strip().upper(
        ) not in element.Element._elements_by_symbol:
            element.Element(1000 + resSeq, element_label, element_label,
                            masses[i], 1.0)

        topology_labels.append([
            i, bead_label, element_label, resSeq,
            '%3s' % bead_label, chain_list[i]
        ])

    df = pd.DataFrame(topology_labels, columns=columns)
    topology = Topology.from_dataframe(df, bonds=bonds)

    if segment_id_list is not None:
        for beadidx, bead in enumerate(topology.atoms):
            bead.residue.segment_id = segment_id_list[beadidx]

    if inplace:
        if trj._topology is not None:
            trj._topology = topology
        trj._xyz = xyz
        return trj

    unitcell_lengths = unitcell_angles = None
    if trj._have_unitcell:
        unitcell_lengths = trj._unitcell_lengths.copy()
        unitcell_angles = trj._unitcell_angles.copy()

    time = trj._time.copy()

    new_trj = Trajectory(xyz=xyz,
                         topology=topology,
                         time=time,
                         unitcell_lengths=unitcell_lengths,
                         unitcell_angles=unitcell_angles)
    new_trj.forces = forces
    return new_trj
class Mapper:
    """
    An object to convert an atomistic system to a CG system

    Attributes
    ----------
    mappings : dict
        A (deep) copy of the dictionary containing the {name : mapping}
        for each residue. Read-only; use `load_mapping` to add entries.
    _solvent_mapping : int, default=4
        Number of solvent molecules to map to a single bead
    _solvent_name : string, default='tip3p'
        Name of solvent residue in atomistic system
    _aa_traj : mdtraj.Trajectory or None
        The atomistic trajectory to convert to CG
    _aa_top : mdtraj.Topology or None
        The atomistic topology to convert to CG
    _cg_traj : mdtraj.Trajectory or None
        The CG trajectory converted from atomistic (cached by `cg_map`)
    _cg_top : mdtraj.Topology or None
        The CG topology converted from atomistic
    """

    def __init__(self, solvent_name='tip3p', solvent_mapping=4):
        self._mappings = dict()
        self._solvent_mapping = solvent_mapping
        self._solvent_name = solvent_name
        # Initialised here so that the "no trajectory loaded" guard in
        # _map_topology raises its own error instead of an AttributeError.
        self._aa_traj = None
        self._aa_top = None
        self._cg_traj = None
        self._cg_top = None
        self._solvent_counter = 0

    @property
    def mappings(self):
        """Return a deep copy of the registered residue mappings."""
        return deepcopy(self._mappings)

    @mappings.setter
    def mappings(self, value):
        # The setter must accept the assigned value, otherwise Python raises
        # an unrelated arity TypeError before this message is ever produced.
        raise TypeError("'mappings' attribute does not support assignment")

    def load_trajectory(self, trajectory):
        """
        Store the atomistic trajectory (and its topology) to be mapped.

        Arguments:
        ----------
        trajectory : mdtraj.Trajectory
            The atomistic trajectory; its `top` attribute is stored as well
        """
        self._aa_traj = trajectory
        self._aa_top = trajectory.top
        # Drop any cached result so cg_map() does not return the CG
        # trajectory of a previously loaded system.
        self._cg_traj = None

    def load_mapping_dir(self, mapping_dir=None, **kwargs):
        """
        Load all mapping files from a directory.

        Arguments:
        ----------
        mapping_dir : string, default=None
            Path to the directory containing mapping files. When None, the
            directory returned by `default_mapping_dir(**kwargs)` is used.
        **kwargs : keyword arguments
            Keyword arguments forwarded to `default_mapping_dir`.
        """
        if mapping_dir is None:
            mapping_dir = default_mapping_dir(**kwargs)
        assert path.exists(mapping_dir)
        for filename in glob.glob("{}/*map".format(mapping_dir)):
            self.load_mapping(filename)

    def load_mapping(self, filename_or_mapping):
        """
        Register a single residue mapping.

        Arguments:
        ----------
        filename_or_mapping : string or ResMapping
            Path to the mapping file or ResMapping object to add to library.
            For a path, the mapping name is the file's base name up to the
            first '.'.
        """
        if isinstance(filename_or_mapping, ResMapping):
            self._mappings[filename_or_mapping.name] = filename_or_mapping
        else:
            assert path.exists(filename_or_mapping)
            name = path.basename(filename_or_mapping).split(".")[0]
            self._mappings[name] = ResMapping.load(name, filename_or_mapping)

    def cg_map(self):
        """
        Execute full CG mapping pipeline and return the CG trajectory.

        The result is cached: the pipeline only runs while no CG trajectory
        is stored.
        """
        if self._cg_traj is None:
            self._map_topology()
            self._convert_xyz()
            self._construct_traj()
        return self._cg_traj

    def _map_topology(self):
        """
        Create CG topology from given topology and mapping

        Raises
        ------
        OutOfOrderError
            If no atomistic trajectory has been loaded yet
        """
        # Ensure that a trajectory has been loaded
        if self._aa_traj is None:
            raise OutOfOrderError("An atomistic trajectory has not "
                                  "been loaded into this Mapper yet.")

        self._atom_bead_mapping = dict()
        self._cg_top = Topology()
        self._solvent_counter = 0

        # Loop over all residues
        for residue in self._aa_top.residues:
            if residue.name == self._solvent_name:
                self._map_solvent_top(residue)
            else:
                self._map_nonsolvent_top(residue)

    def _map_solvent_top(self, residue):
        """
        Count a solvent residue and, for every `solvent_mapping`-th one,
        add a CG solvent residue with a single bead to the CG topology.

        Arguments:
        ----------
        residue: mdtraj.topology.Residue
            The atomistic residue to be mapped to CG

        Returns
        -------
        The newly created CG residue, or None when this residue did not
        complete a group and no bead was created.
        """
        self._solvent_counter += 1
        if self._solvent_counter % self._solvent_mapping != 0:
            # Not enough solvent molecules accumulated for a new bead yet.
            return None

        cg_residue = self._cg_top.add_residue(self._solvent_name,
                                              self._cg_top.add_chain())
        cg_bead = CGBead(bead_type=self._solvent_name)
        mdtraj_bead = self._cg_top.add_atom(self._solvent_name, None,
                                            cg_residue)
        self._atom_bead_mapping[mdtraj_bead] = cg_bead
        return cg_residue

    def _map_nonsolvent_top(self, residue):
        """
        Create CG non-solvent residue from given residue and add it to the
        CG topology.

        Arguments:
        ----------
        residue: mdtraj.topology.Residue
            The atomistic residue to be mapped to CG

        Returns
        -------
        The CG residue that was added to the CG topology.
        """
        # Obtain the correct molecule mapping based on the residue
        res_mapping = self._mappings[residue.name]

        # Add an empty residue (in its own chain) to the CG topology
        cg_residue = self._cg_top.add_residue(residue.name,
                                              self._cg_top.add_chain())

        # Global atom indices of the atoms in this residue
        atoms = np.array([atom.index for atom in residue.atoms])

        # Create CG beads for each bead in the mapping
        cg_beads = []
        for bead in res_mapping.beads:
            bead_atoms = atoms.take(bead.mapping_indices)
            cg_bead = CGBead(bead_type=bead.name, atom_indices=bead_atoms)
            mdtraj_bead = self._cg_top.add_atom(cg_bead.bead_type, None,
                                                cg_residue)
            cg_beads.append(mdtraj_bead)
            self._atom_bead_mapping[mdtraj_bead] = cg_bead

        # Add bonds to topology
        for index_i, index_j in res_mapping.bonds:
            self._cg_top.add_bond(cg_beads[int(index_i)],
                                  cg_beads[int(index_j)])

        return cg_residue

    def _convert_xyz(self):
        """
        Take atomistic trajectory and convert to CG coordinates.

        Non-solvent beads are placed at the mass-weighted centre of their
        atoms; solvent beads are first zero-filled and then overwritten with
        the per-frame results of `_map_solvent`. The result is stored in
        `self._cg_xyz` with frames on the first axis.
        """
        cg_xyz = []
        for bead in self._cg_top.atoms:
            if bead.name == self._solvent_name:
                # Placeholder, filled in by the solvent step below
                bead_xyz = np.zeros((self._aa_traj.n_frames, 3))
            else:
                atom_indices = self._atom_bead_mapping[bead].atom_indices
                masses = np.array([self._aa_top.atom(i).element.mass
                                   for i in atom_indices])
                weighted = (self._aa_traj.xyz[:, atom_indices, :]
                            * masses[None, :, None])
                bead_xyz = np.sum(weighted, axis=1) / np.sum(masses)
            cg_xyz.append(bead_xyz)

        # (n_beads, n_frames, 3) -> (n_frames, n_beads, 3)
        cg_xyz = np.swapaxes(np.array(cg_xyz), 0, 1)

        # Workers process the solvent frame by frame; the parent assigns the
        # returned coordinates to the solvent beads of the CG array.
        if self._solvent_counter > 0:
            n_frames = self._aa_traj.n_frames
            with Pool(cpu_count()) as pool:
                chunksize = int(n_frames / cpu_count()) + 1
                args = list(zip(self._aa_traj,
                                [self._solvent_mapping] * n_frames,
                                [self._solvent_name] * n_frames))
                coms = pool.starmap(_map_solvent, args, chunksize)
                # join() raises ValueError on a running pool, so close first
                pool.close()
                pool.join()
            coms = np.squeeze(np.array(coms))
            solvent_beads = self._cg_top.select(f"name {self._solvent_name}")
            cg_xyz[:, solvent_beads, :] = coms

        self._cg_xyz = cg_xyz

    def _construct_traj(self):
        """
        Create an mdtraj.Trajectory from the CG topology and xyz, reusing
        the time and unit cell of the atomistic trajectory.
        """
        self._cg_traj = Trajectory(
            self._cg_xyz, self._cg_top,
            time=self._aa_traj.time,
            unitcell_lengths=self._aa_traj.unitcell_lengths,
            unitcell_angles=self._aa_traj.unitcell_angles)
def cg_by_index(trj, atom_indices_list, bead_label_list, chain_list=None,
                segment_id_list=None, resSeq_list=None, inplace=False,
                bonds=None, split_shared_atoms=False, mod_weights_list=None,
                mapping_function="com", charge_tol=1e-5,
                center_postwrap=False):
    """Create a coarse grained (CG) trajectory from subsets of atoms by
    computing weighted centers of selected sets of atoms.

    Parameters
    ----------
    trj : md.Trajectory
        Atomistic trajectory to map
    atom_indices_list : list of array-like, dtype=int, shape=(n_beads,n_atoms)
        List of indices of atoms to combine into CG sites
    bead_label_list : list of maximum 4-letter strings to label CG sites
        Labels are upper-cased before use
    chain_list : optional list of chain id's to split resulting beads into
        separate chains. Defaults to chain 1 for every bead
    segment_id_list : optional list of segment id's to assign cg residues
    resSeq_list : optional list of residue sequence id's to assign cg
        residues. Defaults to 1..n_beads
    inplace : bool, default=False
        If ``True``, the operation is done inplace, modifying ``trj``.
        Otherwise, a new trajectory is returned and ``trj`` is not modified.
    bonds : array-like,dtype=int, shape=(n_bonds,2), default=None
        If specified, sets these bonds in new topology
    split_shared_atoms : boolean
        If set, the per-bead weight modifiers are generated with
        ``gen_unique_overlap_mod_weights(atom_indices_list)``, replacing any
        ``mod_weights_list`` that was passed.
    mod_weights_list : optional list of per-bead weight multipliers
        If given, the weights of bead ``i`` are multiplied element-wise by
        ``mod_weights_list[i]``
    mapping_function : string, default='com'
        How to map xyz coordinates. Must be a key of the module-level
        ``mapping_options``; the weights implemented here are
        'com'/'center_of_mass', 'coc'/'center_of_charge',
        'cof'/'center_of_force' and 'center' (unit weights).
    charge_tol : float, default=1e-5
        For center-of-charge mapping, a bead whose absolute total charge is
        below this value is rejected
    center_postwrap : Boolean
        Forwarded to ``compute_center_weighted``. Whether to wrap the CG
        system after it is mapped. Assumes that box is centered at 0, and
        only has effect if periodic information is present.

    Note - If repeated resSeq values are used, as for a repeated motif in a
    CG polymer, those sections must be broken into separate chains or an
    incorrect topology will result

    Returns
    -------
    traj : md.Trajectory
        The return value is either ``trj``, or the new trajectory, depending
        on the value of ``inplace``. A new trajectory always carries a
        ``forces`` array (zeros when no forces were mapped).

    Raises
    ------
    ValueError
        If list lengths disagree, a bead label is invalid, the mapping
        function is unknown, or center-of-charge mapping is requested for a
        bead with (near-)zero total charge or for atoms without charges.
    """
    if len(atom_indices_list) != len(bead_label_list):
        raise ValueError("Must supply a list of bead labels of the "
                         "same length as a list of selected atom indices")

    for bead_label in bead_label_list:
        if not isinstance(bead_label, str) or not 1 <= len(bead_label) <= 4:
            raise ValueError("Specified bead label '%s' is not valid, "
                             "must be a string between 1 and 4 characters"
                             % (bead_label,))

    bead_label_list = [bead_label.upper() for bead_label in bead_label_list]

    if mapping_function not in mapping_options:
        raise ValueError("Must select a mapping function from: %s"
                         % mapping_options.keys())

    if chain_list is None:
        chain_list = np.ones(len(atom_indices_list), dtype=int)
    elif len(chain_list) != len(atom_indices_list):
        raise ValueError("Supplied chain_list must be of the same length "
                         "as a list of selected atom indices")

    if segment_id_list is not None and len(segment_id_list) != len(
            atom_indices_list):
        raise ValueError("Supplied segment_id_list must be of the same "
                         "length as a list of selected atom indices")

    if resSeq_list is not None and len(resSeq_list) != len(atom_indices_list):
        raise ValueError("Supplied resSeq_list must be of the same "
                         "length as a list of selected atom indices")

    use_charge = mapping_function in ('coc', 'center_of_charge')
    use_mass = mapping_function in ('com', 'center_of_mass')
    use_force = mapping_function in ('cof', 'center_of_force')

    n_beads = len(atom_indices_list)
    n_frames = trj.xyz.shape[0]
    n_dim = trj.xyz.shape[2]
    xyz = np.zeros((n_frames, n_beads, n_dim), dtype=trj.xyz.dtype, order='C')
    forces = np.zeros((n_frames, n_beads, n_dim), dtype=np.double, order='C')
    columns = ["serial", "name", "element", "resSeq", "resName", "chainID"]

    # Per-bead arrays of atomic masses, and the total mass of each bead.
    masses_i = [
        np.array([trj.top.atom(jj).element.mass for jj in atom_indices],
                 dtype=np.float64)
        for atom_indices in atom_indices_list
    ]
    masses = np.array([bead_masses.sum() for bead_masses in masses_i],
                      dtype=np.float64)

    # Charges are optional: probe the first atom (index 0, so that one-atom
    # topologies work) to see whether the topology carries them.
    charges = None
    charges_i = None
    if trj.top.n_atoms > 0 and hasattr(trj.top.atom(0), 'charge'):
        charges_i = [
            np.array([trj.top.atom(jj).charge for jj in atom_indices],
                     dtype=np.float64)
            for atom_indices in atom_indices_list
        ]
        charges = np.array([bead_charges.sum() for bead_charges in charges_i],
                           dtype=np.float64)

    forcenorm_i = []
    if use_force:
        for atom_indices in atom_indices_list:
            forcenorm_i.append(get_forcenorms(trj, atom_indices))

    if use_charge:
        if charges is None:
            raise ValueError("Center of charge mapping requires atoms with "
                             "a 'charge' attribute")
        # A center of charge is ill-defined when the total charge vanishes.
        for site, charge in enumerate(charges):
            if np.absolute(charge) < charge_tol:
                raise ValueError("Total charge on site %i is near zero"
                                 % site)

    topology_labels = []
    element_label_dict = {}

    if split_shared_atoms:
        mod_weights_list = gen_unique_overlap_mod_weights(atom_indices_list)

    # Forces are mapped only when the trajectory carries a usable 'forces'
    # entry; a trial call on atom 0 checks that they can be mapped at all.
    has_forces = False
    try:
        trj.__dict__['forces']
        map_forces(trj, (0, ))
        has_forces = True
    except TypeError:
        print("WARNING: Invalid Forces\nNo Map applied to forces")
    except KeyError:
        pass
    except Exception:
        print("Unknown error, check your forces\nexiting...")
        raise

    for i in range(n_beads):
        atom_indices = atom_indices_list[i]
        bead_label = bead_label_list[i]
        # View into the output array, filled by compute_center_weighted.
        xyz_i = xyz[:, i, :]

        if use_charge:
            weights = charges_i[i]
        elif use_mass:
            weights = masses_i[i]
        elif use_force:
            weights = forcenorm_i[i]
        elif mapping_function == 'center':
            weights = np.ones(len(atom_indices))
        else:
            raise ValueError("Mapping function '%s' is not supported by "
                             "cg_by_index" % mapping_function)

        if mod_weights_list is not None:
            weights[:] = np.multiply(weights, mod_weights_list[i])

        compute_center_weighted(xyz_i,
                                trj.xyz,
                                atom_indices,
                                weights,
                                unitcell_lengths=trj.unitcell_lengths,
                                center_postwrap=center_postwrap)

        if has_forces:
            forces[:, i, :] = map_forces(trj, atom_indices)

        if resSeq_list is not None:
            resSeq = resSeq_list[i]
        else:
            resSeq = i + 1

        # One pseudo-element label (B0..B9, cycling) per distinct bead label.
        if bead_label not in element_label_dict:
            element_label = '%2s' % ('B%i' % (len(element_label_dict) % 10))
            element_label_dict[bead_label] = element_label
        else:
            element_label = element_label_dict[bead_label]

        # Create the pseudo-element if the element table does not know it yet
        # (presumably constructing an Element registers it -- confirm).
        if element_label.strip().upper(
        ) not in element.Element._elements_by_symbol:
            element.Element(1000 + resSeq, element_label, element_label,
                            masses[i], 1.0)

        topology_labels.append([
            i, bead_label, element_label, resSeq,
            '%3s' % bead_label, chain_list[i]
        ])

    df = pd.DataFrame(topology_labels, columns=columns)
    topology = Topology.from_dataframe(df, bonds=bonds)

    if segment_id_list is not None:
        for beadidx, bead in enumerate(topology.atoms):
            bead.residue.segment_id = segment_id_list[beadidx]

    if inplace:
        if trj._topology is not None:
            trj._topology = topology
        trj._xyz = xyz
        return trj

    unitcell_lengths = unitcell_angles = None
    if trj._have_unitcell:
        unitcell_lengths = trj._unitcell_lengths.copy()
        unitcell_angles = trj._unitcell_angles.copy()

    time = trj._time.copy()

    new_trj = Trajectory(xyz=xyz,
                         topology=topology,
                         time=time,
                         unitcell_lengths=unitcell_lengths,
                         unitcell_angles=unitcell_angles)
    new_trj.forces = forces
    return new_trj
def map_molecules(trj, selection_list, bead_label_list, transfer_labels=False,
                  molecule_types=None, molecule_type_order=False,
                  return_call=False, *args, **kwargs):
    """ This performs the mapping where each molecule has been assigned a
    type.

    Parameters
    ----------
    trj : Trajectory
        Trajectory to map; every residue is treated as one molecule
    selection_list : Indexible collection of collections of strings
        For each molecule type, one selection string per bead. A selection
        must contain either "index" or "name" (not both).
    bead_label_list : Indexible collection
        For each molecule type, one label per bead
    transfer_labels : boolean
        Whether to transfer over labels in @trj. Moves over resSeq, resName
        for every bead, assuming that the atoms in each bead are uniform in
        those qualities.
    molecule_types : Indexible collection of integers
        Type of each residue, indexed by residue position. Defaults to type
        0 for every residue.
    molecule_type_order : boolean
        Not currently supported; passing True raises ValueError.
    return_call : boolean
        Whether to return the arguments that cg_by_index would be called
        with instead of actually calling it. Useful for modifying the call.
    *args, **kwargs
        Forwarded to cg_by_index.

    Returns
    -------
    traj : trajectory
        trajectory formed by applying given molecular map.
    -OR-
    list : [trj, index_list, label_list, *args, kwargs], the arguments which
        would be passed to cg_by_index (kwargs as a single trailing dict)

    Raises
    ------
    ValueError
        On unsupported options, inconsistent list lengths, invalid molecule
        types, or a selection that matches no atoms.
    """

    ### First, deal with optional arguments and argument validation.
    if molecule_type_order is True:
        raise ValueError("molecule_type_order not currently supported.")

    # if the array of molecule types isn't given, assume 1 molecule type.
    if molecule_types is None:
        molecule_types = [0] * trj.top.n_residues
    else:
        # Normalise to a list so that .index() below also works for arrays
        # and other indexable collections.
        molecule_types = list(molecule_types)

    n_molecule_types = len(selection_list)

    if sorted(set(molecule_types)) != list(range(n_molecule_types)):
        raise ValueError("Error in map molecules, molecule types list must "
                         "contain only and all numbers from 0 to "
                         "n_molecule_types-1.")

    if len(selection_list) != len(bead_label_list):
        raise ValueError("Error in map molecules, must submit selection list "
                         "and bead label list of same length.")

    for i in range(n_molecule_types):
        if len(selection_list[i]) != len(bead_label_list[i]):
            raise ValueError("Error in map molecules, selection list %i and "
                             "bead label list %i must be of same length." %
                             (i, i))

    ### generate the indices local to each molecule for mapping

    # get the first molecule index for each molecule type
    first_molecules = [
        molecule_types.index(i) for i in range(n_molecule_types)
    ]

    internal_indices_list = [[] for _ in range(n_molecule_types)]

    for selection, first_mol, mol_indices in zip(selection_list,
                                                 first_molecules,
                                                 internal_indices_list):
        first_index = trj.top.select("(resid == %i)" % first_mol).min()

        for sel in selection:
            has_index = "index" in sel
            has_name = "name" in sel
            internal_indices = []
            if has_index and has_name:
                raise ValueError("Error in map molecules, do not specify "
                                 "selection by index and by type.")
            elif has_index:
                # the selection is evaluated on the whole system and the
                # resulting indices are used as molecule-local offsets
                internal_indices = trj.top.select(sel)
            elif has_name:
                # un-shift, because the start index of each molecule is
                # added back later
                filter_string = "(resid == %i) and (%s)" % (first_mol, sel)
                internal_indices = trj.top.select(filter_string) - first_index

            if len(internal_indices) == 0:
                raise ValueError(
                    "Error in map_molecules, selection string '%s' "
                    "produced an empty list of atom indices" % sel)

            mol_indices.append(internal_indices)

    # Expand the per-type local indices to every residue of the system.
    # (Reordering by molecule type is rejected above, so residues are
    # visited in topology order.)
    index_list = []
    resSeq_list = []
    label_list = []

    start_index = 0
    resSeq = 1
    for ridx, r in enumerate(trj.top.residues):
        molecule_type = molecule_types[ridx]
        for bead_idx, internal_indices in enumerate(
                internal_indices_list[molecule_type]):
            index_list.append(internal_indices + start_index)
            resSeq_list.append(resSeq)
            label_list.append(bead_label_list[molecule_type][bead_idx])
        resSeq = resSeq + 1
        start_index = start_index + r.n_atoms

    if return_call is True:
        arg_list = [trj, index_list, label_list]
        arg_list.extend(args)
        arg_list.append(kwargs)
        return arg_list
        # exit early.

    cg_trj = cg_by_index(trj, index_list, label_list, *args, **kwargs)

    # do a more sophisticated labeling.
    if transfer_labels is True:

        df_aa_top = trj.top.to_dataframe()[0]
        # keep the bond table so rebuilding the topology does not drop bonds
        df_cg_top, cg_bonds = cg_trj.top.to_dataframe()

        # get resSeq info.
        aa_resSeq = df_aa_top.loc[:, 'resSeq']

        # find atom indices for first atoms of each residue.
        res_starting_indices = np.sort(
            np.unique(aa_resSeq, return_index=True)[1])

        # get resids and resnames for starting atoms.
        aa_starting_resids = df_aa_top.loc[res_starting_indices, 'resSeq']
        aa_starting_resnames = df_aa_top.loc[res_starting_indices, 'resName']

        # needed for duplicating atomistic info across cg molecules
        n_sites_per_cg = [len(desc) for desc in bead_label_list]

        # generate and place resids
        cg_resids = typed_elementwise_rep(aa_starting_resids, molecule_types,
                                          n_sites_per_cg)
        df_cg_top.loc[:, "resSeq"] = cg_resids

        # generate and place resNames
        cg_resnames = typed_elementwise_rep(aa_starting_resnames,
                                            molecule_types, n_sites_per_cg)
        df_cg_top.loc[:, "resName"] = cg_resnames

        # convert and put back.
        cg_trj.top = Topology.from_dataframe(df_cg_top, bonds=cg_bonds)

    return cg_trj
def render_traj(topology, positions):
    """Wrap an OpenMM topology and positions in a Trajectory and return the
    viewer produced by ``show_mdtraj`` for it.

    The positions are divided by ``unit.nanometers`` before being handed to
    the Trajectory; the viewer gets a ball-and-stick representation of
    'all' and a zoomed, centered view.
    """
    xyz_nm = positions / unit.nanometers
    md_topology = Topology.from_openmm(topology)
    traj = Trajectory(xyz_nm, md_topology)

    # Each call is chained on the return value of the previous one.
    view = show_mdtraj(traj)
    view = view.add_ball_and_stick('all')
    return view.center_view(zoom=True)
def run_protocol(equilibrium_result: EquilibriumResult, thermodynamic_state: states.ThermodynamicState,
                 alchemical_functions: dict, nstep_neq: int, topology: md.Topology, work_save_interval: int,
                 splitting: str="V R O H R V", atom_indices_to_save: List[int] = None,
                 trajectory_filename: str = None, write_configuration: bool = False,
                 timestep: unit.Quantity=1.0*unit.femtoseconds,
                 measure_shadow_work: bool=False) -> NonequilibriumResult:
    """
    Perform a nonequilibrium switching protocol and return the nonequilibrium protocol work. Note that it is expected
    that this will perform an entire protocol, that is, switching lambda completely from 0 to 1, in increments
    specified by the ne_mc_move.

    Parameters
    ----------
    equilibrium_result : EquilibriumResult namedtuple
        The result of an equilibrium simulation
    thermodynamic_state : openmmtools.states.ThermodynamicState
        The thermodynamic state at which to run the protocol
    alchemical_functions : dict
        The alchemical functions to use for switching
    nstep_neq : int
        The number of nonequilibrium steps in the protocol
    topology : mdtraj.Topology
        An MDtraj topology for the system to generate trajectories
    work_save_interval : int
        How often to write the work and, if requested, configurations
    splitting : str, default "V R O H R V"
        The splitting string to use for the Langevin integration
    atom_indices_to_save : list of int, default None
        list of indices to save (when excluding waters, for instance). If None, all indices are saved.
    trajectory_filename : str, default None
        Full filepath of output trajectory, if desired. If None, nothing is written to disk. Otherwise the
        work arrays are saved next to it, with the file extension replaced by ``.cw.npy`` (cumulative work),
        ``.pw.npy`` (protocol work) and, if measured, ``.sw.npy`` (shadow work).
    write_configuration : bool, default False
        Whether to also write configurations of the trajectory at the requested interval.
    timestep : unit.Quantity, default 1 fs
        The timestep to use in the integrator
    measure_shadow_work : bool, default False
        Whether to collect the shadow work from the move. If False, the shadow work in the result is an
        array of zeros shaped like the protocol work.

    Returns
    -------
    nonequilibrium_result : NonequilibriumResult
        result object built from the cumulative work, protocol work and shadow work of the move.
    """
    # Imported locally so this function does not depend on a module-level import of os.
    import os

    #get the sampler state needed for the simulation
    sampler_state = equilibrium_result.sampler_state
    temperature = thermodynamic_state.temperature

    #get the atom indices we need to subset the topology and positions
    if atom_indices_to_save is None:
        atom_indices = list(range(topology.n_atoms))
        subset_topology = topology
    else:
        subset_topology = topology.subset(atom_indices_to_save)
        atom_indices = atom_indices_to_save

    ne_mc_move = NonequilibriumSwitchingMove(alchemical_functions, splitting, temperature, nstep_neq, timestep,
                                             work_save_interval, subset_topology, atom_indices,
                                             save_configuration=write_configuration,
                                             measure_shadow_work=measure_shadow_work)

    ne_mc_move.reset()

    #apply the nonequilibrium move
    ne_mc_move.apply(thermodynamic_state, sampler_state)

    #get the cumulative work and the protocol work
    cumulative_work = ne_mc_move.cumulative_work
    protocol_work = ne_mc_move.protocol_work

    #if we're measuring shadow work, get that. Otherwise just fill in zeros:
    if measure_shadow_work:
        shadow_work = ne_mc_move.shadow_work
    else:
        shadow_work = np.zeros_like(protocol_work)

    #create a result object and return that
    nonequilibrium_result = NonequilibriumResult(cumulative_work, protocol_work, shadow_work)

    #if desired, write nonequilibrium trajectories:
    if trajectory_filename is not None:
        #work files share the trajectory's path with its extension replaced. splitext only strips a real
        #extension, so extension-less names and dotted directory names keep their full stem.
        base_path = os.path.splitext(trajectory_filename)[0]
        cum_work_filepath = base_path + ".cw.npy"
        prot_work_filepath = base_path + ".pw.npy"

        #if writing configurations was requested, get the trajectory
        if write_configuration:
            try:
                trajectory = ne_mc_move.trajectory
                write_nonequilibrium_trajectory(nonequilibrium_result, trajectory, trajectory_filename)
            except NoTrajectoryException:
                #best effort: the work arrays below are still written when no trajectory is available
                pass

        np.save(cum_work_filepath, nonequilibrium_result.cumulative_work)
        np.save(prot_work_filepath, nonequilibrium_result.protocol_work)

        if measure_shadow_work:
            np.save(base_path + ".sw.npy", shadow_work)

    return nonequilibrium_result
def run_equilibrium(equilibrium_result: EquilibriumResult, thermodynamic_state: states.ThermodynamicState,
                    nsteps_equil: int, topology: md.Topology, n_iterations : int,
                    atom_indices_to_save: List[int] = None, trajectory_filename: str = None,
                    splitting: str="V R O R V", timestep: unit.Quantity=1.0*unit.femtoseconds) -> EquilibriumResult:
    """
    Run nsteps of equilibrium sampling at the specified thermodynamic state and return the final sampler state
    as well as a trajectory of the positions after each application of an MCMove. This means that if the MCMove
    is configured to run 1000 steps of dynamics, and n_iterations is 100, there will be 100 frames in the resulting
    trajectory; these are the result of 100,000 steps (1000*100) of dynamics.

    Parameters
    ----------
    equilibrium_result : EquilibriumResult
       EquilibriumResult namedtuple containing the information necessary to resume
    thermodynamic_state : openmmtools.states.ThermodynamicState
        The thermodynamic state (including context parameters) that should be used
    nsteps_equil : int
        The number of equilibrium steps that a move should make when apply is called
    topology : mdtraj.Topology
        an MDTraj topology object used to construct the trajectory
    n_iterations : int
        The number of times to apply the move. Note that this is not the number of steps of dynamics; it is
        n_iterations*n_steps (which is set in the MCMove).
    atom_indices_to_save : list of int, default None
        list of indices to save (when excluding waters, for instance). If None, all indices are saved.
    trajectory_filename : str, optional, default None
        Full filepath of trajectory files. If none, trajectory files are not written.
    splitting : str, default "V R O R V"
        The splitting string for the dynamics
    timestep : unit.Quantity, default 1 fs
        The timestep used by the Langevin dynamics move

    Returns
    -------
    equilibrium_result : EquilibriumResult
        Container namedtuple built from the SamplerState for resuming and the reduced potential of the
        final frame. The trajectory itself is only written to disk (when a filename is given).
    """
    sampler_state = equilibrium_result.sampler_state

    #get the atom indices we need to subset the topology and positions
    if atom_indices_to_save is None:
        atom_indices = list(range(topology.n_atoms))
        subset_topology = topology
    else:
        subset_topology = topology.subset(atom_indices_to_save)
        atom_indices = atom_indices_to_save

    n_atoms = subset_topology.n_atoms

    #construct the MCMove; the timestep must be forwarded, otherwise the move silently uses its own default
    mc_move = mcmc.LangevinSplittingDynamicsMove(n_steps=nsteps_equil, splitting=splitting, timestep=timestep)
    mc_move.n_restart_attempts = 10

    #create numpy arrays for the trajectory
    trajectory_positions = np.zeros([n_iterations, n_atoms, 3])
    trajectory_box_lengths = np.zeros([n_iterations, 3])
    trajectory_box_angles = np.zeros([n_iterations, 3])

    #loop through iterations and apply MCMove, then collect positions into numpy array
    for iteration in range(n_iterations):
        mc_move.apply(thermodynamic_state, sampler_state)

        trajectory_positions[iteration, :] = sampler_state.positions[atom_indices, :].value_in_unit_system(unit.md_unit_system)

        #get the box lengths and angles
        a, b, c, alpha, beta, gamma = mdtrajutils.unitcell.box_vectors_to_lengths_and_angles(*sampler_state.box_vectors)
        trajectory_box_lengths[iteration, :] = [a, b, c]
        trajectory_box_angles[iteration, :] = [alpha, beta, gamma]

    #construct trajectory object:
    trajectory = md.Trajectory(trajectory_positions, subset_topology, unitcell_lengths=trajectory_box_lengths,
                               unitcell_angles=trajectory_box_angles)

    #get the reduced potential from the final frame for endpoint perturbations
    reduced_potential_final_frame = thermodynamic_state.reduced_potential(sampler_state)

    #construct equilibrium result object
    equilibrium_result = EquilibriumResult(sampler_state, reduced_potential_final_frame)

    #If there is a trajectory filename passed, write out the results here:
    if trajectory_filename is not None:
        write_equilibrium_trajectory(equilibrium_result, trajectory, trajectory_filename)

    return equilibrium_result