def _topology_from_arrays(AtomID, AtomNames, ChainID, ResidueID, ResidueNames):
    """Build a single-chain topology object from the arrays stored in the lh5 file.

    Parameters
    ----------
    AtomID : array-like
        Atom identifiers; atoms are added in ascending order of this array.
    AtomNames : array-like of str or bytes
        Atom names, parallel to ``AtomID``. Bytes entries are decoded.
    ChainID : array-like of str or bytes
        Chain identifiers. Every entry must be empty, because only
        single-chain systems are supported.
    ResidueID : array-like
        Residue identifier of each atom, parallel to ``AtomID``. One residue
        is created per distinct value, in ascending order.
    ResidueNames : array-like of str or bytes
        Residue name of each atom, parallel to ``ResidueID``.

    Returns
    -------
    topology : mdtraj.Topology
        Topology with one chain, the registered residues and atoms, and the
        bonds created by ``create_standard_bonds``.

    Raises
    ------
    NotImplementedError
        If any entry of ``ChainID`` is not an empty string.
    """
    # Delayed import due to wacky recursive imports in compatibilty
    from mdtraj import Topology
    topology = Topology()

    # Check that the ChainID is just an array of empty strings, which appears
    # to be the case in our test systems for this legacy format.
    # NOTE: the previous `np.all(<generator>)` never iterated the generator
    # (a generator object is always truthy), so this guard could not fire.
    for chainid in ChainID:
        if isinstance(chainid, bytes):
            chainid = chainid.decode()
        if chainid != '':
            raise NotImplementedError('Im not prepared to parse multiple chains')
    chain0 = topology.add_chain()

    # register the residues
    registered_residues = {}
    for i in np.argsort(ResidueID):
        residue_name = ResidueNames[i]
        if not isinstance(residue_name, basestring):
            residue_name = residue_name.decode()
        if ResidueID[i] not in registered_residues:
            res = topology.add_residue(residue_name, chain0)
            registered_residues[ResidueID[i]] = res

    # register the atoms
    for i in np.argsort(AtomID):
        atom_name = AtomNames[i]
        if not isinstance(atom_name, basestring):
            atom_name = atom_name.decode()
        # The element is guessed from the first character that follows any
        # leading digits in the atom name.
        element_symbol = atom_name.lstrip('0123456789')[0]
        element = mdtraj.pdb.element.get_by_symbol(element_symbol)
        topology.add_atom(atom_name, element,
                          registered_residues[ResidueID[i]])

    topology.create_standard_bonds()
    return topology
def _topology_from_arrays(AtomID, AtomNames, ChainID, ResidueID, ResidueNames):
    """Build a single-chain topology from the arrays of a legacy file.

    Parameters
    ----------
    AtomID : array-like
        Atom identifiers; atoms are added in ascending order of this array.
    AtomNames : array-like of str or bytes
        Atom names, parallel to ``AtomID``. Bytes entries are decoded.
    ChainID : array-like of str or bytes
        Chain identifiers. Every entry must be empty, because only
        single-chain systems are supported.
    ResidueID : array-like
        Residue identifier of each atom, parallel to ``AtomID``. One residue
        is created per distinct value, in ascending order.
    ResidueNames : array-like of str or bytes
        Residue name of each atom, parallel to ``ResidueID``.

    Returns
    -------
    topology : Topology
        Topology with one chain, the registered residues and atoms, and the
        bonds created by ``create_standard_bonds``.

    Raises
    ------
    NotImplementedError
        If any entry of ``ChainID`` is not an empty string.
    """
    topology = Topology()

    # Check that the ChainID is just an array of empty strings, which appears
    # to be the case in our test systems for this legacy format.
    # NOTE: the previous `assert np.all(<generator>)` could never fail (a
    # generator object is always truthy) and would also have been stripped
    # under `python -O`; an explicit raise makes the guard effective.
    for chainid in ChainID:
        if isinstance(chainid, bytes):
            chainid = chainid.decode()
        if chainid != '':
            raise NotImplementedError('Im not prepaed to parse multiple chains')
    chain0 = topology.add_chain()

    # register the residues
    registered_residues = {}
    for i in np.argsort(ResidueID):
        residue_name = ResidueNames[i]
        if not isinstance(residue_name, str):
            residue_name = residue_name.decode()
        if ResidueID[i] not in registered_residues:
            res = topology.add_residue(residue_name, chain0)
            registered_residues[ResidueID[i]] = res

    # register the atoms
    for i in np.argsort(AtomID):
        atom_name = AtomNames[i]
        if not isinstance(atom_name, str):
            atom_name = atom_name.decode()
        # The element is guessed from the first character that follows any
        # leading digits in the atom name.
        element_symbol = atom_name.lstrip('0123456789')[0]
        element = mdtraj.pdb.element.get_by_symbol(element_symbol)
        topology.add_atom(atom_name, element,
                          registered_residues[ResidueID[i]])

    topology.create_standard_bonds()
    return topology
def add_mol_to_topology(self, coords: np.ndarray, types: np.ndarray, topology: mdtraj.Topology):
    """Append one molecule to ``topology`` as a new chain, one residue per atom.

    Parameters
    ----------
    coords : np.ndarray
        2-D array with one row per atom. Only its shape is checked here.
    types : np.ndarray
        Either a 2-D array whose row-wise argmax is looked up through
        ``self.sequential_to_atomic_number()``, or a 1-D array of keys for
        ``atomic_number_to_symbol()``.
    topology : mdtraj.Topology
        Topology that is extended in place.

    Raises
    ------
    AssertionError
        If ``coords`` is not 2-D or its first dimension differs from that
        of ``types``.
    ValueError
        If ``types`` is neither 1-D nor 2-D.
    """
    assert coords.shape[0] == types.shape[0]
    assert coords.ndim == 2

    # Resolve all symbols BEFORE touching the topology, so that an invalid
    # `types` array does not leave a dangling empty chain behind.
    # The lookup tables are fetched once instead of once per atom
    # (assumes both helpers are side-effect free -- confirm).
    if types.ndim == 2:
        seq_to_number = self.sequential_to_atomic_number()
        seqs = np.argmax(types, axis=1)
        atms = [seq_to_number[t] for t in seqs]
        number_to_symbol = atomic_number_to_symbol()
        syms = [number_to_symbol[t] for t in atms]
    elif types.ndim == 1:
        number_to_symbol = atomic_number_to_symbol()
        syms = [number_to_symbol[t] for t in types]
    else:
        raise ValueError(
            "Types must either be one hot vectors with ndim==2 XOR numbers with ndim==1."
        )

    chain = topology.add_chain()
    for i, s in enumerate(syms):
        # Each atom gets its own residue named after its position.
        res = topology.add_residue("mol_{}".format(i), chain)
        topology.add_atom(s, mdtraj.element.get_by_symbol(s), res)
def _topology_from_arrays(AtomID, AtomNames, ChainID, ResidueID, ResidueNames):
    """Build a single-chain topology object from the arrays stored in the lh5 file.

    Parameters
    ----------
    AtomID : array-like
        Atom identifiers; atoms are added in ascending order of this array.
    AtomNames : array-like of str or bytes
        Atom names, parallel to ``AtomID``. Bytes entries are decoded.
    ChainID : array-like of str or bytes
        Chain identifiers. Every entry must be empty, because only
        single-chain systems are supported.
    ResidueID : array-like
        Residue identifier of each atom, parallel to ``AtomID``. One residue
        is created per distinct value, in ascending order.
    ResidueNames : array-like of str or bytes
        Residue name of each atom, parallel to ``ResidueID``.

    Returns
    -------
    topology : mdtraj.Topology
        Topology with one chain, the registered residues and atoms, and the
        bonds created by ``create_standard_bonds``. Atoms whose guessed
        element symbol is unknown are added with ``element=None``.

    Raises
    ------
    NotImplementedError
        If any entry of ``ChainID`` is not an empty string.
    """
    # Delayed import due to wacky recursive imports in compatibilty
    from mdtraj import Topology
    topology = Topology()

    # Check that the ChainID is just an array of empty strings, which appears
    # to be the case in our test systems for this legacy format.
    # NOTE: the previous `np.all(<generator>)` never iterated the generator
    # (a generator object is always truthy), so this guard could not fire.
    for chainid in ChainID:
        if isinstance(chainid, bytes):
            chainid = chainid.decode()
        if chainid != '':
            raise NotImplementedError('Im not prepared to parse multiple chains')
    chain0 = topology.add_chain()

    # register the residues
    registered_residues = {}
    for i in np.argsort(ResidueID):
        residue_name = ResidueNames[i]
        if not isinstance(residue_name, basestring):
            residue_name = residue_name.decode()
        if ResidueID[i] not in registered_residues:
            res = topology.add_residue(residue_name, chain0)
            registered_residues[ResidueID[i]] = res

    # register the atoms
    for i in np.argsort(AtomID):
        atom_name = AtomNames[i]
        if not isinstance(atom_name, basestring):
            atom_name = atom_name.decode()
        # The element is guessed from the first character that follows any
        # leading digits in the atom name.
        element_symbol = atom_name.lstrip('0123456789')[0]

        try:
            element = elem.get_by_symbol(element_symbol)
        except KeyError:
            # Unknown symbol: keep the atom but leave its element unset.
            element = None

        topology.add_atom(atom_name, element,
                          registered_residues[ResidueID[i]])

    topology.create_standard_bonds()
    return topology
def _traj_from_xyza(xyz, atomic_numbers, units='nm'):
    """
    Build a minimal one-chain, one-residue trajectory from coordinates and
    atomic numbers.

    Parameters
    ----------
    xyz : np.array, float, shape( num_atom, 3)
        array of x,y,z coordinates, one row per atom. The input array is
        never modified.

    atomic_numbers : np.array, int, shape( num_atom, 1 )
        the atomic numbers of each of the atoms; each entry is used as a key
        into ``periodic_table``.

    Optional Parameters
    -------------------
    units : str
        if units == 'nm' then nothing happens. if units == 'ang' then we convert
        them to nm. Any other value is treated like 'nm' (no conversion).

    Returns
    -------
    structure : mdtraj.trajectory
        A meta-data minimal mdtraj instance
    """

    if units == 'ang':
        # Divide out of place: `xyz /= 10.` would silently rescale the
        # caller's array and fails outright for integer-typed arrays.
        xyz = xyz / 10.

    top = Topology()
    chain = top.add_chain()
    # All atoms are placed in a single placeholder residue.
    residue = top.add_residue('XXX', chain)

    for i in range(xyz.shape[0]):
        element_symb = periodic_table[atomic_numbers[i]][1]  # should give symbol
        element = Element.getBySymbol(element_symb)
        name = '%s' % element_symb
        top.add_atom(name, element, residue)

    structure = Trajectory(xyz=xyz, topology=top)

    return structure
class Mapper:
    """ An object to convert an atomistic system to a CG system

    Attributes
    ----------
    mappings : dict
        A dictionary containing the {name : mapping} for each residue.
        Read-only; reading it returns a deep copy.
    solvent_mapping : int, default=4
        Number of solvent molecules to map to a single bead via
        k-means clustering
    solvent_name : string, default='tip3p'
        Name of solvent residue in atomistic system
    aa_traj : mdtraj.Trajectory
        The atomistic trajectory to convert to CG
    aa_top: mdtraj.Topology
        The atomistic topology to convert to CG
    cg_traj : mdtraj.Trajectory
        The CG trajectory to converted from atomistic
    cg_top: mdtraj.Topology
        The CG topology to convert from atomistic
    """

    def __init__(self, solvent_name='tip3p', solvent_mapping=4):
        self._mappings = dict()
        self._solvent_mapping = solvent_mapping
        self._solvent_name = solvent_name
        # Initialised here so that the "no trajectory loaded" guard in
        # `_map_topology` is reachable instead of an AttributeError.
        self._aa_traj = None
        self._aa_top = None
        self._cg_traj = None
        self._cg_top = None

    @property
    def mappings(self):
        """Deep copy of the loaded {name : mapping} dictionary."""
        return deepcopy(self._mappings)

    @mappings.setter
    def mappings(self, value):
        # The setter must accept the assigned value; without it Python
        # raised an unrelated arity TypeError instead of this message.
        raise TypeError("'mappings' attribute does not support assignment")

    def load_trajectory(self, trajectory):
        """ Store the atomistic trajectory (and its topology) to be mapped.

        Arguments:
        ----------
        trajectory : mdtraj.Trajectory
            The atomistic trajectory; its `top` attribute is stored as the
            atomistic topology.
        """
        self._aa_traj = trajectory
        self._aa_top = trajectory.top
        # Drop any previously mapped result so `cg_map` does not return a
        # CG trajectory that belongs to an older input.
        self._cg_traj = None

    def load_mapping_dir(self, mapping_dir=None, **kwargs):
        """ Load all mapping files from a directory.

        Arguments:
        ----------
        mapping_dir : string, default=None
            Path to the directory containing mapping files.
            Loads from the internal `mappings` directory by default
        **kwargs : keyword arguments
            Keyword arguments to pass to mapping_dir. Namely the ff arg.
        """
        if mapping_dir is None:
            mapping_dir = default_mapping_dir(**kwargs)

        assert path.exists(mapping_dir)

        # Every file whose name ends in "map" is treated as a mapping file.
        for filename in glob.glob("{}/*map".format(mapping_dir)):
            self.load_mapping(filename)

    def load_mapping(self, filename_or_mapping):
        """ Load a single mapping file from disk.

        Arguments:
        ----------
        filename : string or ResMapping
            Path to the mapping file or ResMapping object to add to library
        """
        if isinstance(filename_or_mapping, ResMapping):
            self._mappings.update({
                filename_or_mapping.name: filename_or_mapping})
        else:
            assert path.exists(filename_or_mapping)
            # The mapping is registered under the file name up to its
            # first dot.
            name = path.basename(filename_or_mapping).split(".")[0]
            mapping = ResMapping.load(name, filename_or_mapping)
            self._mappings.update({name: mapping})

    def cg_map(self):
        """ Execute full CG mapping pipeline and return the CG trajectory.

        The result is cached; the pipeline only runs while no CG
        trajectory is stored.
        """
        if self._cg_traj is None:
            self._map_topology()
            self._convert_xyz()
            self._construct_traj()
        return self._cg_traj

    def _map_topology(self):
        """ Create CG topology from given topology and mapping """
        # Ensure that a trajectory has been loaded
        if self._aa_traj is None:
            raise OutOfOrderError("An atomistic trajectory has not "
                                  "been loaded into this Mapper yet.")

        self._atom_bead_mapping = dict()
        self._cg_top = Topology()
        self._solvent_counter = 0

        # Loop over all residues
        for residue in self._aa_top.residues:
            if residue.name == self._solvent_name:
                self._map_solvent_top(residue)
            else:
                self._map_nonsolvent_top(residue)

    def _map_solvent_top(self, residue):
        """ Create CG solvent residue from given residue and
        add it to the CG topology.

        Arguments:
        ----------
        residue: mdtraj.topology.Residue
            The atomistic residue to be mapped to CG

        Returns the new CG residue when one is created for this call;
        otherwise nothing is returned.
        """
        self._solvent_counter += 1
        # Only every `solvent_mapping`-th solvent residue produces a bead.
        if self._solvent_counter % self._solvent_mapping == 0:
            cg_residue = self._cg_top.add_residue(self._solvent_name,
                                                  self._cg_top.add_chain())
            cg_bead = CGBead(bead_type=self._solvent_name)
            mdtraj_bead = self._cg_top.add_atom(self._solvent_name, None,
                                                cg_residue)
            self._atom_bead_mapping[mdtraj_bead] = cg_bead
            return cg_residue

    def _map_nonsolvent_top(self, residue):
        """ Create CG non-solvent residue from given residue and
        add it to the CG topology.

        Arguments:
        ----------
        residue: mdtraj.topology.Residue
            The atomistic residue to be mapped to CG
        """
        # Obtain the correct molecule mapping based on the residue
        res_mapping = self._mappings[residue.name]

        # Add an empty residue to the CG topology
        cg_residue = self._cg_top.add_residue(
            residue.name, self._cg_top.add_chain())

        # Make a list of atoms in the residue
        atoms = np.array([atom.index for atom in residue.atoms])

        # Make an empty list to store beads
        cg_beads = []

        # Create CG beads for each bead in the mapping
        for bead in res_mapping.beads:
            # Mapping indices are relative to the residue; convert them to
            # the atoms' own indices.
            bead_atoms = atoms.take(bead.mapping_indices)
            cg_bead = CGBead(bead_type=bead.name, atom_indices=bead_atoms)
            mdtraj_bead = self._cg_top.add_atom(cg_bead.bead_type, None,
                                                cg_residue)
            cg_beads.append(mdtraj_bead)
            self._atom_bead_mapping[mdtraj_bead] = cg_bead

        # Add bonds to topology
        for index_i, index_j in res_mapping.bonds:
            self._cg_top.add_bond(cg_beads[int(index_i)],
                                  cg_beads[int(index_j)])

        return cg_residue

    def _convert_xyz(self):
        """ Take atomistic trajectory and convert to CG trajectory """
        cg_xyz = []
        for bead in self._cg_top.atoms:
            if bead.name == self._solvent_name:
                # Placeholder; solvent positions are assigned further down.
                bead_xyz = np.zeros((self._aa_traj.n_frames, 3))
            else:
                atom_indices = self._atom_bead_mapping[bead].atom_indices
                masses = np.array([self._aa_top.atom(i).element.mass
                                   for i in atom_indices])
                # Mass-weighted mean position of the bead's atoms per frame.
                bead_xyz = (np.sum((self._aa_traj.xyz[:, atom_indices, :]
                                    * masses[None, :, None]), axis=1)
                            / np.sum(masses))
            cg_xyz.append(bead_xyz)

        # Stack per-bead arrays, then put the frame axis first.
        cg_xyz = np.array(cg_xyz)
        cg_xyz = np.swapaxes(cg_xyz, 0, 1)

        # Figure out at which coarse grain index the waters start
        # Perform kmeans, frame-by-frame, over all water residues
        # Workers will return centers of masses of clusters, frame index,
        # and cg index
        # Master will assign to CG_xyz
        if self._solvent_counter > 0:
            with Pool(cpu_count()) as pool:
                chunksize = int(self._aa_traj.n_frames / cpu_count()) + 1
                args = list(zip(
                    self._aa_traj,
                    [self._solvent_mapping] * self._aa_traj.n_frames,
                    [self._solvent_name] * self._aa_traj.n_frames))
                coms = pool.starmap(_map_solvent, args, chunksize)
            # `join` is only legal once the pool has been closed or
            # terminated; leaving the `with` block terminates it.
            pool.join()
            coms = np.squeeze(np.array(coms))
            cg_xyz[:, self._cg_top.select(f"name {self._solvent_name}"), :] = coms

        self._cg_xyz = cg_xyz

    def _construct_traj(self):
        """ Create an mdtraj.Trajectory from the CG topology and xyz. """
        # Time and unit-cell information are copied from the atomistic
        # trajectory.
        cg_traj = Trajectory(self._cg_xyz, self._cg_top,
                             time=self._aa_traj.time,
                             unitcell_lengths=self._aa_traj.unitcell_lengths,
                             unitcell_angles=self._aa_traj.unitcell_angles)

        self._cg_traj = cg_traj
def to_mdtraj_Topology(item, atom_indices='all', check=True):
    """Convert a molsysmt.Topology item into an mdtraj Topology.

    Parameters
    ----------
    item : molsysmt.Topology
        Source topology; its `atoms_dataframe` and `bonds_dataframe` are read.
    atom_indices : 'all' or array-like, default='all'
        Digested when `check` is True.
        NOTE(review): the value is not used afterwards -- every atom is
        always converted; confirm whether sub-selection is intended.
    check : bool, default=True
        Whether to run the input digestion helpers on the arguments.

    Returns
    -------
    mdtraj.Topology
        New topology with the chains, residues, atoms and bonds of `item`.

    Raises
    ------
    LibraryNotFound
        If mdtraj cannot be imported.
    """
    if check:
        digest_item(item, 'molsysmt.Topology')
        atom_indices = digest_atom_indices(atom_indices)

    try:
        from mdtraj import Topology
        from mdtraj.core import element
    except ImportError:
        # Only a failed import means the library is missing; a bare
        # `except` would also relabel unrelated errors and interrupts.
        raise LibraryNotFound('mdtraj')

    atoms = item.atoms_dataframe
    atom_name_array = atoms["atom_name"].to_numpy()
    atom_type_array = atoms["atom_type"].to_numpy()
    group_index_array = atoms["group_index"].to_numpy()
    group_name_array = atoms["group_name"].to_numpy()
    group_id_array = atoms["group_id"].to_numpy()
    chain_index_array = atoms["chain_index"].to_numpy()

    bonds_atom1 = item.bonds_dataframe["atom1_index"].to_numpy()
    bonds_atom2 = item.bonds_dataframe["atom2_index"].to_numpy()

    tmp_item = Topology()

    former_group_index = -1
    former_chain_index = -1

    list_new_atoms = []

    rows = zip(atom_name_array, atom_type_array, group_index_array,
               group_name_array, group_id_array, chain_index_array)

    for atom_name, atom_type, group_index, group_name, group_id, chain_index in rows:

        # A change in chain/group index relative to the previous row starts
        # a new chain/residue.
        new_group = (former_group_index != group_index)
        new_chain = (former_chain_index != chain_index)

        if new_chain:
            chain = tmp_item.add_chain()
            former_chain_index = chain_index

        if new_group:
            residue = tmp_item.add_residue(group_name, chain,
                                           resSeq=str(group_id))
            former_group_index = group_index

        elem = element.get_by_symbol(atom_type)
        atom = tmp_item.add_atom(atom_name, elem, residue)

        list_new_atoms.append(atom)

    # Bond indices are used as positions in the atom list built above
    # (presumably atom indices equal row positions -- confirm).
    for atom_1, atom_2 in zip(bonds_atom1, bonds_atom2):
        # TODO: bond type and bond order are not transferred yet
        tmp_item.add_bond(list_new_atoms[atom_1], list_new_atoms[atom_2])

    return tmp_item