def _guess_element(self, atom_name, residue):
    """Try to guess the element of an atom from its name and residue.

    Parameters
    ----------
    atom_name : str
        Name of the atom.
    residue : object
        Residue containing the atom. ``len(residue)`` and ``residue.name``
        are consulted to disambiguate names such as CA, CE, CD, NE, ND
        and SG.

    Returns
    -------
    element
        An entry of ``elem``, or None when no element could be guessed.
    """
    # An empty name carries no information. Without this guard the
    # ``atom_name[0]`` lookup in the fallback branch raises IndexError,
    # which is not covered by the KeyError handlers below.
    if not atom_name:
        return None

    upper = atom_name.upper()
    if upper.startswith('CL'):
        element = elem.chlorine
    elif upper.startswith('NA'):
        element = elem.sodium
    elif upper.startswith('MG'):
        element = elem.magnesium
    elif upper.startswith('BE'):
        element = elem.beryllium
    elif upper.startswith('LI'):
        element = elem.lithium
    elif upper.startswith('K'):
        element = elem.potassium
    elif upper.startswith('ZN'):
        element = elem.zinc
    elif len(residue) == 1 and upper.startswith('CA'):
        # A lone "CA" in its own residue is taken to be a calcium ion.
        element = elem.calcium

    # TJL has edited this. There are a few issues here. First,
    # parsing for the element is non-trivial, so I do my best
    # below. Second, there is additional parsing code in
    # pdbstructure.py, and I am unsure why it doesn't get used
    # here...
    elif len(residue) > 1 and upper.startswith('CE'):
        element = elem.carbon  # (probably) not cerium...
    elif len(residue) > 1 and upper.startswith('CD'):
        element = elem.carbon  # (probably) not cadmium...
    elif residue.name in ['TRP', 'ARG', 'GLN', 'HIS'] and upper.startswith('NE'):
        element = elem.nitrogen  # (probably) not neon...
    elif residue.name in ['ASN'] and upper.startswith('ND'):
        element = elem.nitrogen  # (probably) not neodymium...
    elif residue.name == 'CYS' and upper.startswith('SG'):
        element = elem.sulfur  # (probably) not seaborgium...
    else:
        # Fall back to the first character, then to the first two
        # characters with digits (and trailing A/B labels) removed.
        try:
            element = elem.get_by_symbol(atom_name[0])
        except KeyError:
            try:
                symbol = atom_name[0:2].strip().rstrip(
                    "AB0123456789").lstrip("0123456789")
                element = elem.get_by_symbol(symbol)
            except KeyError:
                element = None

    return element
def from_openmm(cls, value):
    """Build an mdtraj topology that mirrors an OpenMM topology.

    Parameters
    ----------
    value : simtk.openmm.app.Topology
        The OpenMM topology to convert.

    Returns
    -------
    out
        A new ``cls`` instance with the same chains, residues (including
        their segment ids), atoms and bonds as ``value``.

    Raises
    ------
    TypeError
        If ``value`` is not an OpenMM Topology.
    """
    openmm_app = import_('simtk.openmm.app')
    if not isinstance(value, openmm_app.Topology):
        message = ('value must be an OpenMM Topology. '
                   'You supplied a %s' % type(value))
        raise TypeError(message)

    converted = cls()
    # Maps each OpenMM atom to the atom created for it, for bond transfer.
    counterpart = {}

    for omm_chain in value.chains():
        new_chain = converted.add_chain()
        for omm_residue in omm_chain.residues():
            new_residue = converted.add_residue(
                omm_residue.name, new_chain, omm_residue.segment_id)
            for omm_atom in omm_residue.atoms():
                # Atoms without an element are mapped to ``elem.virtual``.
                element = (
                    elem.virtual
                    if omm_atom.element is None
                    else elem.get_by_symbol(omm_atom.element.symbol)
                )
                counterpart[omm_atom] = converted.add_atom(
                    omm_atom.name, element, new_residue)

    for first, second in value.bonds():
        converted.add_bond(counterpart[first], counterpart[second])

    return converted
def from_openmm(cls, value):
    """Convert an OpenMM topology into an mdtraj topology.

    Parameters
    ----------
    value : simtk.openmm.app.Topology
        The OpenMM topology to convert.

    Returns
    -------
    out
        A new ``cls`` instance holding the chains, residues, atoms and
        bonds of ``value``.

    Raises
    ------
    TypeError
        If ``value`` is not an OpenMM Topology.
    """
    openmm_app = import_('simtk.openmm.app')
    if not isinstance(value, openmm_app.Topology):
        message = ('value must be an OpenMM Topology. '
                   'You supplied a %s' % type(value))
        raise TypeError(message)

    result = cls()
    # OpenMM atom -> atom created in ``result``; used to translate bonds.
    new_atom_for = {}

    for src_chain in value.chains():
        dst_chain = result.add_chain()
        for src_residue in src_chain.residues():
            dst_residue = result.add_residue(src_residue.name, dst_chain)
            for src_atom in src_residue.atoms():
                if src_atom.element is not None:
                    element = elem.get_by_symbol(src_atom.element.symbol)
                else:
                    # No element on the OpenMM side: use ``elem.virtual``.
                    element = elem.virtual
                new_atom_for[src_atom] = result.add_atom(
                    src_atom.name, element, dst_residue)

    for left, right in value.bonds():
        result.add_bond(new_atom_for[left], new_atom_for[right])

    return result
def json_to_mdtraj_topology(json_string):
    """Rebuild an mdtraj Topology from its JSON description.

    Copied in part from the ``topology`` property of MDTraj.formats.hdf5.

    Parameters
    ----------
    json_string : str
        JSON document with a ``chains`` list (each chain holding
        ``residues``, each residue holding ``atoms``) and a ``bonds`` list
        of atom-index pairs.

    Returns
    -------
    topology : mdj.Topology
    """
    by_index = operator.itemgetter('index')
    spec = json.loads(json_string)
    top = mdj.Topology()

    for chain_spec in sorted(spec['chains'], key=by_index):
        new_chain = top.add_chain()
        for res_spec in sorted(chain_spec['residues'], key=by_index):
            if "resSeq" in res_spec:
                res_seq = res_spec["resSeq"]
            else:
                res_seq = None
                warn('No resSeq information found in HDF file, defaulting to zero-based indices')
            seg_id = res_spec["segmentID"] if "segmentID" in res_spec else ""

            new_residue = top.add_residue(
                res_spec['name'], new_chain, resSeq=res_seq, segment_id=seg_id)

            for atom_spec in sorted(res_spec['atoms'], key=by_index):
                # Missing or unknown symbols fall back to ``elem.virtual``.
                try:
                    element = elem.get_by_symbol(atom_spec['element'])
                except KeyError:
                    element = elem.virtual
                top.add_atom(atom_spec['name'], element, new_residue)

    # Bonds refer to atoms by their position in the finished topology.
    all_atoms = list(top.atoms)
    for i, j in spec['bonds']:
        top.add_bond(all_atoms[i], all_atoms[j])

    return top
def _guess_element(self, atom_name, residue):
    """Try to guess the element of an atom from its name and residue.

    Parameters
    ----------
    atom_name : str
        Name of the atom.
    residue : object
        Residue containing the atom. ``len(residue)`` and ``residue.name``
        are consulted to disambiguate names such as CA, CE, CD, NE, ND
        and SG.

    Returns
    -------
    element
        An entry of ``elem``, or None when no element could be guessed.
    """
    # An empty name carries no information. Without this guard the
    # ``atom_name[0]`` lookup in the fallback branch raises IndexError,
    # which is not covered by the KeyError handlers below.
    if not atom_name:
        return None

    upper = atom_name.upper()
    if upper.startswith('CL'):
        element = elem.chlorine
    elif upper.startswith('NA'):
        element = elem.sodium
    elif upper.startswith('MG'):
        element = elem.magnesium
    elif upper.startswith('BE'):
        element = elem.beryllium
    elif upper.startswith('LI'):
        element = elem.lithium
    elif upper.startswith('K'):
        element = elem.potassium
    elif upper.startswith('ZN'):
        element = elem.zinc
    elif len(residue) == 1 and upper.startswith('CA'):
        # A lone "CA" in its own residue is taken to be a calcium ion.
        element = elem.calcium

    # TJL has edited this. There are a few issues here. First,
    # parsing for the element is non-trivial, so I do my best
    # below. Second, there is additional parsing code in
    # pdbstructure.py, and I am unsure why it doesn't get used
    # here...
    elif len(residue) > 1 and upper.startswith('CE'):
        element = elem.carbon  # (probably) not cerium...
    elif len(residue) > 1 and upper.startswith('CD'):
        element = elem.carbon  # (probably) not cadmium...
    elif residue.name in ['TRP', 'ARG', 'GLN', 'HIS'] and upper.startswith('NE'):
        element = elem.nitrogen  # (probably) not neon...
    elif residue.name in ['ASN'] and upper.startswith('ND'):
        element = elem.nitrogen  # (probably) not neodymium...
    elif residue.name == 'CYS' and upper.startswith('SG'):
        element = elem.sulfur  # (probably) not seaborgium...
    else:
        # Fall back to the first character, then to the first two
        # characters with digits (and trailing A/B labels) removed.
        try:
            element = elem.get_by_symbol(atom_name[0])
        except KeyError:
            try:
                symbol = atom_name[0:2].strip().rstrip("AB0123456789").lstrip("0123456789")
                element = elem.get_by_symbol(symbol)
            except KeyError:
                element = None

    return element
def _read_topology(self):
    """Build a topology from the first frame of the open file.

    The second line of the file is read as the atom count. Each of the
    following ``n_atoms`` lines is parsed as four five-character fields:
    residue number, residue name, atom name and atom number. All atoms
    are placed in a single chain and standard bonds are created at the
    end.

    Returns
    -------
    n_atoms : int or None
        The atom count read from the file (None if that line was never
        reached).
    topology : md.Topology
        The topology assembled from the atom records.

    Raises
    ------
    ValueError
        If the file is closed or was not opened for reading.
    """
    if not self._open:
        raise ValueError('I/O operation on closed file')
    if not self._mode == 'r':
        raise ValueError('file not opened for reading')

    pdb.PDBTrajectoryFile._loadNameReplacementTables()
    residue_replacements = pdb.PDBTrajectoryFile._residueNameReplacements
    atom_replacement_tables = pdb.PDBTrajectoryFile._atomNameReplacements

    n_atoms = None
    topology = md.Topology()
    chain = topology.add_chain()
    residue = None
    # Residue name exactly as written in the file for the current residue.
    # ``residue.name`` may hold a *replaced* name, so comparing the raw
    # name of the next atom against it would start a new residue (and
    # warn) on every atom of any residue whose name was replaced.
    raw_resname = None
    atomReplacements = {}

    # NOTE(review): the loop keeps consuming the file after the last atom
    # record; left as is because callers may rely on the final position.
    for ln, line in enumerate(self._file):
        if ln == 1:
            n_atoms = int(line.strip())
        elif ln > 1 and ln < n_atoms + 2:
            (thisresnum, thisresname, thisatomname, thisatomnum) = \
                [line[i*5:i*5+5].strip() for i in range(4)]
            thisresnum, thisatomnum = map(int, (thisresnum, thisatomnum))

            same_number = residue is not None and residue.resSeq == thisresnum
            if not same_number or raw_resname != thisresname:
                # Only warn when the number really is shared; previously
                # the warning fired for every change of residue name.
                if same_number:
                    warnings.warn(
                        "WARNING: two consecutive residues with same number (%s, %s)"
                        % (thisresname, residue.name))
                raw_resname = thisresname
                if thisresname in residue_replacements:
                    thisresname = residue_replacements[thisresname]
                residue = topology.add_residue(thisresname, chain,
                                               resSeq=thisresnum)
                if thisresname in atom_replacement_tables:
                    atomReplacements = atom_replacement_tables[thisresname]
                else:
                    atomReplacements = {}

            # Guess the element symbol: keep the first character and drop
            # any upper-case letters or digits that follow it.
            thiselem = thisatomname
            if len(thiselem) > 1:
                thiselem = thiselem[0] + sub('[A-Z0-9]', '', thiselem[1:])
            try:
                element = elem.get_by_symbol(thiselem)
            except KeyError:
                element = elem.virtual

            if thisatomname in atomReplacements:
                thisatomname = atomReplacements[thisatomname]

            topology.add_atom(thisatomname, element=element,
                              residue=residue, serial=thisatomnum)

    topology.create_standard_bonds()
    return n_atoms, topology
def topology(self):
    """Read the topology stored in the file.

    Returns
    -------
    topology : mdtraj.Topology or None
        The reconstructed topology, or None when the file has no
        ``topology`` node.
    """
    try:
        payload = self._get_node(self._handle.root, name='topology')[0]
        if not isinstance(payload, string_types):
            payload = payload.decode()
        spec = json.loads(payload)
    except self.tables.NoSuchNodeError:
        return None

    by_index = operator.itemgetter('index')
    top = Topology()

    for chain_spec in sorted(spec['chains'], key=by_index):
        new_chain = top.add_chain()
        for res_spec in sorted(chain_spec['residues'], key=by_index):
            if "resSeq" in res_spec:
                res_seq = res_spec["resSeq"]
            else:
                res_seq = None
                warnings.warn(
                    'No resSeq information found in HDF file, defaulting to zero-based indices'
                )
            seg_id = res_spec["segmentID"] if "segmentID" in res_spec else ""

            new_residue = top.add_residue(
                res_spec['name'], new_chain, resSeq=res_seq, segment_id=seg_id)

            for atom_spec in sorted(res_spec['atoms'], key=by_index):
                # Missing or unknown symbols fall back to ``elem.virtual``.
                try:
                    element = elem.get_by_symbol(atom_spec['element'])
                except KeyError:
                    element = elem.virtual
                top.add_atom(atom_spec['name'], element, new_residue)

    # Bonds refer to atoms by their position in the finished topology.
    all_atoms = list(top.atoms)
    for i, j in spec['bonds']:
        top.add_bond(all_atoms[i], all_atoms[j])

    return top
def _create_com_traj(xyz, time, unitcell_lengths, unitcell_angles, masses=None):
    """Wrap coordinates in a trajectory with one single-atom residue per site.

    For every entry of ``xyz[0]`` a residue named ``COM<i>`` is added to a
    single chain, holding one atom named ``COM`` whose element is looked
    up by the symbol ``'C'``. ``masses`` is accepted but not used.

    Returns
    -------
    md.Trajectory
        Trajectory built from ``xyz``, the new topology, ``time`` and the
        unit cell lengths and angles.
    """
    com_topology = md.Topology()
    single_chain = com_topology.add_chain()

    for index, _ in enumerate(xyz[0]):
        residue_name = 'COM{}'.format(index)
        site_residue = com_topology.add_residue(residue_name, single_chain)
        com_topology.add_atom('COM', get_by_symbol('C'), residue=site_residue)

    trajectory = md.Trajectory(
        xyz,
        topology=com_topology,
        time=time,
        unitcell_lengths=unitcell_lengths,
        unitcell_angles=unitcell_angles,
    )
    return trajectory
def butane_toy_model():
    """Assemble a four-bead, butane-like toy model.

    Returns
    -------
    xyz : np.ndarray, shape (1, 4, 3)
        Starting coordinates, shifted by 5 along every axis.
    top
        The topology exposed by ``model.mapping.top``.
    model : mdb.models.Model
        Model carrying three harmonic bonds, two harmonic angles and one
        cosine dihedral.
    """
    phi0 = np.pi
    phi_ini = 2*np.pi*0.8

    # Starting configuration is close to a cis conformation
    xyz = np.array(
        [[
            [1, -1, 0],
            [0, -1, 0],
            [0, 0, 0],
            [np.cos(phi_ini), 0, np.sin(phi_ini)],
        ]],
        dtype=float,
    )

    # Coordinates must be positive in gromacs
    xyz += np.array([5, 5, 5])

    # Create a mdtraj Topology for our butane molecule.
    newtop = md.Topology()
    chain = newtop.add_chain()
    for bead in range(4):
        residue = newtop.add_residue("GLY", chain, bead)
        newtop.add_atom('CA', get_by_symbol('C'), residue, serial=bead)

    model = mdb.models.Model(newtop, bead_repr="CA")
    model.mapping.add_atoms(mass=10)
    top = model.mapping.top

    # Add bond interactions between consecutive beads
    for first in range(3):
        model.Hamiltonian._add_bond(
            "HARMONIC_BOND", top.atom(first), top.atom(first + 1), 100, 1)

    # Add angle interactions over consecutive bead triples
    for first in range(2):
        model.Hamiltonian._add_angle(
            "HARMONIC_ANGLE",
            top.atom(first), top.atom(first + 1), top.atom(first + 2),
            20, np.pi/2)

    # Add dihedral interaction over all four beads
    model.Hamiltonian._add_dihedral(
        "COSINE_DIHEDRAL",
        top.atom(0), top.atom(1), top.atom(2), top.atom(3),
        0.1, phi0, 1)

    return xyz, top, model
def _read_topology(self):
    """Build a topology from the first frame of the open file.

    The second line of the file is read as the atom count. Each of the
    following ``n_atoms`` lines is parsed as four five-character fields:
    residue number, residue name, atom name and atom number. A new residue
    is started whenever the residue number changes; all atoms go into a
    single chain.

    Returns
    -------
    n_atoms : int or None
    topology : md.Topology

    Raises
    ------
    ValueError
        If the file is closed or was not opened for reading.
    """
    if not self._open:
        raise ValueError('I/O operation on closed file')
    if not self._mode == 'r':
        raise ValueError('file not opened for reading')

    pdb.PDBTrajectoryFile._loadNameReplacementTables()

    n_atoms = None
    topology = md.Topology()
    chain = topology.add_chain()
    current_residue = None
    atom_renames = {}

    for line_no, text in enumerate(self._file):
        if line_no == 1:
            n_atoms = int(text.strip())
            continue
        if not (1 < line_no < n_atoms + 2):
            continue

        res_number = int(text[0:5].strip())
        res_name = text[5:10].strip()
        atom_name = text[10:15].strip()
        atom_number = int(text[15:20].strip())

        if current_residue is None or current_residue.resSeq != res_number:
            if res_name in pdb.PDBTrajectoryFile._residueNameReplacements:
                res_name = pdb.PDBTrajectoryFile._residueNameReplacements[res_name]
            current_residue = topology.add_residue(res_name, chain, resSeq=res_number)
            if res_name in pdb.PDBTrajectoryFile._atomNameReplacements:
                atom_renames = pdb.PDBTrajectoryFile._atomNameReplacements[res_name]
            else:
                atom_renames = {}

        # Guess the element symbol: keep the first character and drop any
        # upper-case letters or digits that follow it.
        symbol = atom_name
        if len(symbol) > 1:
            symbol = symbol[0] + sub('[A-Z0-9]', '', symbol[1:])
        try:
            element = elem.get_by_symbol(symbol)
        except KeyError:
            element = None

        if atom_name in atom_renames:
            atom_name = atom_renames[atom_name]

        topology.add_atom(atom_name, element=element,
                          residue=current_residue, serial=atom_number)

    return n_atoms, topology
def topology(self):
    """Read the topology stored in the file.

    Returns
    -------
    topology : mdtraj.Topology or None
        The reconstructed topology, or None when the file has no
        ``topology`` node.
    """
    try:
        serialized = self._get_node('/', name='topology')[0]
        if not isinstance(serialized, string_types):
            serialized = serialized.decode()
        layout = json.loads(serialized)
    except self.tables.NoSuchNodeError:
        return None

    index_of = operator.itemgetter('index')
    result = Topology()

    for chain_entry in sorted(layout['chains'], key=index_of):
        chain = result.add_chain()
        for residue_entry in sorted(chain_entry['residues'], key=index_of):
            if "resSeq" in residue_entry:
                res_seq = residue_entry["resSeq"]
            else:
                res_seq = None
                warnings.warn('No resSeq information found in HDF file, defaulting to zero-based indices')
            segment = residue_entry["segmentID"] if "segmentID" in residue_entry else ""

            residue = result.add_residue(
                residue_entry['name'], chain, resSeq=res_seq, segment_id=segment)

            for atom_entry in sorted(residue_entry['atoms'], key=index_of):
                # Missing or unknown symbols fall back to ``elem.virtual``.
                try:
                    element = elem.get_by_symbol(atom_entry['element'])
                except KeyError:
                    element = elem.virtual
                result.add_atom(atom_entry['name'], element, residue)

    # Bonds refer to atoms by their position in the finished topology.
    ordered_atoms = list(result.atoms)
    for a, b in layout['bonds']:
        result.add_bond(ordered_atoms[a], ordered_atoms[b])

    return result
def topology(self):
    """Read the topology stored in the file.

    Returns
    -------
    topology : mdtraj.Topology or None
        The reconstructed topology, or None when the file has no
        ``topology`` node.
    """
    try:
        blob = self._get_node("/", name="topology")[0]
        if not isinstance(blob, string_types):
            blob = blob.decode()
        description = json.loads(blob)
    except self.tables.NoSuchNodeError:
        return None

    position = operator.itemgetter("index")
    built = Topology()

    for chain_info in sorted(description["chains"], key=position):
        chain = built.add_chain()
        for residue_info in sorted(chain_info["residues"], key=position):
            if "resSeq" in residue_info:
                res_seq = residue_info["resSeq"]
            else:
                res_seq = None
                warnings.warn("No resSeq information found in HDF file, defaulting to zero-based indices")

            residue = built.add_residue(residue_info["name"], chain, resSeq=res_seq)

            for atom_info in sorted(residue_info["atoms"], key=position):
                # Missing or unknown symbols leave the element as None.
                try:
                    element = elem.get_by_symbol(atom_info["element"])
                except KeyError:
                    element = None
                built.add_atom(atom_info["name"], element, residue)

    # Bonds refer to atoms by their position in the finished topology.
    atom_sequence = list(built.atoms)
    for left, right in description["bonds"]:
        built.add_bond(atom_sequence[left], atom_sequence[right])

    return built
def _topology_from_arrays(AtomID, AtomNames, ChainID, ResidueID, ResidueNames):
    """Build topology object from the arrays stored in the lh5 file.

    Parameters
    ----------
    AtomID, AtomNames, ChainID, ResidueID, ResidueNames : array-like
        Per-atom arrays. Names may be ``bytes`` and are decoded when they
        are not already strings.

    Returns
    -------
    topology : mdtraj.Topology
        Single-chain topology with standard bonds created.

    Raises
    ------
    NotImplementedError
        If ``ChainID`` contains more than one distinct chain identifier.
    """
    # Delayed import due to wacky recursive imports in compatibility
    from mdtraj import Topology
    topology = Topology()

    # Only single-chain files are supported. The previous check passed a
    # generator to np.all(), which yields a truthy object regardless of the
    # contents, so it could never fail. Rejecting only files that really
    # hold several distinct chain ids keeps every single-chain file loading.
    if np.unique(ChainID).size > 1:
        raise NotImplementedError('Im not prepared to parse multiple chains')
    chain0 = topology.add_chain()

    # register the residues
    registered_residues = {}
    for i in np.argsort(ResidueID):
        residue_name = ResidueNames[i]
        if not isinstance(residue_name, basestring):
            residue_name = residue_name.decode()
        if ResidueID[i] not in registered_residues:
            res = topology.add_residue(residue_name, chain0)
            registered_residues[ResidueID[i]] = res

    # register the atoms
    for i in np.argsort(AtomID):
        atom_name = AtomNames[i]
        if not isinstance(atom_name, basestring):
            atom_name = atom_name.decode()

        # The element symbol is the first character after any leading
        # digits. A name that is empty or all digits has no such character
        # (indexing it used to raise IndexError) and gets the fallback.
        stripped_name = atom_name.lstrip('0123456789')
        element = elem.virtual
        if stripped_name:
            try:
                element = elem.get_by_symbol(stripped_name[0])
            except KeyError:
                element = elem.virtual

        topology.add_atom(atom_name, element,
                          registered_residues[ResidueID[i]])

    topology.create_standard_bonds()
    return topology
def _topology_from_arrays(AtomID, AtomNames, ChainID, ResidueID, ResidueNames):
    """Build topology object from the arrays stored in the lh5 file.

    Parameters
    ----------
    AtomID, AtomNames, ChainID, ResidueID, ResidueNames : array-like
        Per-atom arrays. Names may be ``bytes`` and are decoded when they
        are not already strings.

    Returns
    -------
    topology : mdtraj.Topology
        Single-chain topology with standard bonds created. Atoms whose
        element cannot be determined get ``None`` as their element.

    Raises
    ------
    NotImplementedError
        If ``ChainID`` contains more than one distinct chain identifier.
    """
    # Delayed import due to wacky recursive imports in compatibility
    from mdtraj import Topology
    topology = Topology()

    # Only single-chain files are supported. The previous check passed a
    # generator to np.all(), which yields a truthy object regardless of the
    # contents, so it could never fail. Rejecting only files that really
    # hold several distinct chain ids keeps every single-chain file loading.
    if np.unique(ChainID).size > 1:
        raise NotImplementedError('Im not prepared to parse multiple chains')
    chain0 = topology.add_chain()

    # register the residues
    registered_residues = {}
    for i in np.argsort(ResidueID):
        residue_name = ResidueNames[i]
        if not isinstance(residue_name, basestring):
            residue_name = residue_name.decode()
        if ResidueID[i] not in registered_residues:
            res = topology.add_residue(residue_name, chain0)
            registered_residues[ResidueID[i]] = res

    # register the atoms
    for i in np.argsort(AtomID):
        atom_name = AtomNames[i]
        if not isinstance(atom_name, basestring):
            atom_name = atom_name.decode()

        # The element symbol is the first character after any leading
        # digits. A name that is empty or all digits has no such character
        # (indexing it used to raise IndexError) and gets no element.
        stripped_name = atom_name.lstrip('0123456789')
        element = None
        if stripped_name:
            try:
                element = elem.get_by_symbol(stripped_name[0])
            except KeyError:
                element = None

        topology.add_atom(atom_name, element,
                          registered_residues[ResidueID[i]])

    topology.create_standard_bonds()
    return topology
def __init__(self, pdb_line):
    """Create a new pdb.Atom from an ATOM or HETATM line.

    Example line:
    ATOM   2209  CB  TYR A 299       6.167  22.607  20.046  1.00  8.12           C
    00000000011111111112222222222333333333344444444445555555555666666666677777777778
    12345678901234567890123456789012345678901234567890123456789012345678901234567890

    ATOM line format description from
    http://deposit.rcsb.org/adit/docs/pdb_atom_format.html:

    COLUMNS        DATA TYPE       CONTENTS
    --------------------------------------------------------------------------------
     1 -  6        Record name     "ATOM  "
     7 - 11        Integer         Atom serial number.
    13 - 16        Atom            Atom name.
    17             Character       Alternate location indicator.
    18 - 20        Residue name    Residue name.
    22             Character       Chain identifier.
    23 - 26        Integer         Residue sequence number.
    27             AChar           Code for insertion of residues.
    31 - 38        Real(8.3)       Orthogonal coordinates for X in Angstroms.
    39 - 46        Real(8.3)       Orthogonal coordinates for Y in Angstroms.
    47 - 54        Real(8.3)       Orthogonal coordinates for Z in Angstroms.
    55 - 60        Real(6.2)       Occupancy (Default = 1.0).
    61 - 66        Real(6.2)       Temperature factor (Default = 0.0).
    73 - 76        LString(4)      Segment identifier, left-justified.
    77 - 78        LString(2)      Element symbol, right-justified.
    79 - 80        LString(2)      Charge on the atom.

    Raises
    ------
    ValueError
        If the serial number, residue number or a coordinate cannot be
        parsed, or if a fourth residue-name character is present while the
        three official columns are not all filled.
    """
    # We might modify first/final status during _finalize() methods
    self.is_first_atom_in_chain = False
    self.is_final_atom_in_chain = False
    self.is_first_residue_in_chain = False
    self.is_final_residue_in_chain = False

    # Start parsing fields from pdb line
    self.record_name = pdb_line[0:6].strip()
    self.serial_number = int(pdb_line[6:11])
    self.name_with_spaces = pdb_line[12:16]
    alternate_location_indicator = pdb_line[16]
    self.residue_name_with_spaces = pdb_line[17:20]

    # In some MD codes, notably ffamber in gromacs, residue name has a
    # fourth character in column 21
    possible_fourth_character = pdb_line[20:21]
    if possible_fourth_character != " ":
        # Fourth character should only be there if official 3 are already full
        if len(self.residue_name_with_spaces.strip()) != 3:
            raise ValueError('Misaligned residue name: %s' % pdb_line)
        self.residue_name_with_spaces += possible_fourth_character
    self.residue_name = self.residue_name_with_spaces.strip()

    self.chain_id = pdb_line[21]
    self.residue_number = int(pdb_line[22:26])
    self.insertion_code = pdb_line[26]

    # coordinates, occupancy, and temperature factor belong in
    # Atom.Location object
    x = float(pdb_line[30:38])
    y = float(pdb_line[38:46])
    z = float(pdb_line[46:54])

    # Occupancy and temperature factor are optional. float() signals a
    # blank or malformed field with ValueError, so only that is caught
    # (a bare except would also swallow KeyboardInterrupt and the like).
    try:
        occupancy = float(pdb_line[54:60])
    except ValueError:
        occupancy = 1.0
    try:
        temperature_factor = float(pdb_line[60:66])
    except ValueError:
        temperature_factor = 0.0

    self.locations = {}
    loc = Atom.Location(alternate_location_indicator,
                        np.array([x, y, z]),
                        occupancy, temperature_factor,
                        self.residue_name_with_spaces)
    self.locations[alternate_location_indicator] = loc
    self.default_location_id = alternate_location_indicator

    # segment id, element_symbol, and formal_charge are not always present
    self.segment_id = pdb_line[72:76].strip()
    self.element_symbol = pdb_line[76:78].strip()
    try:
        self.formal_charge = int(pdb_line[78:80])
    except ValueError:
        self.formal_charge = None

    # figure out atom element
    try:
        # First try to find a sensible element symbol from columns 76-77
        self.element = element.get_by_symbol(self.element_symbol)
    except KeyError:
        # otherwise, deduce element from first two characters of atom name
        # remove digits found in some hydrogen atom names
        symbol = self.name_with_spaces[0:2].strip().lstrip("0123456789")
        try:
            # Some molecular dynamics PDB files, such as gromacs with the
            # ffamber force field, include 4-character hydrogen atom names
            # beginning with "H". Hopefully elements like holmium (Ho) and
            # mercury (Hg) will have fewer than four characters in the
            # atom name.
            # NOTE(review): ``self.name`` is not assigned in this method;
            # presumably it is a property defined on the class - confirm.
            if len(self.name) == 4 and self.name[0:1] == "H":
                self.element = element.hydrogen
            else:
                self.element = element.get_by_symbol(symbol)
        except KeyError:
            # OK, I give up
            self.element = None
def _to_topology(self, atom_list, chain_types=None, residue_types=None):
    """Create a mdtraj.Topology from a Compound.

    Atoms are added in the order given by ``atom_list``. For each atom its
    ancestors are searched for an instance of ``chain_types`` and of
    ``residue_types``; a new chain/residue is started whenever the matching
    ancestor differs from the one used for the previous atom. Atoms with
    no matching ancestor go into a default chain and a residue named 'RES'.

    Parameters
    ----------
    atom_list : iterable
        Atoms to convert; each must provide ``ancestors()``, ``name`` and
        ``charge``.
    chain_types : type, list, set or tuple of types, optional
        Passed to ``isinstance`` to recognise chain-level ancestors.
    residue_types : type, list, set or tuple of types, optional
        Passed to ``isinstance`` to recognise residue-level ancestors.

    Returns
    -------
    top : mdtraj.Topology

    """
    from mdtraj.core.element import get_by_symbol
    from mdtraj.core.topology import Topology

    # NOTE(review): when a Compound *instance* is passed, the filter becomes
    # the Compound class itself rather than the instance's type - confirm
    # this is intended.
    if isinstance(chain_types, Compound):
        chain_types = [Compound]
    if isinstance(chain_types, (list, set)):
        # isinstance() needs a tuple of types.
        chain_types = tuple(chain_types)

    if isinstance(residue_types, Compound):
        residue_types = [Compound]
    if isinstance(residue_types, (list, set)):
        residue_types = tuple(residue_types)
    top = Topology()
    # Maps each source atom to the topology atom created for it.
    atom_mapping = {}

    default_chain = top.add_chain()
    default_residue = top.add_residue('RES', default_chain)

    last_residue_compound = None
    last_chain_compound = None
    last_residue = None
    last_chain = None

    for atom in atom_list:
        # Chains
        for parent in atom.ancestors():
            if chain_types and isinstance(parent, chain_types):
                if parent != last_chain_compound:
                    last_chain_compound = parent
                    last_chain = top.add_chain()
                    # Every new chain gets its own fallback residue.
                    last_chain_default_residue = top.add_residue(
                        'RES', last_chain)
                    last_chain.compound = last_chain_compound
                break
        else:
            # No ancestor matched: use the default chain.
            last_chain = default_chain
            last_chain.compound = last_chain_compound

        # Residues
        for parent in atom.ancestors():
            if residue_types and isinstance(parent, residue_types):
                if parent != last_residue_compound:
                    last_residue_compound = parent
                    last_residue = top.add_residue(
                        parent.__class__.__name__, last_chain)
                    last_residue.compound = last_residue_compound
                break
        else:
            # No ancestor matched: use the fallback residue of the
            # current chain.
            if last_chain != default_chain:
                last_residue = last_chain_default_residue
            else:
                last_residue = default_residue
            last_residue.compound = last_residue_compound

        # Add the actual atoms
        try:
            elem = get_by_symbol(atom.name)
        except KeyError:
            # Names that are not element symbols use the "VS" element.
            elem = get_by_symbol("VS")
        at = top.add_atom(atom.name, elem, last_residue)
        at.charge = atom.charge
        atom_mapping[atom] = at

    # Remove empty default residues.
    chains_to_remove = [
        chain for chain in top.chains if chain.n_atoms == 0
    ]
    residues_to_remove = [res for res in top.residues if res.n_atoms == 0]
    for chain in chains_to_remove:
        top._chains.remove(chain)
    for res in residues_to_remove:
        for chain in top.chains:
            try:
                chain._residues.remove(res)
            except ValueError:  # Already gone.
                pass

    for atom1, atom2 in self.bonds():
        # Ensure that both atoms are part of the compound. This becomes an
        # issue if you try to convert a sub-compound to a topology which is
        # bonded to a different subcompound.
        if all(a in atom_mapping.keys() for a in [atom1, atom2]):
            top.add_bond(atom_mapping[atom1], atom_mapping[atom2])
    return top
def from_dataframe(cls, atoms, bonds=None):
    """Create a mdtraj topology from a pandas data frame

    Parameters
    ----------
    atoms : pandas.DataFrame
        The atoms in the topology, represented as a data frame. This data
        frame should have columns "serial" (atom index), "name" (atom name),
        "element" (atom's element), "resSeq" (index of the residue)
        "resName" (name of the residue), "chainID" (index of the chain),
        and optionally "segmentID", following the same conventions
        as wwPDB 3.0 format. The frame is not modified.
    bonds : np.ndarray, shape=(n_bonds, 2), dtype=int, optional
        The bonds in the topology, represented as an n_bonds x 2 array
        of the indices of the atoms involved in each bond. Specifying
        bonds here is optional. To create standard protein bonds, you can
        use `create_standard_bonds` to "fill in" the bonds on your newly
        created Topology object

    Returns
    -------
    out
        A new ``cls`` instance populated from ``atoms`` and ``bonds``.

    Raises
    ------
    TypeError
        If ``atoms`` is not a DataFrame or ``bonds`` is not an ndarray.
    ValueError
        If a required column is missing, the index is not 0..n-1, or the
        atoms of one residue do not share a residue name.

    See Also
    --------
    create_standard_bonds
    """
    pd = import_('pandas')

    if bonds is None:
        bonds = np.zeros((0, 2))

    # Check the argument types before touching ``atoms.columns`` so that a
    # wrong type produces the intended TypeError, not an AttributeError.
    if not isinstance(atoms, pd.DataFrame):
        raise TypeError('atoms must be an instance of pandas.DataFrame. '
                        'You supplied a %s' % type(atoms))
    if not isinstance(bonds, np.ndarray):
        raise TypeError('bonds must be an instance of numpy.ndarray. '
                        'You supplied a %s' % type(bonds))

    for col in ["name", "element", "resSeq",
                "resName", "chainID", "serial"]:
        if col not in atoms.columns:
            raise ValueError('dataframe must have column %s' % col)

    if "segmentID" not in atoms.columns:
        # assign() returns a new frame, so the caller's DataFrame is not
        # altered as a side effect of building the topology.
        atoms = atoms.assign(segmentID="")

    if not np.all(np.arange(len(atoms)) == atoms.index):
        raise ValueError('atoms must be uniquely numbered '
                         'starting from zero.')

    out = cls()
    # Pre-size the atom list; atoms are stored by their data frame index.
    out._atoms = [None for i in range(len(atoms))]

    for ci in np.unique(atoms['chainID']):
        chain_atoms = atoms[atoms['chainID'] == ci]
        c = out.add_chain()

        for ri in np.unique(chain_atoms['resSeq']):
            residue_atoms = chain_atoms[chain_atoms['resSeq'] == ri]
            rnames = residue_atoms['resName']
            residue_name = np.array(rnames)[0]
            segids = residue_atoms['segmentID']
            segment_id = np.array(segids)[0]
            if not np.all(rnames == residue_name):
                raise ValueError('All of the atoms with residue index %d '
                                 'do not share the same residue name' % ri)
            r = out.add_residue(residue_name, c, ri, segment_id)

            for atom_index, atom in residue_atoms.iterrows():
                atom_index = int(atom_index)  # Fixes bizarre hashing issue on Py3K.  See #545
                a = Atom(atom['name'], elem.get_by_symbol(atom['element']),
                         atom_index, r, serial=atom['serial'])
                out._atoms[atom_index] = a
                r._atoms.append(a)

    for ai1, ai2 in bonds:
        out.add_bond(out.atom(ai1), out.atom(ai2))

    out._numAtoms = out.n_atoms
    return out
def load_gro(filename):
    """Load a .gro file and additionally extract per-atom velocities.

    The file is loaded once through ``md.load`` and then re-read line by
    line: the topology is rebuilt from the atom records of the first frame
    and the coordinates of every frame are checked against the loaded
    trajectory.

    Returns
    -------
    traj
        The trajectory from ``md.load`` with ``top`` replaced by the
        rebuilt topology.
    velxyz : np.ndarray or None
        Velocities with the same shape as ``traj.xyz``, or None when every
        velocity is zero.
    """
    filename = os.path.abspath(filename)
    traj = md.load(filename)
    velocities = np.zeros_like(traj.xyz)
    # (start, stop) column pairs of the numeric fields; detected lazily
    # from the first atom line.
    field_spans = None

    rebuilt_top = md.Topology()
    only_chain = rebuilt_top.add_chain()
    current_residue = None

    with open(filename) as handle:
        line_iter = iter(handle)
        for frame in count():
            try:
                title = next(line_iter)
            except StopIteration:
                assert frame == len(traj)
                break

            n_atoms = int(next(line_iter))
            assert n_atoms == traj.n_atoms

            for atom_pos in range(n_atoms):
                record = next(line_iter)

                # The topology is taken from the first frame only.
                if frame == 0:
                    res_number = int(record[0:5])
                    res_name = record[5:10].strip()
                    atom_name = record[10:15].strip()
                    atom_number = int(record[15:20])

                    if current_residue is None or res_number != current_residue.resSeq:
                        current_residue = rebuilt_top.add_residue(
                            res_name, only_chain, resSeq=res_number)

                    # Element guess: first character plus whatever remains
                    # after dropping upper-case letters and digits.
                    symbol = (
                        atom_name[0] + sub('[A-Z0-9]', '', atom_name[1:])
                        if len(atom_name) > 1
                        else atom_name
                    )
                    try:
                        element = elem.get_by_symbol(symbol)
                    except KeyError:
                        element = elem.virtual

                    rebuilt_top.add_atom(atom_name, element=element,
                                         residue=current_residue,
                                         serial=atom_number)

                if field_spans is None:
                    # The distance between the first two decimal points
                    # gives the width of every numeric field.
                    dot_columns = (col for col, char
                                   in enumerate(record[20:], start=20)
                                   if char == '.')
                    field_width = abs(next(dot_columns) - next(dot_columns))
                    field_spans = list(
                        pairwise(range(20, 20 + field_width * 7, field_width)))

                numbers = [float(record[start:stop])
                           for start, stop in field_spans
                           if stop <= len(record)]
                x, y, z, *vel = numbers

                assert np.isclose(traj.xyz[frame, atom_pos, :], (x, y, z)).all()
                if vel:
                    velocities[frame, atom_pos, :] = vel

            # Skip the box-vector line that closes the frame.
            next(line_iter)

    traj.top = rebuilt_top

    if np.all(velocities == 0.0):
        velocities = None

    return traj, velocities
def _to_topology(self, atom_list, chain_types=None, residue_types=None):
    """Create a mdtraj.Topology from a Compound.

    Atoms are added in the order given by ``atom_list``. For each atom its
    ancestors are searched for an instance of ``chain_types`` and of
    ``residue_types``; a new chain/residue is started whenever the matching
    ancestor differs from the one used for the previous atom. Atoms with
    no matching ancestor go into a default chain and a residue named 'RES'.

    Parameters
    ----------
    atom_list : iterable
        Atoms to convert; each must provide ``ancestors()``, ``name`` and
        ``charge``.
    chain_types : type, list, set or tuple of types, optional
        Passed to ``isinstance`` to recognise chain-level ancestors.
    residue_types : type, list, set or tuple of types, optional
        Passed to ``isinstance`` to recognise residue-level ancestors.

    Returns
    -------
    top : mdtraj.Topology

    """
    # NOTE(review): when a Compound *instance* is passed, the filter becomes
    # the Compound class itself rather than the instance's type - confirm
    # this is intended.
    if isinstance(chain_types, Compound):
        chain_types = [Compound]
    if isinstance(chain_types, (list, set)):
        # isinstance() needs a tuple of types.
        chain_types = tuple(chain_types)

    if isinstance(residue_types, Compound):
        residue_types = [Compound]
    if isinstance(residue_types, (list, set)):
        residue_types = tuple(residue_types)
    top = Topology()
    # Maps each source atom to the topology atom created for it.
    atom_mapping = {}

    default_chain = top.add_chain()
    default_residue = top.add_residue('RES', default_chain)

    last_residue_compound = None
    last_chain_compound = None
    last_residue = None
    last_chain = None

    for atom in atom_list:
        # Chains
        for parent in atom.ancestors():
            if chain_types and isinstance(parent, chain_types):
                if parent != last_chain_compound:
                    last_chain_compound = parent
                    last_chain = top.add_chain()
                    # Every new chain gets its own fallback residue.
                    last_chain_default_residue = top.add_residue('RES', last_chain)
                    last_chain.compound = last_chain_compound
                break
        else:
            # No ancestor matched: use the default chain.
            last_chain = default_chain
            last_chain.compound = last_chain_compound

        # Residues
        for parent in atom.ancestors():
            if residue_types and isinstance(parent, residue_types):
                if parent != last_residue_compound:
                    last_residue_compound = parent
                    last_residue = top.add_residue(parent.__class__.__name__, last_chain)
                    last_residue.compound = last_residue_compound
                break
        else:
            # No ancestor matched: use the fallback residue of the
            # current chain.
            if last_chain != default_chain:
                last_residue = last_chain_default_residue
            else:
                last_residue = default_residue
            last_residue.compound = last_residue_compound

        # Add the actual atoms
        try:
            elem = get_by_symbol(atom.name)
        except KeyError:
            # Names that are not element symbols use the "VS" element.
            elem = get_by_symbol("VS")
        at = top.add_atom(atom.name, elem, last_residue)
        at.charge = atom.charge
        atom_mapping[atom] = at

    # Remove empty default residues.
    chains_to_remove = [chain for chain in top.chains if chain.n_atoms == 0]
    residues_to_remove = [res for res in top.residues if res.n_atoms == 0]
    for chain in chains_to_remove:
        top._chains.remove(chain)
    for res in residues_to_remove:
        for chain in top.chains:
            try:
                chain._residues.remove(res)
            except ValueError:  # Already gone.
                pass

    for atom1, atom2 in self.bonds():
        # Ensure that both atoms are part of the compound. This becomes an
        # issue if you try to convert a sub-compound to a topology which is
        # bonded to a different subcompound.
        if all(a in atom_mapping.keys() for a in [atom1, atom2]):
            top.add_bond(atom_mapping[atom1], atom_mapping[atom2])
    return top
def from_dataframe(cls, atoms, bonds=None):
    """Create a mdtraj topology from a pandas data frame

    Parameters
    ----------
    atoms : pandas.DataFrame
        The atoms in the topology, represented as a data frame. This data
        frame should have columns "serial" (atom index), "name" (atom name),
        "element" (atom's element), "resSeq" (index of the residue)
        "resName" (name of the residue), "chainID" (index of the chain),
        and optionally "segmentID", following the same conventions
        as wwPDB 3.0 format. The frame is not modified.
    bonds : np.ndarray, shape=(n_bonds, 2), dtype=int, optional
        The bonds in the topology, represented as an n_bonds x 2 array
        of the indices of the atoms involved in each bond. Specifying
        bonds here is optional. To create standard protein bonds, you can
        use `create_standard_bonds` to "fill in" the bonds on your newly
        created Topology object

    Returns
    -------
    out
        A new ``cls`` instance populated from ``atoms`` and ``bonds``.

    Raises
    ------
    TypeError
        If ``atoms`` is not a DataFrame or ``bonds`` is not an ndarray.
    ValueError
        If a required column is missing, the index is not 0..n-1, or the
        atoms of one residue do not share a residue name.

    See Also
    --------
    create_standard_bonds
    """
    pd = import_('pandas')

    if bonds is None:
        bonds = np.zeros((0, 2))

    # Check the argument types before touching ``atoms.columns`` so that a
    # wrong type produces the intended TypeError, not an AttributeError.
    if not isinstance(atoms, pd.DataFrame):
        raise TypeError('atoms must be an instance of pandas.DataFrame. '
                        'You supplied a %s' % type(atoms))
    if not isinstance(bonds, np.ndarray):
        raise TypeError('bonds must be an instance of numpy.ndarray. '
                        'You supplied a %s' % type(bonds))

    required_columns = [
        "name", "element", "resSeq", "resName", "chainID", "serial"
    ]
    for col in required_columns:
        if col not in atoms.columns:
            raise ValueError('dataframe must have column %s' % col)

    if "segmentID" not in atoms.columns:
        # assign() returns a new frame, so the caller's DataFrame is not
        # altered as a side effect of building the topology.
        atoms = atoms.assign(segmentID="")

    if not np.all(np.arange(len(atoms)) == atoms.index):
        raise ValueError('atoms must be uniquely numbered '
                         'starting from zero.')

    out = cls()
    # Pre-size the atom list; atoms are stored by their data frame index.
    out._atoms = [None for i in range(len(atoms))]

    for ci in np.unique(atoms['chainID']):
        chain_atoms = atoms[atoms['chainID'] == ci]
        c = out.add_chain()

        for ri in np.unique(chain_atoms['resSeq']):
            residue_atoms = chain_atoms[chain_atoms['resSeq'] == ri]
            rnames = residue_atoms['resName']
            residue_name = np.array(rnames)[0]
            segids = residue_atoms['segmentID']
            segment_id = np.array(segids)[0]
            if not np.all(rnames == residue_name):
                raise ValueError('All of the atoms with residue index %d '
                                 'do not share the same residue name' % ri)
            r = out.add_residue(residue_name, c, ri, segment_id)

            for atom_index, atom in residue_atoms.iterrows():
                # Fixes bizarre hashing issue on Py3K.  See #545
                atom_index = int(atom_index)
                a = Atom(atom['name'], elem.get_by_symbol(atom['element']),
                         atom_index, r, serial=atom['serial'])
                out._atoms[atom_index] = a
                r._atoms.append(a)

    for ai1, ai2 in bonds:
        out.add_bond(out.atom(ai1), out.atom(ai2))

    out._numAtoms = out.n_atoms
    return out
def to_mdtraj_Topology(item, atom_indices='all', check=True):
    """Convert a topology item into an ``mdtraj.Topology``.

    Parameters
    ----------
    item : object
        The source topology. It must expose ``atoms_dataframe`` (columns
        "atom_name", "atom_type", "group_index", "group_name", "group_id"
        and "chain_index" are read) and ``bonds_dataframe`` (columns
        "atom1_index" and "atom2_index" are read). When `check` is true it
        is validated with ``digest_item(item, 'molsysmt.Topology')``.
    atom_indices : optional, default 'all'
        Only passed through ``digest_atom_indices`` when `check` is true.
        NOTE(review): the value is not used afterwards, so every atom of
        `item` is converted regardless of the selection -- confirm this is
        intended.
    check : bool, default True
        Whether to run the digestion helpers on the arguments.

    Returns
    -------
    mdtraj.Topology
        A new topology holding one atom per row of ``atoms_dataframe`` and
        one bond per row of ``bonds_dataframe``.

    Raises
    ------
    LibraryNotFound
        If mdtraj cannot be imported.
    """
    if check:
        digest_item(item, 'molsysmt.Topology')
        atom_indices = digest_atom_indices(atom_indices)

    # Only an import failure means "library missing"; anything else
    # (including KeyboardInterrupt) must not be masked.
    try:
        from mdtraj import Topology
        from mdtraj.core import element
    except ImportError as err:
        raise LibraryNotFound('mdtraj') from err

    atoms_df = item.atoms_dataframe
    atom_name_array = atoms_df["atom_name"].to_numpy()
    atom_type_array = atoms_df["atom_type"].to_numpy()
    group_index_array = atoms_df["group_index"].to_numpy()
    group_name_array = atoms_df["group_name"].to_numpy()
    group_id_array = atoms_df["group_id"].to_numpy()
    chain_index_array = atoms_df["chain_index"].to_numpy()

    bonds_df = item.bonds_dataframe
    bonds_atom1 = bonds_df["atom1_index"].to_numpy()
    bonds_atom2 = bonds_df["atom2_index"].to_numpy()

    tmp_item = Topology()

    # A new mdtraj chain/residue is opened whenever the chain/group index
    # differs from the one of the previous row.
    former_group_index = -1
    former_chain_index = -1
    list_new_atoms = []

    for ii in range(atoms_df.shape[0]):
        group_index = group_index_array[ii]
        chain_index = chain_index_array[ii]

        if former_chain_index != chain_index:
            chain = tmp_item.add_chain()
            former_chain_index = chain_index

        if former_group_index != group_index:
            residue = tmp_item.add_residue(group_name_array[ii], chain,
                                           resSeq=str(group_id_array[ii]))
            former_group_index = group_index

        elem = element.get_by_symbol(atom_type_array[ii])
        atom = tmp_item.add_atom(atom_name_array[ii], elem, residue)
        list_new_atoms.append(atom)

    # TODO: bond type and bond order are not transferred yet.
    for atom_1, atom_2 in zip(bonds_atom1, bonds_atom2):
        tmp_item.add_bond(list_new_atoms[atom_1], list_new_atoms[atom_2])

    return tmp_item
def json_to_mdtraj_topology(json_string):
    """Convert a JSON string topology to an mdtraj.Topology object.

    This can be used to write out different topology formats from
    mdtraj.

    Parameters
    ----------
    json_string : str
        JSON string of the topology.

    Returns
    -------
    topology : mdtraj.Topology object

    Warnings
    --------
    The topology format for wepy is subject to change in the future!

    Notes
    -----
    Copied in part from MDTraj.formats.hdf5 topology property.

    Chains, residues and atoms are each visited in ascending order of
    their 'index' entry. A residue without "resSeq" triggers a warning
    and is added with ``resSeq=None``; a residue without "segmentID"
    gets an empty segment id. An atom whose element cannot be resolved
    becomes a virtual site.
    """
    by_index = operator.itemgetter('index')
    top_data = json.loads(json_string)

    topology = mdj.Topology()

    for chain_rec in sorted(top_data['chains'], key=by_index):
        chain = topology.add_chain()

        for res_rec in sorted(chain_rec['residues'], key=by_index):
            if "resSeq" in res_rec:
                res_seq = res_rec["resSeq"]
            else:
                res_seq = None
                warn(
                    'No resSeq information found in HDF file, defaulting to zero-based indices'
                )

            seg_id = res_rec.get("segmentID", "")

            residue = topology.add_residue(res_rec['name'], chain,
                                           resSeq=res_seq,
                                           segment_id=seg_id)

            for atom_rec in sorted(res_rec['atoms'], key=by_index):
                try:
                    element = elem.get_by_symbol(atom_rec['element'])
                except KeyError:
                    element = elem.virtual
                topology.add_atom(atom_rec['name'], element, residue)

    # Bonds refer to atoms by their position in the finished topology.
    all_atoms = list(topology.atoms)
    for first, second in top_data['bonds']:
        topology.add_bond(all_atoms[first], all_atoms[second])

    return topology
def __init__(self, pdb_line, pdbstructure=None):
    """Create a new pdb.Atom from an ATOM or HETATM line.

    Example line:
    ATOM   2209  CB  TYR A 299       6.167  22.607  20.046  1.00  8.12           C
    00000000011111111112222222222333333333344444444445555555555666666666677777777778
    12345678901234567890123456789012345678901234567890123456789012345678901234567890

    ATOM line format description from
      http://deposit.rcsb.org/adit/docs/pdb_atom_format.html:

    COLUMNS  DATA TYPE     CONTENTS
    --------------------------------------------------------------------------------
     1 -  6  Record name   "ATOM  "
     7 - 11  Integer       Atom serial number.
    13 - 16  Atom          Atom name.
    17       Character     Alternate location indicator.
    18 - 20  Residue name  Residue name.
    22       Character     Chain identifier.
    23 - 26  Integer       Residue sequence number.
    27       AChar         Code for insertion of residues.
    31 - 38  Real(8.3)     Orthogonal coordinates for X in Angstroms.
    39 - 46  Real(8.3)     Orthogonal coordinates for Y in Angstroms.
    47 - 54  Real(8.3)     Orthogonal coordinates for Z in Angstroms.
    55 - 60  Real(6.2)     Occupancy (Default = 1.0).
    61 - 66  Real(6.2)     Temperature factor (Default = 0.0).
    73 - 76  LString(4)    Segment identifier, left-justified.
    77 - 78  LString(2)    Element symbol, right-justified.
    79 - 80  LString(2)    Charge on the atom.

    Parameters
    ----------
    pdb_line : str
        The ATOM/HETATM record to parse.
    pdbstructure : object, optional
        The structure being built. When given, its hex-numbering flags and
        running atom/residue counters are read and updated, and its current
        model/chain/residue is consulted when the residue number field is
        unreadable. When None, the line is parsed on its own.

    Raises
    ------
    ValueError
        If the residue name is misaligned, if a coordinate cannot be parsed,
        or if the serial/residue number is neither decimal nor hexadecimal
        and no `pdbstructure` is available to supply a fallback.
    """
    # We might modify first/final status during _finalize() methods
    self.is_first_atom_in_chain = False
    self.is_final_atom_in_chain = False
    self.is_first_residue_in_chain = False
    self.is_final_residue_in_chain = False

    # Start parsing fields from pdb line
    self.record_name = pdb_line[0:6].strip()

    # VMD sometimes uses hex for atoms greater than 9,999
    if pdbstructure is not None and pdbstructure._atom_numbers_are_hex:
        self.serial_number = int(pdb_line[6:11], 16)
    else:
        try:
            self.serial_number = int(pdb_line[6:11])
        except ValueError:
            try:
                self.serial_number = int(pdb_line[6:11], 16)
            except ValueError:
                if pdbstructure is None:
                    # No structure to take the next number from.
                    raise
                # Just give it the next number in sequence.
                self.serial_number = pdbstructure._next_atom_number
            else:
                # Remember the hex convention for the following atoms; with
                # no structure there is nothing to remember.
                if pdbstructure is not None:
                    pdbstructure._atom_numbers_are_hex = True

    self.name_with_spaces = pdb_line[12:16]
    alternate_location_indicator = pdb_line[16]

    self.residue_name_with_spaces = pdb_line[17:20]
    # In some MD codes, notably ffamber in gromacs, residue name has a
    # fourth character in column 21
    possible_fourth_character = pdb_line[20:21]
    if possible_fourth_character != " ":
        # Fourth character should only be there if official 3 are already full
        if len(self.residue_name_with_spaces.strip()) != 3:
            raise ValueError('Misaligned residue name: %s' % pdb_line)
        self.residue_name_with_spaces += possible_fourth_character
    self.residue_name = self.residue_name_with_spaces.strip()

    self.chain_id = pdb_line[21]

    if pdbstructure is not None and pdbstructure._residue_numbers_are_hex:
        self.residue_number = int(pdb_line[22:26], 16)
    else:
        try:
            self.residue_number = int(pdb_line[22:26])
        except ValueError:
            try:
                self.residue_number = int(pdb_line[22:26], 16)
            except ValueError:
                if pdbstructure is None:
                    # The recovery below needs the structure's state.
                    raise
                # When VMD runs out of hex values it starts filling in the
                # residue ID field with ****. Look at the most recent atoms
                # to figure out whether this is a new residue or not.
                if (pdbstructure._current_model is None
                        or pdbstructure._current_model._current_chain is None
                        or pdbstructure._current_model._current_chain._current_residue is None):
                    # This is the first residue in the model.
                    self.residue_number = pdbstructure._next_residue_number
                else:
                    currentRes = pdbstructure._current_model._current_chain._current_residue
                    if currentRes.name_with_spaces != self.residue_name_with_spaces:
                        # The residue name has changed.
                        self.residue_number = pdbstructure._next_residue_number
                    elif self.name_with_spaces in currentRes.atoms_by_name:
                        # There is already an atom with this name.
                        self.residue_number = pdbstructure._next_residue_number
                    else:
                        self.residue_number = currentRes.number
            else:
                if pdbstructure is not None:
                    pdbstructure._residue_numbers_are_hex = True

    self.insertion_code = pdb_line[26]

    # coordinates, occupancy, and temperature factor belong in
    # Atom.Location object
    x = float(pdb_line[30:38])
    y = float(pdb_line[38:46])
    z = float(pdb_line[46:54])
    try:
        occupancy = float(pdb_line[54:60])
    except ValueError:
        occupancy = 1.0
    try:
        temperature_factor = float(pdb_line[60:66])
    except ValueError:
        temperature_factor = 0.0
    self.locations = {}
    loc = Atom.Location(alternate_location_indicator, np.array([x, y, z]),
                        occupancy, temperature_factor,
                        self.residue_name_with_spaces)
    self.locations[alternate_location_indicator] = loc
    self.default_location_id = alternate_location_indicator

    # segment id, element_symbol, and formal_charge are not always present
    self.segment_id = pdb_line[72:76].strip()
    self.element_symbol = pdb_line[76:78].strip()
    try:
        self.formal_charge = int(pdb_line[78:80])
    except ValueError:
        self.formal_charge = None

    # figure out atom element
    try:
        # First try to find a sensible element symbol from columns 76-77
        self.element = element.get_by_symbol(self.element_symbol)
    except KeyError:
        # otherwise, deduce element from first two characters of atom name
        # remove digits found in some hydrogen atom names
        symbol = self.name_with_spaces[0:2].strip().lstrip("0123456789")
        try:
            # Some molecular dynamics PDB files, such as gromacs with ffamber
            # force field, include 4-character hydrogen atom names beginning
            # with "H". Hopefully elements like holmium (Ho) and mercury (Hg)
            # will have fewer than four characters in the atom name.
            # NOTE(review): self.name is not assigned in this method;
            # presumably it is a property defined elsewhere on the class.
            if len(self.name) == 4 and self.name[0:1] == "H":
                self.element = element.hydrogen
            else:
                self.element = element.get_by_symbol(symbol)
        except KeyError:
            # OK, I give up
            self.element = None

    if pdbstructure is not None:
        pdbstructure._next_atom_number = self.serial_number + 1
        pdbstructure._next_residue_number = self.residue_number + 1
def topology_from_numpy(atoms, bonds=None):
    """Create a mdtraj topology from numpy arrays

    Parameters
    ----------
    atoms : np.ndarray
        The atoms in the topology, represented as a structured array.
        It should have fields "serial" (atom index), "name" (atom name),
        "element" (atom's element), "resSeq" (index of the residue)
        "resName" (name of the residue), "chainID" (index of the chain),
        and optionally "segmentID", following the same conventions
        as wwPDB 3.0 format. Text fields may hold ASCII bytes or str.
        The array is not modified.
    bonds : np.ndarray, shape=(n_bonds, 2), dtype=int, optional
        The bonds in the topology, represented as an n_bonds x 2 array
        of the indices of the atoms involved in each bond. Specifying
        bonds here is optional. To create standard protein bonds, you can
        use `create_standard_bonds` to "fill in" the bonds on your newly
        created Topology object

    Returns
    -------
    out : mdtraj.Topology
        The newly built topology.

    Raises
    ------
    ValueError
        If a required field is missing or the atoms of one residue carry
        different residue names.

    See Also
    --------
    create_standard_bonds
    """
    if bonds is None:
        bonds = np.zeros((0, 2))

    for col in ["name", "element", "resSeq", "resName", "chainID", "serial"]:
        if col not in atoms.dtype.names:
            raise ValueError('dataframe must have column %s' % col)

    # A structured array cannot gain a field by item assignment, so a
    # missing "segmentID" is handled by defaulting each residue's segment
    # id to "" rather than by writing into `atoms`.
    has_segment_id = "segmentID" in atoms.dtype.names

    from mdtraj.core.topology import Atom
    from mdtraj.core import element as elem

    def _text(value):
        # Fields may be stored as bytes ('S' dtype) or str ('U' dtype).
        if isinstance(value, bytes):
            return value.decode('ascii')
        return str(value)

    out = mdtraj.Topology()

    # TODO: allow for h5py data sets here, is there a way to check generic
    # ndarray interface? (`bonds` is deliberately not type-checked.)

    # Atoms are stored at their position in `atoms`, which need not match
    # the order in which chains/residues are visited below.
    out._atoms = [None] * len(atoms)

    N = np.arange(0, len(atoms))

    for ci in np.unique(atoms['chainID']):
        in_chain = atoms['chainID'] == ci
        chain_atoms = atoms[in_chain]
        chain_indices = N[in_chain]
        c = out.add_chain()

        for ri in np.unique(chain_atoms['resSeq']):
            in_residue = chain_atoms['resSeq'] == ri
            residue_atoms = chain_atoms[in_residue]
            # Positions of this residue's atoms in the full `atoms` array.
            indices = chain_indices[in_residue]

            rnames = residue_atoms['resName']
            residue_name = np.array(rnames)[0]
            if has_segment_id:
                segment_id = _text(np.array(residue_atoms['segmentID'])[0])
            else:
                segment_id = ""
            if not np.all(rnames == residue_name):
                raise ValueError('All of the atoms with residue index %d '
                                 'do not share the same residue name' % ri)
            r = out.add_residue(_text(residue_name), c, ri, segment_id)

            for ix, atom in enumerate(residue_atoms):
                atom_index = int(indices[ix])
                a = Atom(_text(atom['name']),
                         elem.get_by_symbol(_text(atom['element'])),
                         atom_index, r, serial=atom['serial'])
                out._atoms[atom_index] = a
                r._atoms.append(a)

    for ai1, ai2 in bonds:
        out.add_bond(out.atom(ai1), out.atom(ai2))

    out._numAtoms = out.n_atoms
    return out
def load_prmtop(filename):
    """Load an AMBER prmtop topology file from disk.

    Parameters
    ----------
    filename : str
        Path to the prmtop file on disk.

    Returns
    -------
    top : md.Topology
        The resulting topology, as an md.Topology object.

    Raises
    ------
    Exception
        If a bond entry refers to a negative atom pointer.

    Notes
    -----
    Deprecated fields in the prmtop file are not loaded. This includes the
    BOX dimensions, which should be stored in trajectory files instead of the
    prmtop for systems with periodic boundary conditions. Because '.binpos'
    files do not store box dimensions, this means that unitcell information
    will be lost if you use .binpos + .prmtop files with MDTraj.

    ``%COMMENT`` lines are skipped, and a section whose ``%FORMAT`` spec is
    not recognised by ``FORMAT_RE_PATTERN`` is left empty.

    Examples
    --------
    >>> topology = md.load_prmtop('mysystem.prmtop')
    >>> # or
    >>> trajectory = md.load('trajectory.mdcrd', top='system.prmtop')
    """
    top = topology.Topology()

    prmtop_version = None
    flags = []
    raw_format = {}
    raw_data = {}
    # True while inside a section whose format could not be parsed.
    skip_section = False

    with open(filename, 'r') as f:
        for line in f:
            if line.startswith('%VERSION'):
                tag, prmtop_version = line.rstrip().split(None, 1)

            elif line.startswith('%FLAG'):
                tag, flag = line.rstrip().split(None, 1)
                flags.append(flag)
                raw_data[flag] = []
                skip_section = False

            elif line.startswith('%FORMAT'):
                fmt = line.rstrip()
                index0 = fmt.index('(')
                index1 = fmt.index(')')
                fmt = fmt[index0 + 1:index1]
                m = FORMAT_RE_PATTERN.search(fmt)
                if m is None:
                    # Unknown format spec: the item width is unknown, so
                    # the section's data lines cannot be split.
                    skip_section = True
                else:
                    raw_format[flags[-1]] = (fmt, m.group(1), m.group(2),
                                             m.group(3), m.group(4))

            elif line.startswith('%COMMENT'):
                # Comments are not data; do not append them to the section.
                continue

            elif flags \
                    and 'TITLE' == flags[-1] \
                    and not raw_data['TITLE']:
                raw_data['TITLE'] = line.rstrip()

            elif not skip_section:
                flag = flags[-1]
                # raw_format entries are
                # (format, numItems, itemType, itemLength, itemPrecision).
                iLength = int(raw_format[flag][3])
                line = line.rstrip()
                for index in range(0, len(line), iLength):
                    item = line[index:index + iLength]
                    if item:
                        raw_data[flag].append(item.strip())

    # Add atoms to the topology
    pdb.PDBTrajectoryFile._loadNameReplacementTables()
    previous_residue = None
    c = top.add_chain()

    n_atoms = int(_get_pointer_value('NATOM', raw_data))

    # build a dictionary telling us which atom belongs to which residue
    residue_pointer_dict = {}
    res_pointers = raw_data['RESIDUE_POINTER']
    first_atom = [int(p) - 1 for p in res_pointers]  # minus 1 necessary
    first_atom.append(n_atoms)  # sentinel closing the last residue
    res = 0
    for i in range(n_atoms):
        while first_atom[res + 1] <= i:
            res += 1
        residue_pointer_dict[i] = res

    # add each residue/atom to the topology object
    for index in range(n_atoms):
        res_number = residue_pointer_dict[index]
        if res_number != previous_residue:
            previous_residue = res_number

            res_name = raw_data['RESIDUE_LABEL'][res_number].strip()
            if res_name in pdb.PDBTrajectoryFile._residueNameReplacements:
                res_name = pdb.PDBTrajectoryFile._residueNameReplacements[
                    res_name]
            r = top.add_residue(res_name, c)
            if res_name in pdb.PDBTrajectoryFile._atomNameReplacements:
                atom_replacements = pdb.PDBTrajectoryFile._atomNameReplacements[
                    res_name]
            else:
                atom_replacements = {}

        atom_name = raw_data['ATOM_NAME'][index].strip()
        if atom_name in atom_replacements:
            atom_name = atom_replacements[atom_name]

        # Get the element from the prmtop file if available
        if 'ATOMIC_NUMBER' in raw_data:
            try:
                element = elem.Element.getByAtomicNumber(
                    int(raw_data['ATOMIC_NUMBER'][index]))
            except KeyError:
                element = None
        else:
            # Try to guess the element from the atom name.
            upper = atom_name.upper()
            if upper.startswith('CL'):
                element = elem.chlorine
            elif upper.startswith('NA'):
                element = elem.sodium
            elif upper.startswith('MG'):
                element = elem.magnesium
            elif upper.startswith('ZN'):
                element = elem.zinc
            else:
                try:
                    element = elem.get_by_symbol(atom_name[0])
                except KeyError:
                    element = None

        top.add_atom(atom_name, element, r)

    # Add bonds to the topology. Entries come in groups of three; the first
    # two values of each group are floor-divided by 3 to get atom indices
    # and the third value is not used here.
    bond_pointers = raw_data["BONDS_INC_HYDROGEN"] + raw_data[
        "BONDS_WITHOUT_HYDROGEN"]
    atoms = list(top.atoms)
    bond_list = []
    for ii in range(0, len(bond_pointers), 3):
        if int(bond_pointers[ii]) < 0 or int(bond_pointers[ii + 1]) < 0:
            raise Exception("Found negative bonded atom pointers %s" %
                            ((bond_pointers[ii], bond_pointers[ii + 1]), ))
        bond_list.append(
            (int(bond_pointers[ii]) // 3, int(bond_pointers[ii + 1]) // 3))

    for bond in bond_list:
        top.add_bond(atoms[bond[0]], atoms[bond[1]])

    return top
def load_prmtop(filename):
    """Load an AMBER prmtop topology file from disk.

    Parameters
    ----------
    filename : str
        Path to the prmtop file on disk.

    Returns
    -------
    top : md.Topology
        The resulting topology, as an md.Topology object.

    Notes
    -----
    Deprecated fields in the prmtop file are not loaded. This includes the
    BOX dimensions, which should be stored in trajectory files instead of the
    prmtop for systems with periodic boundary conditions. Because '.binpos'
    files do not store box dimensions, this means that unitcell information
    will be lost if you use .binpos + .prmtop files with MDTraj.

    The TITLE and CTITLE sections, and any section whose ``%FORMAT`` spec is
    not matched by ``FORMAT_RE_PATTERN``, are left empty. ``%COMMENT`` lines
    are skipped.

    Examples
    --------
    >>> topology = md.load_prmtop('mysystem.prmtop')
    >>> # or
    >>> trajectory = md.load('trajectory.mdcrd', top='system.prmtop')
    """
    top = topology.Topology()

    prmtop_version = None
    flags = []
    raw_format = {}
    raw_data = {}
    ignoring = False

    # ---- pass 1: split the file into named sections of raw string items
    with open(filename, 'r') as handle:
        for line in handle:
            if line.startswith('%'):
                if line.startswith('%VERSION'):
                    tag, prmtop_version = line.rstrip().split(None, 1)
                elif line.startswith('%FLAG'):
                    tag, flag = line.rstrip().split(None, 1)
                    flags.append(flag)
                    raw_data[flag] = []
                    ignoring = flag in ('TITLE', 'CTITLE')
                elif line.startswith('%FORMAT'):
                    text = line.rstrip()
                    spec = text[text.index('(') + 1:text.index(')')]
                    match = FORMAT_RE_PATTERN.search(spec)
                    if match is None:
                        ignoring = True
                    else:
                        raw_format[flags[-1]] = (spec,) + match.group(1, 2, 3, 4)
                # %COMMENT and any other %-line carry no data.
                continue

            if ignoring:
                continue

            flag = flags[-1]
            # Entry layout: (format, numItems, itemType, itemLength,
            # itemPrecision); only the item length is needed to cut the line.
            width = int(raw_format[flag][3])
            payload = line.rstrip()
            raw_data[flag].extend(
                payload[start:start + width].strip()
                for start in range(0, len(payload), width)
                if payload[start:start + width])

    # ---- pass 2: atoms and residues
    pdb.PDBTrajectoryFile._loadNameReplacementTables()
    chain = top.add_chain()

    n_atoms = int(_get_pointer_value('NATOM', raw_data))

    # Residue number of every atom, derived from the 1-based first-atom
    # pointers; the trailing n_atoms entry closes the last residue.
    boundaries = [int(p) - 1 for p in raw_data['RESIDUE_POINTER']]
    boundaries.append(n_atoms)
    residue_of_atom = []
    current = 0
    for atom_idx in range(n_atoms):
        while boundaries[current + 1] <= atom_idx:
            current += 1
        residue_of_atom.append(current)

    guess_table = (('CL', 'chlorine'), ('NA', 'sodium'),
                   ('MG', 'magnesium'), ('ZN', 'zinc'))

    last_residue = None
    for atom_idx in range(n_atoms):
        res_number = residue_of_atom[atom_idx]
        if res_number != last_residue:
            last_residue = res_number

            res_name = raw_data['RESIDUE_LABEL'][res_number].strip()
            if res_name in pdb.PDBTrajectoryFile._residueNameReplacements:
                res_name = pdb.PDBTrajectoryFile._residueNameReplacements[res_name]
            residue = top.add_residue(res_name, chain)
            if res_name in pdb.PDBTrajectoryFile._atomNameReplacements:
                renames = pdb.PDBTrajectoryFile._atomNameReplacements[res_name]
            else:
                renames = {}

        atom_name = raw_data['ATOM_NAME'][atom_idx].strip()
        if atom_name in renames:
            atom_name = renames[atom_name]

        if 'ATOMIC_NUMBER' in raw_data:
            # Get the element from the prmtop file if available
            try:
                element = elem.Element.getByAtomicNumber(int(raw_data['ATOMIC_NUMBER'][atom_idx]))
            except KeyError:
                element = elem.virtual
        else:
            # Try to guess the element from the atom name.
            upper = atom_name.upper()
            for prefix, attr in guess_table:
                if upper.startswith(prefix):
                    element = getattr(elem, attr)
                    break
            else:
                try:
                    element = elem.get_by_symbol(atom_name[0])
                except KeyError:
                    element = elem.virtual

        top.add_atom(atom_name, element, residue)

    # ---- pass 3: bonds. Entries come in groups of three; the first two
    # values of each group are floor-divided by 3 to get atom indices.
    pointers = raw_data["BONDS_INC_HYDROGEN"] + raw_data["BONDS_WITHOUT_HYDROGEN"]
    atoms = list(top.atoms)
    pairs = []
    for start in range(0, len(pointers), 3):
        if int(pointers[start]) < 0 or int(pointers[start + 1]) < 0:
            raise Exception("Found negative bonded atom pointers %s"
                            % ((pointers[start], pointers[start + 1]),))
        pairs.append((int(pointers[start]) // 3, int(pointers[start + 1]) // 3))

    # All pointers are validated before the first bond is added.
    for first, second in pairs:
        top.add_bond(atoms[first], atoms[second])

    return top
def _new_json_to_mdtraj_topology(json_string):
    """Another implementation of converting JSON to mdtraj.

    Unlike `json_to_mdtraj_topology`, chains and residues are added in the
    order they appear in the JSON (they are not sorted by 'index'), every
    residue must carry "resSeq" and "segmentID", and bonds are resolved
    through each atom's own 'index' entry rather than through its position
    in the finished topology.

    Parameters
    ----------
    json_string : str
        JSON string of the topology. It must have a 'chains' list (each
        chain with 'residues', each residue with 'name', 'resSeq',
        'segmentID' and 'atoms', each atom with 'index', 'name' and
        'element') and a 'bonds' list of atom-index pairs.

    Returns
    -------
    topology : mdtraj.Topology object

    Raises
    ------
    KeyError
        If a required entry is missing or a bond refers to an atom index
        that no atom declares.
    """
    topology_dict = json.loads(json_string)

    # start a new topology
    topology = mdj.Topology()

    # Atom objects keyed by the 'index' they have in the JSON, so bonds can
    # be resolved with the file's own indexing.
    atoms = {}

    for chain_dict in topology_dict['chains']:
        chain = topology.add_chain()

        for residue_dict in chain_dict['residues']:
            residue = topology.add_residue(
                residue_dict['name'], chain,
                resSeq=residue_dict['resSeq'],
                segment_id=residue_dict['segmentID'])

            for atom_dict in sorted(residue_dict['atoms'],
                                    key=operator.itemgetter('index')):
                try:
                    element = elem.get_by_symbol(atom_dict['element'])
                except KeyError:
                    # Unknown (or missing) element: treat as a virtual site.
                    element = elem.virtual
                atoms[atom_dict['index']] = topology.add_atom(
                    atom_dict['name'], element, residue)

    for index1, index2 in topology_dict['bonds']:
        topology.add_bond(atoms[index1], atoms[index2])

    return topology