def removeLipidsInProtein(prot, memb, lipidsel='lipids'):
    """ Removes the lipids of a membrane which lie inside the convex hull of a protein.

    The hull is calculated from the `name CA` atoms of `prot`. A residue of `memb` is considered to lie inside
    the hull if adding its atoms to the hull points leaves the hull vertices unchanged. This does not work well
    for lipids crossing out of the hull: if even one atom of the lipid is outside, the hull changes and the
    lipid is kept. Such lipids are assumed to get removed later by their clashes with the protein.

    Parameters
    ----------
    prot : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The protein whose CA atoms define the convex hull
    memb : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The membrane. A copy is modified and returned, the input is left untouched
    lipidsel : str
        Atom selection string for the lipids of `memb`. Only atoms of this selection are removed

    Returns
    -------
    memb : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        Copy of the membrane without the selected atoms of the residues located inside the hull
    numlipsrem : int
        Number of residues which lie inside the hull and had atoms removed
    """
    # TODO: Do the same with Morphological Snakes
    from scipy.spatial import ConvexHull
    memb = memb.copy()

    # Convex hull of the protein
    cacoords = prot.get('coords', 'name CA')
    hull = ConvexHull(cacoords)

    sequence = sequenceID((memb.resid, memb.segid))
    lipids = memb.atomselect(lipidsel)  # Only remove lipids, waters are ok
    toremove = np.zeros(len(sequence), dtype=bool)
    numlipsrem = 0
    # Residues without any selected atom can never be removed, so they are neither tested nor counted
    for res in np.unique(sequence[lipids]):
        atoms = np.where(sequence == res)[0]
        # vstack promotes a single-atom residue to a (1, 3) row instead of failing on a 1-D array
        newhull = ConvexHull(np.vstack((cacoords, memb.get('coords', sel=atoms))))
        # If the hull didn't change by adding the residue, it lies within the convex hull. Remove it.
        if np.array_equal(hull.vertices, newhull.vertices):
            toremove[atoms] = True
            numlipsrem += 1

    memb.remove(toremove & lipids)
    return memb, numlipsrem
def _calcarrays(self, mol):
    """ Builds the index arrays describing the backbone of the atoms selected by `self.sel`.

    Works on a filtered copy of `mol`, so all returned indices refer to that filtered copy.

    Returns
    -------
    ca_indices : np.ndarray of int32
        Indices of the `name CA` atoms
    nco_indices : np.ndarray of int32
        One row per residue with the first backbone atom index matching the names `N.*`, `C` and `O.*`.
        A missing atom is flagged with -1
    proline_indices : np.ndarray of int32
        1 for each CA atom whose resname is PRO, 0 otherwise
    chain_ids : np.ndarray of int32
        For each CA atom, the position of its chain among the sorted unique chains
    """
    mol = mol.copy()
    mol.filter(self.sel, _logger=False)

    def first_or_minus1(selection):
        # First atom index matching the selection, -1 when nothing matches
        found = mol.atomselect(selection, indexes=True)
        return found[0] if len(found) != 0 else -1

    # Renumber the residues so that each (resid, chain, insertion) combination can be selected by resid
    mol.resid = sequenceID((mol.resid, mol.chain, mol.insertion))

    ca_indices = mol.atomselect('name CA', indexes=True)
    chainids = mol.get('chain', sel=ca_indices)
    resnames = mol.get('resname', sel=ca_indices)

    _, chain_positions = np.unique(chainids, return_inverse=True)
    chain_ids = chain_positions.astype(np.int32)

    templates = ('resid {} and backbone and name "N.*"',
                 'resid {} and backbone and name C',
                 'resid {} and backbone and name "O.*"')
    nco_indices = np.array([[first_or_minus1(template.format(res)) for template in templates]
                            for res in np.unique(mol.resid)], np.int32)

    proline_indices = np.array(resnames == 'PRO', dtype=np.int32)
    ca_indices = np.array(ca_indices, dtype=np.int32)
    return ca_indices, nco_indices, proline_indices, chain_ids
def _processTraj(self, mol):
    """ Runs `stride` on the atoms chosen by `self._getSelections` and returns one array per result entry.

    The molecule is copied and filtered first, then its fields are converted into the flat arrays and lists
    handed to `stride`.

    Returns
    -------
    ss : np.ndarray of objects
        One character array per entry returned by `stride` (presumably one per frame - TODO confirm).
        If `self._simple` is set, the result is passed through `_ssmap` before being returned.
    """
    selection = self._getSelections(mol)
    mol = mol.copy()
    mol.filter(selection, _logger=False)

    # Ugly conversions to feed it to sensitive C code
    uqresid = np.unique(mol.get('resid'))
    numres = len(uqresid)
    # Number of atoms of each unique resid
    resatmcount = np.bincount(mol.resid)[uqresid]
    residstr = [str(resid) for resid in mol.resid]

    # Running atom index inside each resid; -1 marks atoms which were not assigned
    atomids = np.full(mol.numAtoms, -1, dtype=np.int32)
    for resid in uqresid:
        inresidue = mol.resid == resid
        atomids[inresidue] = np.arange(np.sum(inresidue))

    sslist = stride(mol.coords.ravel(order='F'),
                    np.array([numres], dtype=np.int32),
                    resatmcount.astype(np.int32),
                    np.array(sequenceID(mol.resid) - 1, dtype=np.int32),
                    np.zeros(mol.numAtoms, dtype=np.int32),
                    atomids.astype(np.int32),
                    mol.chain[0],
                    residstr,
                    mol.resname.tolist(),
                    mol.name.tolist(),
                    1,
                    mol.numFrames,
                    mol.numAtoms)

    # Split every returned sequence into an array of single characters
    ss = np.zeros(len(sslist), dtype=object)
    for i, entry in enumerate(sslist):
        ss[i] = np.array(list(entry))

    return _ssmap(ss) if self._simple else ss
def _calcRadiiMapping(self, mol):
    """ Calculates the radius and the group index of every atom selected by `self._sel`.

    The element of an atom is taken from the first character of its name and looked up in a fixed table of
    radii. `self._probeRadius` is added to every radius.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The molecule on which the selection is evaluated

    Returns
    -------
    radii : np.ndarray of float32
        Radius plus probe radius for each selected atom
    atom_mapping : np.ndarray of int32
        Group index of each selected atom. In 'atom' mode every atom is its own group, in 'residue' mode atoms
        are grouped by (resid, chain, segid)
    sel : np.ndarray
        The atom selection as returned by `mol.atomselect`

    Raises
    ------
    KeyError
        If the first character of an atom name is not an element of the radii table
    ValueError
        If `self._mode` is neither "atom" nor "residue"
    """
    _ATOMIC_RADII = {'C': 1.5, 'F': 1.2, 'H': 0.4, 'N': 1.10, 'O': 1.05, 'S': 1.6, 'P': 1.6}

    sel = mol.atomselect(self._sel)
    # A plain lookup instead of np.vectorize, which refuses to run on an empty selection
    atom_radii = [_ATOMIC_RADII[name[0]] for name in mol.name[sel]]
    radii = np.array(atom_radii, np.float32) + self._probeRadius

    if self._mode == 'atom':
        atom_mapping = np.arange(np.sum(sel), dtype=np.int32)
    elif self._mode == 'residue':
        atom_mapping = sequenceID((mol.resid[sel], mol.chain[sel], mol.segid[sel])).astype(np.int32)
    else:
        raise ValueError('mode must be one of "residue", "atom". "{}" supplied'.format(self._mode))

    return radii, atom_mapping, sel
def removeLipidsInProtein(prot, memb, lipidsel='lipids'):
    """ Removes the lipids of a membrane which lie inside the convex hull of a protein.

    The hull is calculated from the `name CA` atoms of `prot`. A residue of `memb` is considered to lie inside
    the hull if adding its atoms to the hull points leaves the hull vertices unchanged. This does not work well
    for lipids crossing out of the hull: if even one atom of the lipid is outside, the hull changes and the
    lipid is kept. Such lipids are assumed to get removed later by their clashes with the protein.

    Parameters
    ----------
    prot : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The protein whose CA atoms define the convex hull
    memb : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The membrane. A copy is modified and returned, the input is left untouched
    lipidsel : str
        Atom selection string for the lipids of `memb`. Only atoms of this selection are removed

    Returns
    -------
    memb : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        Copy of the membrane without the selected atoms of the residues located inside the hull
    numlipsrem : int
        Number of residues which lie inside the hull and had atoms removed
    """
    # TODO: Do the same with Morphological Snakes
    from scipy.spatial import ConvexHull
    memb = memb.copy()

    # Convex hull of the protein
    cacoords = prot.get('coords', 'name CA')
    hull = ConvexHull(cacoords)

    sequence = sequenceID((memb.resid, memb.segid))
    lipids = memb.atomselect(lipidsel)  # Only remove lipids, waters are ok
    toremove = np.zeros(len(sequence), dtype=bool)
    numlipsrem = 0
    # Residues without any selected atom can never be removed. Skipping them keeps the returned count honest
    # and avoids recalculating the hull for every water.
    for res in np.unique(sequence[lipids]):
        atoms = np.where(sequence == res)[0]
        newhull = ConvexHull(np.vstack((cacoords, memb.get('coords', sel=atoms))))
        # If the hull didn't change by adding the residue, it lies within the convex hull. Remove it.
        if np.array_equal(hull.vertices, newhull.vertices):
            toremove[atoms] = True
            numlipsrem += 1

    memb.remove(toremove & lipids)
    return memb, numlipsrem
def tileMembrane(memb, xmin, ymin, xmax, ymax, buffer=1.5):
    """ Tile a membrane in the X and Y dimensions to reach a specific size.

    The size of a tile is taken from the bounding box of the water atoms of `memb`. Copies of the membrane
    are placed on a grid starting at (xmin, ymin), and residues reaching beyond xmax or ymax are removed
    from every copy.

    Parameters
    ----------
    memb : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The membrane to be tiled. It is copied, the input is left untouched
    xmin : float
        X coordinate at which the first tile is placed
    ymin : float
        Y coordinate at which the first tile is placed
    xmax : float
        Residues with an atom at larger x are removed
    ymax : float
        Residues with an atom at larger y are removed
    buffer : float
        Additional spacing added between consecutive tiles

    Returns
    -------
    megamemb : A big membrane Molecule
    """
    from htmd.progress.progress import ProgressBar
    memb = memb.copy()
    memb.resid = sequenceID(memb.resid)

    # The waters define the extent of a single tile
    watercoords = memb.get('coords', 'water')
    lowest = np.min(watercoords, axis=0)
    minmemb = lowest.flatten()
    size = (np.max(watercoords, axis=0) - lowest).flatten()

    xreps = int(np.ceil((xmax - xmin) / size[0]))
    yreps = int(np.ceil((ymax - ymin) / size[1]))
    logger.info('Replicating Membrane {}x{}'.format(xreps, yreps))

    from htmd.molecule.molecule import Molecule
    megamemb = Molecule()
    bar = ProgressBar(xreps * yreps, description='Replicating Membrane')
    for x in range(xreps):
        xpos = xmin + x * (size[0] + buffer)
        for y in range(yreps):
            ypos = ymin + y * (size[1] + buffer)
            tile = memb.copy()
            tile.moveBy([xpos - float(minmemb[0]), ypos - float(minmemb[1]), 0])
            tile.remove('same resid as (x > {} or y > {})'.format(xmax, ymax), _logger=False)
            # Tiles are numbered consecutively in the order in which they are placed
            tile.set('segid', 'M{}'.format(x * yreps + y))
            megamemb.append(tile)
            bar.progress()
    bar.stop()

    # Membranes don't tile perfectly. Need to remove waters that clash with lipids of other tiles
    # Some clashes will still occur between periodic images however
    megamemb.remove('same fragment as water and within 1.5 of not water', _logger=False)
    return megamemb
def removeAtomsInHull(mol1, mol2, hullsel, removesel):
    """ Calculates the convex hull of an atom selection in mol1 and removes atoms within that hull in mol2.

    A residue of mol2 (unique resid/segid combination) is considered to lie within the hull if adding all of
    its atoms to the hull points leaves the hull vertices unchanged.

    Parameters
    ----------
    mol1 : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        Molecule for which to calculate the convex hull
    mol2 : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        Molecule which contains the atoms which we check if they are within the hull
    hullsel : str
        Atom selection string for atoms in mol1 from which to calculate the convex hull.
        See more `here <http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.2/ug/node89.html>`__
    removesel : str
        Atom selection string for atoms in mol2 from which to remove the ones which are within the hull.
        See more `here <http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.2/ug/node89.html>`__

    Returns
    -------
    newmol2 : Molecule
        mol2 but without any atoms located within the convex hull
    numrem : int
        Number of fragments removed, i.e. residues within the hull which contain atoms of `removesel`
    """
    # TODO: Look into Morphological Snakes
    from scipy.spatial import ConvexHull
    mol2 = mol2.copy()

    # Convex hull of the selection in mol1
    hullcoords = mol1.get('coords', hullsel)
    hull = ConvexHull(hullcoords)

    sequence = sequenceID((mol2.resid, mol2.segid))
    rematoms = mol2.atomselect(removesel)
    toremove = np.zeros(len(sequence), dtype=bool)
    numrem = 0
    # Only fragments containing atoms of removesel can be removed. Testing just those keeps the returned
    # count equal to what is actually removed and skips needless hull calculations.
    for res in np.unique(sequence[rematoms]):
        atoms = np.where(sequence == res)[0]
        newhull = ConvexHull(np.vstack((hullcoords, mol2.get('coords', sel=atoms))))
        # If the hull didn't change by adding the fragment, it lies within the convex hull. Remove it.
        if np.array_equal(hull.vertices, newhull.vertices):
            toremove[atoms] = True
            numrem += 1

    mol2.remove(toremove & rematoms)
    return mol2, numrem
def embed(mol1, mol2, gap=1.3):
    '''Embeds one molecule into another removing overlaps.

    Both molecules are copied and merged into one. Atoms are removed when they share a residue with an atom
    which lies closer than `gap` to the other molecule.

    NOTE(review): the overlap selection targets the atoms tagged with occupancy 2, which are the ones coming
    from mol2. The original summary stated that residues of mol1 get removed - confirm which one is intended.

    Parameters
    ----------
    mol1 : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The first Molecule object
    mol2 : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The second Molecule object
    gap : float
        Minimum space in A between atoms of the two molecules

    Return
    ------
    newmol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The resulting Molecule object

    Example
    -------
    >>> all = embed(memb, prot)
    '''
    first = mol1.copy()
    merged = mol2.copy()

    # The occupancy field is borrowed to tell the atoms of the two molecules apart after merging
    occfirst = first.get('occupancy')
    occsecond = merged.get('occupancy')
    first.set('occupancy', 1)
    merged.set('occupancy', 2)
    merged.append(first)
    fromfirst = merged.atomselect('occupancy 1')
    fromsecond = merged.atomselect('occupancy 2')

    # The beta field is borrowed to give every residue a unique number
    betabackup = merged.get('beta')
    merged.set('beta', sequenceID(merged.resid))

    # Calculate overlapping atoms
    overlapsel = '(occupancy 2) and same beta as exwithin ' + str(gap) + ' of (occupancy 1)'
    overlaps = merged.atomselect(overlapsel)

    # Put the borrowed fields back before removing anything
    merged.set('beta', betabackup)
    merged.set('occupancy', occfirst, fromfirst)
    merged.set('occupancy', occsecond, fromsecond)

    merged.remove(overlaps, _logger=False)
    return merged
def removeAtomsInHull(mol1, mol2, hullsel, removesel):
    """ Calculates the convex hull of an atom selection in mol1 and removes atoms within that hull in mol2.

    A residue of mol2 (unique resid/segid combination) is considered to lie within the hull if adding all of
    its atoms to the hull points leaves the hull vertices unchanged.

    Parameters
    ----------
    mol1 : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        Molecule for which to calculate the convex hull
    mol2 : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        Molecule which contains the atoms which we check if they are within the hull
    hullsel : str
        Atom selection string for atoms in mol1 from which to calculate the convex hull.
        See more `here <http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.2/ug/node89.html>`__
    removesel : str
        Atom selection string for atoms in mol2 from which to remove the ones which are within the hull.
        See more `here <http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.2/ug/node89.html>`__

    Returns
    -------
    newmol2 : Molecule
        mol2 but without any atoms located within the convex hull
    numrem : int
        Number of fragments removed, i.e. residues within the hull which contain atoms of `removesel`
    """
    # TODO: Look into Morphological Snakes
    from scipy.spatial import ConvexHull
    mol2 = mol2.copy()

    # Convex hull of the selection in mol1
    hullcoords = mol1.get('coords', hullsel)
    hull = ConvexHull(hullcoords)

    sequence = sequenceID((mol2.resid, mol2.segid))
    rematoms = mol2.atomselect(removesel)
    toremove = np.zeros(len(sequence), dtype=bool)
    numrem = 0
    # Fragments without atoms of removesel are never removed, so they must not be counted (nor tested)
    for res in np.unique(sequence[rematoms]):
        atoms = np.where(sequence == res)[0]
        newhull = ConvexHull(np.vstack((hullcoords, mol2.get('coords', sel=atoms))))
        # If the hull didn't change by adding the fragment, it lies within the convex hull. Remove it.
        if np.array_equal(hull.vertices, newhull.vertices):
            toremove[atoms] = True
            numrem += 1

    mol2.remove(toremove & rematoms)
    return mol2, numrem
def _calcRadiiMapping(self, mol):
    """ Calculates the radius and the group index of every atom selected by `self._sel`.

    The element of an atom is taken from the first character of its name and looked up in a fixed table of
    radii. `self._probeRadius` is added to every radius.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The molecule on which the selection is evaluated

    Returns
    -------
    radii : np.ndarray of float32
        Radius plus probe radius for each selected atom
    atom_mapping : np.ndarray of int32
        Group index of each selected atom. In 'atom' mode every atom is its own group, in 'residue' mode atoms
        are grouped by (resid, chain, segid)
    sel : np.ndarray
        The atom selection as returned by `mol.atomselect`

    Raises
    ------
    KeyError
        If the first character of an atom name is not an element of the radii table
    ValueError
        If `self._mode` is neither "atom" nor "residue"
    """
    _ATOMIC_RADII = {'C': 1.5, 'F': 1.2, 'H': 0.4, 'N': 1.10, 'O': 1.05, 'S': 1.6, 'P': 1.6}

    sel = mol.atomselect(self._sel)
    # np.vectorize cannot be called on size 0 inputs, a list comprehension also handles an empty selection
    atom_radii = [_ATOMIC_RADII[name[0]] for name in mol.name[sel]]
    radii = np.array(atom_radii, np.float32) + self._probeRadius

    if self._mode == 'atom':
        atom_mapping = np.arange(np.sum(sel), dtype=np.int32)
    elif self._mode == 'residue':
        atom_mapping = sequenceID((mol.resid[sel], mol.chain[sel], mol.segid[sel])).astype(np.int32)
    else:
        raise ValueError('mode must be one of "residue", "atom". "{}" supplied'.format(self._mode))

    return radii, atom_mapping, sel
def embed(mol1, mol2, gap=1.3):
    '''Embeds one molecule into another removing overlaps.

    Both molecules are copied and merged into one. Atoms are removed when they share a residue with an atom
    which lies closer than `gap` to the other molecule.

    NOTE(review): the overlap selection targets the atoms tagged with occupancy 2, which are the ones coming
    from mol2. The original summary stated that residues of mol1 get removed - confirm which one is intended.

    Parameters
    ----------
    mol1 : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The first Molecule object
    mol2 : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The second Molecule object
    gap : float
        Minimum space in A between atoms of the two molecules

    Return
    ------
    newmol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The resulting Molecule object

    Example
    -------
    >>> all = embed(memb, prot)
    '''
    first = mol1.copy()
    merged = mol2.copy()

    # Tag the atoms of each molecule through the occupancy field, keeping the real values for later
    occfirst = first.get('occupancy')
    occsecond = merged.get('occupancy')
    first.set('occupancy', 1)
    merged.set('occupancy', 2)
    merged.append(first)
    fromfirst = merged.atomselect('occupancy 1')
    fromsecond = merged.atomselect('occupancy 2')

    # Number the residues through the beta field, keeping the real values for later
    betabackup = merged.get('beta')
    merged.set('beta', sequenceID(merged.resid))

    # Calculate overlapping atoms
    overlapsel = '(occupancy 2) and same beta as exwithin ' + str(gap) + ' of (occupancy 1)'
    overlaps = merged.atomselect(overlapsel)

    # Restore original beta and occupancy
    merged.set('beta', betabackup)
    merged.set('occupancy', occfirst, fromfirst)
    merged.set('occupancy', occsecond, fromsecond)

    # Remove the overlaps
    merged.remove(overlaps, _logger=False)
    return merged
def project(self, mol):
    """ Project molecule.

    The squared deviation of every projected coordinate from a reference position is summed per atom. The
    reference is either the mean over the frames of `mol` (refpos 'mean') or the projected coordinates of the
    reference molecule (refpos 'refmol'). With groupsel 'residue' the atom values are averaged per residue.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>`
        A :class:`Molecule <htmd.molecule.molecule.Molecule>` object to project.

    Returns
    -------
    data : np.ndarray
        An array containing the projected data.

    Raises
    ------
    RuntimeError
        If the refpos or the groupsel option is not supported
    """
    coords = super().project(mol)
    # TODO: Could precalculate some stuff like this to speed it up
    resids = sequenceID(self._refmol.resid)

    if self._refpos == 'mean':
        refcoords = np.mean(coords, axis=0)
    elif self._refpos == 'refmol':
        refcoords = _MetricCoordinate(self._refmol, self._atomsel).project(self._refmol)
    else:
        raise RuntimeError('Wrong refpos option')

    mapping = super().getMapping(self._refmol)
    # Columns of the projection grouped by the atom they belong to
    xyzgroups = mapping.groupby('atomIndexes').groups
    numatoms = len(xyzgroups)

    atomfluct = np.zeros((coords.shape[0], numatoms))
    squarediff = (coords - refcoords) ** 2
    atomresids = np.zeros(numatoms, dtype=int)
    for i, atom in enumerate(sorted(xyzgroups.values(), key=lambda x: x[0])):
        assert len(np.unique(mapping.atomIndexes[atom])) == 1
        atomfluct[:, i] = squarediff[:, atom].sum(axis=1)
        atomresids[i] = resids[int(mapping.atomIndexes[atom[0]])]

    if self._groupsel is None:
        return atomfluct
    if self._groupsel == 'residue':
        uqresids = np.unique(atomresids)
        meanresfluct = np.zeros((coords.shape[0], len(uqresids)))
        for i, r in enumerate(uqresids):
            meanresfluct[:, i] = atomfluct[:, atomresids == r].mean(axis=1)
        return meanresfluct
    # Fail like the refpos check above instead of silently returning None
    raise RuntimeError('Wrong groupsel option')
def project(self, mol):
    """ Project molecule.

    The squared deviation of every projected coordinate from a reference position is summed per atom. The
    reference is either the mean over the frames of `mol` (refpos 'mean') or the projected coordinates of the
    reference molecule (refpos 'refmol'). With groupsel 'residue' the atom values are averaged per residue.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>`
        A :class:`Molecule <htmd.molecule.molecule.Molecule>` object to project.

    Returns
    -------
    data : np.ndarray
        An array containing the projected data.

    Raises
    ------
    RuntimeError
        If the refpos or the groupsel option is not supported
    """
    coords = super().project(mol)
    # TODO: Could precalculate some stuff like this to speed it up
    resids = sequenceID(self._refmol.resid)

    if self._refpos == 'mean':
        refcoords = np.mean(coords, axis=0)
    elif self._refpos == 'refmol':
        refcoords = _MetricCoordinate(self._refmol, self._atomsel).project(self._refmol)
    else:
        raise RuntimeError('Wrong refpos option')

    mapping = super().getMapping(self._refmol)
    # Columns of the projection grouped by the atom they belong to
    xyzgroups = mapping.groupby('atomIndexes').groups
    numatoms = len(xyzgroups)

    atomfluct = np.zeros((coords.shape[0], numatoms))
    squarediff = (coords - refcoords) ** 2
    atomresids = np.zeros(numatoms, dtype=int)
    for i, atom in enumerate(sorted(xyzgroups.values(), key=lambda x: x[0])):
        assert len(np.unique(mapping.atomIndexes[atom])) == 1
        atomfluct[:, i] = squarediff[:, atom].sum(axis=1)
        atomresids[i] = resids[int(mapping.atomIndexes[atom[0]])]

    if self._groupsel is None:
        return atomfluct
    if self._groupsel == 'residue':
        uqresids = np.unique(atomresids)
        meanresfluct = np.zeros((coords.shape[0], len(uqresids)))
        for i, r in enumerate(uqresids):
            meanresfluct[:, i] = atomfluct[:, atomresids == r].mean(axis=1)
        return meanresfluct
    # An unsupported option used to fall through and return None
    raise RuntimeError('Wrong groupsel option')
def _viewStatesNGL(self, states, statetype, protein, ligand, mols, numsamples, gui=False):
    """ Builds an NGL widget with one trajectory per state.

    Parameters
    ----------
    states : int, iterable of int or None
        The states to show. None shows `range(self.macronum)`
    statetype : str
        Passed on to `self.getStates` and `self._nglButtons`
    protein : bool or None
        If set, every state is shown as a cartoon colored by residue index
    ligand : str or None
        Atom selection of the ligand. If set, the ligand of every frame is added to the first frame of the
        rest of the system
    mols : list or None
        One Molecule per state. If None, they are obtained from `self.getStates` with at most 15 samples
    numsamples : int
        Number of samples requested from `self.getStates`
    gui : bool
        Passed on to `NGLWidget`

    Returns
    -------
    view : NGLWidget

    Raises
    ------
    NameError
        If neither `protein` nor `ligand` is given
    """
    from htmd.molecule.util import sequenceID
    if states is None:
        states = range(self.macronum)
    if isinstance(states, int):
        states = [states]
    if mols is None:
        mols = self.getStates(states, statetype, numsamples=min(numsamples, 15))
    palette = ('#0000ff', '#ff0000', '#333333', '#ff6600', '#ffff00', '#4c4d00',
               '#b2b2cc', '#33cc33', '#ffffff', '#ff3399', '#33ccff')
    if protein is None and ligand is None:
        raise NameError('Please provide either the "protein" or "ligand" parameter for viewStates.')

    from nglview import NGLWidget, HTMDTrajectory
    view = NGLWidget(gui=gui)
    ref = mols[0].copy()
    segoffset = 0
    for i, s in enumerate(states):
        statemol = mols[i]
        if protein:
            mol = Molecule()
        if ligand:
            # Keep one frame of everything but the ligand, and only the ligand of the state
            mol = ref.copy()
            mol.remove(ligand, _logger=False)
            mol.dropFrames(keep=0)
            statemol.filter(ligand, _logger=False)
        statemol.set('chain', '{}'.format(s))

        # Append the frames one by one, each one as a separate set of atoms
        allframes = statemol.coords
        for j in range(statemol.numFrames):
            statemol.coords = np.atleast_3d(allframes[:, :, j])
            if ligand:
                # Continue the segid numbering where the previous frame stopped
                statemol.set('segid', sequenceID(statemol.resid) + segoffset)
                segoffset = int(statemol.segid[-1])
            mol.append(statemol)
        view.add_trajectory(HTMDTrajectory(mol))

        # Setting up representations
        if ligand:
            view[i].add_cartoon('protein', color='sstruc')
            view[i].add_hyperball(':{}'.format(s), color=palette[i % len(palette)])
        if protein:
            view[i].add_cartoon('protein', color='residueindex')

    self._nglButtons(view, statetype, states)
    return view
def tileMembrane(memb, xmin, ymin, xmax, ymax):
    """ Tile the membrane in the X and Y dimensions to reach a specific size.

    The size of a tile is taken from the bounding box of the water atoms of `memb`. Copies of the membrane
    are placed on a grid starting at (xmin, ymin), and residues reaching beyond xmax or ymax are removed
    from every copy.

    Parameters
    ----------
    memb : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The membrane to be tiled. It is copied, the input is left untouched
    xmin : float
        X coordinate at which the first tile is placed
    ymin : float
        Y coordinate at which the first tile is placed
    xmax : float
        Residues with an atom at larger x are removed
    ymax : float
        Residues with an atom at larger y are removed

    Returns
    -------
    megamemb : A big membrane Molecule
    """
    from htmd.progress.progress import ProgressBar
    memb = memb.copy()
    memb.resid = sequenceID(memb.resid)

    # The waters define the extent of a single tile
    watercoords = memb.get('coords', 'water')
    lowest = np.min(watercoords, axis=0)
    minmemb = lowest.flatten()
    size = (np.max(watercoords, axis=0) - lowest).flatten()

    xreps = int(np.ceil((xmax - xmin) / size[0]))
    yreps = int(np.ceil((ymax - ymin) / size[1]))
    logger.info('Replicating Membrane {}x{}'.format(xreps, yreps))

    from htmd.molecule.molecule import Molecule
    megamemb = Molecule()
    bar = ProgressBar(xreps * yreps, description='Replicating Membrane')
    outside = 'same resid as (x > {} or y > {})'.format(xmax, ymax)
    for x in range(xreps):
        xpos = xmin + x * size[0]
        for y in range(yreps):
            ypos = ymin + y * size[1]
            tile = memb.copy()
            tile.moveBy([xpos - float(minmemb[0]), ypos - float(minmemb[1]), 0])
            tile.remove(outside, _logger=False)
            # Tiles are numbered consecutively in the order in which they are placed
            tile.set('segid', 'M{}'.format(x * yreps + y))
            megamemb.append(tile)
            bar.progress()
    bar.stop()
    return megamemb
def tileMembrane(memb, xmin, ymin, xmax, ymax):
    """ Tile the membrane in the X and Y dimensions to reach a specific size.

    The size of a tile is taken from the bounding box of the water atoms of `memb`. Copies of the membrane
    are placed on a grid starting at (xmin, ymin), and residues reaching beyond xmax or ymax are removed
    from every copy.

    Parameters
    ----------
    memb : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The membrane to be tiled. It is copied, the input is left untouched
    xmin : float
        X coordinate at which the first tile is placed
    ymin : float
        Y coordinate at which the first tile is placed
    xmax : float
        Residues with an atom at larger x are removed
    ymax : float
        Residues with an atom at larger y are removed

    Returns
    -------
    megamemb : A big membrane Molecule
    """
    from htmd.progress.progress import ProgressBar
    memb = memb.copy()
    memb.resid = sequenceID(memb.resid)

    # Bounding box of the waters, used as the footprint of one tile
    watercoords = memb.get('coords', 'water')
    lowest = np.min(watercoords, axis=0)
    minmemb = lowest.flatten()
    size = (np.max(watercoords, axis=0) - lowest).flatten()

    xreps = int(np.ceil((xmax - xmin) / size[0]))
    yreps = int(np.ceil((ymax - ymin) / size[1]))
    logger.info('Replicating Membrane {}x{}'.format(xreps, yreps))

    from htmd.molecule.molecule import Molecule
    megamemb = Molecule()
    bar = ProgressBar(xreps * yreps, description='Replicating Membrane')
    outside = 'same resid as (x > {} or y > {})'.format(xmax, ymax)
    for x in range(xreps):
        xpos = xmin + x * size[0]
        for y in range(yreps):
            ypos = ymin + y * size[1]
            tile = memb.copy()
            tile.moveBy([xpos - float(minmemb[0]), ypos - float(minmemb[1]), 0])
            tile.remove(outside, _logger=False)
            # The segid carries the running number of the tile
            tile.set('segid', 'M{}'.format(x * yreps + y))
            megamemb.append(tile)
            bar.progress()
    bar.stop()
    return megamemb
def _calcarrays(self, mol):
    """ Builds the index arrays describing the backbone of the atoms selected by `self.sel`.

    Works on a filtered copy of `mol`, so all returned indices refer to that filtered copy.

    Returns
    -------
    ca_indices : np.ndarray of int32
        Indices of the atoms named CA
    nco_indices : np.ndarray of int32
        One row per residue with the indices of its backbone atoms named N, C and O.
        A missing atom is flagged with -1
    proline_indices : np.ndarray of int32
        1 for each CA atom whose resname is PRO, 0 otherwise
    chain_ids : np.ndarray of int32
        For each CA atom, the position of its chain among the sorted unique chains
    """
    mol = mol.copy()
    mol.filter(self.sel, _logger=False)

    # Residue number of each atom, unique per (resid, chain, insertion)
    residues = sequenceID((mol.resid, mol.chain, mol.insertion))
    backbone = mol.atomselect('backbone')

    ca_indices = np.where(mol.name == 'CA')[0].astype(np.int32)
    proline_indices = np.asarray(mol.resname[ca_indices] == 'PRO', dtype=np.int32)
    _, chain_ids = np.unique(mol.chain[ca_indices], return_inverse=True)

    # One column per backbone atom name, rows are indexed by the residue number
    nco_indices = np.full((residues.max() + 1, 3), -1, dtype=np.int32)
    for column, atomname in enumerate(('N', 'C', 'O')):
        found = np.where((mol.name == atomname) & backbone)[0]
        nco_indices[residues[found], column] = found

    return ca_indices, nco_indices, proline_indices, chain_ids.astype(np.int32)
def _calcarrays(self, mol):
    """ Builds the index arrays describing the backbone of the atoms selected by `self.sel`.

    Works on a filtered copy of `mol`, so all returned indices refer to that filtered copy.

    Returns
    -------
    ca_indices : np.ndarray of int32
        Indices of the atoms named CA
    nco_indices : np.ndarray of int32
        One row per residue with the indices of its backbone atoms named N, C and O.
        A missing atom is flagged with -1
    proline_indices : np.ndarray of int32
        1 for each CA atom whose resname is PRO, 0 otherwise
    chain_ids : np.ndarray of int32
        For each CA atom, the position of its chain among the sorted unique chains
    """
    mol = mol.copy()
    mol.filter(self.sel, _logger=False)

    # Residue number of each atom, unique per (resid, chain, insertion)
    residues = sequenceID((mol.resid, mol.chain, mol.insertion))
    backbone = mol.atomselect('backbone')

    ca_indices = np.where(mol.name == 'CA')[0].astype(np.int32)
    proline_indices = np.asarray(mol.resname[ca_indices] == 'PRO', dtype=np.int32)
    _, chain_ids = np.unique(mol.chain[ca_indices], return_inverse=True)

    # Fill the N, C and O columns; residues lacking an atom keep the -1 flag
    nco_indices = np.full((residues.max() + 1, 3), -1, dtype=np.int32)
    for column, atomname in enumerate(('N', 'C', 'O')):
        found = np.where((mol.name == atomname) & backbone)[0]
        nco_indices[residues[found], column] = found

    return ca_indices, nco_indices, proline_indices, chain_ids.astype(np.int32)
def autoSegment(mol, sel='all', basename='P', spatial=True, spatialgap=4.0, field="segid"):
    """ Detects resid gaps in a selection and assigns incrementing segid to each fragment

    !!!WARNING!!! If you want to use atom selections like 'protein' or 'fragment',
    use this function on a Molecule containing only protein atoms, otherwise the protein selection can fail.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object
    sel : str
        Atom selection string on which to check for gaps.
        See more `here <http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.2/ug/node89.html>`__
    basename : str
        The basename for segment ids. For example if given 'P' it will name the segments 'P0', 'P1', ...
    spatial : bool
        Only considers a discontinuity in resid as a gap if the CA atoms have distance more than `spatialgap`
        Angstrom
    spatialgap : float
        The size of a spatial gap which validates a discontinuity (A)
    field : str
        Field to fix. Can be "segid" (default), "chain", or "both"

    Returns
    -------
    newmol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        A new Molecule object with modified segids

    Raises
    ------
    RuntimeError
        If a segid which is about to be assigned already exists in the molecule

    Example
    -------
    >>> newmol = autoSegment(mol,'chain B','P')
    """
    mol = mol.copy()

    idx = mol.atomselect(sel, indexes=True)
    rid = mol.get('resid', sel)
    residiff = np.diff(rid)
    gappos = np.where((residiff != 1) & (residiff != 0))[0]  # Points to the index before the gap!

    # Letters to be used for chains, if free: 0123456789abcd...ABCD..., minus chain symbols already used
    used_chains = set(mol.chain)
    chain_alphabet = list(string.digits + string.ascii_letters)
    available_chains = [x for x in chain_alphabet if x not in used_chains]

    idxstartseg = [idx[0]] + idx[gappos + 1].tolist()
    idxendseg = idx[gappos].tolist() + [idx[-1]]

    mol.set('segid', basename, sel)

    if len(gappos) == 0:
        mol.set('segid', basename+'0', sel)
        return mol

    if spatial:
        residbackup = mol.get('resid')
        mol.set('resid', sequenceID(mol.resid))  # Assigning unique resids to be able to do the distance selection

        todelete = []
        for i, (s, e) in enumerate(zip(idxstartseg[1:], idxendseg[:-1])):
            coords = mol.get('coords', sel='resid "{}" "{}" and name CA'.format(mol.resid[e], mol.resid[s]))
            if np.shape(coords) == (2, 3):
                dist = np.sqrt(np.sum((coords[0, :] - coords[1, :]) ** 2))
                if dist < spatialgap:
                    todelete.append(i)

        # Force an integer dtype: an empty list turns into a float array which np.delete rejects as index
        todelete = np.array(todelete, dtype=int)
        # Join the non-real gaps into segments
        idxstartseg = np.delete(idxstartseg, todelete + 1)
        idxendseg = np.delete(idxendseg, todelete)

        mol.set('resid', residbackup)  # Restoring the original resids

    for i, (s, e) in enumerate(zip(idxstartseg, idxendseg)):
        # Fixup segid
        if field in ['segid', 'both']:
            newsegid = basename + str(i)
            if np.any(mol.segid == newsegid):
                raise RuntimeError('Segid {} already exists in the molecule. Please choose different prefix.'.format(newsegid))
            logger.info('Created segment {} between resid {} and {}.'.format(newsegid, mol.resid[s], mol.resid[e]))
            mol.segid[s:e+1] = newsegid
        # Fixup chain
        if field in ['chain', 'both']:
            newchainid = available_chains[i]
            logger.info('Set chain {} between resid {} and {}.'.format(newchainid, mol.resid[s], mol.resid[e]))
            mol.chain[s:e+1] = newchainid

    return mol
def autoSegment(mol, sel='all', basename='P', spatial=True, spatialgap=4):
    """ Detects resid gaps in a selection and assigns incrementing segid to each fragment

    !!!WARNING!!! If you want to use atom selections like 'protein' or 'fragment',
    use this function on a Molecule containing only protein atoms, otherwise the protein selection can fail.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object
    sel : str
        Atom selection on which to check for gaps.
    basename : str
        The basename for segment ids. For example if given 'P' it will name the segments 'P0', 'P1', ...
        If no gap is found, the whole selection gets the plain basename as segid.
    spatial : bool
        Only considers a discontinuity in resid as a gap if the CA atoms have distance more than `spatialgap`
        Angstrom
    spatialgap : float
        The size of a spatial gap which validates a discontinuity

    Returns
    -------
    newmol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        A new Molecule object with modified segids

    Raises
    ------
    RuntimeError
        If a segid which is about to be assigned already exists in the molecule

    Example
    -------
    >>> newmol = autoSegment(mol,'chain B','P')
    """
    mol = mol.copy()

    idx = mol.atomselect(sel, indexes=True)
    rid = mol.get('resid', sel)
    residiff = np.diff(rid)
    gappos = np.where((residiff != 1) & (residiff != 0))[0]  # Points to the index before the gap!

    idxstartseg = [idx[0]] + idx[gappos + 1].tolist()
    idxendseg = idx[gappos].tolist() + [idx[-1]]

    mol.set('segid', basename, sel)

    if len(gappos) == 0:
        return mol

    if spatial:
        residbackup = mol.get('resid')
        mol.set('resid', sequenceID(mol.resid))  # Assigning unique resids to be able to do the distance selection

        todelete = []
        for i, (s, e) in enumerate(zip(idxstartseg[1:], idxendseg[:-1])):
            coords = mol.get('coords', sel='resid "{}" "{}" and name CA'.format(mol.resid[e], mol.resid[s]))
            if np.shape(coords) == (2, 3):
                dist = np.sqrt(np.sum((coords[0, :] - coords[1, :]) ** 2))
                if dist < spatialgap:
                    todelete.append(i)

        # Force an integer dtype: an empty list turns into a float array which np.delete rejects as index
        todelete = np.array(todelete, dtype=int)
        # Join the non-real gaps into segments
        idxstartseg = np.delete(idxstartseg, todelete + 1)
        idxendseg = np.delete(idxendseg, todelete)

        mol.set('resid', residbackup)  # Restoring the original resids

    for i, (s, e) in enumerate(zip(idxstartseg, idxendseg)):
        newsegid = basename + str(i)
        if np.any(mol.segid == newsegid):
            raise RuntimeError('Segid {} already exists in the molecule. Please choose different prefix.'.format(newsegid))
        logger.info('Created segment {} between resid {} and {}.'.format(newsegid, mol.resid[s], mol.resid[e]))
        mol.segid[s:e+1] = newsegid

    return mol
def tileMembrane(memb, xmin, ymin, xmax, ymax, buffer=1.5):
    """ Tile a membrane in the X and Y dimensions to reach a specific size.

    The size of a tile is taken from the bounding box of the water atoms of `memb`. Copies of the membrane
    are placed on a grid starting at (xmin, ymin), and residues reaching beyond xmax or ymax are removed
    from every copy. Copies which end up without atoms are skipped.

    Parameters
    ----------
    memb : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The membrane to be tiled
    xmin : float
        Minimum x coordinate
    ymin : float
        Minimum y coordinate
    xmax : float
        Maximum x coordinate
    ymax : float
        Maximum y coordinate
    buffer : float
        Buffer distance between tiles

    Returns
    -------
    megamemb : A big membrane Molecule
    """
    from tqdm import tqdm
    memb = memb.copy()
    memb.resid = sequenceID((memb.resid, memb.insertion, memb.chain, memb.segid))

    # The waters define the extent of a single tile
    watercoords = memb.get('coords', 'water')
    minmemb = np.min(watercoords, axis=0).flatten()
    size = (np.max(watercoords, axis=0) - np.min(watercoords, axis=0)).flatten()

    xreps = int(np.ceil((xmax - xmin) / size[0]))
    yreps = int(np.ceil((ymax - ymin) / size[1]))
    logger.info('Replicating Membrane {}x{}'.format(xreps, yreps))

    from htmd.molecule.molecule import Molecule
    megamemb = Molecule()
    bar = tqdm(total=xreps * yreps, desc='Replicating Membrane')
    k = 0
    for x in range(xreps):
        for y in range(yreps):
            tmpmemb = memb.copy()
            xpos = xmin + x * (size[0] + buffer)
            ypos = ymin + y * (size[1] + buffer)

            tmpmemb.moveBy([-float(minmemb[0]) + xpos, -float(minmemb[1]) + ypos, 0])
            tmpmemb.remove('same resid as (x > {} or y > {})'.format(xmax, ymax), _logger=False)
            if tmpmemb.numAtoms != 0:
                tmpmemb.set('segid', 'M{}'.format(k), sel='not water')
                tmpmemb.set('segid', 'MW{}'.format(k), sel='water')
                megamemb.append(tmpmemb)
                k += 1
            # Empty tiles are processed as well, so they have to advance the bar for it to reach its total
            bar.update(1)
    bar.close()

    # Membranes don't tile perfectly. Need to remove waters that clash with lipids of other tiles
    # Some clashes will still occur between periodic images however
    megamemb.remove('same resid as water and within 1.5 of not water', _logger=False)
    return megamemb
def ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation, dfrom=5, dbetween=5, segname=None):
    """ Replaces randomly chosen water molecules of a system by ions.

    Water oxygens further than `dfrom` from any non-water atom are drawn at random. A drawn water is accepted
    if it is at least `dbetween` away from all previously accepted ones. The accepted waters are removed and
    the ions are inserted at the end of the molecule, at the positions of the removed water oxygens.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The solvated system. It is copied, the input is left untouched
    anion : str
        Residue name given to the anions
    cation : str
        Residue name given to the cations
    anionatom : str
        Atom name given to the anions
    cationatom : str
        Atom name given to the cations
    nanion : int
        Number of anions to place
    ncation : int
        Number of cations to place
    dfrom : float
        Min distance of ions from molecule
    dbetween : float
        Min distance between ions
    segname : str
        Passed on to `_getSegname`. NOTE(review): the resulting name is not used further down, the ions are
        always placed in segid 'I' - confirm if this is intended

    Returns
    -------
    newmol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The system with the ions placed. If no ions are requested, an unmodified copy of `mol`

    Raises
    ------
    NameError
        If no water is further than `dfrom` from the rest of the system, or if the ions could not be placed
        within 10 attempts
    """
    newmol = mol.copy()

    logger.info('Min distance of ions from molecule: ' + str(dfrom) + 'A')
    logger.info('Min distance between ions: ' + str(dbetween) + 'A')
    logger.info('Placing ' + str(nanion + ncation) + ' ions.')

    if (nanion + ncation) == 0:
        return newmol

    segname = _getSegname(newmol, segname)
    nions = nanion + ncation

    # The beta field is borrowed to number the residues. Back up a real copy of it, a plain reference could
    # get overwritten together with the field.
    betabackup = newmol.beta.copy()
    newmol.set('beta', sequenceID((newmol.resid, newmol.segid)))

    # Find water oxygens to replace with ions
    ntries = 0
    maxtries = 10
    while True:
        ionlist = np.empty(0, dtype=int)
        watindex = newmol.atomselect('noh and water and not (within ' + str(dfrom) + ' of not water)', indexes=True)

        if len(watindex) == 0:
            raise NameError('No waters could be found further than ' + str(dfrom) +
                            ' from other molecules to be replaced by ions. You might need to solvate with a bigger box.')

        while len(ionlist) < nions:
            if len(watindex) == 0:
                break
            randwat = np.random.randint(len(watindex))
            thision = watindex[randwat]
            addit = True
            if len(ionlist) != 0:  # Check for distance from previous ions
                ionspos = newmol.get('coords', sel=ionlist)
                thispos = newmol.get('coords', sel=thision)
                dists = distance.cdist(np.atleast_2d(ionspos), np.atleast_2d(thispos), metric='euclidean')
                if np.any(dists < dbetween):
                    addit = False
            if addit:
                ionlist = np.append(ionlist, thision)
            # A drawn water is consumed whether accepted or not, so the candidates shrink and the loop ends
            watindex = np.delete(watindex, randwat)

        if len(ionlist) == nions:
            break

        ntries += 1
        if ntries == maxtries:
            raise NameError('Failed to add ions after ' + str(maxtries) +
                            ' attempts. Try decreasing the from and between parameters, '
                            'decreasing ion concentration or making a larger water box.')

    # Delete waters but keep their coordinates
    waterpos = np.atleast_2d(newmol.get('coords', ionlist))
    stringsel = 'beta ' + ' '.join(str(int(x)) for x in newmol.beta[ionlist])
    atmrem = np.sum(newmol.atomselect(stringsel))
    atmput = 3 * len(ionlist)  # Each replaced water is expected to consist of 3 atoms
    assert atmrem == atmput, 'Removing {} atoms instead of {}. Report this bug.'.format(atmrem, atmput)
    sel = newmol.remove(stringsel, _logger=False)
    assert np.size(sel) == atmput, 'Removed {} atoms instead of {}. Report this bug.'.format(np.size(sel), atmput)
    betabackup = np.delete(betabackup, sel)

    # Add the ions
    randidx = np.random.permutation(np.size(waterpos, 0))
    atom = Molecule()
    atom.record = 'ATOM'
    atom.chain = 'I'
    atom.segid = 'I'
    atom.occupancy = 0
    atom.beta = 0
    atom.insertion = ''
    atom.element = ''
    atom.altloc = ''

    for i in range(nanion):
        atom.name = anionatom
        atom.resname = anion
        atom.resid = newmol.resid[-1] + 1
        atom.coords = waterpos[randidx[i], :]
        newmol.insert(atom, len(newmol.name))
    for i in range(ncation):
        atom.name = cationatom
        atom.resname = cation
        atom.resid = newmol.resid[-1] + 1
        atom.coords = waterpos[randidx[i + nanion], :]
        newmol.insert(atom, len(newmol.name))

    # Restoring the original betas of all atoms but the ions inserted at the end
    sel = np.ones(len(betabackup) + nions, dtype=bool)
    sel[len(betabackup)::] = False
    newmol.set('beta', betabackup, sel=sel)
    return newmol
def build(mol, ff=None, topo=None, param=None, prefix='structure', outdir='./build', caps=None, ionize=True, saltconc=0,
          saltanion=None, saltcation=None, disulfide=None, tleap=None, execute=True, atomtypes=None,
          offlibraries=None, gbsa=False, igb=2):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.
    When `ionize` is True the system is built once without ions to obtain its total charge, ions are placed,
    and the whole build is repeated with `ionize=False`.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available forcefield files.
        Default: :func:`amber.defaultFf <htmd.builder.amber.defaultFf>`
    topo : list of str
        A list of topology `prepi/prep/in` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available topology files.
        Default: :func:`amber.defaultTopo <htmd.builder.amber.defaultTopo>`
    param : list of str
        A list of parameter `frcmod` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available parameter files.
        Default: :func:`amber.defaultParam <htmd.builder.amber.defaultParam>`
    prefix : str
        The prefix for the generated prmtop, crd and pdb files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps for a particular protein segment.
        e.g. caps['P'] = ['ACE', 'NME'] or caps['P'] = ['none', 'none']. Default: will apply ACE and NME caps to every
        protein segment.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : list of pairs of atomselection strings
        If None it will guess disulfide bonds. Otherwise provide a list pairs of atomselection strings for each pair of
        residues forming the disulfide bridge.
    tleap : str
        Path to tleap executable used to build the system for AMBER
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.
        In that case None is returned.
    atomtypes : list of triplets
        Custom atom types defined by the user as ('type', 'element', 'hybrid') triplets
        e.g. (('C1', 'C', 'sp2'), ('CI', 'C', 'sp3')). Check `addAtomTypes` in AmberTools docs.
    offlibraries : str or list
        A path or a list of paths to OFF library files. Check `loadOFF` in AmberTools docs.
    gbsa : bool
        Modify radii for GBSA implicit water model
    igb : int
        GB model. Select: 1 for mbondi, 2 and 5 for mbondi2, 7 for bondi and 8 for mbondi3.
        Check section 4. The Generalized Born/Surface Area Model of the AMBER manual.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object

    Example
    -------
    >>> from htmd.ui import *  # doctest: +SKIP
    >>> mol = Molecule("3PTB")
    >>> molbuilt = amber.build(mol, outdir='/tmp/build')  # doctest: +SKIP
    >>> # More complex example
    >>> disu = [['segid P and resid 157', 'segid P and resid 13'], ['segid K and resid 1', 'segid K and resid 25']]
    >>> molbuilt = amber.build(mol, outdir='/tmp/build', saltconc=0.15, disulfide=disu)  # doctest: +SKIP
    """
    # Remove pdb protein bonds as they can be regenerated by tleap. Keep non-protein bonds i.e. for ligands
    mol = mol.copy()
    _removeProteinBonds(mol)

    if tleap is None:
        tleap = _findTleap()
    elif shutil.which(tleap) is None:
        raise NameError('Could not find executable: `{}` in the PATH. Cannot build for AMBER.'.format(tleap))

    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = defaultFf()
    if topo is None:
        topo = defaultTopo()
    if param is None:
        param = defaultParam()
    if caps is None:
        caps = _defaultProteinCaps(mol)

    _missingSegID(mol)
    _checkMixedSegment(mol)

    mol = _charmmLipid2Amber(mol)

    _applyProteinCaps(mol, caps)

    # The context manager guarantees tleap.in is closed even if one of the steps below raises
    with open(os.path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        if isinstance(ff, str):
            ff = [ff]
        for i, force in enumerate(ff):
            if not os.path.isfile(force):
                force = _locateFile(force, 'ff', tleap)
                if force is None:
                    continue
            newname = 'ff{}_{}'.format(i, os.path.basename(force))
            shutil.copy(force, os.path.join(outdir, newname))
            f.write('source {}\n'.format(newname))
        f.write('\n')

        if gbsa:
            gbmodels = {1: 'mbondi', 2: 'mbondi2', 5: 'mbondi2', 7: 'bondi', 8: 'mbondi3'}
            f.write('set default PBradii {}\n\n'.format(gbmodels[igb]))

        # Adding custom atom types
        if atomtypes is not None:
            atomtypes = ensurelist(tocheck=atomtypes[0], tomod=atomtypes)
            f.write('addAtomTypes {\n')
            for at in atomtypes:
                if len(at) != 3:
                    raise RuntimeError('Atom type definitions have to be triplets. Check the AMBER documentation.')
                f.write('    {{ "{}" "{}" "{}" }}\n'.format(at[0], at[1], at[2]))
            f.write('}\n\n')

        # Loading OFF libraries
        if offlibraries is not None:
            offlibraries = ensurelist(offlibraries)
            # Each library gets its own index so that equal basenames do not overwrite each other
            for i, off in enumerate(offlibraries):
                if not os.path.isfile(off):
                    raise RuntimeError('Could not find off-library in location {}'.format(off))
                newname = 'offlib{}_{}'.format(i, os.path.basename(off))
                shutil.copy(off, os.path.join(outdir, newname))
                f.write('loadoff {}\n'.format(newname))

        # Loading frcmod parameters
        f.write('# Loading parameter files\n')
        for i, p in enumerate(param):
            if not os.path.isfile(p):
                p = _locateFile(p, 'param', tleap)
                if p is None:
                    continue
            newname = 'param{}_{}'.format(i, os.path.basename(p))
            shutil.copy(p, os.path.join(outdir, newname))
            f.write('loadamberparams {}\n'.format(newname))
        f.write('\n')

        # Loading prepi topologies
        f.write('# Loading prepi topologies\n')
        for i, t in enumerate(topo):
            if not os.path.isfile(t):
                t = _locateFile(t, 'topo', tleap)
                if t is None:
                    continue
            newname = 'topo{}_{}'.format(i, os.path.basename(t))
            shutil.copy(t, os.path.join(outdir, newname))
            f.write('loadamberprep {}\n'.format(newname))
        f.write('\n')

        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        if np.sum(mol.atomtype != '') != 0:
            logger.debug('Writing mol2 files for input to tleap.')
            segs = np.unique(mol.segid[mol.atomtype != ''])
            combstr = 'mol = combine {mol'
            for s in segs:
                name = 'segment{}'.format(s)
                mol2name = os.path.join(outdir, '{}.mol2'.format(name))
                mol.write(mol2name, (mol.atomtype != '') & (mol.segid == s))
                if not os.path.isfile(mol2name):
                    raise NameError('Could not write a mol2 file out of the given Molecule.')
                f.write('# Loading the rest of the system\n')
                f.write('{} = loadmol2 {}.mol2\n\n'.format(name, name))
                combstr += ' {}'.format(name)
            combstr += '}\n\n'
            f.write(combstr)

        # Write patches for disulfide bonds (only after ionizing)
        if not ionize:
            # TODO: Remove this once we deprecate the class
            from htmd.builder.builder import DisulfideBridge
            from htmd.molecule.molecule import UniqueResidueID
            if disulfide is not None and len(disulfide) != 0 and isinstance(disulfide[0], DisulfideBridge):
                newdisu = []
                for d in disulfide:
                    r1 = UniqueResidueID.fromMolecule(mol, 'resid {} and segname {}'.format(d.resid1, d.segid1))
                    r2 = UniqueResidueID.fromMolecule(mol, 'resid {} and segname {}'.format(d.resid2, d.segid2))
                    newdisu.append([r1, r2])
                disulfide = newdisu
            # TODO: Remove up to here ----------------------

            if disulfide is not None and len(disulfide) != 0 and isinstance(disulfide[0][0], str):
                disulfide = convertDisulfide(mol, disulfide)

            if disulfide is None:
                logger.info('Detecting disulfide bonds.')
                disulfide = detectDisulfideBonds(mol)

            # Fix structure to match the disulfide patching
            if len(disulfide) != 0:
                torem = np.zeros(mol.numAtoms, dtype=bool)
                f.write('# Adding disulfide bonds\n')
                # Convert to stupid amber residue numbering. Only resnames change inside the loop,
                # so the numbering can be computed once.
                uqseqid = sequenceID((mol.resid, mol.insertion, mol.segid)) + mol.resid[0]
                for d in disulfide:
                    # Rename the residues to CYX if there is a disulfide bond
                    atoms1 = d[0].selectAtoms(mol, indexes=False)
                    atoms2 = d[1].selectAtoms(mol, indexes=False)
                    mol.resname[atoms1] = 'CYX'
                    mol.resname[atoms2] = 'CYX'
                    # Remove (eventual) HG hydrogens on these CYS (from proteinPrepare)
                    torem |= (atoms1 & (mol.name == 'HG')) | (atoms2 & (mol.name == 'HG'))
                    uqres1 = int(np.unique(uqseqid[atoms1]))
                    uqres2 = int(np.unique(uqseqid[atoms2]))
                    f.write('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))
                f.write('\n')
                mol.remove(torem, _logger=False)

        f.write('# Writing out the results\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
    logger.debug('Writing PDB file for input to tleap.')
    pdbname = os.path.join(outdir, 'input.pdb')

    # mol2 files have atomtype, here we only write parts not coming from mol2
    # We need to write the input.pdb at the end since we modify the resname for disulfide bridges in mol
    mol.write(pdbname, mol.atomtype == '')
    if not os.path.isfile(pdbname):
        raise NameError('Could not write a PDB file out of the given Molecule.')

    molbuilt = None
    if execute:
        # Source paths of extra dirs (our dirs, not amber default)
        htmdamberdir = os.path.abspath(os.path.join(home(), 'builder', 'amberfiles'))
        sourcepaths = [htmdamberdir]
        sourcepaths += [os.path.join(htmdamberdir, os.path.dirname(fname))
                        for fname in ff if os.path.isfile(os.path.join(htmdamberdir, fname))]
        extrasource = []
        for p in sourcepaths:
            extrasource.append('-I')
            extrasource.append('{}'.format(p))
        logpath = os.path.abspath(os.path.join(outdir, 'log.txt'))
        logger.info('Starting the build.')
        currdir = os.getcwd()
        os.chdir(outdir)
        try:
            # The -I options go right after the executable, before the -f script option
            cmd = [tleap] + extrasource + ['-f', './tleap.in']
            with open(logpath, 'w') as flog:
                try:
                    call(cmd, stdout=flog)
                except Exception as e:
                    raise NameError('tleap failed at execution') from e
            errors = _logParser(logpath)
        finally:
            # Always go back to the caller's working directory, also when tleap could not be run
            os.chdir(currdir)
        if errors:
            raise BuildError(errors + ['Check {} for further information on errors in building.'.format(logpath)])
        logger.info('Finished building.')

        # tleap was told to write <prefix>.prmtop and <prefix>.crd, so look for exactly those
        crdfile = os.path.join(outdir, prefix + '.crd')
        prmtopfile = os.path.join(outdir, prefix + '.prmtop')
        if os.path.exists(crdfile) and os.path.getsize(crdfile) != 0 and \
                os.path.exists(prmtopfile) and os.path.getsize(prmtopfile) != 0:
            molbuilt = Molecule(prmtopfile)
            molbuilt.read(crdfile)
        else:
            raise BuildError('No structure pdb/prmtop file was generated. Check {} for errors in building.'
                             .format(logpath))

        if ionize:
            shutil.move(crdfile, os.path.join(outdir, prefix + '.noions.crd'))
            shutil.move(prmtopfile, os.path.join(outdir, prefix + '.noions.prmtop'))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(totalcharge, nwater, saltconc=saltconc,
                                                                            anion=saltanion, cation=saltcation)
            newmol = ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation)
            # Redo the whole build but now with ions included. gbsa/igb are forwarded so that the final
            # system keeps the requested radii.
            return build(newmol, ff=ff, topo=topo, param=param, prefix=prefix, outdir=outdir, caps={}, ionize=False,
                         execute=execute, saltconc=saltconc, disulfide=disulfide, tleap=tleap, atomtypes=atomtypes,
                         offlibraries=offlibraries, gbsa=gbsa, igb=igb)

    # With execute=False nothing was built, so there is nothing to write out
    if molbuilt is not None:
        tmpbonds = molbuilt.bonds
        molbuilt.bonds = []  # Removing the bonds to speed up writing
        molbuilt.write(os.path.join(outdir, prefix + '.pdb'))
        molbuilt.bonds = tmpbonds  # Restoring the bonds
    return molbuilt
def autoSegment(mol, sel='all', basename='P', spatial=True, spatialgap=4.0, field="segid"):
    """ Detects resid gaps in a selection and assigns incrementing segid to each fragment

    !!!WARNING!!! If you want to use atom selections like 'protein' or 'fragment',
    use this function on a Molecule containing only protein atoms, otherwise the protein selection can fail.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object
    sel : str
        Atom selection on which to check for gaps.
    basename : str
        The basename for segment ids. For example if given 'P' it will name the segments 'P0', 'P1', ...
    spatial : bool
        Only considers a discontinuity in resid as a gap of the CA atoms have distance more than `spatialgap` Angstrom
    spatialgap : float
        The size of a spatial gap which validates a discontinuity (A)
    field : str
        Field to fix. Can be "segid" (default), "chain", or "both"

    Returns
    -------
    newmol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        A new Molecule object with modified segids

    Raises
    ------
    RuntimeError
        If a segid that would be assigned already exists in the molecule.

    Example
    -------
    >>> newmol = autoSegment(mol,'chain B','P')
    """
    mol = mol.copy()

    idx = mol.atomselect(sel, indexes=True)
    rid = mol.get('resid', sel)
    residiff = np.diff(rid)
    gappos = np.where((residiff != 1) & (residiff != 0))[0]  # Points to the index before the gap!

    # Letters to be used for chains, if free: 0123456789abcd...ABCD..., minus chain symbols already used
    used_chains = set(mol.chain)
    chain_alphabet = list(string.digits + string.ascii_letters)
    available_chains = [x for x in chain_alphabet if x not in used_chains]

    idxstartseg = [idx[0]] + idx[gappos + 1].tolist()
    idxendseg = idx[gappos].tolist() + [idx[-1]]

    mol.set('segid', basename, sel)

    if len(gappos) == 0:
        mol.set('segid', basename + '0', sel)
        return mol

    if spatial:
        residbackup = mol.get('resid')
        mol.set('resid', sequenceID(mol.resid))  # Assigning unique resids to be able to do the distance selection

        todelete = []
        for i, (s, e) in enumerate(zip(idxstartseg[1:], idxendseg[:-1])):
            coords = mol.get('coords', sel='resid "{}" "{}" and name CA'.format(mol.resid[e], mol.resid[s]))
            if np.shape(coords) == (2, 3):
                dist = np.sqrt(np.sum((coords[0, :] - coords[1, :]) ** 2))
                if dist < spatialgap:
                    todelete.append(i)

        # Join the non-real gaps into segments. The explicit integer dtype matters: an empty list would
        # otherwise become a float array, which np.delete does not accept as an index.
        todelete = np.array(todelete, dtype=int)
        idxstartseg = np.delete(idxstartseg, todelete + 1)
        idxendseg = np.delete(idxendseg, todelete)

        mol.set('resid', residbackup)  # Restoring the original resids

    for i, (s, e) in enumerate(zip(idxstartseg, idxendseg)):
        # Fixup segid
        if field in ['segid', 'both']:
            newsegid = basename + str(i)
            if np.any(mol.segid == newsegid):
                raise RuntimeError('Segid {} already exists in the molecule. Please choose different prefix.'
                                   .format(newsegid))
            logger.info('Created segment {} between resid {} and {}.'.format(newsegid, mol.resid[s], mol.resid[e]))
            mol.segid[s:e + 1] = newsegid
        # Fixup chain
        if field in ['chain', 'both']:
            newchainid = available_chains[i]
            logger.info('Set chain {} between resid {} and {}.'.format(newchainid, mol.resid[s], mol.resid[e]))
            mol.chain[s:e + 1] = newchainid

    return mol
def build(mol, ff=None, topo=None, param=None, prefix='structure', outdir='./build', caps=None, ionize=True, saltconc=0,
          saltanion=None, saltcation=None, disulfide=None, tleap='tleap', execute=True):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.
    When `ionize` is True the system is built once without ions to obtain its total charge, ions are placed,
    and the whole build is repeated with `ionize=False`.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files. Default: ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    topo : list of str
        A list of topology `prepi` files.
    param : list of str
        A list of parameter `frcmod` files.
    prefix : str
        The prefix for the generated prmtop, crd and pdb files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps for a particular protein segment.
        e.g. caps['P'] = ['ACE', 'NME'] or caps['P'] = ['none', 'none']. Default: will apply ACE and NME caps to every
        protein segment.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : iterable of disulfide bridge objects
        If None it will guess disulfide bonds. Otherwise provide objects exposing the attributes `segid1`, `resid1`,
        `segid2` and `resid2` of the two bonded residues, which is what this function reads from each element.
    tleap : str
        Path to tleap executable used to build the system for AMBER
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.
        In that case None is returned.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object

    Example
    -------
    >>> ffs = ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    >>> molbuilt = amber.build(mol, ff=ffs, outdir='/tmp/build', saltconc=0.15)
    """
    # Remove pdb bonds!
    mol = mol.copy()
    mol.bonds = np.empty((0, 2), dtype=np.uint32)
    if shutil.which(tleap) is None:
        raise NameError('Could not find executable: `' + tleap + '` in the PATH. Cannot build for AMBER.')
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    if topo is None:
        topo = []
    if param is None:
        param = []
    if caps is None:
        caps = _defaultProteinCaps(mol)

    _missingSegID(mol)
    _checkMixedSegment(mol)

    logger.info('Converting CHARMM membranes to AMBER.')
    mol = _charmmLipid2Amber(mol)

    _applyProteinCaps(mol, caps)

    # Disulfide bridges are only handled on the final pass, i.e. when no ionization is pending
    if not ionize and disulfide is None:
        logger.info('Detecting disulfide bonds.')
        disulfide = detectDisulfideBonds(mol)

    # The context manager guarantees tleap.in is closed even if one of the steps below raises
    with open(os.path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        if isinstance(ff, str):
            ff = [ff]
        for force in ff:
            f.write('source ' + force + '\n')
        f.write('\n')

        # Loading TIP3P water parameters
        f.write('# Loading ions and TIP3P water parameters\n')
        f.write('loadamberparams frcmod.ionsjc_tip3p\n\n')

        # Loading user parameters
        f.write('# Loading parameter files\n')
        for p in param:
            try:
                shutil.copy(p, outdir)
            except OSError:
                # The file could not be copied: reference it by name and let tleap resolve it
                f.write('loadamberparams ' + p + '\n')
                logger.info("Path {:s} not found, assuming a standard AmberTools file.".format(p))
            else:
                f.write('loadamberparams ' + os.path.basename(p) + '\n')
        f.write('\n')

        # Printing out topologies
        f.write('# Loading prepi topologies\n')
        for t in topo:
            shutil.copy(t, outdir)
            f.write('loadamberprep ' + os.path.basename(t) + '\n')
        f.write('\n')

        # Fix the structure to match the disulfide patching. This has to happen before input.pdb is written.
        if not ionize:
            for d in disulfide:
                sel1 = 'segid {} and resid {}'.format(d.segid1, d.resid1)
                sel2 = 'segid {} and resid {}'.format(d.segid2, d.resid2)
                # Rename the CYS to CYX if there is a disulfide bond
                mol.set('resname', 'CYX', sel=sel1)
                mol.set('resname', 'CYX', sel=sel2)
                # Remove (eventual) HG hydrogens on these CYS (from proteinPrepare)
                mol.remove('name HG and ' + sel1, _logger=False)
                mol.remove('name HG and ' + sel2, _logger=False)

        # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
        logger.info('Writing PDB file for input to tleap.')
        pdbname = os.path.join(outdir, 'input.pdb')

        # mol2 files have atomtype, here we only write parts not coming from mol2
        mol.write(pdbname, mol.atomtype == '')
        if not os.path.isfile(pdbname):
            raise NameError('Could not write a PDB file out of the given Molecule.')
        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        if np.sum(mol.atomtype != '') != 0:
            logger.info('Writing mol2 files for input to tleap.')
            segs = np.unique(mol.segid[mol.atomtype != ''])
            combstr = 'mol = combine {mol'
            for s in segs:
                name = 'segment{}'.format(s)
                mol2name = os.path.join(outdir, '{}.mol2'.format(name))
                mol.write(mol2name, (mol.atomtype != '') & (mol.segid == s))
                if not os.path.isfile(mol2name):
                    raise NameError('Could not write a mol2 file out of the given Molecule.')
                f.write('# Loading the rest of the system\n')
                f.write('{} = loadmol2 {}.mol2\n\n'.format(name, name))
                combstr += ' {}'.format(name)
            combstr += '}\n\n'
            f.write(combstr)

        # Write patches for disulfide bonds (only after ionizing)
        if not ionize and len(disulfide) != 0:
            f.write('# Adding disulfide bonds\n')
            # Convert to stupid amber residue numbering. The molecule does not change inside the loop.
            uqseqid = sequenceID((mol.resid, mol.insertion, mol.segid)) + mol.resid[0] - 1
            for d in disulfide:
                uqres1 = int(np.unique(uqseqid[mol.atomselect('segid {} and resid {}'.format(d.segid1, d.resid1))]))
                uqres2 = int(np.unique(uqseqid[mol.atomselect('segid {} and resid {}'.format(d.segid2, d.resid2))]))
                f.write('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))
            f.write('\n')

        f.write('# Writing out the results\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    molbuilt = None
    if execute:
        # Source paths of extra dirs (our dirs, not amber default)
        htmdamberdir = os.path.abspath(os.path.join(home(), 'builder', 'amberfiles'))
        sourcepaths = [htmdamberdir]
        sourcepaths += [os.path.join(htmdamberdir, os.path.dirname(fname))
                        for fname in ff if os.path.isfile(os.path.join(htmdamberdir, fname))]
        extrasource = []
        for p in sourcepaths:
            extrasource.append('-I')
            extrasource.append('{}'.format(p))
        logpath = os.path.abspath(os.path.join(outdir, 'log.txt'))
        logger.info('Starting the build.')
        currdir = os.getcwd()
        os.chdir(outdir)
        try:
            # The -I options go right after the executable, before the -f script option
            cmd = [tleap] + extrasource + ['-f', './tleap.in']
            with open(logpath, 'w') as flog:
                try:
                    call(cmd, stdout=flog)
                except Exception as e:
                    raise NameError('tleap failed at execution') from e
        finally:
            # Always go back to the caller's working directory, also when tleap could not be run
            os.chdir(currdir)
        logger.info('Finished building.')

        # tleap was told to write <prefix>.prmtop and <prefix>.crd, so look for exactly those
        crdfile = os.path.join(outdir, prefix + '.crd')
        prmtopfile = os.path.join(outdir, prefix + '.prmtop')
        if os.path.exists(crdfile) and os.path.getsize(crdfile) != 0 and \
                os.path.exists(prmtopfile) and os.path.getsize(prmtopfile) != 0:
            molbuilt = Molecule(prmtopfile)
            molbuilt.read(crdfile)
        else:
            raise NameError('No structure pdb/prmtop file was generated. Check {} for errors in building.'
                            .format(logpath))

        if ionize:
            shutil.move(crdfile, os.path.join(outdir, prefix + '.noions.crd'))
            shutil.move(prmtopfile, os.path.join(outdir, prefix + '.noions.prmtop'))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(totalcharge, nwater, saltconc=saltconc,
                                                                            ff='amber', anion=saltanion,
                                                                            cation=saltcation)
            newmol = ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation)
            # Redo the whole build but now with ions included
            return build(newmol, ff=ff, topo=topo, param=param, prefix=prefix, outdir=outdir, caps={}, ionize=False,
                         execute=execute, saltconc=saltconc, disulfide=disulfide, tleap=tleap)

    # With execute=False nothing was built, so there is nothing to write out
    if molbuilt is not None:
        molbuilt.write(os.path.join(outdir, prefix + '.pdb'))
    return molbuilt
def ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation, dfrom=5, dbetween=5, segname=None):
    """ Replaces randomly picked water molecules of a system with ions

    Water oxygens further than `dfrom` from any non-water atom are picked at random, rejecting picks
    closer than `dbetween` to an already picked one. The picked waters are removed and single-atom ions
    are appended at the positions of their oxygens.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The system. It is copied; the copy is modified and returned.
    anion : str
        Residue name given to the anions
    cation : str
        Residue name given to the cations
    anionatom : str
        Atom name given to the anions
    cationatom : str
        Atom name given to the cations
    nanion : int
        Number of anions to place
    ncation : int
        Number of cations to place
    dfrom : float
        Min distance of ions from the non-water atoms
    dbetween : float
        Min distance between ions
    segname : str
        Passed through `_getSegname`. The result is not used afterwards: ions are written with segid 'I'.

    Returns
    -------
    newmol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The system with the ions appended at the end

    Raises
    ------
    NameError
        If no water is far enough from the rest of the system, or if the ions could not be placed
        within 10 attempts.
    """
    newmol = mol.copy()

    logger.info('Min distance of ions from molecule: ' + str(dfrom) + 'A')
    logger.info('Min distance between ions: ' + str(dbetween) + 'A')
    logger.info('Placing ' + str(nanion+ncation) + ' ions.')

    if (nanion + ncation) == 0:
        return newmol

    segname = _getSegname(newmol, segname)
    nions = nanion + ncation

    # Take a real copy: the beta field is overwritten right below and the backup must not alias it
    betabackup = np.array(newmol.beta)
    # Beta temporarily holds a per-residue identifier so that whole waters can be selected by it
    newmol.set('beta', sequenceID((newmol.resid, newmol.segid)))

    # Find water oxygens to replace with ions. The candidates do not change between attempts.
    candidates = newmol.atomselect('noh and water and not (within ' + str(dfrom) + ' of not water)', indexes=True)
    if len(candidates) == 0:
        raise NameError('No waters could be found further than ' + str(dfrom) + ' from other molecules to be replaced by ions. You might need to solvate with a bigger box or disable the ionize property when building.')

    ntries = 0
    maxtries = 10
    while True:
        ionlist = np.empty(0, dtype=int)
        watindex = candidates  # np.delete returns new arrays, the candidates are never modified
        while len(ionlist) < nions:
            if len(watindex) == 0:
                break
            randwat = np.random.randint(len(watindex))
            thision = watindex[randwat]
            addit = True
            if len(ionlist) != 0:  # Check for distance from previous ions
                ionspos = newmol.get('coords', sel=ionlist)
                thispos = newmol.get('coords', sel=thision)
                dists = distance.cdist(np.atleast_2d(ionspos), np.atleast_2d(thispos), metric='euclidean')
                if np.any(dists < dbetween):
                    addit = False
            if addit:
                ionlist = np.append(ionlist, thision)
            # Accepted or rejected, this water is not tried again in this attempt
            watindex = np.delete(watindex, randwat)
        if len(ionlist) == nions:
            break
        ntries += 1
        if ntries == maxtries:
            raise NameError('Failed to add ions after ' + str(maxtries) + ' attempts. Try decreasing the `dfrom` and `dbetween` parameters, decreasing ion concentration or making a larger water box.')

    # Delete waters but keep their coordinates
    waterpos = np.atleast_2d(newmol.get('coords', ionlist))
    stringsel = 'beta ' + ' '.join(str(int(x)) for x in newmol.beta[ionlist])
    sel = newmol.atomselect(stringsel, indexes=True)
    newmol.remove(sel, _logger=False)
    betabackup = np.delete(betabackup, sel)

    # Add the ions
    randidx = np.random.permutation(np.size(waterpos, 0))
    atom = Molecule()
    atom.empty(1)
    atom.set('chain', 'I')
    atom.set('segid', 'I')

    for i in range(nanion):
        atom.set('name', anionatom)
        atom.set('resname', anion)
        atom.set('resid', newmol.resid[-1] + 1)
        atom.coords = waterpos[randidx[i], :]
        newmol.insert(atom, len(newmol.name))
    for i in range(ncation):
        atom.set('name', cationatom)
        atom.set('resname', cation)
        atom.set('resid', newmol.resid[-1] + 1)
        atom.coords = waterpos[randidx[i+nanion], :]
        newmol.insert(atom, len(newmol.name))

    # Restoring the original betas on everything except the ions appended at the end
    sel = np.ones(len(betabackup) + nions, dtype=bool)
    sel[len(betabackup)::] = False
    newmol.set('beta', betabackup, sel=sel)
    return newmol
def autoSegment(mol, sel='all', basename='P', spatial=True, spatialgap=4):
    """ Detects resid gaps in a selection and assigns incrementing segid to each fragment

    !!!WARNING!!! If you want to use atom selections like 'protein' or 'fragment',
    use this function on a Molecule containing only protein atoms, otherwise the protein selection can fail.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object
    sel : str
        Atom selection on which to check for gaps.
    basename : str
        The basename for segment ids. For example if given 'P' it will name the segments 'P0', 'P1', ...
        If no gap is found the selection simply gets the segid `basename`.
    spatial : bool
        Only considers a discontinuity in resid as a gap of the CA atoms have distance more than `spatialgap` Angstrom
    spatialgap : float
        The size of a spatial gap which validates a discontinuity

    Returns
    -------
    newmol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        A new Molecule object with modified segids

    Raises
    ------
    RuntimeError
        If a segid that would be assigned already exists in the molecule.

    Example
    -------
    >>> newmol = autoSegment(mol,'chain B','P')
    """
    mol = mol.copy()

    idx = mol.atomselect(sel, indexes=True)
    rid = mol.get('resid', sel)
    residiff = np.diff(rid)
    gappos = np.where((residiff != 1) & (residiff != 0))[0]  # Points to the index before the gap!

    idxstartseg = [idx[0]] + idx[gappos + 1].tolist()
    idxendseg = idx[gappos].tolist() + [idx[-1]]

    mol.set('segid', basename, sel)

    if len(gappos) == 0:
        return mol

    if spatial:
        residbackup = mol.get('resid')
        mol.set('resid', sequenceID(mol.resid))  # Assigning unique resids to be able to do the distance selection

        todelete = []
        for i, (s, e) in enumerate(zip(idxstartseg[1:], idxendseg[:-1])):
            coords = mol.get('coords', sel='resid "{}" "{}" and name CA'.format(mol.resid[e], mol.resid[s]))
            if np.shape(coords) == (2, 3):
                dist = np.sqrt(np.sum((coords[0, :] - coords[1, :])**2))
                if dist < spatialgap:
                    todelete.append(i)

        # Join the non-real gaps into segments. The explicit integer dtype matters: an empty list would
        # otherwise become a float array, which np.delete does not accept as an index.
        todelete = np.array(todelete, dtype=int)
        idxstartseg = np.delete(idxstartseg, todelete + 1)
        idxendseg = np.delete(idxendseg, todelete)

        mol.set('resid', residbackup)  # Restoring the original resids

    for i, (s, e) in enumerate(zip(idxstartseg, idxendseg)):
        newsegid = basename + str(i)
        if np.any(mol.segid == newsegid):
            raise RuntimeError('Segid {} already exists in the molecule. Please choose different prefix.'
                               .format(newsegid))
        logger.info('Created segment {} between resid {} and {}.'.format(newsegid, mol.resid[s], mol.resid[e]))
        mol.segid[s:e + 1] = newsegid

    return mol
def build(mol, ff=None, topo=None, param=None, prefix='structure', outdir='./', caps=None, ionize=True, saltconc=0,
          saltanion=None, saltcation=None, disulfide=None, tleap='tleap', execute=True):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files. Default: ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    topo : list of str
        A list of topology `prepi` files.
    param : list of str
        A list of parameter `frcmod` files.
    prefix : str
        The prefix for the generated prmtop, crd and pdb files
    outdir : str
        The path to the output directory
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps of that segment.
        e.g. caps['P'] = ['ACE', 'NME']. Default: will apply ACE and NME caps to proteins and no caps
        to the rest.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : iterable
        If None it will guess disulfide bonds (via `detectDisulfideBonds`). Otherwise provide an iterable of
        objects exposing the attributes `segid1`, `resid1`, `segid2` and `resid2` of the two bonded residues.
    tleap : str
        Path to tleap executable used to build the system for AMBER
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.
        In that case None is returned.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object

    Raises
    ------
    NameError
        If tleap cannot be found or executed, if the input PDB cannot be written or if tleap produced no output.

    Example
    -------
    >>> ffs = ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    >>> molbuilt = amber.build(mol, ff=ffs, outdir='/tmp/build', saltconc=0.15)
    """
    # Remove pdb bonds!
    mol = mol.copy()
    mol.bonds = np.empty((0, 2), dtype=np.uint32)
    if shutil.which(tleap) is None:
        raise NameError('Could not find executable: `' + tleap + '` in the PATH. Cannot build for AMBER.')
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = ['leaprc.lipid14', 'leaprc.ff14SB', 'leaprc.gaff']
    if topo is None:
        topo = []
    if param is None:
        param = []
    if caps is None:
        caps = _defaultCaps(mol)

    _missingSegID(mol)
    _checkMixedSegment(mol)

    logger.info('Converting CHARMM membranes to AMBER.')
    mol = _charmmLipid2Amber(mol)

    #_checkProteinGaps(mol)
    _applyCaps(mol, caps)

    if isinstance(ff, str):
        ff = [ff]

    # The context manager guarantees tleap.in is closed even if one of the steps below raises
    with open(path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        for force in ff:
            f.write('source ' + force + '\n')
        f.write('\n')

        # Loading TIP3P water parameters
        f.write('# Loading ions and TIP3P water parameters\n')
        f.write('loadamberparams frcmod.ionsjc_tip3p\n\n')

        # Loading user parameters
        f.write('# Loading parameter files\n')
        for p in param:
            shutil.copy(p, outdir)
            f.write('loadamberparams ' + path.basename(p) + '\n')
        f.write('\n')

        # Printing out topologies
        f.write('# Loading prepi topologies\n')
        for t in topo:
            shutil.copy(t, outdir)
            f.write('loadamberprep ' + path.basename(t) + '\n')
        f.write('\n')

        # AMBER can work with a single PDB file if the segments are separate by TER
        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        # Printing out patches for the disulfide bridges
        if disulfide is None and not ionize:
            logger.info('Detecting disulfide bonds.')
            disulfide = detectDisulfideBonds(mol)

        if not ionize and len(disulfide) != 0:  # Only make disu bonds after ionizing!
            f.write('# Adding disulfide bonds\n')
            # Convert to stupid amber residue numbering. Renaming residues below does not alter
            # resid/insertion/segid, so this only needs to be computed once.
            uqseqid = sequenceID((mol.resid, mol.insertion, mol.segid)) + mol.resid[0] - 1
            for d in disulfide:
                sel1 = 'segid {} and resid {}'.format(d.segid1, d.resid1)
                sel2 = 'segid {} and resid {}'.format(d.segid2, d.resid2)
                uqres1 = int(np.unique(uqseqid[mol.atomselect(sel1)]))
                uqres2 = int(np.unique(uqseqid[mol.atomselect(sel2)]))
                # Rename the CYS to CYX if there is a disulfide bond
                mol.set('resname', 'CYX', sel=sel1)
                mol.set('resname', 'CYX', sel=sel2)
                f.write('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))
            f.write('\n')

        f.write('# Writing out the results\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    # The PDB has to be written after the disulfide handling, otherwise the CYX renaming done above
    # never reaches the file which tleap loads.
    logger.info('Writing PDB file for input to tleap.')
    pdbname = path.join(outdir, 'input.pdb')
    mol.write(pdbname)
    if not os.path.isfile(pdbname):
        raise NameError('Could not write a PDB file out of the given Molecule.')

    # tleap saves its results under `prefix`, so look for them under that name
    crdfile = path.join(outdir, prefix + '.crd')
    prmtopfile = path.join(outdir, prefix + '.prmtop')

    molbuilt = None
    if execute:
        # Source paths of extra dirs
        htmdamberdir = path.join(home(), 'builder', 'amberfiles')
        sourcepaths = [htmdamberdir]
        sourcepaths += [path.join(htmdamberdir, path.dirname(fname)) for fname in ff]
        # Every include dir has to be passed as its own '-I', '<dir>' pair of arguments
        extrasource = []
        for p in sourcepaths:
            extrasource += ['-I', p]
        logpath = os.path.abspath('{}/log.txt'.format(outdir))
        logger.info('Starting the build.')
        currdir = os.getcwd()
        os.chdir(outdir)
        try:
            with open(logpath, 'w') as logfile:
                try:
                    call([tleap] + extrasource + ['-f', './tleap.in'], stdout=logfile)
                except Exception as err:
                    raise NameError('tleap failed at execution') from err
        finally:
            os.chdir(currdir)  # Always go back, even when tleap could not be started
        logger.info('Finished building.')

        if os.path.isfile(crdfile) and os.path.isfile(prmtopfile) and \
                path.getsize(crdfile) != 0 and path.getsize(prmtopfile) != 0:
            molbuilt = Molecule(prmtopfile)
            molbuilt.read(crdfile)
        else:
            raise NameError(
                'No structure pdb/prmtop file was generated. Check {} for errors in building.'.format(logpath))

        if ionize:
            shutil.move(crdfile, path.join(outdir, prefix + '.noions.crd'))
            shutil.move(prmtopfile, path.join(outdir, prefix + '.noions.prmtop'))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(
                totalcharge, nwater, saltconc=saltconc, ff='amber', anion=saltanion, cation=saltcation)
            newmol = ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation)
            # Redo the whole build but now with ions included
            return build(newmol, ff=ff, topo=topo, param=param, prefix=prefix, outdir=outdir, caps={},
                         ionize=False, execute=execute, saltconc=saltconc, disulfide=disulfide, tleap=tleap)

    if molbuilt is not None:  # Nothing was built when execute is False
        molbuilt.write(path.join(outdir, prefix + '.pdb'))
    return molbuilt
def PDBread(filename, mode='pdb', frame=None, topoloc=None):
    """ Reads a PDB or PDBQT file into a topology and a trajectory

    Parameters
    ----------
    filename : str
        Path to the file. If no such file exists and the string is four characters long it is handed to
        `_getPDB` to be fetched as a PDB id.
    mode : {'pdb', 'pdbqt'}
        Column layout to use for the ATOM/HETATM records.
    frame
        Unused by this reader.
    topoloc
        Unused by this reader.

    Returns
    -------
    topo : Topology
        Topology built from the ATOM/HETATM records read up to the first END (pdb) or ENDMDL (pdbqt) record,
        with bonds from the CONECT records and `crystalinfo` from CRYST1 / REMARK 290 SMTRY.
    traj : Trajectory
        Trajectory holding the coordinates as an array of shape (natoms, 3, nmodels).
    """
    from pandas import read_fwf
    import io

    istempfile = False
    if not os.path.isfile(filename) and len(filename) == 4:
        # Could be a PDB id. Try to load it from the PDB website
        filename, istempfile = _getPDB(filename)

    # ATOM / HETATM records
    # COLUMNS        DATA TYPE     FIELD        DEFINITION
    # -------------------------------------------------------------------------------------
    #  1 -  6        Record name   "ATOM  "
    #  7 - 11        Integer       serial       Atom serial number.
    # 13 - 16        Atom          name         Atom name.
    # 17             Character     altLoc       Alternate location indicator.
    # 18 - 20        Residue name  resName      Residue name.
    # 22             Character     chainID      Chain identifier.
    # 23 - 26        Integer       resSeq       Residue sequence number.
    # 27             AChar         iCode        Code for insertion of residues.
    # 31 - 38        Real(8.3)     x            Orthogonal coordinates for X in Angstroms.
    # 39 - 46        Real(8.3)     y            Orthogonal coordinates for Y in Angstroms.
    # 47 - 54        Real(8.3)     z            Orthogonal coordinates for Z in Angstroms.
    # 55 - 60        Real(6.2)     occupancy    Occupancy.
    # 61 - 66        Real(6.2)     tempFactor   Temperature factor.
    # 77 - 78        LString(2)    element      Element symbol, right-justified.
    # 79 - 80        LString(2)    charge       Charge on the atom.
    if mode == 'pdb':
        topocolspecs = [(0, 6), (6, 11), (12, 16), (16, 17), (17, 21), (21, 22), (22, 26), (26, 27),
                        (54, 60), (60, 66), (72, 76), (76, 78), (78, 80)]
        toponames = ('record', 'serial', 'name', 'altloc', 'resname', 'chain', 'resid', 'insertion',
                     'occupancy', 'beta', 'segid', 'element', 'charge')
    elif mode == 'pdbqt':
        # http://autodock.scripps.edu/faqs-help/faq/what-is-the-format-of-a-pdbqt-file
        # The rigid root contains one or more PDBQT-style ATOM or HETATM records. These records resemble their
        # traditional PDB counterparts, but diverge in columns 71-79 inclusive (where the first character in the
        # line corresponds to column 1). The partial charge is stored in columns 71-76 inclusive (in %6.3f format,
        # i.e. right-justified, 6 characters wide, with 3 decimal places). The AutoDock atom-type is stored in
        # columns 78-79 inclusive (in %-2.2s format, i.e. left-justified and 2 characters wide).
        topocolspecs = [(0, 6), (6, 11), (12, 16), (16, 17), (17, 21), (21, 22), (22, 26), (26, 27),
                        (54, 60), (60, 66), (70, 76), (77, 79)]
        toponames = ('record', 'serial', 'name', 'altloc', 'resname', 'chain', 'resid', 'insertion',
                     'occupancy', 'beta', 'charge', 'element')
    # Expected column types. Only referenced by the disabled `dtype=` argument of the topology parsing below.
    # Uses the builtin int: the `np.int` alias no longer exists in current NumPy versions.
    topodtypes = {
        'record': str,
        'serial': int,
        'name': str,
        'altloc': str,
        'resname': str,
        'chain': str,
        'resid': int,
        'insertion': str,
        'occupancy': np.float32,
        'beta': np.float32,
        'segid': str,
        'element': str,
        'charge': np.float32,
        'chargesign': str,
    }
    coordcolspecs = [(30, 38), (38, 46), (46, 54)]
    coordnames = ('x', 'y', 'z')

    # CONECT records
    # COLUMNS       DATA TYPE      FIELD        DEFINITION
    # -------------------------------------------------------------------------
    #  1 -  6       Record name    "CONECT"
    #  7 - 11       Integer        serial       Atom serial number
    # 12 - 16       Integer        serial       Serial number of bonded atom
    # 17 - 21       Integer        serial       Serial number of bonded atom
    # 22 - 26       Integer        serial       Serial number of bonded atom
    # 27 - 31       Integer        serial       Serial number of bonded atom
    bondcolspecs = [(6, 11), (11, 16), (16, 21), (21, 26), (26, 31)]
    bondnames = ('serial1', 'serial2', 'serial3', 'serial4', 'serial5')

    # CRYST1 record
    # COLUMNS       DATA TYPE      FIELD         DEFINITION
    # -------------------------------------------------------------
    #  1 -  6       Record name    "CRYST1"
    #  7 - 15       Real(9.3)      a             a (Angstroms).
    # 16 - 24       Real(9.3)      b             b (Angstroms).
    # 25 - 33       Real(9.3)      c             c (Angstroms).
    # 34 - 40       Real(7.2)      alpha         alpha (degrees).
    # 41 - 47       Real(7.2)      beta          beta (degrees).
    # 48 - 54       Real(7.2)      gamma         gamma (degrees).
    # 56 - 66       LString        sGroup        Space group.
    # 67 - 70       Integer        z             Z value.
    cryst1colspecs = [(6, 15), (15, 24), (24, 33), (33, 40), (40, 47), (47, 54), (55, 66), (66, 70)]
    cryst1names = ('a', 'b', 'c', 'alpha', 'beta', 'gamma', 'sGroup', 'z')

    # Guessing columns for REMARK 290 SMTRY from the example since the specs don't define them
    #           1         2         3         4         5         6         7
    # 01234567890123456789012345678901234567890123456789012345678901234567890
    # REMARK 290   SMTRY1   1  1.000000  0.000000  0.000000        0.00000
    # REMARK 290   SMTRY2   1  0.000000  1.000000  0.000000        0.00000
    # REMARK 290   SMTRY3   1  0.000000  0.000000  1.000000        0.00000
    # REMARK 290   SMTRY1   2 -1.000000  0.000000  0.000000       36.30027
    # REMARK 290   SMTRY2   2  0.000000 -1.000000  0.000000        0.00000
    # REMARK 290   SMTRY3   2  0.000000  0.000000  1.000000       59.50256
    #
    # Guessing columns for REMARK 350 BIOMT from the example since the specs don't define them
    # REMARK 350   BIOMT1   1 -0.981559  0.191159  0.000000        0.00000
    # REMARK 350   BIOMT2   1 -0.191159 -0.981559  0.000000        0.00000
    # REMARK 350   BIOMT3   1  0.000000  0.000000  1.000000      -34.13878
    symmetrycolspecs = [(20, 23), (23, 33), (33, 43), (43, 53), (53, 68)]
    symmetrynames = ('idx', 'rot1', 'rot2', 'rot3', 'trans')

    def concatCoords(coords, coorddata):
        # Parses the buffered ATOM/HETATM lines of one model and stacks them as a new frame on axis 2
        if coorddata.tell() != 0:  # Not empty
            coorddata.seek(0)
            parsedcoor = read_fwf(coorddata, colspecs=coordcolspecs, names=coordnames, na_values=_NA_VALUES,
                                  keep_default_na=False)
            if coords is None:
                coords = np.zeros((len(parsedcoor), 3, 0), dtype=np.float32)
            currcoords = np.vstack((parsedcoor.x, parsedcoor.y, parsedcoor.z)).T
            if coords.shape[0] != currcoords.shape[0]:
                logger.warning('Different number of atoms read in different MODELs in the PDB file. '
                               'Keeping only the first {} model(s)'.format(coords.shape[2]))
                return coords
            coords = np.append(coords, currcoords[:, :, np.newaxis], axis=2)
        return coords

    teridx = []
    currter = 0
    topoend = False

    cryst1data = io.StringIO()
    topodata = io.StringIO()
    conectdata = io.StringIO()
    coorddata = io.StringIO()
    symmetrydata = io.StringIO()

    coords = None

    try:
        with open(filename, 'r') as f:
            for line in f:
                isatom = line.startswith(('ATOM', 'HETATM'))
                if line.startswith('CRYST1'):
                    cryst1data.write(line)
                if isatom:
                    coorddata.write(line)
                if isatom and not topoend:
                    # The topology is only taken from the first model
                    topodata.write(line)
                    teridx.append(str(currter))
                if line.startswith('TER'):
                    currter += 1
                # pdbqt should not stop reading at ENDROOT or ENDBRANCH
                if (mode == 'pdb' and line.startswith('END')) or \
                        (mode == 'pdbqt' and line.startswith('ENDMDL')):
                    topoend = True
                if line.startswith('CONECT'):
                    conectdata.write(line)
                if line.startswith('MODEL'):
                    coords = concatCoords(coords, coorddata)
                    coorddata = io.StringIO()
                # TODO: Support BIOMT fields. It's a bit more complicated. Can't be done with pandas
                if line.startswith('REMARK 290   SMTRY'):
                    symmetrydata.write(line)
    finally:
        # The file is not needed after this point. Removing the download here also cleans it up
        # when reading fails.
        if istempfile:
            os.unlink(filename)

    cryst1data.seek(0)
    topodata.seek(0)
    conectdata.seek(0)
    symmetrydata.seek(0)

    coords = concatCoords(coords, coorddata)

    parsedbonds = read_fwf(conectdata, colspecs=bondcolspecs, names=bondnames, na_values=_NA_VALUES,
                           keep_default_na=False)
    parsedcryst1 = read_fwf(cryst1data, colspecs=cryst1colspecs, names=cryst1names, na_values=_NA_VALUES,
                            keep_default_na=False)
    parsedtopo = read_fwf(topodata, colspecs=topocolspecs, names=toponames, na_values=_NA_VALUES,
                          keep_default_na=False)  # , dtype=topodtypes)
    parsedsymmetry = read_fwf(symmetrydata, colspecs=symmetrycolspecs, names=symmetrynames,
                              na_values=_NA_VALUES, keep_default_na=False)

    # if 'chargesign' in parsedtopo and not np.all(parsedtopo.chargesign.isnull()):
    #    parsedtopo.loc[parsedtopo.chargesign == '-', 'charge'] *= -1

    # Fixing PDB format charges which can come after the number
    if parsedtopo.charge.dtype == 'object':
        # `== True` is intentional: str.match yields NaN for non-string entries
        minuses = np.where(parsedtopo.charge.str.match(r'\d\-') == True)[0]
        pluses = np.where(parsedtopo.charge.str.match(r'\d\+') == True)[0]
        for m in minuses:
            parsedtopo.loc[m, 'charge'] = int(parsedtopo.charge[m][0]) * -1
        for p in pluses:
            parsedtopo.loc[p, 'charge'] = int(parsedtopo.charge[p][0])
        parsedtopo.loc[parsedtopo.charge.isnull(), 'charge'] = 0

    # Fixing hexadecimal index and resids
    # Support for reading hexadecimal
    if parsedtopo.serial.dtype == 'object':
        logger.warning('Non-integer values were read from the PDB "serial" field. Dropping PDB values and assigning new ones.')
        parsedtopo.serial = sequenceID(parsedtopo.serial)
    if parsedtopo.resid.dtype == 'object':
        logger.warning('Non-integer values were read from the PDB "resid" field. Dropping PDB values and assigning new ones.')
        parsedtopo.resid = sequenceID(parsedtopo.resid)
    if parsedtopo.insertion.dtype == np.float64 and not np.all(np.isnan(parsedtopo.insertion)):
        # Trying to minimize damage from resids overflowing into insertion field
        logger.warning('Integer values detected in "insertion" field. Your resids might be overflowing. Take care')
        parsedtopo.insertion = [str(int(x)) if not np.isnan(x) else '' for x in parsedtopo.insertion]

    if len(parsedtopo) > 99999:
        logger.warning('Reading PDB file with more than 99999 atoms. Bond information can be wrong.')

    crystalinfo = {}
    if len(parsedcryst1):
        crystalinfo = parsedcryst1.iloc[0].to_dict()
        # Only a string space group can be split; missing (NaN) or numeric values are kept as read
        if isinstance(crystalinfo['sGroup'], str):
            crystalinfo['sGroup'] = crystalinfo['sGroup'].split()
    if len(parsedsymmetry):
        numcopies = int(len(parsedsymmetry) / 3)  # Three SMTRY rows per symmetry operation
        crystalinfo['numcopies'] = numcopies
        # `.values` replaces DataFrame.as_matrix(), which no longer exists in current pandas versions
        crystalinfo['rotations'] = parsedsymmetry[['rot1', 'rot2', 'rot3']].values.reshape((numcopies, 3, 3))
        crystalinfo['translations'] = parsedsymmetry['trans'].values.reshape((numcopies, 3))

    topo = Topology(parsedtopo)

    # Bond formatting part
    serials = parsedtopo.serial.values
    # if isinstance(serials[0], str) and np.any(serials == '*****'):
    #     logger.info('Non-integer serials were read. For safety we will discard all bond information and serials will be assigned automatically.')
    #     topo.serial = np.arange(1, len(serials)+1, dtype=int)
    if np.max(parsedbonds.max()) > np.max(serials):
        logger.info('Bond indexes in PDB file exceed atom indexes. For safety we will discard all bond information.')
    else:
        # Lookup table from PDB serial to 0-based atom index. NaN marks serials which do not exist.
        mapserials = np.empty(np.max(serials) + 1)
        mapserials[:] = np.nan
        mapserials[serials] = np.arange(len(serials))
        # Iterating the plain value rows is much faster than a `.loc` lookup per CONECT record
        for row in parsedbonds.values.tolist():
            for b in range(1, 5):
                if not np.isnan(row[b]):
                    topo.bonds.append([int(row[0]), int(row[b])])
        topo.bonds = np.array(topo.bonds, dtype=np.uint32)
        if topo.bonds.size != 0:
            mappedbonds = mapserials[topo.bonds[:]]
            wrongidx, _ = np.where(np.isnan(mappedbonds))  # Some PDBs have bonds to non-existing serials... go figure
            if len(wrongidx):
                logger.info('Discarding {} bonds to non-existing indexes in the PDB file.'.format(len(wrongidx)))
            mappedbonds = np.delete(mappedbonds, wrongidx, axis=0)
            topo.bonds = np.array(mappedbonds, dtype=np.uint32)

    if len(topo.segid) == 0 and currter != 0:  # If no segid was read, use the TER rows to define segments
        topo.segid = teridx

    topo.crystalinfo = crystalinfo
    traj = Trajectory(coords=coords)
    return topo, traj
def _charmmLipid2Amber(mol):
    """ Convert a CHARMM lipid membrane to AMBER format

    Atoms and residues are renamed following the rules read from `charmmlipid2amber.csv`, the atoms of every
    converted residue are reordered according to those rules and each lipid delimited by a pair of "ter"
    atoms gets fresh resids and an alternating 'L0' / 'L1' segid.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the membrane

    Returns
    -------
    newmol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        A new Molecule object with the membrane converted to AMBER
    """
    translation = _readcsvdict(os.path.join(home(), 'builder', 'charmmlipid2amber.csv'))

    numatoms = mol.numAtoms
    # Sort key of every atom. Starts as the identity and gets overwritten for the converted atoms.
    neworder = np.arange(numatoms)
    firstatoms = np.zeros(numatoms, dtype=bool)  # Atoms with order 0 (with or without ters)
    terstarts = np.zeros(numatoms, dtype=bool)  # First atoms with ter
    terends = np.zeros(numatoms, dtype=bool)  # Last atoms with ter

    mol = mol.copy()
    incrresids = sequenceID((mol.resid, mol.insertion, mol.segid))

    # Walk through the translation table, one CHARMM residue name at a time
    for oldresname, atommap in translation.items():
        inresidue = mol.resname == oldresname
        if not np.any(inresidue):
            continue

        # Match against a snapshot of the names so that an atom renamed by one rule
        # is not picked up again by a later rule
        orignames = mol.name.copy()
        for oldatom, rule in atommap.items():
            hit = np.zeros(len(orignames), dtype=bool)
            hit[inresidue] = orignames[inresidue] == oldatom

            mol.set('resname', rule.replaceresname, sel=hit)
            mol.set('name', rule.replaceatom, sel=hit)
            neworder[hit] = rule.order

            isfirst = rule.order == 0
            islast = rule.order == rule.natoms - 1
            if isfirst:
                firstatoms[hit] = True
                if rule.ter:
                    terstarts[hit] = True
            if islast and rule.ter:
                terends[hit] = True

    # The rule orders are relative to the residue, so shift them by the index at which the residue starts
    for resnum in np.unique(incrresids[firstatoms]):
        members = np.where(incrresids == resnum)[0]
        beg = members[0]
        fin = members[-1] + 1
        neworder[beg:fin] = neworder[beg:fin] + beg

    idx = np.argsort(neworder)
    _reorderMol(mol, idx)

    # Positions of the ter atoms after the reordering
    terstarts = np.where(terstarts[idx])[0]
    terends = np.where(terends[idx])[0]

    for i, start in enumerate(terstarts):
        lipidmask = np.zeros(len(mol.resid), dtype=bool)
        lipidmask[start:terends[i] + 1] = True
        mol.set('resid', sequenceID(mol.get('resname', sel=lipidmask)), sel=lipidmask)
        mol.set('segid', 'L{}'.format(i % 2), sel=lipidmask)

    return mol
def ionizePlace(mol, anion_resname, cation_resname, anion_name, cation_name, nanion, ncation, dfrom=5, dbetween=5,
                segname=None):
    """Place a given number of negative and positive ions in the solvent.

    Replaces water molecules as long as they respect the given distance criteria.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object
    anion_resname : str
        Resname of the added anions
    cation_resname : str
        Resname of the added cations
    anion_name : str
        Name of the added anions
    cation_name : str
        Name of the added cations
    nanion : int
        Number of anions to add
    ncation : int
        Number of cations to add
    dfrom : float
        Min distance of ions from molecule
    dbetween : float
        Min distance between ions
    segname : str
        Segment name, handed to `_getSegname`. Note that the ions themselves are currently always
        added with chain and segid 'I'.

    Returns
    -------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The molecule with the ions added

    Raises
    ------
    NameError
        If no water is further than `dfrom` from the rest of the system, or if the ions could not be
        placed within 10 attempts.
    """
    newmol = mol.copy()

    logger.info('Min distance of ions from molecule: ' + str(dfrom) + 'A')
    logger.info('Min distance between ions: ' + str(dbetween) + 'A')
    logger.info('Placing {:d} anions and {:d} cations.'.format(nanion, ncation))

    if (nanion + ncation) == 0:
        return newmol

    segname = _getSegname(newmol, segname)
    nions = nanion + ncation

    # The beta field is borrowed to tag every atom with an id of its residue. Back it up as a copy:
    # a plain reference could be overwritten by the `set` call right below.
    betabackup = newmol.beta.copy()
    newmol.set('beta', sequenceID((newmol.resid, newmol.segid)))

    # Find water oxygens to replace with ions
    ntries = 0
    maxtries = 10
    while True:
        ionlist = []
        watindex = newmol.atomselect('noh and water and not (within ' + str(dfrom) + ' of not water)', indexes=True)

        if len(watindex) == 0:
            raise NameError(
                'No waters could be found further than ' + str(dfrom) +
                ' from other molecules to be replaced by ions. You might need to solvate with a bigger box or disable the ionize property when building.'
            )

        while len(ionlist) < nions:
            if len(watindex) == 0:
                break
            randwat = np.random.randint(len(watindex))
            thision = watindex[randwat]
            addit = True
            if len(ionlist) != 0:  # Check for distance from previous ions
                ionspos = newmol.get('coords', sel=ionlist)
                thispos = newmol.get('coords', sel=thision)
                dists = distance.cdist(np.atleast_2d(ionspos), np.atleast_2d(thispos), metric='euclidean')
                if np.any(dists < dbetween):
                    addit = False
            if addit:
                ionlist.append(thision)
            # Every drawn water is consumed, accepted or not, so this loop always terminates
            watindex = np.delete(watindex, randwat)

        if len(ionlist) == nions:
            break

        ntries += 1
        if ntries == maxtries:
            raise NameError(
                'Failed to add ions after ' + str(maxtries) +
                ' attempts. Try decreasing the `dfrom` and `dbetween` parameters, decreasing ion concentration or making a larger water box.'
            )

    # Delete waters but keep their coordinates
    waterpos = np.atleast_2d(newmol.get('coords', ionlist))
    # Select all atoms which share the residue tag of one of the chosen oxygens, i.e. the whole waters
    betasel = np.zeros(newmol.numAtoms, dtype=bool)
    for b in newmol.beta[ionlist]:
        betasel |= newmol.beta == b
    sel = np.where(betasel)[0]
    newmol.remove(sel, _logger=False)
    betabackup = np.delete(betabackup, sel)

    # Add the ions
    randidx = np.random.permutation(np.size(waterpos, 0))
    atom = Molecule()
    atom.empty(1)
    atom.set('chain', 'I')
    atom.set('segid', 'I')
    for i in range(nanion):
        atom.set('name', anion_name)
        atom.set('resname', anion_resname)
        atom.set('resid', newmol.resid[-1] + 1)
        atom.coords = waterpos[randidx[i], :]
        newmol.insert(atom, len(newmol.name))
    for i in range(ncation):
        atom.set('name', cation_name)
        atom.set('resname', cation_resname)
        atom.set('resid', newmol.resid[-1] + 1)
        atom.coords = waterpos[randidx[i + nanion], :]
        newmol.insert(atom, len(newmol.name))

    # Restoring the original betas. The ions were appended at the end and are left out of the selection.
    sel = np.ones(len(betabackup) + nions, dtype=bool)
    sel[len(betabackup)::] = False
    newmol.set('beta', betabackup, sel=sel)
    return newmol
def build(mol, ff=None, topo=None, param=None, prefix='structure', outdir='./build', caps=None, ionize=True, saltconc=0,
          saltanion=None, saltcation=None, disulfide=None, tleap=None, execute=True, atomtypes=None,
          offlibraries=None, gbsa=False, igb=2):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available forcefield files.
        Default: :func:`amber.defaultFf <htmd.builder.amber.defaultFf>`
    topo : list of str
        A list of topology `prepi/prep/in` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available topology files.
        Default: :func:`amber.defaultTopo <htmd.builder.amber.defaultTopo>`
    param : list of str
        A list of parameter `frcmod` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available parameter files.
        Default: :func:`amber.defaultParam <htmd.builder.amber.defaultParam>`
    prefix : str
        The prefix for the generated prmtop, crd and pdb files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps for a particular protein segment.
        e.g. caps['P'] = ['ACE', 'NME'] or caps['P'] = ['none', 'none']. Default: will apply ACE and NME caps to every
        protein segment.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : list of pairs of atomselection strings
        If None it will guess disulfide bonds. Otherwise provide a list pairs of atomselection strings for each pair of
        residues forming the disulfide bridge.
    tleap : str
        Path to tleap executable used to build the system for AMBER. If None it is located with `_findTleap`.
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.
        In that case None is returned.
    atomtypes : list of triplets
        Custom atom types defined by the user as ('type', 'element', 'hybrid') triplets
        e.g. (('C1', 'C', 'sp2'), ('CI', 'C', 'sp3')). Check `addAtomTypes` in AmberTools docs.
    offlibraries : str or list
        A path or a list of paths to OFF library files. Check `loadOFF` in AmberTools docs.
    gbsa : bool
        Modify radii for GBSA implicit water model
    igb : int
        GB model. Select: 1 for mbondi, 2 and 5 for mbondi2, 7 for bondi and 8 for mbondi3.
        Check section 4. The Generalized Born/Surface Area Model of the AMBER manual.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object

    Raises
    ------
    NameError
        If tleap cannot be found or executed, or if the input PDB / mol2 files cannot be written.
    RuntimeError
        If an atom type definition is not a triplet or an OFF library file does not exist.
    BuildError
        If errors are found in the tleap log or tleap produced no output.

    Example
    -------
    >>> from htmd.ui import *  # doctest: +SKIP
    >>> mol = Molecule("3PTB")
    >>> molbuilt = amber.build(mol, outdir='/tmp/build')  # doctest: +SKIP
    >>> # More complex example
    >>> disu = [['segid P and resid 157', 'segid P and resid 13'], ['segid K and resid 1', 'segid K and resid 25']]
    >>> molbuilt = amber.build(mol, outdir='/tmp/build', saltconc=0.15, disulfide=disu)  # doctest: +SKIP
    """
    # Remove pdb protein bonds as they can be regenerated by tleap. Keep non-protein bonds i.e. for ligands
    mol = mol.copy()
    _removeProteinBonds(mol)

    if tleap is None:
        tleap = _findTleap()
    elif shutil.which(tleap) is None:
        raise NameError('Could not find executable: `{}` in the PATH. Cannot build for AMBER.'.format(tleap))

    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = defaultFf()
    if topo is None:
        topo = defaultTopo()
    if param is None:
        param = defaultParam()
    if caps is None:
        caps = _defaultProteinCaps(mol)

    _missingSegID(mol)
    _checkMixedSegment(mol)

    mol = _charmmLipid2Amber(mol)

    _applyProteinCaps(mol, caps)

    if isinstance(ff, str):
        ff = [ff]

    # The context manager guarantees tleap.in is closed even if one of the steps below raises
    with open(os.path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        for i, force in enumerate(ff):
            if not os.path.isfile(force):
                force = _locateFile(force, 'ff', tleap)
                if force is None:
                    continue
            # Files are copied into outdir under an index-prefixed name so equal basenames cannot clash
            newname = 'ff{}_{}'.format(i, os.path.basename(force))
            shutil.copy(force, os.path.join(outdir, newname))
            f.write('source {}\n'.format(newname))
        f.write('\n')

        if gbsa:
            gbmodels = {1: 'mbondi', 2: 'mbondi2', 5: 'mbondi2', 7: 'bondi', 8: 'mbondi3'}
            f.write('set default PBradii {}\n\n'.format(gbmodels[igb]))

        # Adding custom atom types
        if atomtypes is not None:
            atomtypes = ensurelist(tocheck=atomtypes[0], tomod=atomtypes)
            f.write('addAtomTypes {\n')
            for at in atomtypes:
                if len(at) != 3:
                    raise RuntimeError('Atom type definitions have to be triplets. Check the AMBER documentation.')
                f.write(' {{ "{}" "{}" "{}" }}\n'.format(at[0], at[1], at[2]))
            f.write('}\n\n')

        # Loading OFF libraries
        if offlibraries is not None:
            offlibraries = ensurelist(offlibraries)
            # Each library needs its own index; reusing the forcefield loop counter would give all of them
            # the same prefix (and fail when no forcefield was given)
            for i, off in enumerate(offlibraries):
                if not os.path.isfile(off):
                    raise RuntimeError('Could not find off-library in location {}'.format(off))
                newname = 'offlib{}_{}'.format(i, os.path.basename(off))
                shutil.copy(off, os.path.join(outdir, newname))
                f.write('loadoff {}\n'.format(newname))

        # Loading frcmod parameters
        f.write('# Loading parameter files\n')
        for i, p in enumerate(param):
            if not os.path.isfile(p):
                p = _locateFile(p, 'param', tleap)
                if p is None:
                    continue
            newname = 'param{}_{}'.format(i, os.path.basename(p))
            shutil.copy(p, os.path.join(outdir, newname))
            f.write('loadamberparams {}\n'.format(newname))
        f.write('\n')

        # Loading prepi topologies
        f.write('# Loading prepi topologies\n')
        for i, t in enumerate(topo):
            if not os.path.isfile(t):
                t = _locateFile(t, 'topo', tleap)
                if t is None:
                    continue
            newname = 'topo{}_{}'.format(i, os.path.basename(t))
            shutil.copy(t, os.path.join(outdir, newname))
            f.write('loadamberprep {}\n'.format(newname))
        f.write('\n')

        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        # Atoms which carry an atomtype are written per segment to mol2 files and combined with the PDB part
        hastype = mol.atomtype != ''
        if np.sum(hastype) != 0:
            logger.debug('Writing mol2 files for input to tleap.')
            segs = np.unique(mol.segid[hastype])
            combstr = 'mol = combine {mol'
            for s in segs:
                name = 'segment{}'.format(s)
                mol2name = os.path.join(outdir, '{}.mol2'.format(name))
                mol.write(mol2name, hastype & (mol.segid == s))
                if not os.path.isfile(mol2name):
                    raise NameError('Could not write a mol2 file out of the given Molecule.')
                f.write('# Loading the rest of the system\n')
                f.write('{} = loadmol2 {}.mol2\n\n'.format(name, name))
                combstr += ' {}'.format(name)
            combstr += '}\n\n'
            f.write(combstr)

        # Write patches for disulfide bonds (only after ionizing)
        if not ionize:
            # TODO: Remove this once we deprecate the class
            from htmd.builder.builder import DisulfideBridge
            from htmd.molecule.molecule import UniqueResidueID
            if disulfide is not None and len(disulfide) != 0 and isinstance(disulfide[0], DisulfideBridge):
                newdisu = []
                for d in disulfide:
                    r1 = UniqueResidueID.fromMolecule(mol, 'resid {} and segname {}'.format(d.resid1, d.segid1))
                    r2 = UniqueResidueID.fromMolecule(mol, 'resid {} and segname {}'.format(d.resid2, d.segid2))
                    newdisu.append([r1, r2])
                disulfide = newdisu
            # TODO: Remove up to here ----------------------

            if disulfide is not None and len(disulfide) != 0 and isinstance(disulfide[0][0], str):
                disulfide = convertDisulfide(mol, disulfide)

            if disulfide is None:
                logger.info('Detecting disulfide bonds.')
                disulfide = detectDisulfideBonds(mol)

            # Fix structure to match the disulfide patching
            if len(disulfide) != 0:
                torem = np.zeros(mol.numAtoms, dtype=bool)
                f.write('# Adding disulfide bonds\n')
                # Convert to stupid amber residue numbering. No atom is removed or renumbered inside the
                # loop, so this only needs to be computed once.
                uqseqid = sequenceID((mol.resid, mol.insertion, mol.segid)) + mol.resid[0]
                for d in disulfide:
                    # Rename the residues to CYX if there is a disulfide bond
                    atoms1 = d[0].selectAtoms(mol, indexes=False)
                    atoms2 = d[1].selectAtoms(mol, indexes=False)
                    mol.resname[atoms1] = 'CYX'
                    mol.resname[atoms2] = 'CYX'
                    # Remove (eventual) HG hydrogens on these CYS (from proteinPrepare)
                    torem |= (atoms1 & (mol.name == 'HG')) | (atoms2 & (mol.name == 'HG'))
                    uqres1 = int(np.unique(uqseqid[atoms1]))
                    uqres2 = int(np.unique(uqseqid[atoms2]))
                    f.write('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))
                f.write('\n')
                mol.remove(torem, _logger=False)

        f.write('# Writing out the results\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
    logger.debug('Writing PDB file for input to tleap.')
    pdbname = os.path.join(outdir, 'input.pdb')
    # mol2 files have atomtype, here we only write parts not coming from mol2
    # We need to write the input.pdb at the end since we modify the resname for disulfide bridges in mol
    mol.write(pdbname, mol.atomtype == '')
    if not os.path.isfile(pdbname):
        raise NameError('Could not write a PDB file out of the given Molecule.')

    # tleap saves its results under `prefix`, so look for them under that name
    crdfile = os.path.join(outdir, prefix + '.crd')
    prmtopfile = os.path.join(outdir, prefix + '.prmtop')

    molbuilt = None
    if execute:
        # Source paths of extra dirs (our dirs, not amber default)
        htmdamberdir = os.path.abspath(os.path.join(home(), 'builder', 'amberfiles'))
        sourcepaths = [htmdamberdir]
        sourcepaths += [os.path.join(htmdamberdir, os.path.dirname(fname))
                        for fname in ff if os.path.isfile(os.path.join(htmdamberdir, fname))]
        extrasource = []
        for p in sourcepaths:
            extrasource.append('-I')
            extrasource.append('{}'.format(p))
        logpath = os.path.abspath(os.path.join(outdir, 'log.txt'))
        logger.info('Starting the build.')
        currdir = os.getcwd()
        os.chdir(outdir)
        try:
            with open(logpath, 'w') as logfile:
                try:
                    call([tleap] + extrasource + ['-f', './tleap.in'], stdout=logfile)
                except Exception as err:
                    raise NameError('tleap failed at execution') from err
            errors = _logParser(logpath)
        finally:
            os.chdir(currdir)  # Always go back, even when tleap could not be started
        if errors:
            raise BuildError(errors + ['Check {} for further information on errors in building.'.format(logpath)])
        logger.info('Finished building.')

        if os.path.isfile(crdfile) and os.path.isfile(prmtopfile) and \
                os.path.getsize(crdfile) != 0 and os.path.getsize(prmtopfile) != 0:
            molbuilt = Molecule(prmtopfile)
            molbuilt.read(crdfile)
        else:
            raise BuildError('No structure pdb/prmtop file was generated. Check {} for errors in building.'.format(logpath))

        if ionize:
            shutil.move(crdfile, os.path.join(outdir, prefix + '.noions.crd'))
            shutil.move(prmtopfile, os.path.join(outdir, prefix + '.noions.prmtop'))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(totalcharge, nwater, saltconc=saltconc,
                                                                            anion=saltanion, cation=saltcation)
            newmol = ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation)
            # Redo the whole build but now with ions included. gbsa/igb have to be forwarded as well,
            # otherwise the final system would be built without the requested PBradii.
            return build(newmol, ff=ff, topo=topo, param=param, prefix=prefix, outdir=outdir, caps={}, ionize=False,
                         execute=execute, saltconc=saltconc, disulfide=disulfide, tleap=tleap, atomtypes=atomtypes,
                         offlibraries=offlibraries, gbsa=gbsa, igb=igb)

    if molbuilt is not None:  # Nothing was built when execute is False
        tmpbonds = molbuilt.bonds
        molbuilt.bonds = []  # Removing the bonds to speed up writing
        molbuilt.write(os.path.join(outdir, prefix + '.pdb'))
        molbuilt.bonds = tmpbonds  # Restoring the bonds
    return molbuilt
def build(mol, ff=None, topo=None, param=None, prefix='structure', outdir='./build', caps=None, ionize=True,
          saltconc=0, saltanion=None, saltcation=None, disulfide=None, tleap='tleap', execute=True,
          atomtypes=None, offlibraries=None):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available forcefield files.
        Default: :func:`amber.defaultFf <htmd.builder.amber.defaultFf>`
    topo : list of str
        A list of topology `prepi` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available topology files.
        Default: :func:`amber.defaultTopo <htmd.builder.amber.defaultTopo>`
    param : list of str
        A list of parameter `frcmod` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available parameter files.
        Default: :func:`amber.defaultParam <htmd.builder.amber.defaultParam>`
    prefix : str
        The prefix for the generated prmtop, crd and pdb files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps for a particular protein segment.
        e.g. caps['P'] = ['ACE', 'NME'] or caps['P'] = ['none', 'none']. Default: will apply ACE and NME caps to every
        protein segment.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : list of :class:`DisulfideBridge <htmd.builder.builder.DisulfideBridge>` objects
        If None it will guess disulfide bonds. Otherwise provide a list of `DisulfideBridge` objects.
    tleap : str
        Path to tleap executable used to build the system for AMBER
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.
    atomtypes : list of triplets
        Custom atom types defined by the user as ('type', 'element', 'hybrid') triplets
        e.g. (('C1', 'C', 'sp2'), ('CI', 'C', 'sp3')). Check `addAtomTypes` in AmberTools docs.
    offlibraries : str or list
        A path or a list of paths to OFF library files. Check `loadOFF` in AmberTools docs.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object. None if `execute` is False.

    Raises
    ------
    NameError
        If the tleap executable cannot be found or fails to start, if the input PDB/mol2 files cannot be written or
        if tleap did not produce non-empty prmtop/crd files.
    RuntimeError
        If an entry of `atomtypes` is not a triplet.

    Example
    -------
    >>> from htmd.ui import *
    >>> mol = Molecule("3PTB")
    >>> molbuilt = amber.build(mol, outdir='/tmp/build')  # doctest: +SKIP
    ...
    >>> # More complex example
    >>> disu = [DisulfideBridge('P', 157, 'P', 13), DisulfideBridge('K', 1, 'K', 25)]
    >>> molbuilt = amber.build(mol, outdir='/tmp/build', saltconc=0.15, disulfide=disu)  # doctest: +SKIP
    """
    # Remove pdb protein bonds as they can be regenerated by tleap. Keep non-protein bonds i.e. for ligands
    mol = mol.copy()
    _removeProteinBonds(mol)

    if shutil.which(tleap) is None:
        raise NameError('Could not find executable: `{}` in the PATH. Cannot build for AMBER.'.format(tleap))

    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = defaultFf()
    if topo is None:
        topo = defaultTopo()
    if param is None:
        param = defaultParam()
    if caps is None:
        caps = _defaultProteinCaps(mol)

    _missingSegID(mol)
    _checkMixedSegment(mol)
    _checkResidueInsertions(mol)

    mol = _charmmLipid2Amber(mol)

    _applyProteinCaps(mol, caps)

    if isinstance(ff, str):
        ff = [ff]

    # The context manager guarantees tleap.in is flushed and closed even if one of the checks below raises
    with open(os.path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        for force in ff:
            f.write('source ' + force + '\n')
        f.write('\n')

        # Adding custom atom types
        if atomtypes is not None:
            atomtypes = ensurelist(tocheck=atomtypes[0], tomod=atomtypes)
            f.write('addAtomTypes {\n')
            for at in atomtypes:
                if len(at) != 3:
                    raise RuntimeError('Atom type definitions have to be triplets. Check the AMBER documentation.')
                f.write('    {{ "{}" "{}" "{}" }}\n'.format(at[0], at[1], at[2]))
            f.write('}\n\n')

        # Loading OFF libraries
        if offlibraries is not None:
            if not isinstance(offlibraries, (list, tuple)):
                offlibraries = [offlibraries, ]
            for off in offlibraries:
                f.write('loadoff {}\n\n'.format(off))

        # Loading frcmod parameters
        f.write('# Loading parameter files\n')
        for p in param:
            try:
                shutil.copy(p, outdir)
                f.write('loadamberparams ' + os.path.basename(p) + '\n')
            except OSError:
                # Not a local file: let tleap resolve the name through its own search path
                f.write('loadamberparams ' + p + '\n')
                logger.info("File {:s} not found, assuming its present on the standard Amber location".format(p))
        f.write('\n')

        # Loading prepi topologies
        f.write('# Loading prepi topologies\n')
        for t in topo:
            shutil.copy(t, outdir)
            f.write('loadamberprep ' + os.path.basename(t) + '\n')
        f.write('\n')

        # Detect disulfide bridges if not defined by user
        if disulfide is None and not ionize:
            logger.info('Detecting disulfide bonds.')
            disulfide = detectDisulfideBonds(mol)

        # Fix structure to match the disulfide patching
        if not ionize and len(disulfide) != 0:
            for d in disulfide:
                # Rename the residues to CYX if there is a disulfide bond
                atoms1 = (mol.segid == d.segid1) & (mol.resid == d.resid1)
                atoms2 = (mol.segid == d.segid2) & (mol.resid == d.resid2)
                mol.resname[atoms1] = 'CYX'
                mol.resname[atoms2] = 'CYX'
                # Remove (eventual) HG hydrogens on these CYS (from proteinPrepare)
                mol.remove(atoms1 & (mol.name == 'HG'), _logger=False)
                mol.remove(atoms2 & (mol.name == 'HG'), _logger=False)

        # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
        logger.debug('Writing PDB file for input to tleap.')
        pdbname = os.path.join(outdir, 'input.pdb')

        # mol2 files have atomtype, here we only write parts not coming from mol2
        mol.write(pdbname, mol.atomtype == '')
        if not os.path.isfile(pdbname):
            raise NameError('Could not write a PDB file out of the given Molecule.')

        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        if np.sum(mol.atomtype != '') != 0:
            logger.debug('Writing mol2 files for input to tleap.')
            segs = np.unique(mol.segid[mol.atomtype != ''])
            combstr = 'mol = combine {mol'
            for s in segs:
                name = 'segment{}'.format(s)
                mol2name = os.path.join(outdir, '{}.mol2'.format(name))
                mol.write(mol2name, (mol.atomtype != '') & (mol.segid == s))
                if not os.path.isfile(mol2name):
                    raise NameError('Could not write a mol2 file out of the given Molecule.')
                f.write('# Loading the rest of the system\n')
                f.write('{} = loadmol2 {}.mol2\n\n'.format(name, name))
                combstr += ' {}'.format(name)
            combstr += '}\n\n'
            f.write(combstr)

        # Write patches for disulfide bonds (only after ionizing)
        if not ionize and len(disulfide) != 0:
            f.write('# Adding disulfide bonds\n')
            # Convert to amber residue numbering. It does not depend on the bridge, so compute it only once
            uqseqid = sequenceID((mol.resid, mol.insertion, mol.segid)) + mol.resid[0]
            for d in disulfide:
                uqres1 = int(np.unique(uqseqid[(mol.segid == d.segid1) & (mol.resid == d.resid1)]))
                uqres2 = int(np.unique(uqseqid[(mol.segid == d.segid2) & (mol.resid == d.resid2)]))
                f.write('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))
            f.write('\n')

        f.write('# Writing out the results\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    if not execute:
        # Only the tleap input script was requested, there is no built system to return
        return None

    # Source paths of extra dirs (our dirs, not amber default)
    htmdamberdir = os.path.abspath(os.path.join(home(), 'builder', 'amberfiles'))
    sourcepaths = [htmdamberdir]
    sourcepaths += [os.path.join(htmdamberdir, os.path.dirname(f))
                    for f in ff if os.path.isfile(os.path.join(htmdamberdir, f))]
    extrasource = []
    for p in sourcepaths:
        extrasource.append('-I')
        extrasource.append('{}'.format(p))
    logpath = os.path.abspath(os.path.join(outdir, 'log.txt'))
    logger.info('Starting the build.')
    currdir = os.getcwd()
    # tleap.in references its inputs and outputs relative to outdir, so tleap has to run from there
    os.chdir(outdir)
    try:
        with open(logpath, 'w') as logfile:
            cmd = [tleap] + extrasource + ['-f', './tleap.in']
            try:
                call(cmd, stdout=logfile)
            except Exception as err:
                raise NameError('tleap failed at execution') from err
    finally:
        # Always give the caller back its working directory, also when tleap fails
        os.chdir(currdir)
    logger.info('Finished building.')

    # The output names follow the prefix given to saveamberparm in tleap.in
    crdfile = os.path.join(outdir, prefix + '.crd')
    prmtopfile = os.path.join(outdir, prefix + '.prmtop')
    if os.path.exists(crdfile) and os.path.exists(prmtopfile) and \
            os.path.getsize(crdfile) != 0 and os.path.getsize(prmtopfile) != 0:
        molbuilt = Molecule(prmtopfile)
        molbuilt.read(crdfile)
    else:
        raise NameError('No structure pdb/prmtop file was generated. Check {} for errors in building.'.format(logpath))

    if ionize:
        shutil.move(crdfile, os.path.join(outdir, prefix + '.noions.crd'))
        shutil.move(prmtopfile, os.path.join(outdir, prefix + '.noions.prmtop'))
        totalcharge = np.sum(molbuilt.charge)
        nwater = np.sum(molbuilt.atomselect('water and noh'))
        anion, cation, anionatom, cationatom, nanion, ncation = ionizef(totalcharge, nwater, saltconc=saltconc,
                                                                        ff='amber', anion=saltanion,
                                                                        cation=saltcation)
        newmol = ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation)
        # Redo the whole build but now with ions included
        return build(newmol, ff=ff, topo=topo, param=param, prefix=prefix, outdir=outdir, caps={}, ionize=False,
                     execute=execute, saltconc=saltconc, disulfide=disulfide, tleap=tleap, atomtypes=atomtypes,
                     offlibraries=offlibraries)

    tmpbonds = molbuilt.bonds
    molbuilt.bonds = []  # Removing the bonds to speed up writing
    molbuilt.write(os.path.join(outdir, prefix + '.pdb'))
    molbuilt.bonds = tmpbonds  # Restoring the bonds
    return molbuilt
def _charmmLipid2Amber(mol):
    """ Convert a CHARMM lipid membrane to AMBER format

    Atoms and residues are renamed following the rules read from `charmmlipid2amber.csv`, the atoms of every
    converted residue are reordered as those rules dictate and each lipid delimited by TER rules is placed in its
    own segment (`L1`, `L2`, ...).

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the membrane

    Returns
    -------
    newmol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        A new Molecule object with the membrane converted to AMBER
    """
    resdict = _readcsvdict(path.join(home(), 'builder', 'charmmlipid2amber.csv'))

    natoms = mol.numAtoms
    # Target position of every atom. Filled in while renaming, used for the reordering afterwards
    neworder = np.arange(natoms)
    begs = np.zeros(natoms, dtype=bool)
    fins = np.zeros(natoms, dtype=bool)
    begters = np.zeros(natoms, dtype=bool)
    finters = np.zeros(natoms, dtype=bool)

    # The beta field temporarily stores a sequential residue index. The user values are restored later
    betabackup = mol.beta.copy()
    mol = mol.copy()
    mol.set('beta', sequenceID(mol.resid))

    for res, atommap in resdict.items():
        inres = mol.resname == res
        if not np.any(inres):
            continue

        # Work on a snapshot of the names, otherwise already renamed atoms could be matched a second time
        names = mol.name.copy()
        for atom, rule in atommap.items():
            hit = inres & (names == atom)
            mol.set('resname', rule.replaceresname, sel=hit)
            mol.set('name', rule.replaceatom, sel=hit)
            neworder[hit] = rule.order

            if rule.order == 0:  # First atom (with or without ters)
                begs[hit] = True
                if rule.ter:  # First atom with ter
                    begters[hit] = True
            if rule.order == rule.natoms - 1:  # Last atom (with or without ters)
                fins[hit] = True
                if rule.ter:  # Last atom with ter
                    finters[hit] = True

    # Turn the per-residue atom order into a global one by offsetting it with the first index of the residue
    for b in np.unique(mol.beta[begs]):
        members = np.where(mol.beta == b)[0]
        beg = members[0]
        fin = members[-1] + 1
        neworder[beg:fin] = neworder[beg:fin] + beg

    idx = np.argsort(neworder)
    mol.beta = betabackup
    _reorderMol(mol, idx)

    # Positions of the TER-delimited lipid starts and ends after the reordering
    begters = np.where(begters[idx])[0]
    finters = np.where(finters[idx])[0]

    if len(begters) > 999:
        raise NameError('More than 999 lipids. Cannot define separate segments for all of them.')

    for lipidnum, first in enumerate(begters, start=1):
        lipidsel = np.zeros(len(mol.resid), dtype=bool)
        lipidsel[first:finters[lipidnum - 1] + 1] = True
        mol.set('resid', sequenceID(mol.get('resname', sel=lipidsel)), sel=lipidsel)
        mol.set('segid', 'L' + str(lipidnum), sel=lipidsel)

    return mol
def _pickIonWaters(mol, candidates, nions, dbetween):
    """ Randomly pick up to `nions` atoms out of `candidates` which lie at least `dbetween` from each other. """
    picked = []
    pool = candidates
    while len(picked) < nions and len(pool) != 0:
        choice = np.random.randint(len(pool))
        thision = pool[choice]
        addit = True
        if len(picked) != 0:  # Check for distance from previous ions
            ionspos = mol.get('coords', sel=picked)
            thispos = mol.get('coords', sel=thision)
            dists = distance.cdist(np.atleast_2d(ionspos), np.atleast_2d(thispos), metric='euclidean')
            if np.any(dists < dbetween):
                addit = False
        if addit:
            picked.append(thision)
        # Accepted or not, this water is used up. A rejected one would be rejected again as ions are only added
        pool = np.delete(pool, choice)
    return picked


def ionizePlace(mol, anion_resname, cation_resname, anion_name, cation_name, nanion, ncation, dfrom=5, dbetween=5,
                segname=None):
    """Place a given number of negative and positive ions in the solvent.

    Replaces water molecules as long as they respect the given distance criteria.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object
    anion_resname : str
        Resname of the added anions
    cation_resname : str
        Resname of the added cations
    anion_name : str
        Name of the added anions
    cation_name : str
        Name of the added cations
    nanion : int
        Number of anions to add
    ncation : int
        Number of cations to add
    dfrom : float
        Min distance of ions from molecule
    dbetween : float
        Min distance between ions
    segname : str
        Segment name to add. Default: 'I'

    Returns
    -------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The molecule with the ions added

    Raises
    ------
    NameError
        If no water is further than `dfrom` from the rest of the system or if no valid placement of all the ions
        was found after 10 attempts.
    """
    newmol = mol.copy()

    logger.debug('Min distance of ions from molecule: ' + str(dfrom) + 'A')
    logger.debug('Min distance between ions: ' + str(dbetween) + 'A')
    logger.debug('Placing {:d} anions and {:d} cations.'.format(nanion,ncation))

    nions = nanion + ncation
    if nions == 0:
        return newmol

    # The beta field temporarily stores a unique residue index so that whole waters can be selected later
    betabackup = newmol.beta.copy()
    newmol.set('beta', sequenceID((newmol.resid, newmol.insertion, newmol.segid)))

    # Find water oxygens to replace with ions. The molecule is not modified while searching, so select them once
    candidates = newmol.atomselect('noh and water and not (within ' + str(dfrom) + ' of not water)', indexes=True)
    if len(candidates) == 0:
        raise NameError('No waters could be found further than ' + str(dfrom) + ' from other molecules to be replaced by ions. You might need to solvate with a bigger box or disable the ionize property when building.')

    maxtries = 10
    for _ in range(maxtries):
        ionlist = _pickIonWaters(newmol, candidates, nions, dbetween)
        if len(ionlist) == nions:
            break
    else:
        raise NameError('Failed to add ions after ' + str(maxtries) + " attempts. Try decreasing the 'dfrom' and 'dbetween' parameters, decreasing ion concentration or making a larger water box.")

    # Delete waters but keep their coordinates
    waterpos = np.atleast_2d(newmol.get('coords', ionlist))
    betasel = np.zeros(newmol.numAtoms, dtype=bool)
    for b in newmol.beta[ionlist]:
        betasel |= newmol.beta == b
    sel = np.where(betasel)[0]
    newmol.remove(sel, _logger=False)
    betabackup = np.delete(betabackup, sel)

    # Add the ions
    randidx = np.random.permutation(np.size(waterpos, 0))
    atom = Molecule()
    atom.empty(1)
    atom.set('chain', 'I')
    atom.set('segid', 'I' if segname is None else segname)
    slot = 0  # Index into the shuffled water positions, shared by anions and cations
    for name, resname, count in ((anion_name, anion_resname, nanion), (cation_name, cation_resname, ncation)):
        for _ in range(count):
            atom.set('name', name)
            atom.set('resname', resname)
            atom.set('resid', newmol.resid[-1] + 1)
            atom.coords = waterpos[randidx[slot], :]
            newmol.insert(atom, len(newmol.name))
            slot += 1

    # Restoring the original betas
    newmol.beta[:len(betabackup)] = betabackup
    return newmol