def test_tmscore(self):
    """Compare ``molTMscore`` results with stored reference values.

    The 'tmscore' data set (topology, trajectory and reference structure) is
    loaded and the TM-scores and RMSDs computed on the 'protein' selections
    must match the expected arrays within ``np.allclose`` tolerances.
    """
    from moleculekit.molecule import Molecule

    datadir = home(dataDir='tmscore')

    # Trajectory to be scored and the reference structure
    mol = Molecule(os.path.join(datadir, 'filtered.pdb'))
    mol.read(os.path.join(datadir, 'traj.xtc'))
    ref = Molecule(os.path.join(datadir, 'ntl9_2hbb.pdb'))

    tmscore, rmsd = molTMscore(
        mol, ref, mol.atomselect('protein'), ref.atomselect('protein')
    )

    expectedTMscore = np.array([
        0.21418524, 0.2367377, 0.23433833, 0.21362964, 0.20935164,
        0.20279461, 0.27012895, 0.22675238, 0.21230793, 0.2372011,
    ])
    expectedRMSD = np.array([
        3.70322128, 3.43637027, 3.188193, 3.84455877, 3.53053882,
        3.46781854, 2.93777629, 2.97978692, 2.70792428, 2.63051318,
    ])

    self.assertTrue(np.allclose(tmscore, expectedTMscore))
    self.assertTrue(np.allclose(rmsd, expectedRMSD))
def setUpClass(self):
    """Build the shared 3PTB fixture used by the voxel-descriptor tests.

    Stores the test-data folder in ``self.testf`` and a protein-only,
    auto-segmented, prepared molecule with guessed bonds in ``self.mol``.
    """
    from moleculekit.home import home
    from moleculekit.molecule import Molecule
    from moleculekit.tools.autosegment import autoSegment
    from moleculekit.tools.preparation import proteinPrepare

    self.testf = os.path.join(home(), 'test-data', 'test-voxeldescriptors')

    protein = Molecule(os.path.join(self.testf, '3PTB.pdb'))
    protein.filter('protein')
    segmented = autoSegment(protein, field='both')
    prepared = proteinPrepare(segmented)
    prepared.bonds = prepared._guessBonds()

    self.mol = prepared
def test_metricshell_simple(self):
    """Project a hand-built three-atom system with ``MetricShell``.

    Three "CL" atoms are placed at fixed positions in a single frame and the
    projection is compared with reference values, first with the default
    shell settings and then with two shells of width 1.
    """
    from moleculekit.molecule import Molecule
    from moleculekit.home import home
    from os import path
    import numpy as np

    positions = ([0, 0, 0], [0.5, 0, 0], [0, 1.5, 0])

    mol = Molecule().empty(3)
    mol.name[:] = "CL"
    mol.resname[:] = "CL"
    mol.resid[:] = np.arange(3)
    mol.coords = np.zeros((3, 3, 1), dtype=np.float32)
    for atom, xyz in enumerate(positions):
        mol.coords[atom, :, 0] = xyz

    # Default shells
    default_metric = MetricShell("name CL", "name CL", pbc=False)
    data = default_metric.project(mol)
    # fmt: off
    refdata = np.array([[
        0.01768388256576615, 0.0, 0.0, 0.0,
        0.01768388256576615, 0.0, 0.0, 0.0,
        0.01768388256576615, 0.0, 0.0, 0.0
    ]])
    # fmt: on
    assert np.allclose(data, refdata)

    # Two shells of width 1
    two_shell_metric = MetricShell(
        "name CL", "name CL", numshells=2, shellwidth=1, pbc=False
    )
    data = two_shell_metric.project(mol)
    refdata = np.array(
        [[0.23873241, 0.03410463, 0.23873241, 0.03410463, 0.0, 0.06820926]]
    )
    assert np.allclose(data, refdata)
def _writeInputsFunction(i, f, epoch, inputpath, coorname):
    """Create the input directory of a new simulation spawned from one frame.

    The input folder of the originating simulation is copied (without
    coordinate/restart/output files) to a new job directory, and the selected
    frame is written into it as the starting coordinates.

    Parameters
    ----------
    i : int
        Zero-based index of the new simulation within the epoch; the job name
        uses ``i + 1``.
    f : object
        Frame descriptor providing ``frame``, ``piece`` and ``sim`` attributes.
    epoch : int
        Epoch number used in the new job name.
    inputpath : str
        Directory under which the new job directory is created.
    coorname : str
        File name of the coordinate file written inside the new job directory.

    Raises
    ------
    NameError
        If the originating simulation has no input folder.
    """
    # Raw string: '\d' inside a normal string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    regex = re.compile(r'(e\d+s\d+)_')

    frameNum = f.frame
    piece = f.piece
    # Filtered simulations point back to their parent, which holds the inputs
    currSim = f.sim if f.sim.parent is None else f.sim.parent

    traj = currSim.trajectory[piece]
    if currSim.input is None:
        raise NameError(
            'Could not find input folder in simulation lists. Cannot create new simulations.'
        )

    wuName = _simName(traj)
    res = regex.search(wuName)
    if res:
        # If we are running on top of adaptive, use the first name part for the next sim name
        wuName = res.group(1)

    # create new job directory
    newName = 'e' + str(epoch) + 's' + str(i + 1) + '_' + wuName + 'p' + str(piece) + 'f' + str(frameNum)
    newDir = path.join(inputpath, newName, '')

    # copy previous input directory including input files
    copytree(
        currSim.input,
        newDir,
        symlinks=False,
        ignore=ignore_patterns('*.coor', '*.rst', '*.out', *_IGNORE_EXTENSIONS),
    )

    # overwrite input file with new one.
    # Always read the mol file, otherwise it does not work if we need to save a PDB as coorname
    mol = Molecule(currSim.molfile)
    mol.read(traj)
    # Making sure only specific frame to write is kept
    mol.dropFrames(keep=frameNum)
    mol.write(path.join(newDir, coorname))
def test_acemd3(self):
    """Write an ACEMD3 equilibration for the protLig system and compare it
    with the stored reference folder.

    The protocol uses a 4 ns runtime at 300 K, the default equilibration
    restraints plus an additional ligand ``GroupRestraint``.
    """
    from htmd.util import tempname
    from htmd.home import home
    from htmd.units import convert
    from moleculekit.molecule import Molecule
    import os
    from htmd.mdengine.acemd.acemd import GroupRestraint

    eq = Equilibration()
    eq.runtime = 4
    eq.timeunits = "ns"
    eq.temperature = 300
    eq.restraintsteps = convert(
        eq.timeunits, "timesteps", eq.runtime, timestep=eq.acemd.timestep
    )

    ligres = GroupRestraint(
        "resname MOL and noh",
        [42, 38, 1],
        [(5, 0)],
        fbcentre=[-0.178, -0.178, 29.195],
    )
    mol = Molecule(
        home(
            dataDir=os.path.join(
                "test-protocols", "build", "protLig", "structure.pdb"
            )
        )
    )
    eq.restraints = eq.defaultEquilRestraints(1000000, mol=mol) + [ligres]

    tmpdir = tempname()
    eq.write(
        home(dataDir=os.path.join("test-protocols", "build", "protLig")), tmpdir
    )

    # Compare with reference
    refdir = home(
        dataDir=os.path.join(
            "test-protocols", "equilibration", "acemd3", "protLig", "prerun"
        )
    )
    # The comparison helper receives the folders directly, so no explicit
    # listing of the reference files is needed here.
    self._compareResultFolders(refdir, tmpdir, "protLig")
def _loadMolecules(lipids, files):
    """Attach a randomly chosen structure and starting rotation to each lipid.

    Parameters
    ----------
    lipids : iterable
        Lipid objects exposing ``resname`` and ``xyz``; each gets its ``mol``
        and ``rot`` attributes set.
    files : dict
        Maps a residue name to the list of structure files available for it.
    """
    from moleculekit.util import rotationMatrix

    # Create Molecules
    for lipid in lipids:
        candidates = files[lipid.resname]
        chosen = candidates[np.random.randint(len(candidates))]

        lipidmol = Molecule(chosen)
        lipidmol.filter("not water", _logger=False)

        if lipid.xyz[2] < 0:
            # Rotate the lower leaflet lipids upside down
            flip = rotationMatrix([1, 0, 0], np.deg2rad(180))
            lipidmol.rotateBy(flip)

        lipid.mol = lipidmol
        # Random starting rotation
        lipid.rot = np.random.random() * 360 - 180
def setUpClass(self):
    """Load the 'test-projections' trajectory once for all tests.

    Only the first two frames are kept and stored in ``self.mol``.
    """
    from moleculekit.molecule import Molecule
    from moleculekit.home import home
    from os import path

    trajdir = path.join(home(dataDir='test-projections'), 'trajectory')

    traj = Molecule(path.join(trajdir, 'filtered.pdb'))
    traj.read(path.join(trajdir, 'traj.xtc'))
    # Keep only two frames because it's super slow
    traj.dropFrames(keep=[0, 1])

    self.mol = traj
def write_largest_cluster(g: Graph, inputmolfile: str, outputname: str) -> None:
    """
    Highlight the largest connected component of the graph in a VMD session.

    The structure is read from ``<inputmolfile without its last 4 characters>-chainA.pdb``,
    the protein is drawn as cartoon and the residues of the largest component
    as VDW spheres; the VMD state is then saved to ``outputname``.

    :param g: A Graph object
    :param inputmolfile: The pdb filename.
    :param outputname: The file name to output the VMD session to.
    :return:
    """
    structure = Molecule(f"{inputmolfile[:-4]}-chainA.pdb")
    structure.reps.add(sel='protein', style='NewCartoon', color=8)

    # Restrict the graph to its largest component and collect its residue ids
    largest_mask = label_largest_component(g)
    largest = GraphView(g, vfilt=largest_mask)
    resids = ' '.join(str(g.vp.resid[v]) for v in largest.vertices())

    structure.reps.add(
        f'chain A and noh and not backbone and resid {resids}',
        style="VDW",
        color=1,  # red
    )

    session = viewer(dispdev='text')
    session.loadMol(structure, name=inputmolfile)
    session.send("save_state " + outputname)
    session.close()
def _createLipids(lipidratio, area, lipiddb, files, leaflet=None): lipiddb = lipiddb.to_dict(orient='index') lipidnames = list(lipidratio.keys()) ratiosAPL = np.array([lipidratio[lipn] * lipiddb[lipn]['APL'] for lipn in lipidnames]) # Calculate the total areas per lipid type areaspl = area * (ratiosAPL / ratiosAPL.sum()) # Calculate the counts from the total areas counts = np.round(areaspl / np.array([lipiddb[lipn]['APL'] for lipn in lipidnames])).astype(int) lipids = [] for i in range(len(lipidnames)): resname = lipidnames[i] rings = _detectRings(Molecule(files[resname][0])) for k in range(counts[i]): if leaflet == 'upper': xyz = np.array([np.nan, np.nan, lipiddb[resname]['Thickness'] / 2]) elif leaflet == 'lower': xyz = np.array([np.nan, np.nan, - lipiddb[resname]['Thickness'] / 2]) lipids.append(_Lipid(resname=resname, headname=lipiddb[resname]['Head'], rings=rings, area=lipiddb[resname]['APL'], xyz=xyz)) return lipids
def __init__(self, obs_config: Config):
    """
    Load the protein named in the configuration and store its voxel descriptors.

    :param obs_config: configuration providing ``pdb``, ``voxelsize``,
        ``boxsize``, ``centers``, ``method`` and ``validity_check``
    """
    super().__init__(obs_config)
    self.obs_config = obs_config
    cfg = self.obs_config

    protein = prepareProteinForAtomtyping(Molecule(cfg.pdb), verbose=False)

    # The voxel centers returned by getVoxelDescriptors are not used here
    voxels, _centers, nvoxels = getVoxelDescriptors(
        protein,
        buffer=0,
        voxelsize=cfg.voxelsize,
        boxsize=cfg.boxsize,
        center=cfg.centers,
        method=cfg.method,
        validitychecks=cfg.validity_check,
    )
    self.prot_vox_t = self.reshape(voxels, nvoxels)
def setUpClass(self):
    """Load the 'test-projections' trajectory and derive the shared fixtures.

    ``self.ref`` holds a copy reduced to frame 0 and ``self.mol`` the last
    20 frames of the trajectory.
    """
    from moleculekit.molecule import Molecule
    from moleculekit.home import home
    import numpy as np
    from os import path

    trajdir = path.join(home(dataDir="test-projections"), "trajectory")

    traj = Molecule(path.join(trajdir, "filtered.pdb"))
    traj.read(path.join(trajdir, "traj.xtc"))

    reference = traj.copy()
    reference.dropFrames(keep=0)

    # Keep only last 20 frames
    nframes = traj.numFrames
    traj.dropFrames(keep=np.arange(nframes - 20, nframes))

    self.ref = reference
    self.mol = traj
def _readOutput(self, directory):
    """Parse the output files found in a calculation directory.

    Reads ``terachem.out`` for the final energy (converted from Hartree to
    kcal/mol) and the dipole moment, ``scr/charge_mull.xls`` for Mulliken
    charges (if present) and the geometry file (``scr/optim.xyz`` when
    ``self.optimize`` is set, otherwise ``scr/xyz.xyz``) for the coordinates.

    Parameters
    ----------
    directory : str
        Directory containing the calculation output.

    Returns
    -------
    result : QMResult
        The parsed results. ``errored`` is set to True if the energy or the
        dipole were not found, or if the geometry file is missing.

    Raises
    ------
    NotImplementedError
        If ``self.esp_points`` is set, since ESP is not available.
    """
    result = QMResult()
    result.completed = True

    # Hartree --> kcal/mol
    hartree_to_kcalmol = const.physical_constants["Hartree energy"][0] / (
        const.kilo * const.calorie / const.Avogadro
    )

    with open(os.path.join(directory, "terachem.out")) as fd:
        for line in fd:
            if line.startswith("FINAL ENERGY:"):
                result.energy = float(line.split()[2]) * hartree_to_kcalmol

            if line.startswith("DIPOLE MOMENT:"):
                tokens = line.split()
                # Strip the surrounding punctuation from each token
                # (presumably x, y, z and the total magnitude -- confirm
                # against the output format)
                result.dipole = [
                    float(tokens[2][1:-1]),
                    float(tokens[3][:-1]),
                    float(tokens[4][:-1]),
                    float(tokens[7][:-1]),
                ]

    mullFile = os.path.join(directory, "scr", "charge_mull.xls")
    if os.path.exists(mullFile):
        result.mulliken = list(np.loadtxt(mullFile, usecols=(2,)))

    if result.energy is None or result.dipole is None:
        result.errored = True

    geomFile = os.path.join(
        directory, "scr", "optim.xyz" if self.optimize else "xyz.xyz"
    )
    if os.path.exists(geomFile):
        # Keep only the last frame, preserving the trailing frame axis
        result.coords = Molecule(geomFile).coords[:, :, -1][:, :, None]
    else:
        result.errored = True

    if self.esp_points is not None:
        # `NotImplemented` is a constant, not an exception; calling it fails
        # with a TypeError instead of reporting the actual problem.
        raise NotImplementedError("ESP is not available")

    return result
def test_tmscore(self):
    """Check ``MetricTMscore`` on the last 20 frames of the test trajectory.

    Frame 0 of the trajectory is the reference; the projected TM-scores must
    be within 0.001 of the stored values.
    """
    from moleculekit.molecule import Molecule
    from moleculekit.home import home
    import numpy as np
    from os import path

    trajdir = path.join(home(dataDir='test-projections'), 'trajectory')

    mol = Molecule(path.join(trajdir, 'filtered.pdb'))
    mol.read(path.join(trajdir, 'traj.xtc'))

    ref = mol.copy()
    ref.dropFrames(keep=0)

    nframes = mol.numFrames
    mol.dropFrames(keep=np.arange(nframes - 20, nframes))

    data = MetricTMscore(ref, 'protein and name CA').project(mol)

    lasttm = np.array(
        [
            0.9633381, 0.96441294, 0.96553609, 0.96088852, 0.96288511,
            0.95677591, 0.96544727, 0.96359811, 0.95658912, 0.96893117,
            0.96623924, 0.96064913, 0.96207041, 0.95947848, 0.96657048,
            0.95993426, 0.96543296, 0.96806875, 0.96437248, 0.96144066,
        ],
        dtype=np.float32,
    )
    deviation = np.abs(data.flatten() - lasttm)
    assert np.all(deviation < 0.001), 'TMscore calculation is broken'
def write_clusters(g: Graph, components: PropertyArray, inputmolfile: str, outputname: str) -> None:
    """
    Write per-cluster statistics to a text file and output the clusters into
    a VMD session.

    The statistics go to ``outputname`` with its last 4 characters replaced by
    ``.txt``. Clusters with fewer than two residues are skipped.

    :param g: A Graph object
    :param components: The component (cluster) index of each vertex of g
    :param inputmolfile: The pdb filename.
    :param outputname: The file name to output the VMD session to.
    :return:
    """
    mol = Molecule(f"{inputmolfile[:-4]}-chainA.pdb")
    mol.reps.add(sel='protein', style='NewCartoon', color=8)

    # The context manager closes the report even if a cluster fails midway
    with open(outputname[:-4] + ".txt", "w") as f:
        for cluster_index in range(max(components) + 1):
            cluster = [i for i, x in enumerate(components) if x == cluster_index]
            if len(cluster) < 2:
                continue

            # View of the graph restricted to the vertices of this cluster
            vfilt = g.new_vertex_property('bool')
            for i in cluster:
                vfilt[i] = True
            sub = GraphView(g, vfilt)

            area = np.sum([g.ep.area[edge] for edge in sub.edges()])
            num_contacts = sub.num_edges()
            # NOTE(review): the last field is labelled "Area / Residue" but
            # divides by the number of contacts -- confirm which is intended.
            f.write(f"Cluster index {cluster_index}:\t"
                    f"Residues {len(cluster)}. Total area {area} A^2.\t "
                    f"Number of contacts: {num_contacts}.\t "
                    f"Contacts / Residue: {num_contacts / len(cluster)}.\t "
                    f"Area / Residue {area / num_contacts}\n"
                    )  # sum reverse and inverse

            resid_cluster = [g.vp.resid[i] for i in cluster]
            mol.reps.add('chain A and noh and not backbone and resid %s' % ' '.join(map(str, resid_cluster)),
                         style="VDW", color=cluster_index)

    vmdobject = viewer(dispdev='text')
    vmdobject.loadMol(mol, name=inputmolfile)
    vmdobject.send("save_state " + outputname)
    vmdobject.close()
def _init_mol(self, molName, ffTypeMethod, chargetuple):
    """Run ``fftype`` on a reference molecule and store its outputs.

    Parameters
    ----------
    molName : str
        Base name of the ``.mol2`` file inside ``self.refDir``.
    ffTypeMethod : str
        Atom-typing method passed on to ``fftype``.
    chargetuple : tuple, None or 'None'
        ``(acCharges, netcharge)`` pair. ``None`` or the string ``'None'``
        means that neither value is passed to ``fftype``.

    Sets ``self.testParameters``, ``self.testMolecule`` and
    ``self.testIntermediaryFiles`` (sorted content of the temporary folder).
    """
    from moleculekit.molecule import Molecule

    molFile = os.path.join(self.refDir, '{}.mol2'.format(molName))

    # Accept a real None as well as the 'None' string marker, which would
    # otherwise fail when being indexed below.
    if chargetuple is None or chargetuple == 'None':
        acCharges = None
        netcharge = None
    else:
        acCharges = chargetuple[0]
        netcharge = chargetuple[1]

    mol = Molecule(molFile)

    with TemporaryDirectory() as tmpDir:
        self.testParameters, self.testMolecule = fftype(
            mol,
            method=ffTypeMethod,
            acCharges=acCharges,
            netcharge=netcharge,
            tmpDir=tmpDir,
        )
        # Must be listed before the temporary directory is removed
        self.testIntermediaryFiles = sorted(os.listdir(tmpDir))
def write_networks(g: Graph, components: PropertyArray, inputmolfile: str, outputname: str) -> None:
    """
    Write the edges of every network to a text file and save a VMD session
    showing the networks.

    :param g: A graph-tool graph object
    :param components: the components of the graph
    :param inputmolfile: the pdb of the input protein
    :param outputname: a path where to write the output VMD session
    :return: Writes a file named outputname (without its last 4 characters) + "hh-networks.txt"
    """
    mol = Molecule(inputmolfile)
    mol.reps.add(sel='protein', style='NewCartoon', color=8)

    # The context manager closes the report even if a network fails midway
    with open(outputname[:-4] + "hh-networks.txt", "w") as f:
        f.write("Atom index\tAtom index\tresid\tresid\n")

        for network_index in range(max(components) + 1):
            f.write(f"Network index {network_index}\n")
            network = [i for i, x in enumerate(components) if x == network_index]  # these are the vertices
            resid_network = [g.vp.resid[i] for i in network]  # these are the resids

            # View of the graph restricted to the vertices of this network
            vfilt = g.new_vertex_property('bool')
            for i in network:
                vfilt[i] = True
            sub = GraphView(g, vfilt)

            # print for each edge the atoms and resid of the two pairs
            for edge in sub.edges():
                f.write(f"{sub.vp.atom[edge.source()]}\t"
                        f"{sub.vp.atom[edge.target()]}\t"
                        f"{sub.vp.resid[edge.source()]}\t"
                        f"{sub.vp.resid[edge.target()]}\n")

            # Same selection drawn with two representations
            selection = 'chain A and noh and resid %s' % ' '.join(map(str, resid_network))
            mol.reps.add(selection, style="Licorice", color=network_index)
            mol.reps.add(selection, style="HBonds", color=network_index)

    vmdobject = viewer(dispdev='text')
    vmdobject.loadMol(mol, name=inputmolfile)
    vmdobject.send("save_state " + outputname)
    vmdobject.close()
def test_metricsphericalcoordinate(self):
    """Project the test trajectory with ``MetricSphericalCoordinate``.

    The topology (before reading the trajectory) is the reference structure.
    The projection must match the stored array with an absolute tolerance of
    1e-4; the mapping is also computed to make sure it does not fail.
    """
    from moleculekit.molecule import Molecule
    from moleculekit.home import home
    from os import path

    projdir = home(dataDir='test-projections')

    mol = Molecule(path.join(projdir, 'trajectory', 'filtered.pdb'))
    ref = mol.copy()
    mol.read(path.join(projdir, 'trajectory', 'traj.xtc'))
    mol.bonds = mol._guessBonds()

    metric_args = (ref, 'resname MOL', 'within 8 of resid 98')
    res = MetricSphericalCoordinate(*metric_args).project(mol)
    _ = MetricSphericalCoordinate(*metric_args).getMapping(mol)

    ref_array = np.load(
        path.join(projdir, 'metricsphericalcoordinate', 'res.npy')
    )
    assert np.allclose(res, ref_array, rtol=0, atol=1e-04)
def testWithProteinPrepare(self):
    """Build systems that went through ``proteinPrepare`` and compare the
    result with the stored reference folders."""
    from moleculekit.tools.preparation import proteinPrepare
    from htmd.builder.solvate import solvate

    # Test with proteinPrepare
    # Other candidates are '1A25' and '1GZM'; '1U5U' is left out because it
    # has AR0 (no parameters).
    for pid in ['3PTB']:
        np.random.seed(1)

        protein = Molecule(pid)
        protein.filter('protein')
        protein = proteinPrepare(protein)
        # Fix for bad proteinPrepare hydrogen placing
        protein.filter('protein')

        solvated = solvate(protein)
        forcefields = defaultFf()
        outdir = os.path.join(self.testDir, 'withProtPrep', pid)
        _ = build(solvated, ff=forcefields, outdir=outdir)

        refdir = home(dataDir=join('test-amber-build', 'pp', pid))
        _TestAmberBuild._compareResultFolders(refdir, outdir, pid)
def _filtSim(i, sims, outFolder, filterSel):
    """Write filtered trajectories for one simulation and describe the result.

    Parameters
    ----------
    i : int
        Index of the simulation inside `sims`.
    sims : list
        Simulation objects.
    outFolder : str
        Folder in which the filtered trajectories are written.
    filterSel : str
        Atom selection of the atoms to keep.

    Returns
    -------
    sim : Sim or None
        The filtered simulation, or None if the molecule file of the
        simulation could not be loaded.
    """
    sim = sims[i]
    name = _simName(sim.trajectory[0])
    simdir = path.join(outFolder, name)
    if not path.exists(simdir):
        makedirs(simdir)

    logger.debug('Processing trajectory ' + name)

    fmolfile = [
        path.join(outFolder, 'filtered.psf'),
        path.join(outFolder, 'filtered.pdb'),
    ]

    def _filtered_sim():
        # Describe whatever filtered trajectories currently exist on disk
        ftrajectory = _autoDetectTrajectories(simdir)
        numframes = _getNumFrames(sim, ftrajectory)
        return Sim(
            simid=sim.simid,
            parent=sim,
            input=None,
            trajectory=ftrajectory,
            molfile=fmolfile,
            numframes=numframes,
        )

    (traj, outtraj) = _renameSims(sim.trajectory, name, outFolder)
    if not traj:
        # Nothing left to filter for this simulation
        return _filtered_sim()

    try:
        from moleculekit.molecule import Molecule

        mol = Molecule(sim.molfile)
    except Exception as e:
        logger.warning('Error! Skipping simulation ' + name + ' due to error: ' + str(e))
        return None

    sel = mol.atomselect(filterSel)

    for j, trajfile in enumerate(traj):
        try:
            mol.read(trajfile)
        except IOError as e:
            logger.warning('{}, skipping trajectory'.format(e))
            break
        mol.write(outtraj[j], sel)

    return _filtered_sim()
def toMolecule(self, formalcharges=False, ids=None):
    """
    Return the moleculekit.molecule.Molecule

    Parameters
    ----------
    formalcharges: bool
        If True, the formal charges are used instead of partial ones
    ids: list
        The list of conformer ids to store in the moleculekit Molecule object. If None, all are returned
        Default: None

    Returns
    -------
    mol: moleculekit.molecule.Molecule
         The moleculekit Molecule object

    Raises
    ------
    NoConformerError
        If the molecule does not contain any conformer.
    ValueError
        If `ids` is neither a list nor a numpy array.
    """
    from moleculekit.molecule import Molecule

    class NoConformerError(Exception):
        pass

    if ids is None:
        ids = np.arange(self.numFrames)

    if self.numFrames == 0:
        raise NoConformerError("No Conformers are found in the molecule. Generate at least one conformer.")
    elif not isinstance(ids, (list, np.ndarray)):
        raise ValueError('The argument ids should be a list of confomer ids')

    mol = Molecule()
    mol.empty(self.numAtoms)
    mol.record[:] = 'HETATM'
    mol.resname[:] = self.ligname[:3]
    mol.resid[:] = self._resid
    mol.coords = self._coords[:, :, ids]
    mol.name[:] = self._name
    mol.element[:] = self._element
    mol.charge[:] = self._formalcharge if formalcharges else self._charge
    # One box column per exported conformer, so that the box matches the
    # number of frames in the coordinates when only a subset is requested.
    mol.box = np.zeros((3, len(ids)), dtype=np.float32)
    mol.viewname = self.ligname
    mol.bonds = self._bonds
    mol.bondtype = self._bondtype
    mol.atomtype = self._atomtype

    return mol
class _TestMetricDistance(unittest.TestCase):
    """Tests of ``MetricDistance`` / ``MetricSelfDistance`` against stored
    reference arrays of the 'test-projections' data set."""

    @classmethod
    def setUpClass(self):
        """Load the full trajectory and a copy read with ``skip=10``."""
        from moleculekit.molecule import Molecule
        from os import path

        trajdir = path.join(home(dataDir='test-projections'), 'trajectory')
        topology = path.join(trajdir, 'filtered.pdb')
        trajectory = path.join(trajdir, 'traj.xtc')

        self.mol = Molecule(topology)
        self.mol_skipped = self.mol.copy()

        self.mol.read(trajectory)
        self.mol_skipped.read(trajectory, skip=10)

    @staticmethod
    def _reference(filename, **load_kwargs):
        """Load a reference array from the 'metricdistance' data folder."""
        return np.load(
            os.path.join(home(dataDir='test-projections'), 'metricdistance', filename),
            **load_kwargs
        )

    def test_contacts(self):
        """Contacts between protein CA atoms and ligand heavy atoms."""
        metric = MetricDistance(
            'protein and name CA', 'resname MOL and noh', metric='contacts'
        )
        data = metric.project(self.mol)
        refdata = self._reference('contacts.npy')
        assert np.allclose(data, refdata, atol=1e-3), 'Contact calculation is broken'

    def test_distances(self):
        """Distances between protein CA atoms and ligand heavy atoms."""
        metric = MetricDistance(
            'protein and name CA', 'resname MOL and noh', metric='distances'
        )
        data = metric.project(self.mol)
        refdata = self._reference('distances.npy')
        assert np.allclose(data, refdata, atol=1e-3), 'Distance calculation is broken'

    def test_mindistances(self):
        """Minimum distances, grouping the protein by residue and the ligand as a whole."""
        metric = MetricDistance(
            'protein and noh',
            'resname MOL and noh',
            groupsel1='residue',
            groupsel2='all',
        )
        data = metric.project(self.mol)
        refdata = self._reference('mindistances.npy')
        assert np.allclose(data, refdata, atol=1e-3), 'Minimum distance calculation is broken'

    def test_mindistances_truncate(self):
        """Same as test_mindistances, with ``truncate=3`` compared to the clipped reference."""
        metric = MetricDistance(
            'protein and noh',
            'resname MOL and noh',
            groupsel1='residue',
            groupsel2='all',
            truncate=3,
        )
        data = metric.project(self.mol)
        refdata = self._reference('mindistances.npy')
        assert np.allclose(data, np.clip(refdata, 0, 3), atol=1e-3), 'Minimum distance calculation is broken'

    def test_selfmindistance_manual(self):
        """Self-distances obtained by passing the same selection twice."""
        selection = 'protein and resid 1 to 50 and noh'
        metric = MetricDistance(
            selection, selection, groupsel1='residue', groupsel2='residue'
        )
        data = metric.project(self.mol)
        refdata = self._reference('selfmindistance.npy')
        assert np.allclose(data, refdata, atol=1e-3), 'Manual self-distance is broken'

    def test_selfmindistance_auto(self):
        """Self-distances obtained through ``MetricSelfDistance``."""
        metric = MetricSelfDistance(
            'protein and resid 1 to 50 and noh', groupsel='residue'
        )
        data = metric.project(self.mol)
        refdata = self._reference('selfmindistance.npy')
        assert np.allclose(data, refdata, atol=1e-3), 'Automatic self-distance is broken'

    def test_mindistances_skip(self):
        """Self-distances on the trajectory read with ``skip=10``."""
        metric = MetricSelfDistance(
            'protein and resid 1 to 50 and noh', groupsel='residue'
        )
        data = metric.project(self.mol_skipped)
        refdata = self._reference('selfmindistance.npy')
        # The skipped trajectory is compared with every 10th reference frame
        assert np.allclose(
            data, refdata[::10, :], atol=1e-3), 'Minimum distance calculation with skipping is broken'

    def test_reconstruct_contact_map(self):
        """Contact vector to matrix conversion and contact map plotting for 1yu8."""
        from moleculekit.util import tempname
        from moleculekit.molecule import Molecule
        import matplotlib

        # Non-interactive backend so that the figure can be written without a display
        matplotlib.use('Agg')

        mol = Molecule('1yu8')
        metric = MetricSelfDistance(
            'protein and name CA', metric='contacts', pbc=False
        )
        data = metric.project(mol)
        mapping = metric.getMapping(mol)

        tmpfile = tempname(suffix='.svg')
        cm, newmapping, uqAtomGroups = contactVecToMatrix(
            data, mapping.atomIndexes)
        reconstructContactMap(data, mapping, outfile=tmpfile)

        refdata = self._reference('contactvectomatrix.npz')
        assert np.array_equal(refdata['cm'], cm)
        assert np.array_equal(refdata['newmapping'], newmapping)
        assert np.array_equal(refdata['uqAtomGroups'], uqAtomGroups)

    def test_description(self):
        """Atom indexes reported by the mapping must match the stored description."""
        metric = MetricDistance(
            'protein and noh', 'resname MOL and noh', truncate=3
        )
        data = metric.project(self.mol)
        atomIndexes = metric.getMapping(self.mol).atomIndexes.values
        refdata = self._reference('description.npy', allow_pickle=True)
        assert np.array_equal(refdata, atomIndexes)
def tileMembrane(memb, xmin, ymin, xmax, ymax, buffer=1.5):
    """ Tile a membrane in the X and Y dimensions to reach a specific size.

    Parameters
    ----------
    memb : :class:`Molecule <moleculekit.molecule.Molecule>` object
        The membrane to be tiled
    xmin : float
        Minimum x coordinate
    ymin : float
        Minimum y coordinate
    xmax : float
        Maximum x coordinate
    ymax : float
        Maximum y coordinate
    buffer : float
        Buffer distance between tiles

    Returns
    -------
    megamemb :
        A big membrane Molecule
    """
    from tqdm import tqdm
    from moleculekit.molecule import Molecule

    memb = memb.copy()
    memb.resid = sequenceID(
        (memb.resid, memb.insertion, memb.chain, memb.segid))

    # The tile size is taken from the extent of the water atoms. The
    # selection is evaluated once and reused for origin and size.
    watercoords = memb.get('coords', 'water')
    minmemb = np.min(watercoords, axis=0).flatten()
    size = (np.max(watercoords, axis=0) - np.min(watercoords, axis=0)).flatten()

    xreps = int(np.ceil((xmax - xmin) / size[0]))
    yreps = int(np.ceil((ymax - ymin) / size[1]))

    logger.info('Replicating Membrane {}x{}'.format(xreps, yreps))

    megamemb = Molecule()
    k = 0  # Counts only the tiles that are actually kept
    with tqdm(total=xreps * yreps, desc='Replicating Membrane') as bar:
        for x in range(xreps):
            for y in range(yreps):
                tmpmemb = memb.copy()
                xpos = xmin + x * (size[0] + buffer)
                ypos = ymin + y * (size[1] + buffer)

                tmpmemb.moveBy(
                    [-float(minmemb[0]) + xpos, -float(minmemb[1]) + ypos, 0])
                # Trim whole residues sticking out of the requested area
                tmpmemb.remove('same resid as (x > {} or y > {})'.format(
                    xmax, ymax), _logger=False)

                if tmpmemb.numAtoms != 0:
                    tmpmemb.set('segid', 'M{}'.format(k), sel='not water')
                    tmpmemb.set('segid', 'MW{}'.format(k), sel='water')
                    megamemb.append(tmpmemb)
                    k += 1

                # Advance for skipped (empty) tiles too, so the bar reaches its total
                bar.update(1)

    # Membranes don't tile perfectly. Need to remove waters that clash with lipids of other tiles
    # Some clashes will still occur between periodic images however
    megamemb.remove('same resid as water and within 1.5 of not water',
                    _logger=False)
    return megamemb
# Cartesian to polar coordinates. Rho is the rotation angle rho = np.sqrt(x**2 + y**2) phi = np.arctan2(y, x) return rho, phi angle, _ = cart2pol(xa, ya) return angle + np.radians(45) if __name__ == "__main__": from moleculekit.molecule import Molecule from moleculekit.tools.autosegment import autoSegment from htmd.home import home from os import path p = Molecule( path.join(home(), 'data', 'building-protein-membrane', '4dkl.pdb')) p.filter('(chain B and protein) or water') p = autoSegment(p, 'protein', 'P') m = Molecule( path.join(home(), 'data', 'building-protein-membrane', 'membrane.pdb')) a = embed(p, m) print(np.unique(m.get('segid'))) mol = Molecule( path.join(home(), 'data', 'building-protein-membrane', '1ITG_clean.pdb')) ref = Molecule( path.join(home(), 'data', 'building-protein-membrane', '1ITG.pdb')) mol = autoSegment(mol, sel='protein') assert np.all(mol.segid == ref.segid)
def sampleRegion(
    self, point=None, radius=None, limits=None, nsamples=20, singlemol=False
):
    """Samples conformations from a region in the projected space.

    Parameters
    ----------
    point : list or np.ndarray
        A point in the projected space. Undefined dimensions should have None value.
    radius : float
        The radius in around the point in which to sample conformations.
    limits : np.ndarray
        A (2, ndim) dimensional array containing the min (1st row) and max (2nd row) limits for each dimension.
        None values will be interpreted as no limit in that dimension, or min/max value.
    nsamples : int
        The number of conformations to sample.
    singlemol : bool
        If True it will return all samples within a single Molecule instead of a list of Molecules.

    Returns
    -------
    absFrames : list
        A list of the absolute frame indexes sampled
    relFrames : list of tuples
        A list of (trajNum, frameNum) tuples sampled
    mols : Molecule or list of Molecules
        The conformations stored in a Molecule or a list of Molecules

    Raises
    ------
    RuntimeError
        If neither `point` nor `limits` is given, if `point` is given without
        a `radius`, or if `point` / `limits` do not match the dimensionality
        of the data.

    Examples
    --------
    >>> # Working with 4 dimensional data for example
    >>> abs, rel, mols = data.sampleRegion(point=(0.5, 3, None, None), radius=0.1)  # Point undefined in dim 3, 4
    >>> minlims = [-1, None, None, 4]  # No min limit for 2, 3 dim
    >>> maxlims = [2, 3, None, 7]  # No max limit for 3 dim
    >>> abs, rel, mols = data.sampleRegion(limits=np.array([minlims, maxlims]))
    """
    # Without a region definition there is nothing to select from; fail with
    # a clear message instead of an unbound-variable error further down.
    if point is None and limits is None:
        raise RuntimeError(
            "You must define either a `point` (with a `radius`) or `limits` to sample a region."
        )

    from scipy.spatial.distance import cdist

    datconcat = np.concatenate(self.dat)
    numdim = datconcat.shape[1]

    if point is not None:
        if radius is None:
            raise RuntimeError("You must define a radius with a point.")
        point = np.array(point)
        if len(point) != numdim:
            raise RuntimeError(
                "Argument `point` should be same dimensionality as your data ({} dimensions)".format(
                    numdim
                )
            )
        # Only the dimensions defined in `point` enter the distance
        keepdim = np.array([p is not None for p in point])
        center = np.asarray(point[keepdim], dtype=float)
        dists = cdist(datconcat[:, keepdim], [center])
        confs = np.where(dists < radius)[0]
    else:
        # Also accept nested lists such as [minlims, maxlims]
        limits = np.asarray(limits)
        if limits.shape != (2, numdim):
            raise RuntimeError(
                "Argument `limits` should be of shape (2, {})".format(numdim)
            )
        mask = np.ones(datconcat.shape[0], dtype=bool)
        for i in range(numdim):
            if limits[0, i] is not None:
                mask &= datconcat[:, i] > limits[0, i]
            if limits[1, i] is not None:
                mask &= datconcat[:, i] < limits[1, i]
        confs = np.where(mask)[0]

    # Randomly subsample if the region contains more frames than requested
    if len(confs) > nsamples:
        confs = np.random.choice(confs, nsamples, replace=False)
    sims = self.abs2sim(confs)

    from moleculekit.molecule import Molecule

    if singlemol:
        mol = Molecule(sims[0])
        for s in sims[1:]:
            mol.appendFrames(Molecule(s))
    else:
        mol = [Molecule(s) for s in sims]
    return confs, self.abs2rel(confs), mol
def write(self, inputdir, outputdir):
    """ Writes the production protocol and files into a folder.

    Parameters
    ----------
    inputdir : str
        Path to a directory containing the files produced by a equilibration process.
    outputdir : str
        Directory where to write the production setup files.

    Raises
    ------
    RuntimeError
        If the type of the Acemd object does not match the protocol version,
        or if constraints are enabled (version 2) without any being defined.
    RuntimeWarning
        If restraints are set while using protocol version 2.
    """
    from moleculekit.molecule import Molecule

    # Do version consistency check. The two cases are mutually exclusive, so
    # they must be combined with `or`; with `and` the check could never fire.
    if (self._version == 2 and not isinstance(self.acemd, Acemd2)) or \
            (self._version == 3 and not isinstance(self.acemd, Acemd)):
        raise RuntimeError(
            'Acemd object version ({}) inconsistent with protocol version at instantiation '
            '({})'.format(type(self.acemd), self._version))

    self._findFiles(inputdir)
    self._amberFixes()

    # Version 2 expects the temperatures as strings, version 3 as numbers
    if self._version == 2:
        self.acemd.temperature = str(self.temperature)
        self.acemd.langevintemp = str(self.temperature)
    elif self._version == 3:
        self.acemd.temperature = self.temperature
        self.acemd.thermostattemp = self.temperature

    from htmd.units import convert
    numsteps = convert(self.timeunits, 'timesteps', self.runtime,
                       timestep=self.acemd.timestep)
    if self._version == 3:
        self.acemd.run = str(numsteps)

    pdbfile = os.path.join(inputdir, self.acemd.coordinates)
    inmol = Molecule(pdbfile)

    if np.any(inmol.atomselect('lipids')) and not self.useconstantratio:
        logger.warning(
            'Lipids detected in input structure. We highly recommend setting useconstantratio=True '
            'for membrane simulations.')

    if self._version == 2:
        if self.restraints:
            raise RuntimeWarning(
                'restraints are only available on {}(_version=3)'.format(
                    self.__class__.__name__))
        if self.fb_k > 0:  # use TCL only for flatbottom
            self.acemd.tclforces = 'on'
            if isinstance(self.acemd.TCL, tuple):
                # Fill in the template part and join it with the second part
                tcl = list(self.acemd.TCL)
                tcl[0] = tcl[0].format(
                    NUMSTEPS=numsteps,
                    TEMPERATURE=self.temperature,
                    KCONST=self.fb_k,
                    REFINDEX=' '.join(
                        map(str, inmol.get('index', self.fb_reference))),
                    SELINDEX=' '.join(
                        map(str, inmol.get('index', self.fb_selection))),
                    BOX=' '.join(map(str, self.fb_box)))
                self.acemd.TCL = tcl[0] + tcl[1]
            else:
                logger.warning(
                    '{} default TCL was already formatted.'.format(
                        self.__class__.__name__))
        else:
            self.acemd.TCL = 'set numsteps {NUMSTEPS}\n'.format(
                NUMSTEPS=numsteps)
        if self.useconstraints:
            # Turn on constraints
            self.acemd.constraints = 'on'
            self.acemd.constraintscaling = '1.0'
        else:
            if len(self.constraints) != 0:
                logger.warning(
                    'You have setup constraints to {} but constraints are turned off. '
                    'If you want to use constraints, define '
                    'useconstraints=True'.format(self.constraints))
    elif self._version == 3:
        if self.restraints is not None:
            logger.info(
                'Using user-provided restraints and ignoring constraints and fb_potential'
            )
            self.acemd.restraints = self.restraints
        else:
            restraints = list()
            if self.fb_k > 0:
                logger.warning(
                    'Converting fb_potential to restraints. This is a convenience '
                    'functional conversion. We recommend start using restraints with '
                    '{}(_version=3)'.format(self.__class__.__name__))
                restraints += self._fb_potential2restraints(inputdir)
            if self.useconstraints:
                logger.warning(
                    'Converting constraints to restraints. This is a convenience '
                    'functional conversion. We recommend start using restraints with '
                    '{}(_version=3)'.format(self.__class__.__name__))
                restraints += self._constraints2restraints()
            else:
                if len(self.constraints) != 0:
                    logger.warning(
                        'You have setup constraints to {} but constraints are turned off. '
                        'If you want to use constraints, define '
                        'useconstraints=True'.format(self.constraints))
            if len(restraints) != 0:
                self.acemd.restraints = restraints

    if self.useconstantratio:
        self.acemd.useconstantratio = 'on'

    if self.adaptive:
        self.acemd.binvelocities = None

    self.acemd.setup(inputdir, outputdir, overwrite=True)

    if self._version == 2:
        # Adding constraints by writing them to the consref file
        if self.useconstraints:
            inconsreffile = os.path.join(inputdir, self.acemd.consref)
            consrefmol = Molecule(inconsreffile)
            consrefmol.set('occupancy', 0)
            consrefmol.set('beta', 0)
            if len(self.constraints) == 0:
                raise RuntimeError(
                    'You have set the production to use constraints (useconstraints=True), but have '
                    'not defined any constraints (constraints={}).')
            else:
                # The value of each selection is stored in the beta field
                for sel in self.constraints:
                    consrefmol.set('beta', self.constraints[sel], sel)
            outconsreffile = os.path.join(outputdir, self.acemd.consref)
            consrefmol.write(outconsreffile)
def _fftypeRunCommand(cmd, cwd, label):
    """
    Run an external atom-typing program and relay its output to the logger.

    Parameters
    ----------
    cmd : list of str
        Command line to execute (passed to `subprocess.call` without a shell).
    cwd : str
        Working directory in which the program is started.
    label : str
        Program name used in the log and error messages.

    Raises
    ------
    RuntimeError
        If the program exits with a non-zero status. The message carries the
        captured stdout/stderr.
    """
    logger.info('Running "{}"'.format(label))
    with TemporaryFile() as stream:
        # stdout and stderr share one stream so the log keeps their interleaving
        status = subprocess.call(cmd, cwd=cwd, stdout=stream, stderr=stream)
        stream.seek(0)
        output = stream.read()

    # The capture file is normally binary; decode it so that log lines and the
    # error message show readable text instead of a bytes repr.
    if isinstance(output, bytes):
        output = output.decode(errors="replace")

    for line in output.splitlines():
        logger.info(line)

    if status != 0:
        raise RuntimeError('"{}" failed: {}'.format(label, output))


def fftype(
    mol,
    rtfFile=None,
    prmFile=None,
    method="GAFF2",
    acCharges=None,
    tmpDir=None,
    netcharge=None,
):
    """
    Assign atom types and force field parameters for a given molecule.
    Additionally, atom masses and improper dihedral are set.
    Optionally, atom charges can be set if `acCharges` is set (see below).

    The assignment can be done:
      1. For CHARMM CGenFF_2b6 with MATCH (method = 'CGenFF_2b6');
      2. For AMBER GAFF with antechamber (method = 'GAFF');
      3. For AMBER GAFF2 with antechamber (method = 'GAFF2');

    Parameters
    ----------
    mol : Molecule
        Molecule to use for the assignment
    rtfFile : str
        Path to a RTF file from which to read the topology
    prmFile : str
        Path to a PRM file from which to read the parameters
    method : str
        Atomtyping assignment method.
        Use :func:`fftype.listFftypemethods <parameterize.parameterization.fftype.listFftypemethods>` to get a list of available
        methods.
        Default: :func:`fftype.defaultFftypemethod <parameterize.parameterization.fftype.defaultFftypemethod>`
    acCharges : str
        Optionally assign charges with antechamber. Check `antechamber -L` for available options.
        Note: only works for GAFF and GAFF2.
    tmpDir: str
        Directory for temporary files. If None, a directory is created and deleted automatically.
    netcharge : float
        The net charge of the molecule. If None, it is computed by summing
        `mol.charge` and rounding to the nearest integer.

    Returns
    -------
    prm : :class:`ParameterSet <parmed.parameters.ParameterSet>` object
        Returns a parmed ParameterSet object with the parameters.
    mol : :class:`Molecule <moleculekit.molecule.Molecule>` object
        The modified Molecule object with the matching atom types for the ParameterSet

    Raises
    ------
    ValueError
        If `method` is not one of the available methods, or if `acCharges` is
        requested together with the CGenFF_2b6 method.
    RuntimeError
        If an external program fails, or if antechamber changed the atom names.
    """

    import parmed

    if method not in fftypemethods:
        raise ValueError(
            "Invalid method {}. Available methods {}".format(
                method, ",".join(fftypemethods)
            )
        )

    if method == "CGenFF_2b6" and acCharges:
        raise ValueError(
            "acCharges is not supported with method CGenFF_2b6; "
            "it only works for GAFF and GAFF2"
        )

    if netcharge is None:
        netcharge = int(round(np.sum(mol.charge)))
        logger.warning(
            "Molecular charge is set to {} by adding up the atomic charges".format(
                netcharge
            )
        )

    if rtfFile and prmFile:
        from parameterize.parameterization.readers import readRTF

        logger.info("Reading FF parameters from {} and {}".format(rtfFile, prmFile))
        prm = parmed.charmm.CharmmParameterSet(rtfFile, prmFile)
        names, elements, atomtypes, charges, masses, impropers = readRTF(rtfFile)

    else:
        logger.info("Assigning atom types with {}".format(method))

        renamed_mol = _canonicalizeAtomNames(mol)

        # Create a temporary directory
        with TemporaryDirectory() as tmpdir:

            # HACK to keep the files: a user-supplied tmpDir overrides the
            # automatically removed directory
            tmpdir = tmpdir if tmpDir is None else tmpDir
            logger.debug("Temporary directory: {}".format(tmpdir))

            if method in ("GAFF", "GAFF2"):

                from moleculekit.molecule import Molecule
                from parameterize.parameterization.readers import (
                    readPREPI,
                    readFRCMOD,
                )

                # Write the molecule to a file
                renamed_mol.write(os.path.join(tmpdir, "mol.mol2"))

                atomtype = method.lower()

                # Set arguments
                cmd = [
                    "antechamber",
                    "-at",
                    atomtype,
                    "-nc",
                    str(netcharge),
                    "-fi",
                    "mol2",
                    "-i",
                    "mol.mol2",
                    "-fo",
                    "prepi",
                    "-o",
                    "mol.prepi",
                ]
                if acCharges is not None:
                    cmd += ["-c", acCharges]

                # Run antechamber
                _fftypeRunCommand(cmd, tmpdir, "antechamber")

                # Set arguments
                cmd = [
                    "parmchk2",
                    "-f",
                    "prepi",
                    "-s",
                    atomtype,
                    "-i",
                    "mol.prepi",
                    "-o",
                    "mol.frcmod",
                    "-a",
                    "Y",
                ]

                # Run parmchk2
                _fftypeRunCommand(cmd, tmpdir, "parmchk2")

                # Check if antechamber did changes in atom names (and suggest the user to fix the names)
                acmol = Molecule(os.path.join(tmpdir, "NEWPDB.PDB"), type="pdb")
                # Builtin `object` instead of the `np.object` alias, which
                # recent NumPy releases no longer provide
                acmol.name = np.array([n.upper() for n in acmol.name]).astype(object)
                changed_mol_acmol = np.setdiff1d(renamed_mol.name, acmol.name)
                changed_acmol_mol = np.setdiff1d(acmol.name, renamed_mol.name)
                if len(changed_mol_acmol) != 0 or len(changed_acmol_mol) != 0:
                    raise RuntimeError(
                        "Initial atom names {} were changed by antechamber to {}. "
                        "This probably means that the start of the atom name does not match "
                        "element symbol. "
                        "Please check the molecule."
                        "".format(
                            ",".join(changed_mol_acmol), ",".join(changed_acmol_mol)
                        )
                    )

                # Read the results
                prm = parmed.amber.AmberParameterSet(
                    os.path.join(tmpdir, "mol.frcmod")
                )
                names, atomtypes, charges, impropers = readPREPI(
                    renamed_mol, os.path.join(tmpdir, "mol.prepi")
                )
                masses, elements = readFRCMOD(
                    atomtypes, os.path.join(tmpdir, "mol.frcmod")
                )

            elif method == "CGenFF_2b6":

                from parameterize.parameterization.readers import readRTF

                # Write the molecule to a file
                renamed_mol.write(os.path.join(tmpdir, "mol.pdb"))

                # Set arguments
                cmd = [
                    "match-typer",
                    "-charge",
                    str(netcharge),
                    "-forcefield",
                    "top_all36_cgenff_new",
                    "mol.pdb",
                ]

                # Run match-type
                _fftypeRunCommand(cmd, tmpdir, "match-typer")

                prm = parmed.charmm.CharmmParameterSet(
                    os.path.join(tmpdir, "mol.rtf"), os.path.join(tmpdir, "mol.prm")
                )
                names, elements, atomtypes, charges, masses, impropers = readRTF(
                    os.path.join(tmpdir, "mol.rtf")
                )

            else:
                raise ValueError("Invalid method {}".format(method))

        # Only meaningful here: renamed_mol exists solely on this branch
        assert np.all(renamed_mol.name == names)

    assert np.all(mol.element == elements)

    # Work on a copy so the caller's molecule is left untouched
    mol = mol.copy()
    mol.atomtype = atomtypes
    mol.masses = masses
    mol.impropers = impropers
    if acCharges is not None:
        mol.charge = charges

    return prm, mol
md.datapath = path.join(home(), 'data', 'adaptive', 'data') md.run() # Cleaning up inputodel = glob(path.join(home(), 'data', 'adaptive', 'input', 'e2*')) for i in inputodel: shutil.rmtree(i, ignore_errors=True, acemd='/shared/acemd/bin/acemd') os.remove(path.join(home(), 'data', 'adaptive', 'input', 'e2_writeinputs.log'))''' import htmd import os import shutil from htmd.queues.localqueue import LocalGPUQueue from htmd.simlist import Frame, simlist from htmd.util import tempname filedir = htmd.home.home() + '/data/adaptive/' sims = simlist(glob(os.path.join(filedir, 'data', '*', '')), glob(os.path.join(filedir, 'input', '*', '')), glob(os.path.join(filedir, 'input', '*', ''))) outf = tempname() os.makedirs(outf) f = Frame(sims[0], 0, 5) _writeInputsFunction(1, f, 2, outf, 'input.coor') mol = Molecule(sims[0]) mol.read(os.path.join(outf, 'e2s2_e1s1p0f5', 'input.coor')) shutil.rmtree(outf)
def _mol_chimera_wrapper(molecule: Molecule, chimera: Chimera) -> Chimera:
    """
    Build a new Chimera from a Molecule by round-tripping through a PDB file.

    Parameters
    ----------
    molecule : Molecule
        Object whose ``write`` method is used to dump the PDB file.
    chimera : Chimera
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    new_chimera : Chimera
        Object created with ``Chimera(filename=<temporary PDB path>)``.
    """
    import tempfile

    # A private temporary directory replaces the fixed "/tmp/molecule.pdb"
    # path: concurrent callers no longer overwrite each other's file, the code
    # does not depend on a "/tmp" directory existing, and the file is removed
    # even when Chimera() raises.
    with tempfile.TemporaryDirectory() as workdir:
        pdb_path = os.path.join(workdir, "molecule.pdb")
        molecule.write(pdb_path)
        # NOTE(review): the file is gone once this block exits, exactly as the
        # previous os.remove() did right after construction — this presumes
        # Chimera reads the file eagerly in its constructor.
        return Chimera(filename=pdb_path)
def reconstructAdaptiveTraj(simlist, trajID):
    """ Stitches the short adaptive runs leading to a simulation into one trajectory.

    Starting from the simulation with the given ID, the parent simulations are
    looked up one after the other until epoch 1 is reached, and their frames
    are concatenated in chronological order.

    Parameters
    ----------
    simlist : numpy.ndarray of :class:`Sim <htmd.simlist.Sim>` objects
        A simulation list generated by the :func:`simlist <htmd.simlist.simlist>` function
    trajID : int
        The id of the trajectory from which to start going back.

    Returns
    -------
    mol : :class:`Molecule <moleculekit.molecule.Molecule>` object
        A Molecule object containing the reconstructed trajectory
    chain : list of tuple
        One `(sim, piece, frame)` tuple per simulation involved, oldest first.
        The entry of the starting simulation is `(sim, -1, -1)`.
    pathlist : list of str
        The trajectory file of each simulation involved, oldest first.

    Raises
    ------
    NameError
        If no simulation in `simlist` has the id `trajID`.

    Examples
    --------
    >>> mol, chain, pathlist = reconstructAdaptiveTraj(data.simlist, 52)
    """
    # Pick the first simulation carrying the requested id
    sim = next((entry for entry in simlist if entry.simid == trajID), None)
    if sim is None:
        raise NameError(
            'Could not find sim with ID {} in the simlist.'.format(trajID))

    # Both lists are filled newest-first and flipped once the walk is over
    pathlist = [sim.trajectory[0]]
    chain = [(sim, -1, -1)]

    epoch = None
    while epoch != 1:
        sim, piece, frame, epoch = _findprevioustraj(
            simlist, _simName(sim.trajectory[0]))
        pathlist.append(sim.trajectory[piece])
        chain.append((sim, piece, frame))
    pathlist.reverse()
    chain.reverse()

    # Start from a molecule with zero frames and grow it link by link
    mol = Molecule(sim.molfile)
    mol.coords = np.zeros((mol.numAtoms, 3, 0), dtype=np.float32)
    mol.fileloc = []
    mol.box = np.zeros((3, 0))

    for link_sim, endpiece, spawnframe in chain:
        segment = Molecule(sim.molfile)
        segment.read(link_sim.trajectory)

        filenames = np.vstack(segment.fileloc)[:, 0]
        pieces = np.unique(filenames)
        firstpieceframe = np.where(filenames == pieces[endpiece])[0][0]
        endFrame = firstpieceframe + spawnframe

        if endFrame != -1:
            # Adding the actual respawned frame (+1) since the respawned sim
            # doesn't include it in the xtc
            stop = endFrame + 1
            segment.coords = segment.coords[:, :, 0:stop]
            segment.fileloc = segment.fileloc[0:stop]
            segment.box = segment.box[:, 0:stop]

        mol.coords = np.concatenate((mol.coords, segment.coords), axis=2)
        mol.box = np.concatenate((mol.box, segment.box), axis=1)
        mol.fileloc += segment.fileloc

    return mol, chain, pathlist
def write(self, inputdir, outputdir):
    """ Writes the production protocol and files into a folder.

    Parameters
    ----------
    inputdir : str
        Path to a directory containing the files produced by a equilibration process.
    outputdir : str
        Directory where to write the production setup files.

    Raises
    ------
    RuntimeError
        If `useconstraints` is True but no constraints are defined. The check
        runs after `self.acemd.setup` has been called.
    """
    self._findFiles(inputdir)
    self._amberFixes()

    from htmd.units import convert

    numsteps = convert(self.timeunits, 'timesteps', self.runtime,
                       timestep=self.acemd.timestep)

    pdbfile = os.path.join(inputdir, self.acemd.coordinates)
    inmol = Molecule(pdbfile)

    if np.any(inmol.atomselect('lipids')) and not self.useconstantratio:
        logger.warning(
            'Lipids detected in input structure. We highly recommend setting useconstantratio=True '
            'for membrane simulations.')

    if self.fb_k > 0:  # use TCL only for flatbottom
        self.acemd.tclforces = 'on'
        if isinstance(self.acemd.TCL, str):
            # An earlier write() already collapsed the two-part template into
            # one string. Formatting it again would split that string into
            # single characters and leave TCL as its first two characters.
            logger.warning(
                '{} default TCL was already formatted.'.format(
                    self.__class__.__name__))
        else:
            tcl = list(self.acemd.TCL)
            tcl[0] = tcl[0].format(
                NUMSTEPS=numsteps,
                TEMPERATURE=self.temperature,
                KCONST=self.fb_k,
                REFINDEX=' '.join(
                    map(str, inmol.get('index', self.fb_reference))),
                SELINDEX=' '.join(
                    map(str, inmol.get('index', self.fb_selection))),
                BOX=' '.join(map(str, self.fb_box)))
            self.acemd.TCL = tcl[0] + tcl[1]
    else:
        self.acemd.TCL = 'set numsteps {NUMSTEPS}\n' \
                         'set temperature {TEMPERATURE}\n'.format(NUMSTEPS=numsteps,
                                                                   TEMPERATURE=self.temperature)

    if self.useconstraints:
        # Turn on constraints
        self.acemd.constraints = 'on'
        self.acemd.constraintscaling = '1.0'
    else:
        if len(self.constraints) != 0:
            logger.warning(
                'You have setup constraints to {} but constraints are turned off. '
                'If you want to use constraints, define useconstraints=True'
                .format(self.constraints))

    if self.useconstantratio:
        self.acemd.useconstantratio = 'on'

    if self.adaptive:
        self.acemd.binvelocities = None

    self.acemd.setup(inputdir, outputdir, overwrite=True)

    # Adding constraints by writing them to the consref file
    if self.useconstraints:
        inconsreffile = os.path.join(inputdir, self.acemd.consref)
        consrefmol = Molecule(inconsreffile)
        # Reset both fields, then store each constraint value in 'beta' for its selection
        consrefmol.set('occupancy', 0)
        consrefmol.set('beta', 0)
        if len(self.constraints) == 0:
            raise RuntimeError(
                'You have set the production to use constraints (useconstraints=True), but have not '
                'defined any constraints (constraints={}).')
        else:
            for sel in self.constraints:
                consrefmol.set('beta', self.constraints[sel], sel)
        outconsreffile = os.path.join(outputdir, self.acemd.consref)
        consrefmol.write(outconsreffile)