def MDTRAJwrite(mol, filename):
    """Write ``mol`` to ``filename`` by round-tripping it through MDTraj.

    The molecule is dumped to temporary files (a PDB, plus an XTC for
    trajectory formats), loaded back with ``mdtraj`` and saved by MDTraj under
    the requested file name.

    Parameters
    ----------
    mol : object
        Object exposing a ``write(path)`` method (presumably a Molecule).
    filename : str
        Output path. Its extension (or the last two extensions when the file
        ends in ``.gz``) is looked up in ``_MDTRAJ_TOPOLOGY_SAVERS`` and
        ``_MDTRAJ_TRAJECTORY_SAVERS``.

    Raises
    ------
    ImportError
        If the ``mdtraj`` package cannot be imported.
    ValueError
        If the extension is unknown or any step of the conversion fails. The
        original exception is chained as ``__cause__``.
    """
    try:
        import mdtraj as md
    except ImportError:
        raise ImportError(
            'To support extension {} please install the `mdtraj` package'.format(
                os.path.splitext(filename)[1]
            )
        )

    # Every temporary file created below is registered here so that it gets
    # removed even when loading or saving fails half-way through.
    tmpfiles = []
    try:
        from moleculekit.util import tempname

        ext = os.path.splitext(filename)[1][1:]
        if ext == 'gz':
            # Compressed formats are keyed by their double extension, e.g. "pdb.gz"
            pieces = filename.split('.')
            ext = '{}.{}'.format(pieces[-2], pieces[-1])

        if ext in _MDTRAJ_TOPOLOGY_SAVERS:
            tmppdb = tempname(suffix='.pdb')
            tmpfiles.append(tmppdb)
            mol.write(tmppdb)
            traj = md.load(tmppdb)
        elif ext in _MDTRAJ_TRAJECTORY_SAVERS:
            tmppdb = tempname(suffix='.pdb')
            tmpxtc = tempname(suffix='.xtc')
            tmpfiles.extend([tmppdb, tmpxtc])
            mol.write(tmppdb)
            mol.write(tmpxtc)
            traj = md.load(tmpxtc, top=tmppdb)
        else:
            raise ValueError('Unknown file type for file {}'.format(filename))

        traj.save(filename)
    except Exception as e:
        raise ValueError(
            'MDtraj reader failed for file {} with error "{}"'.format(filename, e)
        ) from e
    finally:
        # Best-effort cleanup: never let a failed removal mask the real error.
        for tmp in tmpfiles:
            try:
                os.remove(tmp)
            except OSError:
                pass
def testUpdateDataFile(self):
    """Test case - update data file

    Writes a small data category to a temporary CIF file, reads it back,
    overwrites two columns in every row and writes the result to a second
    temporary CIF file. Any exception is logged to ``self.lfh`` and turned
    into a test failure.
    """
    self.lfh.write(
        "\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name)
    )
    try:
        # Create an initial data file
        myDataList = []
        curContainer = DataContainer("myblock")
        aCat = DataCategory("pdbx_seqtool_mapping_ref")
        for attributeName in (
            "ordinal",
            "entity_id",
            "auth_mon_id",
            "auth_mon_num",
            "pdb_chain_id",
            "ref_mon_id",
            "ref_mon_num",
        ):
            aCat.appendAttribute(attributeName)
        for ordinal in (9, 10, 11, 12):
            aCat.append([ordinal, 2, 3, 4, 5, 6, 7])

        tmpf = tempname(suffix=".cif")
        curContainer.append(aCat)
        myDataList.append(curContainer)
        # Context managers guarantee the handles are closed even on failure
        with open(tmpf, "w") as ofh:
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(myDataList)

        # Read and update the data
        myDataList = []
        with open(tmpf, "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(myDataList)

        myBlock = myDataList[0]
        myBlock.printIt()
        myCat = myBlock.getObj("pdbx_seqtool_mapping_ref")
        myCat.printIt()
        for iRow in range(0, myCat.getRowCount()):
            myCat.setValue("some value", "ref_mon_id", iRow)
            myCat.setValue(100, "ref_mon_num", iRow)

        with open(tempname(suffix=".cif"), "w") as ofh:
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(myDataList)
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate
        traceback.print_exc(file=self.lfh)
        self.fail()
def _view(mol, viewname):
    """Load ``mol`` into the viewer as the object ``viewname``.

    The molecule is written to a temporary CIF (topology) and a temporary XTC
    (trajectory); any existing object called ``viewname`` is deleted, the two
    files are loaded through ``cmd``, and the temporaries are removed.
    ``showing[viewname]`` is set to True at the end.

    Parameters
    ----------
    mol : object
        Object exposing a ``write(path)`` method.
    viewname : str
        Name of the viewer object to (re)create.
    """
    topology_file = tempname(suffix=".cif")
    trajectory_file = tempname(suffix=".xtc")
    mol.write(topology_file)
    mol.write(trajectory_file)

    # Replace whatever is currently shown under this name
    cmd.delete(viewname)
    cmd.load(topology_file, viewname)
    cmd.load_traj(trajectory_file, viewname, state=1)
    cmd.dss()  # Guess SS

    for scratch_file in (topology_file, trajectory_file):
        os.remove(scratch_file)

    showing[viewname] = True
def sdfReader(file, removeHs, fixHs, sanitize, isgzip=False):
    """Read all molecules of an SDF file into a list of ``SmallMol`` objects.

    Parameters
    ----------
    file : str
        Path to the SDF file (gzip-compressed when ``isgzip`` is True).
    removeHs : bool
        Forwarded to ``Chem.SDMolSupplier`` and to ``SmallMol``.
    fixHs : bool
        Forwarded to ``SmallMol``.
    sanitize : bool
        Forwarded to ``Chem.SDMolSupplier``.
    isgzip : bool
        If True, the file is decompressed to a temporary ``.sdf`` file first.

    Returns
    -------
    mols : list
        The successfully constructed ``SmallMol`` objects. Entries the
        supplier yields as ``None`` and entries for which ``SmallMol`` raises
        are skipped and counted as failures.
    """
    import shutil
    from tqdm import tqdm
    from moleculekit.util import tempname

    if isgzip:
        # SDMolSupplier does not support file handles, need to write temp file
        tmp_sdf = tempname(suffix=".sdf")
        with gzip.open(file, "rb") as fin, open(tmp_sdf, "wb") as fout:
            # Stream the payload instead of holding it all in memory
            shutil.copyfileobj(fin, fout)
        file = tmp_sdf

    supplier = Chem.SDMolSupplier(file, removeHs=removeHs, sanitize=sanitize)
    mols = []
    countfailed = 0
    for mol in tqdm(supplier):
        if mol is None:
            countfailed += 1
            continue
        try:
            mols.append(SmallMol(mol, removeHs=removeHs, fixHs=fixHs))
        except Exception:
            countfailed += 1
            # The name is optional: resolve it per molecule so a missing
            # "_Name" neither raises nor reuses a previous molecule's name.
            if mol.HasProp("_Name"):
                name = mol.GetProp("_Name")
                logger.warning(
                    f"Failed to load molecule with name {name}. Skipping to next molecule."
                )
            else:
                logger.warning("Failed to load molecule. Skipping to next molecule.")

    if countfailed:
        logger.info(f"Failed to load {countfailed}/{len(supplier)} molecules")
    return mols
def sdfReader(file, removeHs, fixHs, sanitize, isgzip=False):
    """Read all molecules of an SDF file into a list of ``SmallMol`` objects.

    Parameters
    ----------
    file : str
        Path to the SDF file (gzip-compressed when ``isgzip`` is True).
    removeHs : bool
        Forwarded to ``Chem.SDMolSupplier`` and to ``SmallMol``.
    fixHs : bool
        Forwarded to ``SmallMol``.
    sanitize : bool
        Forwarded to ``Chem.SDMolSupplier``.
    isgzip : bool
        If True, the file is decompressed to a temporary ``.sdf`` file first.

    Returns
    -------
    mols : list
        The successfully constructed ``SmallMol`` objects. Entries the
        supplier yields as ``None`` and entries for which ``SmallMol`` raises
        are skipped and counted as failures.
    """
    import shutil
    from tqdm import tqdm
    from moleculekit.util import tempname

    if isgzip:
        # SDMolSupplier does not support file handles, need to write temp file
        tmp_sdf = tempname(suffix='.sdf')
        with gzip.open(file, 'rb') as fin, open(tmp_sdf, 'wb') as fout:
            # Stream the payload instead of holding it all in memory
            shutil.copyfileobj(fin, fout)
        file = tmp_sdf

    supplier = Chem.SDMolSupplier(file, removeHs=removeHs, sanitize=sanitize)
    mols = []
    countfailed = 0
    for mol in tqdm(supplier):
        if mol is None:
            countfailed += 1
            continue
        try:
            mols.append(SmallMol(mol, removeHs=removeHs, fixHs=fixHs))
        except Exception:
            countfailed += 1
            # The name is optional: resolve it per molecule so a missing
            # "_Name" neither raises nor reuses a previous molecule's name.
            namepart = ''
            if mol.HasProp('_Name'):
                namepart = ' with name {}'.format(mol.GetProp('_Name'))
            logger.warning(
                'Failed to load molecule{}. Skipping to next molecule.'.format(namepart)
            )

    if countfailed:
        logger.info('Failed to load {}/{} molecules'.format(countfailed, len(supplier)))
    return mols
def setUpClass(self):
    """Build the two projected datasets used by the tests.

    Simulations are listed from the ``adaptive`` data directory, filtered with
    the selection "not water", and projected twice: once onto a contact
    metric (stored as ``self.data1``) and once onto ``MetricDihedral``
    (stored as ``self.data2``).
    """
    from htmd.simlist import simlist, simfilter
    from glob import glob
    from htmd.projections.metric import Metric
    from moleculekit.projections.metricdistance import MetricDistance
    from moleculekit.projections.metricdihedral import MetricDihedral
    from moleculekit.util import tempname
    from htmd.home import home
    from os.path import join

    trajectory_dirs = glob(join(home(dataDir="adaptive"), "data", "*", ""))
    input_dirs = glob(join(home(dataDir="adaptive"), "input", "*"))
    all_sims = simlist(trajectory_dirs, input_dirs)
    filtered_sims = simfilter(all_sims, tempname(), "not water")

    projector = Metric(filtered_sims)

    contact_metric = MetricDistance(
        "protein and resid 10 and name CA",
        "resname BEN and noh",
        periodic="selections",
        metric="contacts",
        groupsel1="residue",
        threshold=4,
    )
    projector.set(contact_metric)
    self.data1 = projector.project()

    projector.set(MetricDihedral())
    self.data2 = projector.project()
def test_saving_loading(self):
    """Round-trip ``self.data1`` through ``save`` and both loading paths.

    The saved file is reloaded via the ``MetricData(file=...)`` constructor
    and via ``MetricData().load(...)``, then saved and reloaded once more
    after attaching a copy of ``self.data2`` as parent. Each reloaded object
    must report the expected trajectory count and array shapes.
    """
    from moleculekit.util import tempname

    def _verify(loaded):
        # Expected sizes of the fixture data after a round trip
        assert loaded.numTrajectories == 2, 'Failed to load trajectories'
        assert loaded.description.shape == (9, 3), 'Failed to load pandas data'
        assert loaded.trajectories[0].projection.shape == (6, 9), 'Wrong data size'

    savefile = tempname(suffix='.dat')
    self.data1.save(savefile)

    # Load through the constructor
    _verify(MetricData(file=savefile))

    # Load through the explicit load() method
    reloaded = MetricData()
    reloaded.load(savefile)
    _verify(reloaded)

    # Saving with a parent
    with_parent = self.data1.copy()
    with_parent.parent = self.data2.copy()
    with_parent.save(savefile)
    _verify(MetricData(file=savefile))
def test_reconstruct_contact_map(self):
    """Compare ``contactVecToMatrix`` output for 1yu8 with stored references.

    CA self-contacts of the protein are projected, converted to a contact
    matrix and checked against the arrays in ``contactvectomatrix.npz``.
    ``reconstructContactMap`` is also called, writing to a temporary SVG.
    """
    from moleculekit.util import tempname
    from moleculekit.molecule import Molecule
    import matplotlib

    # Select the Agg backend before any plotting happens
    matplotlib.use('Agg')

    mol = Molecule('1yu8')
    projector = MetricSelfDistance('protein and name CA', metric='contacts', pbc=False)
    data = projector.project(mol)
    mapping = projector.getMapping(mol)

    tmpfile = tempname(suffix='.svg')
    cm, newmapping, uqAtomGroups = contactVecToMatrix(data, mapping.atomIndexes)
    reconstructContactMap(data, mapping, outfile=tmpfile)

    reference_path = os.path.join(
        home(dataDir='test-projections'), 'metricdistance', 'contactvectomatrix.npz'
    )
    refdata = np.load(reference_path)

    computed = (
        ('cm', cm),
        ('newmapping', newmapping),
        ('uqAtomGroups', uqAtomGroups),
    )
    for key, value in computed:
        assert np.array_equal(refdata[key], value)
def setUp(self):
    """Prepare per-test state: log handle, verbosity and file paths.

    ``pathPdbxDataFile`` points at ``1kip.cif`` inside the
    ``molecule-readers`` data directory; ``pathOutputFile`` is a fresh
    temporary ``.cif`` name.
    """
    self.lfh = sys.stderr
    self.verbose = False

    readers_dir = home(dataDir="molecule-readers")
    self.pathPdbxDataFile = os.path.join(readers_dir, "1kip.cif")
    self.pathOutputFile = tempname(suffix=".cif")
def setUpClass(self):
    """Build the two projected datasets used by the tests.

    Simulations are listed from the ``adaptive`` data directory, filtered with
    the selection 'not water', and projected twice: once onto a contact
    metric (stored as ``self.data1``) and once onto ``MetricDihedral``
    (stored as ``self.data2``).
    """
    from htmd.simlist import simlist, simfilter
    from glob import glob
    from htmd.projections.metric import Metric
    from moleculekit.projections.metricdistance import MetricDistance
    from moleculekit.projections.metricdihedral import MetricDihedral
    from moleculekit.util import tempname
    from htmd.home import home
    from os.path import join

    trajectory_dirs = glob(join(home(dataDir='adaptive'), 'data', '*', ''))
    input_dirs = glob(join(home(dataDir='adaptive'), 'input', '*'))
    all_sims = simlist(trajectory_dirs, input_dirs)
    filtered_sims = simfilter(all_sims, tempname(), 'not water')

    projector = Metric(filtered_sims)

    contact_metric = MetricDistance(
        'protein and resid 10 and name CA',
        'resname BEN and noh',
        metric='contacts',
        groupsel1='residue',
        threshold=4,
    )
    projector.set(contact_metric)
    self.data1 = projector.project()

    projector.set(MetricDihedral())
    self.data2 = projector.project()
def test_reconstruct_contact_map(self):
    """Compare ``contactVecToMatrix`` output for 1yu8 with stored references.

    CA self-contacts of the protein are projected (first element of the
    projection result is used), converted to a contact matrix and checked
    against the arrays in ``contactvectomatrix.npz``.
    ``reconstructContactMap`` is also called, writing to a temporary SVG.
    """
    from moleculekit.util import tempname
    from moleculekit.molecule import Molecule
    import matplotlib

    # Select the Agg backend before any plotting happens
    matplotlib.use("Agg")

    mol = Molecule("1yu8")
    projector = MetricSelfDistance("protein and name CA", metric="contacts", pbc=False)
    data = projector.project(mol)[0]
    mapping = projector.getMapping(mol)

    tmpfile = tempname(suffix=".svg")
    cm, newmapping, uqAtomGroups = contactVecToMatrix(data, mapping.atomIndexes)
    reconstructContactMap(data, mapping, outfile=tmpfile)

    reference_path = os.path.join(
        home(dataDir="test-projections"),
        "metricdistance",
        "contactvectomatrix.npz",
    )
    refdata = np.load(reference_path)

    computed = (
        ("cm", cm),
        ("newmapping", newmapping),
        ("uqAtomGroups", uqAtomGroups),
    )
    for key, value in computed:
        assert np.array_equal(refdata[key], value)
def testWriteDataFile(self):
    """Test case - write data file

    Builds one data container holding a seven-attribute category with four
    identical rows and writes it to a temporary CIF file. Any exception is
    printed to stderr and turned into a test failure.
    """
    self.lfh.write(
        "\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name)
    )
    try:
        myDataList = []
        # The context manager closes the output file even if writing fails
        with open(tempname(suffix=".cif"), "w") as ofh:
            curContainer = DataContainer("myblock")
            aCat = DataCategory("pdbx_seqtool_mapping_ref")
            for attributeName in (
                "ordinal",
                "entity_id",
                "auth_mon_id",
                "auth_mon_num",
                "pdb_chain_id",
                "ref_mon_id",
                "ref_mon_num",
            ):
                aCat.appendAttribute(attributeName)
            for _ in range(4):
                aCat.append((1, 2, 3, 4, 5, 6, 7))
            curContainer.append(aCat)
            myDataList.append(curContainer)
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(myDataList)
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate
        traceback.print_exc(file=sys.stderr)
        self.fail()
def test_writers(self):
    """Write ``self.mol`` with every registered writer and diff against references.

    For each extension in ``_WRITERS`` the molecule is written to a temporary
    file. Unless the extension is in the skip list, the written lines are
    compared with ``mol.<ext>`` from the ``molecule-writers`` data directory
    (for SDF the first two lines of both files are ignored).
    """
    from moleculekit.util import tempname
    from moleculekit.home import home
    import filecmp

    # Skip file-comparing binary filetypes
    # TODO: Remove SDF. Currently skipping it due to date in second line
    skipcomparison = (
        "ncrst",
        "rst7",
        "dcd",
        "h5",
        "nc",
        "netcdf",
        "ncdf",
        "tng",
        "pdb.gz",
        "xyz.gz",
    )

    for ext in _WRITERS:
        with self.subTest(extension=ext):
            tmpfile = tempname(suffix="." + ext)

            # Some formats need a tweaked molecule or a sub-selection
            if ext == "pdbqt":
                typed_mol = self.mol.copy()
                typed_mol.atomtype[:] = "NA"
                typed_mol.write(tmpfile)
            elif ext == "mol2":
                self.mol.write(tmpfile, sel="resid 1")
            else:
                self.mol.write(tmpfile)

            if ext in skipcomparison:
                continue

            reffile = os.path.join(home(dataDir="molecule-writers"), "mol." + ext)
            # Number of leading lines ignored in both files (SDF only)
            header_lines = 2 if ext == "sdf" else 0

            try:
                with open(tmpfile, "r") as written:
                    filelines = written.readlines()
                filelines = filelines[header_lines:]
            except UnicodeDecodeError:
                print(
                    "Could not compare file {} due to not being unicode".format(reffile)
                )
                continue

            with open(reffile, "r") as reference:
                reflines = reference.readlines()
            reflines = reflines[header_lines:]

            print("Testing file", reffile, tmpfile)
            self.assertEqual(
                filelines, reflines, msg=f"Failed comparison of {reffile} {tmpfile}"
            )
def getOpenBabelProperties(mol):
    """Run the ``obabel_cli.py`` helper on ``mol`` and parse its CSV output.

    The molecule is written to a temporary PDB file, the helper script next
    to this module is executed with the current interpreter, and the CSV file
    it produces is parsed. Both temporary files are removed afterwards,
    whether or not an error occurred.

    Parameters
    ----------
    mol : object
        Object exposing a ``write(path)`` method.

    Returns
    -------
    atoms : list of list
        One entry per CSV line, split on commas, with fields 0 and 2
        converted to ``int`` and field 5 to ``float``; sorted by field 0.
        The last field keeps whatever trailing newline the line had.

    Notes
    -----
    A failing subprocess is only reported on stdout; reading the output file
    is still attempted afterwards and raises if the helper produced nothing.
    """
    import subprocess
    import os
    from moleculekit.util import tempname

    pdbfile = tempname(suffix=".pdb")
    outfile = tempname(suffix=".csv")
    obabelcli = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "obabel_cli.py"
    )

    try:
        mol.write(pdbfile)

        try:
            output = subprocess.check_output(
                [sys.executable, obabelcli, pdbfile, outfile],
                stderr=subprocess.STDOUT,
                universal_newlines=True,
            ).strip()
        except subprocess.CalledProcessError as exc:
            print(
                "Failed to call getOpenBabelProperties with error",
                exc.returncode,
                exc.output,
            )
        else:
            if len(output):
                print(output)

        atoms = []
        with open(outfile, "r") as f:
            for line in f:
                pieces = line.split(",")
                pieces[0] = int(pieces[0])
                pieces[2] = int(pieces[2])
                pieces[5] = float(pieces[5])
                atoms.append(pieces)
    finally:
        # Remove both temporaries on every exit path (the CSV used to leak)
        for tmpfile in (pdbfile, outfile):
            if os.path.exists(tmpfile):
                os.remove(tmpfile)

    atoms.sort(key=lambda x: x[0])  # Sort by index
    return atoms
def testSimpleInitialization(self):
    """Test case - Simple initialization of a data category and data block

    A 10x10 category is created in one constructor call, written to a
    temporary CIF file, read back, and every recovered category is logged to
    ``self.lfh``. Any exception is logged and turned into a test failure.
    """
    self.lfh.write(
        "\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name)
    )
    try:
        fn = tempname(suffix='.cif')
        attributeNameList = [
            'aOne', 'aTwo', 'aThree', 'aFour', 'aFive',
            'aSix', 'aSeven', 'aEight', 'aNine', 'aTen',
        ]
        # Ten independent rows, each holding the values 1..10
        rowList = [list(range(1, 11)) for _ in range(10)]
        nameCat = 'myCategory'

        curContainer = DataContainer("myblock")
        aCat = DataCategory(nameCat, attributeNameList, rowList)
        aCat.printIt()
        curContainer.append(aCat)
        curContainer.printIt()

        myContainerList = []
        myContainerList.append(curContainer)
        # Context managers guarantee the handles are closed even on failure
        with open(fn, "w") as ofh:
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(myContainerList)

        myContainerList = []
        with open(fn, "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(myContainerList)

        for container in myContainerList:
            for objName in container.getObjNameList():
                name, aList, rList = container.getObj(objName).get()
                self.lfh.write("Recovered data category  %s\n" % name)
                self.lfh.write("Attribute list           %r\n" % repr(aList))
                self.lfh.write("Row list                 %r\n" % repr(rList))
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate
        traceback.print_exc(file=self.lfh)
        self.fail()
def __init__(self, arr, vecMin, vecRes, color=8, isovalue=0.5, name=None, draw='solid'):
    """Displays an isosurface in VMD

    The volumetric data is written to a temporary cube file, loaded into the
    current viewer as a new molecule and shown with an Isosurface
    representation. The temporary file is removed afterwards, also when one
    of the viewer commands fails. To delete the graphics object, use the
    delete() method.

    Parameters
    ----------
    arr: np.ndarray
        3D array with volumetric data.
    vecMin: np.ndarray
        3D vector denoting the minimal corner of the grid
    vecRes: np.ndarray
        3D vector denoting the resolution of the grid in each dimension
    color: int
        The VMD ColorID to be used for the isosurface
    isovalue: float
        The isovalue passed to the Isosurface representation
    name: str
        A name for the representation
    draw: str ('solid', 'wireframe')
        Drawing style for the isosurface

    Raises
    ------
    KeyError
        If ``draw`` is not one of the supported drawing styles.
    """
    super().__init__(arr)
    from moleculekit.util import tempname, writeVoxels
    import os

    drawmapping = {'solid': 0, 'wireframe': 1}
    # Resolve the style first so an invalid value fails before anything is
    # written to disk or sent to the viewer.
    drawstyle = drawmapping[draw]

    filename = tempname(suffix='.cube')
    try:
        writeVoxels(arr, filename, vecMin, vecRes)

        vmd = getCurrentViewer()
        vmd.send(
            'mol new {} type cube first 0 last -1 step 1 waitfor 1 volsets {{0 }}'
            .format(filename))
        vmd.send('mol modstyle top top Isosurface {} 0 2 {} 2 1'.format(
            isovalue, drawstyle))
        vmd.send('mol modcolor top top ColorID {}'.format(color))
        vmd.send('set htmd_graphics_mol({:d}) [molinfo top]'.format(self.n))
        if name is not None:
            vmd.send('mol rename top {{{}}}'.format(name))
    finally:
        # The cube file is only needed while the viewer loads it
        if os.path.exists(filename):
            os.unlink(filename)
def openbabelConvert(input_file, input_format, output_format, extra_args=()):
    """
    Converts a file between formats by invoking the ``obabel`` executable.

    A failing ``obabel`` call is reported on stdout only; the output path is
    returned in either case.

    Parameters
    ----------
    input_file: str
        The path of the input file to convert
    input_format: str
        The input file format, with or without a leading dot
    output_format: str
        The output file format, with or without a leading dot
    extra_args: iterable of str
        Additional command line arguments appended to the ``obabel`` call

    Returns
    -------
    outfile: str
        The path of the temporary output file
    """
    import subprocess
    from moleculekit.util import tempname

    # Accept both "pdb" and ".pdb" style format names
    if input_format.startswith("."):
        input_format = input_format[1:]
    if output_format.startswith("."):
        output_format = output_format[1:]

    outfile = tempname(suffix=f".{output_format}")

    try:
        command = [
            "obabel",
            f"-i{input_format}",
            input_file,
            f"-o{output_format}",
            f"-O{outfile}",
        ]
        command.extend(extra_args)
        output = subprocess.check_output(
            command,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
        )
    except subprocess.CalledProcessError as exc:
        print("Failed to call openbabel with error", exc.returncode, exc.output)
    else:
        print(output)

    return outfile
def test_writers(self):
    """Write ``self.mol`` with every registered writer and diff against references.

    For each extension in ``_WRITERS`` the molecule is written to a temporary
    file. Unless the extension is in the skip list, the written lines are
    compared with ``mol.<ext>`` from the ``molecule-writers`` data directory
    (for SDF the first two lines of both files are ignored).
    """
    from moleculekit.util import tempname
    from moleculekit.home import home
    import filecmp

    # Skip file-comparing binary filetypes
    # TODO: Remove SDF. Currently skipping it due to date in second line
    skipcomparison = (
        'ncrst', 'rst7', 'dcd', 'h5', 'nc',
        'netcdf', 'ncdf', 'tng', 'pdb.gz', 'xyz.gz',
    )

    for ext in _WRITERS:
        with self.subTest(extension=ext):
            tmpfile = tempname(suffix='.' + ext)

            # Some formats need a tweaked molecule or a sub-selection
            if ext == 'pdbqt':
                typed_mol = self.mol.copy()
                typed_mol.atomtype[:] = 'NA'
                typed_mol.write(tmpfile)
            elif ext == 'mol2':
                self.mol.write(tmpfile, sel='resid 1')
            else:
                self.mol.write(tmpfile)

            if ext in skipcomparison:
                continue

            reffile = os.path.join(home(dataDir='molecule-writers'), 'mol.' + ext)
            # Number of leading lines ignored in both files (SDF only)
            header_lines = 2 if ext == 'sdf' else 0

            try:
                with open(tmpfile, 'r') as written:
                    filelines = written.readlines()
                filelines = filelines[header_lines:]
            except UnicodeDecodeError:
                print('Could not compare file {} due to not being unicode'.format(reffile))
                continue

            with open(reffile, 'r') as reference:
                reflines = reference.readlines()
            reflines = reflines[header_lines:]

            print('Testing file', reffile, tmpfile)
            self.assertEqual(
                filelines, reflines, msg=f"Failed comparison of {reffile} {tmpfile}")
if __name__ == '__main__':
    # Self-test: project the bundled adaptive simulations onto a contact
    # metric and a dihedral metric, combine the two results and check the
    # dimensions of the combined data.
    from htmd.simlist import simlist, simfilter
    from glob import glob
    from htmd.projections.metric import Metric
    from moleculekit.projections.metricdistance import MetricDistance
    from moleculekit.projections.metricdihedral import MetricDihedral
    from moleculekit.util import tempname
    from htmd.home import home
    from os.path import join

    testfolder = home(dataDir='adaptive')
    trajectory_dirs = glob(join(testfolder, 'data', '*', ''))
    structure_files = glob(join(testfolder, 'input', '*', 'structure.pdb'))
    sims = simlist(trajectory_dirs, structure_files)
    fsims = simfilter(sims, tempname(), 'not water')

    metr = Metric(fsims)
    contact_metric = MetricDistance(
        'protein and resid 10 and name CA',
        'resname BEN and noh',
        metric='contacts',
        groupsel1='residue',
        threshold=4,
    )
    metr.set(contact_metric)
    data1 = metr.project()

    metr.set(MetricDihedral())
    data2 = metr.project()

    # Testing combining of metrics
    data1.combine(data2)

    # Testing dimensions
    failure_message = 'combine not working correct'
    assert np.array_equal(data1.description.shape, (897, 3)), failure_message
    assert np.array_equal(data1.trajectories[0].projection.shape, (6, 897)), failure_message
    # The contact dimensions must come first in the combined description
    contact_rows = np.where(data1.description.type == 'contact')[0]
    assert np.array_equal(contact_rows, list(range(9))), failure_message
def dock(
    protein,
    ligand,
    center=None,
    extent=None,
    numposes=20,
    babelexe="obabel",
    vinaexe=None,
):
    """Molecular docking, using Vina

    If centre and extent are not provided, docking will be performed over the whole protein:
    the search box is centred on the protein's bounding box and is 10 Angstrom larger than
    that bounding box along each axis.

    Parameters
    ----------
    protein : :class:`Molecule <moleculekit.molecule.Molecule>` object
        Molecule object representing the receptor
    ligand : :class:`Molecule <moleculekit.molecule.Molecule>` object
        Molecule object representing the ligand to dock
    center : list
        3-vec centre of of the search bounding box (optional)
    extent : list
        3-vec linear extent of the search bounding box (optional)
    numposes : int
        Number of poses to return. Vina cannot return more than 20 poses.
    babelexe : str
        Path to babel executable.
    vinaexe : str
        Path to AutoDock Vina executable. If None, an executable named
        ``<platform.system()>-vina`` (with ``.exe`` on Windows) is searched on the PATH.

    Returns
    -------
    poses : list
        One Molecule object per pose file produced, in natural file-name order
    scores : np.ndarray
        Array built from the per-pose results of ``_parseScoring``
        (presumably kcal, rmsd lb, rmsd ub for each pose)

    Raises
    ------
    RuntimeError
        If the inputs are not single-frame, if vina or babel cannot be found,
        or if the conversion of protein or ligand to PDBQT produced no file.

    Examples
    --------
    >>> poses, scoring = dock(protein, ligand)
    >>> poses, scoring = dock(protein, ligand, center=[ 10., 5., 12. ], extent=[ 15., 15., 15. ] )
    """
    if np.size(protein.coords, 2) != 1 or np.size(ligand.coords, 2) != 1:
        raise RuntimeError("Protein and ligand Molecules should be single frames")

    buffer = 10.0  # Angstrom buffer around protein for whole-protein docking
    c_min = np.min(protein.coords, 0).reshape((1, 3))[0]
    c_max = np.max(protein.coords, 0).reshape((1, 3))[0]

    if center is None:
        # Midpoint of the bounding box. The buffer belongs to the extent only;
        # adding it here would shift the whole search box off-centre.
        center = (c_max + c_min) / 2
    if extent is None:
        extent = (c_max - c_min) + buffer

    # Equivalent command lines:
    # babel -i pdb protein.pdb  -o pdbqt protein.pdbqt -xr
    # babel -i pdb ligand.pdb   -o pdbqt ligand.pdbqt -xhn
    # vina --ligand ligand.pdbqt --receptor protein.pdbqt --center_x 0. --center_y 0. --center_z 0.
    #      --size_x 60. --size_y 60. --size_z 60 --exhaustiveness 10
    # babel -m -i pdbqt ligand_out.pdbqt -o pdb out_.pdb -xhn

    protein_pdb = tempname(suffix=".pdb")
    ligand_mol2 = tempname(suffix=".mol2")
    output_pdb = tempname(suffix="_.pdb")
    output_prefix = path.splitext(output_pdb)[0]

    protein_pdbqt = tempname(suffix=".pdbqt")
    ligand_pdbqt = tempname(suffix=".pdbqt")
    output_pdbqt = tempname(suffix=".pdbqt")

    protein.write(protein_pdb)
    lig2 = ligand.copy()
    # babel does not understand mol2 atomtypes and requires elements instead
    lig2.atomtype = lig2.element
    lig2.write(ligand_mol2)

    # Dirty hack to remove the 'END' line from the PDBs since babel hates it.
    # readlines() keeps the newline, so the last line is compared stripped.
    with open(protein_pdb, "r") as f:
        lines = f.readlines()
    if lines and lines[-1].strip() == "END":
        with open(protein_pdb, "w") as f:
            f.writelines(lines[:-1])
    # End of dirty hack

    try:
        if vinaexe is None:
            suffix = "" if platform.system() != "Windows" else ".exe"
            vinaexe = f"{platform.system()}-vina{suffix}"

        vinaexe = shutil.which(vinaexe, mode=os.X_OK)
        if not vinaexe:
            raise RuntimeError(
                "Could not find vina, or no execute permissions are given"
            )
    except Exception:
        raise RuntimeError("Could not find vina, or no execute permissions are given")

    try:
        babelexe = shutil.which(babelexe, mode=os.X_OK)
        if babelexe is None:
            raise RuntimeError(
                "Could not find babel, or no execute permissions are given"
            )
    except Exception:
        raise RuntimeError("Could not find babel, or no execute permissions are given")

    call(
        [babelexe, "-i", "pdb", protein_pdb, "-o", "pdbqt", "-O", protein_pdbqt, "-xr"]
    )

    babelcmd = [
        babelexe,
        "-i",
        "mol2",
        ligand_mol2,
        "-o",
        "pdbqt",
        "-O",
        ligand_pdbqt,
        "-xn",
        "-xh",
    ]
    if np.all(ligand.charge != 0):
        logger.info("Charges detected in ligand and will be used for docking.")
    else:
        logger.info(
            "Charges were not defined for all atoms. Will guess charges using gasteiger method."
        )
        babelcmd += ["--partialcharge", "gasteiger"]
    call(babelcmd)

    # call() does not raise on a failing conversion, so check the outputs exist
    if not path.isfile(ligand_pdbqt):
        raise RuntimeError("Ligand could not be converted to PDBQT")
    if not path.isfile(protein_pdbqt):
        raise RuntimeError("Protein could not be converted to PDBQT")

    call(
        [
            vinaexe,
            "--receptor",
            protein_pdbqt,
            "--ligand",
            ligand_pdbqt,
            "--out",
            output_pdbqt,
            "--center_x",
            str(center[0]),
            "--center_y",
            str(center[1]),
            "--center_z",
            str(center[2]),
            "--size_x",
            str(extent[0]),
            "--size_y",
            str(extent[1]),
            "--size_z",
            str(extent[2]),
            "--num_modes",
            str(numposes),
        ]
    )

    # Split the multi-pose PDBQT into one PDB file per pose
    call(
        [
            babelexe,
            "-m",
            "-i",
            "pdbqt",
            output_pdbqt,
            "-o",
            "pdb",
            "-O",
            output_pdb,
            "-xhn",
        ]
    )

    outfiles = natsorted(glob(f"{output_prefix}*.pdb"))

    scoring = []
    poses = []
    for i, ligf in enumerate(outfiles):
        scoring.append(_parseScoring(ligf))
        ll = Molecule(ligf)
        ll.viewname = f"Pose {i}"
        poses.append(ll)

    os.remove(protein_pdb)
    os.remove(ligand_mol2)
    os.remove(protein_pdbqt)
    os.remove(ligand_pdbqt)
    os.remove(output_pdbqt)

    return poses, np.array(scoring)