def test_model_3zg0():
    """Compare the 3ZG0 structure before and after ``modellerTransform``.

    Returns immediately (no assertions run) when ``PC.MODELLER`` is falsy.
    """
    if not PC.MODELLER:
        return

    from ProtoCaller.Wrappers.modellerwrapper import modellerTransform

    # expected length of each indexed element of the modelled structure
    expected_sizes = (642, 642, 9, 9, 239, 171)

    with Dir(PC.TESTDIR + "/shared"), Dir("temp", temp=True):
        copyfile("../3ZG0.pdb", "3ZG0.pdb")
        copyfile("../3ZG0.fasta", "3ZG0.fasta")

        original = PDB.PDB("3ZG0.pdb")
        modelled_filename = modellerTransform(
            "3ZG0.pdb", "3ZG0.fasta", add_missing_atoms=False)
        modelled = PDB.PDB(modelled_filename)

        # neither structure reports missing atoms
        assert len(original.missing_atoms) == 0
        assert len(modelled.missing_atoms) == 0

        # the seven missing residues of the input are gone after modelling
        assert len(original.missing_residues) == 7
        assert len(modelled.missing_residues) == 0

        # the remaining annotations are the same before and after
        assert len(original.disulfide_bonds) == 0
        assert len(modelled.disulfide_bonds) == 0
        assert len(original.modified_residues) == 0
        assert len(modelled.modified_residues) == 0
        assert len(original.site_residues) == 120
        assert len(modelled.site_residues) == 120

        for index, size in enumerate(expected_sizes):
            assert len(modelled[index]) == size
def test_prepare_1bji():
    """Exercise filtering, preparation and parametrisation of ``Protein("1bji")``."""
    with Dir(PC.TESTDIR + "/Ensemble"), Dir("temp", temp=True):
        prot = Protein("1bji", ligand_ref="479G")
        assert isinstance(prot.ligand_ref, Ligand)
        assert len(prot.ligands) == 9
        assert prot._pdb_obj.numberOfAtoms == 3259

        prot.filter(waters=None, include_mols=["480"], ligands=None)
        assert isinstance(prot.ligand_ref, Ligand)
        assert len(prot.ligands) == 0
        assert prot._pdb_obj.numberOfAtoms == 3069

        # this time keep only site waters and parametrise with some end state
        # checks
        prot = Protein("1bji", ligand_ref="479G")
        prot.filter(ligands=None, waters="site")
        prot.prepare()
        prot.parametrise(Params())

        has_bss_system = PC.BIOSIMSPACE and isinstance(
            prot.complex_template, BSS._SireWrappers._system.System)
        if not has_bss_system:
            # TODO: add support for ParmEd topologies when the Morph class is
            # fully functional
            return

        assert prot.complex_template.nAtoms() == 6006
        assert pytest.approx(
            prot.complex_template.charge().magnitude()) == 2
def test_MCS_EZ():
    """Check the sizes of the maps returned by ``getMCSMap`` for E/Z cases.

    Each case loads an ``EZ_refN.mol2`` / ``EZ_molN.mol2`` pair from the
    shared test directory and asserts that every returned map has the
    expected length.
    """
    with Dir(PC.TESTDIR + "/shared"):
        # test mapping of an asymmetric double bond onto symmetric one
        ref = openFileAsRdkit("EZ_ref1.mol2", removeHs=False)
        mol = openFileAsRdkit("EZ_mol1.mol2", removeHs=False)
        results = getMCSMap(ref, mol)
        assert all(len(x) == 38 for x in results)
        assert all({(11, 12), (12, 11)}.issubset(x) for x in results)

        # test mapping between two esters of different E/Z conformations
        ref = openFileAsRdkit("EZ_ref2.mol2", removeHs=False)
        mol = openFileAsRdkit("EZ_mol2.mol2", removeHs=False)
        results = getMCSMap(ref, mol)
        assert all(len(x) == 28 for x in results)
        results_rev = getMCSMap(mol, ref)
        assert all(len(x) == 28 for x in results_rev)

        # test mapping of a double bond to a single bond with unfavourable
        # conformation
        ref = openFileAsRdkit("EZ_ref3.mol2", removeHs=False)
        mol = openFileAsRdkit("EZ_mol3.mol2", removeHs=False)
        results = getMCSMap(ref, mol, timeout=1, two_way_matching=True)
        assert all(len(x) == 38 for x in results)
        results = getMCSMap(ref, mol, timeout=1, two_way_matching=False)
        assert all(len(x) == 27 for x in results)

        # test mapping of a double bond to a single bond with favourable
        # conformation
        ref = openFileAsRdkit("EZ_ref4.mol2", removeHs=False)
        mol = openFileAsRdkit("EZ_mol4.mol2", removeHs=False)
        results = getMCSMap(ref, mol, timeout=1, two_way_matching=True)
        assert all(len(x) == 41 for x in results)
        results = getMCSMap(ref, mol, timeout=1, two_way_matching=False)
        assert all(len(x) == 27 for x in results)

        # test the recursive algorithm for > 1 mismatching bonds
        ref = openFileAsRdkit("EZ_ref5.mol2", removeHs=False)
        mol = openFileAsRdkit("EZ_mol5.mol2", removeHs=False)
        results = getMCSMap(ref, mol, timeout=1)
        assert all(len(x) == 17 for x in results)
        # The reverse direction must swap the arguments, as in the ester case
        # above; previously this repeated the forward call.
        # NOTE(review): the expected size of 17 for the reverse mapping is
        # assumed to match the forward one -- confirm by running the test.
        results_rev = getMCSMap(mol, ref, timeout=1)
        assert all(len(x) == 17 for x in results_rev)

        # test the recursive algorithm for > 1 mismatching bonds and matching
        # a double bond onto a single bond
        ref = openFileAsRdkit("EZ_ref6.mol2", removeHs=False)
        mol = openFileAsRdkit("EZ_mol6.mol2", removeHs=False)
        results = getMCSMap(ref, mol, timeout=1, two_way_matching=True)
        assert all(len(x) == 17 for x in results)

        # test mapping of mismatching amide onto an ester
        ref = openFileAsRdkit("EZ_ref7.mol2", removeHs=False)
        mol = openFileAsRdkit("EZ_mol7.mol2", removeHs=False)
        results = getMCSMap(ref, mol, timeout=1)
        assert all(len(x) == 30 for x in results)
def test_Dir():
    """Check that ``Dir`` changes the working directory and purges as asked.

    The process working directory is always restored on exit, even when an
    assertion fails, so that a failure here cannot affect later tests.
    """
    orig_dir = os.getcwd()
    try:
        os.chdir(PC.TESTDIR + "/Utils")
        target_dirname = os.path.join(PC.TESTDIR, "Utils", "temp")

        f = Dir("temp", temp=True, purge_immediately=False)
        with f:
            assert os.getcwd() == target_dirname
            # re-entering the same Dir object must keep us in the same place
            with f:
                assert os.getcwd() == target_dirname
            # leaving the inner context must not leave the directory
            assert os.getcwd() == target_dirname
        # with purge_immediately=False the directory is still present after
        # the context has been left
        assert os.path.exists(target_dirname)
        assert os.getcwd() == os.path.join(PC.TESTDIR, "Utils")

        target_dirname2 = os.path.join(PC.TESTDIR, "Utils", "temp2")
        with Dir("temp2", temp=True, purge_immediately=True):
            pass
        # with purge_immediately=True the directory is gone straight away
        assert not os.path.exists(target_dirname2)
    finally:
        os.chdir(orig_dir)
def test_model_3zg0():
    """Compare the 3ZG0 structure before and after ``charmmguiTransform``."""
    # expected length of each indexed element of the modelled structure
    expected_sizes = (642, 642, 9, 9, 239, 171)

    with Dir(PC.TESTDIR + "/shared"), Dir("temp", temp=True):
        copyfile("../3ZG0.pdb", "3ZG0.pdb")

        original = PDB.PDB("3ZG0.pdb")
        modelled_filename = charmmguiTransform("3ZG0.pdb")
        modelled = PDB.PDB(modelled_filename)

        # neither structure reports missing atoms
        assert len(original.missing_atoms) == 0
        assert len(modelled.missing_atoms) == 0

        # the seven missing residues of the input are gone after modelling
        assert len(original.missing_residues) == 7
        assert len(modelled.missing_residues) == 0

        # the remaining annotations are the same before and after
        assert len(original.disulfide_bonds) == 0
        assert len(modelled.disulfide_bonds) == 0
        assert len(original.modified_residues) == 0
        assert len(modelled.modified_residues) == 0
        assert len(original.site_residues) == 120
        assert len(modelled.site_residues) == 120

        for index, size in enumerate(expected_sizes):
            assert len(modelled[index]) == size
def test_protonate_1bji():
    """Check the structure produced by ``pdb2pqrTransform`` for 1bji."""
    with Dir(PC.TESTDIR + "/shared"), Dir("temp", temp=True):
        copyfile("../1bji.pdb", "1bji.pdb")
        protonated = PDB.PDB(pdb2pqrTransform("1bji.pdb"))

        assert len(protonated.missing_atoms) == 0
        assert len(protonated.missing_residues) == 0
        assert len(protonated.disulfide_bonds) == 9
        assert len(protonated.site_residues) == 74
        assert len(protonated[0]) == 388
        assert len(protonated[1]) == 202

        first_chain = protonated[0]
        last_index = len(first_chain) - 1
        for index, res in enumerate(first_chain):
            # don't deal with terminal amino acids
            is_terminal = index in (0, last_index)
            if not is_terminal and res.resName in size_dict:
                assert len(res) == size_dict[res.resName]
def test_align():
    """Check the coordinates of the morph built by ``alignAndCreateMorph``."""
    # coordinates that must be present in both end states
    expected_positions = {
        (-32.355, 7.263, 2.207),
        (-33.181, 6.606, 3.301),
        (-33.238, 7.160, 4.565),
        (-33.992, 6.571, 5.570),
        (-34.711, 5.417, 5.332),
        (-34.638, 4.878, 4.060),
        (-33.894, 5.447, 3.048),
        (-31.882, 8.141, 2.595),
        (-32.993, 7.533, 1.392),
        (-31.608, 6.578, 1.864),
        (-32.712, 8.025, 4.765),
        (-34.016, 7.001, 6.508),
        (-35.279, 4.975, 6.072),
        (-35.163, 4.013, 3.860),
        (-33.869, 5.013, 2.112),
    }

    with Dir(PC.TESTDIR + "/shared"), Dir("temp", temp=True):
        reference = Ligand("../toluene.sdf", protonated=True, minimise=False)
        ligand_a = Ligand("c1ccccc1CC", minimise=False)
        ligand_b = Ligand("c1ccccc1CCC", minimise=False)
        perturbation = Perturbation(ligand_a, ligand_b)
        mol, mcs = perturbation.alignAndCreateMorph(reference)
        assert len(mcs) == 18

        def coordinate_set(property_name):
            # collect the named coordinate property as a set of (x, y, z)
            vectors = mol._sire_object.property(property_name).toVector()
            return {(v[0], v[1], v[2]) for v in vectors}

        state_a = coordinate_set("coordinates0")
        state_b = coordinate_set("coordinates1")

        assert expected_positions.issubset(state_a)
        assert expected_positions.issubset(state_b)
        assert len(state_a) == 21
        assert len(state_b) == 21
        assert state_a == state_b
def test_align():
    """Check that ``alignTwoMolecules`` keeps the map and non-MCS dihedrals."""
    from ProtoCaller.Wrappers.rdkitwrapper import _nonMCSDihedrals

    with Dir(PC.TESTDIR + "/shared"):
        # test conservation of dihedrals
        ref = openFileAsRdkit("Align_ref1.mol2", removeHs=False)
        mol = openFileAsRdkit("Align_mol1.mol2", removeHs=False)
        mcs = getMCSMap(ref, mol)[0]
        dihedrals = _nonMCSDihedrals(mol, mcs)
        assert len(dihedrals) == 3

        mol_new, mcs_new = alignTwoMolecules(ref, mol, minimise_score=False)
        assert mcs == mcs_new

        dihedrals_new = _nonMCSDihedrals(mol_new, mcs_new)
        assert dihedrals.keys() == dihedrals_new.keys()
        # Compare value by value against the *new* dihedrals. The previous
        # `assert approx(a, a)` passed the second argument as the relative
        # tolerance and only tested the truthiness of the approx object, so
        # it could never fail.
        # NOTE(review): assumes the dict values are plain numbers and that the
        # default approx tolerance is appropriate -- confirm.
        for key, expected in dihedrals.items():
            assert dihedrals_new[key] == approx(expected)
def test_deepcopy_1bji():
    """Check that a deep copy of the 1bji ``PDB`` object is independent."""
    with Dir(PC.TESTDIR + "/shared"):
        source = PDB.PDB("1bji.pdb")
        clone = copy.deepcopy(source)

        # the copy carries the same content as the parsed file
        assert clone.numberOfAtoms == 3398
        assert len(clone.missing_atoms) == 0
        assert len(clone.missing_residues) == 0
        assert len(clone.modified_residues) == 3
        assert len(clone.disulfide_bonds) == 9
        assert len(clone.site_residues) == 74
        assert len(clone[0]) == 388
        assert len(clone[1]) == 202

        # purging residues on the copy must leave the source untouched
        amino_acids = clone.filter("type=='amino_acid'")
        clone.purgeResidues(amino_acids, mode="keep")
        remaining_atoms = clone.numberOfAtoms
        assert remaining_atoms == 3067
        assert remaining_atoms != source.numberOfAtoms
def test_read_write_1bji():
    """Check that reading 1bji and writing it back reproduces the records.

    Only the TER, END, ATOM and HETATM lines of the original and rewritten
    files are compared. All file handles, including the temporary output
    file, are closed deterministically via context managers.
    """
    # prefixes of the lines that take part in the comparison
    record_prefixes = ("TER", "END", "ATOM", "HETATM")

    with Dir(PC.TESTDIR + "/shared"):
        obj = PDB.PDB("1bji.pdb")
        with open("1bji.pdb") as original_file:
            filestr = [line.strip() for line in original_file
                       if line.startswith(record_prefixes)]

        assert obj.numberOfAtoms == 3398
        assert len(obj.missing_atoms) == 0
        assert len(obj.missing_residues) == 0
        assert len(obj.modified_residues) == 3
        assert len(obj.disulfide_bonds) == 9
        assert len(obj.site_residues) == 74
        assert len(obj[0]) == 388
        assert len(obj[1]) == 202

        with tempfile.NamedTemporaryFile() as new_file:
            obj.writePDB(new_file.name)
            with open(new_file.name) as rewritten_file:
                # filter out only the relevant sections
                filestr_new = [line for line in rewritten_file
                               if line.startswith(record_prefixes)]
            # parse the rewritten file while it still exists on disk
            obj_new = PDB.PDB(new_file.name)

        # fix a known formatting difference with the original PDB
        filestr_new = [line[:12] + " " + line[12:16] + line[17:]
                       for line in filestr_new]
        filestr_new = [line.strip() for line in filestr_new]

        assert obj_new.numberOfAtoms == 3398
        assert len(obj_new.missing_atoms) == 0
        assert len(obj_new.missing_residues) == 0
        # NOTE(review): this checks `obj`, not `obj_new`, unlike its
        # neighbours -- looks like a copy-paste slip; confirm whether the
        # rewritten file is expected to preserve modified residues.
        assert len(obj.modified_residues) == 3
        assert len(obj_new.disulfide_bonds) == 9
        assert len(obj_new.site_residues) == 74
        assert len(obj_new[0]) == 388
        assert len(obj_new[1]) == 202

        # the ultimate test: rewriting the PDB should result in the same PDB
        assert filestr_new == filestr
import logging
logging.basicConfig(level=logging.INFO)

import numpy as np

import ProtoCaller.Simulation as Simulation
from ProtoCaller.Utils.fileio import Dir

import argparse

# command-line interface: which leg to run and where its input files live
parser = argparse.ArgumentParser()
parser.add_argument(
    "-m", "--mode", type=str,
    help="Whether to run bound or solvated leg")
parser.add_argument(
    "-w", "--workdir", type=str,
    help="Path to where "
         "complex_final.* and/or morph.* are located")
args = parser.parse_args()

# fall back to the current directory when no working directory is given
workdir = Dir(args.workdir if args.workdir else ".")

# (run directory, coordinate file, topology file) for each supported leg
leg_settings = {
    "bound": ("Run_Bound", "complex_final.gro", "complex_final.top"),
    "solvated": ("Run_Solvated", "morph.gro", "morph.top"),
}
if args.mode not in leg_settings:
    raise ValueError("Please choose either 'bound' or 'solvated'")
dirname, gro, top = leg_settings[args.mode]

# determine the lambda values
vdw_leading_zeros = [0] * 9
vdw_ramp = [round(x, 2) for x in np.linspace(0.00, 0.95, num=26)]
vdw_tail = [0.97, 0.98, 0.99, 0.999, 1.00]
vdw_lambdas = vdw_leading_zeros + vdw_ramp + vdw_tail
# import os # add an alternative default version for GROMACS. Otherwise, use bash default # os.environ["GROMACSHOME"] = os.path.expanduser("~/gromacs-2018.4") import logging logging.basicConfig(level=logging.INFO) from BioSimSpace._SireWrappers import System from ProtoCaller.Utils.fileio import Dir from ProtoCaller.Ensemble import Ligand, Perturbation from ProtoCaller.Solvate import solvate from ProtoCaller.IO.GROMACS import saveAsGromacs from ProtoCaller.Parametrise import Params from ProtoCaller.Wrappers.biosimspacewrapper import resize with Dir("Relative_Solvation", overwrite=True): with Dir("Ligands"): # create two ligands from SMILES strings and name them toluene = Ligand("C1=CC=CC=C1C", name="toluene", workdir="Ligands") benzene = Ligand("C1=CC=CC=C1", name="benzene", workdir="Ligands") # protonate parametrise the ligands at pH 7 with GAFF2 for ligand in [toluene, benzene]: ligand.protonate(babel_parameters={"pH": 7.0}) ligand.parametrise(Params(ligand_ff="gaff2")) with Dir("toluene~benzene"): # create the perturbation from the ligands perturbation = Perturbation(toluene, benzene) # we create the mixed topology object by using the toluene as a reference ligand
# import os # add an alternative default version for GROMACS. Otherwise, use bash default # os.environ["GROMACSHOME"] = os.path.expanduser("~/gromacs-2018.4") import logging logging.basicConfig(level=logging.INFO) from ProtoCaller.Utils.fileio import Dir from ProtoCaller.Ensemble import Ligand from ProtoCaller.Solvate import solvate from ProtoCaller.IO.GROMACS import saveAsGromacs from ProtoCaller.Parametrise import Params from ProtoCaller.Wrappers.parmedwrapper import openFilesAsParmed, resize with Dir("Absolute_Solvation", overwrite=True): with Dir("Ligands"): # create two ligands from SMILES strings and name them toluene = Ligand("C1=CC=CC=C1C", name="toluene", workdir="Ligands") benzene = Ligand("C1=CC=CC=C1", name="benzene", workdir="Ligands") # protonate parametrise the ligands at pH 7 with GAFF2 for ligand in [toluene, benzene]: ligand.protonate(babel_parameters={"pH": 7.0}) ligand.parametrise(Params(ligand_ff="gaff2")) for ligand in [toluene, benzene]: with Dir(ligand.name): # open the parametrised ligand as a ParmEd object lig_vac = openFilesAsParmed(ligand.parametrised_files) # give a dummy box length of e.g. 10 nm lig_vac = resize(lig_vac, 10) # save the vacuum leg as .gro and .top files
def test_pdbconnect():
    """Fetch the PDB, FASTA and ligands for entry 1BJI via ``PDBDownloader``."""
    with Dir(PC.TESTDIR + "/Utils/temp", temp=True):
        entry = PDBDownloader("1BJI")
        entry.getPDB()
        entry.getFASTA()
        ligands = entry.getLigands()
        assert len(ligands) == 10
# import os # add an alternative default version for GROMACS. Otherwise, use bash default # os.environ["GROMACSHOME"] = os.path.expanduser("~/gromacs-2018.4") import logging logging.basicConfig(level=logging.INFO) from ProtoCaller.Utils.fileio import Dir from ProtoCaller.Ensemble import Ligand, Protein, Ensemble with Dir("Sialyltransferase", overwrite=False): # create a protein and ligands using custom files lig_ref = Ligand("lig_ref.sdf", protonated=True, workdir="Ligands", name="lig_ref") lig1 = Ligand("lig1.sdf", protonated=True, workdir="Ligands", name="lig1") lig2 = Ligand("lig2.sdf", protonated=True, workdir="Ligands", name="lig2") protein = Protein("2WNB", pdb_file="protein.pdb", ligand_ref=lig_ref) # create the morphs from the ligands morphs = [[lig1, lig2], [lig2, lig1]] # create a system from the protein and the morphs and set up some default # settings regarding system preparation system = Ensemble("GROMACS", protein=protein, morphs=morphs, box_length_complex=7, ligand_ff="gaff2", workdir=protein.workdir.path) # only keep the reference ligand
# import os # add an alternative default version for GROMACS. Otherwise, use bash default # os.environ["GROMACSHOME"] = os.path.expanduser("~/gromacs-2018.4") import logging logging.basicConfig(level=logging.INFO) from ProtoCaller.Utils.fileio import Dir from ProtoCaller.Ensemble import Ligand, Protein, Ensemble with Dir("Sialyltransferase", overwrite=True): # create a protein from its PDB code and the residue number of the ligand # we are going to use for mapping protein = Protein("2WNB", ligand_ref="1344") # delete any atoms with altLoc B for chain in protein.pdb_obj: for residue in chain: atoms_to_purge = [x for x in residue if x.altLoc == "B"] residue.purgeAtoms(atoms_to_purge, "discard") protein.pdb_obj.writePDB() # create two ligands from SMILES strings lig1 = Ligand("O([P@]([O-])(=O)CC[C@@]1(C(=O)N)C[C@@H]([C@H]([C@H]([C@@H]" "([C@@H](CO)O)O)O1)NC(=O)C)O)C[C@H]1O[C@@H](n2c(=O)nc(N)cc2)" "[C@H](O)[C@@H]1O", workdir="Ligands") lig2 = Ligand("[P@]([O-])(=O)(OC[C@H]1O[C@@H](n2ccc(nc2=O)N)[C@H](O)[C@@H]" "1O)CC[C@]1(CO)O[C@H]([C@@H]([C@H](C1)O)NC(=O)C)[C@H](O)" "[C@H](O)CO", workdir="Ligands") # create the morphs from the ligands morphs = [[lig1, lig2], [lig2, lig1]]
import logging logging.basicConfig(level=logging.INFO) import glob from ProtoCaller.Utils.fileio import Dir from ProtoCaller.Ensemble import Ensemble, Ligand from ProtoCaller.Parametrise import Params params = Params(ligand_ff="GAFF2") with Dir("FXA", overwrite=False): # create a ligand dictionary and populate it from the Ligands folder. # this block makes sure we don't reparametrise if we rerun the script. ligands = {} with Dir("Ligands") as ligdir: lignames = [x.split(".")[0] for x in glob.glob("*.sdf")] for ligname in lignames: sdf = "{}.sdf".format(ligname) prmtop = "{}.prmtop".format(ligname) inpcrd = "{}.inpcrd".format(ligname) if len(glob.glob(prmtop)) and len(glob.glob(inpcrd)): parametrised_files = [prmtop, inpcrd] else: parametrised_files = None # here we initialise the ligand with custom structures. ligands[ligname] = Ligand(sdf, protonated=True, minimise=False,
# import os # add an alternative default version for GROMACS. Otherwise, use bash default # os.environ["GROMACSHOME"] = os.path.expanduser("~/gromacs-2018.4") import logging logging.basicConfig(level=logging.INFO) from ProtoCaller.Utils.fileio import Dir from ProtoCaller.Ensemble import Ligand, Protein, Ensemble with Dir("T4-lysozyme", overwrite=True): # create a protein from its PDB code and the residue number of the ligand # we are going to use for mapping protein = Protein("181L", ligand_ref="400") # create two ligands from SMILES strings and name them benzol = Ligand("C1=CC=CC=C1", name="benzol", workdir="Ligands") o_xylene = Ligand("CC1=CC=CC=C1C", name="o-xylene", workdir="Ligands") # create the morphs from the ligands morphs = [[benzol, o_xylene], [o_xylene, benzol]] # create a system from the protein and the morphs and set up some default # settings regarding system preparation system = Ensemble("GROMACS", protein=protein, morphs=morphs, box_length_complex=7, ligand_ff="gaff2", workdir=protein.workdir.path) # only keep the reference ligand and keep all crystallographic waters system.protein.filter(ligands=None, waters="all")