def test_add_ligand(self):
    """A ligand added to a fresh Protein must be the only entry of `_ligands`."""
    prot = protein.Protein(protein_id="test")

    prot.add_ligand("test")

    expected_ligands = ["test"]
    self.assertEqual(prot._ligands, expected_ligands)
def test_init(self):
    """Check the attributes of a Protein built with every keyword argument.

    Most arguments are expected back unchanged. Three are expected to be
    transformed: `pdb_file=None` becomes "<protein_id>.<file_type>", the
    model string '0' becomes the integer 0, and the chain 'a' becomes 'A'.
    A new instance is also expected to start with an empty `_ligands` list.
    """
    init_kwargs = {
        "protein_id": "protein_id",
        "pdb_file": None,
        "structure": "structure",
        "substitutions_dict": "substitutions_dict",
        "sulf_bonds": "sulf_bonds",
        "seqres": "seqres",
        "file_type": 'cif',
        "model": '0',
        "chain": 'a',
        "cys_dict": "cys_dict",
        "gro_file": "gro_file",
        "top_file": "top_file",
        "tpg_file": "tpg_file",
        "prm_file": "prm_file",
    }
    prot = protein.Protein(**init_kwargs)

    self.assertEqual(prot.protein_id, "protein_id")

    # With no pdb_file given, the file name is derived from id and file type
    self.assertEqual(prot.pdb_file, f"{prot.protein_id}.{prot.file_type}")

    # These attributes were given a value equal to their own name
    for attr_name in ("structure", "substitutions_dict", "sulf_bonds", "seqres"):
        self.assertEqual(getattr(prot, attr_name), attr_name)

    self.assertEqual(prot.file_type, "cif")
    self.assertEqual(prot.model, 0)
    self.assertEqual(prot.chain, 'A')

    # Same pass-through check for the remaining file-related attributes
    for attr_name in ("cys_dict", "gro_file", "top_file", "tpg_file", "prm_file"):
        self.assertEqual(getattr(prot, attr_name), attr_name)

    self.assertEqual(prot._ligands, [])
def test_get_ligand_list(self):
    """get_ligand_list must return the very same list object held in `_ligands`."""
    prot = protein.Protein(protein_id="test")
    prot._ligands = ["a", "b", "c"]

    returned = prot.get_ligand_list()

    # Identity, not just equality: no copy is expected
    self.assertIs(returned, prot._ligands)
def separate_protein_ligand(self, Protein = None, Ligand = None):
    """Separate a protein + ligand(s) file into Biopython structures.

    Takes a Protein instance whose file contains both the protein and the
    ligands together and returns the separated structures.

    Parameters
    ----------
    Protein
        The Protein instance to work on; when None, self.Protein is used.
    Ligand
        The ligand(s) to extract (anything accepted by
        get_iterable.get_iterable); when None, self.Ligand is used.
        The ligands are rebuilt from their residue names, so the objects
        whose structures are returned are new Ligand instances.

    Returns
    -------
    tuple
        (Protein.structure, ligand_structures), where ligand_structures is a
        list with one structure per ligand (empty if there are no ligands).

    Notes
    -----
    Calls `write` to produce "<protein_id>_protein.pdb" for the protein and
    "<protein_id>_lgand<i>.pdb" for every ligand.
    """

    def update_ligand_resnumbers(Protein, Ligand):
        """Updates the ligand resnumbers that may have been changed by
        previous calculations, returning a brand new list of Ligand
        instances (or None when there is no ligand)"""

        if Ligand is None or len(get_iterable.get_iterable(Ligand)) == 0:
            print("I found no ligand, returning None")
            return None

        # Unique residue names, keeping the order of first appearance
        ligand_resnames = []
        for lgand in get_iterable.get_iterable(Ligand):
            if lgand.resname not in ligand_resnames:
                ligand_resnames.append(lgand.resname)

        parser = file_manipulation.SubstitutionParser()
        # resnames_resnums is in the form
        # [[ligand_resname, ligand_resnumber], [..., ...], ...]
        resnames_resnums = parser.get_ligand_resnum(
            Protein = Protein,
            ligand_resnames = ligand_resnames,
            chain_model_selection = False)

        # Creating a completely new Ligand list
        return [
            ligand.Ligand(
                resname = entry[0],
                pdb_file = None,
                structure = None,
                file_type = 'pdb',
                tpg_file = None,
                prm_file = None,
                resnum = entry[1])
            for entry in resnames_resnums
        ]

    # Identity checks: None is a singleton and `==` could be overridden
    if Protein is None:
        Protein = self.Protein

    if Ligand is None:
        Ligand = self.Ligand

    Ligand = update_ligand_resnumbers(Protein = Protein, Ligand = Ligand)

    # Extract the protein with ProDy, write a new pdb
    # and then get the Biopython protein structure
    tmp_protein = protein.Protein(
        protein_id = Protein.protein_id,
        pdb_file = Protein.pdb_file,
        file_type = Protein.file_type)
    tmp_protein.update_structure(struct_type = "prody")

    # The selector keeps the complete structure, so it can still be used
    # below to pick the ligands after tmp_protein.structure is replaced
    prody_select = prody.ProdySelect(structure = tmp_protein.structure)
    tmp_protein.structure = prody_select.protein_and_ions()
    tmp_protein.write(file_name = f"{Protein.protein_id}_protein.pdb", struct_type = 'prody')

    # NOTE(review): Protein is not pointed at the protein-only file written
    # just above; confirm update_structure reads the intended file.
    Protein.update_structure(struct_type = "biopython")

    # Doing the same thing for every ligand
    ligand_structures = []
    for i, lgand in enumerate(get_iterable.get_iterable(Ligand)):
        lgand.structure = prody_select.resnum(resnum = lgand.resnum)
        lgand.write(file_name = f"{Protein.protein_id}_lgand{i}.pdb", struct_type = 'prody')
        lgand.update_structure(struct_type = "biopython")
        ligand_structures.append(lgand.structure)

    return Protein.structure, ligand_structures
def __init__(self, tpg_file, prm_file, solvent_box=None, MD_program_path="orac", chain="A"):
    """Set up the input template for the solvent-box run.

    Parameters
    ----------
    tpg_file
        Topology file written after READ_TPG_ASCII in the template.
    prm_file
        Parameter file written after READ_PRM_ASCII in the template.
    solvent_box
        PDB file of the solvent box. When None, the 'only_water.pdb' file
        shipped in 'HPC_Drug.lib' is copied to the current working directory
        and used.
    MD_program_path
        Stored as self.MD_program_path (default "orac").
    chain
        Chain id given to the Protein instance built from the solvent box.
    """
    self.tpg_file = tpg_file
    self.prm_file = prm_file

    self.solvent_box = solvent_box
    if self.solvent_box is None:
        # Fall back on the packaged water box, copied to the working directory
        with importlib_resources.path('HPC_Drug.lib', 'only_water.pdb') as path:
            shutil.copy(str(path.resolve()), os.getcwd())
        self.solvent_box = os.getcwd() + "/only_water.pdb"

    self.MD_program_path = MD_program_path
    self.chain = chain

    self.orac_in_file = os.getcwd() + "/only_water_orac.in"
    self.output_pdb_file = "optimized_only_solvent_box.pdb"

    self.Protein = protein.Protein(
        protein_id="slvt",
        pdb_file=self.solvent_box,
        chain=self.chain,
        file_type="pdb")

    self.orient = orient.Orient(Protein=self.Protein, Ligand=[])

    # Must be set before the template is built (the helper methods are
    # called while the list below is being evaluated)
    self.box = self._create_selfbox()

    # Output file name without extension, shared by the .rst and .dat files
    output_stem = self.output_pdb_file.rsplit('.', 1)[0].strip()

    self.template = [
        "#&T NTHREADS 8 CACHELINE 16",
        "#&T NT-LEVEL1 2 CACHELINE 16",
        "#&T NT-LEVEL2 4 CACHELINE 16",
        "###############################################################",
        "# Minimize Crystallographic structure from PDBank",
        "###############################################################",
        "",
        "! this is a comment",
        # The comma after the next entry was missing: implicit string
        # concatenation merged these two lines into a single one
        "!! two exclamation points: system-dependent section",
        "! one exclamation point: system indipendent section (same for all inputs)",
        "#",
        "# Set MD cell and read pdb coordinates",
        "#",
        "&SETUP",
        self._write_box(),
        f"READ_PDB {self.solvent_box}",
        "&END",
        "#",
        "# reads the force fields",
        "#",
        "&PARAMETERS",
        f" READ_TPG_ASCII {self.tpg_file} ! protein",
        f" READ_PRM_ASCII {self.prm_file} ! protein",
        " WRITE_TPGPRM_BIN only_water.tpgprm",
        " JOIN SOLVENT ! solvent",
        " tip3",
        " END",
        "&END",
        "&SOLVENT",
        f"ADD_UNITS {self._get_number_of_residues()}",
        "&END",
        "&SIMULATION ! simulation parameters (same for all)",
        " MDSIM",
        " TEMPERATURE 280.0 20.0",
        " ISOSTRESS PRESS-EXT 0.1 BARO-MASS 30.0",
        " THERMOS",
        " solute 10.0",
        " solvent 10.0",
        " cofm 10.0",
        " temp_limit 1000.0",
        " END",
        "&END",
        "&INTEGRATOR ! integration parameters (same for all)",
        " TIMESTEP 9.0",
        " MTS_RESPA",
        " step intra 2",
        " step intra 2",
        " step nonbond 2 5.1",
        " step nonbond 5 7.8 reciprocal",
        " step nonbond 1 10.0",
        " test_times OPEN G0.tt 20",
        " very_cold_start 0.1",
        " END",
        "&END",
        "&POTENTIAL !! potential parameters",
        self._write_EWALD_PME(),
        self._write_ADD_STR_COM(),
        " UPDATE 60.0 1.8",
        self._write_LINKED_CELL(),
        " STRETCHING HEAVY",
        " QQ-FUDGE 0.83333",
        " LJ-FUDGE 0.50",
        "&END",
        "&RUN ! run lenght (same for all)",
        " CONTROL 0",
        " PROPERTY 20000.0",
        " REJECT 20000.0",
        " TIME 6000.0",
        " STEER 0.0 30000.0",
        " PRINT 300.0",
        "&END",
        "",
        "#",
        "# write restart file every 60.0 (approximately)",
        "#",
        "&INOUT ! files I/O",
        " RESTART",
        f" write 15000.0 OPEN {output_stem}.rst",
        " END",
        f" ASCII 3000.0 OPEN {self.output_pdb_file}",
        f" PLOT STEER_ANALYTIC 500.0 OPEN {output_stem}.dat",
        "&END",
    ]
def test_init_raise(self):
    """Building a Protein with no arguments at all is expected to raise ValueError."""
    self.assertRaises(ValueError, protein.Protein)
def execute(self):
    """Run the whole structure-preparation pipeline.

    Starting from a PDB file, an mmCIF file or a protein id, produces a
    clean and repaired "protein and ions" PDB plus a PDB file for every
    organic ligand that is not considered trash.

    Returns the resulting Protein instance (its ligands are reachable
    through Protein.get_ligand_list()).
    """

    # Downloads the pdb file if that was requested in input; if a local
    # file was given and it doesn't exist FileNotFoundError is raised,
    # otherwise self.protein_filename is updated with the given path
    # (all the paths are converted to absolute paths)
    self.get_protein_file()

    # Creating the protein instance
    prot = protein.Protein(
        protein_id = self.protein_id,
        pdb_file = self.protein_filename,
        model = self.model,
        chain = self.chain,
        file_type = self.protein_filetype,
        tpg_file = self.protein_tpg_file,
        prm_file = self.protein_prm_file)

    # Gets Protein.substitutions_dict and Protein.sulf_bonds and repairs
    # the Protein.pdb_file; also returns a list with the resnames and
    # resnumbers of the organic ligands [[resname, resnum], [...], ...]
    # (a None item if there are none)
    info_repair = structural_information_and_repair.InfoRepair(
        Protein = prot,
        repairing_method = self.repairing_method)
    prot, organic_ligands = info_repair.get_info_and_repair()

    # Remove the disordered atoms that are still present (if any)
    prot = remove_disordered_atoms.remove_disordered_atoms(Protein = prot)

    # Keep only the selected model and chain (Protein.model Protein.chain)
    prot = select_model_chain.select_model_chain(Protein = prot)

    # An mmCIF protein gets converted to PDB
    prot = mmcif2pdb.mmcif2pdb(Protein = prot)

    # Create the Ligand instances and add them to Protein._ligands
    prot = get_ligands.get_ligands(
        Protein = prot,
        ligand_resnames_resnums = organic_ligands)

    prot.update_structure(struct_type = "prody")
    selector = prody.ProdySelect(structure = prot.structure)

    # Gets the protein's structure from the pdb;
    # the only HETATM remaining are the metal ions
    prot.structure = selector.protein_and_ions()

    # Write the protein-only pdb
    protein_only_name = f"{prot.protein_id}_protein.pdb"
    prot.write(file_name = protein_only_name, struct_type = 'prody')

    # Removes the remaining trash ions
    prot = remove_trash_metal_ions.remove_trash_metal_ions(Protein = prot)

    # Quick patch, will be done better:
    # the structure is put in the reference system of the inertia tensor
    orient_obj = orient.Orient(Protein = prot)
    _, _, rot_matrix = orient_obj.calculate_moment_of_intertia_tensor()

    prot.structure = orient_obj.base_change_structure()
    prot.write()

    # The ligands are moved with the same rotation matrix as the protein
    for lgand in prot.get_ligand_list():
        lgand.update_structure(struct_type = "biopython")
        lgand.structure = orient_obj.base_change_structure(
            structure = lgand.structure,
            rot_matrix = rot_matrix)
        lgand.write()

    return prot