def clean_pdb_files(decoy_dir, file_list):
    """Run ``cleanATOM`` on every file in ``file_list`` from inside ``decoy_dir``.

    The process working directory is switched to ``decoy_dir`` while the
    files are cleaned, so the entries of ``file_list`` are resolved relative
    to it.  The previous working directory is always restored, including
    when cleaning one of the files raises.

    Args:
        decoy_dir: Directory to change into before cleaning.
        file_list: Iterable of PDB file names passed one by one to
            ``cleanATOM``.
    """
    cwd = os.getcwd()
    os.chdir(decoy_dir)
    try:
        for pdb_file in file_list:
            cleanATOM(pdb_file)  # outputs file_name.clean.pdb for file_name.pdb
    finally:
        # Without this, a failure on one file would leave the whole process
        # stranded inside decoy_dir.
        os.chdir(cwd)
def get_pose(pdb):
    """Clean a PDB file and return the PyRosetta pose of the cleaned copy.

    ``pdb`` is handed to ``toolbox.cleanATOM``; the pose is then loaded from
    the path obtained by dropping the last four characters of ``pdb`` and
    appending ``.clean.pdb``.
    """
    toolbox.cleanATOM(pdb)
    stem = pdb[:-4]
    cleaned_path = stem + '.clean.pdb'
    return pose_from_pdb(cleaned_path)
def controlMut(): from pyrosetta.toolbox import generate_resfile_from_pdb, generate_resfile_from_pose # generate_resfile_from_pdb('2AVX_pyrrolysine.pdb', 'my.resfile') # ^ causes error EXCN_Base::what() # likely due to uncleaned file? # # what if we clean first ... from pyrosetta.toolbox import cleanATOM cleanATOM('2AVX_pyrrolysine.pdb') generate_resfile_from_pdb('2AVX_pyrrolysine.clean.pdb','my2.resfile') # ^ doesnt work either causes error: ERROR: Unrecognized residue: LG1 # The following does work. generate_resfile_from_pose(pose, 'my.resfile')
def extract(self):
    """Build the ligand/protein interaction fragment for the configured PDB.

    Reads ``self.args`` (``pdb``, ``output``, ``ligand``, ``protein``,
    ``getligand``).  The PDB is cleaned with ``cleanATOM`` and the cleaned
    file is loaded into ``self.pose``; its ``pdb_info()`` is stored in
    ``self.info``.  Every residue whose chain id matches ``args.ligand`` is
    appended to ``self.ligand`` and every residue whose chain id matches
    ``args.protein`` is appended to ``self.protein`` as ``(index, residue)``
    pairs.  ``self.fragment`` starts as one ``"-"`` per residue; for each
    protein/ligand pair accepted by ``self.has_noncovalent_bonding`` the
    protein residue (or, with ``args.getligand``, the ligand residue) is
    written at its own position.  The result is printed with
    ``self.print_fragment()``.

    Returns:
        None.  When no PDB path is configured, or the loaded object is not
        a ``Pose``, the parser help is printed and the method returns early.
    """
    # Validate first: starting Rosetta is slow and pointless without an input.
    if not self.args.pdb:
        self.p.print_help()
        return

    print("Starting Rosetta environment. Please wait....")
    init()

    print("Cleaning PDB. Please Wait...")
    filename, _ = os.path.splitext(os.path.basename(self.args.pdb))
    cleanATOM(self.args.pdb)

    print("Loading cleaned PDB file. Please wait....")
    clean_name = "{0}.clean.pdb".format(filename)
    full_file_path = os.path.join(self.args.output, clean_name)
    # cleanATOM is given only the input path, so its output is expected next
    # to the input file.  Fall back to that location when the output
    # directory does not hold the cleaned file.
    beside_input = os.path.join(os.path.dirname(self.args.pdb), clean_name)
    if not os.path.exists(full_file_path) and os.path.exists(beside_input):
        full_file_path = beside_input
    self.pose = pose_from_pdb(full_file_path)
    if not isinstance(self.pose, Pose):
        print("PDB file doesn't contain a valid pose object.")
        self.p.print_help()
        return
    self.info = self.pose.pdb_info()

    print("Building ligand and the interacting protein.")
    # A comma-separated value becomes a list of chain ids; otherwise the
    # plain string is kept and chain ids are matched as substrings of it.
    ligand = str(self.args.ligand)
    if ',' in ligand:
        ligand = ligand.split(',')
    protein = str(self.args.protein)
    if ',' in protein:
        protein = protein.split(',')

    # Residue indices passed to the pose start at 1.
    for resId in range(1, self.pose.total_residue() + 1):
        res = self.pose.residue(resId)
        chainid = str(self.info.chain(resId))
        if chainid in ligand:
            self.ligand.append((resId, res))
        elif chainid in protein:
            self.protein.append((resId, res))

    print("Building Fragment. Please Wait....")
    self.fragment = ["-"] * self.pose.total_residue()
    for fr_pos, first_residue in self.protein:
        for sec_pos, second_residue in self.ligand:
            if self.has_noncovalent_bonding(first_residue, second_residue):
                # fragment is 0-based, residue positions are 1-based.
                if not self.args.getligand:
                    self.fragment[fr_pos - 1] = first_residue
                else:
                    self.fragment[sec_pos - 1] = second_residue
    self.print_fragment()
def add_hydrogens(pdb_file):
    """Replace ``pdb_file`` with a dump of the pose loaded from its cleaned copy.

    The file is cleaned with ``cleanATOM``, the resulting ``.clean.pdb`` is
    loaded as a pose, both the original and the cleaned file are deleted,
    and the pose is written back under the original name.

    Args:
        pdb_file: Path of the PDB file to rewrite in place.

    Returns:
        None.
    """
    cleanATOM(pdb_file)
    # Strip only the final extension: splitting on the first "." mangles
    # paths such as "./model.pdb" or "run.1.pdb".
    clean_file = os.path.splitext(pdb_file)[0] + ".clean.pdb"
    pose = pose_from_pdb(clean_file)
    os.remove(pdb_file)
    os.remove(clean_file)
    pose.dump_pdb(pdb_file)
    return
def getScore(path):
    """Calculate pyRosetta score.

    Cleans the PDB at ``path``, loads the cleaned copy, evaluates the
    module-level ``scorefxn`` on it, collects the per-term energies with
    ``eTerms`` and stores the total under the ``'ref15'`` key.  The
    temporary cleaned file is removed afterwards, also when loading or
    scoring fails.

    Args:
        path: Path of the PDB file; the cleaned name is built by replacing
            its last three characters with ``clean.pdb``.

    Returns:
        The object returned by ``eTerms`` with ``'ref15'`` added.
    """
    # Clean PDB files
    cleanATOM(path)

    # Clean struct name
    cleanName = path[0:-3] + 'clean' + '.pdb'

    try:
        pose = pr.pose_from_pdb(cleanName)
        # Score before collecting the terms, in the same order as before.
        total = scorefxn(pose)
        energy = eTerms(pose)
        energy['ref15'] = total
    finally:
        # Do not leave the temporary cleaned file behind on failure.
        if os.path.exists(cleanName):
            os.remove(cleanName)

    return energy
def preprocess(path: str, config: dict) -> dict:
    """Return the relaxed base poses, using ``./obj/base_poses.pkl`` as a cache.

    If the cache file exists it is unpickled and returned.  Otherwise, for
    every key of ``config``, ``path + key + '.pdb'`` is cleaned, the cleaned
    file is loaded and relaxed with ``relax.relax_pose``, and the resulting
    dict is pickled to the cache file.

    Args:
        path: Prefix prepended to each key to form the PDB file name (plain
            string concatenation, no separator is added).
        config: Mapping whose keys name the structures to process.

    Returns:
        Dict mapping each key of ``config`` to its pose (or whatever the
        cache file contains when it already exists).
    """
    rosetta_init()

    cache_dir = './obj/'
    cache_path = './obj/base_poses.pkl'

    if os.path.isfile(cache_path):
        # NOTE: unpickling runs arbitrary code; only load caches this
        # program wrote itself.
        with open(cache_path, 'rb') as file:
            return pk.load(file)

    poses = {}
    for pdb in config:
        cleanATOM(path + pdb + '.pdb')
        pose = pose_from_pdb(path + pdb + '.clean.pdb')
        relax.relax_pose(pose=pose, scorefxn=get_fa_scorefxn(), tag='')
        poses[pdb] = pose

    # Write the cache only after every pose has been built, so a failure
    # above cannot leave an empty file that breaks the next run.
    os.makedirs(cache_dir, exist_ok=True)
    with open(cache_path, 'wb') as file:
        pk.dump(poses, file)

    return poses
def treat_template_structure(self, mhc_chain, peptide_chain, trim_mhc, mhc_trim_length, idealize_relax=False):
    """Clean, optionally trim, and load the template PDB into ``self.template_pose``.

    Args:
        mhc_chain: Forwarded to ``self.trim_pdb`` when ``trim_mhc`` is truthy.
        peptide_chain: Forwarded to ``self.trim_pdb`` when ``trim_mhc`` is truthy.
        trim_mhc: When truthy, ``self.trim_pdb`` is called before loading.
        mhc_trim_length: Forwarded to ``self.trim_pdb``.
        idealize_relax: When equal to ``True``, the loaded pose is passed
            through ``IDEALIZE().idealize_pdb`` and ``RELAX().relax_pdb``
            and ``self.save_pdb()`` is called.
    """
    cleanATOM(self.template_pdb)
    # presumably points self.template_pdb at the cleaned file -- TODO confirm
    self.append_clean_pdb()
    if trim_mhc:
        self.trim_pdb(mhc_chain, peptide_chain, mhc_trim_length)
    self.template_pose = pose_from_pdb(self.template_pdb)
    if idealize_relax == True:
        template_pose = self.template_pose
        self.template_pose = IDEALIZE().idealize_pdb(template_pose)
        # NOTE(review): relax_pdb receives the local ``template_pose``, not
        # the value just returned by idealize_pdb; unless idealize_pdb
        # modifies its argument in place the idealized result is discarded
        # -- confirm.
        self.template_pose = RELAX().relax_pdb(template_pose)
        # NOTE(review): nesting of save_pdb() under this branch was
        # reconstructed from flattened source -- confirm.
        self.save_pdb()
def processOne(name, scorefxn):
    """Clean and load one structure, then report its summary numbers.

    The cleaned copy of ``name`` is loaded as a pose.  From it the function
    takes the string returned by ``getSSString``, the fraction of ``"H"``
    and of ``"L"`` characters in that string, the bond count returned by
    ``getHBondInfo`` and the value of ``scorefxn`` on the pose.  Everything
    is printed and also returned.

    Returns:
        Tuple ``(ss, fractionH, fractionL, numberBonds, score)``.
    """
    cleanATOM(name)
    pose = pose_from_pdb(name[:-4] + ".clean.pdb")

    # secondary-structure string and character fractions
    sec_struct = getSSString(pose)
    frac_h = sec_struct.count("H") / len(sec_struct)
    frac_l = sec_struct.count("L") / len(sec_struct)

    # hydrogen bonds: only the count is used here
    _donors, _acceptors, bond_count = getHBondInfo(pose)

    # score of the cleaned pose
    total_score = scorefxn(pose)

    print("ON ", name)
    print("Secondary structure: ")
    print(sec_struct)
    print("fraction H: ", frac_h, " fraction L ", frac_l,
          " number H bonds: ", bond_count, " score: ", total_score)

    return sec_struct, frac_h, frac_l, bond_count, total_score
def main():
    """Interactively swap helix torsion patterns on the 1wy3_ww structure.

    Asks for a sequence, picks its pattern set, cleans and loads
    ``1wy3_ww.pdb``, reads its torsion angles and sequence, then asks which
    helix to swap.  A helix number below 4 goes to ``swap_helix``; any
    other number goes to ``swap_all``.
    """
    # End points of protein helices, as [N-side, C-side] pairs
    borders = [[3, 10], [14, 19], [22, 32]]

    pattern_set, torsion_angles_set = create_pattern_set()
    sequence = input("Insert sequence:")
    set_number = which_pattern_set(sequence, pattern_set)
    pyrosetta_sequence = generate_new_sequence(sequence)

    # Cleaning original protein (.pdb) file
    cleanATOM('1wy3_ww.pdb')

    # Built from the generated sequence; the result is not used further here.
    _sequence_pose = pose_from_sequence(''.join(pyrosetta_sequence), 'fa_standard')

    protein = pose_from_pdb('1wy3_ww.clean.pdb')
    phi, psi, omega = write_torsion_angles(protein)
    protein_sequence = list(protein.sequence())

    # Change in amino acid no. 23, NORleucine -> Leucine
    # NOTE(review): this writes list index 23 (0-based) -- confirm it is the
    # intended residue.
    protein_sequence[23] = 'L'

    helix = int(input('Which helix do you want to swap?'))
    swap = swap_helix if helix < 4 else swap_all
    swap(protein, borders, protein_sequence, pyrosetta_sequence,
         phi, psi, omega, torsion_angles_set, set_number, helix)
def runSuiteOfMethods(name):
    """Run every refinement method on one structure and print the scores.

    The PDB ``name`` is cleaned and loaded.  ``faRefinementFromTrRosetta``
    and ``vanillaMinimizationMovers`` are each run with 200, 500 and 1000
    as their third argument, then ``classicRelax`` is run once.  Each call
    receives the cleaned file and an output file name derived from
    ``name``.  Finally the score of the untouched cleaned pose and all
    collected scores are printed.
    """
    # try for one: clean the fa pdb and then try relaxing it
    cleanATOM(name)
    cleaned_path = name.replace(".pdb", ".clean.pdb")
    start = pose_from_pdb(cleaned_path)
    stem = name.replace(".pdb", "")

    # (printed label, output-name suffix, third argument) for each run
    runs = (
        ("200: ", "200.pdb", 200),
        ("500: ", "500.pdb", 500),
        ("1000: ", "1000.pdb", 1000),
    )

    # fa fast relax refinement process from trRosetta
    fast_prefix = "fastRelax_" + stem + "_"
    fast_scores = [
        faRefinementFromTrRosetta(cleaned_path, fast_prefix + suffix, amount)
        for _label, suffix, amount in runs
    ]

    # simple min mover
    vanilla_prefix = "vanilla" + stem + "_"
    vanilla_scores = [
        vanillaMinimizationMovers(cleaned_path, vanilla_prefix + suffix, amount)
        for _label, suffix, amount in runs
    ]

    # classic relax (long time)
    classic_score = classicRelax(cleaned_path, "classicRelax_" + stem + ".pdb")

    # scores report
    print("original score: ")
    scorefxn = get_fa_scorefxn()
    print(scorefxn(start))
    print(" ")

    print("Fast relax scores: ")
    for (label, _suffix, _amount), value in zip(runs, fast_scores):
        print(label)
        print(value)
    print(" ")

    print("Vanilla min mover scores: ")
    for (label, _suffix, _amount), value in zip(runs, vanilla_scores):
        print(label)
        print(value)
    print(" ")

    print("Classic relax score: ")
    print(classic_score)
from tqdm import tqdm

if __name__ == '__main__':
    # Clean every visible *.pdb file of a directory with cleanATOM.  Unless
    # --keep is given, the original is replaced by its cleaned version.
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--dir", default="./", help="The directory we need to clean")
    parser.add_argument("-k", "--keep", default=False, action="store_true", help="If keep the original file and rename")
    args = parser.parse_args()

    pdb_list = [
        file for file in os.listdir(args.dir)
        if file.endswith(".pdb") and not file.startswith(".")
    ]

    for pdb in tqdm(pdb_list):
        pdb_id = pdb[:-4]
        pdb_path = os.path.join(args.dir, pdb)
        clean_path = os.path.join(args.dir, pdb_id + ".clean.pdb")

        # os.listdir returns bare names, so the file has to be addressed
        # inside args.dir; the bare name only worked when args.dir was the
        # current working directory.
        cleanATOM(pdb_path)

        if not args.keep:
            # Replace the original with its cleaned version.
            os.remove(pdb_path)
            os.rename(clean_path, pdb_path)
# Continuing on docking.py, now run multiple runs and store lowest energy # import pyrosetta from pyrosetta import * init() # import additional modules ls import rosetta.protocols.rigid as rigid_moves from rosetta.protocols.minimization_packing import * from pyrosetta import PyMOLMover # import and clean pdb (superantigen + TCR + MHC) from pyrosetta.toolbox import cleanATOM cleanATOM("2ICW_MHC.pdb") pose = pose_from_pdb("2ICW_MHC.clean.pdb") # make full-atom starting pose fa_starting = Pose() fa_starting.assign(pose) # make a full-atom working pose fa_working = Pose() fa_working.assign(pose) # make a centroid pose switch = SwitchResidueTypeSetMover("centroid") switch.apply(pose) centroid = Pose() centroid.assign(pose) # make score function scorefxn = get_fa_scorefxn()
asDict['numberHBonds'].append(numberBonds) # asDict['s'].append(ss) asDict['dataset'].append("positiveOriginalSet") asDict['structure'].append(file) # graph interesting stuff asDF = pd.DataFrame(asDict) # change to dataframe for sns plotting # save csv asDF.to_csv("relevantData.csv") sns.violinplot(x="dataset", y="score", data=asDF) plt.show() sns.violinplot(x="dataset", y="numberHBonds", data=asDF) plt.show() ax = sns.scatterplot(x="fractionH", y="fractionL", hue="dataset", data=asDF) plt.show() if __name__ == '__main__': #true positive structures: #look at multichain hbonds name = "./180312_massive_set_train_pdbs/redesigned_closed_7_7_7_9middlesbobby_1_9_S_237903.pdb_middle1.pdb-7_7_7_9middlesbobby_1_9_S_254850_0001.pdb_middle1.pdb-bobby_1_9_S_246831_padded_0001.pdb" cleanATOM(name) cleanPDBName = name[:-4] + ".clean.pdb" start = pose_from_pdb(cleanPDBName) x, y = getHBondResiduesAndAtomsV2(start) print(x) print(y) print(start.get_hbonds())
outmap = numpy.real(cupy.asnumpy(cupy.transpose(outmap))) return outmap
# NOTE(review): the line above is the tail of a definition that starts
# outside this view; it is left exactly as found.

# In[5]:
# Show one slice of the example map, then simulate a map from the cleaned
# 1ye3 structure with sim_map.
from matplotlib import pyplot as plt
inmrc = mrcfile.open('examples/1ye3.mrc')
plt.imshow(inmrc.data[64, :, :])
# NOTE(review): the same file is opened a second time here and neither
# handle is closed in this cell.
mrc = mrcfile.open('examples/1ye3.mrc')
from pyrosetta import *
import numpy as np
init()
from pyrosetta.toolbox import cleanATOM
cleanATOM("./examples/1ye3.pdb")
pose = pose_from_pdb("./examples/1ye3.clean.pdb")
etbl = read_etable('examples/etable.txt')
apix = 1
omap = sim_map(pose, mrc, apix, etbl)

# In[6]:
# Write the simulated map to a new MRC file as float16.
from matplotlib import pyplot as plt
with mrcfile.new('outputtest.mrc', overwrite=True) as mrc:
    mrc.set_data(numpy.float16((omap)))
    # NOTE(review): explicit close inside the ``with`` block -- presumably
    # redundant with the context manager; nesting reconstructed, confirm.
    mrc.close()
if (args.reso != 0): a_, b_, c_, d_ = resofitter(args.resofit) else: a_, b_, c_, d_ = 0, 0, 0, 0 else: a_, b_, c_, d_ = None, None, None, None
# NOTE(review): the line above is the tail of a conditional whose header
# lies outside this view; it is left exactly as found.

# Share the four fit values held by rank 0 with every process.
a_ = comm.bcast(a_, root=0)
b_ = comm.bcast(b_, root=0)
c_ = comm.bcast(c_, root=0)
d_ = comm.bcast(d_, root=0)

# Rank 0 process does pyrosetta work and gathers atom coordinates/type
if rank == 0:
    init()
    cleanATOM(args.pdb_file)
    # cleaned file name: drop the last four characters of the input path
    CleanFName = args.pdb_file[:-4] + '.clean.pdb'
    pose = pose_from_pdb(CleanFName)
    AtomData = CoordSet(pose)
    print('Total atom amount: ' + str(len(AtomData)))
    # Deal the entries out round-robin into one list per process.
    chunks = [[] for _ in range(size)]
    for i, chunk in enumerate(AtomData):
        chunks[i % size].append(chunk)
else:
    AtomData = None
    chunks = None

# Separates atom sets to each process equally
AtomData = comm.scatter(chunks, root=0)
comm.Barrier()
s = cp.float(1/scattering_params[1]) ampl = cp.float((1/cp.sqrt(cp.power(2*pi,3)))*(1/cp.power(s,3))) coords = None OutputArray += ((cp.float(scattering_params[0]) * cp.fft.ifftshift(ampl* cp.exp(-cp.power(pi,2)*(cp.power(ii,2)+cp.power(jj,2)+cp.power(kk,2))/(2*cp.power(s,2)) - ( (2*pi)*1j*(ii*center[0]+jj*center[1]+kk*center[2]) )) ))) center = None #t2 = time.time() #print('Atom Addition Time: ' + str(t2-t1)) #print('Current atom: ' + str(step)) ToAdd, Rem, ampl, s, center, coords, scattering_params,t1,t2 = None,None,None,None,None,None,None,None,None OutputArray = cp.asnumpy(OutputArray) #print('This is the size: ' + str(OutputArray.nbytes)) return OutputArray
# NOTE(review): the line above is the tail of a definition that starts
# outside this view; it is left exactly as found.

# Open the reference map and load the cleaned structure.
mrc = mrcfile.open('EMD-7638-ASYM-COM-BOX180.mrc')
init()
# NOTE(review): cleanATOM and pose_from_pdb run in every process here,
# while only rank 0 uses ``pose`` in the lines visible below -- confirm
# whether several processes writing the same .clean.pdb is intended.
cleanATOM("6cvb-1_11-ASYM-Dehydrated_modified.pdb")
pose = pose_from_pdb('6cvb-1_11-ASYM-Dehydrated_modified.clean.pdb')
print('Rank ID of this Process: ' + str(rank))
# presumably a device index shared by two consecutive ranks -- TODO confirm
Dev = int(rank/2)
if rank == 0:
    AtomData = CoordSet(pose)
    print('Total atom amount: ' + str(len(AtomData)))
    # Deal the entries out round-robin into one list per process.
    chunks = [[] for _ in range(size)]
    for i, chunk in enumerate(AtomData):
        chunks[i%size].append(chunk)
else:
    AtomData = None
def _preprocess(self):
    """Copy not-yet-written monomer PDBs into ``self.sub_dir`` and clean them.

    For every path in ``self.monomer_pdb`` whose base name, after
    ``strip_pdb_ext``, is not in ``self.written``, the file is copied into
    ``self.sub_dir`` under the same base name and ``cleanATOM`` is run on
    the copy.
    """
    for source_path in self.monomer_pdb:
        _, base_name = os.path.split(source_path)
        if strip_pdb_ext(base_name) in self.written:
            continue
        staged_path = os.path.join(self.sub_dir, base_name)
        shutil.copyfile(source_path, staged_path)
        cleanATOM(staged_path)
import numpy
import scipy
import Bio
from pyrosetta import *
init()

# Clean the structure and load the cleaned copy.
from pyrosetta.toolbox import cleanATOM
cleanATOM("1yy8.pdb")
#from pyrosetta.toolbox import pose_from_rcsb
# The cleaned file takes its name from the input ("1yy8.pdb"), so it must be
# loaded with the same lower-case spelling; "1YY8.clean.pdb" is only found
# on case-insensitive file systems.
pose = pose_from_pdb("1yy8.clean.pdb")

# Basic queries on the pose
print(pose)
print(pose.sequence())
print("Protein has", pose.total_residue(), "residues.")
print(pose.residue(500).name())
print(pose.pdb_info().chain(500))
print(pose.pdb_info().number(500))

# Torsion angles of residue 5
print(pose.phi(5))
print(pose.psi(5))
print(pose.chi(1, 5))

# Bond lengths between atoms 1-2 and 2-3 of residue 5
R5N = AtomID(1, 5)
R5CA = AtomID(2, 5)
R5C = AtomID(3, 5)
print(pose.conformation().bond_length(R5N, R5CA))
print(pose.conformation().bond_length(R5CA, R5C))

# Send the pose to PyMOL
from pyrosetta import PyMOLMover
pymol = PyMOLMover()
pymol.apply(pose)

# Calculating energy score
print("Calculating energy score of 6Q21 protein")
ras = pose_from_pdb("6q21.pdb")
print(ras)
print(ras.sequence())
import os
import pyrosetta
import rosetta
import pyrosetta.toolbox as toolbox
import pprint

pyrosetta.init()
# Clean the PDB_REDO copy of the structure.
toolbox.cleanATOM('../PDB_REDO/1DAN_HLTU.pdb')
# NOTE(review): the pose is loaded from ../PDB_REDO_Stripped/, not from the
# path cleanATOM was given above -- presumably a separately prepared file;
# confirm.
pose = pyrosetta.pose_from_file("../PDB_REDO_Stripped/1DAN_HLTU.pdb")
# Print the attribute names of the pyrosetta module and of the loaded pose.
pprint.pprint(dir(pyrosetta))
pprint.pprint(dir(pose))
(ii * center[0] + jj * center[1] + kk * center[2])))))) center = None #t2 = time.time() #print('Atom Addition Time: ' + str(t2-t1)) #print('Current atom: ' + str(step)) ToAdd, Rem, ampl, s, center, coords, t1, t2 = None, None, None, None, None, None, None, None OutputArray = cp.asnumpy(OutputArray) #print('This is the size: ' + str(OutputArray.nbytes)) return OutputArray
# NOTE(review): the line above is the tail of a definition that starts
# outside this view; it is left exactly as found.

# As the first benchmark, used apoF model (1.2 A res) from Scheres et. al. See https://doi.org/10.1101/2020.05.22.110189
mrc = mrcfile.open('apoF.mrc')

# Initializes pyrosetta
init()
# NOTE(review): cleanATOM and pose_from_pdb are not guarded by a rank check
# here, so every process runs them -- confirm this is intended.
cleanATOM("apoF__Q__emd_20521.pdb")
pose = pose_from_pdb('apoF__Q__emd_20521.clean.pdb')
print('Rank ID of this Process: ' + str(rank))

# Lines 111-132 are the major lines changed in this compared to the original sim script
# For apoF, which uses a cell of 340^3 dimensions, 3 processes can be stored onto one GPU
# If using cells with greater dimensions, you'll want to decrease line 120 to Use = 2 or 1
# Otherwise, your simulation will crash. Variable "Use" is used to determine the
# processes/gpu number, so adjust your SLURM workscript accordingly...
# Collect every process's rank on rank 0 (the other ranks receive None).
RankID = rank
RankID = comm.gather(RankID, root=0)
if rank == 0:
    # GPU ids visible to this job, taken from the environment
    CudList = os.environ['CUDA_VISIBLE_DEVICES'].split(',')
    RankID.sort()
    Use = 0
dest="input", action="store", default=None, help="Path of the PDB file to optimize. It has to have .pdb extension") options = parser.parse_args() # debug the input if options.input is None: raise Exception("No input provided") if options.input.split('.')[-1] != 'pdb': raise Exception("The input has to be a .pdb file") # clean the input file to be correct for pyrosetta cleanATOM(options.input) clean_path = options.input.split('.')[0]+'.clean.pdb' # initialize the pose object that will be optimized pose = pyrosetta.pose_from_pdb(clean_path) # define a general energy score function for the energy optimization scorefxn = get_fa_scorefxn() # apply a classic relax pipeline(from P. Bradley, K. M. S. Misura & D. Baker, “Toward high-resolution de novo structure prediction for small proteins.” Science 309, 1868-1871 (2005)) relax = pyrosetta.rosetta.protocols.relax.ClassicRelax() relax.set_scorefxn(scorefxn) relax.apply(pose) # save into a pdb file pose.dump_pdb(options.outputfile)