Exemple #1
0
def clean_pdb_files(decoy_dir, file_list):
    """Run ``cleanATOM`` on every file in *file_list* from inside *decoy_dir*.

    The working directory is switched to *decoy_dir* for the duration of the
    call so the names in *file_list* resolve there. It is always restored
    afterwards, even when cleaning one of the files raises.

    Args:
        decoy_dir: Directory to change into before cleaning.
        file_list: Iterable of PDB file names passed to ``cleanATOM``.
    """
    cwd = os.getcwd()
    os.chdir(decoy_dir)
    try:
        for pdb_file in file_list:
            # outputs file_name.clean.pdb for file_name.pdb
            cleanATOM(pdb_file)
    finally:
        # Never leave the caller stranded in decoy_dir.
        os.chdir(cwd)
def get_pose(pdb):
    """Clean *pdb* with ``toolbox.cleanATOM`` and load the cleaned file as a pose.

    The cleaned file name is built by dropping the last four characters of
    *pdb* and appending ``.clean.pdb``.
    """
    toolbox.cleanATOM(pdb)
    cleaned_path = ''.join((pdb[:-4], '.clean.pdb'))
    return pose_from_pdb(cleaned_path)
def controlMut():
    """Write resfiles for the pyrrolysine-containing 2AVX structure.

    Notes kept from earlier attempts:
      * ``generate_resfile_from_pdb('2AVX_pyrrolysine.pdb', 'my.resfile')``
        caused ``EXCN_Base::what()``, likely because the file was not
        cleaned.
      * Cleaning first and using the ``.clean.pdb`` file was reported to fail
        as well: ``ERROR: Unrecognized residue: LG1``.
      * Generating the resfile from a pose was reported to work.

    ``pose`` is not defined in this function; it is looked up in the
    enclosing (module) scope.
    """
    from pyrosetta.toolbox import (
        generate_resfile_from_pdb,
        generate_resfile_from_pose,
        cleanATOM,
    )

    # Clean first, then try the PDB-based generator on the cleaned file.
    raw_pdb = '2AVX_pyrrolysine.pdb'
    cleanATOM(raw_pdb)
    generate_resfile_from_pdb('2AVX_pyrrolysine.clean.pdb', 'my2.resfile')

    # The pose-based generator is the variant noted as working.
    generate_resfile_from_pose(pose, 'my.resfile')
Exemple #4
0
    def extract(self):
        """Load the input PDB and record which residues interact across chains.

        Steps, in order:
          1. Start Rosetta via ``init()``; print help and return when
             ``self.args.pdb`` is missing or empty.
          2. Clean the PDB with ``cleanATOM`` and load
             ``<self.args.output>/<name>.clean.pdb`` as ``self.pose``.
          3. Append ``(index, residue)`` to ``self.ligand`` / ``self.protein``
             according to each residue's chain id.
          4. For every protein/ligand pair accepted by
             ``self.has_noncovalent_bonding``, store the protein residue (or
             the ligand residue when ``self.args.getligand`` is set) in
             ``self.fragment``; all other positions stay ``"-"``.
          5. Call ``self.print_fragment()``.
        """

        def chain_spec(value):
            # An option that was not supplied must select nothing.  Without
            # this guard str(None) == "None" is substring-matched below and
            # wrongly selects chains "N", "o", "n" and "e".
            if value is None:
                return []
            text = str(value)
            # Comma-separated values become a list of chain ids.  A value
            # without commas is kept as a plain string so that e.g. "AB"
            # still selects chains A and B, exactly as before.
            return text.split(',') if ',' in text else text

        print("Starting Rosetta environment. Please wait....")
        init()
        if self.args.pdb is None or len(self.args.pdb) <= 0:
            self.p.print_help()
            return
        print("Cleaning PDB. Please Wait...")
        filename, _ = os.path.splitext(os.path.basename(self.args.pdb))
        cleanATOM(self.args.pdb)
        print("Loading cleaned PDB file. Please wait....")
        # NOTE(review): cleanATOM is called without an output path, while the
        # cleaned file is looked up under self.args.output -- confirm both
        # refer to the same directory.
        full_file_path = os.path.join(self.args.output,
                                      "{0}.clean.pdb".format(filename))
        self.pose = pose_from_pdb(full_file_path)
        if not isinstance(self.pose, Pose):
            print("PDB file doesn't contain a valid pose object.")
            self.p.print_help()
            return
        self.info = self.pose.pdb_info()
        print("Building ligand and the interacting protein.")
        ligand = chain_spec(self.args.ligand)
        protein = chain_spec(self.args.protein)

        residue_count = self.pose.total_residue()
        # Pose residue indices are 1-based.
        for resId in range(1, residue_count + 1):
            res = self.pose.residue(resId)
            chainid = str(self.info.chain(resId))
            if chainid in ligand:
                self.ligand.append((resId, res))
            elif chainid in protein:
                self.protein.append((resId, res))
        print("Building Fragment. Please Wait....")
        self.fragment = ["-"] * residue_count
        for fr_pos, first_residue in self.protein:
            for sec_pos, second_residue in self.ligand:
                if self.has_noncovalent_bonding(first_residue, second_residue):
                    # fragment is 0-based, pose positions are 1-based.
                    if not self.args.getligand:
                        self.fragment[fr_pos - 1] = first_residue
                    else:
                        self.fragment[sec_pos - 1] = second_residue
        self.print_fragment()
def add_hydrogens(pdb_file):
    """Round-trip *pdb_file* through a pose and overwrite it in place.

    The file is cleaned with ``cleanATOM``, the cleaned copy is loaded with
    ``pose_from_pdb``, both the original and the temporary cleaned file are
    removed, and the pose is dumped back to *pdb_file*.

    The cleaned file name is derived with ``os.path.splitext`` so that only
    the final extension is replaced.  Splitting on the first ``"."`` produced
    a wrong name for paths such as ``./model.pdb`` or ``a.b.pdb``.

    Args:
        pdb_file: Path of the PDB file to rewrite.
    """
    cleaned_path = os.path.splitext(pdb_file)[0] + ".clean.pdb"
    # cleanATOM's return value is not used.
    cleanATOM(pdb_file)
    pose = pose_from_pdb(cleaned_path)
    os.remove(pdb_file)
    os.remove(cleaned_path)
    pose.dump_pdb(pdb_file)
Exemple #6
0
def getScore(path):
    """Calculate the pyRosetta score of the structure at *path*.

    The PDB is cleaned with ``cleanATOM``, the cleaned copy is scored, and the
    cleaned copy is removed again -- also when loading or scoring raises.

    Args:
        path: Path to a PDB file; its last three characters are replaced to
            form the ``...clean.pdb`` file name.

    Returns:
        The mapping returned by ``eTerms(pose)`` with the total score from
        ``scorefxn(pose)`` stored under the key ``'ref15'``.
    """
    # Clean PDB files
    cleanATOM(path)
    # Clean struct name
    cleanName = path[0:-3] + 'clean' + '.pdb'

    try:
        pose = pr.pose_from_pdb(cleanName)

        # Score first, then collect the energy terms (original order kept).
        tmp = scorefxn(pose)
        energy = eTerms(pose)
        energy['ref15'] = tmp
    finally:
        # Do not leave the temporary cleaned file behind on failure, and do
        # not mask the real error if the file was never written.
        if os.path.exists(cleanName):
            os.remove(cleanName)

    return energy
Exemple #7
0
def preprocess(path: str, config: dict) -> dict:
    """Return relaxed poses for every key of *config*, cached on disk.

    When ``./obj/base_poses.pkl`` exists it is unpickled and returned.
    Otherwise ``<path><key>.pdb`` is cleaned, loaded and relaxed for every key
    of *config*, and the resulting dict is pickled to that cache file.

    The cache file is opened for writing only after all poses were built, so
    a failure while building them no longer leaves an empty cache behind
    that would break the next run.  Both file handles are closed
    deterministically.

    Args:
        path: Prefix prepended to ``<key>.pdb`` / ``<key>.clean.pdb``.
        config: Mapping whose keys are the PDB names to process.

    Returns:
        Dict mapping each PDB name to its pose.
    """
    cache_path = './obj/base_poses.pkl'
    rosetta_init()
    if 'base_poses.pkl' in os.listdir('./obj/'):
        with open(cache_path, 'rb') as cache:
            return pk.load(cache)

    poses = {}
    for pdb in config:
        cleanATOM(path + pdb + '.pdb')
        pose = pose_from_pdb(path + pdb + '.clean.pdb')
        relax.relax_pose(pose=pose, scorefxn=get_fa_scorefxn(), tag='')
        poses[pdb] = pose
    with open(cache_path, 'wb') as cache:
        pk.dump(poses, cache)
    return poses
 def treat_template_structure(self,
                              mhc_chain,
                              peptide_chain,
                              trim_mhc,
                              mhc_trim_length,
                              idealize_relax=False):
     """Clean, optionally trim, load and optionally idealize/relax the template.

     Args:
         mhc_chain: Forwarded to ``self.trim_pdb`` when *trim_mhc* is truthy.
         peptide_chain: Forwarded to ``self.trim_pdb`` when *trim_mhc* is truthy.
         trim_mhc: When truthy, ``self.trim_pdb`` is called before loading.
         mhc_trim_length: Forwarded to ``self.trim_pdb``.
         idealize_relax: Only the exact comparison ``== True`` enables the
             idealize/relax step.

     Side effects:
         Sets ``self.template_pose`` and finishes with ``self.save_pdb()``.
     """
     cleanATOM(self.template_pdb)
     # presumably points self.template_pdb at the cleaned file -- TODO confirm
     self.append_clean_pdb()
     if trim_mhc:
         self.trim_pdb(mhc_chain, peptide_chain, mhc_trim_length)
     self.template_pose = pose_from_pdb(self.template_pdb)
     if idealize_relax == True:
         template_pose = self.template_pose
         self.template_pose = IDEALIZE().idealize_pdb(template_pose)
         # NOTE(review): relax is given the pre-idealize `template_pose`, not
         # the value just stored in self.template_pose.  If idealize_pdb
         # returns a new pose instead of modifying its argument, the idealized
         # result is discarded here -- confirm.
         self.template_pose = RELAX().relax_pdb(template_pose)
     self.save_pdb()
def processOne(name, scorefxn):
    """Clean and load the PDB *name*, then report structural statistics.

    Returns:
        Tuple ``(ss, fractionH, fractionL, numberBonds, score)``: the
        secondary-structure string, the fraction of ``"H"`` and ``"L"``
        characters in it, the hydrogen-bond count and ``scorefxn(pose)``.
    """
    cleanATOM(name)
    pose = pose_from_pdb(name[:-4] + ".clean.pdb")

    # Secondary structure and the share of H / L positions.
    ss = getSSString(pose)
    helix_count = ss.count("H")
    fractionH = helix_count / len(ss)
    loop_count = ss.count("L")
    fractionL = loop_count / len(ss)

    # Hydrogen bonds: only the count is reported.
    _donors, _acceptors, numberBonds = getHBondInfo(pose)

    # Full-atom score.
    score = scorefxn(pose)

    print("ON ", name)
    print("Secondary structure: ")
    print(ss)
    print("fraction H: ", fractionH, " fraction L ", fractionL,
          " number H bonds: ", numberBonds, " score: ", score)
    return ss, fractionH, fractionL, numberBonds, score
def main():
    """Interactively swap one helix, or all helices, of the 1wy3_ww structure.

    Prompts for a sequence and a helix number.  Helix numbers below 4 are
    handled by ``swap_helix``; anything else goes to ``swap_all``.
    """
    # [N-terminal, C-terminal] end points of the three protein helices
    borders = [[3, 10], [14, 19], [22, 32]]

    pattern_set, torsion_angles_set = create_pattern_set()
    sequence = input("Insert sequence:")
    set_number = which_pattern_set(sequence, pattern_set)
    pyrosetta_sequence = generate_new_sequence(sequence)

    # Clean the original protein (.pdb) file before loading it
    cleanATOM('1wy3_ww.pdb')
    # Built from the entered sequence; the result is not used further below.
    sequence_pose = pose_from_sequence(''.join(pyrosetta_sequence), 'fa_standard')
    protein = pose_from_pdb('1wy3_ww.clean.pdb')

    phi, psi, omega = write_torsion_angles(protein)

    # Change in amino acid no. 23, norleucine -> leucine
    protein_sequence = list(protein.sequence())
    protein_sequence[23] = 'L'

    helix = int(input('Which helix do you want to swap?'))

    swap = swap_helix if helix < 4 else swap_all
    swap(protein, borders, protein_sequence, pyrosetta_sequence,
         phi, psi, omega, torsion_angles_set, set_number, helix)
Exemple #11
0
def runSuiteOfMethods(name):
    """Clean the PDB *name*, run every refinement method on it, print the scores.

    Runs, in this order: the trRosetta full-atom fast relax and the plain
    minimization movers (each with 200, 500 and 1000 as last argument), then
    the classic relax.  Afterwards the original score and all refined scores
    are printed.
    """
    cleanATOM(name)
    cleaned = name.replace(".pdb", ".clean.pdb")
    start = pose_from_pdb(cleaned)

    stem = name.replace(".pdb", "")
    step_counts = (200, 500, 1000)

    # fa fast relax refinement process from trRosetta
    fast_prefix = "fastRelax_" + stem + "_"
    fast_scores = [
        faRefinementFromTrRosetta(cleaned, fast_prefix + str(steps) + ".pdb", steps)
        for steps in step_counts
    ]

    # simple min mover
    vanilla_prefix = "vanilla" + stem + "_"
    vanilla_scores = [
        vanillaMinimizationMovers(cleaned, vanilla_prefix + str(steps) + ".pdb", steps)
        for steps in step_counts
    ]

    # classic relax (long time)
    classic_score = classicRelax(cleaned, "classicRelax_" + stem + ".pdb")

    # scores report
    print("original score: ")
    scorefxn = get_fa_scorefxn()
    print(scorefxn(start))
    print(" ")

    print("Fast relax scores: ")
    for steps, value in zip(step_counts, fast_scores):
        print(str(steps) + ": ")
        print(value)
    print(" ")

    print("Vanilla min mover scores: ")
    for steps, value in zip(step_counts, vanilla_scores):
        print(str(steps) + ": ")
        print(value)
    print(" ")

    print("Classic relax score: ")
    print(classic_score)
Exemple #12
0
from tqdm import tqdm

if __name__ == '__main__':
    # Clean every visible *.pdb file in a directory with cleanATOM.  Unless
    # --keep is given, each original is replaced by its cleaned version.
    parser = argparse.ArgumentParser()
    parser.add_argument("-d",
                        "--dir",
                        default="./",
                        help="The directory we need to clean")
    parser.add_argument("-k",
                        "--keep",
                        default=False,
                        action="store_true",
                        help="If keep the original file and rename")
    args = parser.parse_args()

    # Hidden files (leading ".") are skipped.
    pdb_list = [
        entry for entry in os.listdir(args.dir)
        if entry.endswith(".pdb") and not entry.startswith(".")
    ]

    for pdb in tqdm(pdb_list):
        pdb_id = pdb[:-4]
        pdb_path = os.path.join(args.dir, pdb)
        # The names come from args.dir, so cleanATOM must be given the path
        # inside that directory; the bare name only worked when args.dir was
        # the current working directory.
        cleanATOM(pdb_path)
        if not args.keep:
            # Replace the original with the cleaned copy in a single step.
            os.replace(os.path.join(args.dir, pdb_id + ".clean.pdb"), pdb_path)
# Continuing on docking.py, now run multiple runs and store lowest energy

# import pyrosetta
from pyrosetta import *
init()

# import additional modules
# (a stray `ls` shell command stood here; as a Python statement it raises
# NameError, so it was removed)
import rosetta.protocols.rigid as rigid_moves
from rosetta.protocols.minimization_packing import *
from pyrosetta import PyMOLMover

# import and clean pdb (superantigen + TCR + MHC)
from pyrosetta.toolbox import cleanATOM
cleanATOM("2ICW_MHC.pdb")
pose = pose_from_pdb("2ICW_MHC.clean.pdb")

# make full-atom starting pose
fa_starting = Pose()
fa_starting.assign(pose)
# make a full-atom working pose
fa_working = Pose()
fa_working.assign(pose)
# make a centroid pose: `pose` itself is switched to centroid and then copied,
# so the two full-atom copies above must be taken first
switch = SwitchResidueTypeSetMover("centroid")
switch.apply(pose)
centroid = Pose()
centroid.assign(pose)

# make score function
scorefxn = get_fa_scorefxn()
        asDict['numberHBonds'].append(numberBonds)
        # asDict['s'].append(ss)
        asDict['dataset'].append("positiveOriginalSet")
        asDict['structure'].append(file)
    # graph interesting stuff
    asDF = pd.DataFrame(asDict)  # change to dataframe for sns plotting
    # save csv
    asDF.to_csv("relevantData.csv")
    sns.violinplot(x="dataset", y="score", data=asDF)
    plt.show()
    sns.violinplot(x="dataset", y="numberHBonds", data=asDF)
    plt.show()
    ax = sns.scatterplot(x="fractionH",
                         y="fractionL",
                         hue="dataset",
                         data=asDF)
    plt.show()


if __name__ == '__main__':
    # Script entry point: clean one hard-coded structure, load it and print
    # its hydrogen-bond information.
    #true positive structures:
    #look at multichain hbonds
    name = "./180312_massive_set_train_pdbs/redesigned_closed_7_7_7_9middlesbobby_1_9_S_237903.pdb_middle1.pdb-7_7_7_9middlesbobby_1_9_S_254850_0001.pdb_middle1.pdb-bobby_1_9_S_246831_padded_0001.pdb"
    cleanATOM(name)
    # Drop the trailing ".pdb" (4 characters) to build the cleaned file name.
    cleanPDBName = name[:-4] + ".clean.pdb"
    start = pose_from_pdb(cleanPDBName)
    # The two values returned by getHBondResiduesAndAtomsV2 are printed as-is.
    x, y = getHBondResiduesAndAtomsV2(start)
    print(x)
    print(y)
    print(start.get_hbonds())
Exemple #15
0
    outmap = numpy.real(cupy.asnumpy(cupy.transpose(outmap)))
    return outmap


# In[5]:
# Show one slice of the reference map, then simulate a map from the cleaned
# 1ye3 structure with sim_map.

from matplotlib import pyplot as plt
inmrc = mrcfile.open('examples/1ye3.mrc')
# Display the slice at index 64 along the first axis of the map data.
plt.imshow(inmrc.data[64, :, :])

# NOTE(review): the same file is opened a second time and neither handle is
# closed explicitly before `mrc` is rebound in the next cell.
mrc = mrcfile.open('examples/1ye3.mrc')
from pyrosetta import *
import numpy as np
init()
from pyrosetta.toolbox import cleanATOM
cleanATOM("./examples/1ye3.pdb")
pose = pose_from_pdb("./examples/1ye3.clean.pdb")

etbl = read_etable('examples/etable.txt')
# presumably the pixel size passed to sim_map -- TODO confirm units
apix = 1

omap = sim_map(pose, mrc, apix, etbl)

# In[6]:
# Write the simulated map to outputtest.mrc as float16.

from matplotlib import pyplot as plt

with mrcfile.new('outputtest.mrc', overwrite=True) as mrc:
    mrc.set_data(numpy.float16((omap)))
# NOTE(review): the `with` block has already closed the file; this extra
# close looks redundant.
mrc.close()
Exemple #16
0
    if (args.reso != 0):
        a_, b_, c_, d_ = resofitter(args.resofit)
    else:
        a_, b_, c_, d_ = 0, 0, 0, 0
else:
    a_, b_, c_, d_ = None, None, None, None

# Broadcast the four values from rank 0 so every process holds the same ones.
a_ = comm.bcast(a_, root=0)
b_ = comm.bcast(b_, root=0)
c_ = comm.bcast(c_, root=0)
d_ = comm.bcast(d_, root=0)

# Rank 0 process does pyrosetta work and gathers atom coordinates/type
if rank == 0:
    init()
    cleanATOM(args.pdb_file)
    # Drop the last 4 characters (assumes a ".pdb" suffix) to build the
    # cleaned file name.
    CleanFName = args.pdb_file[:-4] + '.clean.pdb'
    pose = pose_from_pdb(CleanFName)
    AtomData = CoordSet(pose)
    print('Total atom amount: ' + str(len(AtomData)))
    # Deal the atoms round-robin into one list per process.
    chunks = [[] for _ in range(size)]
    for i, chunk in enumerate(AtomData):
        chunks[i % size].append(chunk)
else:
    # Non-root ranks contribute nothing to the scatter.
    AtomData = None
    chunks = None

# Separates atom sets to each process equally
AtomData = comm.scatter(chunks, root=0)
comm.Barrier()
Exemple #17
0
		s = cp.float(1/scattering_params[1])
		ampl = cp.float((1/cp.sqrt(cp.power(2*pi,3)))*(1/cp.power(s,3)))
		coords = None
		OutputArray += ((cp.float(scattering_params[0]) * cp.fft.ifftshift(ampl* cp.exp(-cp.power(pi,2)*(cp.power(ii,2)+cp.power(jj,2)+cp.power(kk,2))/(2*cp.power(s,2)) - ( (2*pi)*1j*(ii*center[0]+jj*center[1]+kk*center[2]) )) )))
		center = None
		#t2 = time.time()
		#print('Atom Addition Time: ' + str(t2-t1))
		#print('Current atom: ' + str(step))
		ToAdd, Rem, ampl, s, center, coords, scattering_params,t1,t2 = None,None,None,None,None,None,None,None,None
	OutputArray = cp.asnumpy(OutputArray)
	#print('This is the size: ' + str(OutputArray.nbytes))
	return OutputArray

# Load the reference map and the cleaned 6cvb structure.
mrc = mrcfile.open('EMD-7638-ASYM-COM-BOX180.mrc')
init()
# NOTE(review): this is not guarded by a rank check, so if the script runs
# under several processes each of them rewrites the same .clean.pdb file --
# confirm this is intended.
cleanATOM("6cvb-1_11-ASYM-Dehydrated_modified.pdb")
pose = pose_from_pdb('6cvb-1_11-ASYM-Dehydrated_modified.clean.pdb')


print('Rank ID of this Process: ' + str(rank))

# Integer half of the rank; presumably a device index shared by two
# processes -- TODO confirm
Dev = int(rank/2)

if rank == 0:
	AtomData = CoordSet(pose)
	print('Total atom amount: ' + str(len(AtomData)))
	chunks = [[] for _ in range(size)]
	for i, chunk in enumerate(AtomData):
		chunks[i%size].append(chunk)
else:
	AtomData = None
 def _preprocess(self):
     """Copy each not-yet-written monomer PDB into ``self.sub_dir`` and clean the copy.

     A file is skipped when ``strip_pdb_ext`` of its base name is already
     contained in ``self.written``.
     """
     for source_path in self.monomer_pdb:
         base_name = os.path.basename(source_path)
         if strip_pdb_ext(base_name) in self.written:
             continue
         shutil.copyfile(source_path, os.path.join(self.sub_dir, base_name))
         cleanATOM(os.path.join(self.sub_dir, base_name))
import numpy
import scipy
import Bio
from pyrosetta import *
init()
from pyrosetta.toolbox import cleanATOM
cleanATOM("1yy8.pdb")
#from pyrosetta.toolbox import pose_from_rcsb
# cleanATOM derives the output name from its input, so the cleaned file is
# "1yy8.clean.pdb".  The previous upper-case "1YY8.clean.pdb" is not found on
# case-sensitive filesystems.
pose = pose_from_pdb("1yy8.clean.pdb")
print(pose)
print(pose.sequence())
print("Protein has", pose.total_residue(), "residues.")
# Name, chain and PDB number of pose residue 500
print(pose.residue(500).name())
print(pose.pdb_info().chain(500))
print(pose.pdb_info().number(500))
# Backbone and first side-chain torsion of residue 5
print(pose.phi(5))
print(pose.psi(5))
print(pose.chi(1, 5))
# Atoms 1, 2 and 3 of residue 5 (named N, CA and C here)
R5N = AtomID(1, 5)
R5CA = AtomID(2, 5)
R5C = AtomID(3, 5)
print(pose.conformation().bond_length(R5N, R5CA))
print(pose.conformation().bond_length(R5CA, R5C))
from pyrosetta import PyMOLMover
pymol = PyMOLMover()
pymol.apply(pose)
# Calculating energy score
print("Calculating energy score of 6Q21 protein")
ras = pose_from_pdb("6q21.pdb")
print(ras)
print(ras.sequence())
import os

import pyrosetta
import rosetta
import pyrosetta.toolbox as toolbox
import pprint

pyrosetta.init()

# NOTE(review): cleanATOM is run on ../PDB_REDO/1DAN_HLTU.pdb, but the pose is
# loaded from ../PDB_REDO_Stripped/1DAN_HLTU.pdb, not from the cleaned output
# -- confirm this is intended.
toolbox.cleanATOM('../PDB_REDO/1DAN_HLTU.pdb')
pose = pyrosetta.pose_from_file("../PDB_REDO_Stripped/1DAN_HLTU.pdb")

# List the attributes of the pyrosetta module and of the loaded pose.
pprint.pprint(dir(pyrosetta))
pprint.pprint(dir(pose))
                    (ii * center[0] + jj * center[1] + kk * center[2]))))))
        center = None
        #t2 = time.time()
        #print('Atom Addition Time: ' + str(t2-t1))
        #print('Current atom: ' + str(step))
        ToAdd, Rem, ampl, s, center, coords, t1, t2 = None, None, None, None, None, None, None, None
    OutputArray = cp.asnumpy(OutputArray)
    #print('This is the size: ' + str(OutputArray.nbytes))
    return OutputArray


# As the first benchmark, used apoF model (1.2 A res) from Scheres et. al. See https://doi.org/10.1101/2020.05.22.110189
mrc = mrcfile.open('apoF.mrc')
# Initializes pyrosetta
init()
# NOTE(review): not guarded by a rank check, so if several processes run this
# script each of them rewrites the same .clean.pdb file -- confirm this is
# intended.
cleanATOM("apoF__Q__emd_20521.pdb")
pose = pose_from_pdb('apoF__Q__emd_20521.clean.pdb')

print('Rank ID of this Process: ' + str(rank))

# Lines 111-132 are the major lines changed in this compared to the original sim script
# For apoF, which uses a cell of 340^3 dimensions, 3 processes can be stored onto one GPU
# If using cells with greater dimensions, you'll want to decrease line 120 to Use = 2 or 1
# Otherwise, your simulation will crash. Variable "Use" is used to determine the
# processes/gpu number, so adjust your SLURM workscript accordingly...
RankID = rank
RankID = comm.gather(RankID, root=0)
if rank == 0:
    CudList = os.environ['CUDA_VISIBLE_DEVICES'].split(',')
    RankID.sort()
    Use = 0
   dest="input",
   action="store",
   default=None,
   help="Path of the PDB file to optimize. It has to have .pdb extension")

# Local import so the path handling below works even if `os` was not
# imported earlier in this script.
import os

options = parser.parse_args()

# debug the input
if options.input is None:
    raise Exception("No input provided")

# Split off only the final extension.  Splitting on every "." mangled paths
# such as "./model.pdb" or "run.1/model.pdb" and accepted a bare "pdb".
input_root, input_ext = os.path.splitext(options.input)
if input_ext != '.pdb':
    raise Exception("The input has to be a .pdb file")

# clean the input file to be correct for pyrosetta
cleanATOM(options.input)
clean_path = input_root + '.clean.pdb'

# initialize the pose object that will be optimized
pose = pyrosetta.pose_from_pdb(clean_path)

# define a general energy score function for the energy optimization
scorefxn = get_fa_scorefxn()

# apply a classic relax pipeline(from P. Bradley, K. M. S. Misura & D. Baker, “Toward high-resolution de novo structure prediction for small proteins.” Science 309, 1868-1871 (2005))
relax = pyrosetta.rosetta.protocols.relax.ClassicRelax()
relax.set_scorefxn(scorefxn)
relax.apply(pose)

# save into a pdb file
pose.dump_pdb(options.outputfile)