def __init__(self, pdb, identifier=0, path=""):
    """
    Prepare the state needed to fold one protein with a particle swarm.

    :param pdb: PDB ID of the protein to be folded (handed to pose_from_rcsb).
    :param identifier: Id of this process.
    :param path: Stored unchanged on ``self.path``.
    """
    # Score bookkeeping
    self.gen_last = 0        # Generation for which the energy score was last calculated
    self.lowest = 100000000  # Lowest score observed

    # Rosetta inits
    # Rosetta ends up being used only to read the sequence of the pdb entry,
    # which is of course not strictly necessary.
    rosetta.init()
    native_pose = pose_from_rcsb(pdb)
    self.sequence = native_pose.sequence()

    # Process id and side-chain (rotamer) settings
    self.id = identifier
    self.rot_iter = 200       # Iterations spent trying to resolve side chain clashes
    self.rot_mover_size = 5   # Size of rotamer mover
    self.new_conf = False     # Switch to build with new rotamer conformations
    self.mod_dict = {}        # Dictionary of modified rotamers
    self.path = path

    # Two values (phi and psi) per residue
    self.c_size = 2 * len(self.sequence)

    # Ecspy inits: the swarm draws from a generator seeded with the current time
    rng = Random()
    rng.seed(int(time()))
    self.es = swarm.PSO(rng)
    self.bounder = ec.Bounder(-180, 180)
    self.es.topology = topologies.star_topology
def add_cid_to_database(cid, name):
    """
    Fetch the ligand `cid` via params_from_pubchem and register it as the
    custom residue type `name` in the fa_standard residue type set.

    The module-level names ``database``, ``fa_standard`` and ``fa_custom``
    are concatenated into directory paths exactly as before
    (NOTE(review): presumably they already carry their own path separators).

    :param cid: compound identifier forwarded to params_from_pubchem.
    :param name: ligand name; ``<name>.params`` is appended to
        residue_type_sets.txt.

    Steps:
      1. create the custom directory (and its "## Custom" header line in
         residue_type_sets.txt) if it does not exist yet,
      2. run params_from_pubchem inside the custom directory,
      3. append the new .params entry to residue_type_sets.txt,
      4. restore the original working directory and call init() again.
    """
    start_dir = os.getcwd()
    try:
        # if the custom directory does not exist
        if not os.path.exists(database + fa_standard + fa_custom):
            # make a "custom" directory
            os.chdir(database + fa_standard + 'residue_types')
            os.mkdir('custom')
            # edit residue_type_sets.txt; append mode keeps existing entries
            os.chdir(database + fa_standard)
            with open('residue_type_sets.txt', 'a') as f:
                f.write('\n## Custom\n')

        # get the ligand (run from inside the custom directory)
        os.chdir(database + fa_standard + fa_custom)
        params_from_pubchem(cid, name)

        # add the ligand to residue_type_sets.txt
        os.chdir(database + fa_standard)
        with open('residue_type_sets.txt', 'a') as f:
            f.write(fa_custom + '/' + name + '.params\n')
    finally:
        # return to original dir, even if one of the steps above failed
        os.chdir(start_dir)

    # reinitialize so the new residue type is picked up
    init()
def import_rosetta():
    """
    Import PyRosetta and ``lib.mutants`` on first use and initialise Rosetta.

    The modules are bound to the module-level names ``rosetta`` and
    ``mutants``. The function attribute ``import_rosetta.imported`` records a
    successful import so later calls return immediately; it is read here and
    is expected to be set before the first call.

    Failures are logged as critical and not re-raised (best effort).
    KeyboardInterrupt and SystemExit are deliberately no longer swallowed.
    """
    global rosetta, mutants

    if import_rosetta.imported:
        return

    try:
        import rosetta
        import lib.mutants as mutants
        rosetta.init()
        import_rosetta.imported = True
    except Exception:
        logger.critical("PyRosetta not found. Rosetta utilities are unavailable.\n")
def testLoadStruct(self):
    """ Compare load_rosetta with read_PDB on tri-alanine (atom and residue counts) """
    init()
    tri_ala = pose_from_sequence('A' * 3)
    converted = load_rosetta(tri_ala)
    reference = read_PDB(get_fn('ala_ala_ala.pdb'))

    n_atoms = len(converted.atoms)
    self.assertEqual(n_atoms, len(reference.atoms))

    n_residues = len(converted.residues)
    self.assertEqual(n_residues, len(reference.residues))
def main():
    """
    Load the variant table named on the command line and tag every variant
    with the chain (A, B or C) its position falls in.

    Command line: ``<in_file> <out_file> <distance_cutoff>``.

    The three chains of 'burn1ABC_renumb.pdb' are dumped to Ans_A/B/C.pdb and
    passed through capture_pdb_one. Each data row of the input (rows whose
    first field is 'Variant' or 'WT' are skipped) becomes
    ``[index, position, field0 .. field8, chain]`` in ``all_data``.
    """
    args = sys.argv
    in_file = args[1]
    out_file = args[2]
    distance_cutoff = float(sys.argv[3])

    init(extra_options='-mute basic -mute core -mute protocols -mute Warning')

    # Read everything up front; the context manager closes the handle.
    with open(in_file, 'r') as handle:
        all_lines = handle.readlines()
    print(len(all_lines))

    # get the protein used to initialize the forward simulation
    initial_pose = pose_from_pdb(str('burn1ABC_renumb.pdb'))

    # save each one of its chains
    chains = initial_pose.split_by_chain()
    ancestral1 = chains[1]
    ancestral2 = chains[2]
    ancestral3 = chains[3]

    ancestral1.dump_pdb("Ans_A.pdb")
    ancestral2.dump_pdb("Ans_B.pdb")
    ancestral3.dump_pdb("Ans_C.pdb")

    ancestral_structure1 = capture_pdb_one("Ans_A_cap.pdb", "Ans_A.pdb")
    ancestral_structure2 = capture_pdb_one("Ans_B_cap.pdb", "Ans_B.pdb")
    ancestral_structure3 = capture_pdb_one("Ans_C_cap.pdb", "Ans_C.pdb")

    # Last position belonging to chain A and to chain B; anything beyond is C.
    end_of_a = ancestral1.total_residue()
    end_of_b = end_of_a + ancestral2.total_residue()

    all_data = []
    i = 0
    for a_line in all_lines:
        split = a_line.split(',')
        if split[0] in ('Variant', 'WT'):
            continue

        print(split[0])
        # keep only the numeric part of the variant name
        pos = re.sub("[^0-9^.]", "", split[0])
        print(pos)

        # figure out if a position is in A B or C
        site = int(pos)
        if site <= end_of_a:
            chain_id = 'A'
        elif site <= end_of_b:
            chain_id = 'B'
        else:
            chain_id = 'C'

        # explicit indexing so a short row still raises IndexError
        all_data.append([i, pos] + [split[k] for k in range(9)] + [chain_id])
        i = i + 1
def load_pdb(self, pdb_name):
    """
    Initialise Rosetta with every ``.params`` file found in
    ``self.working_dir`` and load `pdb_name` into ``self.pose``.
    """
    # collect the ligand params files, in directory-listing order
    lig_params = [entry for entry in os.listdir(self.working_dir)
                  if entry.endswith(".params")]

    # assemble the extra options string: base flags plus one flag per params file
    pieces = ["-mute basic -mute core -ignore_waters True"]
    pieces.extend(" -in:file:extra_res_fa %s" % param for param in lig_params)
    ext_opts = "".join(pieces)

    print("Initializing Rosetta with the following options: %s" % ext_opts)

    from rosetta import init
    init(extra_options=ext_opts)

    self.pose = pose_from_pdb(pdb_name)
def testLoadedPositions(self):
    """ Check that load_rosetta keeps every atom position of the pose """
    init()
    seq = 'A' * 3
    pose = pose_from_sequence(seq)
    struct = load_rosetta(pose)

    # Rosetta residues are numbered from 1
    residues = [pose.residue(number) for number in range(1, len(seq) + 1)]

    # flatten the per-residue atom coordinates into one list of tuples
    pose_coords = []
    for residue in residues:
        for atom in residue.atoms():
            pose_coords.append(tuple(atom.xyz()))

    struct_coords = []
    for atom in struct.atoms:
        struct_coords.append((atom.xx, atom.xy, atom.xz))

    self.assertEqual(pose_coords, struct_coords)
def test_loaded_positions(self):
    """ Check that load_rosetta keeps every atom position of the pose """
    init()
    seq = 'A' * 3
    pose = pose_from_sequence(seq)
    struct = load_rosetta(pose)

    # Rosetta residues are numbered from 1
    residues = [pose.residue(number) for number in range(1, len(seq) + 1)]

    # one coordinate tuple per atom, residue by residue
    expected = [tuple(atom.xyz())
                for residue in residues
                for atom in residue.atoms()]
    loaded = [(atom.xx, atom.xy, atom.xz) for atom in struct.atoms]

    self.assertEqual(expected, loaded)
def test_loaded_topology(self):
    """ Compare the load_rosetta topology with the OpenMM topology of the reference PDB """
    init()
    pose = pose_from_sequence('A' * 3)
    struct = load_rosetta(pose)
    pdb = PDBFile(get_fn('ala_ala_ala.pdb'))

    # atoms, bonds and residues must all match in number
    for kind in ('atoms', 'bonds', 'residues'):
        from_struct = list(getattr(struct.topology, kind)())
        from_pdb = list(getattr(pdb.topology, kind)())
        self.assertEqual(len(from_struct), len(from_pdb))
def load_pdb(self, pdb_name):
    """
    Start Rosetta with all ligand ``.params`` files from ``self.working_dir``
    registered as extra residues, then read `pdb_name` into ``self.pose``.
    """
    # base flags, extended by one extra_res_fa flag per ligand params file
    ext_opts = "-mute basic -mute core -ignore_waters True"
    for entry in os.listdir(self.working_dir):
        if not entry.endswith(".params"):
            continue
        ext_opts += " -in:file:extra_res_fa %s" % entry

    print("Initializing Rosetta with the following options: %s" % ext_opts)

    from rosetta import init
    init(extra_options=ext_opts)

    self.pose = pose_from_pdb(pdb_name)
def testLoadedTopology(self):
    """ Compare the load_rosetta topology with the OpenMM topology of the reference PDB """
    init()
    pose = pose_from_sequence('A' * 3)
    struct = load_rosetta(pose)
    pdb = PDBFile(get_fn('ala_ala_ala.pdb'))

    # atoms, bonds and residues must all match in number
    for kind in ('atoms', 'bonds', 'residues'):
        n_struct = _unpackLen(getattr(struct.topology, kind)())
        n_pdb = _unpackLen(getattr(pdb.topology, kind)())
        self.assertEqual(n_struct, n_pdb)
def __init__(self):
    """
    Reset the private ligand/score bookkeeping fields, derive the working
    directories from ROOTPATH and initialise Rosetta.
    """
    # Ligand and scoring state
    self.__residueSet = None
    self.__scoreRosetta = 0
    self.__ligandFile = None
    self.__ligandName = "NONAME"

    # Best result so far; the score starts at a large sentinel value
    self.__bestLigand = None
    self.__bestScore = 999999.9
    self.__isBest = False

    self.__pdbStr = []
    self.__scorefxn = None

    # Files Paths
    under_root = os.path.join
    self.__resDir = under_root(ROOTPATH, 'results')
    self.__molDir = under_root(ROOTPATH, 'others')
    self.__tmpDir = under_root(ROOTPATH, 'temp')

    # Init Rosetta
    rosetta.init()
def main():
    """
    Compare two PDB structures and report their RMSD.

    Command line: ``<wt.pdb> <compare.pdb> [-i]``. The ``-i`` flag is passed
    to calculateRMS as ``show_index``.

    When both poses have the same number of residues, they are aligned with
    kabsch_alignment, calculateRMS is called for every residue (writing to
    ``<wt>_vs_<compare>.txt``), and the RMSD derived from the module-level
    ``total_square`` is printed next to the PyRosetta CA RMSD. The PyRosetta
    value is also written to the output file.

    When the residue counts differ, only the PyRosetta CA RMSD is printed and
    no output file is created. Previously that path failed on the undefined
    ``output`` handle.
    """
    # total_square is read below; presumably accumulated by calculateRMS.
    global total_square

    opts, args = getopt.getopt(sys.argv[3:], 'i')
    show_index = 1 if any('-i' in opt for opt in opts) else 0

    rosetta.init()

    wtName = sys.argv[1]
    compareName = sys.argv[2]
    outputName = wtName.split('.')[0] + '_vs_' + compareName.split('.')[0] + ".txt"

    pose1 = rosetta.pose_from_pdb(wtName)
    pose2 = rosetta.pose_from_pdb(compareName)

    total_residue = pose1.total_residue()
    other_total = pose2.total_residue()
    use_me = total_residue == other_total
    if not use_me:
        print("Residue number not equal %s %s" % (total_residue, other_total))

    output = None
    try:
        if use_me:
            output = open(outputName, 'w')
            residues = range(1, total_residue + 1)
            kabsch_alignment(pose1, pose2, residues, residues)

            # RMSD calculated by my own function
            for i in range(1, total_residue + 1):
                calculateRMS(pose1, pose2, i, output, show_index)

        # RMSD calculated by PyRosetta
        ro_rmsd = rosetta.CA_rmsd(pose1, pose2)
        print("rosetta generated rmsd: " + str(ro_rmsd))

        if use_me:
            me_rmsd = math.sqrt(total_square / total_residue)
            print("me generated rmsd: " + str(me_rmsd))
            output.write(outputName.split('.')[0] + ":\t" + str(ro_rmsd))
    finally:
        # the file only exists on the matching-length path
        if output is not None:
            output.close()

    print("Done")
def __init__(self, pdb, mc_temperature=1, identifier=0, local=True, path=""):
    """
    Prepare the state of one Monte Carlo folding run.

    :param pdb: PDB id of protein to be folded.
    :param mc_temperature: Temperature for MC simulation.
    :param identifier: ID of process.
    :param local: If True, test moves per residue.
    :param path: Path to directory to operate in.
    """
    # --- score tracking ---
    self.gen_last = 0            # Generation for which the energy score was last calculated
    self.lowest = 100000000      # Lowest score observed

    # --- backbone state and step counters ---
    self.conformation = []       # Current conformation of protein
    self.mover_size = 20         # Degrees to move at each step
    self.current_score = 0       # Current energy at any state
    self.steps = 0               # Steps elapsed so far
    self.max_steps = 1000000     # Upper limit on simulation steps

    # --- Monte Carlo settings ---
    self.temperature = mc_temperature
    self.accepted = 1.0          # Number of MC steps accepted
    self.rejected = 1.0          # Number of MC steps rejected
    self.id = identifier         # Process id, for running several simultaneously
    self.threshold = 1           # Threshold counter for increasing mover range
    self.score_10 = 10000000     # The lowest score 10 steps ago
    self.local = local           # Local mover if True, global otherwise

    # --- rotamer (side chain) handling ---
    self.rot_conformation = []   # Container for rotamer conformations
    self.rot_conf_local = []     # Container for rotamer conformation within loop
    self.rot_iter = 100          # Iterations spent trying to resolve side chain clashes
    self.rot_mover_size = 5      # Size of rotamer mover
    self.new_conf = False        # Switch to build with new rotamer conformations
    self.mod_dict = {}           # Dictionary of modified rotamers

    self.path = path             # Directory to operate in

    # Rosetta inits
    # Rosetta ends up being used only to read the sequence from the pdb entry,
    # which is of course not strictly necessary.
    rosetta.init()
    native_pose = pose_from_rcsb(pdb)
    self.sequence = native_pose.sequence()

    # Two values (phi and psi) per residue
    self.c_size = 2 * len(self.sequence)
def initialize_rosetta( constant_seed = False, debug = False ):
    '''
    Initialize Rosetta with the sugar flags.
    By default basic, core, and protocols are muted.
    If constant_seed == True, additionally use the default constant seed
    If debug == True (and constant_seed is not), use the constant seed and
    do not mute Rosetta (output level 400)
    '''
    # imports
    from rosetta import init

    print("Initializing Rosetta with sugar flags")

    # Earlier flag set, kept for reference (it also read PDB LINK records):
    # "-mute basic -mute core -mute protocols -include_sugars -override_rsd_type_limit -read_pdb_link_records -write_pdb_link_records"

    # choose the flag string first, then initialize exactly once
    if constant_seed:
        flags = "-mute basic -mute core -mute protocols -include_sugars -override_rsd_type_limit -write_pdb_link_records -constant_seed"
    elif debug:
        flags = "-include_sugars -override_rsd_type_limit -write_pdb_link_records -constant_seed -out:level 400"
    else:
        flags = "-mute basic -mute core -mute protocols -include_sugars -override_rsd_type_limit -write_pdb_link_records"

    init( extra_options=flags )
# ''' # libraries from multiprocessing import Pool import numpy as np import subprocess import argparse import glob import time import sys import os import re if '-h' not in sys.argv: import rosetta # rosetta.init() rosetta.init(extra_options = " -ex1 -ex2 -no_optH false -use_input_sc ") # -mute basic -mute core from rosetta.protocols.simple_moves import symmetry from rosetta.utility import ostream # from rosetta.protocols import grafting ''' ### ''' def minimize_pose_backbone( (Pose, ScoreFunction) ): MinPose = Pose.clone() Movemap = rosetta.MoveMap() Movemap.set_bb(True) Movemap.set_chi(True) MinimizationMover = rosetta.MinMover()
def main():
    """
    Run an accept/reject mutation simulation on one structure.

    Command line: the name of the pdb file without the extension.

    The cleaned structure is repacked and minimized once. Then random point
    mutations (never at, or to, cysteine) are proposed until 1500 have been
    accepted; acceptance uses calc_prob_mh. After the first 1000 accepts
    every accepted variant is recorded and dumped as ``<accepts>.pdb``. The
    table is written to a CSV file at the end.
    """
    # takes name of pdb file without the extension
    pdb_file = sys.argv[1]

    # timer to figure out how long the code took to run
    started = time()

    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core')

    # Constants
    PACK_RADIUS = 10.0
    # Amino acids, notice there is no C
    AAs = ("A", "D", "E", "F", "G", "H", "I", "K", "L", "M",
           "N", "P", "Q", "R", "S", "T", "V", "W", "Y")
    max_accept_mut = 1500   # number of mutations to accept
    N = 100                 # population size
    beta = 1                # temperature term

    # Prepare data headers
    data = ['Variant,Rosetta Score,"delta-delta-G",Probability,Generation\n']

    # Load and clean up pdb file
    cleanATOM(pdb_file + ".pdb")
    current_pose = pose_from_pdb(pdb_file + ".clean.pdb")

    # Set up ScoreFunction
    sf = get_fa_scorefxn()

    # Set up MoveMap; change these for more or less flexibility
    mm = MoveMap()
    mm.set_bb(True)
    mm.set_chi(True)

    # Pack and minimize initial pose to remove clashes.
    unrelaxed_score = sf(current_pose)

    task = standard_packer_task(current_pose)
    task.restrict_to_repacking()
    task.or_include_current(True)
    RotamerTrialsMover(sf, task).apply(current_pose)

    min_mover = MinMover()
    min_mover.movemap(mm)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')
    min_mover.apply(current_pose)

    current_score = sf(current_pose)

    # Threshold for selection comes from the score before packing
    threshold = unrelaxed_score / 2

    data.append('WT,' + str(current_score) + ',0.0 ,0.0,0\n')

    # number of residues to select from
    n_res = current_pose.total_residue()

    # start sim
    accepts = 0
    gen = 0
    while accepts < max_accept_mut:
        # one more generation has passed
        gen += 1
        print('accepts: %s' % accepts)

        # pick a place to mutate; don't mess with C, just choose again
        while True:
            mut_location = random.randint(1, n_res)
            res = current_pose.residue(mut_location)
            if res.name1() != 'C':
                break

        # choose the amino acid to mutate to; redraw if it is the current one
        while True:
            proposed_res = AAs[random.randint(0, len(AAs) - 1)]
            if proposed_res != res.name1():
                break

        # make the mutation
        # NOTE (original author): this is a rather poor model; a newer
        # version repacks everything, re-minimizes, and should also backrub.
        mutant_pose = mutate_residue(current_pose, mut_location, proposed_res,
                                     PACK_RADIUS, sf)

        # score mutant and get its acceptance probability
        variant_score = sf(mutant_pose)
        probability = calc_prob_mh(variant_score, current_score, N, beta,
                                   threshold)

        # rejected proposals leave the current pose untouched
        if not (random.random() < probability):
            continue

        # name for the mutant that is going to be kept
        variant_name = (res.name1()
                        + str(current_pose.pdb_info().number(mut_location))
                        + str(proposed_res))

        # Assuming 1000 burn in phase; remove this test to store everything
        if accepts > 1000:
            # save name and energy change
            fields = [variant_name,
                      str(variant_score),
                      str(variant_score - current_score),
                      str(probability),
                      str(gen)]
            data.append(",".join(fields) + "\n")
            mutant_pose.dump_pdb(str(accepts) + ".pdb")

        # the accepted mutant becomes the new wildtype
        current_pose = mutant_pose
        current_score = variant_score

        # update number of accepts
        accepts += 1

    print('\nMutations and scoring complete.')
    finished = time()

    # Output results.
    data_filename = pdb_file[:-5] + 'mh_1500_rep3.csv'
    with open(data_filename, "w") as f:
        f.writelines(data)

    print('Data written to: %s' % data_filename)
    print('program takes %f' % (finished - started))
parser.add_argument('--max_angle', type=float, default=30.0, help='the maximum angle for a backbone move') parser.add_argument('--Hbond_weight', type=float, default=2.0, help='the scoring weight for hydrogen bonds') parser.add_argument('--min_type', default='linmin', choices=['linmin', 'dfpmin'], help='the type of minimization') args = parser.parse_args() # Initialize Rosetta. init(extra_options='-include_sugars -override_rsd_type_limit -mute all') # Format sequence. sequence = args.monosaccharide_name + ("-" + args.linkage + "-" + \ args.monosaccharide_name) * (args.n_residues-1) # Create pose. print 'Creating pose with sequence:', sequence pose = pose_from_saccharide_sequence(sequence) # Set filenames. output_base_filename = args.monosaccharide_name + '-' + \ str(args.n_residues) + '-mer' # Run protocol. refine_saccharide(pose, args, output_base_filename)
#if args.pm: # pm.send_hbonds(pose) if __name__ == '__main__': # Parse arguments. args = parse_arguments() # Initialize Rosetta. print '\nInitializing Rosetta...' init(extra_options='-include_sugars ' '-write_pdb_link_records ' '-mute basic -mute numeric -mute utility ' '-mute core -mute protocols ' '-missing_density_to_jump ' '-default_max_cycles 200 ' '-default_repeats 30 ' '-set_weights cart_bonded 0.05 pro_close 0 ' '-minimize_bond_angles ' '-minimize_bond_lengths ' '-nonideal ') # Create pose. print 'Loading pose from:', args.pdb_filename pose = pose_from_pdb(args.pdb_filename) # Set up ScoreFunction. if args.mm: sf = create_score_function('mm_std') else: sf = get_fa_scorefxn()
def main():
    """
    Run an accept/reject mutation simulation on a three-chain complex.

    Command line: ``<pdb_file> <out_file> <score_type>``.

    Each proposal (never at, or to, cysteine) is applied to a copy of the
    current pose with mutate_residue_chain and scored with score_all. The
    acceptance probability comes from calc_prob_scores. Every accepted
    variant is appended to ``data`` and dumped as ``<accepts>.pdb``.
    """
    # takes name of pdb file without the extension
    args = sys.argv
    pdb_file = args[1]
    out_file = args[2]
    score_type = int(args[3])

    # timer to figure out how long the code took to run
    t0 = time()

    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core -mute protocol -mute warn')

    # Constants
    PACK_RADIUS = 5
    # Amino acids, notice there is no C
    AAs = ("A", "D", "E", "F", "G", "H", "I", "K", "L", "M",
           "N", "P", "Q", "R", "S", "T", "V", "W", "Y")
    max_accept_mut = 2000   # number of mutations to accept
    N = 1                   # population size
    beta = 1                # temperature term

    # Prepare data headers
    data = ['Variant,ChainA,ChainB,ChainC,InterfaceAB,InterfaceAC,"delta-delta-G",Probability,Generation\n']

    initial_pose = pose_from_pdb(pdb_file)

    # Set up ScoreFunction
    sf = get_fa_scorefxn()

    # Set up MoveMap: backbone and side chain flexibility is switched here
    mm = MoveMap()
    mm.set_bb(False)

    # Initial score of the structure
    pre_pre_packing_score = sf(initial_pose)
    print(pre_pre_packing_score)

    min_mover = MinMover()
    min_mover.movemap(mm)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')

    # split a copy of the structure into its chains
    cp_init_pdb = Pose()
    cp_init_pdb.assign(initial_pose)
    chains = cp_init_pdb.split_by_chain()

    # split up AB inter and AC inter
    initial_poseAB = Pose()
    initial_poseAB.assign(initial_pose)
    initial_poseAC = Pose()
    initial_poseAC.assign(initial_pose)

    init_chain_moverAB = SwitchChainOrderMover()
    init_chain_moverAB.chain_order("12")
    init_chain_moverAB.apply(initial_poseAB)

    init_chain_moverAC = SwitchChainOrderMover()
    init_chain_moverAC.chain_order("13")
    init_chain_moverAC.apply(initial_poseAC)

    # score the initial stabilities of each chain
    wt_a = sf(chains[1])
    wt_b = sf(chains[2])
    wt_c = sf(chains[3])

    # score the initial interfaces
    inter_AB = InterfaceEnergy_split(initial_poseAB)
    inter_AC = InterfaceEnergy_split(initial_poseAC)

    # Thresholds: half of the initial stabilities. They are specific to this
    # protein; change them for a different structure.
    threshold_a = -138.41754752
    threshold_b = -61.378619136
    threshold_c = -61.378619136
    threshold_inter_ab = -10.3726691079
    threshold_inter_ac = -10.3726691079

    wt_fields = ['WT', str(wt_a), str(wt_b), str(wt_c),
                 str(inter_AB), str(inter_AC), '0.0', '0.0', '0']
    data.append(','.join(wt_fields) + '\n')

    # check the initial starting score
    init_score = score_all(initial_pose, sf, min_mover, beta, threshold_a,
                           threshold_b, threshold_c, threshold_inter_ab,
                           threshold_inter_ac, score_type)
    print(init_score)

    # number of residues to select from
    n_res = initial_pose.total_residue()
    print(n_res)

    # start sim
    i = 0
    gen = 0
    while i < max_accept_mut:
        # one more generation has passed
        gen += 1
        print('accepts: %s' % i)

        # pick a place to mutate; don't mess with C, just choose again
        while True:
            mut_location = random.randint(1, n_res)
            res = initial_pose.residue(mut_location)
            if res.name1() != 'C':
                break

        # choose the amino acid to mutate to; redraw if it is the current one
        toname = res.name1()
        while True:
            new_mut_key = random.randint(0, len(AAs) - 1)
            proposed_res = AAs[new_mut_key]
            if proposed_res != res.name1():
                break

        # start the mutant from a copy of the current pose, then mutate it
        mutant_pose = Pose()
        mutant_pose.assign(initial_pose)
        mutant_pose = mutate_residue_chain(mutant_pose, mut_location,
                                           proposed_res, PACK_RADIUS, sf)

        # score mutant
        mut_score = score_all(mutant_pose, sf, min_mover, beta, threshold_a,
                              threshold_b, threshold_c, threshold_inter_ab,
                              threshold_inter_ac, score_type)

        # probability that the mutation will be accepted
        probability = calc_prob_scores(mut_score['score'],
                                       init_score['score'], N)
        rand = random.random()

        # rejected proposals leave the current pose untouched
        if not (float(rand) < float(probability)):
            continue

        print("accepted")

        # make a name for the new mutant
        variant_name = (str(toname)
                        + str(initial_pose.pdb_info().number(mut_location))
                        + str(proposed_res))

        # Burn-in switch: with 0 every accepted mutation is stored
        if i >= 0:
            # save name and energy change
            row = [variant_name,
                   str(mut_score['a']), str(mut_score['b']),
                   str(mut_score['c']), str(mut_score['ab']),
                   str(mut_score['ac']),
                   str(mut_score['score'] - init_score['score']),
                   str(probability), str(gen)]
            data.append(','.join(row) + "\n")

            # save the new accepted mutation
            pdb_name = str(i) + ".pdb"
            mutant_pose.dump_pdb(pdb_name)

        # the accepted mutant becomes the new wildtype
        initial_pose = mutant_pose
        init_score = mut_score

        # update number of accepts
        i += 1
PACK_RADIUS = 10.0 AAs = ("A", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y") # Parse arguments. parser = argparse.ArgumentParser(description=__doc__) parser.add_argument('pdb_filename', help='the filename of the PDB structure to be evaluated') parser.add_argument('-m', '--minimize', action='store_true', help='flag to perform minimization after each mutation') args = parser.parse_args() # Initialize Rosetta. init(extra_options='-mute basic -mute core') # Prepare data headers. data = ['Variant,Rosetta Score,"delta-delta-G"\n'] # Load pdb file. initial_pose = pose_from_pdb(args.pdb_filename) # Set up ScoreFunction. sf = get_fa_scorefxn() # Set up MoveMap. mm = MoveMap() mm.set_bb(True) mm.set_chi(True)
#!/usr/bin/env python import sys, os, re import gzip from sys import argv from Bio.PDB import PDBParser, Selection, PDBIO, CaPPBuilder, Select parser = PDBParser(PERMISSIVE=1,QUIET=True) io = PDBIO() ppb = CaPPBuilder() import rosetta rosetta.init(extra_options = "-database /TCRmodeller/programs/PyRosetta/database -mute basic -mute core -mute protocols -renumber_pdb -ignore_unrecognized_res -ignore_zero_occupancy false") import rosetta.protocols.grafting as graft script, pdbcode, chainida, chainidb = argv class nonHetSelect(Select): def accept_residue(self,residue): if residue.id[0] == ' ': return 1 else: return 0 gzpdbfile_path = "/TCRmodeller/PDB_RELEASE/pdb_structures" + '/%s/pdb%s.ent.gz' %(pdbcode[1:3], pdbcode) gzpdbfile = gzip.open(gzpdbfile_path, 'rb') pdbfile = parser.get_structure("PDB", gzpdbfile) mychaina = pdbfile[0][chainida] io.set_structure(mychaina) io.save('tmpa.pdb', nonHetSelect()) faseqa = ""
#!/usr/bin/env python import os, sys from sys import argv import commands script, infile = argv pdbfile = os.path.splitext(os.path.basename(infile))[0] import rosetta rosetta.init(extra_options = "-database /TCRmodeller/programs/PyRosetta/database -no_optH true") def run_ala_scan(infile, resfile, chainstomove): outfile = resfile+".out" commandline = '~/Rosetta/main/source/bin/rosetta_scripts.macosclangrelease -parser:protocol ~/Desktop/tmp/alascan_sample.xml -parser:view -inout:dbms:mode sqlite3 -inout:dbms:database_name rosetta_output.db3 -no_optH true -s %s -parser:script_vars pathtoresfile=%s chainstomove=%s -overwrite > %s' % (infile, resfile, chainstomove, outfile) res, output = commands.getstatusoutput(commandline) command2 = 'grep "protocols.features.DdGFeatures: Residue " %s | awk \'{print $NF}\'' % (outfile) result, res_ddG = commands.getstatusoutput(command2) return res_ddG def print_resfile(start, end, p, infile, pdbfile, chainid): for x in xrange(int(start), int(end)+1): posenum = p.pdb_info().pdb2pose(chainid,x) if posenum == 0: print "tagu", pdbfile, chainid, x, "NA", "NA", "NA" continue resfile = pdbfile+"."+str(x)+"."+chainid+".mutation.resfile" fo = open(resfile, "w") if not p.residue(posenum).name1() == "A": mut_res = "A"
# ''' # libraries from multiprocessing import Pool import numpy as np import subprocess import argparse import glob import time import sys import os import re if '-h' not in sys.argv: import rosetta # rosetta.init() rosetta.init(extra_options=" -ex1 -ex2 -no_optH false -use_input_sc " ) # -mute basic -mute core from rosetta.protocols.simple_moves import symmetry from rosetta.utility import ostream # from rosetta.protocols import grafting ''' ### ''' def minimize_pose_backbone((Pose, ScoreFunction)): MinPose = Pose.clone() Movemap = rosetta.MoveMap() Movemap.set_bb(True) Movemap.set_chi(True) MinimizationMover = rosetta.MinMover() MinimizationMover.movemap(Movemap)
# Merge input files. print '\nGenerating starting structure file...', new_filename = merge_pdb_files(args.pdb_filename1, args.pdb_filename2, args.output_folder) print ' Success! Created new file:', new_filename # Initialize Rosetta. print '\nInitializing Rosetta...' init( extra_options='-include_sugars -include_lipids ' '-write_pdb_link_records ' '-mute basic -mute numeric -mute utility ' '-mute core -mute protocols ' #'-run:constant_seed ' #'-run:jran 618450550 ' #'-out:levels protocols.simple_moves.MinMover:500 ' #'-out:levels core.optimization.AtomTreeMinimizer:500 ' #'-out:levels core.optimization.Minimizer:500 ' #'-out:levels protocols.moves.RigidBodyMover:200 ' #'-out:levels core.optimize:500 ' #'-out:levels core.optimization.LineMinimizer:500 ' #'-out:levels protocols.simple_moves.PackRotamersMover:500 ' #'-out:levels core.pose:500 -out:levels core.io.pdb.file_data:500 -out:levels core.import_pose.import_pose:500' ) # Create pose. print '\nGenerating starting pose...' starting_pose = pose_from_pdb(new_filename) # Display stating pose. if args.pm: starting_pose.pdb_info().name(args.pdb_filename1[:-4] + "-" + \
from multiprocessing import Pool from scipy import spatial import itertools import numpy as np import subprocess import argparse import glob import sys import os import re if '-h' not in sys.argv: import rosetta # rosetta.init() rosetta.init(extra_options="-mute basic -mute core -mute protocols") from rosetta.protocols import grafting # from repo import solenoid_tools # ''' def dump_many_poses(IterablePoses, Tag): for i, Pose in enumerate(IterablePoses): rosetta.dump_pdb(Pose, '%s_n%d.pdb' % (Tag, (1 + i))) def fuse(Pose1, Pose2, SubsetSize=2): # Should continue to fiddle with the hardcoded var below,
def main():
    """
    Score how well each accepted mutation is tolerated in neighbouring
    backgrounds of the forward simulation.

    Command line: the CSV file written by the forward simulation.

    The threshold is the score on the second line of that file (the wild
    type) plus 10. Starting after a 500-line burn-in, each mutation is
    applied with mutate_residue to a window of 31 consecutive PDB files
    (steps -15 .. 15). For negative steps the target residue is the last
    character of the variant name, otherwise the first. Mutant and
    background are both repacked and minimized, then scored, and
    calc_prob_mh gives the acceptance probability. One CSV row per pair is
    printed and finally written to 'premutate_rep1_bb_T_ch_T.csv'.
    """
    # read in the file made by the forward sim (once; the handle is closed)
    inputfile = sys.argv[1]
    with open(inputfile) as handle:
        lines = handle.readlines()

    # the first entry in the file is the wild type structure,
    # calc the threshold using this
    start_stab = lines[1].split(',')[1]
    threshold = float(start_stab) + 10
    print(threshold)

    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core')

    # Constants
    PACK_RADIUS = 0
    N = 100     # population size
    beta = .6   # temperature term

    # Set up ScoreFunction
    sf = get_fa_scorefxn()

    # Set up MoveMap.
    mm = MoveMap()
    mm.set_bb(True)
    mm.set_chi(True)

    min_mover = MinMover()
    min_mover.movemap(mm)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')

    # Prepare data headers
    data = ['pdbfile_target,pdbfile_used,step,RevertTo,Change,Pos,From,OrgScore,RevScore,Change,Prob\n']

    # the Nth line marks the end of the burn-in, however long it should be
    print("staring here %s" % lines[500])

    # get all the pdb files; keep the last 1000 plus the 16 that happened
    # before the 500th mutation
    sort_list = sorted(glob.glob('*[0-9].pdb'), key=numericalSort)
    sort_list = sort_list[-1016:]

    gen = 1
    for i in range(1, len(sort_list) - 30):
        # the mutation handled in this window (same line for all 31 files)
        var_line = lines[gen + 500].split(',')[0]
        # position = all digits of the variant name (works on Python 2 and 3)
        var_loc = int("".join(ch for ch in var_line if ch.isdigit()))

        # calc reversion for the 15 moves before and after
        for step, infile in enumerate(sort_list[i:i + 31], -15):
            print(var_line)
            if step < 0:
                var_rev, old = var_line[-1:], var_line[:1]
            else:
                var_rev, old = var_line[:1], var_line[-1:]

            print("Current File Being Processed is: " + infile)
            print("revering to: %s" % var_rev)
            print("at: %s" % var_loc)

            # get the pdb you want to revert and make the reversion
            initial_pose = pose_from_pdb(infile)
            mutant_pose = mutate_residue(initial_pose, var_loc, var_rev,
                                         PACK_RADIUS, sf)

            # repack mut
            task1 = standard_packer_task(mutant_pose)
            task1.restrict_to_repacking()
            task1.or_include_current(True)
            RotamerTrialsMover(sf, task1).apply(mutant_pose)

            # repack init
            task2 = standard_packer_task(initial_pose)
            task2.restrict_to_repacking()
            task2.or_include_current(True)
            RotamerTrialsMover(sf, task2).apply(initial_pose)

            # apply min mover
            min_mover.apply(mutant_pose)
            min_mover.apply(initial_pose)

            # get scores
            variant_score = sf(mutant_pose)
            initial_score = sf(initial_pose)

            # get prob
            probability = calc_prob_mh(variant_score, initial_score, N, beta,
                                       threshold)

            # build the row once; it is both echoed and stored
            row = ",".join([str(gen + 499) + ".pdb",
                            str(infile),
                            str(step),
                            str(var_line),
                            str(var_rev),
                            str(var_loc),
                            str(old),
                            str(initial_score),
                            str(variant_score),
                            str(variant_score - initial_score),
                            str(probability)]) + "\n"
            print(row)
            data.append(row)

        gen += 1

    print('\nDONE')

    data_filename = 'premutate_rep1_bb_T_ch_T.csv'
    with open(data_filename, "w") as f:
        f.writelines(data)
def rescore_sol(folder, args):
    """
    Score all the solution state decoys in `folder` with PyRosetta.

    Only the default Rosetta full-atom score function
    (``rosetta.get_fa_scorefxn()``) is used.

    :param folder: directory holding the decoys; every entry whose name
        contains 'Sol' is scored.
    :param args: object with a ``silence`` attribute; when it is true no
        progress output is produced.
    :returns: ``(s_name_scores, header, score_erors)``. ``s_name_scores`` is
        a list of ``(pdb name, score)`` pairs sorted by score and contains
        only the PDBs that could be scored. ``header`` is the header line
        written to ``sol_score.sc``. ``score_erors`` maps the folder name to
        the PDBs that raised RuntimeError.

    Side effect: writes the unsorted scores to ``<folder>/sol_score.sc``.
    """
    import rosetta
    import rosetta.core.scoring.solid_surface
    # imported unconditionally: the start time is taken even in silent mode
    from time import time

    opts = '-include_surfaces -mute basic -mute core -mute protocols'
    rosetta.init(extra_options=opts)

    verbose = not args.silence

    # Folder name without the '_output' marker, used for messages and errors
    f_name = os.path.basename(folder).replace('_output', '')
    if verbose:
        print('\n\nFolder:\t{}'.format(f_name))

    # Narrowing the folder listing to solution models only
    sol_pdbs = sorted(entry for entry in os.listdir(folder) if 'Sol' in entry)
    count = len(sol_pdbs)
    if verbose:
        print("Scoring {} PDBs".format(count))

    # Writing unsorted scores file
    scoresc = os.path.join(folder, 'sol_score.sc')
    header = ('\t' * 6).join(['Description', 'Score'])
    with open(scoresc, 'w') as s:
        s.write(header)

    # Scoring solution PDBs and listing scores
    score_erors = {}
    sf = rosetta.get_fa_scorefxn()
    scored = []  # (name, score) pairs, so a failed PDB cannot shift the pairing
    start = time()
    for i, pdb in enumerate(sol_pdbs):
        try:
            p = rosetta.pose_from_pdb(os.path.join(folder, pdb))
            score = sf(p)
        except RuntimeError:
            print("Unable to read PDB: {}".format(pdb))
            score_erors.setdefault(f_name, []).append(pdb)
            continue

        scored.append((pdb, score))

        # Adding score to unsorted list
        with open(scoresc, 'a') as s:
            s.write('\n{}\t{}'.format(pdb, score))

        if verbose:
            elapsed = time() - start
            display_time(start, elapsed, i, count)

    # Sorting the name/score pairs by score
    s_name_scores = sorted(scored, key=lambda x: x[1])

    return s_name_scores, header, score_erors
def main():
    """
    Fold a hard-coded sequence in two stages and dump the result as a PDB.

    Stage one hands a centroid pose, a 9-mer and a 3-mer fragment mover and
    a Monte Carlo object to ``low_res_folding``.  Stage two switches the
    pose to the "fa_standard" residue type set and runs five rounds of
    small/minimise/shear/minimise moves, lowering the movers' ``angle_max``
    from 25 to 5 in steps of 5.  Fragment files are read from
    ``<parent_path>/resources`` and ``ara.pdb`` is written to
    ``<parent_path>/results/``; the working directory is changed to each in
    turn.  The number of moves per round comes from the module-level
    ``simulation_iter``.
    """
    parent_path = "/Users/yanxia/Documents/Workspace/PyRosetta_Practice/"
    os.chdir(parent_path + "resources")
    rosetta.init()

    # Centroid starting pose and the two score functions.
    pose = rosetta.Pose()
    rosetta.make_pose_from_sequence(
        pose, "GSSGSSGTGVKPYGCSQCAKTFSLKSQLIVHQRSHTGVKPSGPSSG", "centroid")
    fa_scorefxn = rosetta.create_score_function("standard")
    ct_scorefxn = rosetta.create_score_function("score3")
    kt_value = 1

    # Fragment libraries.
    frags_9 = rosetta.ConstantLengthFragSet(9)
    frags_3 = rosetta.ConstantLengthFragSet(3)
    frags_9.read_fragment_file("zf_9mer.txt")
    frags_3.read_fragment_file("zf_3mer.txt")

    # Every backbone torsion is allowed to move.
    movemap = rosetta.MoveMap()
    movemap.set_bb(True)
    move_9mer = rosetta.ClassicFragmentMover(frags_9, movemap)
    move_3mer = rosetta.ClassicFragmentMover(frags_3, movemap)

    mc_low = rosetta.MonteCarlo(pose, ct_scorefxn, kt_value)

    # Small and shear movers share the same settings.
    n_moves = 5
    small_mover = rosetta.SmallMover(movemap, kt_value, n_moves)
    shear_mover = rosetta.ShearMover(movemap, kt_value, n_moves)

    min_mover = rosetta.MinMover()
    min_mover.movemap(movemap)
    min_mover.score_function(fa_scorefxn)
    min_mover.min_type("linmin")
    min_mover.tolerance(0.5)

    # small -> min -> shear -> min
    seq_mover = rosetta.SequenceMover()
    for stage in (small_mover, min_mover, shear_mover, min_mover):
        seq_mover.add_mover(stage)

    # Low-resolution pass (the pose is already in centroid mode).
    low_res_folding(pose, move_9mer, move_3mer, mc_low)

    # High-resolution pass.
    rosetta.SwitchResidueTypeSetMover("fa_standard").apply(pose)
    mc_high = rosetta.MonteCarlo(pose, fa_scorefxn, kt_value)
    for round_no in range(5):
        print("before:  {}".format(fa_scorefxn(pose)))
        max_angle = 25 - 5 * round_no
        for mover in (small_mover, shear_mover):
            for ss_type in ("H", "E", "S"):
                mover.angle_max(ss_type, max_angle)
        for _ in range(simulation_iter):
            seq_mover.apply(pose)
            mc_high.boltzmann(pose)
        print("after:  {}".format(fa_scorefxn(pose)))

    os.chdir(parent_path + "results/")
    pose.dump_pdb("ara.pdb")
    print("Done!")
def __init__(self, pdb, centroid=False, pdb_file='', frag=False, nine_mer=False, local=False, local_size=3,
             full=False, rosetta_refinement=False):
    """
    Initialise the search bookkeeping, the Rosetta pose and the optional fragment mover.

    :param pdb: :type string: pdb ID of the protein to be folded (fetched with pose_from_rcsb for its sequence
                and native energy)
    :param centroid: :type boolean: Option for use of centroid model and the 'score3' score function
    :param pdb_file: :type string: if not empty, the search starts from the pose read from this file
    :param frag: :type boolean: if true, build a fragment mover from a fragment file
    :param nine_mer: :type boolean: with frag, read the 9-mer fragment file instead of the 3-mer one
    :param local: :type boolean: if true, the novelty archive becomes a list of deques
    :param local_size: :type int: fragment size used by the local mover
    :param full: :type boolean: stored as self.full (use full mover)
    :param rosetta_refinement: :type boolean: if true and no pdb_file is given, FastRelax is applied to the
                               starting pose
    """
    # Plain option flags
    self.centroid = centroid  # If true use centroid scoring
    self.frag = frag  # If true use frag mover
    self.local = local  # Whether to use local mover
    self.local_size = local_size  # For local mover, size of fragment to move
    self.full = full  # If true use full mover
    self.rosetta_refinement = rosetta_refinement  # If true refine rosetta fold

    # Search bookkeeping
    self.loops = 0  # Stores generation for which energy score was last calculated
    self.scores = {}  # Dictionary container for current gen genomes/scores
    self.scores_list = []  # List container of current gen scores for search
    self.gen_added = 0  # Last gen in which a point was added to novelty archive
    self.threshold = 10  # Novelty threshold for which point is added to archive
    self.acceptance_threshold = 100  # Novelty threshold for which move is accepted automatically
    self.num_added = 0  # Number of points added to novelty archive
    self.switch = False  # All atom switch
    self.temperature = 5  # Monte Carlo temperature
    self.mover_range = 10  # +-range of the angle in degrees in which mover moves residue
    self.novelty_archive = deque()  # Initialize novelty archive
    self.last_lowest = 0  # For use in novelty loop
    self.last_lowest_10 = 0  # For use in clear main loop

    # Rosetta inits
    rosetta.init()  # Initialize rosetta libraries
    pose_native = pose_from_rcsb(pdb)  # Natively folded protein
    sequence = pose_native.sequence()
    n_res = len(sequence)
    self.scorefxn = rosetta.get_fa_scorefxn()  # All-atom energy score function

    # Starting pose: a supplied pdb file wins; otherwise build from the sequence
    # and, when refinement is requested, relax that structure first.
    if pdb_file != '':
        self.pose = rosetta.pose_from_pdb(pdb_file)
    else:
        self.pose = rosetta.pose_from_sequence(sequence)
        if rosetta_refinement:
            relax = rosetta.FastRelax()
            relax.set_scorefxn(self.scorefxn)
            relax.apply(self.pose)

    if centroid:  # Switch pose to centroid if centroid option is true
        rosetta.SwitchResidueTypeSetMover("centroid").apply(self.pose)

    self.c_size = n_res * 2  # Number of residues * 2 (phi and psi for each residue)
    # The native energy is taken with the all-atom function, before any centroid swap.
    self.native_energy = self.scorefxn(pose_native)
    if centroid:  # Switch rosetta score function if centroid
        self.scorefxn = rosetta.create_score_function('score3')

    # Flat list of torsions: phi(1), psi(1), phi(2), psi(2), ...
    self.conformation = []
    for res_no in range(1, n_res + 1):
        self.conformation.append(self.pose.phi(res_no))
        self.conformation.append(self.pose.psi(res_no))

    self.mc_energy = self.scorefxn(self.pose) + 500  # Energy to be used as minimal criteria
    self.lowest = self.scorefxn(self.pose)  # Lowest energy in archive

    if frag:
        frag_len, frag_file = (9, "aat000_09_05-1.200_v1_3") if nine_mer else (3, "aat000_03_05-1.200_v1_3")
        fragset = rosetta.ConstantLengthFragSet(frag_len)
        fragset.read_fragment_file(frag_file)
        movemap = rosetta.MoveMap()
        movemap.set_bb(True)
        # The attribute keeps its historical name even when 9-mers are loaded.
        self.mover_3mer = rosetta.ClassicFragmentMover(fragset, movemap)

    if local:  # For local, initialize na with appropriate number of deques
        n_archives = self.c_size // 2 // self.local_size
        self.novelty_archive = [deque() for _ in range(n_archives)]
def _parse_bool_flag(text):
    """Convert a command-line token such as 'True' or 'False' into a bool."""
    token = str(text).strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    # The empty string stays False, as it was under ``type=bool``.
    if token in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError('expected True or False, got %r' % (text,))


def main(argv=None):
    """
    Generate constraint (.cst) files for each input pdb.

    For every pdb the pose is loaded, given a fresh PDBInfo and dumped next
    to the outputs; constraints from ``get_pose_constraints`` are then
    written as ``_All``, ``_BBBB``, ``_BBSC``, ``_SCSC`` and ``_Disulf``
    ``.cst`` files whose names derive from ``<out><pdb>``.

    :param argv: optional list of command-line arguments; when given it
        replaces ``sys.argv[1:]`` before parsing
    """
    if argv is not None:
        sys.argv = [sys.argv[0]] + list(argv)
    print('sys.argv {}'.format(sys.argv))

    ArgParser = argparse.ArgumentParser(
        description=' nc_cst_gen.py arguments ( -help ) %s' % InfoString)
    # Required arguments:
    ArgParser.add_argument('-pdbs', type=str, nargs='+',
                           help=' input pdbs ', required=True)
    # Optional arguments:
    ArgParser.add_argument('-out', type=str,
                           help=' output directory ', default='./')
    ArgParser.add_argument('-max_dist', type=float, default=3.4,
                           help=' distance between the oxygens and downstreams ')
    ArgParser.add_argument('-min_seq_sep', type=int, default=3,
                           help=' minimum seperation in primary sequece ')
    ArgParser.add_argument('-upstream_atom', type=str, default=r'[ON]\w?\d?',
                           help=' grep for upstream atoms ')
    ArgParser.add_argument('-downstream_atom', type=str, default=r'[ON]\w?\d?',
                           help=' grep for downstream atoms ')
    ArgParser.add_argument('-num_repeats', type=int, default=5,
                           help=' number of repeats to extrapolate contacts for ')
    ArgParser.add_argument('-min_sasa', type=float, default=0.0,
                           help=' floor for weighting downstream oxygen contacts ')
    ArgParser.add_argument('-min_sasa_weight', type=float, default=1.0,
                           help=' weight of floor for downstream oxygen contacts ')
    ArgParser.add_argument('-max_sasa', type=float, default=5.0,
                           help=' ceiling for cst weighting downstream oxygen contacts ')
    ArgParser.add_argument('-max_sasa_weight', type=float, default=0.1,
                           help=' weight of ceiling for downstream oxygen contacts ')
    ArgParser.add_argument('-sasa_probe_radius', type=float, default=0.8,
                           help=' probe radius for sasa calculations ')
    # ``type=bool`` would turn every non-empty string (including "False")
    # into True, so the flags are parsed explicitly.
    ArgParser.add_argument('-renumber_pose', type=_parse_bool_flag, default=True,
                           help='True|False renumber pdb residues ')
    ArgParser.add_argument('-disulfide', type=_parse_bool_flag, default=True,
                           help='True|False include disulfide constraints ')
    Args = ArgParser.parse_args()

    # Make sure the output prefix ends with a separator; an empty prefix is
    # left alone so outputs land next to the input path.
    if Args.out and not Args.out.endswith('/'):
        Args.out = Args.out + '/'

    import rosetta
    rosetta.init(extra_options="-mute basic -mute core -mute protocols")

    TotalPdbs = len(Args.pdbs)

    # Instance of class to convert sasas to cst weight
    SasaScale = sasa_scale(Args.min_sasa, Args.min_sasa_weight,
                           Args.max_sasa, Args.max_sasa_weight)

    for iPdb, Pdb in enumerate(Args.pdbs):
        print(' Working with %s; %d of %d total pdbs ' % (Pdb, iPdb + 1, TotalPdbs))

        Pose = rosetta.pose_from_pdb(Pdb)
        OutputPdb = Args.out + Pdb

        # Sets pdb info so residues in dumped pdbs are same as index
        Pose.pdb_info(rosetta.core.pose.PDBInfo(Pose))

        # NOTE(review): the '_renumbered' suffix is used when renumber_pose is
        # false, which looks inverted -- kept as found; confirm the intent.
        if Args.renumber_pose:
            rosetta.dump_pdb(Pose, OutputPdb)
        else:
            rosetta.dump_pdb(Pose, OutputPdb.replace('.pdb', '_renumbered.pdb'))

        AllConstraints, SortedConstraints = get_pose_constraints(
            Pose, Args.max_dist, Args.min_seq_sep, Args.sasa_probe_radius,
            SasaScale, Args.upstream_atom, Args.downstream_atom, True)

        # Defined up front so the _Disulf file can still be written (empty)
        # when disulfide constraints are switched off.
        DisulfAllConstraints = []
        if Args.disulfide:
            DisulfAllConstraints, _ = get_pose_constraints(
                Pose, 3.5, 2, Args.sasa_probe_radius, SasaScale,
                'SG', 'SG', False)
            AllConstraints.extend(DisulfAllConstraints)

        with open(OutputPdb.replace('.pdb', '_All.cst'), 'w') as CstFile:
            CstFile.write('\n'.join(AllConstraints) + '\n')

        BackboneBackboneCst, BackboneSidechainCst, SidechainSidechainCst = SortedConstraints
        for Suffix, Constraints in (('_BBBB.cst', BackboneBackboneCst),
                                    ('_BBSC.cst', BackboneSidechainCst),
                                    ('_SCSC.cst', SidechainSidechainCst),
                                    ('_Disulf.cst', DisulfAllConstraints)):
            with open(OutputPdb.replace('.pdb', Suffix), 'w') as CstFile:
                CstFile.write('\n'.join(Constraints) + '\n')
def main():
    """
    Run an accept/reject mutation simulation on one structure and save a CSV.

    Reads the pdb named on the command line plus the matching fasta file
    (same path with '/structures/' -> '/fastas/' and '.pdb' -> '.fasta'),
    repacks and minimises the starting pose, then proposes random single
    residue substitutions until ``10 * len(sequence)`` of them have been
    accepted.  Acceptance uses the probability returned by ``calc_prob_mh``.
    One CSV row per accepted mutation is written under ``../Results/``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('pdb_filename', action="store", type=str)
    parser.add_argument('replicate_number', action="store", type=int)
    inputs = parser.parse_args()

    pdb_file = inputs.pdb_filename
    prot_name = pdb_file.split('/')[-1].split('.')[0]

    # Timer for the total run time reported at the end.
    t0 = time()

    fasta_file = pdb_file.replace('/structures/', '/fastas/').replace('.pdb', '.fasta')
    records = list(SeqIO.parse(fasta_file, 'fasta'))
    assert len(records) == 1
    wt_seq = str(records[0].seq)

    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core -rebuild_disulf false -detect_disulf false')

    ########################
    # Constants
    ########################
    PACK_RADIUS = 12.0
    AAs = ("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
           "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")
    # For each residue type, the substitutions that actually change it.
    AAs_choice_dict = {}
    for aa in AAs:
        AAs_choice_dict[aa] = [other_aa for other_aa in AAs if other_aa != aa]
    max_accept_mut = 10 * len(wt_seq)  # number of mutations to accept
    N = 1000  # population size
    beta = 1  # temperature term
    threshold_fraction = 0.5  # fraction of the WT stability value to aim for

    data = ['Variant,Rosetta Score,"delta-delta-G",Probability,Generation\n']

    initial_pose = pose_from_pdb(pdb_file)
    pdb_file = pdb_file.replace('.clean', '')

    sf = get_fa_scorefxn()

    mm = MoveMap()
    mm.set_bb(True)
    mm.set_chi(True)

    # Pack and minimize the initial pose to remove clashes.
    pre_pre_packing_score = sf(initial_pose)
    task = standard_packer_task(initial_pose)
    task.restrict_to_repacking()
    task.or_include_current(True)
    RotamerTrialsMover(sf, task).apply(initial_pose)

    min_mover = MinMover()
    min_mover.movemap(mm)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')
    min_mover.apply(initial_pose)

    current_score = sf(initial_pose)

    # Threshold for selection
    threshold = current_score * threshold_fraction
    print('threshold: {}'.format(threshold))

    data.append('WT,' + str(current_score) + ',0.0,0.0,0\n')

    n_res = initial_pose.total_residue()

    accepted = 0
    gen = 0
    while accepted < max_accept_mut:
        gen += 1  # every proposal counts as one generation

        # Pick a position and a replacement that differs from the current residue.
        mut_location = random.randint(1, n_res)
        res = initial_pose.residue(mut_location)
        wt_aa = res.name1()
        proposed_res = random.choice(AAs_choice_dict[wt_aa])

        mutant_pose = mutate_residue(initial_pose, mut_location, proposed_res, PACK_RADIUS, sf)
        variant_score = sf(mutant_pose)
        probability = calc_prob_mh(variant_score, current_score, N, beta, threshold)

        if random.random() >= probability:
            continue  # rejected; try another proposal

        # Accepted: record it and make the mutant the new reference.
        variant_name = wt_aa + str(initial_pose.pdb_info().number(mut_location)) + str(proposed_res)
        row = [variant_name, variant_score, variant_score - current_score, probability, gen]
        data.append(",".join(str(field) for field in row) + "\n")

        initial_pose = mutant_pose
        current_score = variant_score
        accepted += 1

    print('\nMutations and scoring complete.')
    t1 = time()

    # Output results.
    output_filename = '../Results/{}/{}_thresh={}_Neff={}_beta={}_i={}.csv'.format(
        prot_name, prot_name, threshold_fraction, N, beta, inputs.replicate_number)
    with open(output_filename, "w") as outfile:
        outfile.writelines(data)

    print('Data written to: {}'.format(output_filename))
    print('program takes %f' % (t1 - t0))
def main(): #takes name of pdb file without the extention args = sys.argv pdb_file = args[1] #set up timer to figure out how long the code took to run t0 = time() # Initialize Rosetta. init(extra_options='-mute basic -mute core') # Constants PACK_RADIUS = 10.0 #Amino acids, notice there is no C AAs = ("A", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y") #Number of mutations to accept max_accept_mut = 5000 #Population size N = 100 #Beta (temp term) beta = 1 #Prepare data headers data = ['Variant,Rosetta Score,"delta-delta-G",Probability,Generation\n'] #Load and clean up pdb file name = pdb_file + ".pdb" cleanATOM(name) clean_name = pdb_file + ".clean.pdb" initial_pose = pose_from_pdb(clean_name) #Set up ScoreFunction sf = get_fa_scorefxn() #Set up MoveMap. mm = MoveMap() mm.set_bb(True) mm.set_chi(True) #Pack and minimize initial pose to remove clashes. pre_pre_packing_score = sf(initial_pose) task = standard_packer_task(initial_pose) task.restrict_to_repacking() task.or_include_current(True) pack_rotamers_mover = RotamerTrialsMover(sf, task) pack_rotamers_mover.apply(initial_pose) min_mover = MinMover() min_mover.movemap(mm) min_mover.score_function(sf) min_mover.min_type('dfpmin_armijo_nonmonotone') min_mover.apply(initial_pose) post_pre_packing_score = sf(initial_pose) pdb_name = str(pdb_file) + "_min.pdb" initial_pose.dump_pdb(pdb_name) #Set threshold for selection #threshold = post_pre_packing_score/2 #threshold = post_pre_packing_score data.append(str(pdb_file) + str(post_pre_packing_score) + ',0.0,0.0,0\n') data_filename = pdb_file + '.score' with open(data_filename, "w") as f: f.writelines(data) print 'Data written to:', data_filename '''
def setUpClass(cls):
    """Initialise rosetta with the '-out:levels' / 'all:warning' arguments and set the OpenMP thread count to 1."""
    import rosetta

    init_args = ("-out:levels", "all:warning")
    rosetta.init(*init_args)

    # Imported only after rosetta has been initialised, as in the original ordering.
    from interface_fragment_matching.parallel import openmp_utils as omp_utils

    omp_utils.omp_set_num_threads(1)
def main():
    """
    Compare a wild-type structure against a numbered series of relaxed models.

    Command line: ``<wt pdb> <compare name> [-i]``.  Relaxed files are
    located with ``relax_filename(compareName, n)`` for n = 1, 2, ... until
    one does not exist.  Each model is aligned with ``kabsch_alignment``,
    per-residue values are accumulated by ``calculateRMS`` and the
    ``rosetta.CA_rmsd`` value is recorded.  The per-residue averages and the
    overall average are written to ``<wt stem>_vs_<compare name>.txt``;
    with ``-i`` each row is prefixed by its residue index.
    """
    # calculateRMS is expected to accumulate into this module-level total;
    # it is read and reset after every model.
    global total_square

    opts, args = getopt.getopt(sys.argv[3:], 'i')
    show_index = 0
    for o in opts:
        if '-i' in o:
            show_index = 1

    rosetta.init()

    file_index = 1
    wtName = sys.argv[1]
    compareName = sys.argv[2]
    relax_name = relax_filename(compareName, file_index)
    outputName = wtName.split('.')[0] + '_vs_' + compareName + ".txt"

    # The output file is opened (and truncated) before any scoring, as
    # before, but the context manager now closes it even if a later step
    # raises.
    with open(outputName, 'w') as output:
        pose1 = rosetta.Pose(wtName)
        total_residue = pose1.total_residue()
        rmsdList = [0.00] * total_residue
        totalRMSD = []

        while os.path.isfile(relax_name):
            pose2 = rosetta.Pose(relax_name)
            if pose1.total_residue() != pose2.total_residue():
                print("Residue number not equal")
                break

            residues = range(1, total_residue + 1)
            kabsch_alignment(pose1, pose2, residues, range(1, total_residue + 1))
            for i in range(1, total_residue + 1):
                calculateRMS(pose1, pose2, i, rmsdList)

            ro_rmsd = rosetta.CA_rmsd(pose1, pose2)
            totalRMSD.append(ro_rmsd)
            print("rosetta generated rmsd: " + str(ro_rmsd))

            me_rmsd = math.sqrt(total_square / total_residue)
            print("me generated rmsd: " + str(me_rmsd))
            total_square = 0.0

            file_index = file_index + 1
            relax_name = relax_filename(compareName, file_index)

        # NOTE(review): when no model was processed file_index stays at 1, so
        # the averages below divide by 1 and outputMinMax receives an empty
        # list -- kept as found; confirm whether this case should stop early.
        if file_index == 1:
            print("No relaxation file")
        else:
            file_index = file_index - 1

        rmsd_total = 0.0
        if file_index > 0:
            rmsd_total = sum(totalRMSD, rmsd_total)
            averageRMSD = rmsd_total / file_index
            print("average rmsd:  " + str(averageRMSD))

            outputMinMax(totalRMSD, compareName)

            print("outputing " + outputName + "...")
            for index in range(1, total_residue + 1):
                rmsdList[index - 1] = rmsdList[index - 1] / file_index
                if show_index:
                    output.write(str(index) + '\t' + str(rmsdList[index - 1]) + '\n')
                else:
                    output.write(str(rmsdList[index - 1]) + '\n')
            output.write(outputName.split('.')[0] + "_relax\taverage rmsd: " + str(averageRMSD))

    print("Done")
import rosetta #args = '-chemical:exclude_patches VirtualDNAPhosphate' #args = '-add_orbitals' #rosetta.init(extra_options=args) rosetta.init() from rosetta.protocols.grafting import CCDEndsGraftMover from rosetta.protocols.loops.loop_closure.ccd import CCDLoopClosureMover from rosetta.protocols.loops.loop_mover.refine import LoopMover_Refine_CCD import salign import os import logging class StructureBuilderPyRosetta(): def __init__(self, scaffold_pdb, gap_descriptions, chain, protein_only = True): self.logger = logging.getLogger(__name__) # Scoring function to determine probability of moves self.scorefxn = rosetta.core.scoring.get_score_function() self.res_type = 'fa_standard' # Load pose from input PDB file self.scaffold_pose = rosetta.pose_from_pdb(scaffold_pdb)
def main():
    """
    Score reversions of recorded mutations against later structures.

    Variant names are read from the first column of ``mh_rep_3_37.csv``.
    For each generation ``gen`` (starting at 1) the first character of the
    variant on line ``gen + 1`` is taken as the residue to revert to and the
    digits in the name as the position.  That reversion is applied with
    ``mutate_residue`` to each of 15 consecutive PDB files (sorted with
    ``numericalSort``) and the score change and ``calc_prob_mh`` probability
    are recorded.  Results go to ``rep_3_mh_37_rev_15_score.csv``.
    """
    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core')

    # Constants
    PACK_RADIUS = 10.0
    N = 37  # population size
    beta = 1  # temperature term
    # Half of the first stored score in the input files.
    threshold = float(-534.687360627 / 2)

    sf = get_fa_scorefxn()

    data = ['Generation,RevertTo,OrgScore,RevScore,Change,Prob\n']

    # Read the whole reversions file up front and close it straight away.
    with open('mh_rep_3_37.csv') as variant_scores:
        lines = variant_scores.readlines()

    gen = 1

    # Glob and sort once; the listing does not change while scoring.
    sort_list = sorted(glob.glob('*.pdb'), key=numericalSort)

    for i in range(1, len(sort_list) - 15):
        # Score the reversion in the next 15 structures.
        for infile in sort_list[i:i + 15]:
            var_line = lines[gen + 1].split(',')[0]
            # Join the digits explicitly: filter() returns an iterator on
            # Python 3 and a str on Python 2; this works for both.
            var_loc = int(''.join(filter(str.isdigit, var_line)))
            var_rev = var_line[:1]

            print("Current File Being Processed is: " + infile)
            initial_pose = pose_from_pdb(infile)
            initial_score = sf(initial_pose)
            print("init scored")

            mutant_pose = mutate_residue(initial_pose, var_loc, var_rev, PACK_RADIUS, sf)
            variant_score = sf(mutant_pose)
            probability = calc_prob_mh(variant_score, initial_score, N, beta, threshold)

            row = (str(gen) + "," + var_line + "," + str(initial_score) + "," +
                   str(variant_score) + "," + str(variant_score - initial_score) + "," +
                   str(probability) + "\n")
            print(row)
            data.append(row)
        gen += 1

    print('\nDONE')

    data_filename = 'rep_3_mh_37_rev_15_score.csv'
    with open(data_filename, "w") as f:
        f.writelines(data)
from __future__ import print_function # OpenMM imports from simtk.openmm import * from simtk.openmm.app import * from simtk.unit import * # ParmEd imports from parmed import load_rosetta # PyRosetta imports from toolbox import mutate_residue from rosetta import init, pose_from_sequence # Initialize PyRosetta init() # Create Ala12 print('Creating Ala12...') seq = 12*'A' pose = pose_from_sequence(seq) # Mutate residue 5 to K print('Mutating Fifth Ala to Lys...') mutant = mutate_residue(pose, 5, 'K') # Load mutant into ParmEd print('Loading into ParmEd...') struct = load_rosetta(mutant) # Load AMBER-99SBildn and TIP3P parameters
#! /usr/bin/env python # Thanks Alex Ford! from interface_fragment_matching.utility.analysis import AtomicSasaCalculator from multiprocessing import Process from scipy import spatial import numpy as np import subprocess import argparse import sys import os import rosetta rosetta.init(extra_options = "-mute basic -mute core -mute protocols") ThreeToOne = {'GLY':'G','ALA':'A','VAL':'V','LEU':'L','ILE':'I','MET':'M','PRO':'P','PHE':'F','TRP':'W','SER':'S','THR':'T','ASN':'N','GLN':'Q','TYR':'Y','CYS':'C','CYD':'C','LYS':'K','ARG':'R','HIS':'H','ASP':'D','GLU':'E','STO':'*','UNK':'U'} ChainAlphabetIndices = {'A':1, 'B':2, 'C':3, 'D':4, 'E':5, 'F':6, 'G':7, 'H':8, 'I':9, 'J':10, 'K':11, 'L':12, 'M':13, 'N':14, 'O':15, 'P':16, 'Q':17, 'R':18, 'S':19, 'T':20, 'U':21, 'V':22, 'W':23, 'X':24, 'Y':25, 'Z':26 } class alpha_carbon: """ Calpha node for wDAG searches of proteins for repeats """ def __init__(self, Number, CoordinateArray): self.CoordinateArray = CoordinateArray self.Number = int(Number) self.DownstreamNeighbors = {} # keyed with residue number, value is displacement vector to that residue's own CA # start of objects: class pdb_wdag: """ Init method takes protein and spawns Calpha instances to populate """
for r in range(1, N + 1): C = { r: rosetta.protocols.moves.XC_red, 1 + N - r: rosetta.protocols.moves.XC_white } #print r, N, C pymol.send_colors(pose, C, default_color=rosetta.protocols.moves.XC_blue) #pymol.send_energy( pose_s ) time.sleep(.1) rosetta.init() pose = rosetta.Pose() pose.name = 'CustomNamedPose' pose_s = rosetta.Pose() rosetta.pose_from_pdb(pose, "test/data/test_in.pdb") rosetta.pose_from_pdb(pose_s, "test/data/test_in_short.pdb") scorefxn = rosetta.create_score_function('standard') scorefxn(pose) pymol = rosetta.PyMOL_Mover() pymol.apply(pose_s) coloring_demo(pose_s)
def main():
    """
    Fold a hard-coded sequence in two stages and dump the result as a PDB.

    Stage one hands a centroid pose, a 9-mer and a 3-mer fragment mover and
    a Monte Carlo object to ``low_res_folding``.  Stage two switches the
    pose to the "fa_standard" residue type set and runs five rounds of
    small/minimise/shear/minimise moves, lowering the movers' ``angle_max``
    from 25 to 5 in steps of 5.  Fragment files are read from
    ``<parent_path>/resources`` and ``ara.pdb`` is written to
    ``<parent_path>/results/``; the working directory is changed to each in
    turn.  The number of moves per round comes from the module-level
    ``simulation_iter``.
    """
    parent_path = "/Users/yanxia/Documents/Workspace/PyRosetta_Practice/"
    os.chdir(parent_path + "resources")
    rosetta.init()

    # Centroid starting pose and the two score functions.
    pose = rosetta.Pose()
    rosetta.make_pose_from_sequence(
        pose, "GSSGSSGTGVKPYGCSQCAKTFSLKSQLIVHQRSHTGVKPSGPSSG", "centroid")
    fa_scorefxn = rosetta.create_score_function("standard")
    ct_scorefxn = rosetta.create_score_function("score3")
    kt_value = 1

    # Fragment libraries.
    frags_9 = rosetta.ConstantLengthFragSet(9)
    frags_3 = rosetta.ConstantLengthFragSet(3)
    frags_9.read_fragment_file("zf_9mer.txt")
    frags_3.read_fragment_file("zf_3mer.txt")

    # Every backbone torsion is allowed to move.
    movemap = rosetta.MoveMap()
    movemap.set_bb(True)
    move_9mer = rosetta.ClassicFragmentMover(frags_9, movemap)
    move_3mer = rosetta.ClassicFragmentMover(frags_3, movemap)

    mc_low = rosetta.MonteCarlo(pose, ct_scorefxn, kt_value)

    # Small and shear movers share the same settings.
    n_moves = 5
    small_mover = rosetta.SmallMover(movemap, kt_value, n_moves)
    shear_mover = rosetta.ShearMover(movemap, kt_value, n_moves)

    min_mover = rosetta.MinMover()
    min_mover.movemap(movemap)
    min_mover.score_function(fa_scorefxn)
    min_mover.min_type("linmin")
    min_mover.tolerance(0.5)

    # small -> min -> shear -> min
    seq_mover = rosetta.SequenceMover()
    for stage in (small_mover, min_mover, shear_mover, min_mover):
        seq_mover.add_mover(stage)

    # Low-resolution pass (the pose is already in centroid mode).
    low_res_folding(pose, move_9mer, move_3mer, mc_low)

    # High-resolution pass.
    rosetta.SwitchResidueTypeSetMover("fa_standard").apply(pose)
    mc_high = rosetta.MonteCarlo(pose, fa_scorefxn, kt_value)
    for round_no in range(5):
        print("before:  {}".format(fa_scorefxn(pose)))
        max_angle = 25 - 5 * round_no
        for mover in (small_mover, shear_mover):
            for ss_type in ("H", "E", "S"):
                mover.angle_max(ss_type, max_angle)
        for _ in range(simulation_iter):
            seq_mover.apply(pose)
            mc_high.boltzmann(pose)
        print("after:  {}".format(fa_scorefxn(pose)))

    os.chdir(parent_path + "results/")
    pose.dump_pdb("ara.pdb")
    print("Done!")
### libraries from multiprocessing import Pool import numpy as np import subprocess import argparse import glob import time import sys import os import re if '-h' not in sys.argv: import rosetta # rosetta.init() rosetta.init(extra_options = " -ex1 -ex2 -no_optH false -use_input_sc ") # "-mute basic -mute core -mute protocols" from rosetta.protocols import grafting # from repo import solenoid_tools from generate_backbones import fuse from generate_backbones import get_residue_array from expand_constraints import constraint_extrapolator from expand_constraints import pose_has from expand_constraints import set_all_weights_zero # ''' # sys.argv = [ sys.argv[0], '-ref_pdb', '1EZG_Relax.pdb', '-ref_cst', '1EZG_Relax_All.cst', '-repeat_tag', 'rep24_1EZG_Relax']
def _parse_bool_flag(text):
    """Convert a command-line token such as 'True' or 'False' into a bool."""
    token = str(text).strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    # The empty string stays False, as it was under ``type=bool``.
    if token in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError('expected True or False, got %r' % (text,))


def main(argv=None):
    """
    Generate constraint (.cst) files for each input pdb.

    For every pdb the pose is loaded, given a fresh PDBInfo and dumped next
    to the outputs; constraints from ``get_pose_constraints`` are then
    written as ``_All``, ``_BBBB``, ``_BBSC``, ``_SCSC`` and ``_Disulf``
    ``.cst`` files whose names derive from ``<out><pdb>``.

    :param argv: optional list of command-line arguments; when given it
        replaces ``sys.argv[1:]`` before parsing
    """
    if argv is not None:
        sys.argv = [sys.argv[0]] + list(argv)
    print('sys.argv {}'.format(sys.argv))

    ArgParser = argparse.ArgumentParser(
        description=' nc_cst_gen.py arguments ( -help ) %s' % InfoString)
    # Required arguments:
    ArgParser.add_argument('-pdbs', type=str, nargs='+',
                           help=' input pdbs ', required=True)
    # Optional arguments:
    ArgParser.add_argument('-out', type=str,
                           help=' output directory ', default='./')
    ArgParser.add_argument('-max_dist', type=float, default=3.4,
                           help=' distance between the oxygens and downstreams ')
    ArgParser.add_argument('-min_seq_sep', type=int, default=3,
                           help=' minimum seperation in primary sequece ')
    ArgParser.add_argument('-upstream_atom', type=str, default=r'[ON]\w?\d?',
                           help=' grep for upstream atoms ')
    ArgParser.add_argument('-downstream_atom', type=str, default=r'[ON]\w?\d?',
                           help=' grep for downstream atoms ')
    ArgParser.add_argument('-num_repeats', type=int, default=5,
                           help=' number of repeats to extrapolate contacts for ')
    ArgParser.add_argument('-min_sasa', type=float, default=0.0,
                           help=' floor for weighting downstream oxygen contacts ')
    ArgParser.add_argument('-min_sasa_weight', type=float, default=1.0,
                           help=' weight of floor for downstream oxygen contacts ')
    ArgParser.add_argument('-max_sasa', type=float, default=5.0,
                           help=' ceiling for cst weighting downstream oxygen contacts ')
    ArgParser.add_argument('-max_sasa_weight', type=float, default=0.1,
                           help=' weight of ceiling for downstream oxygen contacts ')
    ArgParser.add_argument('-sasa_probe_radius', type=float, default=0.8,
                           help=' probe radius for sasa calculations ')
    # ``type=bool`` would turn every non-empty string (including "False")
    # into True, so the flags are parsed explicitly.
    ArgParser.add_argument('-renumber_pose', type=_parse_bool_flag, default=True,
                           help='True|False renumber pdb residues ')
    ArgParser.add_argument('-disulfide', type=_parse_bool_flag, default=True,
                           help='True|False include disulfide constraints ')
    Args = ArgParser.parse_args()

    # Make sure the output prefix ends with a separator; an empty prefix is
    # left alone so outputs land next to the input path.
    if Args.out and not Args.out.endswith('/'):
        Args.out = Args.out + '/'

    import rosetta
    rosetta.init(extra_options="-mute basic -mute core -mute protocols")

    TotalPdbs = len(Args.pdbs)

    # Instance of class to convert sasas to cst weight
    SasaScale = sasa_scale(Args.min_sasa, Args.min_sasa_weight,
                           Args.max_sasa, Args.max_sasa_weight)

    for iPdb, Pdb in enumerate(Args.pdbs):
        print(' Working with %s; %d of %d total pdbs ' % (Pdb, iPdb + 1, TotalPdbs))

        Pose = rosetta.pose_from_pdb(Pdb)
        OutputPdb = Args.out + Pdb

        # Sets pdb info so residues in dumped pdbs are same as index
        Pose.pdb_info(rosetta.core.pose.PDBInfo(Pose))

        # NOTE(review): the '_renumbered' suffix is used when renumber_pose is
        # false, which looks inverted -- kept as found; confirm the intent.
        if Args.renumber_pose:
            rosetta.dump_pdb(Pose, OutputPdb)
        else:
            rosetta.dump_pdb(Pose, OutputPdb.replace('.pdb', '_renumbered.pdb'))

        AllConstraints, SortedConstraints = get_pose_constraints(
            Pose, Args.max_dist, Args.min_seq_sep, Args.sasa_probe_radius,
            SasaScale, Args.upstream_atom, Args.downstream_atom, True)

        # Defined up front so the _Disulf file can still be written (empty)
        # when disulfide constraints are switched off.
        DisulfAllConstraints = []
        if Args.disulfide:
            DisulfAllConstraints, _ = get_pose_constraints(
                Pose, 3.5, 2, Args.sasa_probe_radius, SasaScale,
                'SG', 'SG', False)
            AllConstraints.extend(DisulfAllConstraints)

        with open(OutputPdb.replace('.pdb', '_All.cst'), 'w') as CstFile:
            CstFile.write('\n'.join(AllConstraints) + '\n')

        BackboneBackboneCst, BackboneSidechainCst, SidechainSidechainCst = SortedConstraints
        for Suffix, Constraints in (('_BBBB.cst', BackboneBackboneCst),
                                    ('_BBSC.cst', BackboneSidechainCst),
                                    ('_SCSC.cst', SidechainSidechainCst),
                                    ('_Disulf.cst', DisulfAllConstraints)):
            with open(OutputPdb.replace('.pdb', Suffix), 'w') as CstFile:
                CstFile.write('\n'.join(Constraints) + '\n')
# ''' import numpy as np import subprocess import argparse import glob import time import copy import sys import os import re if '-h' not in sys.argv: import rosetta # rosetta.init() rosetta.init(extra_options=" -ex1 -ex2 -no_optH false -use_input_sc " ) # "-mute basic -mute core -mute protocols" from rosetta.protocols import grafting # from repo import solenoid_tools # ''' # sys.argv = [ sys.argv[0], '-pdb_stem', '_2qiv_Relax' ] def main(argv=None): # if argv is None: # argv = sys.argv if argv != None: sys.argv = [sys.argv[0]] + [arg for arg in argv]
#import sys import argparse #import random, math, os from rosetta import init, pose_from_pdb, get_fa_scorefxn, \ standard_packer_task, \ Pose, MoveMap, RotamerTrialsMover, MinMover from Bio.PDB import PDBParser, PDBIO, Dice import glob #from toolbox import mutate_residue #from time import time #from Bio import SeqIO init( extra_options= '-mute basic -mute core -ignore_zero_occupancy false -rebuild_disulf false -detect_disulf false' ) for pdb_file in glob.glob('../Data/structures/*.pdb'): print(pdb_file) if '.rosetta' in pdb_file: continue pdb_file_clean = pdb_file.replace('.pdb', '.rosetta.pdb') initial_pose = pose_from_pdb(pdb_file) initial_pose.dump_pdb(pdb_file_clean) io = PDBIO() pdb = PDBParser().get_structure( pdb_file.split('/')[-1].split('.')[0], pdb_file_clean) chains = list(pdb.get_chains()) assert len(chains) == 1