def get_E_per_residue( pose, cutoff = 4 ):
    '''
    Score <pose> with the ScoreFunction from get_fa_scorefxn and print every residue whose total energy is above <cutoff>
    One line is printed per reported residue: residue name, the result of pose.pdb_info().pose2pdb for that residue, and its total energy
    :param pose: Pose
    :param cutoff: int( or float( only residues with a total energy strictly greater than this are printed ) ). Default = 4
    :return: None
    '''
    # imports
    from rosetta import get_fa_scorefxn

    # score the pose first; the per-residue energies are read from pose.energies() afterwards
    sf = get_fa_scorefxn()
    sf( pose )

    for residue in pose:
        seqpos = residue.seqpos()
        energy = pose.energies().residue_total_energy( seqpos )
        if energy > cutoff:
            # single-string form of the old multi-argument print statement ( same text in Python 2 and 3 )
            print( "%s \t%s \t%s" %( residue.name(), pose.pdb_info().pose2pdb( seqpos ), energy ) )
def get_E_per_residue_from_file( pdb_filename, cutoff = 1.5 ):
    '''
    Load <pdb_filename> with load_pose, score it with the ScoreFunction from get_fa_scorefxn, and print every residue whose total energy is above <cutoff>
    One line is printed per reported residue: residue name, the result of pose.pdb_info().pose2pdb for that residue, and its total energy
    :param pdb_filename: str( path handed to antibody_functions.load_pose )
    :param cutoff: int( or float( only residues with a total energy strictly greater than this are printed ) ). Default = 1.5
    :return: None
    '''
    # imports
    from rosetta import get_fa_scorefxn
    from antibody_functions import load_pose

    pose = load_pose( pdb_filename )

    # score the pose first; the per-residue energies are read from pose.energies() afterwards
    sf = get_fa_scorefxn()
    sf( pose )

    for residue in pose:
        seqpos = residue.seqpos()
        energy = pose.energies().residue_total_energy( seqpos )
        if energy > cutoff:
            # single-string form of the old multi-argument print statement ( same text in Python 2 and 3 )
            print( "%s \t%s \t%s" %( residue.name(), pose.pdb_info().pose2pdb( seqpos ), energy ) )
def get_fa_scorefxn_with_given_weights( weights_dict, verbose = False ):
    '''
    Return the ScoreFunction from get_fa_scorefxn with the weights given in <weights_dict> applied through set_weight
    Each key may be a ScoreType object or the str name of one; str names are converted with score_type_from_name
    Prints a message and calls sys.exit() if <weights_dict> is not a dict, if a str key is not a valid ScoreType name, or if a key is neither a str nor a ScoreType
    :param weights_dict: dict( ScoreType or str of ScoreType name : int( or float( weight ) ) )
    :param verbose: bool( print the final weights of the returned ScoreFunction? ) Default = False
    :return: ScoreFunction( fa_scorefxn with adjusted weights of given scoretypes )
    '''
    # imports
    import sys
    from rosetta import get_fa_scorefxn, score_type_from_name
    from rosetta.core.scoring import fa_atr

    # argument check - check the passed argument is a dict
    if not isinstance( weights_dict, dict ):
        print( "You didn't give me a dictionary for your input. I need a dict of ScoreType (or name) : weight. Exiting." )
        sys.exit()

    # get a standard fa_scorefxn to start with
    sf = get_fa_scorefxn()

    # for each entry of the dictionary, change the weight
    for scoretype_name, scoretype_weight in weights_dict.items():
        # if the key is a string
        if isinstance( scoretype_name, str ):
            try:
                scoretype = score_type_from_name( scoretype_name )
            # Exception ( not a bare except ) so KeyboardInterrupt / SystemExit are not swallowed here
            except Exception:
                print( "\nThe string name: '%s' does not appear to be a valid ScoreType. Exiting" %scoretype_name )
                sys.exit()
            # set the weight
            sf.set_weight( scoretype, scoretype_weight )

        # if the argument is a ScoreType object
        elif isinstance( scoretype_name, type( fa_atr ) ):
            # adjust the weight in the scorefxn using the corresponding weight given
            sf.set_weight( scoretype_name, scoretype_weight )

        # else, I don't know what they gave me as a scoretype
        else:
            print( "I'm not sure what '%s' is from your ScoreType key in your <weights_dict> argument. Exiting" %scoretype_name )
            sys.exit()

    # if verbose, print out the weights of the new ScoreFunction
    if verbose:
        weight_lines = [ "%s: %s" %( str( name ), sf.get_weight( name ) ) for name in sf.get_nonzero_weighted_scoretypes() ]
        print( "\nNew score weights sf:\n%s\n" %( "\n".join( weight_lines ) ) )

    # return the newly weighted fa_scorefxn
    return sf
def main():
    """Run an accept/reject point-mutation simulation on a three-chain complex.

    Command line (read from sys.argv):
        argv[1]  path handed to pose_from_pdb for the starting structure
        argv[2]  file the accumulated CSV rows are written to when the run ends
        argv[3]  integer score type, forwarded unchanged to score_all

    Each generation proposes one random non-cysteine position and a different
    residue from AAs, builds the mutant with mutate_residue_chain, scores it
    with score_all and accepts it with the probability returned by
    calc_prob_scores.  Every accepted mutant is dumped to "<accept index>.pdb"
    in the working directory and becomes the new reference structure.
    """
    args = sys.argv
    pdb_file = args[1]
    out_file = args[2]
    score_type = int(args[3])

    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core -mute protocol -mute warn')

    # Constants
    PACK_RADIUS = 5
    # Amino acids, notice there is no C
    AAs = ("A", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q",
           "R", "S", "T", "V", "W", "Y")
    # Number of mutations to accept
    max_accept_mut = 2000
    # Population size
    N = 1
    # Beta (temp term)
    beta = 1

    # Prepare data headers
    data = ['Variant,ChainA,ChainB,ChainC,InterfaceAB,InterfaceAC,"delta-delta-G",Probability,Generation\n']

    initial_pose = pose_from_pdb(pdb_file)

    # Set up ScoreFunction
    sf = get_fa_scorefxn()

    # Set up MoveMap. This is where you turn the bb and side chain
    # flexibility on and off
    mm = MoveMap()
    mm.set_bb(False)

    # Score of the untouched input structure, printed for reference
    pre_pre_packing_score = sf(initial_pose)
    print(pre_pre_packing_score)

    min_mover = MinMover()
    min_mover.movemap(mm)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')

    # Work on copies so initial_pose itself is left intact
    cp_init_pdb = Pose()
    cp_init_pdb.assign(initial_pose)
    chains = cp_init_pdb.split_by_chain()

    # split up AB inter and AC inter
    initial_poseAB = Pose()
    initial_poseAB.assign(initial_pose)
    initial_poseAC = Pose()
    initial_poseAC.assign(initial_pose)

    init_chain_moverAB = SwitchChainOrderMover()
    init_chain_moverAB.chain_order("12")
    init_chain_moverAB.apply(initial_poseAB)

    init_chain_moverAC = SwitchChainOrderMover()
    init_chain_moverAC.chain_order("13")
    init_chain_moverAC.apply(initial_poseAC)

    # score the initial stabilities of each chain
    wt_a = sf(chains[1])
    wt_b = sf(chains[2])
    wt_c = sf(chains[3])

    # score the initial interfaces
    inter_AB = InterfaceEnergy_split(initial_poseAB)
    inter_AC = InterfaceEnergy_split(initial_poseAC)

    # init thresholds set to half of the init stabilities, if you want to do
    # a different protein change these
    threshold_a = -138.41754752
    threshold_b = -61.378619136
    threshold_c = -61.378619136
    threshold_inter_ab = -10.3726691079
    threshold_inter_ac = -10.3726691079

    data.append('WT,' + ','.join(str(value) for value in
                                 (wt_a, wt_b, wt_c, inter_AB, inter_AC))
                + ',0.0,0.0,0\n')

    # check the initial starting score
    init_score = score_all(initial_pose, sf, min_mover, beta,
                           threshold_a, threshold_b, threshold_c,
                           threshold_inter_ab, threshold_inter_ac, score_type)
    print(init_score)

    # number of residues to select from
    n_res = initial_pose.total_residue()
    print(n_res)

    # start sim
    i = 0      # accepted mutations so far
    gen = 0    # proposals made so far
    while i < max_accept_mut:
        # update the number of generations that have passed
        gen += 1
        print('accepts: %s' % i)

        # pick a place to mutate; don't mess with C, just choose again
        mut_location = random.randint(1, n_res)
        res = initial_pose.residue(mut_location)
        while res.name1() == 'C':
            mut_location = random.randint(1, n_res)
            res = initial_pose.residue(mut_location)

        # one-letter code of the residue being replaced (used in the name)
        toname = res.name1()

        # choose the amino acid to mutate to; don't bother mutating to the
        # same amino acid, it just takes more time
        new_mut_key = random.randint(0, len(AAs) - 1)
        proposed_res = AAs[new_mut_key]
        while proposed_res == res.name1():
            new_mut_key = random.randint(0, len(AAs) - 1)
            proposed_res = AAs[new_mut_key]

        # init mutant with current
        mutant_pose = Pose()
        mutant_pose.assign(initial_pose)

        # mutate
        mutant_pose = mutate_residue_chain(mutant_pose, mut_location,
                                           proposed_res, PACK_RADIUS, sf)

        # score mutant
        mut_score = score_all(mutant_pose, sf, min_mover, beta,
                              threshold_a, threshold_b, threshold_c,
                              threshold_inter_ab, threshold_inter_ac,
                              score_type)

        # get the probability that the mutation will be accepted
        probability = calc_prob_scores(mut_score['score'],
                                       init_score['score'], N)
        rand = random.random()

        # test to see if mutation is accepted
        if float(rand) < float(probability):
            print("accepted")

            # make a name for the new mutant
            variant_name = (str(toname)
                            + str(initial_pose.pdb_info().number(mut_location))
                            + str(proposed_res))

            # Assuming some burn in phase, make this zero if you want to
            # store everything
            if i >= 0:
                # save name and energy change
                fields = (variant_name,
                          mut_score['a'], mut_score['b'], mut_score['c'],
                          mut_score['ab'], mut_score['ac'],
                          mut_score['score'] - init_score['score'],
                          probability, gen)
                data.append(','.join(str(field) for field in fields) + "\n")

                # save the new accepted mutation
                pdb_name = str(i) + ".pdb"
                mutant_pose.dump_pdb(pdb_name)

            # update the wildtype
            initial_pose = mutant_pose
            init_score = mut_score

            # update number of accepts
            i += 1

    # The original never persisted the collected rows even though an output
    # file name is taken from argv[2]; write them out once the run is done.
    with open(out_file, "w") as outfile:
        outfile.writelines(data)
# make the directory for the working PDBs in the base_structs_dir
structure_dir = base_structs_dir + native_pdb_name
if not os.path.isdir( structure_dir ):
    try:
        os.mkdir( structure_dir )
    # best effort on purpose ( e.g. the directory appeared between the check and the mkdir ),
    # but only filesystem errors are ignored instead of every exception
    except OSError:
        pass
working_pose_decoy_name = structure_dir + '/' + native_pdb_name + "_just_%s_sugar_small_moves" %input_args.num_sugar_small_move_trials

# use the scorefxn_file to set up additional weights
if input_args.scorefxn_file is not None:
    main_sf = make_fa_scorefxn_from_file( input_args.scorefxn_file )
# else create a fa_scorefxn
else:
    main_sf = get_fa_scorefxn()

# fa_intra_rep should always be 0.440 since that's what I've been using
main_sf.set_weight( score_type_from_name( "fa_intra_rep" ), 0.440 )

# set up constraints from the passed constraint file
if input_args.native_constraint_file is not None:
    # add an appropriate weight to the main_sf if atom_pair_constraint and dihedral_constraint are 0
    if main_sf.get_weight( score_type_from_name( "atom_pair_constraint" ) ) == 0:
        main_sf.set_weight( score_type_from_name( "atom_pair_constraint" ), 1.0 )
    if main_sf.get_weight( score_type_from_name( "dihedral_constraint" ) ) == 0:
        main_sf.set_weight( score_type_from_name( "dihedral_constraint" ), 1.0 )
    if input_args.verbose:
        print( "Setting up a ConstraintSetMover" )
def main():
    """Evolve a single structure by accepted point mutations and save a CSV.

    Positional command-line arguments: ``pdb_filename`` (str) and
    ``replicate_number`` (int).  The matching FASTA file is located by
    replacing '/structures/' with '/fastas/' and '.pdb' with '.fasta' in the
    pdb path, and must hold exactly one record.  The run stops after
    ``10 * len(sequence)`` accepted mutations and writes its rows to
    ``../Results/<prot_name>/...csv``.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('pdb_filename', action="store", type=str)
    arg_parser.add_argument('replicate_number', action="store", type=int)
    inputs = arg_parser.parse_args()

    pdb_file = inputs.pdb_filename
    # file name without directories and without anything after the first '.'
    prot_name = pdb_file.split('/')[-1].split('.')[0]

    # set up timer to figure out how long the code took to run
    t0 = time()

    fasta_file = pdb_file.replace('/structures/', '/fastas/')
    fasta_file = fasta_file.replace('.pdb', '.fasta')
    records = list(SeqIO.parse(fasta_file, 'fasta'))
    assert len(records) == 1
    wt_seq = str(records[0].seq)

    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core '
                       '-rebuild_disulf false -detect_disulf false')

    ########################
    # Constants
    ########################
    PACK_RADIUS = 12.0
    # Amino acids
    AAs = ("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
           "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")
    # For every residue, the residues it may be mutated to (all the others)
    AAs_choice_dict = dict(
        (aa, [other_aa for other_aa in AAs if other_aa != aa]) for aa in AAs)
    # Number of mutations to accept
    max_accept_mut = 10 * len(wt_seq)
    # Population size
    N = 1000
    # Beta (temp term)
    beta = 1
    # Fraction of the WT stability value to shoot for
    threshold_fraction = 0.5

    # Prepare data headers
    data = ['Variant,Rosetta Score,"delta-delta-G",Probability,Generation\n']

    # Load a clean pdb file
    initial_pose = pose_from_pdb(pdb_file)
    if '.clean' in pdb_file:
        pdb_file = pdb_file.replace('.clean', '')

    # Set up ScoreFunction
    sf = get_fa_scorefxn()

    # Set up MoveMap.
    move_map = MoveMap()
    move_map.set_bb(True)
    move_map.set_chi(True)

    # Pack and minimize initial pose to remove clashes.
    pre_pre_packing_score = sf(initial_pose)

    packer_task = standard_packer_task(initial_pose)
    packer_task.restrict_to_repacking()
    packer_task.or_include_current(True)
    RotamerTrialsMover(sf, packer_task).apply(initial_pose)

    min_mover = MinMover()
    min_mover.movemap(move_map)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')
    min_mover.apply(initial_pose)

    # score of the current reference structure; replaced on every accept
    current_score = sf(initial_pose)

    # Threshold for selection
    threshold = current_score * threshold_fraction
    print('threshold: %s' % threshold)

    data.append('WT,%s,0.0,0.0,0\n' % str(current_score))

    # number of residues to select from
    n_res = initial_pose.total_residue()

    # start evolution
    accepted = 0
    generation = 0
    while accepted < max_accept_mut:
        # every proposal counts as a generation, accepted or not
        generation += 1

        # pick a place to mutate and look up what is there now
        mut_location = random.randint(1, n_res)
        current_res = initial_pose.residue(mut_location)

        # choose the amino acid to mutate to
        proposed_res = random.choice(AAs_choice_dict[current_res.name1()])

        # make the mutation and score it
        mutant_pose = mutate_residue(initial_pose, mut_location,
                                     proposed_res, PACK_RADIUS, sf)
        variant_score = sf(mutant_pose)

        # get the probability that the mutation will be accepted
        probability = calc_prob_mh(variant_score, current_score,
                                   N, beta, threshold)

        # rejected proposals leave everything untouched
        if not random.random() < probability:
            continue

        # name: <old residue><pdb number><new residue>
        pdb_number = initial_pose.pdb_info().number(mut_location)
        variant_name = current_res.name1() + str(pdb_number) + str(proposed_res)

        # save name and energy change
        data.append(",".join([variant_name,
                              str(variant_score),
                              str(variant_score - current_score),
                              str(probability),
                              str(generation)]) + "\n")

        # the accepted mutant becomes the new reference
        initial_pose = mutant_pose
        current_score = variant_score
        accepted += 1

    print('\nMutations and scoring complete.')
    t1 = time()

    # Output results.
    output_filename = '../Results/{}/{}_thresh={}_Neff={}_beta={}_i={}.csv'.format(
        prot_name, prot_name, threshold_fraction, N, beta,
        inputs.replicate_number)
    with open(output_filename, "w") as outfile:
        outfile.writelines(data)

    print('Data written to: %s' % output_filename)
    print('program takes %f' % (t1 - t0))
def go(self):
    """
    Main search loop.

    Keeps iterating while self.loops is below 1000000 or self.lowest is still
    above self.native_energy.  Each iteration runs one round of the selected
    mover (frag, local, full, or the per-index default), prints a status
    report, and, for centroid runs, switches the pose and score function to
    full atom once self.lowest drops below 10000.
    """
    time.clock()  # first call; the value printed each iteration comes from the same clock

    while (self.loops < 1000000) or (self.lowest > self.native_energy):
        # Do MCNS on each residue in protein
        if self.frag:
            count = 1
            while count <= self.c_size/2:
                self.frag_mover()
                count += 1
        elif self.local:
            position = 1
            while position < self.c_size/2/self.local_size:
                self.local_mover(position, self.local_size)
                position += 5
            # one more call for whatever is left over after the full-size pieces
            remainder = self.c_size/2 % self.local_size
            self.local_mover(position, remainder)
        elif self.full:
            self.full_mover()
        else:
            index = 1
            while index <= self.c_size/2:
                self.mover(index)
                index += 1

        # Options for dynamic parameter adjustment were present here as
        # commented-out experiments and remain disabled: scaling
        # self.acceptance_threshold by .95 / 1.2 every 4 loops depending on
        # self.num_added, widening / narrowing self.mover_range by 10 / 5,
        # and an extra self.mover round after 20 loops without improvement.

        # Print lowest e_score in archive compared to native
        if self.novelty_archive:
            report = "Lowest energy: " + str(self.lowest)
            report += "\nNative energy: " + str(self.native_energy)
            report += "\nNovelty points added: " + str(self.num_added) + "\n"
            print(report)

        # If lowest e_score is lower than 10000 convert to all-atom, switch
        # energy function, and change minimal criteria to new all-atom energy
        # (for centroid)
        ready_for_full_atom = (self.lowest < 10000) and not self.switch and self.centroid
        if ready_for_full_atom:
            to_full_atom = rosetta.SwitchResidueTypeSetMover("fa_standard")
            to_full_atom.apply(self.pose)
            self.scorefxn = rosetta.get_fa_scorefxn()
            self.mc_energy = self.scorefxn(self.pose)
            self.switch = True

        # Also disabled in the original: reloading 'lowest.pdb' and clearing
        # the novelty archive every 10 loops without improvement, and a
        # temperature change of 5 every 10 loops without progress.

        self.loops += 1
        self.num_added = 0
        print(str(self.loops) + " iterations.")
        print("Time elapsed: " + str(time.clock()))
def __init__(self, pdb, centroid=False, pdb_file='', frag=False, nine_mer=False, local=False, local_size=3,
             full=False, rosetta_refinement=False):
    """
    Set up search state, the native reference pose and the working pose.

    :param pdb: identifier passed to pose_from_rcsb to obtain the native pose
    :param centroid: if true, switch the working pose to centroid and score with 'score3'
    :param pdb_file: if not '', the working pose is loaded from this file
    :param frag: if true, build a ClassicFragmentMover from a fragment file
    :param nine_mer: with frag, use the 9-residue fragment file instead of the 3-residue one
    :param local: if true, use the local mover and one novelty archive per local segment
    :param local_size: size of fragment moved by the local mover
    :param full: if true, use the full mover
    :param rosetta_refinement: if true (and no pdb_file), FastRelax the sequence-built pose first
    """
    # --- mover / mode selection ---------------------------------------
    self.centroid = centroid                        # If true use centroid scoring
    self.frag = frag                                # If true use frag mover
    self.local = local                              # Whether to use local mover
    self.local_size = local_size                    # For local mover, size of fragment to move
    self.full = full                                # If true use full mover
    self.rosetta_refinement = rosetta_refinement    # If true refine rosetta fold

    # --- bookkeeping --------------------------------------------------
    self.loops = 0                   # Stores generation for which energy score was last calculated
    self.scores = {}                 # Dictionary container for current gen genomes/scores
    self.scores_list = []            # List container of current gen scores for search
    self.gen_added = 0               # Last gen in which a point was added to novelty archive
    self.num_added = 0               # Number of points added to novelty archive
    self.switch = False              # All atom switch
    self.last_lowest = 0             # For use in novelty loop
    self.last_lowest_10 = 0          # For use in clear main loop
    self.novelty_archive = deque()   # Initialize novelty archive

    # --- tunable search parameters ------------------------------------
    self.threshold = 10              # Novelty threshold for which point is added to archive
    self.acceptance_threshold = 100  # Novelty threshold for which move is accepted automatically
    self.temperature = 5             # Monte Carlo temperature
    self.mover_range = 10            # +-range of the angle in degrees in which mover moves residue

    # --- Rosetta inits ------------------------------------------------
    rosetta.init()
    pose_native = pose_from_rcsb(pdb)           # native reference pose
    sequence = pose_native.sequence()
    self.scorefxn = rosetta.get_fa_scorefxn()   # all-atom score function

    # Working pose: a given starting pdb wins, then the relaxed sequence
    # pose, otherwise the plain pose built from the sequence.
    if pdb_file != '':
        self.pose = rosetta.pose_from_pdb(pdb_file)
    else:
        self.pose = rosetta.pose_from_sequence(sequence)
        if rosetta_refinement:
            relax = rosetta.FastRelax()
            relax.set_scorefxn(self.scorefxn)
            relax.apply(self.pose)

    if centroid:
        to_centroid = rosetta.SwitchResidueTypeSetMover("centroid")
        to_centroid.apply(self.pose)

    n_residues = len(sequence)
    self.c_size = n_residues*2                        # phi and psi for each residue
    self.native_energy = self.scorefxn(pose_native)   # scored before any centroid score function swap

    if centroid:
        self.scorefxn = rosetta.create_score_function('score3')

    # Flat list [phi_1, psi_1, phi_2, psi_2, ...] of the working pose
    self.conformation = []
    for resno in range(1, n_residues + 1):
        self.conformation.append(self.pose.phi(resno))
        self.conformation.append(self.pose.psi(resno))

    self.mc_energy = self.scorefxn(self.pose) + 500   # Energy to be used as minimal criteria
    self.lowest = self.scorefxn(self.pose)            # Lowest energy in archive

    if frag:
        if nine_mer:
            frag_length, frag_file = 9, "aat000_09_05-1.200_v1_3"
        else:
            frag_length, frag_file = 3, "aat000_03_05-1.200_v1_3"
        fragset = rosetta.ConstantLengthFragSet(frag_length)
        fragset.read_fragment_file(frag_file)
        movemap = rosetta.MoveMap()
        movemap.set_bb(True)
        self.mover_3mer = rosetta.ClassicFragmentMover(fragset, movemap)

    if local:
        # For local, initialize na with appropriate number of deques
        n_archives = self.c_size/2/self.local_size
        self.novelty_archive = [deque() for _ in range(n_archives)]
def main():
    """Clean, repack and minimize one structure and record its score.

    sys.argv[1] is the pdb file name without the extension.  The structure
    is passed through cleanATOM, loaded from "<name>.clean.pdb", repacked
    with RotamerTrialsMover and minimized with MinMover.  The minimized pose
    is dumped to "<name>_min.pdb" and a two-line CSV (header plus one row for
    this structure) is written to "<name>.score".
    """
    # takes name of pdb file without the extension
    pdb_file = sys.argv[1]

    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core')

    # Prepare data headers
    data = ['Variant,Rosetta Score,"delta-delta-G",Probability,Generation\n']

    # Load and clean up pdb file
    name = pdb_file + ".pdb"
    cleanATOM(name)
    clean_name = pdb_file + ".clean.pdb"
    initial_pose = pose_from_pdb(clean_name)

    # Set up ScoreFunction
    sf = get_fa_scorefxn()

    # Set up MoveMap.
    mm = MoveMap()
    mm.set_bb(True)
    mm.set_chi(True)

    # Pack and minimize initial pose to remove clashes.
    sf(initial_pose)  # the original scored the raw pose before packing; call kept

    task = standard_packer_task(initial_pose)
    task.restrict_to_repacking()
    task.or_include_current(True)
    pack_rotamers_mover = RotamerTrialsMover(sf, task)
    pack_rotamers_mover.apply(initial_pose)

    min_mover = MinMover()
    min_mover.movemap(mm)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')
    min_mover.apply(initial_pose)

    post_pre_packing_score = sf(initial_pose)

    pdb_name = str(pdb_file) + "_min.pdb"
    initial_pose.dump_pdb(pdb_name)

    # One row matching the five header columns.  The original concatenated
    # the name and the score without a separating comma, which fused the
    # first two fields into one.
    data.append(str(pdb_file) + ',' + str(post_pre_packing_score)
                + ',0.0,0.0,0\n')

    data_filename = pdb_file + '.score'
    with open(data_filename, "w") as f:
        f.writelines(data)

    print('Data written to: %s' % data_filename)
def main():
    """Score reversions of accepted mutations in structures around each one.

    sys.argv[1] is the CSV written by the forward simulation.  Its second
    line supplies the starting score (second column), from which the
    threshold is derived.  Mutations are read from line index gen + 500
    onwards.  For each one, 31 consecutive pdb files are processed with a
    step counter running from -15 to 15: while step < 0 the last character
    of the variant name is installed, otherwise the first character.  Both
    the modified and the unmodified pose are repacked and minimized, and
    one CSV row is printed and collected per file.  Rows are written to
    'premutate_rep1_bb_T_ch_T.csv'.
    """
    # read in the file made by the forward sim (once; the handle is closed)
    inputfile = sys.argv[1]
    with open(inputfile) as score_file:
        lines = score_file.readlines()

    # the first entry in the file is the wild type structure, calc the
    # threshold using this
    start_stab = lines[1].split(',')[1]
    threshold = float(start_stab) + 10
    print(threshold)

    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core')

    # Constants
    PACK_RADIUS = 0
    # Population size
    N = 100
    # Beta (temp term)
    beta = .6

    # Set up ScoreFunction
    sf = get_fa_scorefxn()

    # Set up MoveMap.
    mm = MoveMap()
    mm.set_bb(True)
    mm.set_chi(True)

    min_mover = MinMover()
    min_mover.movemap(mm)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')

    # Prepare data headers
    data = ['pdbfile_target,pdbfile_used,step,RevertTo,Change,Pos,From,OrgScore,RevScore,Change,Prob\n']

    # line 500 is where the burn-in ends; change the index for a different
    # burn-in length
    print("staring here %s" % lines[500])

    gen = 1

    # get all the pdb files; include the last 1000 and some pdbs, the 16 is
    # because we want the ones that happened before the 500th mutation too.
    sort_list = sorted(glob.glob('*[0-9].pdb'), key=numericalSort)
    sort_list = sort_list[-1016:]

    for i in range(1, len(sort_list) - 30):
        step = -15
        # calc reversion for the 15 structures before and after
        for infile in sort_list[i:i + 31]:
            # variant name (first CSV column) of the mutation being examined
            var_line = lines[gen + 500].split(',')[0]
            print(var_line)
            var_loc = int(''.join(ch for ch in var_line if ch.isdigit()))

            if step < 0:
                # structure precedes the mutation: install the new residue
                var_rev = var_line[-1:]
                old = var_line[:1]
            else:
                # structure follows the mutation: go back to the old residue
                var_rev = var_line[:1]
                old = var_line[-1:]

            print("Current File Being Processed is: " + infile)
            print("revering to: %s" % var_rev)
            print("at: %s" % var_loc)

            # get the pdb you want to revert and make the reversion
            initial_pose = pose_from_pdb(infile)
            mutant_pose = mutate_residue(initial_pose, var_loc, var_rev,
                                         PACK_RADIUS, sf)

            # repack mut
            task1 = standard_packer_task(mutant_pose)
            task1.restrict_to_repacking()
            task1.or_include_current(True)
            packer_rotamers_mover1 = RotamerTrialsMover(sf, task1)
            packer_rotamers_mover1.apply(mutant_pose)

            # repack init
            task2 = standard_packer_task(initial_pose)
            task2.restrict_to_repacking()
            task2.or_include_current(True)
            pack_rotamers_mover2 = RotamerTrialsMover(sf, task2)
            pack_rotamers_mover2.apply(initial_pose)

            # apply min mover
            min_mover.apply(mutant_pose)
            min_mover.apply(initial_pose)

            # get scores
            variant_score = sf(mutant_pose)
            initial_score = sf(initial_pose)

            # get prob
            probability = calc_prob_mh(variant_score, initial_score,
                                       N, beta, threshold)

            # one row, echoed to stdout and kept for the output file
            row = ",".join([str(gen + 499) + ".pdb",
                            str(infile),
                            str(step),
                            str(var_line),
                            str(var_rev),
                            str(var_loc),
                            str(old),
                            str(initial_score),
                            str(variant_score),
                            str(variant_score - initial_score),
                            str(probability)]) + "\n"
            print(row)
            data.append(row)

            step = step + 1
        gen += 1

    print('\nDONE')

    data_filename = 'premutate_rep1_bb_T_ch_T.csv'
    with open(data_filename, "w") as f:
        f.writelines(data)
def main():
    """Accept 1500 point mutations on one structure, recording late ones.

    sys.argv[1] is the pdb file name without the extension.  The structure
    is cleaned, repacked and minimized, then random non-cysteine positions
    are mutated to a different residue from AAs.  A proposal is accepted
    with the probability returned by calc_prob_mh.  Once more than 1000
    mutations have been accepted, each further accepted mutant is recorded
    as a CSV row and dumped to "<accept index>.pdb".
    """
    # takes name of pdb file without the extension
    pdb_file = sys.argv[1]

    # set up timer to figure out how long the code took to run
    t0 = time()

    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core')

    # Constants
    PACK_RADIUS = 10.0
    # Amino acids, notice there is no C
    AAs = ("A", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q",
           "R", "S", "T", "V", "W", "Y")
    # Number of mutations to accept
    max_accept_mut = 1500
    # Population size
    N = 100
    # Beta (temp term)
    beta = 1

    # Prepare data headers
    data = ['Variant,Rosetta Score,"delta-delta-G",Probability,Generation\n']

    # Load and clean up pdb file
    cleanATOM(pdb_file + ".pdb")
    initial_pose = pose_from_pdb(pdb_file + ".clean.pdb")

    # Set up ScoreFunction
    sf = get_fa_scorefxn()

    # Set up MoveMap; change these for more or less flexibility
    move_map = MoveMap()
    move_map.set_bb(True)
    move_map.set_chi(True)

    # Pack and minimize initial pose to remove clashes.
    pre_pre_packing_score = sf(initial_pose)

    packer_task = standard_packer_task(initial_pose)
    packer_task.restrict_to_repacking()
    packer_task.or_include_current(True)
    RotamerTrialsMover(sf, packer_task).apply(initial_pose)

    min_mover = MinMover()
    min_mover.movemap(move_map)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')
    min_mover.apply(initial_pose)

    # score of the current reference structure; replaced on every accept
    current_score = sf(initial_pose)

    # Set threshold for selection
    # NOTE(review): this uses the score taken before packing/minimization,
    # not the post-minimization score written to the WT row - confirm intent.
    threshold = pre_pre_packing_score / 2

    data.append('WT,' + str(current_score) + ',0.0 ,0.0,0\n')

    # number of residues to select from
    n_res = initial_pose.total_residue()

    # start sim
    accepted = 0
    generation = 0
    while accepted < max_accept_mut:
        # every proposal counts as a generation, accepted or not
        generation += 1
        print('accepts: %s' % accepted)

        # pick a place to mutate; don't mess with C, just choose again
        mut_location = random.randint(1, n_res)
        target = initial_pose.residue(mut_location)
        while target.name1() == 'C':
            mut_location = random.randint(1, n_res)
            target = initial_pose.residue(mut_location)

        # choose the amino acid to mutate to; don't bother mutating to the
        # same amino acid, it just takes more time
        proposed_res = AAs[random.randint(0, len(AAs) - 1)]
        while proposed_res == target.name1():
            proposed_res = AAs[random.randint(0, len(AAs) - 1)]

        # make the mutation
        # this is actually a really bad model, and probably shouldnt be used.
        # In new version is repack the whole thing, then reminimize, I should
        # also backrub it.
        mutant_pose = mutate_residue(initial_pose, mut_location, proposed_res,
                                     PACK_RADIUS, sf)

        # score mutant
        variant_score = sf(mutant_pose)

        # get the probability that the mutation will be accepted
        probability = calc_prob_mh(variant_score, current_score,
                                   N, beta, threshold)

        # rejected proposals leave everything untouched
        if not random.random() < probability:
            continue

        # name: <old residue><pdb number><new residue>
        pdb_number = initial_pose.pdb_info().number(mut_location)
        variant_name = target.name1() + str(pdb_number) + str(proposed_res)

        # Assuming 1000 burn in phase, take this if out if you want to store
        # everything
        if accepted > 1000:
            # save name and energy change
            data.append(",".join([variant_name,
                                  str(variant_score),
                                  str(variant_score - current_score),
                                  str(probability),
                                  str(generation)]) + "\n")
            mutant_pose.dump_pdb(str(accepted) + ".pdb")

        # the accepted mutant becomes the new reference
        initial_pose = mutant_pose
        current_score = variant_score
        accepted += 1

    print('\nMutations and scoring complete.')
    t1 = time()

    # Output results.
    data_filename = pdb_file[:-5] + 'mh_1500_rep3.csv'
    with open(data_filename, "w") as f:
        f.writelines(data)

    print('Data written to: %s' % data_filename)
    print('program takes %f' % (t1 - t0))
#!/usr/bin/env python
# :noTabs=true:

# Demo script: builds a short peptide pose and pushes it, together with
# several per-residue views, through a PyMOL_Mover.

import rosetta

rosetta.init()

pose = rosetta.pose_from_sequence('EVAAAVAT')

# First apply() sends the pose itself through the mover.
pymol = rosetta.PyMOL_Mover()
pymol.apply(pose)

# Score the pose before the energy-based calls below.
scorefxn = rosetta.get_fa_scorefxn(
)  # rosetta.create_score_function('standard')
scorefxn(pose)

# Energies, without and with labels.
pymol.send_energy(pose)
pymol.send_energy(pose, label=True)

# Colors: first a uniform default, then a per-residue map over a new default.
pymol.send_colors(pose, {}, default_color="orange")
colors = {
    2: "red",
    5: "white",
}
pymol.send_colors(pose, colors, default_color="blue")

pymol.label_energy(pose, "fa_atr")

# Structural annotations.
pymol.send_hbonds(pose)
pymol.send_ss(pose)
pymol.send_polars(pose)

mm = rosetta.MoveMap()
# Optional switch: minimize after every mutation.
parser.add_argument(
    '-m', '--minimize',
    action='store_true',
    help='flag to perform minimization after each mutation')
args = parser.parse_args()

# Initialize Rosetta.
init(extra_options='-mute basic -mute core')

# Prepare data headers.
data = ['Variant,Rosetta Score,"delta-delta-G"\n']

# Load pdb file.
initial_pose = pose_from_pdb(args.pdb_filename)

# Set up ScoreFunction.
sf = get_fa_scorefxn()

# Set up MoveMap with both backbone and side-chain (chi) movement enabled.
mm = MoveMap()
mm.set_bb(True)
mm.set_chi(True)

# Pack and minimize initial pose to remove clashes.
# Score of the pose as loaded, taken before any repacking.
pre_pre_packing_score = sf(initial_pose)

# Repack only (no design), keeping the current rotamers as candidates.
task = standard_packer_task(initial_pose)
task.restrict_to_repacking()
task.or_include_current(True)

pack_rotamers_mover = RotamerTrialsMover(sf, task)
pack_rotamers_mover.apply(initial_pose)
def InterfaceEnergy_split(pdb):
    """Return the interface dG reported by InterfaceAnalyzerMover for a pose.

    The mover is built with a fresh score function from get_fa_scorefxn and
    the fixed argument list used below, applied to ``pdb`` (a pose, despite
    the parameter name), and its get_interface_dG() value is returned.
    """
    # NOTE(review): the positional flags are passed through exactly as in the
    # original; check the InterfaceAnalyzerMover constructor for their meaning.
    analyzer = InterfaceAnalyzerMover(
        1, False, get_fa_scorefxn(), False, True, True, False)
    analyzer.apply(pdb)
    interface_dg = analyzer.get_interface_dG()
    return interface_dg
def main():
    """Score the reversion of each accepted mutation in the next 15 structures.

    Reads variant names from 'mh_rep_3_37.csv' (first column, starting at
    line index 2) and walks the numerically sorted '*.pdb' files of the
    working directory.  For mutation number ``gen`` the first character of
    its variant name is installed at the mutated position in each of 15
    consecutive structures.  The score before and after and the
    calc_prob_mh probability are printed and collected.  Rows are written
    to 'rep_3_mh_37_rev_15_score.csv'.
    """
    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core')

    # Constants
    PACK_RADIUS = 10.0
    # Population size
    N = 37
    # Beta (temp term)
    beta = 1
    # look up what the first stored value was in the files to get the
    # threshold
    threshold = float(-534.687360627 / 2)

    # Set up ScoreFunction
    sf = get_fa_scorefxn()

    # Prepare data headers
    data = ['Generation,RevertTo,OrgScore,RevScore,Change,Prob\n']

    # Get the reversions file, the output file the score_mutant_pdb has made
    # (read once; the handle is closed right away)
    with open('mh_rep_3_37.csv') as variant_scores:
        lines = variant_scores.readlines()

    gen = 1

    # get all the pdb files; listed and sorted once instead of on every pass
    sort_list = sorted(glob.glob('*.pdb'), key=numericalSort)

    for i in range(1, len(sort_list) - 15):
        # variant name of the mutation to revert; line index gen + 1 skips
        # the lines before the first mutation (change for a longer burn-in)
        var_line = lines[gen + 1].split(',')[0]
        var_loc = int(''.join(ch for ch in var_line if ch.isdigit()))
        var_rev = var_line[:1]

        # calc reversion for next 15 moves
        for infile in sort_list[i:i + 15]:
            print("Current File Being Processed is: " + infile)

            initial_pose = pose_from_pdb(infile)
            initial_score = sf(initial_pose)
            print("init scored")

            mutant_pose = mutate_residue(initial_pose, var_loc, var_rev,
                                         PACK_RADIUS, sf)
            variant_score = sf(mutant_pose)

            probability = calc_prob_mh(variant_score, initial_score,
                                       N, beta, threshold)

            # one row, echoed to stdout and kept for the output file
            row = ",".join([str(gen),
                            var_line,
                            str(initial_score),
                            str(variant_score),
                            str(variant_score - initial_score),
                            str(probability)]) + "\n"
            print(row)
            data.append(row)

        gen += 1

    print('\nDONE')

    data_filename = 'rep_3_mh_37_rev_15_score.csv'
    with open(data_filename, "w") as f:
        f.writelines(data)
def mutate_residue_chain(pose, mutant_position, mutant_aa, pack_radius = 0.0, pack_scorefxn = '' ):
    """
    Replaces the residue at <mutant_position> in <pose> with <mutant_aa>
    and repacks using a PackerTask restricted by
    restrict_non_nbrs_from_repacking_chain( pose, mutant_position, task,
    pack_radius ) with <pack_scorefxn>

    note: <mutant_aa> is the single letter name for the desired ResidueType
    note: the mutation is applied to <pose> itself (no copy is made) and the
        same object is returned

    :param pose: full-atom Pose to mutate in place
    :param mutant_position: int( pose position of the residue to mutate )
    :param mutant_aa: str( one-letter code of the target amino acid )
    :param pack_radius: float( radius handed to the repack restriction helper )
    :param pack_scorefxn: ScoreFunction used for packing; a falsy value
        (the default '') selects get_fa_scorefxn()
    :return: the mutated <pose>
    :raises IOError: if <pose> is not full-atom

    example:
        mutate_residue_chain(pose, 30, 'A')
    See also:
        Pose
        PackRotamersMover
        MutateResidue
        pose_from_sequence
    """
    # The guard used to build the exception without raising it, so centroid
    # poses slipped through; fail loudly instead.
    if not pose.is_fullatom():
        raise IOError( 'mutate_residue only works with fullatom poses' )

    # create a standard scorefxn by default
    if not pack_scorefxn:
        pack_scorefxn = get_fa_scorefxn()

    task = standard_packer_task(pose)

    # convert mutant_aa to its integer representation
    mutant_aa = aa_from_oneletter_code(mutant_aa)

    # the mutation is performed by using a PackerTask with only the mutant
    # amino acid available during design: build a Vector1 of 20 booleans
    # (one per proteogenic amino acid) that is True only for the target
    aa_bool = rosetta.utility.vector1_bool()
    for i in range(1, 21):
        aa_bool.append( i == mutant_aa )

    # prevent residues from packing if they are in a different chain than
    # the mutant (per the original author's note; helper defined elsewhere)
    task2 = restrict_non_nbrs_from_repacking_chain(pose, mutant_position, task, pack_radius)

    # allow only the target amino acid at the mutating position
    task2.nonconst_residue_task(mutant_position).restrict_absent_canonical_aas(aa_bool)

    # apply the mutation and pack nearby residues
    packer = PackRotamersMover(pack_scorefxn, task2)
    packer.apply(pose)

    return pose
def mutate_residue(pose, mutant_position, mutant_aa, pack_radius=0.0,
                   pack_scorefxn=''):
    """Return a copy of <pose> with <mutant_position> mutated to <mutant_aa>.

    The residue at <mutant_position> is replaced with <mutant_aa> and any
    residues whose neighbor atom (nbr_atom) lies within <pack_radius>
    angstroms of the mutating residue's neighbor atom are repacked using
    <pack_scorefxn>.  The input pose is not modified.

    Note: <mutant_aa> is the single letter name for the desired ResidueType

    Args:
        pose: full-atom Pose to copy and mutate.
        mutant_position: pose position of the residue to mutate.
        mutant_aa: one-letter code of the target amino acid.
        pack_radius: repack radius in angstroms around the mutated residue.
        pack_scorefxn: ScoreFunction used for packing; a falsy value (the
            default '') selects rosetta.get_fa_scorefxn().

    Returns:
        A new Pose carrying the mutation.

    Raises:
        IOError: if <pose> is not full-atom.

    Example:
        mutate_residue(pose, 30, "A")
    See also:
        Pose
        PackRotamersMover
    """
    # The exception used to be constructed but never raised.
    if not pose.is_fullatom():
        raise IOError('mutate_residue() only works with full-atom poses.')

    # Work on a copy so the caller's pose is left untouched.
    test_pose = rosetta.Pose()
    test_pose.assign(pose)

    # create a standard scorefxn by default
    if not pack_scorefxn:
        pack_scorefxn = rosetta.get_fa_scorefxn()

    task = rosetta.standard_packer_task(test_pose)
    task.or_include_current(True)

    # Convert mutant_aa to its integer representation.
    mutant_aa = rosetta.aa_from_oneletter_code(mutant_aa)

    # The mutation is performed by using a PackerTask with only the mutant
    # amino acid available during design: a vector1 of 20 booleans (one per
    # proteogenic amino acid) that is True only for the target.
    aa_bool = rosetta.utility.vector1_bool()
    for i in range(1, 20 + 1):
        aa_bool.append(i == mutant_aa)

    # Modify the mutating residue's assignment in the PackerTask using the
    # vector1 of booleans across the proteogenic amino acids.
    task.nonconst_residue_task(mutant_position).restrict_absent_canonical_aas(
        aa_bool)

    # Prevent residues from packing by setting the per-residue "options" of
    # the PackerTask.  Only the mutating residue and residues within
    # pack_radius stay packable.  The previous condition
    # (`not i == mutant_position or dist > r**2`) froze every residue except
    # the mutant, which made pack_radius a no-op.
    center = pose.residue(mutant_position).nbr_atom_xyz()
    radius_squared = pack_radius ** 2
    for i in range(1, pose.total_residue() + 1):
        if i == mutant_position:
            continue
        dist_squared = center.distance_squared(
            test_pose.residue(i).nbr_atom_xyz())
        if dist_squared > radius_squared:
            task.nonconst_residue_task(i).prevent_repacking()

    # Apply the mutation and pack nearby residues.
    packer = rosetta.PackRotamersMover(pack_scorefxn, task)
    packer.apply(test_pose)

    return test_pose
# sets up the input native PDB as being the base pose native_pose = Pose() native_pose.assign( load_pose( orig_pdb_filename_full_path ) ) native_pose.pdb_info().name( "native" ) # automatically populates chain and residue information into holder for native 3ay4 native_pose_info = hold_chain_and_res_designations_3ay4() native_pose_info.native() # use the scorefxn_file to set up additional weights if input_args.scorefxn_file is not None: sf = make_fa_scorefxn_from_file( input_args.scorefxn_file ) # else create a fa_scorefxn else: sf = get_fa_scorefxn() # fa_intra_rep should always 0.440 since that's what I've been using if input_args.fa_intra_rep: sf.set_weight( score_type_from_name( "fa_intra_rep" ), 0.440 ) # pymol stuff pmm = PyMOL_Mover() pmm.keep_history( True ) pmm.apply( native_pose ) # relay information to user info_file_details = [] info_file_details.append( "Native PDB filename:\t\t\t%s\n" %input_args.native_pdb_file.split( '/' )[-1] )
def rescore_sol(folder, args):
    """
    Score every solution-state decoy in <folder> and return them sorted.

    A file counts as a solution-state decoy when its name contains 'Sol'.
    Each decoy is loaded and scored with the default full-atom score
    function from rosetta.get_fa_scorefxn() (talaris2014 according to the
    original author's note).  Scores are appended, unsorted, to
    '<folder>/sol_score.sc' as they are computed.

    Requires PyRosetta; Rosetta is initialised here with '-include_surfaces'.

    :param folder: str( path of the folder holding the decoys )
    :param args: object with a boolean <silence> attribute; when it is
        False, progress is printed and display_time() is called per decoy
    :return: tuple(
        list of ( pdb name, score ) pairs sorted by ascending score, holding
            only the decoys that were scored successfully,
        str( header line written to the score file ),
        dict( folder name with '_output' removed : [ names of PDBs that
            could not be read ] ) )
    """
    # Imported unconditionally: time() is needed for the start timestamp
    # even when progress output is silenced.
    from time import time

    import rosetta
    import rosetta.core.scoring.solid_surface

    opts = '-include_surfaces -mute basic -mute core -mute protocols'
    rosetta.init(extra_options = opts)

    verbose = not args.silence

    # Folder label used for messages and as the key of the error dict
    f_name = os.path.basename(folder).replace('_output', '')
    if verbose:
        print('\n\nFolder:\t{}'.format(f_name))

    # Narrowing the folder listing to only solution models
    sol_pdbs = sorted(name for name in os.listdir(folder) if 'Sol' in name)
    count = len(sol_pdbs)
    if verbose:
        print("Scoring {} PDBs".format(count))

    # Writing unsorted scores file (header first, rows appended below)
    scoresc = os.path.join(folder, 'sol_score.sc')
    header = ('\t' * 6).join(['Description', 'Score'])
    with open(scoresc, 'w') as s:
        s.write(header)

    # Scoring solution PDBs and listing scores
    score_erors = {}
    sf = rosetta.get_fa_scorefxn()
    # Names and scores are stored together so that a PDB which fails to
    # load cannot shift later names onto the wrong scores.
    name_scores = []
    start = time()
    for i, pdb in enumerate(sol_pdbs):
        try:
            p = rosetta.pose_from_pdb(os.path.join(folder, pdb))
            score = sf(p)
        except RuntimeError:
            print("Unable to read PDB: {}".format(pdb))
            score_erors.setdefault(f_name, []).append(pdb)
            continue

        name_scores.append((pdb, score))

        # Adding score to unsorted list; appended per decoy so partial
        # results are on disk if a later decoy aborts the run
        with open(scoresc, 'a') as s:
            s.write('\n{}\t{}'.format(pdb, score))

        if verbose:
            elapsed = time() - start
            display_time(start, elapsed, i, count)

    # Sorting the (name, score) pairs by score
    s_name_scores = sorted(name_scores, key=lambda x: x[1])
    return s_name_scores, header, score_erors