def mutate_residue(pose, mutant_position, mutant_aa, pack_radius, pack_scorefxn):
    """
    Return a mutated copy of <pose>; the input pose itself is not modified.

    The residue at <mutant_position> is replaced by <mutant_aa> and every
    residue whose neighbor atom lies within <pack_radius> of the mutated
    residue's neighbor atom is repacked using <pack_scorefxn>.

    pose            : the Pose to copy and mutate (must be fullatom)
    mutant_position : Pose number of the residue to mutate
    mutant_aa       : one-letter code of the new amino acid
    pack_radius     : repacking cutoff, compared against the distance between
                      residue neighbor atoms
    pack_scorefxn   : ScoreFunction handed to the PackRotamersMover

    Raises IOError if <pose> is not a fullatom pose.
    """
    if not pose.is_fullatom():
        # the exception used to be constructed but never raised, so centroid
        # poses silently fell through to the packer
        raise IOError('mutate_residue only works with fullatom poses')

    test_pose = Pose()
    test_pose.assign(pose)

    # Create a packer task (standard)
    task = TaskFactory.create_packer_task(test_pose)

    # the Vector1 of booleans (a specific object) is needed for specifying the
    # mutation, this demonstrates another more direct method of setting
    # PackerTask options for design
    aa_bool = vector1_bool()

    # PyRosetta uses several ways of tracking amino acids (ResidueTypes)
    # the numbers 1-20 correspond individually to the 20 proteogenic amino
    # acids; aa_from_oneletter_code returns the integer representation of an
    # amino acid from its one letter code
    mutant_aa = aa_from_oneletter_code(mutant_aa)

    # mutation is performed by using a PackerTask with only the mutant
    # amino acid available during design: entry i of the Vector1 is True only
    # for the numerical designation of the requested amino acid
    for i in range(1, 21):
        aa_bool.append(i == mutant_aa)

    # modify the mutating residue's assignment in the PackerTask using the
    # Vector1 of booleans across the proteogenic amino acids
    task.nonconst_residue_task(mutant_position).restrict_absent_canonical_aas(
        aa_bool)

    # prevent residues from packing by setting the per-residue "options" of
    # the PackerTask; distances are compared squared, so square the radius
    # once instead of on every iteration
    center = pose.residue(mutant_position).nbr_atom_xyz()
    radius_squared = pow(float(pack_radius), 2)
    for i in range(1, pose.total_residue() + 1):
        dist_squared = center.distance_squared(
            test_pose.residue(i).nbr_atom_xyz())
        # only pack the mutating residue and any within the pack_radius
        if i != mutant_position and dist_squared > radius_squared:
            task.nonconst_residue_task(i).prevent_repacking()

    # apply the mutation and pack nearby residues
    packer = PackRotamersMover(pack_scorefxn, task)
    packer.apply(test_pose)

    return test_pose
def load_pose( pose_filename ):
    '''
    Build a Pose from a structure file and attach some bookkeeping attributes
    The Pose's PDBInfo name is shortened to the part after the last '/'
    :param pose_filename: str( /path/to/pose/filename )
    :return: a Rosetta Pose with .orig_fold_tree ( a copy of its FoldTree ), .loops = None, and .loops_file = None
    '''
    # imports
    from rosetta import Pose, pose_from_file, FoldTree

    # fill a fresh Pose from the file
    loaded = Pose()
    pose_from_file( loaded, pose_filename )

    # keep only the last path component as the Pose's name
    info = loaded.pdb_info()
    short_name = info.name().rsplit( '/', 1 )[-1]
    info.name( short_name )

    # remember the starting FoldTree; loop information gets filled in later by callers
    loaded.orig_fold_tree = FoldTree( loaded.fold_tree() )
    loaded.loops = None
    loaded.loops_file = None

    return loaded
def Fc_glycan_rmsd( working, working_Fc_glycan_chains, native, native_Fc_glycan_chains, decoy_num, dump_dir ):
    """
    Calculate the heavy atom RMSD between the Fc glycans of a decoy and of the native
    Each set of glycan chains is dumped to a temporary PDB file in <dump_dir>, reloaded as its own Pose, and compared
    The temporary files are removed before returning, even if something fails along the way
    :param working: decoy Pose()
    :param working_Fc_glycan_chains: list( the chain id's for the working Fc glycan ). Ex = [ 'H', 'I' ]
    :param native: native Pose()
    :param native_Fc_glycan_chains: list( the chain id's for the native Fc glycan ). Ex = [ 'D', 'E' ]
    :param decoy_num: int( the number of the decoy for use when dumping its Fc glycan )
    :param dump_dir: str( /path/to/dump_dir for the temp pdb files made. Files will be deleted )
    :return: float( Fc glycan rmsd ), or the string "nan" if the RMSD could not be calculated
    """
    # imports
    import os
    from rosetta import Pose
    from rosetta.core.scoring import non_peptide_heavy_atom_RMSD
    from antibody_functions import load_pose
    from util import dump_pdb_by_chain, id_generator

    # get temporary files to work with ( same names as before, with or without a trailing '/' on dump_dir )
    unique_id = id_generator()
    if dump_dir.endswith( '/' ):
        prefix = dump_dir
    else:
        prefix = dump_dir + '/'
    working_filename = "%s%s_temp_working_just_glyc%s.pdb" %( prefix, unique_id, str( decoy_num ) )
    native_filename = "%s%s_temp_native_just_glyc%s.pdb" %( prefix, unique_id, str( decoy_num ) )

    try:
        # dump out the Fc glycans by their chain id's
        dump_pdb_by_chain( working_filename, working, working_Fc_glycan_chains, decoy_num, dump_dir = dump_dir )
        dump_pdb_by_chain( native_filename, native, native_Fc_glycan_chains, decoy_num, dump_dir = dump_dir )

        # load in the Fc glycans
        # a failed load is tolerated on purpose: the Pose stays empty and the RMSD step below reports "nan"
        just_Fc_glycan = Pose()
        try:
            just_Fc_glycan.assign( load_pose( working_filename ) )
        except Exception:
            pass

        native_just_Fc_glycan = Pose()
        try:
            native_just_Fc_glycan.assign( load_pose( native_filename ) )
        except Exception:
            pass

        # calculate the glycan rmsd
        try:
            glycan_rmsd = non_peptide_heavy_atom_RMSD( just_Fc_glycan, native_just_Fc_glycan )
        except Exception:
            glycan_rmsd = "nan"
    finally:
        # delete the files directly instead of spawning an un-waited "rm" shell command
        for temp_filename in ( working_filename, native_filename ):
            try:
                os.remove( temp_filename )
            except OSError:
                # the file was never written or is already gone
                pass

    return glycan_rmsd
def pose_from_pubchem(cid, name, temporary=True):
    """
    Returns a Pose filled from the file <name>_0001.pdb for the PubChem
    entry <cid>.

    temporary=True  : params_from_pubchem(cid, name) is run and the pose is
                      loaded with a residue set generated for <name> only
                      (the "temporary solution" of the original author)
    temporary=False : add_cid_to_database(cid, name) is run and the pose is
                      loaded without an explicit residue set (the "permanent
                      solution", adding to the .params list)
    """
    pose = Pose()

    if not temporary:
        # permanent route: register the compound, then load normally
        add_cid_to_database(cid, name)
        pose_from_pdb(pose, name + '_0001.pdb')
        return pose

    # temporary route: build params and a one-residue ResidueSet for this call
    params_from_pubchem(cid, name)
    ephemeral_set = generate_nonstandard_residue_set([name])
    pose_from_pdb(pose, ephemeral_set, name + '_0001.pdb')
    return pose
for seq_pos in native_Fc_protein_nums: res = native_pose.residue( seq_pos ) # used to store scores for each mutation to then determine which was best AA_to_score_dict = {} # skipping Cys residues because they're just being sassy if not res.name1() == 'C': # don't do sugars if not res.is_carbohydrate(): # skip branch points like ASN 297 since it connects to GlcNAc 1 if not res.is_branch_point(): # for each amino acid for new_AA in AA_name1_list: # get a copy of the Pose mutant = Pose() mutant.assign( native_pose ) # mutate! mutant.assign( mutate_residue( seq_pos, new_AA, mutant, sf, pack_radius = 10 ) ) # change pose name to (orig AA)(pdb number and chain)_to_newAA orig_AA_name1 = str( native_pose.residue( seq_pos ).name1() ) mutant.pdb_info().name( "%s%s_to_%s" %( orig_AA_name1, str( mutant.pdb_info().pose2pdb( seq_pos ).strip().replace( ' ', '' ) ), new_AA )) # visualize mutation #pmm.apply(mutant) # store all information # store all data for this residue's mutation for the df # append original residue, position, and score to list
# utility functions from file_mover_based_on_fasc import main as get_lowest_E_from_fasc from get_pose_metrics_on_native import main as get_pose_metrics_on_native from pose_metrics_util import Fc_glycan_rmsd ################################ #### INITIAL PROTOCOL SETUP #### ################################ # initialize Rosetta initialize_rosetta() # load up the poses given from the arguments passed native_pose = Pose() native_pose.assign( load_pose( input_args.native_pdb_file ) ) # automatically populates chain and residue information into holder for native 3ay4 native_pose_info = hold_chain_and_res_designations_3ay4() native_pose_info.native() # get the full path of the native PDB name native_pdb_filename_full_path = input_args.native_pdb_file native_pdb_filename = native_pdb_filename_full_path.split( '/' )[-1] native_pdb_name = native_pdb_filename.split( ".pdb" )[0] # change the name of the native PDB name native_pose_name = "native_pose" native_pose.pdb_info().name( native_pose_name )
# Scan every position of initial_pose against every amino acid in AAs and
# build the corresponding point mutant ( excerpt; the minimization step that
# follows is not visible here )
n_res = initial_pose.total_residue()
for seq_pos in range(1, n_res + 1):
    res = initial_pose.residue(seq_pos)

    for AA in AAs:
        # only real substitutions; mutating to the same residue is skipped
        if res.name1() != AA:
            # e.g. <original letter><PDB number><new letter>
            variant_name = \
                res.name1() + \
                str(initial_pose.pdb_info().number(seq_pos)) + \
                AA

            # Check for disulfide special case.
            if res.name() == 'CYD':
                # NOTE(review): presumably the last residue connection is the
                # disulfide bond -- confirm against the Residue API
                disulfide_partner = res.residue_connection_partner(
                    res.n_possible_residue_connections())
                # work on a copy so initial_pose keeps its disulfide
                temp_pose = Pose()
                temp_pose.assign(initial_pose)
                # (Packing causes seg fault if current CYS residue is not
                # also converted before mutating.)
                change_cys_state(seq_pos, 'CYS', temp_pose.conformation())
                change_cys_state(disulfide_partner, 'CYS',
                                 temp_pose.conformation())
                # Mutate protein.
                mutant_pose = mutate_residue(temp_pose, seq_pos, AA,
                                             PACK_RADIUS, sf)
            else:
                # Mutate protein.
                mutant_pose = mutate_residue(initial_pose, seq_pos, AA,
                                             PACK_RADIUS, sf)

            # Minimize.
packer = RotamerTrialsMover(sf, pt) # Prepare data storage. surface = [] params = [] if args.make_movie: dir_name = args.pdb_filename[:-4] + '_movie_frames' try: mkdir(dir_name) except OSError: print 'Warning: Directory already exists; files will be overwritten.' # Prepare temp pose. pose = Pose() # Switch based on ring size. ring_size = info.ring_size() if ring_size == 3: exit(' Functionality for 3-membered rings not coded yet; exiting') elif ring_size == 4: exit(' Functionality for 4-membered rings not coded yet; exiting') elif ring_size == 5: exit(' Functionality for 5-membered rings not coded yet; exiting') elif ring_size == 6: # Generate header for data output. header = "\t" for phi in range(0, 360 + 1, 30): header += str(phi) if phi == 360:
# file name ( last '/' component ) and the part of it before ".pdb"
orig_pdb_filename = orig_pdb_filename_full_path.split( '/' )[-1]
orig_pdb_name = orig_pdb_filename.split( ".pdb" )[0]

# this is where the double pack and minimized versions of the native will be dumped
# NOTE(review): no separator is inserted between structure_dir and orig_pdb_name here,
# while a '/' is prepended for the round1 directory below -- confirm whether base_structs_dir ends with '/'
structure_dir = base_structs_dir
decoy_pdb_name = structure_dir + orig_pdb_name

# this is where the single pack and minimized decoy will be dumped (the predecessor to double)
# see long comment in jd code for clarification
round1_structure_dir = base_structs_dir + "/round1_decoys/"
if not os.path.isdir( round1_structure_dir ):
    os.mkdir( round1_structure_dir )
round1_decoy_pdb_name = round1_structure_dir + orig_pdb_name

# sets up the input native PDB as being the base pose
native_pose = Pose()
native_pose.assign( load_pose( orig_pdb_filename_full_path ) )
native_pose.pdb_info().name( "native" )

# automatically populates chain and residue information into holder for native 3ay4
native_pose_info = hold_chain_and_res_designations_3ay4()
native_pose_info.native()

# fa_intra_rep should always be 0.440 since that's what I've been using for sugars
sf = get_fa_scorefxn()
sf.set_weight( fa_intra_rep, 0.440 )
# remember the starting fa_rep weight
orig_fa_rep = sf.get_weight( fa_rep )

# pymol stuff
def main():
    """
    Run a mutation-accumulation simulation on a three-chain complex.

    Command line arguments (read from sys.argv):
      1: PDB file to start from
      2: output file name
      3: integer score type, passed through to score_all

    Starting from the input structure, random single-residue mutations are
    proposed (never at a Cys, never to Cys, never to the current residue),
    scored with score_all, and accepted with the probability returned by
    calc_prob_scores.  Each accepted mutant is dumped as "<accepts>.pdb" and
    becomes the new starting point, until max_accept_mut mutations have been
    accepted.

    NOTE(review): in the portion visible here out_file, t0 and the
    accumulated data list are never used after being set -- confirm whether
    the code that writes the results follows this excerpt.
    """
    #takes name of pdb file without the extension
    args = sys.argv
    pdb_file = args[1]
    out_file = args[2]
    score_type = int(args[3])

    #set up timer to figure out how long the code took to run
    t0=time()

    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core -mute protocol -mute warn')

    # Constants
    PACK_RADIUS = 5
    #Amino acids, notice there is no C
    AAs = ("A","D","E","F","G","H","I","K","L","M","N","P","Q","R","S","T","V","W","Y")
    #Number of mutations to accept
    max_accept_mut = 2000
    #Population size
    N = 1
    #Beta (temp term)
    beta = 1

    #Prepare data headers
    data = ['Variant,ChainA,ChainB,ChainC,InterfaceAB,InterfaceAC,"delta-delta-G",Probability,Generation\n']

    initial_pose = pose_from_pdb(pdb_file)

    #Set up ScoreFunction
    sf = get_fa_scorefxn()

    #Set up MoveMap This is where you turn the bb and side chain flexibility on and off
    mm = MoveMap()
    mm.set_bb(False)

    #Get the init score of the struct to calc the threshold
    pre_pre_packing_score = sf(initial_pose)
    print(pre_pre_packing_score)

    min_mover = MinMover()
    min_mover.movemap(mm)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')

    # split a copy of the starting structure into its individual chains
    cp_init_pdb = Pose()
    cp_init_pdb.assign(initial_pose)
    chains=cp_init_pdb.split_by_chain()

    #split up AB inter and AC inter
    initial_poseAB = Pose()
    initial_poseAB.assign(initial_pose)
    initial_poseAC = Pose()
    initial_poseAC.assign(initial_pose)
    init_chain_moverAB = SwitchChainOrderMover()
    init_chain_moverAB.chain_order("12")
    init_chain_moverAB.apply(initial_poseAB)
    init_chain_moverAC = SwitchChainOrderMover()
    init_chain_moverAC.chain_order("13")
    init_chain_moverAC.apply(initial_poseAC)

    #score the initial stabilities of each chain
    wt_a=sf(chains[1])
    wt_b=sf(chains[2])
    wt_c=sf(chains[3])

    #score the initial interfaces
    inter_AB=InterfaceEnergy_split(initial_poseAB)
    inter_AC=InterfaceEnergy_split(initial_poseAC)

    #init thresholds set to half of the init stabilities, if you want to do a different protein change these
    # (the values are hard-coded for one particular structure)
    threshold_a=-138.41754752
    threshold_b=-61.378619136
    threshold_c=-61.378619136
    threshold_inter_ab=-10.3726691079
    threshold_inter_ac=-10.3726691079

    # first data row: the wild type
    data.append('WT,' + str(wt_a)+','+str(wt_b)+','+str(wt_c)+','+str(inter_AB)+','+str(inter_AC)+',0.0,0.0,0\n')

    #check the initial starting score
    init_score=score_all(initial_pose,sf,min_mover,beta,threshold_a, threshold_b, threshold_c,threshold_inter_ab,threshold_inter_ac,score_type)
    print(init_score)

    #number of residues to select from
    n_res = initial_pose.total_residue()
    print(n_res)

    #start sim
    # i counts accepted mutations, gen counts every proposal
    i=0
    gen=0
    while i < max_accept_mut:
        #update the number of generations that have passed
        gen+=1
        print 'accepts:', i

        #pick a place to mutate
        mut_location = random.randint(1, n_res)
        #mut_location = random.randint(1, 10)

        #get the amino acid at that position
        res = initial_pose.residue(mut_location)

        #don't mess with C, just choose again
        while(res.name1() == 'C'):
            mut_location = random.randint(1, n_res)
            #get the amino acid at that position
            res = initial_pose.residue(mut_location)

        #choose the amino acid to mutate to
        toname = res.name1()
        new_mut_key = random.randint(0,len(AAs)-1)
        proposed_res = AAs[new_mut_key]

        #don't bother mutating to the same amino acid it just takes more time
        while(proposed_res == res.name1()):
            new_mut_key = random.randint(0,len(AAs)-1)
            proposed_res = AAs[new_mut_key]

        #init mutant with current
        mutant_pose = Pose()
        mutant_pose.assign(initial_pose)

        #mutate
        mutant_pose=mutate_residue_chain(mutant_pose, mut_location, proposed_res, PACK_RADIUS, sf)

        #score mutant
        mut_score=score_all(mutant_pose,sf,min_mover,beta,threshold_a, threshold_b, threshold_c,threshold_inter_ab,threshold_inter_ac,score_type)

        #get the probability that the mutation will be accepted
        probability = calc_prob_scores(mut_score['score'], init_score['score'], N)
        rand = random.random()

        #test to see if mutation is accepted
        if float(rand) < float(probability):
            print "accepted"

            #make a name for the new mutant
            variant_name = str(toname) + str(initial_pose.pdb_info().number(mut_location)) + str(proposed_res)

            # Assuming some burn in phase, make this zero if you want to store everything
            # (with the threshold at 0 this condition always holds)
            if i>=0:
                #save name and energy change
                data.append(variant_name +',' + str(mut_score['a'])+','+str(mut_score['b'])+','+str(mut_score['c'])+','+str(mut_score['ab'])+','+str(mut_score['ac'])+',' + str(mut_score['score'] - init_score['score']) + "," + str(probability) + "," + str(gen) + "\n")

            #save the new accepted mutation
            pdb_name=str(i)+".pdb"
            mutant_pose.dump_pdb(pdb_name)

            #update the wildtype
            initial_pose = mutant_pose
            init_score = mut_score

            #update number of accepts
            i+=1
def score_all(pdb,sf,min_mover,beta,threshold_a, threshold_b,threshold_inter_ab,score_type):
    """
    Score a two-chain complex: each chain on its own plus the A/B interface.

    pdb                : Pose of the complex (copied, never modified)
    sf                 : ScoreFunction used for the single-chain scores
    min_mover          : unused here, kept so existing callers keep working
    beta               : passed through to the calc_x_list* functions
    threshold_a/_b     : thresholds for the chain A and chain B scores
    threshold_inter_ab : threshold for the A/B interface energy
    score_type         : 1, 2 or 3 selects calc_x_list, calc_x_list_2 or
                         calc_x_list_3; any other supported value leaves the
                         combined score at 0

    Returns a dict with the keys 'score' (combined score), 'a', 'b'
    (single-chain scores) and 'ab' (interface energy).

    Raises ValueError for score_type 4: that mode needs a chain C score and
    threshold, which this two-chain function neither computes nor receives
    (it used to die with a NameError on init_c / threshold_c).
    """
    if score_type == 4:
        # fail early and clearly, before any scoring work is done
        raise ValueError('score_type 4 requires a chain C score and threshold, '
                         'which this two-chain score_all does not have')

    # score the chains separately
    cp_init_pdb = Pose()
    cp_init_pdb.assign(pdb)
    chains = cp_init_pdb.split_by_chain()

    # pose holding only chains 1 and 2, used for the interface energy
    initial_poseAB = Pose()
    initial_poseAB.assign(pdb)
    init_chain_moverAB = SwitchChainOrderMover()
    init_chain_moverAB.chain_order("12")
    init_chain_moverAB.apply(initial_poseAB)

    init_a=sf(chains[1])
    init_b=sf(chains[2])
    init_inter_AB=InterfaceEnergy_split_init(initial_poseAB)

    data=[init_a,init_b,init_inter_AB]
    thresh=[threshold_a,threshold_b,threshold_inter_ab]

    # combine the three terms; unknown score types keep the default of 0
    init_score=0
    if score_type == 1:
        init_score=calc_x_list(data,beta,thresh)
    elif score_type == 2:
        init_score=calc_x_list_2(data,beta,thresh)
    elif score_type == 3:
        init_score=calc_x_list_3(data,beta,thresh)

    dic = {'score':init_score, 'a':init_a, 'b':init_b, 'ab':init_inter_AB}
    return(dic)
# ( excerpt: the loop that supplies f starts above this point )
# collect the absolute path and the bare file name of every .pdb file
if f.endswith( ".pdb" ):
    structures.append( os.path.abspath( f ) )
    structure_names.append( f.split( '/' )[-1] )
os.chdir( working_dir )

# inform the user of the structure directory and number of files to be analyzed
num_structs = len( structure_names )
print "Analyzing", num_structs, "structures from", structure_dir
print

# check and load native pose
initialize_rosetta()
# NOTE(review): if the native file does not exist, no exception is raised here and
# native is simply never assigned, so the failure would only show up at its first later use
try:
    if os.path.isfile( input_args.native_pdb_filename ):
        native = Pose()
        native.assign( load_pose( input_args.native_pdb_filename ) )
except:
    print "It appears", input_args.native_pdb_filename, "is not a valid pdb file. Exiting"
    sys.exit()

# make a scorefunction
sf = get_fa_scorefxn()

# collect the data
rmsds = []
scores = []
pdb_names = []

# for if Pandas doesn't work
def pose_from_params(filename, params_list):
    """
    Returns a Pose loaded from <filename> using a residue set generated from
    <params_list> (passed as-is to generate_nonstandard_residue_set).
    """
    custom_residues = generate_nonstandard_residue_set(params_list)
    loaded = Pose()
    pose_from_pdb(loaded, custom_residues, filename)
    return loaded
def Fc_glycan_metrics( working, native, working_Fc_glycan_chains, native_Fc_glycan_chains, sf, decoy_num, dump_dir ):
    """
    Return the glycan RMSD contribution of the two Fc glycans in 3ay4 (may work for other PDBs, but I don't know yet)
    Also collects, for both the working and the native Pose, the total score and internal hbonds of just the Fc glycans
    and what the Fc glycans contribute to the complex:
    Fc_glycan_buried_sasa = complex with Fc glycan - ( complex without Fc glycan + just Fc glycan )
    hbonds contributed by Fc glycans = total hbonds in Pose - total hbonds in Pose without Fc glycans - just Fc glycan hbonds
    Temporary PDB files are written to <dump_dir> and removed again, even if something fails along the way
    :param working: decoy Pose()
    :param native: native Pose()
    :param working_Fc_glycan_chains: list( the chain id's for the working Fc glycan ). Ex = [ 'H', 'I' ]
    :param native_Fc_glycan_chains: list( the chain id's for the native Fc glycan ). Ex = [ 'D', 'E' ]
    :param sf: ScoreFunction
    :param decoy_num: int( the number of the decoy for use when dumping its Fc glycan )
    :param dump_dir: str( /path/to/dump_dir for the temp pdb files made. Files will be deleted )
    :return: obj( DataHolder that contains Fc_glycan_rmsd ( float, or the string "nan" if it could not be calculated ), Fc_glycan_tot_score, Fc_glycan_internal_hbonds, Fc_glycan_hbonds_contributed, Fc_glycan_sasa_contributed, their native_* counterparts, and probe_size )
    """
    #################
    #### IMPORTS ####
    #################

    # Rosetta functions
    from rosetta import Pose, calc_total_sasa
    from rosetta.core.scoring import non_peptide_heavy_atom_RMSD

    # Rosetta functions I wrote out
    from antibody_functions import load_pose, DataHolder

    # utility functions
    import os
    from util import dump_pdb_by_chain, id_generator
    from toolbox import get_hbonds

    # for use in SASA calculations
    probe_size = 1.4


    #################
    #### HELPERS ####
    #################

    def temp_pdb_pair( name_format ):
        # build the ( working, native ) temp file names for one phase; each pair gets its own fresh id
        unique_id = id_generator()
        if dump_dir.endswith( '/' ):
            prefix = dump_dir
        else:
            prefix = dump_dir + '/'
        return ( name_format %( prefix, unique_id, "working", str( decoy_num ) ),
                 name_format %( prefix, unique_id, "native", str( decoy_num ) ) )

    def load_or_empty( filename ):
        # best effort load: on failure an empty Pose is returned, as before
        loaded = Pose()
        try:
            loaded.assign( load_pose( filename ) )
        except Exception:
            pass
        return loaded

    def remove_quietly( filename ):
        # delete directly instead of spawning an un-waited "rm" shell command
        try:
            os.remove( filename )
        except OSError:
            # the file was never written or is already gone
            pass

    def chains_other_than( pose, excluded_chains ):
        # chain id's found in the Pose, in order of first appearance, minus the excluded ones
        kept = []
        for res in pose:
            chain_id = pose.pdb_info().chain( res.seqpos() )
            if ( chain_id not in kept ) and ( chain_id not in excluded_chains ):
                kept.append( chain_id )
        return kept


    ##############################
    #### JUST THE Fc GLYCANS  ####
    ##############################

    # get glycan rmsd (not using the Fc_glycan_rmsd function because the glycan poses are used for more than that)
    working_filename, native_filename = temp_pdb_pair( "%s%s_temp_%s_just_glyc%s.pdb" )
    try:
        # dump out the Fc glycans by their chain id's
        dump_pdb_by_chain( working_filename, working, working_Fc_glycan_chains, decoy_num, dump_dir = dump_dir )
        dump_pdb_by_chain( native_filename, native, native_Fc_glycan_chains, decoy_num, dump_dir = dump_dir )

        # load in the Fc glycans
        working_just_Fc_glycan = load_or_empty( working_filename )
        native_just_Fc_glycan = load_or_empty( native_filename )
    finally:
        # the Poses are in memory now, so the files are no longer needed
        remove_quietly( working_filename )
        remove_quietly( native_filename )

    # calculate the glycan rmsd
    try:
        glycan_rmsd = non_peptide_heavy_atom_RMSD( working_just_Fc_glycan, native_just_Fc_glycan )
    except Exception:
        glycan_rmsd = "nan"

    # get the metrics associated with just the Fc glycan
    # score first as to gain access to the hbonds data
    working_Fc_glycan_tot_score = sf( working_just_Fc_glycan )
    native_Fc_glycan_tot_score = sf( native_just_Fc_glycan )

    # SASA of just the glycan
    working_Fc_glycan_sasa = calc_total_sasa( working_just_Fc_glycan, probe_size )
    native_Fc_glycan_sasa = calc_total_sasa( native_just_Fc_glycan, probe_size )

    # num hbonds in Fc glycan
    working_Fc_glycan_internal_hbonds = get_hbonds( working_just_Fc_glycan ).nhbonds()
    native_Fc_glycan_internal_hbonds = get_hbonds( native_just_Fc_glycan ).nhbonds()


    #########################################
    #### COMPLEX WITHOUT THE Fc GLYCANS  ####
    #########################################

    # now move to metrics requiring the removal of the glycan from the complex
    working_filename, native_filename = temp_pdb_pair( "%s%s_%s_no_glyc_%s.pdb" )
    try:
        # get the chain id's of everything excluding the passed Fc glycan chain id's
        working_pose_chains = chains_other_than( working, working_Fc_glycan_chains )
        native_pose_chains = chains_other_than( native, native_Fc_glycan_chains )

        # dump out the pose without its Fc glycans by the chain id's
        dump_pdb_by_chain( working_filename, working, working_pose_chains, decoy_num, dump_dir = dump_dir )
        dump_pdb_by_chain( native_filename, native, native_pose_chains, decoy_num, dump_dir = dump_dir )

        # load in the Poses without the Fc glycans
        # the native complex used to be loaded from the working file, which made every native "contributed" metric describe the decoy
        working_complex_no_Fc_glycan = load_or_empty( working_filename )
        native_complex_no_Fc_glycan = load_or_empty( native_filename )
    finally:
        remove_quietly( working_filename )
        remove_quietly( native_filename )

    # score the Poses so their hbond energies get updated
    sf( working )
    sf( working_complex_no_Fc_glycan )
    sf( native )
    sf( native_complex_no_Fc_glycan )

    # get the number of hbonds contributed by the Fc glycans
    # working
    working_with_Fc_glycan_hbonds = get_hbonds( working )
    working_no_Fc_glycan_hbonds = get_hbonds( working_complex_no_Fc_glycan )
    working_Fc_glycan_hbonds_contributed = working_with_Fc_glycan_hbonds.nhbonds() - working_no_Fc_glycan_hbonds.nhbonds() - working_Fc_glycan_internal_hbonds

    # native
    native_with_Fc_glycan_hbonds = get_hbonds( native )
    native_no_Fc_glycan_hbonds = get_hbonds( native_complex_no_Fc_glycan )
    native_Fc_glycan_hbonds_contributed = native_with_Fc_glycan_hbonds.nhbonds() - native_no_Fc_glycan_hbonds.nhbonds() - native_Fc_glycan_internal_hbonds

    # get the SASA contributed by the presence of the Fc glycan
    # working
    working_with_Fc_glycan_sasa = calc_total_sasa( working, probe_size )
    working_no_Fc_glycan_sasa = calc_total_sasa( working_complex_no_Fc_glycan, probe_size )
    working_Fc_glycan_sasa_contributed = working_with_Fc_glycan_sasa - ( working_no_Fc_glycan_sasa + working_Fc_glycan_sasa )

    # native
    native_with_Fc_glycan_sasa = calc_total_sasa( native, probe_size )
    native_no_Fc_glycan_sasa = calc_total_sasa( native_complex_no_Fc_glycan, probe_size )
    native_Fc_glycan_sasa_contributed = native_with_Fc_glycan_sasa - ( native_no_Fc_glycan_sasa + native_Fc_glycan_sasa )


    # store data in the DataHolder and return it
    data = DataHolder()
    data.Fc_glycan_rmsd = glycan_rmsd
    data.Fc_glycan_tot_score = working_Fc_glycan_tot_score
    data.native_Fc_glycan_tot_score = native_Fc_glycan_tot_score
    data.Fc_glycan_internal_hbonds = working_Fc_glycan_internal_hbonds
    data.native_Fc_glycan_internal_hbonds = native_Fc_glycan_internal_hbonds
    data.Fc_glycan_hbonds_contributed = working_Fc_glycan_hbonds_contributed
    data.native_Fc_glycan_hbonds_contributed = native_Fc_glycan_hbonds_contributed
    data.Fc_glycan_sasa_contributed = working_Fc_glycan_sasa_contributed
    data.native_Fc_glycan_sasa_contributed = native_Fc_glycan_sasa_contributed
    data.probe_size = probe_size

    return data
# keeping this here for now in case I decide to do the alanine scan on the whole protein if seq_pos in native_Fc_protein_nums: # use the contact map created earlier to determine if this residue is near the glycan # since the contact map was made between the protein and glycan, just check to see if this res is a key within_10A_of_Fc_glycan_res.append( seq_pos in Fc_protein_to_Fc_glycan_cmap.keys() ) # use the contact map to determine if this residue is within 10A of the Fc-FcR interface within_10A_of_FcR_interface.append( seq_pos in Fc_protein_to_FcR_protein_cmap.keys() ) # otherwise it's a residue outside the Fc protein, so just put None else: within_10A_of_Fc_glycan_res.append( None ) # get a copy of the Pose mutant = Pose() mutant.assign( native_pose ) # build the new Ala residue based on the position of the old residue # if this is the first residue of the pose if seq_pos == 1: # this is the N-terminal end, so use an N-terminal ALA residue ALA_restype = ALA_pose.conformation().residue_type( 1 ) # if this is the last residue of the pose elif seq_pos == native_pose.n_residue(): # this is the C-terminal end, so use an C-terminal ALA residue ALA_restype = ALA_pose.conformation().residue_type( 3 ) # otherwise, this is a residue with something bonded on both sides else:
def pseudo_interface_energy_3ay4( pose, in_sf, native = False, pmm = None ):
    """
    Attempts to get pseudo-interface energy of a glycosylated 3ay4 decoy
    Lots of hard coding here - works on a decoy pose as Rosetta renumbers the Pose a bit
    Makes the two ASN connections to the Fc A and B glycans JUMPs instead of chemical EDGEs,
    translates both glycans away along those JUMPs on a copy of the Pose, and compares the scores
    The passed Pose and ScoreFunction are not modified ( a clone of <in_sf> with atom_pair_constraint set to 0 is used )
    :param pose: Pose
    :param in_sf: ScoreFunction
    :param native: bool( is this the native 3ay4 or a decoy? Answer determines how FoldTree gets coded )
    :param pmm: PyMOL_Mover( pass a PyMOL_Mover object if you want to watch the protocol ). Default = None
    :return: float( pseudo interface energy ), or 0 if the Pose does not have 618 residues
    """
    # if this isn't the Fc-FcR structure of 3ay4, just return 0
    # checked before the Rosetta imports since nothing from Rosetta is needed to answer this
    if pose.n_residue() != 618:
        return 0

    from rosetta import FoldTree, Pose
    from rosetta.numeric import xyzVector_Real
    from rosetta.core.scoring import score_type_from_name

    def show( shown_pose ):
        # send the Pose to PyMOL if a mover was given; visualization problems never stop the calculation
        if pmm is None:
            return
        try:
            pmm.apply( shown_pose )
        except Exception:
            pass

    # set atom_pair_constraint weight to 0
    sf = in_sf.clone()
    sf.set_weight( score_type_from_name( "atom_pair_constraint" ), 0.0 )

    # get the score of the whole complex
    start_score = sf( pose )

    ft = FoldTree()
    # hard code the new FoldTree specific to a glycosylated decoy of 3ay4
    if not native:
        ft.add_edge( 1, 215, -1 )
        ft.add_edge( 1, 216, 1 )    # beginning of chain A to beginning of chain B
        ft.add_edge( 216, 431, -1 )
        ft.add_edge( 1, 432, 2 )    # beginning of chain A to beginning of chain C
        ft.add_edge( 432, 591, -1 )
        ft.add_edge( 579, 592, "ND2", "C1" )
        ft.add_edge( 592, 596, -1 )
        ft.add_edge( 594, 597, "O6", "C1" )
        ft.add_edge( 597, 598, -1 )
        ft.add_edge( 592, 599, "O6", "C1" )
        ft.add_edge( 462, 600, "ND2", "C1" )
        ft.add_edge( 600, 602, -1 )
        ft.add_edge( 69, 603, 3 )   # ASN 297 A to core GlcNAc H
        ft.add_edge( 603, 607, -1 )
        ft.add_edge( 605, 608, "O6", "C1" )
        ft.add_edge( 608, 610, -1 )
        ft.add_edge( 284, 611, 4 )  # ASN 297 B to core GlcNAc J
        ft.add_edge( 611, 615, -1 )
        ft.add_edge( 613, 616, "O6", "C1" )
        ft.add_edge( 616, 618, -1 )

        # the two glycans are the last two new jumps
        glycan_jumps = ( 3, 4 )  # sugar A, sugar B

    # hard code the new FoldTree specific to the native 3ay4
    else:
        ft.add_edge( 1, 215, -1 )
        ft.add_edge( 69, 216, 1 )   # ASN 297 A to core GlcNAc D
        ft.add_edge( 216, 220, -1 )
        ft.add_edge( 218, 221, "O6", "C1" )
        ft.add_edge( 221, 223, -1 )
        ft.add_edge( 1, 224, 2 )    # beginning of chain A to beginning of chain B
        ft.add_edge( 224, 439, -1 )
        ft.add_edge( 292, 440, 3 )  # ASN 297 B to core GlcNAc E
        ft.add_edge( 440, 444, -1 )
        ft.add_edge( 442, 445, "O6", "C1" )
        ft.add_edge( 445, 447, -1 )
        ft.add_edge( 1, 448, 4 )    # beginning of chain A to beginning of chain C
        ft.add_edge( 448, 607, -1 )
        ft.add_edge( 595, 608, "ND2", "C1" )
        ft.add_edge( 608, 612, -1 )
        ft.add_edge( 610, 613, "O6", "C1" )
        ft.add_edge( 613, 614, -1 )
        ft.add_edge( 608, 615, "O6", "C1" )
        ft.add_edge( 478, 616, "ND2", "C1" )
        ft.add_edge( 616, 618, -1 )

        # the two glycans are the first and third new jumps
        glycan_jumps = ( 1, 3 )  # sugar A, sugar B

    # keep every frame in PyMOL if we are being watched
    if pmm is not None:
        try:
            pmm.keep_history( True )
        except Exception:
            pass

    # make a temporary Pose and give the new FoldTree to it
    temp_pose = Pose()
    temp_pose.assign( pose )
    temp_pose.fold_tree( ft )
    show( temp_pose )

    # split apart the two Fc sugars one-by-one
    for jump_num in glycan_jumps:
        jump = temp_pose.jump( jump_num )
        jump.set_translation( xyzVector_Real( 1000, 1000, 1000 ) )
        temp_pose.set_jump( jump_num, jump )
        show( temp_pose )

    # score the split-apart Pose
    split_score = sf( temp_pose )

    # get the pseudo-interface score
    # total - split = interface ( ie. interface + split = total )
    pseudo_interface_energy = start_score - split_score

    return pseudo_interface_energy
# Prepack. packer.apply(pose) visualize(pose) print ' Score after prepacking:', sf(pose) sf.show(pose) # TEMP # Fold the polysaccharide. print 'Folding/Refining structural polysaccharide with', print pose.total_residue(), #print pose.residue(2).carbohydrate_info().short_name(), 'residues...' print 'residues...' # Prepare job distributor. jd = PyJobDistributor(args.output_folder + output_base_filename, args.n_decoys, sf) starting_pose = Pose() starting_pose.assign(pose) while not jd.job_complete: print ' Decoy', jd.current_num print ' Randomizing positions...' pose.assign(starting_pose) # Run protocol. print ' Refining...' refine_saccharide(pose, args) jd.output_decoy(pose) args.make_movie = False # Make no more than one movie.
# Scan every position of initial_pose against every amino acid in AAs and
# build the corresponding point mutant.
n_res = initial_pose.total_residue()
for seq_pos in range(1, n_res + 1):
    res = initial_pose.residue(seq_pos)

    for AA in AAs:
        # only real substitutions; mutating to the same residue is skipped
        if res.name1() != AA:
            # e.g. <original letter><PDB number><new letter>
            variant_name = \
                res.name1() + \
                str(initial_pose.pdb_info().number(seq_pos)) + \
                AA

            # Check for disulfide special case.
            if res.name() == 'CYD':
                # NOTE(review): presumably the last residue connection is the
                # disulfide bond -- confirm against the Residue API
                disulfide_partner = res.residue_connection_partner(
                    res.n_residue_connections())
                # work on a copy so initial_pose keeps its disulfide
                temp_pose = Pose()
                temp_pose.assign(initial_pose)
                # (Packing causes seg fault if current CYS residue is not
                # also converted before mutating.)
                change_cys_state(seq_pos, 'CYS', temp_pose.conformation())
                change_cys_state(disulfide_partner, 'CYS',
                                 temp_pose.conformation())
                # Mutate protein.
                mutant_pose = mutate_residue(temp_pose, seq_pos, AA,
                                             PACK_RADIUS, sf)
            else:
                # Mutate protein.
                mutant_pose = mutate_residue(initial_pose, seq_pos, AA,
                                             PACK_RADIUS, sf)
# utility functions from file_mover_based_on_fasc import main as get_lowest_E_from_fasc from get_pose_metrics import main as get_pose_metrics ################################ #### INITIAL PROTOCOL SETUP #### ################################ # initialize Rosetta initialize_rosetta() ## load up the poses given from the arguments passed # native pose ( for comparison, really ) native_pose = Pose() native_pose.assign( load_pose( input_args.native_pdb_file ) ) # get the full path of the native PDB name native_pdb_filename_full_path = input_args.native_pdb_file native_pdb_filename = native_pdb_filename_full_path.split( '/' )[-1] native_pdb_name = native_pdb_filename.split( ".pdb" )[0] # change the name of the native PDB name native_pose_name = "native_pose" native_pose.pdb_info().name( native_pose_name ) # load up the working pose working_pose = Pose() working_pose.assign( load_pose( input_args.working_pdb_file ) )
jump_minimizer = MinMover(mm, sf, 'dfpmin', 0.01, True) #dock_hires = DockMCMProtocol() # is not rigid-body #dock_hires.set_scorefxn(sf) #dock_hires.set_partners(partners) # Prepare job distributor. jd = PyJobDistributor(new_filename[:-4], args.n_decoys, sf) if args.ref: if not args.ref.endswith('.pdb'): exit('Reference file must have the ".pdb" file extension.') ref_pose = pose_from_pdb(args.ref) # Begin docking protocol. print '\nDocking...' pose = Pose() # working pose last_pose = Pose() # needed because I have not modified C++ code while not jd.job_complete: print ' Decoy', jd.current_num print ' Randomizing positions...' pose.assign(starting_pose) if not args.local: randomizerA.apply(pose) randomizerB.apply(pose) visualize(pose) slider.apply(pose) visualize(pose) print ' Ligand center is', print pose.jump(JUMP_NUM).get_translation().length, print 'Angstroms from protein center.'
pdb_chain = 'A', do_pack = False, do_min = False ) ) # mutate chain B mutant_pose.assign( mutate_residue( pdb_num, new_residue, mutant_pose, sf, pdb_num = True, pdb_chain = 'B', do_pack = False, do_min = False ) ) except: continue ########################### #### gradient pack/min #### ########################### min_pose = Pose() for ii in range( 1 ): working_pose = mutant_pose.clone() for jj in range( 1 ): # packing task = standard_packer_task( working_pose ) task.or_include_current( True ) task.restrict_to_repacking() rtm = RotamerTrialsMover( sf, task ) rtm.apply( working_pose ) # minimizing mm = MoveMap() mm.set_bb( True ) mm.set_chi( True )
def sequence_mapping(pdb_file, sequence_file, score_file, relax, jobs):
    """
    Threads every sequence of a FASTA file onto the backbone in <pdb_file>,
    then either relaxes or repacks the result and writes the scores to
    <score_file>.

    pdb_file      : path of the backbone PDB (relative to the current working
                    directory, or absolute)
    sequence_file : path of the FASTA file; every sequence line must be
                    preceded by a '>' header carrying an identifier
    score_file    : path of the text file receiving '<identifier> : <score>'
                    lines (overwritten)
    relax         : if true, run FastRelax through a PyJobDistributor and
                    record one score per decoy; otherwise repack once with
                    PackRotamersMover and dump '<identifier>_1.pdb'
    jobs          : number of decoys requested from the job distributor
                    (only used when <relax> is true)

    Returns None.  A missing/invalid input file is reported on stdout and the
    function returns without touching Rosetta.  A malformed FASTA file
    (sequence before any header, header without identifier) prints an error
    and terminates via exit(1).
    """
    # Validate both inputs up front.  The truthiness test comes first so that
    # None or '' never reaches os.path.exists (the old order raised TypeError
    # on None), and the paths are tested as given so absolute paths work too.
    if not (pdb_file and os.path.exists(pdb_file)):
        print('Please provide a valid backbone file, %s doesn\'t exist'
              % (pdb_file,))
        return
    if not (sequence_file and os.path.exists(sequence_file)):
        print('Please provide a valid sequence file, %s doesn\'t exist'
              % (sequence_file,))
        return

    init()
    pose = Pose()
    score_fxn = create_score_function('talaris2014')
    refinement = FastRelax(score_fxn) if relax else None
    pose_from_pdb(pose, pdb_file)

    with open(sequence_file, 'r') as fasta:
        fasta_lines = fasta.readlines()

    # Identifier of the most recent '>' header; None until one has been read.
    job_output = None

    # The context manager closes the score file on every exit path,
    # including the SystemExit raised by exit(1).
    with open(score_file, 'w') as score_out:
        for raw_line in fasta_lines:
            line = raw_line.rstrip()

            # Blank lines carry no information; skip them instead of
            # triggering a pack/relax run on an unchanged pose.
            if not line:
                continue

            if line[0] == '>':
                # The identifier is the first non-empty token of the header.
                tokens = re.split(r':|\s+|\||\n', line[1:])
                job_output = next((tok for tok in tokens if tok), None)
                if job_output is None:
                    print('Error: Please enter an identifier for sequences '
                          'in your fasta file')
                    exit(1)
                continue

            if job_output is None:
                # Sequence data appeared before any header line.
                print('Bad fasta format')
                exit(1)

            # Thread the sequence onto the backbone, one residue per letter.
            # Letters beyond the end of the backbone are reported, not applied.
            n_res = pose.total_residue()
            for resn, letter in enumerate(line, 1):
                if resn <= n_res:
                    MutateResidue(resn, one_to_three[letter]).apply(pose)
                else:
                    print('WARNING: couldn\'t mutate residue number '
                          + str(resn)
                          + ', sequence too long for backbone...')

            if relax:
                jd = PyJobDistributor(job_output, jobs, score_fxn)
                jd.native_pose = pose
                scores = [0] * jobs
                counter = 0
                decoy = Pose()
                while not jd.job_complete:
                    # every decoy starts from the freshly threaded pose
                    decoy.assign(pose)
                    refinement.apply(decoy)
                    jd.output_decoy(decoy)
                    scores[counter] = score_fxn(decoy)
                    counter += 1
                for decoy_number, score in enumerate(scores, 1):
                    score_out.write(job_output + '_' + str(decoy_number)
                                    + ' : ' + str(score) + '\n')
            else:
                # Side chains only: repack without allowing design.
                pose_packer = standard_packer_task(pose)
                pose_packer.restrict_to_repacking()
                packmover = PackRotamersMover(score_fxn, pose_packer)
                packmover.apply(pose)
                score_out.write(job_output + ' : ' + str(score_fxn(pose))
                                + '\n')
                pose.dump_pdb(job_output + '_1.pdb')
def pose_from_sequence(seq, res_type='fa_standard', name='', chain_id='A'):
    """
    Builds and returns a new pose from the single-letter sequence <seq>
    using the <res_type> ResidueType set

    Every protein residue gets phi = -150, psi = 150 and omega = 180, and the
    pose receives a fresh PDBInfo object named <name> (the first four
    characters of <seq> when <name> is empty)

    note: <chain_id> is accepted but currently not applied; the per-residue
        chain/number assignment is disabled in this implementation

    example:
        pose=pose_from_sequence('LIGAND')
    See also:
        Pose
        make_pose_from_sequence
        pose_from_pdb
        pose_from_rcsb
    """
    pose = Pose()
    make_pose_from_sequence(pose, seq, res_type)

    n_res = pose.total_residue()
    # a PDBInfo object sized to the new pose
    pdb_info = PDBInfo(n_res)

    # residue numbering is 1-based
    for seqpos in range(1, n_res + 1):
        if not pose.residue(seqpos).is_protein():
            continue
        # replace the torsions produced by make_pose_from_sequence with a
        # more reasonable default
        pose.set_phi(seqpos, -150)
        pose.set_psi(seqpos, 150)
        pose.set_omega(seqpos, 180)

    # attach the PDBInfo, then name it (falling back to a prefix of <seq>)
    pose.pdb_info(pdb_info)
    pose.pdb_info().name(name or seq[:4])
    return pose
def score_all(pdb, sf, min_mover, beta, threshold_a, threshold_b, threshold_c,
              threshold_inter_ab, threshold_inter_ac, score_type):
    """
    Scores the three chains of <pdb> and the A/B and A/C interfaces, then
    combines the energies into a single value chosen by <score_type>.

    pdb                : pose holding the complex; it is copied, the copies
                         are what get split / re-ordered
    sf                 : score function, called on each single-chain pose
    min_mover          : not used in the visible part of this function
    beta               : forwarded unchanged to the calc_x_list* helpers
    threshold_a/b/c    : per-chain thresholds forwarded to the helpers
    threshold_inter_ab,
    threshold_inter_ac : interface thresholds forwarded to the helpers
    score_type         : 1 = score everything (bind both)
                         2 = penalize binding B' (bind B and not B')
                         3 = bind just the best duplicate (bind max)
                         4 = stabilities only, no binding terms

    NOTE(review): as visible here the combined value is stored in
    init_score and nothing is returned -- confirm whether the function
    continues beyond this point in the original file.
    """
    # copy of the full complex, split into one pose per chain
    whole_complex = Pose()
    whole_complex.assign(pdb)
    per_chain = whole_complex.split_by_chain()

    # two more copies, later reduced to the chain pairs "12" and "13"
    pair_ab = Pose()
    pair_ab.assign(pdb)
    pair_ac = Pose()
    pair_ac.assign(pdb)

    select_ab = SwitchChainOrderMover()
    select_ab.chain_order("12")
    select_ab.apply(pair_ab)

    select_ac = SwitchChainOrderMover()
    select_ac.chain_order("13")
    select_ac.apply(pair_ac)

    # single-chain energies (split_by_chain result is indexed from 1 here)
    energy_a = sf(per_chain[1])
    energy_b = sf(per_chain[2])
    energy_c = sf(per_chain[3])

    # interface energies of the two chain pairs
    inter_ab = InterfaceEnergy_split(pair_ab)
    inter_ac = InterfaceEnergy_split(pair_ac)

    stability_terms = [energy_a, energy_b, energy_c]
    stability_cutoffs = [threshold_a, threshold_b, threshold_c]
    all_terms = stability_terms + [inter_ab, inter_ac]
    all_cutoffs = stability_cutoffs + [threshold_inter_ab, threshold_inter_ac]

    init_score = 0
    if score_type == 1:
        # score everything (bind both)
        init_score = calc_x_list(all_terms, beta, all_cutoffs)
    elif score_type == 2:
        # penalize binding B' (bind B and not B')
        init_score = calc_x_list_2(all_terms, beta, all_cutoffs)
    elif score_type == 3:
        # bind just the best duplicate (bind max)
        init_score = calc_x_list_3(all_terms, beta, all_cutoffs)
    elif score_type == 4:
        # just stabilities and no binding (no bind)
        init_score = calc_x_list(stability_terms, beta, stability_cutoffs)
def mutate_residue(pose, mutant_position, mutant_aa, pack_radius=0.0,
                   pack_scorefxn=''):
    """
    Replaces the residue at  <mutant_position>  in  <pose>  with  <mutant_aa>
    and repacks any residues within  <pack_radius>  Angstroms of the mutating
    residue's center (nbr_atom) using  <pack_scorefxn>

    The input pose is left untouched; the mutated copy is returned.

    pose            : fullatom Pose to mutate
    mutant_position : pose numbering index of the residue to replace
    mutant_aa       : single letter name of the desired ResidueType
    pack_radius     : neighbors whose nbr_atom lies within this distance
                      (Angstroms) of the mutated residue's nbr_atom are
                      repacked (rotamers only, identity kept); 0.0 repacks
                      only the mutated residue
    pack_scorefxn   : score function used for packing; a 'standard' score
                      function is created when omitted

    Raises IOError if  <pose>  is not fullatom.

    example:
        mutate_residue(pose, 30, 'A')
    See also:
        Pose
        PackRotamersMover
        MutateResidue
        pose_from_sequence
    """
    #### a MutateResidue Mover exists similar to this except it does not pack
    #### the area around the mutant residue (no pack_radius feature)

    # the exception was previously constructed but never raised
    if not pose.is_fullatom():
        raise IOError('mutate_residue only works with fullatom poses')

    # work on a copy so the caller's pose is not modified
    test_pose = Pose()
    test_pose.assign(pose)

    # create a standard scorefxn by default
    if not pack_scorefxn:
        pack_scorefxn = create_score_function('standard')

    task = standard_packer_task(test_pose)

    # the mutation is expressed as a design task in which only the mutant
    # amino acid is allowed: a Vector1 of 20 booleans, one per proteogenic
    # amino acid, True only at the integer code of  <mutant_aa>
    mutant_aa = aa_from_oneletter_code(mutant_aa)
    aa_bool = rosetta.utility.vector1_bool()
    for aa_index in range(1, 21):
        aa_bool.append(aa_index == mutant_aa)

    task.nonconst_residue_task(mutant_position).restrict_absent_canonical_aas(
        aa_bool)

    # per-residue packer options for everything except the mutating residue:
    #   outside the pack_radius -> frozen
    #   inside the pack_radius  -> rotamers may change, identity may not
    # (the previous condition used "or", which froze every other residue and
    # made pack_radius ineffective)
    center = pose.residue(mutant_position).nbr_atom_xyz()
    radius_squared = float(pack_radius) ** 2
    for seqpos in range(1, pose.total_residue() + 1):
        if seqpos == mutant_position:
            continue
        neighbor_task = task.nonconst_residue_task(seqpos)
        separation_squared = center.distance_squared(
            test_pose.residue(seqpos).nbr_atom_xyz())
        if separation_squared > radius_squared:
            neighbor_task.prevent_repacking()
        else:
            # without this the standard task would allow the neighbor to be
            # redesigned, not just repacked
            neighbor_task.restrict_to_repacking()

    # apply the mutation and pack nearby residues
    packer = PackRotamersMover(pack_scorefxn, task)
    packer.apply(test_pose)

    return test_pose
# Rosetta must be initialized before any pose can be loaded.
initialize_rosetta()

# Split the native PDB path into its file name and its name without ".pdb".
orig_pdb_filename_full_path = input_args.native_pdb_file
# everything after the last '/' (the whole string if there is no '/')
orig_pdb_filename = orig_pdb_filename_full_path.rsplit('/', 1)[-1]
# everything before the first ".pdb" (the whole string if it is absent)
orig_pdb_name = orig_pdb_filename.partition(".pdb")[0]

# Per-structure directory inside base_structs_dir; packed and minimized
# versions of the native are written here.  Created on first use.
structure_dir = base_structs_dir + orig_pdb_name
if not os.path.isdir(structure_dir):
    os.mkdir(structure_dir)
# path prefix for decoys of this structure
decoy_pdb_name = '/'.join((structure_dir, orig_pdb_name))

# The input native PDB is the base pose.
native_pose = Pose()
native_pose.assign(load_pose(orig_pdb_filename_full_path))
native_pose.pdb_info().name("native")

# Holder for chain and residue designations of native 3ay4; native()
# populates it.
native_pose_info = hold_chain_and_res_designations_3ay4()
native_pose_info.native()

# Score function: built from the weights file when one was given, otherwise
# the default fullatom score function.
sf = (make_fa_scorefxn_from_file(input_args.scorefxn_file)
      if input_args.scorefxn_file is not None
      else get_fa_scorefxn())
# Rosetta must be initialized before any pose can be loaded.
initialize_rosetta()

# PyMOL observer that keeps every frame it is sent (not applied below).
pmm = PyMOL_Mover()
pmm.keep_history(True)

# Native: the packed-and-minimized crystal structure.
native = load_pose("/Users/Research/pyrosetta_dir/project_structs/lowest_E_double_pack_and_min_only_native_crystal_struct_3ay4_Fc_FcgRIII.pdb")
native.pdb_info().name("native")

# Working: the same structure with the Fc sugar removed.
working = load_pose("/Users/Research/pyrosetta_dir/project_structs/lowest_E_double_pack_and_min_only_native_crystal_struct_3ay4_Fc_FcgRIII_removed_Fc_sugar.pdb")
working.pdb_info().name("working")

# Decoy: a copy of the working pose, which is the one that gets modified.
decoy = Pose()
decoy.assign(working)
decoy.pdb_info().name("decoy")

# Attach the glycan described by the .iupac file to atom ND2 of each listed
# ASN residue (pose numbering).
glycosylate_these_ASN = [69, 284]
for ASN in glycosylate_these_ASN:
    glycosylate_pose_by_file(decoy, ASN, "ND2", "/Users/Research/antibody_project/send_to_louis/project_glyco_files/3ay4_Fc_Glycan.iupac")
# name the decoy again now that it has been glycosylated
decoy.pdb_info().name("decoy")

# Report which native residue each glycosylated decoy residue maps to.
for decoy_res in glycosylate_these_ASN:
    native_res = decoy_to_native_res_map[decoy_res]
    print('%s %s' % (decoy_res, native_res))