# Fasta file option
# The sequence is looked up in priority order: first in a FASTA file, then
# as a directly supplied sequence, and finally from a PDB file. If no PDB
# file is given, the default (test) PDB is loaded.
fasta_filename = options.fasta_filename
if fasta_filename:    # defaults to off, empty string
    # the context manager closes the file even if reading it raises
    with open(fasta_filename, 'r') as fasta_file:
        # remove the trailing "\n" of every line, skip any header lines
        # (lines containing '>') and combine the rest into a single sequence
        sequence = ''.join(
            line.strip() for line in fasta_file if '>' not in line)
elif options.sequence:
    sequence = options.sequence
else:
    # Default is the test PDB, not an empty string.
    pdb_filename = options.pdb_filename
    pose_from_file(pose, pdb_filename)
    sequence = pose.sequence()

# fragment files options
long_frag_filename = options.long_frag_filename
long_frag_length = int(options.long_frag_length)
short_frag_filename = options.short_frag_filename
short_frag_length = int(options.short_frag_length)

# folding protocol options
kT = float(options.kT)
long_inserts = int(options.long_inserts)
short_inserts = int(options.short_inserts)
cycles = int(options.cycles)

# PyJobDistributor options
jobs = int(options.jobs)
job_output = options.job_output
def packer_task(pose, PDB_out=False):
    """
    Demonstrates the syntax necessary for basic usage of the PackerTask object.

    Performs demonstrative sidechain packing and selected design on a copy of
    <pose> (the input pose is only scored, never modified), sends the
    intermediate structures to PyMOL through a PyMOLMover and writes
    'packed.pdb' and 'designed.pdb' if <PDB_out> is True.

    Side effects: prints the PackerTasks, scores and sequences, and always
    writes the file 'sample_resfile' in the current working directory.
    """
    # create a copy of the pose
    test_pose = Pose()
    test_pose.assign(pose)

    # this object is contained in PyRosetta v2.0 and above
    pymover = PyMOLMover()

    # create a standard ScoreFunction
    scorefxn = get_fa_scorefxn()  # create_score_function_ws_patch('standard', 'score12')

    ############
    # PackerTask
    # a PackerTask encodes preferences and options for sidechain packing, an
    #    effective Rosetta methodology for changing sidechain conformations, and
    #    design (mutation)
    # a PackerTask stores information on a per-residue basis
    # each residue may be packed or designed
    # PackerTasks are handled slightly differently in PyRosetta
    ####pose_packer = PackerTask()    # this line will not work properly
    pose_packer = standard_packer_task(test_pose)
    # the pose argument tells the PackerTask how large it should be

    # sidechain packing "optimizes" a pose's sidechain conformations by cycling
    #    through (Dunbrack) rotamers (sets of chi angles) at a specific residue
    #    and selecting the rotamer which achieves the lowest score,
    #    enumerating all possibilities for all sidechains simultaneously is
    #    impractically expensive so the residues to be packed are individually
    #    optimized in a "random" order
    # packing options include:
    #    -"freezing" the residue, preventing it from changing conformation
    #    -including the original sidechain conformation when determining the
    #        lowest scoring conformation
    pose_packer.restrict_to_repacking()    # turns off design
    pose_packer.or_include_current(True)    # considers original conformation
    print(pose_packer)

    # packing and design can be performed by a PackRotamersMover, it requires
    #    a ScoreFunction, for optimizing the sidechains and a PackerTask,
    #    setting the packing and design options
    packmover = protocols.minimization_packing.PackRotamersMover(
        scorefxn, pose_packer)

    scorefxn(pose)    # to prevent verbose output on the next line
    print('\nPre packing score:', scorefxn(test_pose))
    test_pose.pdb_info().name('original')    # for PyMOLMover
    pymover.apply(test_pose)

    packmover.apply(test_pose)
    print('Post packing score:', scorefxn(test_pose))
    test_pose.pdb_info().name('packed')    # for PyMOLMover
    pymover.apply(test_pose)
    if PDB_out:
        test_pose.dump_pdb('packed.pdb')

    # since the PackerTask specifies how the sidechains change, it has been
    #    extended to include sidechain constitutional changes allowing
    #    protein design, this method of design is very similar to sidechain
    #    packing; all rotamers of the possible mutants at a single residue
    #    are considered and the lowest scoring conformation is selected
    # design options include:
    #    -allow all amino acids
    #    -allow all amino acids except cysteine
    #    -allow specific amino acids
    #    -prevent specific amino acids
    #    -allow polar amino acids only
    #    -prevent polar amino acids
    #    -allow only the native amino acid
    # the myriad of packing and design options can be set manually or, more
    #    commonly, using a specific file format known as a resfile
    #    resfile syntax is explained at:
    #    http://www.rosettacommons.org/manuals/archive/rosetta3.1_user_guide/file_resfiles.html
    # manually setting design options is tedious, the methods below are handy
    #    for creating resfiles

    # mutate the "middle" residues
    total_residues = test_pose.total_residue()
    center = total_residues // 2
    # clamp the five-residue design window to valid (1-based) pose numbers so
    #    that very short poses do not request residues outside 1..total
    design_first = max(center - 2, 1)
    design_last = min(center + 2, total_residues)
    specific_design = {
        i: 'ALLAA' for i in range(design_first, design_last + 1)
    }

    # write a resfile to perform these mutations
    generate_resfile_from_pose(test_pose, 'sample_resfile', False,
                               specific=specific_design)

    # setup the design PackerTask, use the generated resfile
    pose_design = standard_packer_task(test_pose)
    rosetta.core.pack.task.parse_resfile(test_pose, pose_design,
                                         'sample_resfile')
    print(pose_design)

    # prepare a new structure
    test_pose.assign(pose)

    # perform design
    designmover = protocols.minimization_packing.PackRotamersMover(
        scorefxn, pose_design)
    # the message is a single literal: a backslash line continuation inside
    #    the string would leak source indentation into the printed text
    print('\nDesign with all proteogenic amino acids at (pose numbered) '
          'residues', design_first, 'to', design_last)

    # 0-based window of the sequence string around the designed residues;
    #    the start is clamped because a negative start would wrap around to
    #    the end of the string and display the wrong (usually empty) segment
    window = slice(max(center - 5, 0), center + 4)
    print('Pre-design score:', scorefxn(test_pose))
    print('Pre-design sequence: ...' + test_pose.sequence()[window] + '...')

    designmover.apply(test_pose)    # perform design
    print('\nPost-design score:', scorefxn(test_pose))
    print('Post-design sequence: ...' + test_pose.sequence()[window] + '...')
    test_pose.pdb_info().name('designed')    # for PyMOLMover
    pymover.apply(test_pose)
    if PDB_out:
        test_pose.dump_pdb('designed.pdb')
def interface_ddG(pose, mutant_position, mutant_aa, movable_jumps,
                  scorefxn='', cutoff=8.0, out_filename=''):
    """
    Returns the score change ("ddG") caused by mutating residue
    <mutant_position> of <pose> to <mutant_aa>.

    The value is calc_binding_energy of the mutant minus calc_binding_energy
    of an unmodified copy of <pose>, both evaluated with <scorefxn>,
    <mutant_position> and <cutoff>. When <scorefxn> is falsy (the default
    ''), a ScoreFunction with custom weights is built here. <pose> itself is
    not modified; all work happens on copies.

    The mutant is always sent to PyMOL (structure and energies) and is also
    written to <out_filename> when that is non-empty. <movable_jumps> is
    accepted but not used by the active code path (it belongs to the
    calc_interaction_energy alternative described in the comments below).
    """
    # 1. keep an untouched ("wild-type") reference copy
    wild_type = Pose()
    wild_type.assign(pose)

    # 2. fall back to a specific default ScoreFunction
    if not scorefxn:
        # this is a modified version of the scoring function discussed in
        #    PNAS 2002 (22)14116-21, without environment dependent hbonding
        scorefxn = ScoreFunction()
        default_weights = (
            (fa_atr, 0.44),
            (fa_rep, 0.07),
            (fa_sol, 1.0),
            (hbond_bb_sc, 0.5),
            (hbond_sc, 1.0),
        )
        for score_term, weight in default_weights:
            scorefxn.set_weight(score_term, weight)

    # 3. second copy, this one receives the mutation
    mutant = Pose()
    mutant.assign(pose)

    # 4. introduce the mutation
    # pack_radius (the 0.0 argument) is redundant for this application since
    #    the area around the mutation is already repacked
    mutant = mutate_residue(mutant, mutant_position, mutant_aa, 0.0, scorefxn)

    # 5. score both states
    # calc_interaction_energy is exposed in PyRosetta, however it does not
    #    alter the protein conformation after translation and may miss
    #    significant interactions; it separates the pose by 500 Angstroms
    #    along the jumps given in a Vector1 (this is where movable_jumps
    #    would be used) and may be wrong if setup_foldtree was not applied
    # calc_binding_energy is the manual separate-and-score alternative
    wt_score = calc_binding_energy(wild_type, scorefxn, mutant_position,
                                   cutoff)
    mut_score = calc_binding_energy(mutant, scorefxn, mutant_position, cutoff)
    ddg = mut_score - wt_score

    # 6. output data (optional)
    # label the mutant as <original aa><PDB number><new aa> for PyMOL
    seq_index = mutant_position - 1
    label = ''.join((
        pose.sequence()[seq_index],
        str(pose.pdb_info().number(mutant_position)),
        mutant.sequence()[seq_index],
    ))
    mutant.pdb_info().name(label)

    viewer = PyMOLMover()
    scorefxn(mutant)    # score before sending energies
    viewer.apply(mutant)
    viewer.send_energy(mutant)

    # write the mutant structure to a PDB file only when a name was given
    if out_filename:
        mutant.dump_pdb(out_filename)

    return ddg
def design_with_config(**config) -> dict:
    """Design a randomly chosen pose with a configured score function.

    A pose is picked at random from the module-level ``pdbs`` mapping, packed
    with a ``PackRotamersMover`` driven by a resfile written to
    ``./design.resfile``, and then evaluated.

    Args:
        **config: Keyword options forwarded as a dict to
            ``create_scfxn.creat_scfxn_from_config``.

    Returns:
        A dict with the keys ``sequence``, ``pose`` (a ``PackedPose``),
        ``prot_len``, ``prot_name``, ``bloss62`` (negated length-normalised
        alignment score against the pre-design sequence), ``ref15`` and
        ``scfxn`` (scores divided by sequence length), ``pssm`` (negated
        PSSM metric) and ``runtime`` (seconds).

    Note:
        The pose stored in ``pdbs`` is packed in place; only ``native_pose``
        is a copy.
    """
    start_time = time.time()  # Runtime measuring
    print('DESIGNING')

    ref15 = get_fa_scorefxn()  # REF15
    # Compare by value: ``is`` against a string literal tests object identity
    # and raises a SyntaxWarning on current Python versions.
    # NOTE(review): ``config`` is gathered via ``**config`` and is therefore
    # always a dict, so this condition cannot be true as written and the
    # configured score function is always built. Confirm which config entry
    # was meant to select REF15.
    if config == 'ref15':
        scfxn = ref15
    else:
        # optimization score function
        scfxn = create_scfxn.creat_scfxn_from_config(config=config)

    # pick a random protein
    prot_name = random.choice(list(pdbs.keys()))
    pose = pdbs[prot_name]

    # copy pose for comparison after design
    native_pose = Pose()
    native_pose.assign(pose)

    resfile = "./design.resfile"
    with open(resfile, "w") as f:
        f.write("ALLAAxc \n")
        f.write("start\n")

    taskf = prs.rosetta.core.pack.task.TaskFactory()
    taskf.push_back(
        prs.rosetta.core.pack.task.operation.InitializeFromCommandline())
    taskf.push_back(prs.rosetta.core.pack.task.operation.ReadResfile(resfile))
    packer = prs.rosetta.protocols.minimization_packing.PackRotamersMover(
        scfxn)
    packer.task_factory(taskf)
    # the created task is not used further; the call is kept as in the
    # original protocol
    taskf.create_task_and_apply_taskoperations(pose)
    packer.apply(pose)

    # the pose is not modified after packing, so its sequence and length
    # are computed once and reused below
    designed_sequence = pose.sequence()
    prot_len = len(designed_sequence)

    # TODO: What defines our loss, for now use REF15 or bloss62 matrix
    bloss62 = substitution_matrices.load("BLOSUM62")
    # compute normalized similarity
    similar = pairwise2.align.globaldx(
        designed_sequence, native_pose.sequence(), bloss62,
        score_only=True) / prot_len

    # compute Rosetta SimpleMetrics PSSM
    pssm_score = pssms[prot_name].calculate(pose)
    print('scored with pssm ')

    # energies are returned normalized by length
    took = time.time() - start_time
    # This has to be serializable in order to get pickled and sent back to
    # the parent
    result = {
        "sequence": designed_sequence,
        "pose": PackedPose(pose),
        "prot_len": prot_len,
        "prot_name": prot_name,
        "bloss62": -similar,
        "ref15": (ref15(pose) / prot_len),
        "scfxn": (scfxn(pose) / prot_len),
        "pssm": -pssm_score,
        "runtime": took
    }
    print('DESIGN_DONE: ', result)
    print("Took: {} to run design on length {}".format(
        time.strftime("%H: %M: %S", time.gmtime(took)), prot_len))
    return result
def scanning(pdb_filename, partners, mutant_aa='A', interface_cutoff=8.0,
             output=False, trials=1, trial_output=''):
    """
    Performs "scanning" at an interface within <pdb_filename> between
    <partners> by mutating every interface residue to <mutant_aa> and
    recording the score change reported by interface_ddG (called with a
    custom-weighted "ddG" ScoreFunction and <interface_cutoff>).

    Interface residues are those flagged by an Interface object built on
    jump 1 with distance <interface_cutoff>.

    <trials> scans are performed (to average results) with summaries printed
    and written to <trial_output>_(trial#).txt. If <output> is True a
    filename derived from the pose name is passed to interface_ddG so the
    mutant structures are written out. Structures are exported to a PyMOL
    instance. After all trials the results of scanning_analysis(trial_output)
    are printed as likely hotspot residues.
    """
    # 1. create a pose from the desired PDB file
    pose = Pose()
    pose_from_file(pose, pdb_filename)

    # 2. setup the docking FoldTree and other related parameters
    dock_jump = 1
    movable_jumps = Vector1([dock_jump])
    protocols.docking.setup_foldtree(pose, partners, movable_jumps)

    # 3. create ScoreFunctions for the Interface and "ddG" calculations
    # the pose's Energies objects MUST be updated for the Interface object to
    #    work normally
    scorefxn = get_fa_scorefxn()  # create_score_function('standard')
    scorefxn(pose)    # needed for proper Interface calculation

    # setup a "ddG" ScoreFunction, custom weights
    ddG_scorefxn = ScoreFunction()
    ddG_scorefxn.set_weight(core.scoring.fa_atr, 0.44)
    ddG_scorefxn.set_weight(core.scoring.fa_rep, 0.07)
    ddG_scorefxn.set_weight(core.scoring.fa_sol, 1.0)
    ddG_scorefxn.set_weight(core.scoring.hbond_bb_sc, 0.5)
    ddG_scorefxn.set_weight(core.scoring.hbond_sc, 1.0)

    # 4. create an Interface object for the pose
    interface = Interface(dock_jump)
    interface.distance(interface_cutoff)
    interface.calculate(pose)

    # 5. create a PyMOLMover for sending output to PyMOL (optional)
    pymover = PyMOLMover()
    pymover.keep_history(True)    # for multiple trajectories
    pymover.apply(pose)
    pymover.send_energy(pose)

    # 6. perform scanning trials
    # the large number of packing operations introduces a lot of variability,
    #    for best results, perform several trials and average the results,
    #    these score changes are useful to QUALITATIVELY defining "hotspot"
    #    residues
    # this script does not use a PyJobDistributor since no PDB files are output
    for trial in range(trials):
        # store the ddG values in a dictionary keyed by pose residue number
        ddG_mutants = {}
        for i in range(1, pose.total_residue() + 1):
            # only residues at the interface are scanned
            if not interface.is_interface(i):
                continue

            # this way you can TURN OFF output by providing False arguments
            #    (such as '', the default)
            filename = ''
            if output:
                # [:-4] drops the last four characters of the pose name
                #    (presumably the ".pdb" extension)
                filename = (pose.pdb_info().name()[:-4] + '_' +
                            pose.sequence()[i - 1] +
                            str(pose.pdb_info().number(i)) + '->' + mutant_aa)

            # determine the interface score change upon mutation
            ddG_mutants[i] = interface_ddG(pose, i, mutant_aa,
                                           movable_jumps, ddG_scorefxn,
                                           interface_cutoff, filename)

        # output results
        print('=' * 80)
        print('Trial', str(trial + 1))
        print(
            'Mutants (PDB numbered)\t\"ddG\" (interaction dependent score change)'
        )
        # sorted residue numbers are easier to read
        display = [
            pose.sequence()[i - 1] + str(pose.pdb_info().number(i)) +
            mutant_aa + '\t' + str(ddG_mutants[i]) + '\n'
            for i in sorted(ddG_mutants)
        ]
        print(''.join(display)[:-1])    # [:-1] drops the final newline
        print('=' * 80)

        # write to file; the context manager closes it even if writing fails
        with open(trial_output + '_' + str(trial + 1) + '.txt', 'w') as f:
            f.writelines(display)

    #### alternate output using scanning_analysis, only display
    ####    mutations with "deviant" score changes
    print('Likely Hotspot Residues')
    for hotspot in scanning_analysis(trial_output):
        print(hotspot)
    print('=' * 80)