def has_interface(self, pose: pyrosetta.Pose, interface: str) -> bool:
    """
    Tell whether the pose consists of exactly the chains named in ``interface``.

    :param pose: pose to inspect; when ``None``, ``self.pose`` is used instead.
    :param interface: chain letters with ``_`` between the groups (e.g. ``A_B``).
        Underscores are removed and each remaining character is taken as one chain letter.
    :return: ``True`` only when the set of chains found in the pose equals the
        requested set (an extra or a missing chain both give ``False``).
    """
    target = self.pose if pose is None else pose
    to_pdb = target.pdb_info().pose2pdb
    present = set()
    for pose_index in range(1, target.total_residue() + 1):
        # the second whitespace-separated field of pose2pdb's output is read as the chain
        present.add(to_pdb(pose_index).split()[1])
    requested = set(interface.replace('_', ''))
    return present == requested
def get_NGL_selection_from_AtomID(pose: pyrosetta.Pose, atom_id: pyrosetta.AtomID):
    """
    Format an AtomID as a selection string of the form ``[RES]number:chain.ATOM``.

    :param pose: pose the atom belongs to.
    :param atom_id: AtomID whose ``rsd()`` is the pose residue index and ``atomno()`` the atom index.
    :return: string built from the stripped three-letter residue name, the PDB residue
        number, the chain and the stripped atom name.
    """
    pose_index = atom_id.rsd()
    res = pose.residue(pose_index)
    atom = res.atom_name(atom_id.atomno()).strip()
    # pose2pdb output is split on whitespace into exactly two fields: number and chain
    pdb_fields = pose.pdb_info().pose2pdb(pose_index).strip().split()
    pdb_number, chain_letter = pdb_fields
    resn = res.name3().strip()
    return f'[{resn}]{pdb_number}:{chain_letter}.{atom}'
def make_mutant(self, pose: pyrosetta.Pose, mutation: str, chain='A') -> pyrosetta.Pose:
    """
    Make a point mutant (``A23D``) on a clone of the pose and relax around the mutated site.

    :param pose: pose to mutate. It is cloned; the input pose is left untouched.
    :param mutation: one-letter wild-type residue, PDB residue number, one-letter new residue.
    :param chain: PDB chain of the residue.
    :return: the mutated clone, relaxed with ``relax_around_mover`` (distance 12, 15 cycles).
    :raises ValueError: if ``mutation`` cannot be parsed or the residue is absent from the pose.
    :raises AssertionError: if the residue found in the pose is not the stated wild-type residue.
    """
    # raw string: '\w' and '\d' are invalid escapes in a normal string literal
    rex = re.match(r'(\w)(\d+)(\w)', mutation)
    if rex is None:
        raise ValueError(f'Cannot parse mutation {mutation!r}: expected a form like A23D')
    from_resn1 = rex.group(1)
    pdb_resi = int(rex.group(2))
    to_resn1 = rex.group(3)
    r = pose.pdb_info().pdb2pose(res=pdb_resi, chain=chain)
    if r == 0:
        # pdb2pose gives 0 when the PDB residue is not in the pose
        raise ValueError(f'Residue {pdb_resi}:{chain} is not present in the pose')
    rn = pose.residue(r).name1()
    # the message now names the expected residue letter (it used to print the whole match)
    assert rn == from_resn1, \
        f'residue {r}(pose)/{pdb_resi}(pdb) is a {rn}, not a {from_resn1}'
    mutant = pose.clone()
    MutateResidue = pyrosetta.rosetta.protocols.simple_moves.MutateResidue
    MutateResidue(target=r, new_res=self._name3[to_resn1]).apply(mutant)
    # relax_around_mover takes the PDB residue number, not the pose index
    self.relax_around_mover(mutant, pdb_resi, chain, distance=12, cycles=15)
    return mutant
def get_neighbour_vector(
        self,
        pose: pyrosetta.Pose,
        resi: int,
        chain: str,
        distance: int,
        include_focus_in_subset: bool = True,
        own_chain_only: bool = False
) -> pyrosetta.rosetta.utility.vector1_bool:
    """
    Select the residues lying within ``distance`` of a given residue.

    :param pose: pose the selection is applied to.
    :param resi: residue number; PDB numbering when ``chain`` is given, pose numbering when it is ``None``.
    :param chain: PDB chain, or ``None`` to treat ``resi`` as a pose index.
    :param distance: neighbourhood distance handed to ``NeighborhoodResidueSelector``.
    :param include_focus_in_subset: whether the focus residue itself is part of the result.
    :param own_chain_only: restrict the result to ``chain`` (ignored when ``chain`` is ``None``).
    :return: the boolean vector returned by applying the selector to the pose.
    """
    selectors = pyrosetta.rosetta.core.select.residue_selector
    focus = selectors.ResidueIndexSelector()
    if chain is None:
        pose_index = resi  # already pose numbering
    else:
        pose_index = pose.pdb_info().pdb2pose(chain=chain, res=resi)
    focus.set_index(pose_index)
    neighbourhood = selectors.NeighborhoodResidueSelector(
        focus,
        distance=distance,
        include_focus_in_subset=include_focus_in_subset)
    if not own_chain_only or chain is None:
        return neighbourhood.apply(pose)
    # intersect the neighbourhood with the focus residue's own chain
    same_chain = selectors.ChainSelector(chain)
    return selectors.AndResidueSelector(neighbourhood, same_chain).apply(pose)
def pose_fx(pose: pyrosetta.Pose):
    """
    Mutate PDB residue 41 of chain A to the residue type named ``HIS``, in place.

    NOTE(review): the previous docstring read "Histidine in delta", yet the name
    requested here is ``HIS`` whereas ``poised_pose_fx`` asks for ``HIS_D`` -- confirm
    which tautomer is intended.

    :param pose: pose to modify.
    """
    target = pose.pdb_info().pdb2pose(res=41, chain='A')
    mover = pyrosetta.rosetta.protocols.simple_moves.MutateResidue(target=target, new_res='HIS')
    mover.apply(pose)
def add_bfactor_from_score(pose: pyrosetta.Pose):
    """
    Write the per-residue total score, rescaled to 0-100, into the B-factor of every atom.

    Snippet for testing in Jupyter

    >>> import nglview as nv
    >>> view = nv.show_rosetta(pose)
    >>> # view = nv.show_file('test.cif')
    >>> view.clear_representations()
    >>> view.add_tube(radiusType="bfactor", color="bfactor", radiusScale=0.10, colorScale="RdYlBu")
    >>> view

    ``replace_res_remap_bfactors`` may have been a cleaner strategy. This was quicker to write.

    If this fails, it may be because the pose was not scored first.

    Residues whose score cannot be read get a B-factor of 0. When every scored residue
    has the same value there is no range to scale, so they all get 0 as well.

    :param pose: scored pose; its ``pdb_info`` is modified in place.
    :raises ValueError: if the pose's ``pdb_info`` is flagged as obsolete.
    """
    if pose.pdb_info().obsolete():
        raise ValueError(
            'Pose pdb_info is flagged as obsolete (change `pose.pdb_info().obsolete(False)`)'
        )
    # scores
    energies = pose.energies()
    total_score = pyrosetta.rosetta.core.scoring.ScoreType.total_score
    n_residues = pose.total_residue()

    def get_res_score(res):
        # best effort: a residue without readable energies is marked as nan.
        # `Exception` (not a bare except) so KeyboardInterrupt/SystemExit still propagate.
        try:
            return energies.residue_total_energies(res)[total_score]
        except Exception:
            return float('nan')

    # the array goes from zero (nan) to n_residues, so pose indices can be used directly
    total_scores = np.array(
        [float('nan')] + [get_res_score(res) for res in range(1, n_residues + 1)])
    mask = np.isnan(total_scores)
    if not mask.all():  # nanmin/nanmax warn on an all-nan array
        total_scores -= np.nanmin(total_scores)
        span = np.nanmax(total_scores)
        if span > 0:  # guard against dividing by zero when all scores are equal
            total_scores *= 100 / span
    # nan entries are zeroed on the next line; nan_to_num also makes infinities finite
    total_scores = np.nan_to_num(total_scores, nan=100)
    total_scores[mask] = 0.
    # add to pose
    pdb_info = pose.pdb_info()
    for res in range(1, n_residues + 1):
        for atom in range(1, pose.residue(res).natoms() + 1):
            pdb_info.bfactor(res, atom, total_scores[res])
def poised_pose_fx(pose: pyrosetta.Pose):
    """
    Histidine in delta and cysteine in thiolate.

    Mutates, in place, PDB residue 41 of chain A to ``HIS_D`` and then PDB residue 145
    of chain A to ``CYZ``.

    :param pose: pose to modify.
    """
    to_pose_index = pose.pdb_info().pdb2pose
    replacements = ((41, 'HIS_D'), (145, 'CYZ'))
    for pdb_number, residue_name in replacements:
        target = to_pose_index(res=pdb_number, chain='A')
        mover = pyrosetta.rosetta.protocols.simple_moves.MutateResidue(target=target,
                                                                       new_res=residue_name)
        mover.apply(pose)
def copy_pdb_info(self, original_pose: pyrosetta.Pose, final_pose: pyrosetta.Pose):
    """
    Copy per-residue PDB information from ``original_pose`` onto ``final_pose``.

    Iterating ``self`` yields rows whose first element is handed to ``ResInfo.get``;
    presumably it is the pose index of the matching residue in ``original_pose`` --
    confirm against the class definition.

    :param original_pose: pose whose ``pdb_info`` is the source.
    :param final_pose: pose whose ``pdb_info`` is overwritten, residue by residue, in row order.
    :raises NotImplementedError: if a row's first element is 0 (unsupported case,
        annotated "insertion code is a letter" in the original code).
    """
    # get original residue info
    ResInfo.pdb_info = original_pose.pdb_info()
    original = []
    for row in self:
        if row[0] == 0:
            # raise directly: the stray `previous.icode` expression that used to sit
            # here could fail with AttributeError before this line was reached
            raise NotImplementedError('rows with index 0 (insertion) are not supported')
        original.append(ResInfo.get(row[0]))
    # set info
    pdb_info = final_pose.pdb_info()
    for i, res_info in enumerate(original, start=1):
        res_info.set(i, pdb_info)
    # `pdb_info` is already the PDBInfo object: call its method, do not call the object
    pdb_info.obsolete(False)
def pose_from_sequence(seq, res_type='fa_standard', name='', chain_id='A'):
    """
    Returns a pose built from the single-letter amino acid codes in ``seq`` using the
    ``res_type`` ResidueType set. Protein residues are given phi -150, psi 150 and
    omega 180. The new pose receives a fresh PDBInfo named ``name``; when ``name`` is
    empty the first four characters of ``seq`` are used.

    NOTE: ``chain_id`` is accepted but not applied here (the statements that set the
    chain and the numbering are commented out in the original).

    example:
        pose=pose_from_sequence('LIGAND')
    See also:
        Pose
        make_pose_from_sequence
        pose_from_file
        pose_from_rcsb
    """
    pose = Pose()
    make_pose_from_sequence(pose, seq, res_type)

    # create a PDBInfo object sized to the pose
    fresh_info = PDBInfo(pose.total_residue())

    for position in range(1, pose.total_residue() + 1):
        if not pose.residue(position).is_protein():
            continue
        # set to a more reasonable default
        pose.set_phi(position, -150)
        pose.set_psi(position, 150)
        pose.set_omega(position, 180)

    # set the PDBInfo
    pose.pdb_info(fresh_info)

    # default name: the first four characters of the sequence
    label = name if name else seq[:4]
    pose.pdb_info().name(label)
    return pose
def movement(self, original: pyrosetta.Pose, resi: int, chain: str, distance: int,
             trials: int = 50, temperature: int = 1.0, replicate_number: int = 10):
    """
    Return the largest RMSD of one residue over several backrub Monte Carlo runs.

    This method was adapted from a notebook, not from an official source, and is
    experimental; it should really be a filter and score combination.

    :param original: starting pose; each replicate works on a clone.
    :param resi: residue number (PDB numbering, or pose numbering if ``chain`` is ``None``).
    :param chain: PDB chain, or ``None`` to treat ``resi`` as a pose index.
    :param distance: neighbourhood distance used to choose the backrub pivot residues.
    :param trials: passed as both ``maxtrials`` and ``max_accepted_trials``.
    :param temperature: Monte Carlo temperature (note the default is the float 1.0).
    :param replicate_number: number of independent replicates.
    :return: largest ``residue_rmsd_nosuper`` value seen over the first four atoms of
        the residue, or ``0`` if no replicate exceeded zero.
    """
    # this code is experimental
    neighbours = self.get_neighbour_vector(pose=original, resi=resi, chain=chain,
                                           distance=distance, own_chain_only=False)
    # residue of interest in pose numbering
    target_res = resi if chain is None else original.pdb_info().pdb2pose(chain=chain, res=resi)
    # prep
    pivots = pyrosetta.rosetta.core.select.residue_selector.ResidueVector(neighbours)
    backrub = pyrosetta.rosetta.protocols.backrub.BackrubMover()
    backrub.set_pivot_residues(pivots)
    # https://www.rosettacommons.org/docs/latest/scripting_documentation/RosettaScripts/Movers/movers_pages/GenericMonteCarloMover
    monte_carlo = pyrosetta.rosetta.protocols.monte_carlo.GenericMonteCarloMover(
        maxtrials=trials,
        max_accepted_trials=trials,
        task_scaling=5,
        mover=backrub,
        temperature=temperature,
        sample_type='low',
        drift=True)
    monte_carlo.set_scorefxn(self.scorefxn)
    # indices 1-4: the first four atoms of the residue (the backbone, per the original comment)
    backbone_atoms = pyrosetta.rosetta.utility.vector1_unsigned_long(4)
    for atom_index in (1, 2, 3, 4):
        backbone_atoms[atom_index] = atom_index
    residue_rmsd = pyrosetta.rosetta.core.scoring.residue_rmsd_nosuper
    # find most deviant
    best_r = 0
    for _ in range(replicate_number):
        variant = original.clone()
        monte_carlo.apply(variant)
        if monte_carlo.accept_counter() > 0:
            variant = monte_carlo.last_accepted_pose()  # pretty sure redundant
        deviation = residue_rmsd(variant.residue(target_res),
                                 original.residue(target_res),
                                 backbone_atoms)
        best_r = max(best_r, deviation)
    return best_r
def pose2pandas(pose: pyrosetta.Pose, scorefxn: pyrosetta.ScoreFunction) -> pd.DataFrame:
    """
    Score the pose and return its per-residue energies as a pandas dataframe.

    The pose's energies are cleared and recomputed with ``scorefxn`` first.

    :param pose: pose to score (its energies are modified).
    :param scorefxn: score function applied to the pose.
    :return: dataframe built from ``residue_total_energies_array()`` plus a ``residue``
        column holding the one-letter name followed by the ``pose2pdb`` string.
    """
    pose.energies().clear_energies()
    scorefxn(pose)
    table = pd.DataFrame(pose.energies().residue_total_energies_array())
    pdb_info = pose.pdb_info()

    def label(row_index):
        # dataframe rows are zero-based, pose residues are one-based
        pose_index = row_index + 1
        return pose.residue(pose_index).name1() + pdb_info.pose2pdb(pose_index)

    table['residue'] = table.index.to_series().apply(label)
    return table
def clarify_selector(selector: pyrosetta.rosetta.core.select.residue_selector.ResidueSelector,
                     pose: pyrosetta.Pose) -> List['str']:
    """
    List the residues picked by a selector in NGL selection format.

    Example, [CMP]787:H

    :param selector: residue selector to apply.
    :param pose: pose the selector is applied to.
    :return: one string per selected residue: the three-letter name in square brackets
        followed by the stripped ``pose2pdb`` string with spaces replaced by colons.
    """
    to_pdb = pose.pdb_info().pose2pdb
    selected = pyrosetta.rosetta.core.select.residue_selector.ResidueVector(selector.apply(pose))
    descriptions = []
    for pose_index in selected:
        resn = pose.residue(pose_index).name3()
        position = to_pdb(pose_index).strip().replace(" ", ":")
        descriptions.append(f'[{resn}]{position}')
    return descriptions
def __init__(self, mutation_name: str, chain: str, pose: pyrosetta.Pose):
    """
    Parse a point mutation and look up the matching residue in the pose.

    :param mutation_name: mutation as given by the user; normalised by ``self.parse_mutation``.
    :param chain: PDB chain of the mutated residue.
    :param pose: pose in which the residue is looked up.
    :raises ValueError: if the normalised mutation does not look like ``A23D``.

    If the residue is absent from the pose (``pdb2pose`` gives 0), ``pose_residue``,
    ``pose_resn1`` and ``pose_resn3`` are set to ``None``.
    """
    self.mutation = self.parse_mutation(mutation_name)
    # raw string: '\w' and '\d' are invalid escapes in a normal string literal
    rex = re.match(r'(\w)(\d+)(\w)', self.mutation)
    if rex is None:
        raise ValueError(f'Cannot parse mutation {self.mutation!r} '
                         f'(given as {mutation_name!r}): expected a form like A23D')
    self.pdb_resi = int(rex.group(2))
    self.chain = chain
    self.from_resn1 = rex.group(1)
    self.from_resn3 = self._name3[rex.group(1)]
    self.to_resn1 = rex.group(3)
    self.to_resn3 = self._name3[rex.group(3)]
    pdb2pose = pose.pdb_info().pdb2pose
    self.pose_resi = pdb2pose(res=self.pdb_resi, chain=self.chain)
    if self.pose_resi != 0:
        self.pose_residue = pose.residue(self.pose_resi)
        self.pose_resn1 = self.pose_residue.name1()
        self.pose_resn3 = self.pose_residue.name3()
    else:
        # residue not found in the pose
        self.pose_residue = None
        self.pose_resn1 = None
        self.pose_resn3 = None
def relax_around_mover(self,
                       pose: pyrosetta.Pose,
                       resi: int,
                       chain: str,
                       scorefxn=None,
                       cycles=5,
                       distance=5,
                       cartesian=False) -> None:
    """
    FastRelax the part of the pose lying within ``distance`` of resi:chain, in place.

    :param pose: pose to relax (modified).
    :param resi: PDB residue number.
    :param chain: PDB chain.
    :param scorefxn: score function; ``pyrosetta.get_fa_scorefxn()`` when ``None``.
    :param cycles: of relax (3 quick, 15 thorough)
    :param distance: neighbourhood distance around the residue.
    :param cartesian: forwarded to ``FastRelax.cartesian``.
    :return: None
    """
    selectors = pyrosetta.rosetta.core.select.residue_selector
    if scorefxn is None:
        scorefxn = pyrosetta.get_fa_scorefxn()
        # self._cst_score(scorefxn)  # left disabled, as in the original
    movemap = pyrosetta.MoveMap()
    # residues around the focus residue, the focus itself included
    centre = selectors.ResidueIndexSelector()
    centre.set_index(pose.pdb_info().pdb2pose(chain=chain, res=resi))
    shell = selectors.NeighborhoodResidueSelector(centre,
                                                  distance=distance,
                                                  include_focus_in_subset=True).apply(pose)
    # only that shell may move, backbone and side chains alike
    movemap.set_bb(allow_bb=shell)
    movemap.set_chi(allow_chi=shell)
    relax = pyrosetta.rosetta.protocols.relax.FastRelax(scorefxn, cycles)
    relax.set_movemap_disables_packing_of_fixed_chi_positions(True)
    relax.set_movemap(movemap)
    relax.cartesian(cartesian)
    relax.apply(pose)
# `norm` is a method: call it so the value is printed rather than the bound method
print('norm of xyz at5N:', at5N.xyz().norm())
print(res5.atoms())  # marked "Still missing" in the original script

# AtomIDs are built as (atom number, residue number): atoms 1, 2, 3 of residue 5
atomN = AtomID(1, 5)
atomCA = AtomID(2, 5)
atomC = AtomID(3, 5)

print('bond length of N-CA in residue 5 is ')
print(pose.conformation().bond_length(atomN, atomCA))
print('bond angle of N-CA-C in residue 5 is ')
print(pose.conformation().bond_angle(atomN, atomCA, atomC))
print('setting bond length of N-CA in residue 5 to 1.5A ')
pose.conformation().set_bond_length(atomN, atomCA, 1.5)
print('setting bond angle of N-CA-C in residue 5 to 90 ')
# NOTE(review): presumably the angle unit is radians, in which case 90 is not
# 90 degrees -- confirm before relying on this value
pose.conformation().set_bond_angle(atomN, atomCA, atomC, 90)
# TODO: make the above work with atom objects instead of atomIDs

print('pose was generated from this pdb file: ', pose.pdb_info().name())
print('pose numbering for chain A, residue 5, is ',
      pose.pdb_info().pdb2pose('A', 5))
print('pdb chain letter and residue number for residue 5, is ',
      pose.pdb_info().pose2pdb(5))
# TODO: pdb_info.* does not tab-complete

# Creating residue example
chm = rosetta.core.chemical.ChemicalManager.get_instance()
rts = chm.residue_type_set('fa_standard')
ala = rosetta.core.conformation.ResidueFactory.create_residue(
    rts.name_map('ALA'))

print(ala)
def sample_refinement(pdb_filename, kT=1.0, smallmoves=3, shearmoves=5,
                      backbone_angle_max=7, cycles=9,
                      jobs=1, job_output='refine_output'):
    """
    Performs fullatom structural refinement on the structure in ``pdb_filename``.

    Each of the ``cycles`` trials applies ``smallmoves`` small moves (perturbation of a
    random residue's phi or psi) and ``shearmoves`` shear moves (a random residue's phi
    and the preceding residue's psi), each limited to ``backbone_angle_max``, followed
    by gradient based backbone torsion minimization and sidechain packing. The result
    is accepted or rejected with a criterion scaled by ``kT``. ``jobs`` trajectories
    are run, structures are continually sent to a PyMOL instance, and output
    structures are named ``job_output``_(job#).pdb.

    Returns a list of scores: the starting score followed by one final score per trajectory.
    """
    # 1. create a pose from the desired PDB file
    pose = Pose()
    pose_from_file(pose, pdb_filename)

    # 2. keep a fullatom reference copy of the starting structure
    starting_pose = Pose()
    starting_pose.assign(pose)

    # 3. create a standard ScoreFunction
    #### implement the desired ScoreFunction here
    scorefxn = get_fa_scorefxn()

    #### To use the ClassicRelax protocol instead, replace the custom protocol
    #### below with: refinement = protocols.relax.ClassicRelax(scorefxn)

    #### backbone refinement protocol
    # 4. create a MoveMap, all backbone torsions free
    bb_movemap = MoveMap()
    bb_movemap.set_bb(True)

    # 5. create a SmallMover
    # it perturbs the phi or psi of a random residue (free in the MoveMap) the
    # requested number of times, accepting or rejecting each change with the
    # Metropolis criterion on the "rama" ScoreType and kT
    smallmover = protocols.simple_moves.SmallMover(bb_movemap, kT, smallmoves)
    # angle_max is secondary structure dependent; secondary structure is not
    # evaluated in this protocol, so every class gets the same value at once
    smallmover.angle_max(backbone_angle_max)
    #### for secondary structure biased moves use the overloaded form, e.g.
    #### smallmover.angle_max('H', backbone_angle_max) (likewise 'E' and 'L')

    # 6. create a ShearMover
    # same as a SmallMover except that it perturbs a residue's phi together with
    # the preceding residue's psi, which reduces the downstream structural change
    shearmover = protocols.simple_moves.ShearMover(bb_movemap, kT, shearmoves)
    shearmover.angle_max(backbone_angle_max)  # same restrictions as the SmallMover

    # 7. create a MinMover, for backbone torsion minimization
    minmover = protocols.minimization_packing.MinMover()
    minmover.movemap(bb_movemap)
    minmover.score_function(scorefxn)

    #### sidechain refinement protocol, simple packing
    # 8. setup a PackRotamersMover
    packer_task = standard_packer_task(starting_pose)
    packer_task.restrict_to_repacking()    # prevents design, packing only
    packer_task.or_include_current(True)    # considers the original sidechains
    packmover = protocols.minimization_packing.PackRotamersMover(scorefxn, packer_task)

    #### assess the new structure
    # 9. create a PyMOLMover
    # it slows the protocol down SIGNIFICANTLY but gives very informative displays;
    # pymover.keep_history(True) would load structures into successive states, and
    # changing the pose's PDBInfo name makes a structure load as a new object
    pymover = PyMOLMover()

    # 10. export the original structure, and scores, to PyMOL
    pymover.apply(pose)
    scorefxn(pose)
    pymover.send_energy(pose)

    # 11. setup a RepeatMover on a TrialMover of a SequenceMover
    # a. a SequenceMover of the previous moves; the PyMOLMover comes last so that
    #    structures are shown before they are accepted or rejected
    #### add any other moves you desire
    combined_mover = SequenceMover()
    for step in (smallmover, shearmover, minmover, packmover, pymover):
        combined_mover.add_mover(step)
    # b. a MonteCarlo object to define success/failure
    mc = MonteCarlo(pose, scorefxn, kT)    # must reset for each trajectory!
    # c. the TrialMover
    trial = TrialMover(combined_mover, mc)
    #### for each trajectory, try cycles number of applications
    refinement = RepeatMover(trial, cycles)

    # 12. create a (Py)JobDistributor
    jd = PyJobDistributor(job_output, jobs, scorefxn)
    jd.native_pose = starting_pose

    # 13. store the score evaluations for output; printing them as they are
    #     produced would be lost in Rosetta's verbose output
    scores = [0] * (jobs + 1)
    scores[0] = scorefxn(starting_pose)

    # 14. perform the refinement protocol
    trajectory = 0    # also used for naming the structures sent to PyMOL
    while not jd.job_complete:
        # a. reset everything for the new trajectory
        pose.assign(starting_pose)
        trajectory += 1
        pose.pdb_info().name(f'{job_output}_{trajectory}')
        mc.reset(pose)    # sets lowest_score to that of this pose
        #### a custom protocol may have additional variables to reset, such as kT

        # b. apply the refinement protocol
        refinement.apply(pose)

        # c. recover the lowest scoring decoy and write it to a PDB file
        mc.recover_low(pose)
        jd.output_decoy(pose)

        # export the final structure of the trajectory to PyMOL
        pose.pdb_info().name(f'{job_output}_{trajectory}_final')
        pymover.apply(pose)
        pymover.send_energy(pose)    # see the total score in color

        # store the final score for this trajectory
        scores[trajectory] = scorefxn(pose)

    # 15. output the score evaluations
    print('Original Score\t:\t', scores[0])
    for index, score in enumerate(scores[1:], start=1):
        print(f'{job_output}_{index}\t:\t', score)

    return scores    # for other protocols
# Script setup: initialise PyRosetta, load a structure and prepare the movers.
import pyrosetta
import pyrosetta.rosetta as rosetta

from pyrosetta import init, pose_from_file, create_score_function, Pose, MoveMap, PyMOLMover
from pyrosetta.rosetta import core, protocols
from pyrosetta.teaching import MinMover, SmallMover, ShearMover, TrialMover, MonteCarlo, RepeatMover

init(extra_options = "-constant_seed")  # WARNING: option '-constant_seed' is for testing only! MAKE SURE TO REMOVE IT IN PRODUCTION RUNS!!!!!
# change the working directory; the relative paths below are resolved from '.test.output'
import os; os.chdir('.test.output')

# load the structure and make a second, independent copy of it
start = pose_from_file("../test/data/workshops/1YY8.clean.pdb")
test = Pose()
test.assign(start)

# give the two poses distinct PDBInfo names
start.pdb_info().name("start")
test.pdb_info().name("test")

# send both poses to PyMOL, then turn history keeping on for later applications
pmm = PyMOLMover()
pmm.apply(start)
pmm.apply(test)
pmm.keep_history(True)
print( pmm )

# Small and Shear Moves
kT = 1.0
n_moves = 1
# MoveMap with every backbone torsion enabled, shared by both movers
movemap = MoveMap()
movemap.set_bb(True)
small_mover = SmallMover(movemap, kT, n_moves)
shear_mover = ShearMover(movemap, kT, n_moves)
def sample_dna_interface(pdb_filename, partners,
                         jobs=1, job_output='dna_output'):
    """
    Performs DNA-protein docking using the Rosetta fullatom docking stage
    (DockMCMProtocol) on the DNA-protein complex in ``pdb_filename``, with the
    relative chain orientation given by ``partners``. ``jobs`` trajectories are
    performed with output structures named ``job_output``_(job#).pdb.
    """
    # 1. create a pose from the desired PDB file
    pose = Pose()
    pose_from_file(pose, pdb_filename)

    # 2. setup the docking FoldTree
    # setup_foldtree makes jump 1 the inter-body jump: its jump points are the
    # residues closest to the centers of geometry of each partner, with a
    # cutpoint at the end of the chain.
    # The second argument gives the relative chain orientation, such as "A_B" or
    # "LH_A"; "_" separates the two bodies. ONLY TWO BODY DOCKING is supported:
    # the partners MUST have different chain IDs and be in the same pose, and
    # additional chains can be grouped with one of the partners.
    # The third argument is meant to list the movable jumps (for multibody
    # docking); it is currently unsupported but must still be given.
    dock_jump = 1
    protocols.docking.setup_foldtree(pose, partners, Vector1([dock_jump]))

    # 3. create a working copy of the pose
    decoy = Pose()
    decoy.assign(pose)

    # 4. create the ScoreFunction used for fullatom docking
    scorefxn = create_score_function('dna')
    scorefxn.set_weight(core.scoring.fa_elec, 1)    # an "electrostatic" term

    #### Global docking needs interface detection (centroid) and interface
    #### refinement (high-resolution). There is no centroid representation of
    #### DNA, so low-resolution DNA-protein prediction is not possible and only
    #### the high-resolution interface refinement is available.
    #### WARNING: if you add a perturbation or randomization step, the
    #### high-resolution stages may fail. Such a step CAN turn this into a
    #### global docking algorithm, but the rigid-body sampling before refinement
    #### then needs EXTENSIVE sampling, and this algorithm spends most of its
    #### effort in refinement.

    # 5. setup the high resolution (fullatom) docking protocol (DockMCMProtocol),
    #    i.e. an instance of just the high-resolution docking stages
    docking = protocols.docking.DockMCMProtocol()
    docking.set_scorefxn(scorefxn)

    # 6. setup the PyJobDistributor
    jd = PyJobDistributor(job_output, jobs, scorefxn)

    # 7. setup a PyMOL_Observer (optional)
    # it owns a PyMOLMover and sends a pose to PyMOL whenever its structure
    # changes, which shows intermediate changes without applying the PyMOLMover
    # by hand; it can also make the output hard to interpret and slow the
    # protocol down considerably, since many structures are sent
    # uncomment the line below to use the PyMOL_Observer
    ## AddPyMOLObserver(decoy, True)

    # 8. perform the DNA-protein docking
    trajectory = 0    # for pretty output to PyMOL
    while not jd.job_complete:
        # a. reset the working pose to the starting structure and rename it
        decoy.assign(pose)
        trajectory += 1
        decoy.pdb_info().name(f'{job_output}_{trajectory}')

        # b. perform docking
        docking.apply(decoy)

        # c. output the decoy structure, renamed for PyMOL, to a PDB file
        decoy.pdb_info().name(f'{job_output}_{trajectory}_fa')
        jd.output_decoy(decoy)
def movemap(pose, PDB_out=False):
    """
    Demonstrates basic usage of the MoveMap object with a backbone minimization
    of the middle half of ``pose``. Scores are printed, the structures are sent to
    PyMOL, and the minimized structure is written to ``minimized.pdb`` if
    ``PDB_out`` is True.
    """
    #########
    # MoveMap
    # A MoveMap encodes which degrees of freedom of a Pose are allowed to change.
    # It is separate from the Pose and usually required by a Mover; several
    # MoveMaps can correspond to the same Pose.
    # It stores, per residue, whether the backbone ({phi, psi, omega}) and the chi
    # torsion sets are free: each set is True or False as a whole, individual
    # angles cannot be freed separately.
    # It has no upper limit on residue number and defaults to backbone and chi
    # False everywhere.
    # MoveMap.show(n) prints the per-residue settings up to residue n (it does not
    # support viewing a range).
    torsion_map = MoveMap()
    torsion_map.set_bb(True)     # all backbone torsion angles free
    torsion_map.set_chi(False)   # all chi torsion angles fixed (the default)
    # single-residue forms, example syntax:
    #torsion_map.set_bb(1, True)
    #torsion_map.set_chi(1, True)
    torsion_map.show(pose.total_residue())

    # Gradient based minimization (MinMover) of the "median" residues: the
    # gradient is evaluated with a ScoreFunction and the MoveMap defines the
    # degrees of freedom.
    scorefxn = get_fa_scorefxn()

    # redefine the MoveMap to include the median half of the residues only
    torsion_map.set_bb(False)    # reset to backbone False
    first_free = int(pose.total_residue() / 4)
    last_free = int(pose.total_residue() * 3 / 4)
    torsion_map.set_bb_true_range(first_free, last_free)

    # create the MinMover
    minmover = protocols.minimization_packing.MinMover()
    minmover.score_function(scorefxn)
    minmover.movemap(torsion_map)

    # work on a copy of the pose
    working = Pose()
    working.assign(pose)
    scorefxn(working)    # to prevent verbose output on the next scoring

    pymover = PyMOLMover()
    #### To export the structures as different states of one PyMOL object, enable
    #### history keeping on the PyMOLMover and drop the two renaming lines below:
    #### changing the PDBInfo name makes the PyMOLMover produce a new object.
    working.pdb_info().name('original')
    pymover.apply(working)
    print('\nPre minimization score:', scorefxn(working))

    minmover.apply(working)
    if PDB_out:
        working.dump_pdb('minimized.pdb')

    print('Post minimization score:', scorefxn(working))
    working.pdb_info().name('minimized')
    pymover.apply(working)
def has_residue(self, pose: pyrosetta.Pose, resi: int, chain: str) -> bool:
    """
    Tell whether the PDB residue resi:chain exists in the pose.

    :param pose: pose to inspect; when ``None``, ``self.pose`` is used instead.
    :param resi: PDB residue number.
    :param chain: PDB chain.
    :return: ``True`` unless ``pdb2pose`` gives 0 for that residue.
    """
    target = self.pose if pose is None else pose
    return target.pdb_info().pdb2pose(res=resi, chain=chain) != 0
def scanning(pdb_filename, partners, mutant_aa='A',
             interface_cutoff=8.0, output=False,
             trials=1, trial_output=''):
    """
    Performs "scanning" at the interface between ``partners`` in ``pdb_filename``.

    Every residue flagged as interface (within ``interface_cutoff``) is mutated to
    ``mutant_aa`` and handed to ``interface_ddG``, which returns the interaction
    dependent score change. ``trials`` scans are performed (to average results),
    each summary being printed and written to ``trial_output``_(trial#).txt.
    Structures are exported to a PyMOL instance. When ``output`` is true a file name
    for each mutant structure is passed on to ``interface_ddG``.
    """
    # 1. create a pose from the desired PDB file
    pose = Pose()
    pose_from_file(pose, pdb_filename)

    # 2. setup the docking FoldTree and other related parameters
    dock_jump = 1
    movable_jumps = Vector1([dock_jump])
    protocols.docking.setup_foldtree(pose, partners, movable_jumps)

    # 3. create ScoreFunctions for the Interface and "ddG" calculations
    # the pose's Energies object MUST be updated for the Interface object to
    # work normally
    scorefxn = get_fa_scorefxn()
    scorefxn(pose)    # needed for proper Interface calculation

    # setup a "ddG" ScoreFunction, custom weights
    ddG_scorefxn = ScoreFunction()
    ddG_scorefxn.set_weight(core.scoring.fa_atr, 0.44)
    ddG_scorefxn.set_weight(core.scoring.fa_rep, 0.07)
    ddG_scorefxn.set_weight(core.scoring.fa_sol, 1.0)
    ddG_scorefxn.set_weight(core.scoring.hbond_bb_sc, 0.5)
    ddG_scorefxn.set_weight(core.scoring.hbond_sc, 1.0)

    # 4. create an Interface object for the pose
    interface = Interface(dock_jump)
    interface.distance(interface_cutoff)
    interface.calculate(pose)

    # 5. create a PyMOLMover for sending output to PyMOL (optional)
    pymover = PyMOLMover()
    pymover.keep_history(True)    # for multiple trajectories
    pymover.apply(pose)
    pymover.send_energy(pose)

    # 6. perform scanning trials
    # the large number of packing operations introduces a lot of variability;
    # for best results, perform several trials and average the results. These
    # score changes are useful for QUALITATIVELY defining "hotspot" residues.
    # this script does not use a PyJobDistributor since no PDB files are output
    for trial in range(trials):
        # store the ddG values in a dictionary keyed by pose residue number
        ddG_mutants = {}
        for i in range(1, pose.total_residue() + 1):
            # only residues at the interface are scanned
            if not interface.is_interface(i):
                continue
            # an empty filename turns the structure output off
            filename = ''
            if output:
                filename = (pose.pdb_info().name()[:-4] + '_' +
                            pose.sequence()[i - 1] +
                            str(pose.pdb_info().number(i)) + '->' + mutant_aa)

            # determine the interface score change upon mutation
            ddG_mutants[i] = interface_ddG(pose, i, mutant_aa,
                                           movable_jumps, ddG_scorefxn,
                                           interface_cutoff, filename)

        # output results
        print('=' * 80)
        print('Trial', str(trial + 1))
        print(
            'Mutants (PDB numbered)\t\"ddG\" (interaction dependent score change)'
        )
        # sorted by residue number: easier to read
        display = [
            pose.sequence()[i - 1] + str(pose.pdb_info().number(i)) +
            mutant_aa + '\t' + str(ddG_mutants[i]) + '\n'
            for i in sorted(ddG_mutants)
        ]
        print(''.join(display)[:-1])
        print('=' * 80)

        # write to file; the context manager closes it even if writing fails
        with open(trial_output + '_' + str(trial + 1) + '.txt', 'w') as f:
            f.writelines(display)

    #### alternate output using scanning_analysis, only display
    #### mutations with "deviant" score changes
    print('Likely Hotspot Residues')
    for hotspot in scanning_analysis(trial_output):
        print(hotspot)
    print('=' * 80)
def interface_ddG(pose, mutant_position, mutant_aa, movable_jumps,
                  scorefxn='', cutoff=8.0, out_filename=''):
    """
    Return the "ddG" of mutating <mutant_position> of <pose> to <mutant_aa>.

    A wild-type copy and a mutated copy of <pose> are each scored with
    calc_binding_energy and the difference (mutant - wild-type) is returned.
    <pose> itself is only copied and read, never assigned to. The mutant is
    sent to a PyMOL instance and, if <out_filename> is given, dumped to that
    file.

    :param pose: the complex to mutate
    :param mutant_position: pose-numbered residue to mutate
    :param mutant_aa: residue to mutate to, forwarded to mutate_residue
    :param movable_jumps: currently unused; only the commented-out
        calc_interaction_energy alternative below refers to it
    :param scorefxn: ScoreFunction used for mutation and scoring; when falsy
        (the default '') a five-term ScoreFunction is built here
    :param cutoff: forwarded to calc_binding_energy
    :param out_filename: if non-empty, the mutant is written to this file
    :return: mutant binding score minus wild-type binding score
    """
    # 1. create a reference copy of the pose
    wt = Pose()  # the "wild-type"
    wt.assign(pose)

    # 2. setup a specific default ScoreFunction
    if not scorefxn:
        # this is a modified version of the scoring function discussed in
        # PNAS 2002 (22)14116-21, without environment dependent hbonding
        # the ScoreTypes are addressed through core.scoring, as elsewhere in
        # this file, so no star import of the score type names is required
        scorefxn = ScoreFunction()
        scorefxn.set_weight(core.scoring.fa_atr, 0.44)
        scorefxn.set_weight(core.scoring.fa_rep, 0.07)
        scorefxn.set_weight(core.scoring.fa_sol, 1.0)
        scorefxn.set_weight(core.scoring.hbond_bb_sc, 0.5)
        scorefxn.set_weight(core.scoring.hbond_sc, 1.0)

    # 3. create a copy of the pose for mutation
    mutant = Pose()
    mutant.assign(pose)

    # 4. mutate the desired residue
    # the pack_radius argument of mutate_residue is given as 0.0; it is
    # redundant for this application since the area around the mutation is
    # repacked anyway
    mutant = mutate_residue(mutant, mutant_position, mutant_aa, 0.0, scorefxn)

    # 5. calculate the "interaction energy"
    # calc_interaction_energy is exposed in PyRosetta, however it does not
    # alter the protein conformation after translation and may miss
    # significant interactions, so the manual separate-and-score helper
    # calc_binding_energy is used instead
    wt_score = calc_binding_energy(wt, scorefxn, mutant_position, cutoff)
    mut_score = calc_binding_energy(mutant, scorefxn, mutant_position, cutoff)
    #### calc_interaction_energy separates an input pose by 500 Angstroms
    #### along the jumps listed in a Vector1 of movable jump numbers; if
    #### setup_foldtree has not been applied its result may be wrong
    #### (since the jumps may be wrong)
    #wt_score = calc_interaction_energy(wt, scorefxn, movable_jumps)
    #mut_score = calc_interaction_energy(mutant, scorefxn, movable_jumps)
    ddg = mut_score - wt_score

    # 6. output data (optional)
    # -export the mutant structure to PyMOL, named e.g. "A23D" from the
    #  wild-type letter, the PDB residue number and the mutant letter
    mutant.pdb_info().name(pose.sequence()[mutant_position - 1] +
                           str(pose.pdb_info().number(mutant_position)) +
                           mutant.sequence()[mutant_position - 1])
    pymover = PyMOLMover()
    scorefxn(mutant)  # score first so that energies can be sent
    pymover.apply(mutant)
    pymover.send_energy(mutant)

    # -write the mutant structure to a PDB file
    if out_filename:
        mutant.dump_pdb(out_filename)

    return ddg
def sample_docking(pdb_filename, partners, translation=3.0, rotation=8.0,
                   jobs=1, job_output='dock_output'):
    """
    Dock the two bodies described by <partners> in <pdb_filename>.

    The Rosetta standard DockingProtocol is applied after an initial
    perturbation of <translation> Angstroms and <rotation> degrees. <jobs>
    trajectories are run and the resulting structures are written as
    <job_output>_(job#).pdb through a PyJobDistributor.
    """
    # 1. load the complex from the PDB file
    start_pose = Pose()
    pose_from_file(start_pose, pdb_filename)

    # 2. setup the docking FoldTree
    # setup_foldtree makes jump 1 the inter-body jump; its second argument is
    # the chain grouping ("A_B", "LH_A", ...) where "_" separates the two
    # bodies -- ONLY TWO BODY docking is supported and the partners MUST have
    # different chain IDs within the same pose
    # the third argument (movable jumps) must be supplied even though
    # multibody docking is not supported
    dock_jump = 1
    protocols.docking.setup_foldtree(start_pose, partners,
                                     Vector1([dock_jump]))

    # 3. Movers converting between centroid and fullatom representations...
    switch_to_cen = SwitchResidueTypeSetMover('centroid')
    switch_to_fa = SwitchResidueTypeSetMover('fa_standard')
    # ...and a Mover that re-inserts the sidechains saved from the fullatom
    # starting structure (centroid sampling discards sidechain conformations)
    restore_sidechains = protocols.simple_moves.ReturnSidechainMover(
        start_pose)

    # 4. the starting structure is kept in centroid from here on
    switch_to_cen.apply(start_pose)

    # 5. working (centroid) pose that each trajectory operates on
    work_pose = Pose()
    work_pose.assign(start_pose)

    # 6. ScoreFunctions for centroid and fullatom docking
    scorefxn_low = create_score_function('interchain_cen')
    scorefxn_high = create_score_function('docking')
    # PyRosetta3: create_score_function_ws_patch('docking', 'docking_min')
    scorefxn_high_min = create_score_function('docking', 'docking_min')

    # 7. Movers for the initial perturbation
    # the DockingProtocol could do this itself, the individual Movers are
    # used to demonstrate their syntax
    # -randomize the orientation of each partner
    randomize_upstream = RigidBodyRandomizeMover(start_pose, dock_jump,
                                                 partner_upstream)
    randomize_downstream = RigidBodyRandomizeMover(start_pose, dock_jump,
                                                   partner_downstream)
    # -translate (Angstroms) and rotate (degrees) one partner randomly
    dock_pert = RigidBodyPerturbMover(dock_jump, translation, rotation)
    # -spin the partners about the jump
    spin = RigidBodySpinMover(dock_jump)
    # -slide the partners together along the inter-body jump axis
    slide_into_contact = protocols.docking.DockingSlideIntoContact(dock_jump)

    # 8. MinMover whose only degree of freedom is the docking jump, i.e. it
    # searches for the partner separation that minimizes the score
    jump_map = MoveMap()
    jump_map.set_jump(dock_jump, True)
    jump_minimizer = protocols.minimization_packing.MinMover()
    jump_minimizer.movemap(jump_map)
    jump_minimizer.score_function(scorefxn_high_min)

    # 9. chain the perturbation steps, in application order
    perturbation = protocols.moves.SequenceMover()
    for step in (randomize_upstream, randomize_downstream, dock_pert, spin,
                 slide_into_contact, switch_to_fa, restore_sidechains,
                 jump_minimizer):
        perturbation.add_mover(step)

    # 10. the DockingProtocol itself
    # created with default values; the movable jump and both ScoreFunctions
    # are then set explicitly through its setter methods
    docking = protocols.docking.DockingProtocol()
    docking.set_movable_jumps(Vector1([dock_jump]))
    docking.set_lowres_scorefxn(scorefxn_low)
    docking.set_highres_scorefxn(scorefxn_high_min)
    #### the low and high resolution stages can be used on their own instead,
    #### uncomment these lines and their application in the loop below
    #docking_low = DockingLowRes()
    #docking_low.set_movable_jumps(Vector1([1]))
    #docking_low.set_scorefxn(scorefxn_low)
    #docking_high = DockingHighRes()
    #docking_high.set_movable_jumps(Vector1([1]))
    #docking_high.set_scorefxn(scorefxn_high)

    # 11. PyJobDistributor, with a fullatom copy of the input as the native
    # pose for RMSD comparison
    job_dist = PyJobDistributor(job_output, jobs, scorefxn_high)
    native_fa = Pose()
    native_fa.assign(start_pose)
    switch_to_fa.apply(native_fa)        # the original pose was fullatom
    restore_sidechains.apply(native_fa)  # with these sidechains
    job_dist.native_pose = native_fa

    # 12. PyMOL_Observer (optional)
    # the observer sends a pose to PyMOL every time it changes, which shows
    # all intermediates of a protocol but can be hard to interpret and can
    # slow both the protocol and PyMOL considerably
    # uncomment the line below to use the PyMOL_Observer
    ## AddPyMOLObserver(work_pose, True)

    # 13. run the trajectories
    trajectory = 0  # only used to name the structures
    while not job_dist.job_complete:
        # a. start again from the original centroid structure and give the
        #    pose a per-trajectory name
        work_pose.assign(start_pose)
        trajectory += 1
        decoy_name = job_output + '_' + str(trajectory)
        work_pose.pdb_info().name(decoy_name)

        # b. perturb the structure for this trajectory
        perturbation.apply(work_pose)

        # c. perform docking
        docking.apply(work_pose)
        #### alternate application of the DockingProtocol pieces
        #docking_low.apply(work_pose)
        #docking_high.apply(work_pose)

        # d. output the decoy, making sure it is fullatom and renamed first
        switch_to_fa.apply(work_pose)
        work_pose.pdb_info().name(decoy_name + '_fa')
        job_dist.output_decoy(work_pose)
def sample_single_loop_modeling(pdb_filename, loop_begin, loop_end,
                                loop_cutpoint, frag_filename, frag_length,
                                outer_cycles_low=2, inner_cycles_low=5,
                                init_temp_low=2.0, final_temp_low=0.8,
                                outer_cycles_high=5, inner_cycles_high=10,
                                init_temp_high=2.2, final_temp_high=0.6,
                                jobs=1, job_output='loop_output'):
    """
    Perform simple single loop construction on the input <pdb_filename>.

    The loop runs from <loop_begin> to <loop_end> with a cutpoint at
    <loop_cutpoint> and is sampled with fragments of length <frag_length>
    read from <frag_filename>. <jobs> trajectories are performed; each one
    consists of a low resolution (centroid) simulated annealing of fragment
    insertion plus CCD closure, followed by fullatom repacking and a
    LoopMover_Refine_CCD refinement. Output structures are named
    <job_output>_(job#).pdb and carry the loop RMSD to the input structure.

    :param outer_cycles_low, inner_cycles_low: rounds and steps per round of
        the centroid stage
    :param init_temp_low, final_temp_low: the centroid "temperature" is
        multiplied by a constant factor every step so that it moves
        geometrically from the initial to the final value
    :param outer_cycles_high, inner_cycles_high, init_temp_high,
        final_temp_high: the corresponding settings handed to
        LoopMover_Refine_CCD
    :raises IOError: if the fragment set cannot be constructed
    """
    # 1. create a pose from the desired PDB file
    p = Pose()
    pose_from_file(p, pdb_filename)

    # 2. create a reference copy of the pose in fullatom
    starting_p = Pose()
    starting_p.assign(p)

    #### if you are constructing multiple loops simultaneously, changes will
    #### occur in most of the steps below

    # 3. create the Loop object
    # (note: Loop objects merely specify residues, they contain no
    # conformation data)
    my_loop = protocols.loops.Loop(loop_begin, loop_end, loop_cutpoint)
    #### if using multiple loops, add additional Loop objects

    # 4. use the Loop to set the pose FoldTree
    protocols.loops.set_single_loop_fold_tree(p, my_loop)
    #### alternate FoldTree setup; if you uncomment the lines below,
    #### comment-out the set_single_loop_fold_tree call above
    #### -create an empty FoldTree
    #ft = FoldTree()
    #### -make it a single edge the length of pose
    #ft.simple_tree(p.total_residue())
    #### -insert a jump corresponding to the single loop region
    #ft.add_jump(loop_begin - 2, loop_end + 2, loop_cutpoint)
    #### -give the pose this FoldTree, erasing any previous FoldTree
    #p.fold_tree(ft)
    #### there is also a fold_tree_from_loops method which creates jumps
    #### +/- 1 residue from the loop endpoints and requires a third
    #### argument, the FoldTree to setup

    # 5. set the cut-point residues as cut-point variants
    protocols.loops.add_single_cutpoint_variant(p, my_loop)

    # 6. create the MoveMap, allow the loop region backbone and
    # all chi torsions to be free
    movemap = MoveMap()
    movemap.set_bb_true_range(loop_begin, loop_end)
    movemap.set_chi(True)  # sets all chi torsions free

    # 7. setup the fragment Mover
    # the try/except reports improper fragment files as an IOError while
    # keeping the underlying error as its cause; KeyboardInterrupt and
    # SystemExit are deliberately not intercepted
    try:
        fragset = core.fragment.ConstantLengthFragSet(frag_length,
                                                      frag_filename)
    except Exception as err:
        raise IOError('Make sure frag_length matches the fragments in\n'
                      'frag_file and that frag_file is valid') from err
    fragment_mover = protocols.simple_moves.ClassicFragmentMover(
        fragset, movemap)

    # 8. create a Mover for loop modeling using CCD (low resolution)
    ccd_closure = protocols.loops.loop_closure.ccd.CCDLoopClosureMover(
        my_loop, movemap)

    # 9. create ScoreFunctions
    # for centroid, use the default centroid ScoreFunction with chainbreak on
    scorefxn_low = create_score_function('cen_std')
    # the chainbreak ScoreType penalizes broken bonds (it is 0.0 unless a
    # bond is broken), i.e. it penalizes CCD failing to close the loop
    scorefxn_low.set_weight(core.scoring.chainbreak, 1)
    # for fullatom, used for packing and scoring final output
    scorefxn_high = get_fa_scorefxn()

    # 10. setup sidechain packing Mover
    task_pack = core.pack.task.TaskFactory.create_packer_task(starting_p)
    task_pack.restrict_to_repacking()  # prevents design, packing only
    task_pack.or_include_current(True)  # considers original sidechains
    pack = protocols.minimization_packing.PackRotamersMover(
        scorefxn_high, task_pack)

    # 11. setup the high resolution refinement
    # by creating a Loops object (basically a list of Loop objects),
    sample_loops = protocols.loops.Loops()
    # giving it the loop to remodel,
    sample_loops.add_loop(my_loop)
    # and creating a fullatom CCD Mover (high resolution)
    # this Mover is somewhat abnormal since it handles everything itself:
    # it creates its own MoveMap, ScoreFunction, FoldTree, MonteCarlo object,
    # backbone Movers and PackRotamersMover, and it does NOT restrict
    # repacking to the loop regions
    loop_refine = LoopMover_Refine_CCD(sample_loops)
    # only a few of its parameters can be set from outside
    #loop_refine.set_score_function(scorefxn_high)    # in beta v2 and above
    loop_refine.temp_initial(init_temp_high)
    # the final temperature must come from final_temp_high, otherwise the
    # refinement would start and end at the same temperature
    loop_refine.temp_final(final_temp_high)
    loop_refine.outer_cycles(outer_cycles_high)
    loop_refine.max_inner_cycles(inner_cycles_high)

    # 12. create centroid <--> fullatom conversion Movers
    to_centroid = SwitchResidueTypeSetMover('centroid')
    to_fullatom = SwitchResidueTypeSetMover('fa_standard')
    # and a Mover to recover sidechain conformations: centroid sampling
    # neglects the sidechains, so after switching back to fullatom the
    # sidechains saved from starting_p are re-inserted before packing
    recover_sidechains = protocols.simple_moves.ReturnSidechainMover(
        starting_p)

    # 13. create a reference copy of the pose in centroid
    # the first stage of each trajectory is in centroid
    to_centroid.apply(p)
    starting_p_centroid = Pose()
    starting_p_centroid.assign(p)

    # 14. create the geometric "temperature" factor for simulated annealing
    # applying it once per inner step takes kT from init_temp_low to
    # final_temp_low over outer_cycles_low * inner_cycles_low steps
    gamma = pow((final_temp_low / init_temp_low),
                (1.0 / (outer_cycles_low * inner_cycles_low)))

    # 15. create a PyMOLMover for exporting structures to PyMOL
    pymov = PyMOLMover()
    # uncomment the line below to load structures into successive states
    #pymov.keep_history(True)
    scorefxn_high(starting_p)  # for exporting the scores
    pymov.apply(starting_p)
    pymov.send_energy(starting_p)

    # 16. create a (Py)JobDistributor
    # it names all output files from job_output, performs the requested
    # number of jobs and needs a ScoreFunction for the scores it records
    jd = PyJobDistributor(job_output, jobs, scorefxn_high)
    jd.native_pose = starting_p

    # 17. perform the loop modeling protocol
    counter = 0  # for exporting to PyMOL
    while not jd.job_complete:
        # a. set necessary variables for the new trajectory
        # -reload the starting pose (centroid)
        p.assign(starting_p_centroid)
        # -change the pose's PDBInfo.name, for exporting to PyMOL
        counter += 1
        p.pdb_info().name(job_output + '_' + str(counter) + '_cen')
        # -reset the starting "temperature" (to init_temp_low)
        kT = init_temp_low
        # -create a MonteCarlo object for this trajectory
        # a MonteCarlo object assesses pass/fail by the Metropolis Criteria
        # and also records information on the lowest scoring pose
        # NOTE(review): mc is built from p before the loop is randomized and
        # every outer cycle starts with mc.recover_low(p); presumably the
        # first recover_low reloads the un-randomized loop -- confirm whether
        # an mc.reset(p) after step b is intended
        mc = MonteCarlo(p, scorefxn_low, kT)

        # b. "randomize" the loop
        #### this section may change if you intend to use multiple loops or
        #### alter the sampling method to "randomize" the loop
        # -by breaking it open,
        for i in range(loop_begin, loop_end + 1):
            p.set_phi(i, -180)
            p.set_psi(i, 180)
        pymov.apply(p)
        # -and then inserting fragments
        # the number of insertions performed is somewhat arbitrary
        for i in range(loop_begin, loop_end + 1):
            fragment_mover.apply(p)
        pymov.apply(p)

        # low resolution loop modeling:
        # c. simulated annealing, stepping kT geometrically
        # from init_temp_low to final_temp_low
        #### this section may change if you intend to use multiple loops or
        #### alter the sampling method for low resolution modeling
        for i in range(1, outer_cycles_low + 1):
            # -start with the lowest scoring pose
            mc.recover_low(p)  # loads mc's lowest scoring pose into p
            # -take several steps of the simulated annealing by
            for j in range(1, inner_cycles_low + 1):
                # >moving the "temperature" one geometric step toward
                #  final_temp_low,
                kT = kT * gamma
                mc.set_temperature(kT)
                # >inserting a fragment,
                fragment_mover.apply(p)
                pymov.apply(p)
                # >performing CCD,
                ccd_closure.apply(p)
                pymov.apply(p)
                # >and assessing the Metropolis Criteria
                mc.boltzmann(p)

        # the LoopMover_Refine_CCD makes A LOT of moves, DO NOT expect to
        # see useful results if you use the PyMOLMover keep_history option,
        # the large number of intermediates will slow processing to a halt

        # d. convert the best structure (lowest scoring) into fullatom by:
        # -recovering the best (centroid) structure (lowest scoring),
        mc.recover_low(p)  # loads mc's lowest scoring pose into p
        # -switching the ResidueTypeSet to fullatom (from centroid),
        to_fullatom.apply(p)
        # -recovering the original sidechain conformations,
        recover_sidechains.apply(p)
        # -and packing the result (since the backbone conformation changed)
        pack.apply(p)
        pymov.apply(p)
        p.pdb_info().name(job_output + '_' + str(counter) + '_fa')

        # high-resolution refinement:
        #### this section may change if you intend to use multiple loops or
        #### alter the sampling method for high resolution refinement
        # e. apply the LoopMover_Refine_CCD
        loop_refine.apply(p)

        # f. output the decoy (pose result from this trajectory)
        # include the loop RMSD (Lrmsd)
        # -output a PDB file using the PyJobDistributor
        lrms = protocols.loops.loop_rmsd(p, starting_p, sample_loops, True)
        jd.additional_decoy_info = ' Lrmsd: ' + str(lrms)
        jd.output_decoy(p)
        # -export the structure to PyMOL
        pymov.apply(p)
        pymov.send_energy(p)
def sample_folding(sequence, long_frag_filename, long_frag_length,
                   short_frag_filename, short_frag_length, kT=3.0,
                   long_inserts=1, short_inserts=3, cycles=40, jobs=1,
                   job_output='fold_output'):
    """
    Fold <sequence> by Monte Carlo fragment insertion in centroid mode.

    A linearized pose of <sequence> is built and, for each of <jobs>
    trajectories, <cycles> Metropolis trials (temperature <kT>) are applied,
    each trial inserting <long_inserts> fragments from <long_frag_filename>
    followed by <short_inserts> fragments from <short_frag_filename>. The
    lowest scoring structure of every trajectory is converted to fullatom and
    written as <job_output>_(job#).pdb. Structures are only sent to PyMOL if
    the AddPyMOLObserver line below is uncommented.

    :param long_frag_length, short_frag_length: fragment lengths expected in
        the respective fragment files
    :return: list of centroid scores; element 0 is the score of the
        linearized starting pose, element i the final score of trajectory i
    :raises IOError: if either fragment set cannot be constructed
    """
    # 1. create a pose from the desired sequence (fullatom)
    # pose_from_sequence produces a complete IDEALIZED conformation of the
    # input sequence; the ResidueTypeSet (second argument) may be varied
    # small differences in bond lengths and angles WILL change the results
    #### if you desire an alternate starting conformation, alter steps
    #### 1. and 2. as you please
    pose = pose_from_sequence(sequence, 'fa_standard')

    # 2. linearize the pose by setting backbone torsions to large values
    # a PDBInfo object is attached here because the pose's name is set
    # through pdb_info() later on
    pose.pdb_info(rosetta.core.pose.PDBInfo(pose.total_residue()))
    for i in range(1, pose.total_residue() + 1):
        pose.set_omega(i, 180)
        pose.set_phi(i, -150)  # reasonably straight
        pose.set_psi(i, 150)
        #### if you want to see the decoy scores, the PDBInfo needs these lines
        #pose.pdb_info().chain(i, 'A')    # necessary to color by score
        #pose.pdb_info().number(i, i)    # for PDB numbering

    # 3. create a (fullatom) reference copy of the pose
    test_pose = Pose()
    test_pose.assign(pose)
    test_pose.pdb_info().name('linearized pose')

    # 4. create centroid <--> fullatom conversion Movers
    # centroid residues have their sidechain atoms replaced by a single
    # representative "atom" to speed up calculations
    to_centroid = SwitchResidueTypeSetMover('centroid')
    to_fullatom = SwitchResidueTypeSetMover('fa_standard')

    # 5. convert the poses to centroid
    to_centroid.apply(pose)
    to_centroid.apply(test_pose)

    # 6. create the MoveMap, all backbone torsions free
    # chi angles are left out: minimizing centroid sidechains is almost
    # always useless since the representation exists for speed, not accuracy
    movemap = MoveMap()
    movemap.set_bb(True)

    # 7. setup the ClassicFragmentMovers
    # each try/except reports an improper fragment file as an IOError while
    # keeping the underlying error as its cause; KeyboardInterrupt and
    # SystemExit are deliberately not intercepted
    # -for the long fragments file
    try:
        fragset_long = core.fragment.ConstantLengthFragSet(
            long_frag_length, long_frag_filename)
    except Exception as err:
        raise IOError('Make sure long_frag_length matches the fragments in\n'
                      'long_frag_file and that long_frag_file is valid'
                      ) from err
    long_frag_mover = protocols.simple_moves.ClassicFragmentMover(
        fragset_long, movemap)
    # -and for the short fragments file
    try:
        fragset_short = core.fragment.ConstantLengthFragSet(
            short_frag_length, short_frag_filename)
    except Exception as err:
        raise IOError('Make sure short_frag_length matches the fragments in\n'
                      'short_frag_file and that short_frag_file is valid'
                      ) from err
    short_frag_mover = protocols.simple_moves.ClassicFragmentMover(
        fragset_short, movemap)

    # 8. setup RepeatMovers for the ClassicFragmentMovers
    insert_long_frag = protocols.moves.RepeatMover(long_frag_mover,
                                                   long_inserts)
    insert_short_frag = protocols.moves.RepeatMover(short_frag_mover,
                                                    short_inserts)

    # 9. create a PyMOL_Observer for exporting structures to PyMOL (optional)
    # the observer sends a pose to PyMOL every time it changes, which shows
    # all intermediates of a protocol but can be hard to interpret and can
    # slow both the protocol and PyMOL considerably
    # uncomment the line below to use PyMOL_Observer
    ## AddPyMOLObserver(test_pose, True)

    # 10. create ScoreFunctions
    # low-resolution (centroid), needed by the TrialMover's MonteCarlo object
    scorefxn_low = create_score_function('score3')
    # high-resolution (fullatom), needed by the PyJobDistributor for scoring
    # the final output
    scorefxn_high = get_fa_scorefxn()

    # 11. setup a RepeatMover on a TrialMover of a SequenceMover
    # a. create a SequenceMover of the fragment insertions
    #### add any other moves you desire
    folding_mover = protocols.moves.SequenceMover()
    folding_mover.add_mover(insert_long_frag)
    folding_mover.add_mover(insert_short_frag)
    # b. create a MonteCarlo object to define success/failure
    # it must be reset for each trajectory (done in the loop below)
    mc = MonteCarlo(test_pose, scorefxn_low, kT)
    # c. create the TrialMover
    trial = TrialMover(folding_mover, mc)
    # d. for each trajectory, try <cycles> applications of the trial
    folding = protocols.moves.RepeatMover(trial, cycles)

    # 12. create a (Py)JobDistributor
    jd = PyJobDistributor(job_output, jobs, scorefxn_high)

    # 13. store the score evaluations for output
    # printing the scores as they are produced would be difficult to read
    # among Rosetta's verbose output, so they are collected and printed last
    scores = [0] * (jobs + 1)
    scores[0] = scorefxn_low(pose)

    # 14. perform folding
    counter = 0  # for naming the structures
    while not jd.job_complete:
        # a. set necessary variables for the new trajectory
        # -reload the starting pose
        test_pose.assign(pose)
        # -change the pose's PDBInfo.name, for the PyMOL_Observer
        counter += 1
        test_pose.pdb_info().name(job_output + '_' + str(counter))
        # -reset the MonteCarlo object (sets lowest_score to that of
        #  test_pose)
        mc.reset(test_pose)
        #### if you create a custom protocol, you may have additional
        #### variables to reset, such as kT

        # b. apply the folding protocol
        folding.apply(test_pose)

        # c. export the lowest scoring decoy structure for this trajectory
        # -recover the lowest scoring decoy structure
        mc.recover_low(test_pose)
        # -store the final score for this trajectory
        scores[counter] = scorefxn_low(test_pose)
        # -convert the decoy to fullatom
        # the sidechain conformations will all be default; normally decoys
        # would NOT be converted before writing, it is done here to make the
        # output easier to understand and manipulate
        to_fullatom.apply(test_pose)
        # -guess what cysteines are involved in disulfide bridges
        guess_disulfides(test_pose)
        # -output the fullatom decoy structure into a PDB file
        jd.output_decoy(test_pose)
        # -rename the final structure, for export to PyMOL
        test_pose.pdb_info().name(job_output + '_' + str(counter) + '_fa')
        #### if you want to see the decoy scores, uncomment the line below
        #scorefxn_high( test_pose )

    # 15. output the score evaluations
    print('===== Centroid Scores =====')
    print('Original Score\t:\t', scores[0])
    for i in range(1, len(scores)):
        # the "[:14].ljust(14)" forces the text alignment
        print((job_output + '_' + str(i))[:14].ljust(14) + '\t:\t',
              scores[i])

    return scores  # for other protocols
def packer_task(pose, PDB_out=False):
    """
    Demonstrates the syntax necessary for basic usage of the PackerTask object

    Performs demonstrative sidechain packing followed by selected design of
    the five "middle" residues, working on a copy of <pose>. PackerTasks,
    scores and sequence fragments are printed along the way and a PyMOLMover
    is applied to the original, packed and designed structures.

    :param pose: the starting pose; it is copied into a working pose before
        any packing or design is applied
    :param PDB_out: if True, the packed and designed structures are written
        to 'packed.pdb' and 'designed.pdb'
    :return: None

    A resfile named 'sample_resfile' is generated in the current directory
    regardless of <PDB_out>.
    """
    # create a copy of the pose
    test_pose = Pose()
    test_pose.assign(pose)

    # this object is contained in PyRosetta v2.0 and above
    pymover = PyMOLMover()

    # create a standard ScoreFunction
    # (formerly: create_score_function_ws_patch('standard', 'score12'))
    scorefxn = get_fa_scorefxn()

    ############
    # PackerTask
    # a PackerTask encodes preferences and options for sidechain packing, an
    #    effective Rosetta methodology for changing sidechain conformations,
    #    and design (mutation)
    # a PackerTask stores information on a per-residue basis
    # each residue may be packed or designed
    # PackerTasks are handled slightly differently in PyRosetta
    ####pose_packer = PackerTask()    # this line will not work properly
    pose_packer = standard_packer_task(test_pose)
    # the pose argument tells the PackerTask how large it should be

    # sidechain packing "optimizes" a pose's sidechain conformations by
    #    cycling through (Dunbrack) rotamers (sets of chi angles) at a
    #    specific residue and selecting the rotamer which achieves the lowest
    #    score, enumerating all possibilities for all sidechains
    #    simultaneously is impractically expensive so the residues to be
    #    packed are individually optimized in a "random" order
    # packing options include:
    #    -"freezing" the residue, preventing it from changing conformation
    #    -including the original sidechain conformation when determining the
    #        lowest scoring conformation
    pose_packer.restrict_to_repacking()    # turns off design
    pose_packer.or_include_current(True)    # considers original conformation
    print(pose_packer)

    # packing and design can be performed by a PackRotamersMover, it requires
    #    a ScoreFunction, for optimizing the sidechains and a PackerTask,
    #    setting the packing and design options
    packmover = protocols.minimization_packing.PackRotamersMover(
        scorefxn, pose_packer)

    scorefxn(pose)    # to prevent verbose output on the next line
    print('\nPre packing score:', scorefxn(test_pose))
    test_pose.pdb_info().name('original')    # for PyMOLMover
    pymover.apply(test_pose)

    packmover.apply(test_pose)
    print('Post packing score:', scorefxn(test_pose))
    test_pose.pdb_info().name('packed')    # for PyMOLMover
    pymover.apply(test_pose)
    if PDB_out:
        test_pose.dump_pdb('packed.pdb')

    # since the PackerTask specifies how the sidechains change, it has been
    #    extended to include sidechain constitutional changes allowing
    #    protein design, this method of design is very similar to sidechain
    #    packing; all rotamers of the possible mutants at a single residue
    #    are considered and the lowest scoring conformation is selected
    # design options include:
    #    -allow all amino acids
    #    -allow all amino acids except cysteine
    #    -allow specific amino acids
    #    -prevent specific amino acids
    #    -allow polar amino acids only
    #    -prevent polar amino acids
    #    -allow only the native amino acid
    # the myriad of packing and design options can be set manually or, more
    #    commonly, using a specific file format known as a resfile
    #    resfile syntax is explained at:
    #    http://www.rosettacommons.org/manuals/archive/rosetta3.1_user_guide/file_resfiles.html
    # manually setting design options is tedious, the methods below are handy
    #    for creating resfiles
    # mutate the "middle" residues
    total = test_pose.total_residue()
    center = total // 2
    # clamp the design window to valid pose numbering (1..total) so that very
    #    short poses never request residue 0 or residues past the end
    first = max(1, center - 2)
    last = min(total, center + 2)
    specific_design = {resi: 'ALLAA' for resi in range(first, last + 1)}
    # write a resfile to perform these mutations
    generate_resfile_from_pose(test_pose, 'sample_resfile', False,
                               specific=specific_design)

    # setup the design PackerTask, use the generated resfile
    pose_design = standard_packer_task(test_pose)
    rosetta.core.pack.task.parse_resfile(test_pose, pose_design,
                                         'sample_resfile')
    print(pose_design)

    # prepare a new structure
    test_pose.assign(pose)

    # perform design
    designmover = protocols.minimization_packing.PackRotamersMover(
        scorefxn, pose_design)

    # the sequence string is 0-indexed; a negative slice start would wrap
    #    around to the end of the string, so it is clamped at 0
    seq_start = max(0, center - 5)
    seq_stop = center + 4

    # the message is a single literal: a backslash continuation inside the
    #    string would leak the source indentation into the printed text
    print('\nDesign with all proteogenic amino acids at (pose numbered) '
          'residues', first, 'to', last)
    print('Pre-design score:', scorefxn(test_pose))
    print('Pre-design sequence: ...' +
          test_pose.sequence()[seq_start:seq_stop] + '...')

    designmover.apply(test_pose)    # perform design
    print('\nPost-design score:', scorefxn(test_pose))
    print('Post-design sequence: ...' +
          test_pose.sequence()[seq_start:seq_stop] + '...')
    test_pose.pdb_info().name('designed')    # for PyMOLMover
    pymover.apply(test_pose)
    if PDB_out:
        test_pose.dump_pdb('designed.pdb')