def _load_existing(self):
    """Fill ``self.written`` from relaxed structures found in ``self.sub_dir``.

    Every directory entry whose name contains ``'relaxed'`` is loaded with
    ``pose_from_pdb`` and scored with ``self.scorefxn``.  The resulting
    ``(pose, score)`` tuple is stored under the entry's base name after it
    has been passed through ``strip_pdb_ext``.
    """
    for entry in os.listdir(self.sub_dir):
        if 'relaxed' not in entry:
            continue
        loaded_pose = pose_from_pdb(os.path.join(self.sub_dir, entry))
        record = (loaded_pose, self.scorefxn(loaded_pose))
        self.written[strip_pdb_ext(os.path.split(entry)[1])] = record
def relex_from_pdb(seq, feature_path, input_pdb, output_pdb):
    """Load *input_pdb*, apply constraints, relax, and write *output_pdb*.

    Args:
        seq: Passed as the first argument to ``Constraints``.
        feature_path: Passed as the second argument to ``Constraints``.
        input_pdb: Path of the PDB file to load.
        output_pdb: Path the relaxed pose is dumped to.
    """
    pose = pose_from_pdb(input_pdb)
    # Build the constraint set and attach it to the pose before relaxing.
    Constraints(seq, feature_path).get_constraint_v1_fix_gly().apply(pose)
    relax(pose)
    pose.dump_pdb(output_pdb)
def pdb_to_chains(pdb_path):
    """Yield ``(chain_id, sequence, vectors)`` for every fully vectorized chain.

    The structure at *pdb_path* is loaded with ``pyrosetta.pose_from_pdb`` and
    split by ``pre_compute_chains`` (called with ``batch_size=64``), whose
    result maps each chain id to an inclusive ``(first, last)`` residue range.
    Each residue goes through ``aa_to_vector``; when that raises
    ``AssertionError`` a message is printed and the whole chain is skipped.

    Yields:
        Tuples of the chain id, the concatenated residue letters, and the
        list of per-residue vectors.
    """
    # Derived from the file name; not used further inside this function.
    pdb_id = os.path.splitext(os.path.basename(pdb_path))[0]

    # Generate chains from PDB
    pose = pyrosetta.pose_from_pdb(pdb_path)
    chain_ranges = pre_compute_chains(pose, batch_size=64)

    for chain_id, (first_res, last_res) in chain_ranges.items():
        seq_string = ''
        chain_list = []
        for r_id in range(first_res, last_res + 1):
            try:
                r_letter, r_vec = aa_to_vector(pose, r_id)
                seq_string += r_letter
                chain_list.append(r_vec)
            except AssertionError as e:
                print('Could not vectorize residue {}: {}, skipping chain {}'.
                      format(r_id, str(e), chain_id))
                break
        else:
            # Reached only when no residue of this chain failed.
            yield chain_id, seq_string, chain_list
def check_loop_clash(path_to_pdb_w_loop, scorefxn, clash_en_threshold=5):
    """Return ``True`` when no residue's total energy exceeds the threshold.

    Args:
        path_to_pdb_w_loop: Path of the PDB file to load and score.
        scorefxn: Callable applied to the pose before energies are read.
        clash_en_threshold: Per-residue total energy above which the
            structure is rejected.

    Returns:
        ``False`` if any residue's total energy is greater than
        *clash_en_threshold*, otherwise ``True``.
    """
    pose = py.pose_from_pdb(path_to_pdb_w_loop)
    scorefxn(pose)
    residue_count = pose.pdb_info().nres()
    per_residue = [
        pose.energies().residue_total_energy(index)
        for index in range(1, residue_count + 1)
    ]
    return not any(energy > clash_en_threshold for energy in per_residue)
def safe_load_pdb(pdb, rosetta_flags_file=""):
    """Load *pdb* as a pose, returning ``None`` when loading fails.

    ``run_pyrosetta_with_flags`` is called first with *rosetta_flags_file*
    and ``mute=False``.  A ``RuntimeError`` from ``pyrosetta.pose_from_pdb``
    is printed together with the offending path instead of being raised.
    """
    run_pyrosetta_with_flags(rosetta_flags_file, mute=False)
    try:
        pose = pyrosetta.pose_from_pdb(pdb)
    except RuntimeError as load_error:
        print(load_error)
        print(f"unable to load: {pdb}")
        return None
    return pose
def safe_load_pdbs(pdbs):
    """Lazily yield a pose for each path in *pdbs* that loads successfully.

    A path whose load raises ``RuntimeError`` is reported on stdout and
    skipped; iteration then continues with the next path.
    """
    for pdb in pdbs:
        try:
            # The yield stays inside the try so a RuntimeError thrown into
            # the generator at this point is handled like a load failure.
            yield pyrosetta.pose_from_pdb(pdb)
        except RuntimeError as load_error:
            print(load_error)
            print(f"unable to load: {pdb}")
def dictionaried_poses(pdb_file_names, score_function, cst_files):
    """Load, score and constrain poses, returning them keyed by file name.

    Args:
        pdb_file_names: Iterable of PDB paths to load.
        score_function: Callable applied to each freshly loaded pose.
        cst_files: Mapping of a name fragment to a constraint file.  Each
            key is printed for every pose, and its constraint file is
            applied via ``apply_match_constraints`` when the key is a
            substring of the PDB file name.

    Returns:
        Dict mapping each PDB file name to its pose.
    """
    pose_set = {}
    for pdb_file_name in pdb_file_names:
        current_pose = pr.pose_from_pdb(pdb_file_name)
        pose_set[pdb_file_name] = current_pose
        score_function(current_pose)
        for cst, cst_file in cst_files.items():
            print(cst)
            if cst in pdb_file_name:
                apply_match_constraints(current_pose, cst_file)
    return pose_set
def get_psize():
    """Count residues of the first structure that contain an atom of type 24.

    The structure is loaded from ``names[0]`` (``names`` is defined outside
    this function).  Each residue contributes at most one to the count.
    """
    pose = pyrosetta.pose_from_pdb(names[0])
    size = 0
    for res_index in range(1, pose.total_residue() + 1):
        # NOTE(review): 24 is an atom-type index; its meaning is not shown here.
        has_type_24 = any(
            pose.residue(res_index).atom(atom_index).type() == 24
            for atom_index in range(1, pose.residue(res_index).natoms() + 1))
        if has_type_24:
            size += 1
    return size
def _pose_from_pdb(self, pdb_file: str) -> Tuple[Pose, float]:
    """Return ``(pose, score)`` for *pdb_file*, reusing ``self.written``.

    The lookup key is the file's base name with its extension stripped by
    ``strip_pdb_ext`` and its characters sorted.  On a miss the file is
    loaded, relaxed with ``self.relaxer``, dumped next to the input as
    ``<key>.relaxed.clean.pdb`` and scored with ``get_fa_scorefxn()``.
    This method does not itself add the new result to ``self.written``.
    """
    directory, file_name = os.path.split(pdb_file)
    name = ''.join(sorted(strip_pdb_ext(file_name)))

    if name not in self.written:
        new_pose = pose_from_pdb(pdb_file)
        self.relaxer.apply(new_pose)
        new_pose.dump_pdb(os.path.join(directory, name + '.relaxed.clean.pdb'))
        score = get_fa_scorefxn()(new_pose)
        return new_pose, score

    return self.written[name]
def reset(self, **kwargs):
    """Re-initialise this object's state around a randomly chosen target.

    A file name is drawn at random from ``protein_data/short_valid``, loaded
    as ``self.target_protein_pose``, and a fresh pose is built from its
    sequence.  Counters, distance/energy baselines, the encoded sequence and
    the residue mask are then reset.

    Args:
        **kwargs: Accepted but not used in this method.

    Returns:
        Whatever ``self._get_state()`` returns.
    """
    dir = 'protein_data/short_valid'
    protein_name = np.random.choice(os.listdir(dir))
    # Persist the best conformation of the previous run if it improved on
    # the distance that run started from.
    if self.start_distance > self.best_distance:
        self.write_best_conformation(self.best_distance)
    # NOTE(review): both branches are identical as written, so the value of
    # self.validation currently makes no difference here.
    if self.validation:
        self.name = protein_name
        dir = 'protein_data/short_valid'
    else:
        self.name = protein_name
        dir = 'protein_data/short_valid'
    self.target_protein_pose = pose_from_pdb(os.path.join(dir, self.name))
    self.prev_residues_angle_distane = {}
    self.protein_pose = pose_from_sequence(
        self.target_protein_pose.sequence())
    # Two entries per residue index i: key 2*i for argument 0 and key 2*i+1
    # for argument 1 of get_residue_distance.
    # NOTE(review): range() excludes total_residue() itself, so the last
    # residue index gets no entry - confirm this is intended.
    for i in range(1, self.target_protein_pose.total_residue()):
        self.prev_residues_angle_distane[i * 2] = self.get_residue_distance(
            0, i)
        self.prev_residues_angle_distane[i * 2 + 1] = self.get_residue_distance(
            1, i)
    # Without shuffling the pose is rebuilt from the sequence; otherwise the
    # existing pose is handed to scramble_pose.
    if not self.shuffle:
        self.protein_pose = pose_from_sequence(
            self.target_protein_pose.sequence())
    else:
        self.scramble_pose(self.protein_pose)
    self.move_counter = 0
    self.reward = 0.0
    self.prev_ca_rmsd = None
    self.achieved_goal_counter = 0
    self.current_residue = 1
    # Best and starting distance begin at the same value.
    self.best_distance = self._get_ca_metric(self.protein_pose,
                                             self.target_protein_pose)
    self.start_distance = self._get_ca_metric(self.protein_pose,
                                              self.target_protein_pose)
    # Best, starting and previous energy all begin at the current score.
    self.best_energy = self.scorefxn(self.protein_pose)
    self.start_energy = self.scorefxn(self.protein_pose)
    self.prev_energy = self.scorefxn(self.protein_pose)
    self.encoded_residue_sequence = self._encode_residues(
        self.target_protein_pose.sequence())
    self.residue_mask = self.create_residue_mask(
        self.target_protein_pose.sequence())
    self.save_best_matches()
    return self._get_state()
def main(argv=None):
    """Print or save the torsions of the structure named on the command line.

    Args:
        argv: Argument vector; defaults to ``sys.argv``.  ``argv[1]`` is the
            input PDB path and the optional ``argv[2]`` is an output file.

    Each item produced by ``get_torisions`` is emitted as one tab-separated
    line, to the output file when one is given and to stdout otherwise.
    """
    if argv is None:
        argv = sys.argv
    pose = pyrosetta.pose_from_pdb(argv[1])

    if len(argv) > 2:
        with open(argv[2], 'w') as output_file:
            for position in get_torisions(pose):
                # Plain write() replaces the Python 2 ``print >> file``
                # statement, which fails with TypeError on Python 3.
                output_file.write(
                    '\t'.join([str(x) for x in position]) + '\n')
    else:
        for position in get_torisions(pose):
            print('\t'.join([str(x) for x in position]))
def getScore(path):
    """Calculate pyRosetta score.

    The file at *path* is cleaned with ``cleanATOM``, the cleaned copy is
    loaded and scored with the module-level ``scorefxn``, and the per-term
    energies from ``eTerms`` are returned with the total stored under the
    ``'ref15'`` key.  The cleaned copy is deleted afterwards, including when
    loading or scoring raises.

    Args:
        path: Path of the input PDB file; its last three characters are
            replaced to form the cleaned file's name.

    Returns:
        The mapping returned by ``eTerms`` with the extra ``'ref15'`` entry.
    """
    # Clean PDB files
    cleanATOM(path)
    # Clean struct name
    cleanName = path[0:-3] + 'clean' + '.pdb'
    try:
        pose = pr.pose_from_pdb(cleanName)
        total = scorefxn(pose)
        energy = eTerms(pose)
        energy['ref15'] = total
    finally:
        # Always discard the temporary cleaned file; the existence check
        # keeps a missing file from masking the original exception.
        if os.path.exists(cleanName):
            os.remove(cleanName)
    return energy
def trim_and_dump_one_pdb(pdb):
    """Write N-terminally trimmed copies of *pdb* next to the original.

    For each ``trim`` from 1 to the module-level ``number_trimmed``, the
    region from residue ``1 + trim`` to the last residue is extracted with
    ``grafting.return_region``, given a fresh ``PDBInfo`` and dumped to
    ``<stem>_trim<trim>.pdb``.

    Args:
        pdb: Path of the input PDB file; must contain ``.pdb``.

    Raises:
        ValueError: If *pdb* contains no ``.pdb`` suffix.  The output name
            would then equal the input name and the input file would be
            overwritten.
    """
    if re.search(r'\.pdb', pdb) is None:
        raise ValueError(
            "cannot derive trimmed file names from {!r}".format(pdb))

    pose = pyrosetta.pose_from_pdb(pdb)
    n_residue = pose.size()
    for trim in range(1, number_trimmed + 1):
        # Format only the replacement template so braces in the path are
        # left alone, and match a literal dot rather than any character.
        pdb_ntrim = re.sub(r'(.*)\.pdb',
                           r'\g<1>_trim{0}.pdb'.format(trim), pdb)
        pose_ntrim = grafting.return_region(pose, 1 + trim, n_residue)
        pose_ntrim.pdb_info(pyrosetta.rosetta.core.pose.PDBInfo(pose_ntrim))
        pose_ntrim.dump_pdb(pdb_ntrim)
def preprocess(path: str, config: dict) -> dict:
    """Return relaxed base poses, using ``./obj/base_poses.pkl`` as a cache.

    When the cache file exists it is unpickled and returned.  Otherwise each
    key of *config* is treated as a PDB name: ``path + name + '.pdb'`` is
    cleaned with ``cleanATOM``, the ``.clean.pdb`` result is loaded and
    relaxed, and the finished dict is pickled to the cache.

    Args:
        path: Prefix prepended to every PDB name (plain string concatenation).
        config: Mapping whose keys are the PDB names to process.

    Returns:
        Dict mapping each PDB name to its pose.
    """
    poses = {}
    rosetta_init()
    objects = os.listdir('./obj/')
    if 'base_poses.pkl' in objects:
        # NOTE: pickle executes arbitrary code on load; only use a cache
        # file that this program produced itself.
        with open('./obj/base_poses.pkl', 'rb') as file:
            poses = pk.load(file)
    else:
        for pdb in config:
            cleanATOM(path + pdb + '.pdb')
            pose = pose_from_pdb(path + pdb + '.clean.pdb')
            relax.relax_pose(pose=pose, scorefxn=get_fa_scorefxn(), tag='')
            poses[pdb] = pose
        # Open the cache only once every pose exists, so a failure above
        # cannot leave an empty file that breaks the next load.
        with open('./obj/base_poses.pkl', 'wb') as file:
            pk.dump(poses, file)
    return poses
def get_distance_df(glob_path):
    """Summarise distances over every PDB file matching *glob_path*.

    Each matching file is loaded, passed to ``get_all_distances``, turned
    into a data frame and tagged with its path in an ``INPUT_PDB`` column.
    The frames are concatenated and grouped by the atom, position, residue,
    PDB and chain columns; ``dist`` is aggregated to mean, min, max and std.

    Returns:
        The aggregated data frame with the group keys restored as columns.
    """
    group_columns = [
        'atom_1', 'atom_2', 'p1', 'p2', 'res1', 'res2', 'res1_3', 'res2_3',
        'pdb1', 'pdb2', 'chain1', 'chain2'
    ]
    frames = []
    for pdb_path in glob.glob(glob_path):
        pose = pyrosetta.pose_from_pdb(pdb_path)
        frame = pandas.DataFrame(get_all_distances(pose))
        frame['INPUT_PDB'] = pdb_path
        frames.append(frame)

    # If there is more than one input, identical pairs are pooled here.
    combined = pandas.concat(frames)
    summary = combined.groupby(group_columns).aggregate(
        {'dist': ['mean', 'min', 'max', 'std']})
    return summary.reset_index()
def renumber_set(pdbfile, residue_set):
    """
    Expand PDB residue numbers to pose numbering across all chains.

    Mostly for Hfq stuff; will need further refining.  For example
    16,17,19 becomes 16A, 17A, 19A, 16B, 17B, 19B ... expressed in pose
    numbering.  Residues for which ``pdb2pose`` returns 0 are left out.
    """
    pose = pyrosetta.pose_from_pdb(pdbfile)
    renumbered_set = []
    for chain in get_chains(pose):
        pose_numbers = (pose.pdb_info().pdb2pose(chain, res)
                        for res in residue_set)
        renumbered_set.extend(num for num in pose_numbers if num != 0)
    return renumbered_set
def bind(unit1: tuple, unit2: tuple):
    """Merge two subunits into one structure and compare the scores.

    Each unit is a ``(name, pose)`` tuple.  The two ``subunits/<name>.clean.pdb``
    files are concatenated into a new file named after the sorted,
    ``_``-joined name parts (with a trailing ``_``), which is then loaded
    and scored.

    Returns:
        A 2-tuple.  The first element maps each input name to
        ``(score_difference, merged_name, other_name)``, where the difference
        is the summed input scores minus the merged score.  The second maps
        the merged name to the merged pose.
    """
    # returns tuple[dict[str: tuple[float, str]], dict]:
    scorefxn = get_fa_scorefxn()
    rosetta_init()
    og_score = scorefxn(unit1[1]) + scorefxn(unit2[1])

    with open("subunits/" + unit1[0] + ".clean.pdb", "r") as handle:
        fin1 = handle.read()
    with open("subunits/" + unit2[0] + ".clean.pdb", "r") as handle:
        fin2 = handle.read()

    new_unit = (unit1[0] + unit2[0]).split('_')
    new_unit.remove('')
    new_unit.sort()
    new_str = '_'.join(new_unit) + '_'

    merged_path = "subunits/" + new_str + ".clean.pdb"
    # Close the file before loading so the merged text is on disk when
    # pose_from_pdb reads it.
    with open(merged_path, "w") as to_write:
        to_write.write(fin1 + fin2)

    new_pose = pose_from_pdb(merged_path)
    new_score = scorefxn(new_pose)
    diff = og_score - new_score
    return ({unit1[0]: (diff, new_str, unit2[0]),
             unit2[0]: (diff, new_str, unit1[0])},
            {new_str: new_pose})
def compute_per_residue_energies(pdb):
    """
    Compute per-residue energies for each term in the score function

    Args:
        *pdb*: the path to an input PDB file

    Returns:
        A dataframe with columns giving energies and rows giving residues
    """
    # Names of all non-zero-weighted score terms of the module-level `sf`
    score_terms = [
        str(term).replace('ScoreType.', '')
        for term in list(sf.get_nonzero_weighted_scoretypes())
    ]

    # One output column per fixed field and per score term
    column_names = ['res_n', 'res_aa', 'energy'] + score_terms
    scores_dict = {column: [] for column in column_names}

    # Read in and score pose
    pose = pyrosetta.pose_from_pdb(pdb)
    sf(pose)

    # Collect the total and the per-term energy of every residue
    last_residue = pose.size()
    for res_n in range(1, last_residue + 1):
        scores_dict['res_n'].append(res_n)
        scores_dict['res_aa'].append(pose.residue(res_n).name1())
        scores_dict['energy'].append(
            pose.energies().residue_total_energy(res_n))
        for term_name in score_terms:
            term_energies = pose.energies().residue_total_energies(res_n)
            term_type = pyrosetta.rosetta.core.scoring.score_type_from_name(
                term_name)
            scores_dict[term_name].append(term_energies[term_type])

    return pandas.DataFrame(scores_dict)
def compare_openmm_energy_pyrosetta_score(cgmodel, mm=False):
    """
    Given a cgmodel class object, report the OpenMM energy alongside a
    PyRosetta score built from the same model settings.

    The pose is read from ``init.pdb``.  The bond list and the OpenMM
    potential energy are printed, and the non-bonded interaction list, the
    inter-particle distances, the hand-computed LJ energies and the OpenMM
    energy are written to ``energies.dat``.

    Parameters
    ----------
    cgmodel: Coarse grained model class object.
    mm: Forwarded to ``build_scorefxn``.
    """
    # Build a PyRosetta pose
    pose = pyrosetta.pose_from_pdb("init.pdb")
    # Define a PyRosetta scoring function
    scorefxn = build_scorefxn(cgmodel, mm=mm)
    # Get the PyRosetta score (computed, but not currently reported)
    score = scorefxn(pose)
    # Get the cg_openmm energy
    energy = get_mm_energy(cgmodel.topology, cgmodel.system,
                           cgmodel.positions).in_units_of(
                               unit.kilocalorie_per_mole)

    print("The bond list is: " + str(cgmodel.bond_list))
    print("The OpenMM potential energy is: " + str(energy))

    # The context manager closes the file even if one of the expressions
    # below raises part-way through.
    with open("energies.dat", "w") as out_file:
        out_file.write("The nonbonded interaction list is: " +
                       str(cgmodel.nonbonded_interaction_list) + "\n")
        out_file.write("The distances between these particles are: " + str([
            distance(cgmodel.positions[interaction[0]],
                     cgmodel.positions[interaction[1]])
            for interaction in cgmodel.nonbonded_interaction_list
        ]) + "\n")
        out_file.write("The LJ energy calculated by hand is: " + str([
            lj_v(cgmodel.positions[interaction[0]],
                 cgmodel.positions[interaction[1]],
                 cgmodel.get_sigma(0), cgmodel.get_epsilon(0))
            for interaction in cgmodel.nonbonded_interaction_list
        ]) + "\n")
        out_file.write("The OpenMM potential energy is: " + str(energy) +
                       "\n")
    return
def __init__(self, pdb_file: str, sigmoid_center: float,
             sigmoid_norm_value: float):
    """
    Build a RosettaFolding landscape from the structure in a .pdb file.

    Args:
        pdb_file: Path to .pdb file with structure information.
        sigmoid_center: Center of sigmoid function.
        sigmoid_norm_value: 1 / scale of sigmoid function.

    Raises:
        ImportError: If the ``prs`` name (pyrosetta) is not available.
    """
    super().__init__(name="RosettaFolding")

    # A bare reference to the alias fails with NameError when pyrosetta was
    # never imported; turn that into a clearer ImportError for the user.
    try:
        prs
    except NameError as missing_module:
        raise ImportError(
            "Error: Pyrosetta not installed. "
            "Source, binary, and conda installations available "
            "at http://www.pyrosetta.org/dow") from missing_module

    # Start pyrosetta with its output messages muted.
    prs.init("-mute all")

    # A single pose is kept and mutated to whatever sequence is being
    # measured: Rosetta changes sequence identity one residue at a time,
    # copying the previous residue's backbone coordinates into the new one.
    self.pose = prs.pose_from_pdb(pdb_file)
    self.wt_pose = self.pose.clone()

    # Switch self.pose (not the wild-type clone) from full-atom to centroid.
    prs.SwitchResidueTypeSetMover("centroid").apply(self.pose)

    # Fitness is 1 - sigmoid(centroid energy / norm_value).
    self.score_function = prs.create_score_function("cen_std")
    self.sigmoid_center = sigmoid_center
    self.sigmoid_norm_value = sigmoid_norm_value
def bh(eaObj, initcfg):
    """Copy run settings from *initcfg* onto *eaObj* and build its start pose.

    ``initcfg['initState']`` selects how the initial pose is created:
    ``'extended'`` reads the sequence from ``<path><id>/<id>.fasta``,
    ``'pdb'`` loads ``<path><id>/<id>.pdb``, and ``'sequence'`` takes the
    sequence from ``initcfg['sequence']``.

    Raises:
        Exception: If ``initState`` is none of the three values above.
    """
    # Create references to variables in ini file
    eaObj.genN = int(initcfg['nGen'])
    eaObj.pdbid = initcfg['protein']
    eaObj.proteinPath = initcfg['path']

    init_state = initcfg['initState']
    if init_state not in ('extended', 'pdb', 'sequence'):
        raise Exception("Unrecognized initState!")

    if init_state == 'pdb':
        pdb_path = eaObj.proteinPath + "{0}/{0}.pdb".format(eaObj.pdbid)
        eaObj.initialPose = pose_from_pdb(pdb_path)
    else:
        # Both remaining states build the pose from a sequence string and
        # differ only in where that string comes from.
        if init_state == 'extended':
            fasta_path = eaObj.proteinPath + "{0}/{0}.fasta".format(
                eaObj.pdbid)
            eaObj.sequence = seq_from_fasta(fasta_path)
        else:
            eaObj.sequence = initcfg['sequence']
        eaObj.initialPose = pose_from_sequence(eaObj.sequence)

    eaObj.seqLen = eaObj.initialPose.size()
def save_DNA_to_PDB(output, file_Name):
    """Write *output* to ``DNA_aptamer<file_Name>.pdb`` and load it as a pose.

    Args:
        output: Iterable of strings written with ``writelines``.
        file_Name: Text inserted between ``DNA_aptamer`` and ``.pdb``.

    Returns:
        The pose loaded back from the file that was just written.
    """
    pdb_path = "DNA_aptamer" + file_Name + ".pdb"
    with open(pdb_path, "w") as pdb_handle:
        pdb_handle.writelines(output)
    return pose_from_pdb(pdb_path)
sss = list(pose.secstruct()) return sss, abego_list, aa_list #prs.init() args = sys.argv infilename = args[1] outfilename = args[2] file = open(outfilename, mode="w") file.write( "pdbname pdbnameid abego ss1 ss2 ss1_start ss1_end loop_start loop_end ss2_start ss2_end NLangle CLangle HHangle NNangle CCangle HHdihedral NLdihedral CLdihedral distloop aa_seq abego_seq sse_seq\n" ) with open(infilename) as f: #unitcount=0 for line in f: pose = prs.Pose() pose = prs.pose_from_pdb(line.split("\n")[0]) pdbname = line.split("\n")[0].split("/")[-1] ss_list, abego_list, aa_list = pose2sseabego(pose) #print(aa_list) p2f.pose2features(pose, ss_list, abego_list, aa_list=aa_list, pdbname=pdbname, fileobject=file) file.close()
# Script: mutate one residue of TEV_solo.pdb to the AZC residue type, set up
# a repacking task and a rotamer set for that position, and score the pose.
import pyrosetta as pr
from pyrosetta.rosetta.protocols.minimization_packing import \
    PackRotamersMover
from pyrosetta.rosetta.core.pack.rotamer_set import \
    RotamerSetFactory
import sys

# Initialise PyRosetta with the AZC params file and the flags given below.
pr.init('-extra_res_fa inputs/AZC.params -ex1 -ex2')
pose = pr.pose_from_pdb('TEV_solo.pdb')

print("emd182::Building score function")
# Start from an empty score function and load the ref2015 weights into it.
sf = pr.rosetta.core.scoring.ScoreFunction()
sf.add_weights_from_file('ref2015')

print("emd182::Setting up and making a mutation")
# Position (pose numbering as used by MutateResidue.set_target) to mutate.
res_mut = 30
mutater = pr.rosetta.protocols.simple_moves.MutateResidue()
mutater.set_target(res_mut)
mutater.set_res_name('AZC')
mutater.apply(pose)

print("emd182::Making Packertask and restricting to repacking")
packer_task = pr.standard_packer_task(pose)
packer_task.restrict_to_repacking()
packer_task.set_bump_check(False)
# NOTE(review): pack_mover is constructed but not applied in the visible code.
pack_mover = PackRotamersMover(sf, packer_task)

# Create a rotamer set for the pose and point it at the mutated position.
rsf = RotamerSetFactory()
rs = rsf.create_rotamer_set(pose)
rs.set_resid(res_mut)

# Score the mutated pose.
sf(pose)
# Script: configure a PeptideGlycosylationStartingStructureGenerator for one
# peptide and apply it to a single enzyme structure.
import sys
# Make the parent directory importable for the generator module below.
sys.path.insert(0, '../')
import PeptideGlycosylationStartingStructureGenerator as starting
import pyrosetta

if __name__ == "__main__":
    # Parallel single-element lists; only index 0 of each is used below.
    peptide_sequence_list = ['STP']
    glycosylation_location_list = [2]
    constraints_file_list = ["3OTK_constraints_file_STP.cst"]
    repack_interface_list = [False]
    enzyme_pose_file_list = [
        "3OTK-closed-monomer-alpha-GlcNAc-S217C_0005_598_manually_removed.pdb"
    ]

    # Configure the generator through its public attributes.
    mover = starting.PeptideGlycosylationStartingStructureGenerator()
    mover.acceptor_peptide_sequence = peptide_sequence_list[0]
    mover.acceptor_peptide_sugar_name = "core1"
    mover.acceptor_peptide_glycosylation_location = glycosylation_location_list[
        0]
    mover.reference_pose_file = "3OTK-closed-monomer-alpha-GlcNAc_2GAM-GalBGalNAc.pdb"
    mover.constraints_file = constraints_file_list[0]
    mover.repack_interface = repack_interface_list[0]
    mover.decoy_numbers = 1
    mover.output_pdb = True

    # The constraints file is also handed to Rosetta through the init flags.
    init_flags = "-include_sugars -maintain_links -auto_detect_glycan_connections -alternate_3_letter_codes pdb_sugar" + " -constraints:cst_fa_file " + mover.constraints_file
    pyrosetta.init(init_flags)

    # Load the enzyme structure and run the generator on it.
    mover.apply(pyrosetta.pose_from_pdb(enzyme_pose_file_list[0]))
def getpose(pdb):
    """Return the pose that ``pose_from_pdb`` builds from *pdb*."""
    loaded_pose = pose_from_pdb(pdb)
    return loaded_pose
pose = pyrosetta.pose_from_pdb(name) for i in range(pose.total_residue()): for j in range(pose.residue(i + 1).natoms()): if pose.residue(i + 1).atom(j + 1).type() == 24: size += 1 break return size psize = get_psize() with open(out, 'wt') as f: for name in names: pose = pyrosetta.pose_from_pdb(name) plen = pose.total_residue() pose_names.append(out) score = scorefxn(pose) f.write("%8d %10.3f %6d %6d\n" % (psize, score, total_pose + 1, total_pose + 1)) for i in range(pose.total_residue()): for j in range(pose.residue(i + 1).natoms()): if pose.residue(i + 1).atom(j + 1).type() == 24: coords = pose.residue(i + 1).atom(j + 1).xyz() f.write(" %9.3f %9.3f %9.3f\n" % (coords.x, coords.y, coords.z)) break
# Script section: build a coarse-grained model, compare OpenMM and PyRosetta
# energies, then write a pose to PDB and read it back for comparison.
# NOTE(review): polymer_length, backbone_length, sidechain_length and
# sidechain_positions are not defined in the visible portion - presumably set
# earlier in the file.

# Particle and interaction parameters shared by the whole model.
mass = unit.Quantity(10.0, unit.amu)
sigma = unit.Quantity(2.4, unit.angstrom)
bond_length = unit.Quantity(1.0, unit.angstrom)
epsilon = unit.Quantity(0.5, unit.kilocalorie_per_mole)
# charge = unit.Quantity(0.0,unit.elementary_charge)

# Define PDB files to test our PDB writing ability
openmm_pdb_file = 'test_1_1_openmm.pdb'
rosetta_pdb_file = 'test_1_1_rosetta.pdb'

# Build a coarse grained model
cgmodel = basic_cgmodel(polymer_length=polymer_length, backbone_length=backbone_length, sidechain_length=sidechain_length, sidechain_positions=sidechain_positions, mass=mass, bond_length=bond_length, sigma=sigma, epsilon=epsilon)
# write_pdbfile_without_topology(cgmodel,openmm_pdb_file)

# Sequence string made of one bracketed monomer name per monomer.
pyrosetta_sequence = ''.join([str('['+str(monomer['monomer_name'])+']') for monomer in cgmodel.sequence])

# Compare OpenMM and PyRosetta energies
# (This function is also where we initialize new residue/monomer
# types in the PyRosetta database.)
compare_openmm_energy_pyrosetta_score(cgmodel)

# NOTE(review): this rebinds the name pose_from_sequence to a pose object.
pose_from_sequence = pyrosetta.pose_from_sequence(pyrosetta_sequence, 'coarse_grain')

# Test our ability to write a PDB file using our pose and new residue type sets.
# NOTE(review): `pose` is not assigned in the visible portion - confirm it is
# defined earlier, or whether pose_from_sequence was meant here.
pyrosetta.rosetta.core.io.pdb.dump_pdb(pose, rosetta_pdb_file)

# Test our ability to read a pose from the PDB file we wrote
# NOTE(review): this rebinds the name pose_from_pdb to a pose object.
pose_from_pdb = pyrosetta.pose_from_pdb(rosetta_pdb_file)

# Define scorefunction terms
pyrosetta_scorefxn = build_scorefxn(cgmodel)

# Compare poses built from a PDB file and from the polymer sequence
compare_pose_scores(pyrosetta_scorefxn, pose_from_pdb, pose_from_sequence, compare_pdb_sequence=True)

# Stop the interpreter once the comparison has run.
exit()
for i in range(1, pose.size() + 1): if pose.pdb_info().chain(i) == chain: chain_length += 1 return chain_length if __name__ == '__main__': args = parse_args() print args # initialize Rosetta pyrosetta.init() # load pose pose = pyrosetta.pose_from_pdb(args.infile) # loop over chains and modify secondary structure chains = n_chains(pose) # updated jumps from n-terminal to somewhere else? # slide fold_tree jumps so n-terminal changes do not propegate if args.n_termini: ft = pose.fold_tree() for cn in range(len(chains) - 1): ft.slide_jump(cn + 1, args.start_resn + 1, ft.jump_edge(cn + 1).stop() + args.start_resn) pose.fold_tree(ft) print pose.fold_tree()
""".lstrip() pdbs = ( "loop.pdb", "c3_splay.pdb", "c6.pdb", "c1.pdb", "strand.pdb", "small.pdb", "curved_helix.pdb", "c3het.pdb", "c4.pdb", "c3.pdb", "c2.pdb", "c5.pdb", ) for pdb in pdbs: name = pdb.replace(".pdb", "") p = pr.pose_from_pdb("worms/data/" + pdb) cb = util.get_chain_bounds(p) connections = [] for i, b in enumerate(cb): connections.append( dict(chain=i + 1, residues="%i,:1" % (i + 1), direction="N")) connections.append( dict(chain=i + 1, residues="%i,-1:" % (i + 1), direction="C")) if name in "c1 c2 c3 c4 c5 c6": break print(template % (pdb, name, name, str(connections)))