def pose(self):
    """ Loads the PDBMapStructure as a Rosetta::Pose object """
    import_rosetta()
    io = PDBIO()
    io.set_structure(self.structure)
    # write the structure to a temporary PDB file for Rosetta to read back
    # ('wb' here; the original 'wrb' is not a valid file mode)
    with tempfile.NamedTemporaryFile('wb', suffix='.pdb', delete=False) as tf:
        io.save(tf.name)
    pose = rosetta.Pose()
    rosetta.pose_from_pdb(pose, tf.name)
    os.remove(tf.name)
    return pose
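# Hedged usage sketch: `PDBMapStructure` (the class this method belongs to),
# its constructor, and `import_rosetta` are this project's own wrappers, so
# the names below are assumptions for illustration only.
# struct = PDBMapStructure(biopython_structure)
# pose = struct.pose()
# scorefxn = rosetta.create_score_function('standard')
# print scorefxn(pose)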
def generate_resfile_from_pdb(pdbfilename, resfilename, pack=True,
        design=False, input_sc=True, freeze=[], specific={}):
    """
    Writes a resfile for the PDB file <pdbfilename> named <resfilename>

    <pack> = True allows packing by default
    <design> = True allows design using all amino acids by default
    <input_sc> = True allows usage of the original side chain conformation
    <freeze> is an optional list of (pose) residue numbers to exclude
        (preserve the side chain conformations of these residues)
    <specific> is an optional dictionary with (pose) residue numbers as keys
        and resfile keywords as corresponding values (for setting individual
        residue options, it may be easier to add these numbers to freeze and
        edit the resfile manually)

    example:
        generate_resfile_from_pdb('1YY8.pdb','1YY8.resfile')
    See also:
        generate_resfile_from_pose
        Pose
        PackRotamersMover
        TaskFactory
    """
    p = pose_from_pdb(pdbfilename)
    generate_resfile_from_pose(p, resfilename, pack, design, input_sc,
                               freeze, specific)
def __init__(self, scaffold_pdb, gap_descriptions, chain, protein_only=True):
    self.logger = logging.getLogger(__name__)
    # Scoring function to determine probability of moves
    self.scorefxn = rosetta.core.scoring.get_score_function()
    self.res_type = 'fa_standard'
    # Load pose from input PDB file
    self.scaffold_pose = rosetta.pose_from_pdb(scaffold_pdb)
    # Gap descriptions as list of lists:
    # [[pre_anchor, post_anchor, pre_flank, gap, post_flank], ...]
    self.gap_descriptions = gap_descriptions
    self.gap_descriptions.sort(key=lambda x: x[0], reverse=True)
    self.Loops = rosetta.Loops()
    self.loop_list = []
    self.chain = chain
    self.protein_only = protein_only
    return
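# Hedged sketch of how one gap description might become a Rosetta loop for
# remodeling; the anchor values, the midpoint cutpoint choice, and the flat
# `rosetta.Loop` / `Loops.add_loop` names (legacy PyRosetta namespace) are
# assumptions for illustration, not this class's actual loop-building code.
# pre_anchor, post_anchor = 45, 52          # from a gap_descriptions entry
# cutpoint = (pre_anchor + post_anchor) // 2
# self.Loops.add_loop(rosetta.Loop(pre_anchor, post_anchor, cutpoint))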
def setUp(self):
    import rosetta
    fragment_db = FragmentDatabase(
        "/work/fordas/workspace/fragment_fitting/threshold_test_fragments/test_sets.h5")
    self.test_fragments = fragment_db.fragments["source_fragments_4_mer"].read()
    test_fragment_length = \
        fragment_db.fragments["source_fragments_4_mer"].attrs.fragment_length
    test_fragment_atoms = \
        fragment_db.fragments["source_fragments_4_mer"].attrs.fragment_atoms.split(",")
    self.test_fragment_spec = FragmentSpecification(
        test_fragment_length, tuple(test_fragment_atoms))

    pass_test_structure = rosetta.pose_from_pdb(
        path.join(path.dirname(__file__), "foldit_17_0001.pdb"))
    self.pass_test_residues = \
        StructureDatabase.extract_residue_entries_from_pose(pass_test_structure)
    _, self.pass_test_fragments = \
        self.test_fragment_spec.fragments_from_source_residues(self.pass_test_residues)

    fail_test_structure = rosetta.pose_from_pdb(
        path.join(path.dirname(__file__), "foldit_18_0001.pdb"))
    self.fail_test_residues = \
        StructureDatabase.extract_residue_entries_from_pose(fail_test_structure)
    _, self.fail_test_fragments = \
        self.test_fragment_spec.fragments_from_source_residues(self.fail_test_residues)
def pose_from_pubchem(cid, name, temporary=True):
    pose = Pose()
    if temporary:
        # the temporary solution: create an ephemeral ResidueSet
        params_from_pubchem(cid, name)
        # generate ResidueSet
        res_set = generate_nonstandard_residue_set([name])
        # fill the pose
        pose_from_pdb(pose, res_set, name + '_0001.pdb')
    else:
        # permanent solution: add to .params list
        add_cid_to_database(cid, name)
        # fill the pose
        pose_from_pdb(pose, name + '_0001.pdb')
    return pose
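# Hedged usage sketch: CID 2244 is aspirin; the three-letter residue name is
# arbitrary, and params_from_pubchem / add_cid_to_database are assumed from
# the surrounding toolbox (network access required to fetch the compound).
# ligand_pose = pose_from_pubchem(2244, 'ASA')
# print ligand_pose.total_residue()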
def getRMSD(self, pdbName1, pdbName2, pathfile1="", pathfile2=""):
    if pathfile1 == "":
        fileDir1 = os.path.join(ROOTPATH, 'results')
        fileDir1 = os.path.join(fileDir1, pdbName1)
    else:
        fileDir1 = os.path.join(pathfile1, pdbName1)
    if pathfile2 == "":
        fileDir2 = os.path.join(ROOTPATH, 'results')
        fileDir2 = os.path.join(fileDir2, pdbName2)
    else:
        fileDir2 = os.path.join(pathfile2, pdbName2)
    pose1 = rosetta.pose_from_pdb(fileDir1)
    pose2 = rosetta.pose_from_pdb(fileDir2)
    scoreRMSD = rosetta.all_atom_rmsd(pose1, pose2)
    return scoreRMSD
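# Hedged usage sketch: assumes rosetta.init() has run, ROOTPATH/results holds
# the decoys, and the two structures share a sequence (all_atom_rmsd needs
# matching atom counts). The object and file names are illustrative.
# rmsd = scorer.getRMSD('decoy_0001.pdb', 'decoy_0002.pdb')
# print 'all-atom RMSD:', rmsd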
def main():
    opts, args = getopt.getopt(sys.argv[3:], 'i')
    show_index = 0
    for o in opts:
        if '-i' in o:
            show_index = 1
    rosetta.init()
    wtName = sys.argv[1]
    compareName = sys.argv[2]
    outputName = wtName.split('.')[0] + '_vs_' + \
        compareName.split('.')[0] + ".txt"
    pose1 = rosetta.pose_from_pdb(wtName)
    pose2 = rosetta.pose_from_pdb(compareName)
    use_me = True
    if pose1.total_residue() != pose2.total_residue():
        print "Residue number not equal", pose1.total_residue(), \
            pose2.total_residue()
        use_me = False
    else:
        output = open(outputName, 'w')
        total_residue = pose1.total_residue()
        kabsch_alignment(pose1, pose2, range(1, total_residue + 1),
                         range(1, total_residue + 1))
        # RMSD calculated by my own function
        for i in range(1, total_residue + 1):
            calculateRMS(pose1, pose2, i, output, show_index)
        # RMSD calculated by PyRosetta
        ro_rmsd = rosetta.CA_rmsd(pose1, pose2)
        print "rosetta generated rmsd: " + str(ro_rmsd)
        if use_me:
            global total_square
            me_rmsd = math.sqrt(total_square / total_residue)
            print "me generated rmsd: " + str(me_rmsd)
        output.write(outputName.split('.')[0] + ":\t" + str(ro_rmsd))
        output.close()
    print "Done"
def main():
    args = sys.argv
    in_file = args[1]
    out_file = args[2]
    distance_cutoff = float(sys.argv[3])

    init(extra_options='-mute basic -mute core -mute protocols -mute Warning')

    all_lines = open(in_file, 'r').readlines()
    print(len(all_lines))

    # get the protein used to initialize the forward simulation
    initial_pose = pose_from_pdb(str('burn1ABC_renumb.pdb'))

    # save each one of its chains
    chains = initial_pose.split_by_chain()
    ancestral1 = chains[1]
    ancestral2 = chains[2]
    ancestral3 = chains[3]
    ancestral1.dump_pdb("Ans_A.pdb")
    ancestral2.dump_pdb("Ans_B.pdb")
    ancestral3.dump_pdb("Ans_C.pdb")
    ancestral_structure1 = capture_pdb_one("Ans_A_cap.pdb", "Ans_A.pdb")
    ancestral_structure2 = capture_pdb_one("Ans_B_cap.pdb", "Ans_B.pdb")
    ancestral_structure3 = capture_pdb_one("Ans_C_cap.pdb", "Ans_C.pdb")

    all_data = []
    i = 0
    for a_line in all_lines:
        split = a_line.split(',')
        if split[0] == 'Variant':
            continue
        if split[0] == 'WT':
            continue
        else:
            print(split[0])
            pos = re.sub("[^0-9^.]", "", split[0])
            # figure out if a position is in chain A, B, or C
            print(pos)
            if int(pos) <= ancestral1.total_residue():
                all_data.append([i, pos, split[0], split[1], split[2],
                                 split[3], split[4], split[5], split[6],
                                 split[7], split[8], 'A'])
                i = i + 1
            if int(pos) > ancestral1.total_residue() and \
                    int(pos) <= ancestral1.total_residue() + ancestral2.total_residue():
                all_data.append([i, pos, split[0], split[1], split[2],
                                 split[3], split[4], split[5], split[6],
                                 split[7], split[8], 'B'])
                i = i + 1
            if int(pos) > ancestral1.total_residue() + ancestral2.total_residue():
                all_data.append([i, pos, split[0], split[1], split[2],
                                 split[3], split[4], split[5], split[6],
                                 split[7], split[8], 'C'])
                i = i + 1
def generate_resfile_from_pdb(pdbfilename, resfilename, input_sc=True):
    """
    Writes a resfile for PDB file <pdbfilename> named <resfilename>,
    optionally allowing input side chains to be used in packing.

    Example:
        generate_resfile_from_pdb("1YY8.pdb", "1YY8.resfile")
    See also:
        Pose
        PackRotamersMover
        TaskFactory
    """
    p = rosetta.pose_from_pdb(pdbfilename)
    generate_resfile_from_pose(p, resfilename, input_sc)
def cleanCRYS(pdb_file, olig=2, out_file=''):
    """
    Writes a PDB file for a monomer of <pdb_file> if it is a <olig>-mer
    to <out_file> (defaults to <pdb_file>.mono.pdb)

    note: this is a simple sequence comparison

    example:
        cleanCRYS('1YY8.pdb',2)
    See also:
        Pose
        Pose.dump_pdb
        pose_from_pdb
        pose_from_rcsb
        cleanATOM
    """
    # if the file exists
    if os.path.exists(os.getcwd() + '/' + pdb_file):
        # load in the PDB...this is really just to get the sequence
        pose = pose_from_pdb(pdb_file)
        tot = pose.total_residue()
        seq = pose.sequence()
        # generate sequence fragments
        frags = [''] * olig
        match = [False] * (olig - 1)
        olig = float(olig)
        frac = int(round(tot / olig))
        for f in range(int(olig)):
            frags[f] = seq[:frac]
            seq = seq[frac:]
        # determine if sequence fragments are identical
        for f in range(int(olig - 1)):
            match[f] = (frags[0] == frags[f + 1])
        # if the protein has repeats, delete all other residues
        if sum(match) == (olig - 1):
            for i in range(frac * int(olig - 1)):
                pose.delete_polymer_residue(frac + 1)  # I hope this works!
            # write the new pdb file
            if not out_file:
                out_file = pdb_file[:-4] + '.mono.pdb'
            print 'if the file', out_file, 'already exists, it will be overwritten'
            pose.dump_pdb(out_file)
            print 'PDB', pdb_file, 'successfully cleaned, redundant monomers removed\nmonomer data written to', out_file
            return True
        else:
            print pdb_file, 'is not a ' + str(int(olig)) + '-mer'
            return False
    else:
        print 'No such file or directory named ' + pdb_file
        return False
def check_sasa(Pdb, ResidueSubsets, StartingResidue, LastResidue, SasaProbeRadius):
    ''' Uses Alex's AtomicSasaCalculator to calculate average SASA
    (solvent-accessible surface area) for the input residue sets '''
    PdbfullPath = ''.join(['/lab/databases/pdb_clean/', Pdb[1:3].lower(),
                           '/', Pdb[0:4], '.pdb'])
    # Load pdb into a rosetta pose object
    PdbPose = rosetta.pose_from_pdb(PdbfullPath)

    if len(Pdb) > 4:
        TargetChainIndex = ChainAlphabetIndices[Pdb[4]]
        PdbChains = PdbPose.split_by_chain()
        # Silly loop to get a pose with only the desired chain;
        # there probably is a better way to do this
        for i, Chain in enumerate(PdbChains):
            if i == TargetChainIndex:
                PdbPose = Chain
                break

    if (LastResidue - StartingResidue + 1) != PdbPose.n_residue():
        print 'Pose weird, returning bogus SASA values'
        return [999.999 for Set in ResidueSubsets]

    # initialize Alex's AtomicSasaCalculator
    SasaCalculator = AtomicSasaCalculator(probe_radius=SasaProbeRadius)
    # get array of per-residue SASAs
    ResidueSasa = SasaCalculator.calculate_per_residue_sasa(PdbPose)

    SubsetAverageSasas = []
    count = 0
    for Residues in ResidueSubsets:
        # Converts residue number from pdb to appropriate index for sasa array
        ResidueIndices = [ResNum - StartingResidue for ResNum in Residues]
        SubsetAverageSasas.append(
            np.mean([ResidueSasa[ResIndex] for ResIndex in ResidueIndices]))
        count += 1
    return SubsetAverageSasas
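# Hedged usage sketch: the ID follows the 4-char PDB code plus optional chain
# letter convention parsed above; residue numbers are illustrative, the
# /lab/databases path is site-specific, and rosetta.init() must already
# have been called.
# AverageSasas = check_sasa('1yy8A', [[10, 11, 12], [40, 41, 42]], 1, 214, 1.4)
# print AverageSasas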
def load_pdb(self, pdb_name):
    # store list of ligand params files
    lig_params = []
    for f in os.listdir(self.working_dir):
        if f.endswith(".params"):
            lig_params.append(f)

    # extra options string creation
    ext_opts = "-mute basic -mute core -ignore_waters True"
    for param in lig_params:
        ext_opts = ext_opts + " -in:file:extra_res_fa %s" % param

    print "Initializing Rosetta with the following options:", ext_opts
    from rosetta import init
    init(extra_options=ext_opts)

    self.pose = pose_from_pdb(pdb_name)
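# Hedged usage sketch: assumes an instance whose working_dir holds the ligand
# .params files alongside the complex PDB; the object and file names below
# are illustrative.
# loader.working_dir = '/path/to/job'
# loader.load_pdb('complex_with_ligand.pdb')
# print loader.pose.total_residue()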
def cleanCRYS(pdb_file, olig=2):
    """
    Removes redundant crystal contacts and isolates a monomer by writing
    a PDB file for a monomer of <pdb_file>, if it is an <olig>-mer,
    to <pdb_file>.mono.

    Note: This is by simple sequence comparison.

    Example:
        cleanCRYS("1YY8.pdb", 2)
    See also:
        Pose
        Pose.dump_pdb
        pose_from_pdb
        pose_from_rcsb
        cleanATOM
    """
    if os.path.exists(os.getcwd() + '/' + pdb_file):
        print "If the file", pdb_file[:-4] + ".mono.pdb already exists, " + \
            "it will be overwritten."
        pose = rosetta.pose_from_pdb(pdb_file)
        tot = pose.total_residue()
        seq = pose.sequence()
        frags = [''] * olig
        match = [False] * (olig - 1)
        olig = float(olig)
        frac = int(round(tot / olig))
        for f in range(int(olig)):
            frags[f] = seq[:frac]
            seq = seq[frac:]
        for f in range(int(olig - 1)):
            match[f] = (frags[0] == frags[f + 1])
        if sum(match) == (olig - 1):
            for i in range(frac * int(olig - 1)):
                pose.delete_polymer_residue(frac + 1)
            pose.dump_pdb(pdb_file[:-4] + ".mono.pdb")
            print "PDB", pdb_file, "successfully cleaned, redundant " + \
                "monomers removed."
            print "Monomer data written to", pdb_file[:-4] + ".mono.pdb."
        else:
            print pdb_file, "is not a " + str(int(olig)) + "-mer."
    else:
        raise IOError("No such file or directory named " + pdb_file)
def __init__(self, User, Key, RefPdb, ScoreFxns=[], FxnNames=[], PerResidue=True):
    ''' Track scores of design trajectories for plotly plots '''
    self.User = User
    self.ApiKey = Key
    import plotly.graph_objs as Graph
    import plotly.plotly as py
    self.Graph = Graph
    self.py = py

    self.ScoreFxns = ScoreFxns
    self.FxnNames = FxnNames
    assert len(self.ScoreFxns) == len(self.FxnNames)

    self.PerRes = PerResidue
    self.RefPdb = RefPdb
    self.RefPose = rosetta.pose_from_pdb(RefPdb)
    self.Score2dComboTraces = {}

    self.MaxScores = [0 for Fxn in self.ScoreFxns]
    self.MinScores = [999 for Fxn in self.ScoreFxns]

    # Scores ordered in all score lists in same order for plotting
    self.TaggedPoseScores = {}
    self.PoseTags = []

    # Later keyed with index of self.ScoreFxns
    self.ScoreFunctionScoredPdbs = {}
    self.CstDict = {}
def idealize_and_relax_pdb_set(PdbCstPairs):
    for PdbName, CstName in PdbCstPairs:
        print '(PdbName, CstName) ', (PdbName, CstName)

        ''' idealize peptide bonds with command line subprocess '''
        subprocess.check_output(
            ['idealize_jd2.default.linuxgccrelease', '-s', PdbName])
        IdealizedPdbOldName = re.sub(r'(.*).pdb$', r'\1_0001.pdb', PdbName)
        IdealizedPdbNewName = re.sub(r'(.*).pdb$', r'\1_Ideal.pdb', PdbName)
        subprocess.check_output(['mv', IdealizedPdbOldName, IdealizedPdbNewName])
        time.sleep(0.2)

        IdealizedCappedPose = rosetta.pose_from_pdb(IdealizedPdbNewName)

        # make constraint mover
        Constrainer = rosetta.ConstraintSetMover()
        # get constraints from file
        Constrainer.constraint_file(CstName)
        Constrainer.apply(IdealizedCappedPose)

        ''' SET UP WEIGHTS '''
        Talaris = rosetta.getScoreFunction()
        TalarisPlusCst = rosetta.getScoreFunction()
        TalarisPlusCst.set_weight(rosetta.atom_pair_constraint, 10.0)
        TalarisPlusCst.set_weight(rosetta.angle_constraint, 5.0)
        TalarisPlusCst.set_weight(rosetta.dihedral_constraint, 2.5)

        print 'relaxing %s with %s' % (IdealizedPdbNewName, CstName)
        # relax w/ cst
        rosetta.relax_pose(IdealizedCappedPose, TalarisPlusCst, 'tag')
        # relax w/o cst
        rosetta.relax_pose(IdealizedCappedPose, Talaris, 'tag')

        RelaxedPdbName = re.sub(r'(.*)_Ideal.pdb$', r'\1_Relax.pdb',
                                IdealizedPdbNewName)
        rosetta.dump_pdb(IdealizedCappedPose, RelaxedPdbName)
def pose_from_rcsb(pdb_code, ATOM=True, CRYS=False):
    """
    Returns a pose for RCSB PDB <pdb_code>, also writes this data to
    <pdb_code>.pdb, and optionally calls cleanATOM and/or cleanCRYS

    example:
        pose = pose_from_rcsb("1YY8")
    See also:
        Pose
        pose_from_pdb
        pose_from_sequence
        load_from_rcsb
        cleanATOM
        cleanCRYS
    """
    load_from_rcsb(pdb_code)
    if ATOM:
        cleanATOM(pdb_code + ".pdb")
        pdb_code = pdb_code + ".clean"
    if CRYS:
        cleanCRYS(pdb_code + ".pdb")
        pdb_code = pdb_code + ".mono"
    pose = rosetta.pose_from_pdb(pdb_code + ".pdb")
    return pose
def main(ExtraResidues=0, ipython=0):
    ### Required args
    ArgParser = argparse.ArgumentParser(
        description=" for plotting pdb scores and selecting subsets based on absolute or per residue scores ")
    ArgParser.add_argument('-pdb_glob', type=str, required=True,
                           help=" pdb stem, start of globs for pdbs and csts ")
    ArgParser.add_argument('-native', type=str, required=True,
                           help=" pdb to compare designs against ")
    ### Default args
    ArgParser.add_argument('-cst', type=str, default=False,
                           help=" to provide cst manually, will apply to all globbed pdbs!!! ")
    ArgParser.add_argument('-param', type=str, nargs='+', default=[], help=" params ")
    ArgParser.add_argument('-norm', type=int, default=1,
                           help=" 0|(1) normalize scores by residue ")
    ### following args are for plotly:
    ### change if you use this script!!!
    ArgParser.add_argument('-plotly_id', type=str, default="pylesharley")
    ArgParser.add_argument('-plotly_key', type=str, default="cc5z4a8kst")
    ArgParser.add_argument('-plot', type=int, default=1,
                           help=" 0|(1) plot scores with plotly ")
    ArgParser.add_argument('-name', type=str, default='', help=" plot tag ")
    ArgParser.add_argument('-and_or', type=str, default='and',
                           help=" And/Or logic for score cutoffs. Default = 'and' ")
    ArgParser.add_argument('-multi', type=int, default=1,
                           help=" 0|(1) plot different methods together on same plot ")
    Args = ArgParser.parse_args()

    Pdbs = glob.glob(Args.pdb_glob)
    print 'globbed %d pdbs' % len(Pdbs)

    if ExtraResidues == 0 and len(Args.param) > 0:
        try:
            ExtraParams = rosetta.Vector1(Args.param)
            ExtraResidues = rosetta.generate_nonstandard_residue_set(ExtraParams)
        except:
            # retry once, as in the original script
            ExtraParams = rosetta.Vector1(Args.param)
            ExtraResidues = rosetta.generate_nonstandard_residue_set(ExtraParams)

    ### for ipython mode
    if ipython:
        return ExtraResidues

    Args.and_or = Args.and_or.lower()
    assert Args.and_or == 'and' or Args.and_or == 'or', \
        " -and_or must equal 'and' or 'or' "

    RepeatLengths = []
    ProcessTags = {}
    TagList = []
    TagByPdbName = {}

    # better to find out if the native pdb is wrong before waiting for pdb scoring
    Check = open(Args.native, 'r')

    # first loop: find the shortest name overlap shared by the globbed pdbs
    OverlapStarts = []
    for Pdb in Pdbs:
        Tag = re.sub(r'^.*rep\d+(.*)\.pdb$', r'\1', Pdb)
        for OtherPdb in Pdbs:
            OtherTag = re.sub(r'^.*rep\d+(.*)\.pdb$', r'\1', Pdb)
            i = 0
            if Pdb != OtherPdb:
                while Pdb[:i] == OtherPdb[:i]:
                    i += 1
                Overlap = OtherPdb[:i - 1]
                OverlapStarts.append((len(Overlap), Overlap))
    OverlapStarts.sort()
    ShortestOverlap = OverlapStarts[0][1]

    for Pdb in Pdbs:
        try:
            RepeatLength = int(re.sub(r'^.*rep(\d+).*pdb$', r'\1', Pdb))
        except ValueError:
            RepeatLength = 0
        assert RepeatLength != Pdb, \
            " regular expression extraction of 'rep' (repeat length) value failed on %s " % Pdb
        RepeatLengths.append(RepeatLength)

        #### re.sub out tag from design process
        Tag = re.sub(r'^.*rep\d+(.*)\.pdb$', r'\1', Pdb)
        Tag = re.sub(r'^%s(.*)\.pdb$' % (ShortestOverlap), r'\1', Tag)
        TagByPdbName[Pdb] = Tag
        try:
            TagNumber = ProcessTags[Tag]
        except KeyError:
            TagNumber = len(ProcessTags) + 1
            ProcessTags[Tag] = TagNumber
        TagList.append(TagNumber)

    # Scoring is redundant: once for sorting outside the plotter, then again
    # in the plotter (making it non-redundant is not a priority). Scoring in
    # the plotter object is so multiple score functions can be plotted easily.

    # Sort by repeat length, then score
    if Args.multi:
        # Sort by repeat length, then method tag, then score
        MultiPoseSortingTuples = []
    else:
        PoseSortingTuples = []

    Talaris = rosetta.getScoreFunction()
    for i, Pdb in enumerate(Pdbs):
        RepeatLength = RepeatLengths[i]
        ProcessNumber = TagList[i]
        Pose = rosetta.pose_from_pdb(Pdb)
        if Args.norm:
            Score = Talaris(Pose) / Pose.n_residue()
        else:
            Score = Talaris(Pose)
        if Args.multi:
            MultiPoseSortingTuples.append((RepeatLength, ProcessNumber, Score, Pose))
        else:
            PoseSortingTuples.append((RepeatLength, Score, Pose))

    if Args.multi:
        # Sort by repeat length, then method tag, then score
        MultiPoseSortingTuples.sort()
        SortedTuples = MultiPoseSortingTuples
    else:
        # sorts by repeat length (shortest to longest) then score (best to worst)
        PoseSortingTuples.sort()
        SortedTuples = PoseSortingTuples

    LastLength = 0
    LastTag = 0
    AllGroups = []
    CurrentGroup = []
    for PoseTuple in SortedTuples:
        Length = PoseTuple[0]
        if Args.multi:
            Tag = PoseTuple[1]
        if LastLength and Length != LastLength:
            AllGroups.append(CurrentGroup)
            CurrentGroup = []
        if Args.multi:
            if LastTag and Tag != LastTag:
                AllGroups.append(CurrentGroup)
                CurrentGroup = []
        CurrentGroup.append(PoseTuple)
        LastLength = Length
        if Args.multi:
            LastTag = Tag
    # for last repeat length
    AllGroups.append(CurrentGroup)

    ''' Build score functions here: '''
    Talaris = rosetta.getScoreFunction()

    # set_all_weights_zero returns a talaris function with all default
    # weights set to 0
    CstScore = set_all_weights_zero(rosetta.getScoreFunction())
    CstScore.set_weight(rosetta.atom_pair_constraint, 10.0)
    CstScore.set_weight(rosetta.angle_constraint, 5.0)
    CstScore.set_weight(rosetta.dihedral_constraint, 3.0)

    HbondScore = set_all_weights_zero(rosetta.getScoreFunction())
    HbondScore.set_weight(rosetta.hbond_sr_bb, 1.170)
    HbondScore.set_weight(rosetta.hbond_lr_bb, 1.170)
    HbondScore.set_weight(rosetta.hbond_bb_sc, 1.170)
    HbondScore.set_weight(rosetta.hbond_sc, 1.100)

    Disulfide = set_all_weights_zero(rosetta.getScoreFunction())
    Disulfide.set_weight(rosetta.dslf_fa13, 1.0)

    if Args.plot:
        if Args.norm:
            PerRes = True
        else:
            PerRes = False
        ''' Add and remove score functions here '''
        Plotter = plotly_plotter(
            Args.plotly_id, Args.plotly_key, Args.native,
            ScoreFxns=[CstScore, Talaris, HbondScore, Disulfide],
            FxnNames=['ConstraintScore', 'Talaris2013', 'H-bond', 'Disulfide'],
            PerResidue=PerRes)

    XaxisSortingTuples = []
    for PoseGroup in AllGroups:
        if len(PoseGroup):
            Poses = [PoseTuple[-1] for PoseTuple in PoseGroup]
            RepeatLength = PoseGroup[0][0]
            if Args.plot:
                GroupPdbName = PoseGroup[0][-1].pdb_info().name()
                if Args.multi:
                    Tag = TagByPdbName[GroupPdbName]
                if Args.cst:
                    Plotter.score_poses(Poses, Args.cst, Tag)
                else:
                    Plotter.score_poses(Poses, 1, Tag)

    Plotter.plot_2d_score_combinations()
    print 'Plotter.Score2dComboTraces', 3, Plotter.Score2dComboTraces
    Plotter.draw_comparisons()

    print 'plotting...'
    if len(Args.name):
        Name = Args.name
    else:
        Name = '%s based %d res ' % (Args.native, RepeatLength)
    Plotter.render_scatter_plot(PlotName=Name)

    while 1:
        ScoreFunctionScoreCutoffs = []
        for i, Name in enumerate(Plotter.FxnNames):
            while 1:
                try:
                    Cutoff = float(raw_input(
                        '\tEnter cutoff value (maximum) for %s function: ' % Name))
                    break
                except ValueError:
                    pass
            ScoreFunctionScoreCutoffs.append(Cutoff)

        print 'Cutoff values set at:'
        for i, Name in enumerate(Plotter.FxnNames):
            Plotter.ScoreFunctionScoredPdbs[i].sort()

        PassingPdbs = []
        for i, Name in enumerate(Plotter.FxnNames):
            PassThisFxn = []
            Cutoff = ScoreFunctionScoreCutoffs[i]
            for Score, Pdb in Plotter.ScoreFunctionScoredPdbs[i]:
                if Score <= Cutoff:
                    PassThisFxn.append(Pdb)
                else:
                    break
            PassingPdbs.append(PassThisFxn)

        PdbsPassingAll = PassingPdbs[0]
        if Args.and_or == 'and':
            for OtherSet in PassingPdbs[1:]:
                PdbsPassingAll = list(set(PdbsPassingAll) & set(OtherSet))
        else:
            for OtherSet in PassingPdbs[1:]:
                PdbsPassingAll = list(set(PdbsPassingAll + OtherSet))

        Outdir = raw_input(
            '\tEnter folder to copy pdbs that pass these thresholds (%s logic) to: ' % Args.and_or)
        if not os.path.isdir(Outdir):
            subprocess.check_output(['mkdir', Outdir])
        if Outdir[-1] != '/':
            Outdir = Outdir + '/'
        for Pdb in PdbsPassingAll:
            subprocess.check_output(['cp', Pdb, Outdir])
            if Plotter.CstDict[Pdb] != None:
                subprocess.check_output(['cp', Plotter.CstDict[Pdb], Outdir])

        Continue = str(raw_input(
            '\tEnter Y to add another set of selection thresholds, or anything else to quit: ')).upper()
        if Continue != 'Y':
            break
def main(argv=None):
    if argv != None:
        sys.argv = [sys.argv[0]] + [arg for arg in argv]

    # Arg block
    ArgParser = argparse.ArgumentParser(
        description=' expand_cst.py ( -help ) %s' % InfoString)
    # Required args
    ArgParser.add_argument('-ref_pdb', type=str, required=True,
                           help=' reference pdb ')
    ArgParser.add_argument('-ref_cst', type=str, required=True,
                           help=' corresponding to reference pdb ')
    ArgParser.add_argument('-repeat_pdb_tag', type=str, required=True,
                           help=' input pdb tag ')
    # Optional args
    ArgParser.add_argument('-out', type=str, default='./',
                           help=' Output directory ')
    Args = ArgParser.parse_args()
    if Args.out[-1] != '/':
        Args.out = Args.out + '/'

    # default talaris 2013 score function, with constraint weights turned on
    ScoreFunction = rosetta.getScoreFunction()
    ScoreFunction.set_weight(rosetta.atom_pair_constraint, 1.0)
    ScoreFunction.set_weight(rosetta.angle_constraint, 1.0)
    ScoreFunction.set_weight(rosetta.dihedral_constraint, 1.0)

    RefPdb = Args.ref_pdb
    ReferencePose = rosetta.pose_from_pdb(RefPdb)
    print 'ReferencePose', ReferencePose

    # modify rosetta cst w/o rosetta
    Constrainer = constraint_extrapolator(Args.ref_cst)

    Pdbs = glob.glob('*%s*.pdb' % Args.repeat_pdb_tag)
    assert len(Pdbs), \
        "No pdbs found with glob: '*%s*.pdb'" % Args.repeat_pdb_tag

    for Pdb in Pdbs:
        print 'Pdb:', Pdb
        Pose = rosetta.pose_from_pdb(Pdb)
        try:
            SourceRangeString = re.sub(r'.*src(\d+_\d+__\d+_\d+).*pdb', r'\1', Pdb)
            SourceRanges = [[int(Number) for Number in Range.split('_')]
                            for Range in SourceRangeString.split('__')]
        except ValueError:
            print 'No src range tag, skipping: %s ' % Pdb
            continue
        print 'SourceRanges:', SourceRanges

        RepeatLength = int(re.sub(r'.*rep(\d+).*pdb', r'\1', Pdb))
        print 'RepeatLength', RepeatLength

        PdbTag = (Pdb + '!').replace('.pdb!', '').replace('!', '')
        CstName = PdbTag + '.cst'
        ExtrapolatedConstraints = Constrainer.extrapolate_from_repeat_unit(
            SourceRanges[0][0], SourceRanges[0][1], RepeatLength, Pose,
            CstName, PdbTag)
    '-mute core -mute protocols '
    #'-run:constant_seed '
    #'-run:jran 618450550 '
    #'-out:levels protocols.simple_moves.MinMover:500 '
    #'-out:levels core.optimization.AtomTreeMinimizer:500 '
    #'-out:levels core.optimization.Minimizer:500 '
    #'-out:levels protocols.moves.RigidBodyMover:200 '
    #'-out:levels core.optimize:500 '
    #'-out:levels core.optimization.LineMinimizer:500 '
    #'-out:levels protocols.simple_moves.PackRotamersMover:500 '
    #'-out:levels core.pose:500 -out:levels core.io.pdb.file_data:500 '
    #'-out:levels core.import_pose.import_pose:500'
    )

# Create pose.
print '\nGenerating starting pose...'
starting_pose = pose_from_pdb(new_filename)

# Display starting pose.
if args.pm:
    starting_pose.pdb_info().name(args.pdb_filename1[:-4] + "-" +
                                  args.pdb_filename2)
    pm = PyMOL_Mover()
    visualize(starting_pose)

# Prepare the foldtree.
upstream_chains, downstream_chains = determine_docking_partners(starting_pose)
partners = upstream_chains + "_" + downstream_chains
# TODO: Modify C++ so that chemical edges are not removed.
setup_foldtree(starting_pose, partners, Vector1([JUMP_NUM]))
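# Hedged note on the fragment above: determine_docking_partners is assumed to
# return chain-letter strings, so for a pose with chains A and B docking
# against chain C, partners would be "AB_C"; setup_foldtree then routes the
# flexible rigid-body jump (JUMP_NUM) across that interface, matching the
# standard PyRosetta docking setup.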
def main(argv=None):
    if argv != None:
        sys.argv = [sys.argv[0]] + [arg for arg in argv]
    print 'sys.argv', sys.argv

    ArgParser = argparse.ArgumentParser(
        description=' nc_cst_gen.py arguments ( -help ) %s' % InfoString)
    # Required arguments:
    ArgParser.add_argument('-pdbs', type=str, nargs='+', required=True,
                           help=' input pdbs ')
    # Optional arguments:
    ArgParser.add_argument('-out', type=str, default='./',
                           help=' output directory ')
    ArgParser.add_argument('-max_dist', type=float, default=3.4,
                           help=' distance between the oxygens and downstreams ')
    ArgParser.add_argument('-min_seq_sep', type=int, default=3,
                           help=' minimum separation in primary sequence ')
    ArgParser.add_argument('-upstream_atom', type=str, default='[ON]\w?\d?',
                           help=' grep for upstream atoms ')
    ArgParser.add_argument('-downstream_atom', type=str, default='[ON]\w?\d?',
                           help=' grep for downstream atoms ')
    ArgParser.add_argument('-num_repeats', type=int, default=5,
                           help=' number of repeats to extrapolate contacts for ')
    ArgParser.add_argument('-min_sasa', type=float, default=0.0,
                           help=' floor for weighting downstream oxygen contacts ')
    ArgParser.add_argument('-min_sasa_weight', type=float, default=1.0,
                           help=' weight of floor for downstream oxygen contacts ')
    ArgParser.add_argument('-max_sasa', type=float, default=5.0,
                           help=' ceiling for cst weighting downstream oxygen contacts ')
    ArgParser.add_argument('-max_sasa_weight', type=float, default=0.1,
                           help=' weight of ceiling for downstream oxygen contacts ')
    ArgParser.add_argument('-sasa_probe_radius', type=float, default=0.8,
                           help=' probe radius for sasa calculations ')
    ArgParser.add_argument('-renumber_pose', type=bool, default=True,
                           help='True|False renumber pdb residues ')
    ArgParser.add_argument('-disulfide', type=bool, default=True,
                           help='True|False include disulfide constraints ')
    Args = ArgParser.parse_args()

    if Args.out[-1] != '/':
        Args.out = Args.out + '/'

    import rosetta
    rosetta.init(extra_options="-mute basic -mute core -mute protocols")

    ReportedRepeatCount = 0
    TotalPdbs = len(Args.pdbs)

    # Instance of class to convert sasas to cst weight
    SasaScale = sasa_scale(Args.min_sasa, Args.min_sasa_weight,
                           Args.max_sasa, Args.max_sasa_weight)

    for iPdb, Pdb in enumerate(Args.pdbs):
        print ' Working with %s; %d of %d total pdbs ' % (Pdb, iPdb + 1, TotalPdbs)

        # Starting rosetta pose
        Pose = rosetta.pose_from_pdb(Pdb)
        OutputPdb = Args.out + Pdb

        # Sets pdb info so residues in dumped pdbs are same as index
        Pose.pdb_info(rosetta.core.pose.PDBInfo(Pose))
        if Args.renumber_pose:
            rosetta.dump_pdb(Pose, OutputPdb)
        else:
            rosetta.dump_pdb(Pose, OutputPdb.replace('.pdb', '_renumbered.pdb'))

        AllConstraints, SortedConstraints = get_pose_constraints(
            Pose, Args.max_dist, Args.min_seq_sep, Args.sasa_probe_radius,
            SasaScale, Args.upstream_atom, Args.downstream_atom, True)

        if Args.disulfide:
            DisulfAllConstraints, DisulfSortedConstraints = get_pose_constraints(
                Pose, 3.5, 2, Args.sasa_probe_radius, SasaScale, 'SG', 'SG', False)
            AllConstraints.extend(DisulfAllConstraints)

        CstName = OutputPdb.replace('.pdb', '_All.cst')
        with open(CstName, 'w') as CstFile:
            print>>CstFile, '\n'.join(AllConstraints)

        BackboneBackboneCst, BackboneSidechainCst, SidechainSidechainCst = \
            SortedConstraints

        CstName = OutputPdb.replace('.pdb', '_BBBB.cst')
        with open(CstName, 'w') as CstFile:
            print>>CstFile, '\n'.join(BackboneBackboneCst)

        CstName = OutputPdb.replace('.pdb', '_BBSC.cst')
        with open(CstName, 'w') as CstFile:
            print>>CstFile, '\n'.join(BackboneSidechainCst)

        CstName = OutputPdb.replace('.pdb', '_SCSC.cst')
        with open(CstName, 'w') as CstFile:
            print>>CstFile, '\n'.join(SidechainSidechainCst)

        # only written when disulfide constraints were generated above
        # (the original wrote this unconditionally, a NameError when
        # -disulfide is false)
        if Args.disulfide:
            CstName = OutputPdb.replace('.pdb', '_Disulf.cst')
            with open(CstName, 'w') as CstFile:
                print>>CstFile, '\n'.join(DisulfAllConstraints)
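# For reference, each entry written above is a line in Rosetta's constraint
# file format; a harmonic atom-pair restraint looks like (values illustrative):
#
#   AtomPair  N 10  O 20  HARMONIC 2.8 0.2
#
# i.e. atom name and residue number for both partners, then the function type
# with its ideal distance and standard deviation.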
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('pdb_filename', action="store", type=str)
    parser.add_argument('replicate_number', action="store", type=int)
    inputs = parser.parse_args()

    # takes name of pdb file without the extension
    pdb_file = inputs.pdb_filename
    prot_name = pdb_file.split('/')[-1].split('.')[0]

    # set up timer to figure out how long the code took to run
    t0 = time()

    fasta_file = pdb_file.replace('/structures/', '/fastas/').replace(
        '.pdb', '.fasta')
    records = list(SeqIO.parse(fasta_file, 'fasta'))
    assert len(records) == 1
    wt_seq = str(records[0].seq)

    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core '
         '-rebuild_disulf false -detect_disulf false')

    ########################
    # Constants
    ########################
    PACK_RADIUS = 12.0
    # Amino acids
    AAs = ("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
           "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")
    AAs_choice_dict = {}
    for aa in AAs:
        AAs_choice_dict[aa] = [other_aa for other_aa in AAs if other_aa != aa]
    # Number of mutations to accept
    max_accept_mut = 10 * len(wt_seq)
    # Population size
    N = 1000
    # Beta (temp term)
    beta = 1
    # Fraction of the WT stability value to shoot for
    threshold_fraction = 0.5
    ########################

    # Prepare data headers
    data = ['Variant,Rosetta Score,"delta-delta-G",Probability,Generation\n']

    # Load a clean pdb file
    initial_pose = pose_from_pdb(pdb_file)
    if '.clean' in pdb_file:
        pdb_file = ''.join(pdb_file.split('.clean'))

    # Set up ScoreFunction
    sf = get_fa_scorefxn()

    # Set up MoveMap.
    mm = MoveMap()
    mm.set_bb(True)
    mm.set_chi(True)

    # Pack and minimize initial pose to remove clashes.
    pre_pre_packing_score = sf(initial_pose)

    task = standard_packer_task(initial_pose)
    task.restrict_to_repacking()
    task.or_include_current(True)
    pack_rotamers_mover = RotamerTrialsMover(sf, task)
    pack_rotamers_mover.apply(initial_pose)

    min_mover = MinMover()
    min_mover.movemap(mm)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')
    min_mover.apply(initial_pose)

    post_pre_packing_score = sf(initial_pose)

    # Threshold for selection
    threshold = post_pre_packing_score * threshold_fraction
    print 'threshold:', threshold

    data.append('WT,' + str(post_pre_packing_score) + ',0.0,0.0,0\n')

    # number of residues to select from
    n_res = initial_pose.total_residue()

    # start evolution
    i = 0
    gen = 0
    while i < max_accept_mut:
        # update the number of generations that have passed
        gen += 1

        # pick a place to mutate
        mut_location = random.randint(1, n_res)

        # get the amino acid at that position
        res = initial_pose.residue(mut_location)

        # choose the amino acid to mutate to
        proposed_res = random.choice(AAs_choice_dict[res.name1()])

        # make the mutation
        mutant_pose = mutate_residue(initial_pose, mut_location, proposed_res,
                                     PACK_RADIUS, sf)

        # score mutant
        variant_score = sf(mutant_pose)

        # get the probability that the mutation will be accepted
        probability = calc_prob_mh(variant_score, post_pre_packing_score,
                                   N, beta, threshold)

        # test to see if mutation is accepted
        if random.random() < probability:
            # create a name for the mutant if it's going to be kept
            variant_name = res.name1() + \
                str(initial_pose.pdb_info().number(mut_location)) + \
                str(proposed_res)

            # save name and energy change
            data.append(variant_name + "," + str(variant_score) + "," +
                        str(variant_score - post_pre_packing_score) + "," +
                        str(probability) + "," + str(gen) + "\n")

            # update the wildtype
            initial_pose = mutant_pose
            post_pre_packing_score = variant_score

            # update number of accepts
            i += 1

    print '\nMutations and scoring complete.'
    t1 = time()

    # Output results.
    output_filename = '../Results/{}/{}_thresh={}_Neff={}_beta={}_i={}.csv'.format(
        prot_name, prot_name, threshold_fraction, N, beta,
        inputs.replicate_number)
    with open(output_filename, "w") as outfile:
        outfile.writelines(data)

    print 'Data written to:', output_filename
    print 'program takes %f' % (t1 - t0)
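# calc_prob_mh is defined elsewhere in this project; a minimal sketch of a
# Metropolis-Hastings-style acceptance rule consistent with how it is called
# above (proposed score, current score, population size N, inverse temperature
# beta, and a hard stability threshold) might look like the following. This is
# a hypothetical illustration, not the authors' implementation.
import math

def calc_prob_mh_sketch(variant_score, current_score, N, beta, threshold):
    """Accept/reject probability for a proposed mutation (hypothetical)."""
    # reject outright anything less stable than the threshold
    # (lower Rosetta scores are better)
    if variant_score > threshold:
        return 0.0
    # Metropolis criterion, scaled by effective population size
    delta = variant_score - current_score
    if delta <= 0:
        return 1.0
    return min(1.0, math.exp(-beta * N * delta))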
def main(argv=None):
    if argv != None:
        sys.argv = [sys.argv[0]] + [arg for arg in argv]

    ArgParser = argparse.ArgumentParser(
        description=" args for optimize_repeat_structures ")
    ArgParser.add_argument('-pdb_stem', type=str, required=True,
                           help=" pdb stem, start of globs for pdbs and csts ")
    Args = ArgParser.parse_args()

    Pdbs = glob.glob('*%s.pdb' % Args.pdb_stem)

    PdbSortTuples = []
    Skipped = []
    for Pdb in Pdbs:
        RepeatLength = int(re.sub(r'.*rep(\d+).*pdb', r'\1', Pdb))
        SourceStart = int(re.sub(r'.*src(\d+).*pdb', r'\1', Pdb))
        try:
            assert SourceStart != Pdb and RepeatLength != Pdb, \
                'regular expression substitution failed'
        except AssertionError:
            Skipped.append(Pdb)
            continue
        PdbSortTuples.append((RepeatLength, SourceStart, Pdb))

    print 'Skipped:'
    print Skipped
    print

    PdbSortTuples.sort()

    LastPdb = PdbSortTuples[0][2]
    Pose = rosetta.pose_from_pdb(LastPdb)
    LastArray = np.array([list(Pose.residue(P).xyz('CA'))
                          for P in range(1, Pose.n_residue() + 1)])

    subprocess.check_output(['mkdir', 'Redundant'])

    for PdbTup in PdbSortTuples[1:]:
        Pdb = PdbTup[2]
        Pose = rosetta.pose_from_pdb(Pdb)
        CA_Array = np.array([list(Pose.residue(P).xyz('CA'))
                             for P in range(1, Pose.n_residue() + 1)])

        if len(CA_Array) == len(LastArray):
            RMSD, rMtx, tVec = solenoid_tools.rmsd_2_np_arrays(CA_Array, LastArray)
            print
            print 'LastPdb, Pdb'
            print LastPdb
            print Pdb
            print 'RMSD:', RMSD

            # move redundant structures (and their associated files) aside
            if RMSD < 0.001:
                PdbStem = re.sub(r'(.*).pdb$', r'\1', Pdb)
                GlobString = '%s*' % PdbStem
                PdbAssociatedFiles = glob.glob(GlobString)
                for File in PdbAssociatedFiles:
                    subprocess.check_output(['mv', File, 'Redundant/'])

        LastArray = copy.deepcopy(CA_Array)
        LastPdb = copy.deepcopy(Pdb)
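# solenoid_tools.rmsd_2_np_arrays is project-specific; a minimal numpy
# equivalent using the Kabsch algorithm (returning just the least-squares
# RMSD between two equal-length N x 3 CA coordinate arrays) could look like
# this sketch:
import numpy as np

def kabsch_rmsd(P, Q):
    """Least-squares RMSD between two equal-length N x 3 coordinate arrays."""
    P = P - P.mean(axis=0)                 # center both point clouds
    Q = Q - Q.mean(axis=0)
    H = np.dot(P.T, Q)                     # 3x3 covariance matrix
    U, S, Vt = np.linalg.svd(H)
    d = np.sign(np.linalg.det(np.dot(U, Vt)))
    R = np.dot(np.dot(U, np.diag([1.0, 1.0, d])), Vt)  # proper rotation
    P = np.dot(P, R)                       # superimpose P onto Q
    return np.sqrt(((P - Q) ** 2).sum() / len(P))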
def detect_and_expand_repeats(InputTuple):
    Args, Pdb = InputTuple
    print 'Pdb:', Pdb

    # get name base for output pdbs
    InputPdbStem = Pdb.split('/')[-1].replace('.pdb', '')
    print 'StemName:', InputPdbStem

    # load Pdb into rosetta pose
    Pose = rosetta.pose_from_pdb(Pdb)
    Pose.pdb_info(rosetta.core.pose.PDBInfo(Pose))

    # Get repeat unit poses from function above
    if Args.repeat_residues == False:
        TandemRepeats, RepeatStretchesByLengthHash = pose_repeat_unit_finder(Pose)
    else:
        RepeatChains = Args.repeat_residues.split('__')
        RepeatChains = [[int(Number) for Number in Chain.split('_')]
                        for Chain in RepeatChains]
        TandemRepeats, RepeatStretchesByLengthHash = pose_repeat_unit_finder(
            Pose, RepeatChains)

    AllExtrapolationsByRepeatLength = {}
    print 'TandemRepeats:', TandemRepeats
    print 'RepeatStretchesByLengthHash:', RepeatStretchesByLengthHash

    count = 1
    for RepeatUnitLength in RepeatStretchesByLengthHash:
        ExtrapolationList = []
        MinLength = 9000000000  # will break if pose has more than 9 billion residues
        print 'RepeatUnitLength', RepeatUnitLength

        for RepeatStretch in RepeatStretchesByLengthHash[RepeatUnitLength]:
            print 'RepeatStretch', RepeatStretch

            # gets all pairwise combinations of repeat units; the second arg
            # should ALWAYS be 2, unless a major overhaul is performed
            for RepeatUnitCombo in itertools.combinations(RepeatStretch, 2):
                RepeatUnit1Start, RepeatUnit2Start = RepeatUnitCombo
                assert RepeatUnit1Start <= RepeatUnit2Start, \
                    ' RepeatUnit1 must begin before RepeatUnit2 '

                TandemRepeats1 = TandemRepeats[RepeatUnit1Start]
                TandemRepeats2 = TandemRepeats[RepeatUnit2Start]

                # Whichever position starts the fewest tandem repeats dictates
                # how far to shift
                Shifts = min(len(TandemRepeats1), len(TandemRepeats2))
                # Max number of turns to include per repeat depends on
                # available repeats and the user-input max
                MaxTurns = min(Args.max_turns_per_repeat, Shifts)

                if (RepeatUnit1Start + Args.min_overlap) <= RepeatUnit2Start \
                        <= (RepeatUnit1Start + RepeatUnitLength - Args.min_overlap):
                    for NumTurns in range(1, MaxTurns + 1):
                        ModLength = NumTurns * RepeatUnitLength
                        ModUniformLength = Args.repeat * ModLength

                        for Shift in range(Shifts / NumTurns):
                            ModRep1Start = RepeatUnit1Start + (Shift * ModLength)
                            ModRep2Start = RepeatUnit2Start + (Shift * ModLength)
                            Overlap = ModRep2Start - ModRep1Start
                            ModRep1End = ModRep1Start + ModLength - 1
                            ModRep2End = ModRep2Start + ModLength - 1

                            Repeat1Unit = grafting.return_region(
                                Pose, ModRep1Start, ModRep1End)
                            Repeat2Unit = grafting.return_region(
                                Pose, ModRep2Start, ModRep2End)

                            # use function to extrapolate from a partial repeat
                            try:
                                Extrapolation = extrapolate_repeat_pose(
                                    Repeat1Unit, Repeat2Unit, Args.repeat - 1)
                            except AssertionError:
                                # extrapolation failed
                                continue

                            # hacky check finds things that went wrong in
                            # extrapolation, sometimes
                            if Extrapolation.n_residue() == ModUniformLength + Overlap:
                                # trim down to uniform length
                                Extrapolation = grafting.return_region(
                                    Extrapolation, 1, ModUniformLength)
                                # add extrapolated pose to list
                                Repeat1Range = (ModRep1Start, ModRep1End)
                                Repeat2Range = (ModRep2Start, ModRep2End)
                                ExtrapolationList.append(
                                    (Extrapolation, Repeat1Range, Repeat2Range,
                                     NumTurns))
                            else:
                                print 'fail'

        AllExtrapolationsByRepeatLength[RepeatUnitLength] = ExtrapolationList

    with open('%s_RepExtra.log' % InputPdbStem, 'w') as LogFile:
        for BaseUnitLength in AllExtrapolationsByRepeatLength:
            print 'Extrapolated %d poses with base unit length %d' % (
                len(AllExtrapolationsByRepeatLength[BaseUnitLength]), BaseUnitLength)
            print>>LogFile, 'Extrapolated %d poses with base unit length %d' % (
                len(AllExtrapolationsByRepeatLength[BaseUnitLength]), BaseUnitLength)
            print>>LogFile, 'Number\tUnit1 range\tUnit2 range'
            for i, ExtrapolationTuple in enumerate(
                    AllExtrapolationsByRepeatLength[BaseUnitLength]):
                print>>LogFile, '\t\t'.join([
                    str(i + 1),
                    ','.join([str(Number) for Number in ExtrapolationTuple[1]]),
                    ','.join([str(Number) for Number in ExtrapolationTuple[2]])])
                Repeat1Range = ExtrapolationTuple[1]
                Repeat2Range = ExtrapolationTuple[2]
                RepeatUnitLength = BaseUnitLength * ExtrapolationTuple[3]
                rosetta.dump_pdb(
                    ExtrapolationTuple[0],
                    '%ssrc%d_%d__%d_%d_rep%d_%s.pdb' % (
                        Args.out, Repeat1Range[0], Repeat1Range[1],
                        Repeat2Range[0], Repeat2Range[1], RepeatUnitLength,
                        InputPdbStem))
def main():
    # read in the file made by the forward sim
    args = sys.argv
    inputfile = args[1]
    data = open(inputfile)
    first_line = data.readlines()[1]
    var_line = first_line.split(',')
    start_stab = var_line[1]

    # the first entry in the file is the wild type structure;
    # calc the threshold using this
    threshold = float(start_stab) + 10
    print(threshold)

    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core')

    # Constants
    PACK_RADIUS = 0
    # Population size
    N = 100
    # Beta (temp term)
    beta = .6

    # Set up ScoreFunction
    sf = get_fa_scorefxn()

    # Set up MoveMap.
    mm = MoveMap()
    mm.set_bb(True)
    mm.set_chi(True)

    min_mover = MinMover()
    min_mover.movemap(mm)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')

    # Prepare data headers
    data = ['pdbfile_target,pdbfile_used,step,RevertTo,Change,Pos,From,'
            'OrgScore,RevScore,Change,Prob\n']

    # Get the reversions file, the output file the score_mutant_pdb has made
    variant_scores = open(inputfile)

    # get just the mutation we want to revert to
    lines = variant_scores.readlines()
    var_line = lines[500]  # gets the Nth line, however long you want the burn-in to be
    print "starting here", var_line

    var_line = var_line.split(',')[0]
    var_loc = int(filter(str.isdigit, var_line))
    var_rev = var_line[:1]

    gen = 1
    # get all the pdb files
    sort_list = sorted(glob.glob('*[0-9].pdb'), key=numericalSort)
    # include the last 1000-some pdbs; the 16 is because we want the ones
    # that happened before the 500th mutation too
    sort_list = sort_list[-1016:]

    for i in range(1, len(sort_list) - 30):
        step = -15
        # calc reversion for next 15 moves
        for infile in sort_list[i:i + 31]:

            # for each mutation
            var_line = lines[gen + 500]  # gets the Nth line, however long the burn-in is
            var_line = var_line.split(',')[0]
            print(var_line)

            var_loc = int(filter(str.isdigit, var_line))
            var_rev = ""
            old = ""
            if step < 0:
                var_rev = var_line[len(var_line) - 1:len(var_line)]
                old = var_line[:1]
            else:
                var_rev = var_line[:1]
                old = var_line[len(var_line) - 1:len(var_line)]

            print "Current File Being Processed is: " + infile
            print "reverting to:", var_rev
            print "at:", var_loc

            # get the pdb you want to revert and make the reversion
            initial_pose = pose_from_pdb(infile)
            mutant_pose = mutate_residue(initial_pose, var_loc, var_rev,
                                         PACK_RADIUS, sf)

            # repack mut
            task1 = standard_packer_task(mutant_pose)
            task1.restrict_to_repacking()
            task1.or_include_current(True)
            packer_rotamers_mover1 = RotamerTrialsMover(sf, task1)
            packer_rotamers_mover1.apply(mutant_pose)

            # repack init
            task2 = standard_packer_task(initial_pose)
            task2.restrict_to_repacking()
            task2.or_include_current(True)
            pack_rotamers_mover2 = RotamerTrialsMover(sf, task2)
            pack_rotamers_mover2.apply(initial_pose)

            # apply min mover
            min_mover.apply(mutant_pose)
            min_mover.apply(initial_pose)

            # get scores
            variant_score = sf(mutant_pose)
            initial_score = sf(initial_pose)

            # get prob
            probability = calc_prob_mh(variant_score, initial_score, N, beta,
                                       threshold)

            row = (str(gen + 499) + ".pdb" + "," + str(infile) + "," +
                   str(step) + "," + str(var_line) + "," + str(var_rev) + "," +
                   str(var_loc) + "," + str(old) + "," + str(initial_score) +
                   "," + str(variant_score) + "," +
                   str(variant_score - initial_score) + "," +
                   str(probability) + "\n")
            print(row)
            data.append(row)

            step = step + 1
            gen += 1

    print '\nDONE'

    data_filename = 'premutate_rep1_bb_T_ch_T.csv'
    with open(data_filename, "w") as f:
        f.writelines(data)
    # print r, N, C
    pymol.send_colors(pose, C, default_color=rosetta.protocols.moves.XC_blue)
    # pymol.send_energy( pose_s )
    time.sleep(.1)

rosetta.init()

pose = rosetta.Pose()
pose.name = 'CustomNamedPose'
pose_s = rosetta.Pose()
rosetta.pose_from_pdb(pose, "test/data/test_in.pdb")
rosetta.pose_from_pdb(pose_s, "test/data/test_in_short.pdb")

scorefxn = rosetta.create_score_function('standard')
scorefxn(pose)

pymol = rosetta.PyMOL_Mover()
pymol.apply(pose_s)

coloring_demo(pose_s)

seq = rosetta.protocols.moves.SequenceMover()
seq.add_mover(pymol)
seq.apply(pose)
seq.apply(pose_s)
def cap_pdb_make_cst( RepeatPdbFileName, RepeatCstFileName, ReferencePdb, ReferenceCst, Ntrim=0, Ctrim=0, Step=0 ):
    if Step:
        if Ntrim:
            assert Ntrim % Step == 0
        if Ctrim:
            assert Ctrim % Step == 0

    # Grep out repeat length and source residue ranges from the pdb file name
    RepeatLength = int(re.sub(r'.*rep(\d+).*pdb', r'\1', RepeatPdbFileName))
    SourceRanges = re.sub(r'.*src(\d+_\d+__\d+_\d+).*pdb', r'\1', RepeatPdbFileName)
    assert SourceRanges != RepeatPdbFileName, 'src string not found in pdb name'
    SourceRanges = SourceRanges.split('__')
    SourceRanges = [ [ int(Value) for Value in Range.split('_') ] for Range in SourceRanges ]
    SourceStart = SourceRanges[0][0]
    SourceEnd = SourceRanges[0][1]

    # Load repeat pose and trim off floppy end residues
    RepeatPose = rosetta.pose_from_pdb( RepeatPdbFileName )
    TrimmedRepeatPose = grafting.return_region( RepeatPose, 3, RepeatPose.n_residue()-3 )
    TrimmedRepeatPose.pdb_info( rosetta.core.pose.PDBInfo( TrimmedRepeatPose ) )
    # rosetta.dump_pdb(TrimmedRepeatPose, 'Trimmed.pdb')

    # Load reference (native) pose
    ReferencePose = rosetta.pose_from_pdb( ReferencePdb )
    ReferencePose.pdb_info( rosetta.core.pose.PDBInfo( ReferencePose ) )

    PdbCstPairs = []

    # Guard against the default Step=0 (the original divided by Step directly,
    # which crashes when the trim arguments are left at their defaults)
    NcapSteps = (Ntrim / Step) + 1 if Step else 1
    CcapSteps = (Ctrim / Step) + 1 if Step else 1

    ''' Loop through N terminal caps '''
    for NcapTrimBackSteps in range(NcapSteps):
        NcapLastRes = SourceStart - (NcapTrimBackSteps * Step)

        ### Get pose for n-terminal cap with overhang for superimpositions
        try:
            NcapPose = grafting.return_region( ReferencePose, 1, NcapLastRes+5 )
        except (RuntimeError, OverflowError):
            print 'Requested end of n-terminal cap, %d, beyond range of reference protein.'%NcapLastRes
            continue
        if NcapPose.n_residue() <= 4:
            print 'Too few residues to attach n-terminal cap ending at %d; skipping'%NcapLastRes
            continue
        # rosetta.dump_pdb(NcapPose, 'Ncap.pdb')

        # Superimpose the last four cap residues onto the first four repeat residues
        NcapLength = NcapPose.n_residue()
        NcapOverhangPositions = [ Position for Position in range( NcapLength-3, NcapLength+1 ) ]
        NcapOverhangArray = generate_backbones.get_residue_array( NcapPose, NcapOverhangPositions )
        RepStartOverhangPositions = [1, 2, 3, 4]
        RepStartOverhangArray = generate_backbones.get_residue_array( TrimmedRepeatPose, RepStartOverhangPositions )
        RMSD, rMtx, tVec = solenoid_tools.rmsd_2_np_arrays_rosetta( NcapOverhangArray, RepStartOverhangArray )
        rosetta.Pose.apply_transform_Rx_plus_v( TrimmedRepeatPose, rMtx, tVec )
        # rosetta.dump_pdb( TrimmedRepeatPose, 'TrimmedShifted.pdb' )

        try:
            NcapPlusRepeatPose, RMSD, NcapCorrespondingResidues = generate_backbones.fuse( NcapPose, TrimmedRepeatPose )
        except AssertionError:
            print 'Not enough structural similarity to attach n-terminal cap ending at %d; skipping'%NcapLastRes
            continue
        rosetta.dump_pdb( NcapPlusRepeatPose, 'NcapPlusRepeat.pdb' )
        NcapPlusRepeatPose.pdb_info( rosetta.core.pose.PDBInfo( NcapPlusRepeatPose ) )

        RepeatCstExtrapolator = expand_cst.constraint_extrapolator(RepeatCstFileName)

        ''' Shift repeat unit constraints to accommodate numbering with n-cap length '''
        Redundict = {}
        RepeatCsts = []
        for RepeatPosition in range(1, RepeatPose.n_residue()+1 ):
            try:
                RepeatPositionCstDict = RepeatCstExtrapolator.Cst[RepeatPosition]
            except KeyError:
                continue
            for AtomName in RepeatPositionCstDict:
                for Cst in RepeatPositionCstDict[AtomName]:
                    ### unpack tuple values
                    AtomResidueCoords, CstParameters, CstLineNumber, CstType = Cst
                    ### if cst considered already (same cst file line), skip it!
                    if CstLineNumber in Redundict:
                        continue
                    Redundict[CstLineNumber] = 1
                    ### shift each atom's residue number by the n-cap offset
                    ShiftedPoseAtomResidueCoords = []
                    for AtomResiduePair in AtomResidueCoords:
                        RepeatPosePosition = AtomResiduePair[1] + NcapLastRes - 1
                        ShiftedPoseAtomResidueCoords.append( ( AtomResiduePair[0], RepeatPosePosition ) )
                    ShiftedCst = ShiftedPoseAtomResidueCoords, CstParameters, CstLineNumber, CstType
                    # only keep shifted csts whose atoms exist in the capped pose
                    if expand_cst.pose_has(NcapPlusRepeatPose, ShiftedPoseAtomResidueCoords):
                        RepeatCsts.append(ShiftedCst)

        ''' Loop through C terminal caps '''
        for CcapTrimForwardSteps in range(CcapSteps):
            CcapFirstRes = SourceEnd + ( CcapTrimForwardSteps * Step )
            Cshift = CcapFirstRes - 6
            try:
                CcapPose = grafting.return_region( ReferencePose, Cshift, ReferencePose.n_residue() )
            except (RuntimeError, OverflowError):
                print 'Requested start of c-terminal cap, %d, beyond range of reference protein.'%CcapFirstRes
                continue
            if CcapPose.n_residue() <= 4:
                print 'Too few residues to attach c-terminal cap starting at %d; skipping'%CcapFirstRes
                continue
            # rosetta.dump_pdb(CcapPose, 'Ccap.pdb')

            # Superimpose the first four cap residues onto the last four repeat residues
            CcapOverhangPositions = [1, 2, 3, 4]
            CcapOverhangArray = generate_backbones.get_residue_array( CcapPose, CcapOverhangPositions )
            RepEndOverhangPositions = [ Position for Position in range( NcapPlusRepeatPose.n_residue()-3, NcapPlusRepeatPose.n_residue()+1 ) ]
            RepEndOverhangArray = generate_backbones.get_residue_array( NcapPlusRepeatPose, RepEndOverhangPositions )
            RMSD, rMtx, tVec = solenoid_tools.rmsd_2_np_arrays_rosetta( RepEndOverhangArray, CcapOverhangArray )
            rosetta.Pose.apply_transform_Rx_plus_v(CcapPose, rMtx, tVec)
            rosetta.dump_pdb( CcapPose, 'CcapPose.pdb' )

            try:
                CappedRepeatPose, RMSD, CcapCorrespondingResidues = generate_backbones.fuse(NcapPlusRepeatPose, CcapPose)
            except AssertionError:
                print 'Not enough structural similarity to attach c-terminal cap starting at %d; skipping'%CcapFirstRes
                continue

            CappedNamePdb = re.sub(r'(.*).pdb$', r'\1_%dCap%d.pdb'%(NcapLastRes, CcapFirstRes), RepeatPdbFileName)
            assert CappedNamePdb != RepeatPdbFileName, 'regular expression substitution failed!'
            rosetta.dump_pdb( CappedRepeatPose, CappedNamePdb )

            ''' Generate csts for cap/repeat edges '''
            CapCstExtrapolator = expand_cst.constraint_extrapolator(ReferenceCst)
            CapCsts = []

            # N cap constraints are easy; no shifts are needed
            Redundict = {}   # for catching constraints already considered
            for Position in range(1, NcapLastRes):
                # skip positions w/out constraints
                try:
                    PositionCstDict = CapCstExtrapolator.Cst[Position]
                except KeyError:
                    continue
                for AtomName in PositionCstDict:
                    for Constraint in PositionCstDict[AtomName]:
                        AtomResidueCoords, ConstraintParameters, CstLineNumber, CstType = Constraint
                        if CstLineNumber in Redundict:
                            continue
                        Redundict[CstLineNumber] = 1
                        CapCsts.append(Constraint)

            # C cap constraints are harder; need to shift due to pose expansion
            CcapCstShift = CappedRepeatPose.n_residue() - ReferencePose.n_residue()
            Redundict = {}
            for Position in range( CcapFirstRes, ReferencePose.n_residue()+1 ):
                try:
                    PositionCstDict = CapCstExtrapolator.Cst[Position]
                except KeyError:
                    continue
                for AtomName in PositionCstDict:
                    for Constraint in PositionCstDict[AtomName]:
                        AtomResidueCoords, ConstraintParameters, CstLineNumber, CstType = Constraint
                        if CstLineNumber in Redundict:
                            continue
                        Redundict[CstLineNumber] = 1
                        # shift each atom's residue number into the capped pose numbering
                        ExpandedPoseAtomResidueCoords = []
                        for AtomResiduePair in AtomResidueCoords:
                            ExpandedPosePosition = AtomResiduePair[1] + CcapCstShift
                            ExpandedPoseAtomResidueCoords.append( ( AtomResiduePair[0], ExpandedPosePosition ) )
                        ShiftedConstraint = ExpandedPoseAtomResidueCoords, ConstraintParameters, CstLineNumber, CstType
                        CapCsts.append(ShiftedConstraint)

            # Start the cst file fresh, then keep only constraints present in the capped pose
            CappedCstName = re.sub(r'(.*).pdb$', r'\1.cst', CappedNamePdb)
            with open(CappedCstName, 'w') as OverwriteExistingFile:
                pass
            FinalCstSet = []
            for Cst in CapCsts:
                if expand_cst.pose_has(CappedRepeatPose, Cst[0]):
                    FinalCstSet.append(Cst)
            for Cst in RepeatCsts:
                if expand_cst.pose_has(CappedRepeatPose, Cst[0]):
                    FinalCstSet.append(Cst)
            CapCstExtrapolator.output_cst(FinalCstSet, CappedCstName)

            PdbCstPairs.append((CappedNamePdb, CappedCstName))

    return PdbCstPairs
def pose_from_params(filename, params_list):
    res_set = generate_nonstandard_residue_set(params_list)
    pose = Pose()
    pose_from_pdb(pose, res_set, filename)
    return pose
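A usage sketch with hypothetical file names: the .params list describes nonstandard residues (e.g. a ligand) so the PDB can be loaded into a pose.

# hypothetical inputs: a complex pdb plus the ligand's .params file
ligand_pose = pose_from_params('complex_with_ligand.pdb', ['LG1.params'])
print ligand_pose.total_residue()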
def optimize_repeat_pdb( (Pdb, CstSets, RepeatLength) ):
    ''' parallelizable '''

    # idealize peptide bonds with command line subprocess
    subprocess.check_output(['idealize_jd2.default.linuxgccrelease', '-s', Pdb])
    IdealizedPdbOldName = Pdb.replace('.pdb', '_0001.pdb')
    IdealizedPdbNewName = Pdb.replace('.pdb', '_ideal.pdb')
    subprocess.check_output(['mv', IdealizedPdbOldName, IdealizedPdbNewName])
    time.sleep(0.5)

    Pose = rosetta.pose_from_pdb(IdealizedPdbNewName)
    PoseLength = Pose.n_residue()
    assert PoseLength % RepeatLength == 0, 'pdb input into optimize_repeat_pdb must have an integer multiple of repeat_length residues'
    NumberRepeats = PoseLength / RepeatLength

    RepeatRanges = []
    Start = 1
    for Repeat in range(NumberRepeats):
        End = Start + RepeatLength - 1
        RepeatRanges.append((Start, End))
        Start += RepeatLength
    assert len(RepeatRanges) == NumberRepeats

    MidRepeat = ( NumberRepeats / 2 ) - 1
    ReferenceRange = RepeatRanges[MidRepeat]

    SetupNCS = symmetry.SetupNCSMover()
    for TargetRange in RepeatRanges:
        if TargetRange != ReferenceRange:
            # skip first three residues (not enough atoms for torsion)
            if TargetRange[0] == 1:
                SetupNCS.add_group( "%dA-%dA"%(ReferenceRange[0]+3, ReferenceRange[1]), "%dA-%dA"%(TargetRange[0]+3, TargetRange[1]) )
            # skip last three residues (not enough atoms for torsion)
            elif TargetRange[1] == PoseLength:
                SetupNCS.add_group( "%dA-%dA"%(ReferenceRange[0], ReferenceRange[1]-3), "%dA-%dA"%(TargetRange[0], TargetRange[1]-3) )
            else:
                SetupNCS.add_group( "%dA-%dA"%(ReferenceRange[0], ReferenceRange[1]), "%dA-%dA"%(TargetRange[0], TargetRange[1]) )
    SetupNCS.apply(Pose)

    # default talaris 2013 score function plus dihedral weight for symmetric NCS minimization
    SymmTalaris = rosetta.getScoreFunction()
    SymmTalaris.set_weight(rosetta.dihedral_constraint, 1.0)

    TalarisPlusCst = rosetta.getScoreFunction()
    TalarisPlusCst.set_weight(rosetta.atom_pair_constraint, 10.0)
    TalarisPlusCst.set_weight(rosetta.angle_constraint, 5.0)
    TalarisPlusCst.set_weight(rosetta.dihedral_constraint, 3.0)

    TalarisPlusCstLowerFaRep = rosetta.getScoreFunction()
    TalarisPlusCstLowerFaRep.set_weight(rosetta.atom_pair_constraint, 10.0)
    TalarisPlusCstLowerFaRep.set_weight(rosetta.angle_constraint, 5.0)
    TalarisPlusCstLowerFaRep.set_weight(rosetta.dihedral_constraint, 3.0)
    TalarisPlusCstLowerFaRep.set_weight(rosetta.fa_rep, 0.25)

    print 'Pdb:', Pdb
    for Cst in CstSets:
        print 'Cst:', Cst
        CstPose = Pose.clone()
        CstStemName = re.sub(r'^(.*)\.cst$', r'\1', Cst)

        # make constraint mover and load constraints from file
        Constrainer = rosetta.ConstraintSetMover()
        Constrainer.constraint_file(Cst)
        Constrainer.apply(CstPose)

        FxnTags = [ 'TalCst', 'LowFaRep' ]
        for i, ScoreFunction in enumerate( [ TalarisPlusCst, TalarisPlusCstLowerFaRep ] ):
            RelaxPose = CstPose.clone()
            rosetta.relax_pose(RelaxPose, ScoreFunction, 'tag')
            rosetta.dump_pdb( RelaxPose, CstStemName+'_%s.pdb'%FxnTags[i] )
            # remove all constraints, reapply NCS constraints, and relax again
            RelaxPose.remove_constraints()
            SetupNCS.apply(RelaxPose)
            rosetta.relax_pose(RelaxPose, SymmTalaris, 'tag')
            rosetta.dump_pdb( RelaxPose, CstStemName+'_%s_Relax.pdb'%FxnTags[i] )

        JustRelaxPose = Pose.clone()
        SetupNCS.apply( JustRelaxPose )
        rosetta.relax_pose( JustRelaxPose, SymmTalaris, 'tag' )
        rosetta.dump_pdb( JustRelaxPose, CstStemName+'_JustRelax.pdb' )
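A worked example of the NCS group strings built above, assuming a hypothetical 96-residue pose with RepeatLength 24: NumberRepeats is 4, MidRepeat is (4/2)-1 = 1, so the reference repeat is residues 25-48 and the mover receives one group per remaining repeat.

# repeat ranges: (1,24) (25,48) (49,72) (73,96); reference = (25,48)
# SetupNCS.add_group("28A-48A", "4A-24A")    # first repeat, first 3 residues skipped
# SetupNCS.add_group("25A-48A", "49A-72A")   # interior repeat, full range
# SetupNCS.add_group("25A-45A", "73A-93A")   # last repeat, last 3 residues skipped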
def __init__(self, pdb, centroid=False, pdb_file='', frag=False, nine_mer=False,
             local=False, local_size=3, full=False, rosetta_refinement=False):
    """
    :param pdb: pdb ID of the protein to be folded
    :type pdb: string
    :param centroid: option for use of centroid model
    :type centroid: boolean
    """
    self.loops = 0                      # Stores generation for which energy score was last calculated
    self.scores = {}                    # Dictionary container for current gen genomes/scores
    self.scores_list = []               # List container of current gen scores for search
    self.gen_added = 0                  # Last gen in which a point was added to novelty archive
    self.threshold = 10                 # Novelty threshold for which point is added to archive
    self.acceptance_threshold = 100     # Novelty threshold for which move is accepted automatically
    self.num_added = 0                  # Number of points added to novelty archive
    self.switch = False                 # All atom switch
    self.temperature = 5                # Monte Carlo temperature
    self.mover_range = 10               # +/- range of the angle in degrees in which mover moves residue
    self.local_size = local_size        # For local mover, size of fragment to move
    self.local = local                  # Whether to use local mover
    self.novelty_archive = deque()      # Initialize novelty archive
    self.centroid = centroid            # If true use centroid scoring
    self.last_lowest = 0                # For use in novelty loop
    self.last_lowest_10 = 0             # For use in clear main loop
    self.frag = frag                    # If true use frag mover
    self.rosetta_refinement = rosetta_refinement   # If true refine rosetta fold

    # Rosetta inits
    rosetta.init()                              # Initialize rosetta libraries
    pose_native = pose_from_rcsb(pdb)           # Create rosetta pose of natively folded protein from pdb file
    sequence = pose_native.sequence()           # Get sequence of protein
    self.scorefxn = rosetta.get_fa_scorefxn()   # Create the rosetta energy score function for all atom

    if pdb_file != '':
        # If a starting pdb is given, search from this pose
        self.pose = rosetta.pose_from_pdb(pdb_file)
    elif rosetta_refinement:
        # If rosetta refinement, start from FastRelax structure
        self.pose = rosetta.pose_from_sequence(sequence)
        relax = rosetta.FastRelax()
        relax.set_scorefxn(self.scorefxn)
        relax.apply(self.pose)
    else:
        # Create the rosetta pose that will be manipulated
        self.pose = rosetta.pose_from_sequence(sequence)

    if centroid:
        # Switch pose to centroid if centroid option is true
        switch = rosetta.SwitchResidueTypeSetMover("centroid")
        switch.apply(self.pose)

    self.c_size = len(sequence) * 2                   # Number of residues * 2 (phi and psi for each residue)
    self.native_energy = self.scorefxn(pose_native)   # Energy of the natively folded protein

    if centroid:
        # Switch rosetta score function if centroid
        self.scorefxn = rosetta.create_score_function('score3')

    self.conformation = []
    i = 1
    while i <= len(sequence):
        self.conformation.append(self.pose.phi(i))
        self.conformation.append(self.pose.psi(i))
        i += 1

    self.mc_energy = self.scorefxn(self.pose) + 500   # Energy to be used as minimal criteria
    self.lowest = self.scorefxn(self.pose)            # Lowest energy in archive

    if frag:
        if nine_mer:
            fragset = rosetta.ConstantLengthFragSet(9)
            fragset.read_fragment_file("aat000_09_05-1.200_v1_3")
        else:
            fragset = rosetta.ConstantLengthFragSet(3)
            fragset.read_fragment_file("aat000_03_05-1.200_v1_3")
        movemap = rosetta.MoveMap()
        movemap.set_bb(True)
        self.mover_3mer = rosetta.ClassicFragmentMover(fragset, movemap)

    if local:
        # For local, initialize novelty archive with appropriate number of deques
        self.novelty_archive = [deque() for i in range(self.c_size / 2 / self.local_size)]

    self.full = full   # If true use full mover
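The conformation list above stores the genome as a flat [phi1, psi1, phi2, psi2, ...] vector. A sketch of pushing such a vector back onto a pose (an assumption for illustration; this helper is not part of the original class):

def apply_conformation(pose, conformation):
    # conformation[2*(i-1)] is phi of residue i, conformation[2*(i-1)+1] is psi
    for i in range(1, pose.total_residue() + 1):
        pose.set_phi(i, conformation[2 * (i - 1)])
        pose.set_psi(i, conformation[2 * (i - 1) + 1])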
def main():
    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core')

    # Constants
    PACK_RADIUS = 10.0
    # Population size
    N = 37
    # Beta (temperature term)
    beta = 1
    # Look up what the first stored value was in the files to get the threshold
    threshold = float(-534.687360627 / 2)

    # Set up ScoreFunction
    sf = get_fa_scorefxn()

    # Set up MoveMap.
    mm = MoveMap()
    mm.set_bb(True)
    mm.set_chi(True)

    # Prepare data headers
    data = ['Generation,RevertTo,OrgScore,RevScore,Change,Prob\n']

    # Get the reversions file, the output file that score_mutant_pdb has made
    variant_scores = open('mh_rep_3_37.csv')

    # Get just the mutation we want to revert to
    lines = variant_scores.readlines()
    var_line = lines[2]   # gets the Nth line, however long you want the burn-in to be
    var_line = var_line.split(',')[0]
    var_loc = int(filter(str.isdigit, var_line))
    var_rev = var_line[:1]

    gen = 1
    # Get all the pdb files
    sort_list = sorted(glob.glob('*.pdb'), key=numericalSort)

    for i in range(1, len(sort_list) - 15):
        # Calculate the reversion for the next 15 moves
        for infile in sorted(glob.glob('*.pdb'), key=numericalSort)[i:i + 15]:
            # For each mutation
            var_line = lines[gen + 1]   # gets the Nth line, however long you want the burn-in to be
            var_line = var_line.split(',')[0]
            var_loc = int(filter(str.isdigit, var_line))
            var_rev = var_line[:1]

            print "Current File Being Processed is: " + infile
            initial_pose = pose_from_pdb(infile)
            initial_score = sf(initial_pose)
            print("init scored")

            mutant_pose = mutate_residue(initial_pose, var_loc, var_rev, PACK_RADIUS, sf)
            variant_score = sf(mutant_pose)
            probability = calc_prob_mh(variant_score, initial_score, N, beta, threshold)

            row = str(gen) + "," + var_line + "," + str(initial_score) + "," + \
                  str(variant_score) + "," + str(variant_score - initial_score) + \
                  "," + str(probability) + "\n"
            print row
            data.append(row)
            gen += 1

    print '\nDONE'

    data_filename = 'rep_3_mh_37_rev_15_score.csv'
    with open(data_filename, "w") as f:
        f.writelines(data)
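calc_prob_mh is defined elsewhere in this codebase and shared with the simulation scripts below. A purely illustrative Metropolis-style sketch consistent with its call signature (variant score, current score, population size N, beta, stability threshold); every detail here is an assumption and the real helper's selection model may differ:

import math

def calc_prob_mh_sketch(variant_score, current_score, N, beta, threshold):
    # hypothetical: reject variants that fail the stability threshold outright
    if variant_score > threshold:
        return 0.0
    # hypothetical Metropolis criterion scaled by population size
    delta = variant_score - current_score
    return min(1.0, math.exp(-beta * float(N) * delta))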
# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
# (c) For more information, see http://www.rosettacommons.org. Questions about this can be
# (c) addressed to University of Washington UW TechTransfer, email: [email protected].

## @author Sergey Lyskov

print '-------- Test/Demo for capturing Tracers output in PyRosetta --------'

import rosetta

T = rosetta.basic.PyTracer()
rosetta.basic.Tracer.set_ios_hook(T, rosetta.basic.Tracer.get_all_channels_string(), False)

rosetta.init()

pose = rosetta.pose_from_pdb("test/data/test_in.pdb")

print '\nCaptured IO:'
print T.buf()

# More fancy example, using an output callback:
class MyPyTracer(rosetta.basic.PyTracer):
    def __init__(self):
        rosetta.basic.PyTracer.__init__(self)

    def output_callback(self, s):
        print 'MyPyTracer.output_callback with argument:'
        print s
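The callback class above can be hooked in the same way as the buffering tracer earlier in this demo; each tracer write then arrives through output_callback instead of accumulating in a buffer:

M = MyPyTracer()
rosetta.basic.Tracer.set_ios_hook(M, rosetta.basic.Tracer.get_all_channels_string(), False)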
def main(argv=None):
    if argv is None:
        argv = sys.argv

    ArgParser = argparse.ArgumentParser(description=' for plotting pdb scores and selecting subsets based on absolute or per residue scores ')
    ArgParser.add_argument('-pdb_glob', type=str, help=' pdb stem, start of globs for pdbs and csts ', required=True)
    ArgParser.add_argument('-native_pdb', type=str, help=' pdb to compare designs against ', required=True)
    ArgParser.add_argument('-out', type=str, help=' folder to move files to ', required=True)
    ArgParser.add_argument('-score', type=float, help=' select all structures with less than this REU / residue ', default=None)
    ArgParser.add_argument('-plot', type=int, help=' 0|(1) plot scores with plotly ', default=1)
    ArgParser.add_argument('-norm', type=int, help=' 0|(1) normalize scores by residue ', default=1)
    ArgParser.add_argument('-name', type=str, help=' plot tag ', default='')
    Args = ArgParser.parse_args()
    print Args

    Pdbs = glob.glob(Args.pdb_glob)
    print 'globbed %d pdbs'%len(Pdbs)

    if not os.path.isdir(Args.out):
        subprocess.check_output(['mkdir', Args.out])
    if Args.out[-1] != '/':
        Args.out = Args.out + '/'
    if Args.name != '':
        # trailing space so the plot title below reads cleanly
        Args.name = Args.name + ' '

    NativePose = rosetta.pose_from_pdb(Args.native_pdb)

    RepeatLengths = []
    for Pdb in Pdbs:
        RepeatLength = int(re.sub(r'^.*rep(\d+).*pdb$', r'\1', Pdb))
        assert RepeatLength != Pdb, " regular expression extraction of 'rep' (repeat length) value failed on %s "%Pdb
        RepeatLengths.append(RepeatLength)

    # Scoring is redundant: once for sorting outside the plotter, then again in
    # the plotter, so that multiple score functions can be plotted easily.
    # Removing the redundancy is not a priority.
    PoseSortingTuples = []
    Talaris = rosetta.getScoreFunction()
    for i, Pdb in enumerate(Pdbs):
        RepeatLength = RepeatLengths[i]
        Pose = rosetta.pose_from_pdb(Pdb)
        if Args.norm:
            Score = Talaris(Pose) / Pose.n_residue()
        else:
            Score = Talaris(Pose)
        PoseSortingTuples.append((RepeatLength, Score, Pose))

    # sorts by repeat length (shortest to longest) then score (best to worst)
    PoseSortingTuples.sort()

    AllRepeatLengthGroups = []
    RepeatLengthGroup = []
    LastLength = 0
    for PoseTuple in PoseSortingTuples:
        Length = PoseTuple[0]
        if LastLength and Length != LastLength:
            AllRepeatLengthGroups.append(RepeatLengthGroup)
            RepeatLengthGroup = []
        RepeatLengthGroup.append(PoseTuple)
        LastLength = Length
    # for the last repeat length
    AllRepeatLengthGroups.append(RepeatLengthGroup)

    # Add more score functions as wanted
    if Args.plot:
        Plotter = plotly_plotter(ScoreFxns=[Talaris], FxnNames=['Talaris'], EnergyPerResidue=True)

    for RepeatLengthGroup in AllRepeatLengthGroups:
        print 'RepeatLengthGroup', RepeatLengthGroup
        Poses = [PoseTuple[2] for PoseTuple in RepeatLengthGroup]
        RepeatLength = RepeatLengthGroup[0][0]

        if Args.plot:
            Plotter.clear_traces()
            Xaxis = Plotter.score_poses(Poses)
            Plotter.add_comparsion_threshold(NativePose, Xaxis)
            Plotter.plot_traces(PlotName='%s%s based %d res repeats globbed with %s'%(Args.name, Args.native_pdb, RepeatLength, Args.pdb_glob))

        if Args.score != None:
            with open('%sScores.log'%Args.out, 'a') as Log:
                for RepLen, Score, Pose in RepeatLengthGroup:
                    if Score > Args.score:
                        break
                    PdbName = Pose.pdb_info().name()
                    subprocess.check_output(['cp', PdbName, Args.out])
                    print>>Log, '%s\t%.3f'%(PdbName, Score)
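A hypothetical invocation of this script (plot_and_select.py is a placeholder; the real file name is not shown here). With -norm 1 the -score cutoff is interpreted per residue, so -2.0 keeps only designs scoring better than -2.0 REU/residue:

# python plot_and_select.py -pdb_glob '*rep*.pdb' -native_pdb 1EZG.pdb -out selected/ -score -2.0 -norm 1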
def main(ExtraResidues=0, ipython=0):
    ### Required args
    ArgParser = argparse.ArgumentParser(description=' for plotting pdb scores and selecting subsets based on absolute or per residue scores ')
    ArgParser.add_argument('-pdb_glob', type=str, help=' pdb stem, start of globs for pdbs and csts ', required=True)
    ArgParser.add_argument('-native', type=str, help=' pdb to compare designs against ', required=True)
    ### Default args
    ArgParser.add_argument('-cst', type=str, help=' to provide cst manually, will apply to all globbed pdbs!!! ', default=False)
    ArgParser.add_argument('-param', type=str, nargs='+', help=' params ', default=[])
    ArgParser.add_argument('-norm', type=int, help=' 0|(1) normalize scores by residue ', default=1)
    ### The following args are for plotly; change these if you use this script!!!
    ArgParser.add_argument('-plotly_id', type=str, help=' ', default='pylesharley')
    ArgParser.add_argument('-plotly_key', type=str, help=' ', default='cc5z4a8kst')
    ArgParser.add_argument('-plot', type=int, help=' 0|(1) plot scores with plotly ', default=1)
    ArgParser.add_argument('-name', type=str, help=' plot tag ', default='')
    ArgParser.add_argument('-and_or', type=str, help=" And/Or logic for score cutoffs. Default = 'and' ", default='and')
    ArgParser.add_argument('-multi', type=int, help=' 0|(1) plot different methods together on same plot ', default=1)
    Args = ArgParser.parse_args()

    Pdbs = glob.glob(Args.pdb_glob)
    print 'globbed %d pdbs' % len(Pdbs)

    if ExtraResidues == 0 and len(Args.param) > 0:
        # Residue set generation occasionally fails on the first attempt; retry once
        try:
            ExtraParams = rosetta.Vector1(Args.param)
            ExtraResidues = rosetta.generate_nonstandard_residue_set(ExtraParams)
        except Exception:
            ExtraParams = rosetta.Vector1(Args.param)
            ExtraResidues = rosetta.generate_nonstandard_residue_set(ExtraParams)

    ### for ipython mode
    if ipython:
        return ExtraResidues

    Args.and_or = Args.and_or.lower()
    assert Args.and_or == 'and' or Args.and_or == 'or', " -and_or must equal 'and' or 'or' "

    RepeatLengths = []
    ProcessTags = {}
    TagList = []
    TagByPdbName = {}

    # Better to find out if the native pdb is wrong before waiting for pdb scoring
    Check = open(Args.native, 'r')

    # Find the shortest common filename prefix among the globbed pdbs
    OverlapStarts = []
    for Pdb in Pdbs:
        for OtherPdb in Pdbs:
            if Pdb != OtherPdb:
                i = 0
                while Pdb[:i] == OtherPdb[:i]:
                    i += 1
                Overlap = OtherPdb[:i - 1]
                OverlapStarts.append((len(Overlap), Overlap))
    OverlapStarts.sort()
    ShortestOverlap = OverlapStarts[0][1]

    for Pdb in Pdbs:
        try:
            RepeatLength = int(re.sub(r'^.*rep(\d+).*pdb$', r'\1', Pdb))
        except ValueError:
            RepeatLength = 0
        assert RepeatLength != Pdb, " regular expression extraction of 'rep' (repeat length) value failed on %s " % Pdb
        RepeatLengths.append(RepeatLength)

        #### re.sub out tag from design process
        Tag = re.sub(r'^.*rep\d+(.*)\.pdb$', r'\1', Pdb)
        Tag = re.sub(r'^%s(.*)\.pdb$' % (ShortestOverlap), r'\1', Tag)
        TagByPdbName[Pdb] = Tag
        try:
            TagNumber = ProcessTags[Tag]
        except KeyError:
            TagNumber = len(ProcessTags) + 1
            ProcessTags[Tag] = TagNumber
        TagList.append(TagNumber)

    # Scoring is redundant: once for sorting outside the plotter, then again in
    # the plotter, so that multiple score functions can be plotted easily.
    if Args.multi:
        # Sort by repeat length, then method tag, then score
        MultiPoseSortingTuples = []
    else:
        # Sort by repeat length, then score
        PoseSortingTuples = []

    Talaris = rosetta.getScoreFunction()
    for i, Pdb in enumerate(Pdbs):
        RepeatLength = RepeatLengths[i]
        ProcessNumber = TagList[i]
        Pose = rosetta.pose_from_pdb(Pdb)
        if Args.norm:
            Score = Talaris(Pose) / Pose.n_residue()
        else:
            Score = Talaris(Pose)
        if Args.multi:
            MultiPoseSortingTuples.append((RepeatLength, ProcessNumber, Score, Pose))
        else:
            PoseSortingTuples.append((RepeatLength, Score, Pose))

    if Args.multi:
        MultiPoseSortingTuples.sort()
        SortedTuples = MultiPoseSortingTuples
    else:
        # sorts by repeat length (shortest to longest) then score (best to worst)
        PoseSortingTuples.sort()
        SortedTuples = PoseSortingTuples

    LastLength = 0
    LastTag = 0
    AllGroups = []
    CurrentGroup = []
    for PoseTuple in SortedTuples:
        Length = PoseTuple[0]
        if Args.multi:
            Tag = PoseTuple[1]
        if LastLength and Length != LastLength:
            AllGroups.append(CurrentGroup)
            CurrentGroup = []
        if Args.multi:
            if LastTag and Tag != LastTag:
                AllGroups.append(CurrentGroup)
                CurrentGroup = []
        CurrentGroup.append(PoseTuple)
        LastLength = Length
        if Args.multi:
            LastTag = Tag
    # for the last group
    AllGroups.append(CurrentGroup)

    ''' Build score functions here: '''
    Talaris = rosetta.getScoreFunction()

    # set_all_weights_zero returns a talaris function with all weights set to 0
    CstScore = set_all_weights_zero(rosetta.getScoreFunction())
    CstScore.set_weight(rosetta.atom_pair_constraint, 10.0)
    CstScore.set_weight(rosetta.angle_constraint, 5.0)
    CstScore.set_weight(rosetta.dihedral_constraint, 3.0)

    HbondScore = set_all_weights_zero(rosetta.getScoreFunction())
    HbondScore.set_weight(rosetta.hbond_sr_bb, 1.170)
    HbondScore.set_weight(rosetta.hbond_lr_bb, 1.170)
    HbondScore.set_weight(rosetta.hbond_bb_sc, 1.170)
    HbondScore.set_weight(rosetta.hbond_sc, 1.100)

    Disulfide = set_all_weights_zero(rosetta.getScoreFunction())
    Disulfide.set_weight(rosetta.dslf_fa13, 1.0)

    if Args.plot:
        if Args.norm:
            PerRes = True
        else:
            PerRes = False
        ''' Add and remove score functions here '''
        Plotter = plotly_plotter(Args.plotly_id, Args.plotly_key, Args.native,
                                 ScoreFxns=[CstScore, Talaris, HbondScore, Disulfide],
                                 FxnNames=['ConstraintScore', 'Talaris2013', 'H-bond', 'Disulfide'],
                                 PerResidue=PerRes)

    for PoseGroup in AllGroups:
        if len(PoseGroup):
            Poses = [PoseTuple[-1] for PoseTuple in PoseGroup]
            RepeatLength = PoseGroup[0][0]
            if Args.plot:
                GroupPdbName = PoseGroup[0][-1].pdb_info().name()
                if Args.multi:
                    Tag = TagByPdbName[GroupPdbName]
                if Args.cst:
                    Plotter.score_poses(Poses, Args.cst, Tag)
                else:
                    Plotter.score_poses(Poses, 1, Tag)

    if Args.plot:
        Plotter.plot_2d_score_combinations()
        Plotter.draw_comparisons()
        print 'plotting...'
        if len(Args.name):
            Name = Args.name
        else:
            Name = '%s based %d res ' % (Args.native, RepeatLength)
        Plotter.render_scatter_plot(PlotName=Name)

    while 1:
        ScoreFunctionScoreCutoffs = []
        for i, Name in enumerate(Plotter.FxnNames):
            while 1:
                try:
                    Cutoff = float(raw_input('\tEnter cutoff value (maximum) for %s function: ' % Name))
                    break
                except ValueError:
                    pass
            ScoreFunctionScoreCutoffs.append(Cutoff)

        print 'Cutoff values set at:'
        for i, Name in enumerate(Plotter.FxnNames):
            print Name, ScoreFunctionScoreCutoffs[i]
            Plotter.ScoreFunctionScoredPdbs[i].sort()

        PassingPdbs = []
        for i, Name in enumerate(Plotter.FxnNames):
            PassThisFxn = []
            Cutoff = ScoreFunctionScoreCutoffs[i]
            for Score, Pdb in Plotter.ScoreFunctionScoredPdbs[i]:
                if Score <= Cutoff:
                    PassThisFxn.append(Pdb)
                else:
                    break
            PassingPdbs.append(PassThisFxn)

        PdbsPassingAll = PassingPdbs[0]
        if Args.and_or == 'and':
            for OtherSet in PassingPdbs[1:]:
                PdbsPassingAll = list(set(PdbsPassingAll) & set(OtherSet))
        else:
            for OtherSet in PassingPdbs[1:]:
                PdbsPassingAll = list(set(PdbsPassingAll + OtherSet))

        Outdir = raw_input('\tEnter folder to copy pdbs that pass these thresholds (%s logic) to: ' % Args.and_or)
        if not os.path.isdir(Outdir):
            subprocess.check_output(['mkdir', Outdir])
        if Outdir[-1] != '/':
            Outdir = Outdir + '/'
        for Pdb in PdbsPassingAll:
            subprocess.check_output(['cp', Pdb, Outdir])
            if Plotter.CstDict[Pdb] != None:
                subprocess.check_output(['cp', Plotter.CstDict[Pdb], Outdir])

        Continue = str(raw_input('\tEnter Y to add another set of selection thresholds, or anything else to quit: ')).upper()
        if Continue != 'Y':
            break
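set_all_weights_zero is used above (and again in cap_and_relax_pdb below) but defined elsewhere in this codebase. A minimal sketch that zeroes every currently weighted term, assuming only the standard ScoreFunction API:

def set_all_weights_zero_sketch(score_fxn):
    # zero every term that currently carries weight, then hand the function back
    for score_type in score_fxn.get_nonzero_weighted_scoretypes():
        score_fxn.set_weight(score_type, 0.0)
    return score_fxn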
def sequence_mapping(pdb_file, sequence_file, score_file, relax, jobs):
    if os.path.exists(os.getcwd() + '/' + pdb_file) and pdb_file:
        init()
        pose = Pose()
        score_fxn = create_score_function('talaris2014')
        if relax:
            refinement = FastRelax(score_fxn)
        pose_from_pdb(pose, pdb_file)

        if os.path.exists(os.getcwd() + '/' + sequence_file) and sequence_file:
            fid = open(sequence_file, 'r')
            fod = open(score_file, 'w')
            data = fid.readlines()
            fid.close()

            read_seq = False
            for i in data:
                if not len(i):
                    continue
                elif i[0] == '>':
                    # fasta header line; pull out the first non-empty token as the job name
                    read_seq = True
                    fasta_line = re.split(r':|\s+|\||\n', i[1:])
                    name_cpt = 0
                    while name_cpt < len(fasta_line) and not fasta_line[name_cpt]:
                        name_cpt += 1
                    if name_cpt < len(fasta_line):
                        job_output = fasta_line[name_cpt]
                    else:
                        print 'Error: Please enter an identifier for sequences in your fasta file'
                        exit(1)
                elif read_seq:
                    # thread the sequence onto the backbone, one residue at a time
                    resn = 1
                    for j in i:
                        if j != '\n' and resn <= pose.total_residue():
                            mutator = MutateResidue(resn, one_to_three[j])
                            mutator.apply(pose)
                            resn += 1
                        elif resn > pose.total_residue():
                            print 'WARNING: couldn\'t mutate residue number ' + str(resn) + ', sequence too long for backbone...'
                            resn += 1
                    if relax:
                        jd = PyJobDistributor(job_output, jobs, score_fxn)
                        jd.native_pose = pose
                        scores = [0] * jobs
                        counter = 0
                        decoy = Pose()
                        while not jd.job_complete:
                            decoy.assign(pose)
                            refinement.apply(decoy)
                            jd.output_decoy(decoy)
                            scores[counter] = score_fxn(decoy)
                            counter += 1
                        for i in range(0, len(scores)):
                            fod.writelines(job_output + '_' + str(i + 1) + ' : ' + str(scores[i]) + '\n')
                    else:
                        pose_packer = standard_packer_task(pose)
                        pose_packer.restrict_to_repacking()
                        packmover = PackRotamersMover(score_fxn, pose_packer)
                        packmover.apply(pose)
                        fod.writelines(job_output + ' : ' + str(score_fxn(pose)) + '\n')
                        pose.dump_pdb(job_output + '_1.pdb')
                else:
                    print 'Bad fasta format'
                    exit(1)
            fod.close()
        else:
            print 'Please provide a valid sequence file, ' + sequence_file + ' doesn\'t exist'
    else:
        print 'Please provide a valid backbone file, ' + pdb_file + ' doesn\'t exist'
def main():
    # takes name of pdb file without the extension
    args = sys.argv
    pdb_file = args[1]
    out_file = args[2]
    score_type = int(args[3])

    # set up timer to figure out how long the code took to run
    t0 = time()

    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core -mute protocol -mute warn')

    # Constants
    PACK_RADIUS = 5
    # Amino acids; notice there is no C
    AAs = ("A", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")
    # Number of mutations to accept
    max_accept_mut = 2000
    # Population size
    N = 1
    # Beta (temperature term)
    beta = 1

    # Prepare data headers
    data = ['Variant,ChainA,ChainB,ChainC,InterfaceAB,InterfaceAC,"delta-delta-G",Probability,Generation\n']

    initial_pose = pose_from_pdb(pdb_file)

    # Set up ScoreFunction
    sf = get_fa_scorefxn()

    # Set up MoveMap; this is where you turn backbone and side chain flexibility on and off
    mm = MoveMap()
    mm.set_bb(False)

    # Get the initial score of the structure to calculate the threshold
    pre_pre_packing_score = sf(initial_pose)
    print(pre_pre_packing_score)

    min_mover = MinMover()
    min_mover.movemap(mm)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')

    cp_init_pdb = Pose()
    cp_init_pdb.assign(initial_pose)
    chains = cp_init_pdb.split_by_chain()

    # Split up the AB and AC interfaces
    initial_poseAB = Pose()
    initial_poseAB.assign(initial_pose)
    initial_poseAC = Pose()
    initial_poseAC.assign(initial_pose)

    init_chain_moverAB = SwitchChainOrderMover()
    init_chain_moverAB.chain_order("12")
    init_chain_moverAB.apply(initial_poseAB)

    init_chain_moverAC = SwitchChainOrderMover()
    init_chain_moverAC.chain_order("13")
    init_chain_moverAC.apply(initial_poseAC)

    # Score the initial stabilities of each chain
    wt_a = sf(chains[1])
    wt_b = sf(chains[2])
    wt_c = sf(chains[3])

    # Score the initial interfaces
    inter_AB = InterfaceEnergy_split(initial_poseAB)
    inter_AC = InterfaceEnergy_split(initial_poseAC)

    # Initial thresholds set to half of the initial stabilities; if you want to
    # do a different protein, change these
    threshold_a = -138.41754752
    threshold_b = -61.378619136
    threshold_c = -61.378619136
    threshold_inter_ab = -10.3726691079
    threshold_inter_ac = -10.3726691079

    data.append('WT,' + str(wt_a) + ',' + str(wt_b) + ',' + str(wt_c) + ',' + str(inter_AB) + ',' + str(inter_AC) + ',0.0,0.0,0\n')

    # Check the initial starting score
    init_score = score_all(initial_pose, sf, min_mover, beta, threshold_a, threshold_b, threshold_c, threshold_inter_ab, threshold_inter_ac, score_type)
    print(init_score)

    # Number of residues to select from
    n_res = initial_pose.total_residue()
    print(n_res)

    # Start sim
    i = 0
    gen = 0
    while i < max_accept_mut:
        # Update the number of generations that have passed
        gen += 1
        print 'accepts:', i

        # Pick a place to mutate
        mut_location = random.randint(1, n_res)
        # Get the amino acid at that position
        res = initial_pose.residue(mut_location)
        # Don't mess with C; just choose again
        while res.name1() == 'C':
            mut_location = random.randint(1, n_res)
            res = initial_pose.residue(mut_location)

        # Choose the amino acid to mutate to
        toname = res.name1()
        new_mut_key = random.randint(0, len(AAs) - 1)
        proposed_res = AAs[new_mut_key]
        # Don't bother mutating to the same amino acid; it just takes more time
        while proposed_res == res.name1():
            new_mut_key = random.randint(0, len(AAs) - 1)
            proposed_res = AAs[new_mut_key]

        # Initialize mutant with current pose
        mutant_pose = Pose()
        mutant_pose.assign(initial_pose)

        # Mutate
        mutant_pose = mutate_residue_chain(mutant_pose, mut_location, proposed_res, PACK_RADIUS, sf)

        # Score mutant
        mut_score = score_all(mutant_pose, sf, min_mover, beta, threshold_a, threshold_b, threshold_c, threshold_inter_ab, threshold_inter_ac, score_type)

        # Get the probability that the mutation will be accepted
        probability = calc_prob_scores(mut_score['score'], init_score['score'], N)

        rand = random.random()
        # Test to see if mutation is accepted
        if float(rand) < float(probability):
            print "accepted"

            # Make a name for the new mutant
            variant_name = str(toname) + str(initial_pose.pdb_info().number(mut_location)) + str(proposed_res)

            # Assuming some burn-in phase; make this zero if you want to store everything
            if i >= 0:
                # Save name and energy change
                data.append(variant_name + ',' + str(mut_score['a']) + ',' + str(mut_score['b']) + ',' + str(mut_score['c']) + ',' + str(mut_score['ab']) + ',' + str(mut_score['ac']) + ',' + str(mut_score['score'] - init_score['score']) + ',' + str(probability) + ',' + str(gen) + '\n')
                # Save the new accepted mutation
                pdb_name = str(i) + '.pdb'
                mutant_pose.dump_pdb(pdb_name)

            # Update the wildtype
            initial_pose = mutant_pose
            init_score = mut_score

            # Update number of accepts
            i += 1
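score_all and calc_prob_scores are helpers defined elsewhere in this codebase. From the calls above, score_all returns a dict keyed 'score', 'a', 'b', 'c', 'ab', 'ac' (the combined selection score, the per-chain stabilities, and the two interface energies). A structural sketch under those assumptions, for illustration only; how the real helper combines the terms and uses the thresholds, min_mover, and score_type may differ:

def score_all_sketch(pose, sf, min_mover, beta, t_a, t_b, t_c, t_ab, t_ac, score_type):
    scores = {}
    chains = pose.split_by_chain()
    scores['a'] = sf(chains[1])
    scores['b'] = sf(chains[2])
    scores['c'] = sf(chains[3])
    # carve out the pairwise complexes the same way main() does above
    ab = Pose(); ab.assign(pose)
    moverAB = SwitchChainOrderMover(); moverAB.chain_order("12"); moverAB.apply(ab)
    scores['ab'] = InterfaceEnergy_split(ab)
    ac = Pose(); ac.assign(pose)
    moverAC = SwitchChainOrderMover(); moverAC.chain_order("13"); moverAC.apply(ac)
    scores['ac'] = InterfaceEnergy_split(ac)
    # hypothetical combination; the real helper's formula depends on score_type
    scores['score'] = scores['a'] + scores['b'] + scores['c'] + scores['ab'] + scores['ac']
    return scores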
def main():
    # takes name of pdb file without the extension
    args = sys.argv
    pdb_file = args[1]

    # set up timer to figure out how long the code took to run
    t0 = time()

    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core')

    # Constants
    PACK_RADIUS = 10.0
    # Amino acids; notice there is no C
    AAs = ("A", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")
    # Number of mutations to accept
    max_accept_mut = 1500
    # Population size
    N = 100
    # Beta (temperature term)
    beta = 1

    # Prepare data headers
    data = ['Variant,Rosetta Score,"delta-delta-G",Probability,Generation\n']

    # Load and clean up pdb file
    name = pdb_file + ".pdb"
    cleanATOM(name)
    clean_name = pdb_file + ".clean.pdb"
    initial_pose = pose_from_pdb(clean_name)

    # Set up ScoreFunction
    sf = get_fa_scorefxn()

    # Set up MoveMap; change these for more or less flexibility
    mm = MoveMap()
    mm.set_bb(True)
    mm.set_chi(True)

    # Pack and minimize initial pose to remove clashes.
    pre_pre_packing_score = sf(initial_pose)

    task = standard_packer_task(initial_pose)
    task.restrict_to_repacking()
    task.or_include_current(True)
    pack_rotamers_mover = RotamerTrialsMover(sf, task)
    pack_rotamers_mover.apply(initial_pose)

    min_mover = MinMover()
    min_mover.movemap(mm)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')
    min_mover.apply(initial_pose)

    post_pre_packing_score = sf(initial_pose)

    # Set threshold for selection
    threshold = pre_pre_packing_score / 2

    data.append('WT,' + str(post_pre_packing_score) + ',0.0,0.0,0\n')

    # Number of residues to select from
    n_res = initial_pose.total_residue()

    # Start sim
    i = 0
    gen = 0
    while i < max_accept_mut:
        # Update the number of generations that have passed
        gen += 1
        print 'accepts:', i

        # Pick a place to mutate
        mut_location = random.randint(1, n_res)
        # Get the amino acid at that position
        res = initial_pose.residue(mut_location)
        # Don't mess with C; just choose again
        while res.name1() == 'C':
            mut_location = random.randint(1, n_res)
            res = initial_pose.residue(mut_location)

        # Choose the amino acid to mutate to
        new_mut_key = random.randint(0, len(AAs) - 1)
        proposed_res = AAs[new_mut_key]
        # Don't bother mutating to the same amino acid; it just takes more time
        while proposed_res == res.name1():
            new_mut_key = random.randint(0, len(AAs) - 1)
            proposed_res = AAs[new_mut_key]

        # Make the mutation. (This is a fairly crude model and probably shouldn't
        # be used as-is; a newer version repacks the whole structure and
        # reminimizes, and ideally would also backrub.)
        mutant_pose = mutate_residue(initial_pose, mut_location, proposed_res, PACK_RADIUS, sf)

        # Score mutant
        variant_score = sf(mutant_pose)

        # Get the probability that the mutation will be accepted
        probability = calc_prob_mh(variant_score, post_pre_packing_score, N, beta, threshold)

        # Test to see if mutation is accepted
        if random.random() < probability:
            # Create a name for the mutant if it's going to be kept
            variant_name = res.name1() + str(initial_pose.pdb_info().number(mut_location)) + str(proposed_res)

            # Assuming a 1000-accept burn-in phase; take this 'if' out if you want to store everything
            if i > 1000:
                # Save name and energy change
                data.append(variant_name + "," + str(variant_score) + "," + str(variant_score - post_pre_packing_score) + "," + str(probability) + "," + str(gen) + "\n")
                pdb_name = str(i) + ".pdb"
                mutant_pose.dump_pdb(pdb_name)

            # Update the wildtype
            initial_pose = mutant_pose
            post_pre_packing_score = variant_score

            # Update number of accepts
            i += 1

    print '\nMutations and scoring complete.'
    t1 = time()

    # Output results.
    data_filename = pdb_file[:-5] + 'mh_1500_rep3.csv'
    with open(data_filename, "w") as f:
        f.writelines(data)
    print 'Data written to:', data_filename

    print 'program takes %f' % (t1 - t0)
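mutate_residue used above is, as I understand it, the standard PyRosetta toolbox helper: it takes the pose, a pose-numbered position, the one-letter code of the replacement residue, a pack radius, and a score function, and repacks side chains within that radius of the mutation site. A typical call mirroring the one above:

# from toolbox import mutate_residue   # classic PyRosetta toolbox location
# residue 42 mutated to Ala; side chains within 10 A of it get repacked
mutant_pose = mutate_residue(initial_pose, 42, 'A', 10.0, sf)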
def main(argv=None): if argv != None: sys.argv = [sys.argv[0]] + [arg for arg in argv] print 'sys.argv', sys.argv ArgParser = argparse.ArgumentParser( description=' nc_cst_gen.py arguments ( -help ) %s' % InfoString) # Required arguments: ArgParser.add_argument('-pdbs', type=str, nargs='+', help=' input pdbs ', required=True) # Optional arguments: ArgParser.add_argument('-out', type=str, help=' output directory ', default='./') ArgParser.add_argument( '-max_dist', type=float, default=3.4, help=' distance between the oxygens and downstreams ') ArgParser.add_argument('-min_seq_sep', type=int, default=3, help=' minimum seperation in primary sequece ') ArgParser.add_argument('-upstream_atom', type=str, default='[ON]\w?\d?', help=' grep for upstream atoms ') ArgParser.add_argument('-downstream_atom', type=str, default='[ON]\w?\d?', help=' grep for downstream atoms ') ArgParser.add_argument( '-num_repeats', type=int, default=5, help=' number of repeats to extrapolate contacts for ') ArgParser.add_argument( '-min_sasa', type=float, default=0.0, help=' floor for weighting downstream oxygen contacts ') ArgParser.add_argument( '-min_sasa_weight', type=float, default=1.0, help=' weight of floor for downstream oxygen contacts ') ArgParser.add_argument( '-max_sasa', type=float, default=5.0, help=' ceiling for cst weighting downstream oxygen contacts ') ArgParser.add_argument( '-max_sasa_weight', type=float, default=0.1, help=' weight of ceiling for downstream oxygen contacts ') ArgParser.add_argument('-sasa_probe_radius', type=float, default=0.8, help=' probe radius for sasa calculations ') ArgParser.add_argument('-renumber_pose', type=bool, default=True, help='True|False renumber pdb residues ') ArgParser.add_argument('-disulfide', type=bool, default=True, help='True|False include disulfide constraints ') Args = ArgParser.parse_args() # if len(Args.pdbs[0]) == 1: # Args.pdbs = [''.join(Args.pdbs)] if Args.out[-1] != '/': Args.out = Args.out + '/' import rosetta rosetta.init(extra_options="-mute basic -mute core -mute protocols") ReportedRepeatCount = 0 TotalPdbs = len(Args.pdbs) # Instance of class to convert sasas to cst weight SasaScale = sasa_scale(Args.min_sasa, Args.min_sasa_weight, Args.max_sasa, Args.max_sasa_weight) for iPdb, Pdb in enumerate(Args.pdbs): print ' Working with %s; %d of %d total pdbs ' % (Pdb, iPdb + 1, TotalPdbs) # Starting rosetta Pose = rosetta.pose_from_pdb(Pdb) OutputPdb = Args.out + Pdb # Sets pdb info so residues in dumped pdbs are same as index Pose.pdb_info(rosetta.core.pose.PDBInfo(Pose)) if Args.renumber_pose: rosetta.dump_pdb(Pose, OutputPdb) else: rosetta.dump_pdb(Pose, OutputPdb.replace('.pdb', '_renumbered.pdb')) AllConstraints, SortedConstraints = get_pose_constraints( Pose, Args.max_dist, Args.min_seq_sep, Args.sasa_probe_radius, SasaScale, Args.upstream_atom, Args.downstream_atom, True) if Args.disulfide: DisulfAllConstraints, DisulfSortedConstraints = get_pose_constraints( Pose, 3.5, 2, Args.sasa_probe_radius, SasaScale, 'SG', 'SG', False) AllConstraints.extend(DisulfAllConstraints) # print AllConstraints # print SortedConstraints # print # print # print DisulfAllConstraints # print DisulfSortedConstraints # sys.exit() CstName = OutputPdb.replace('.pdb', '_All.cst') with open(CstName, 'w') as CstFile: print >> CstFile, '\n'.join(AllConstraints) BackboneBackboneCst, BackboneSidechainCst, SidechainSidechainCst = SortedConstraints CstName = OutputPdb.replace('.pdb', '_BBBB.cst') with open(CstName, 'w') as CstFile: print >> CstFile, '\n'.join(BackboneBackboneCst) 
CstName = OutputPdb.replace('.pdb', '_BBSC.cst') with open(CstName, 'w') as CstFile: print >> CstFile, '\n'.join(BackboneSidechainCst) CstName = OutputPdb.replace('.pdb', '_SCSC.cst') with open(CstName, 'w') as CstFile: print >> CstFile, '\n'.join(SidechainSidechainCst) CstName = OutputPdb.replace('.pdb', '_Disulf.cst') with open(CstName, 'w') as CstFile: print >> CstFile, '\n'.join(DisulfAllConstraints)
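The .cst files written above use Rosetta's plain-text constraint format: each AtomPair line names an atom and residue number for both partners, followed by a restraint function and its parameters. Illustrative values only:

# AtomPair  N   4  O  10  HARMONIC 2.9 0.3    # backbone-backbone contact
# AtomPair SG  12  SG 45  HARMONIC 2.0 0.2    # disulfide-style SG-SG contact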
def cap_and_relax_pdb( (RepeatPdb, ReferencePdb, ReferenceCst) ):
    RepeatPose = rosetta.pose_from_pdb(RepeatPdb)
    TrimmedRepeatPose = grafting.return_region( RepeatPose, 3, RepeatPose.n_residue()-3 )
    TrimmedRepeatPose.pdb_info( rosetta.core.pose.PDBInfo( TrimmedRepeatPose ) )

    ReferencePose = rosetta.pose_from_pdb( ReferencePdb )
    ReferencePose.pdb_info( rosetta.core.pose.PDBInfo( ReferencePose ) )
    # rosetta.dump_pdb(TrimmedRepeatPose, 'Trimmed.pdb')

    RepeatLength = int(re.sub(r'.*rep(\d+).*pdb', r'\1', RepeatPdb))
    SourceRanges = re.sub(r'.*src(\d+_\d+__\d+_\d+).*pdb', r'\1', RepeatPdb)
    SourceRanges = SourceRanges.split('__')
    SourceRanges = [ [ int(Value) for Value in Range.split('_') ] for Range in SourceRanges ]
    SourceStart = SourceRanges[0][0]
    SourceEnd = SourceRanges[0][1]

    ''' Add N terminal cap '''
    NcapPose = grafting.return_region( ReferencePose, 1, SourceStart+5 )
    # rosetta.dump_pdb(NcapPose, 'Ncap.pdb')
    NcapLength = NcapPose.n_residue()
    NcapOverhangPositions = [ Position for Position in range(NcapLength-3, NcapLength+1) ]
    NcapOverhangArray = get_residue_array( NcapPose, NcapOverhangPositions )

    RepStartOverhangPositions = [1, 2, 3, 4]
    RepStartOverhangArray = get_residue_array( TrimmedRepeatPose, RepStartOverhangPositions )

    RMSD, rMtx, tVec = solenoid_tools.rmsd_2_np_arrays_rosetta( NcapOverhangArray, RepStartOverhangArray )
    rosetta.Pose.apply_transform_Rx_plus_v(TrimmedRepeatPose, rMtx, tVec)
    # rosetta.dump_pdb( TrimmedRepeatPose, 'TrimmedShifted.pdb' )

    NcapPlusRepeatPose, RMSD, NcapCorrespondingResidues = fuse(NcapPose, TrimmedRepeatPose)
    print 'Ncap attachment RMSD %f'%RMSD
    # rosetta.dump_pdb( NcapPlusRepeatPose, 'NcapPlusRepeat.pdb' )
    NcapPlusRepeatPose.pdb_info( rosetta.core.pose.PDBInfo( NcapPlusRepeatPose ) )

    ''' Add C terminal cap '''
    Cshift = SourceEnd - 6
    CcapPose = grafting.return_region( ReferencePose, Cshift, ReferencePose.n_residue() )
    # rosetta.dump_pdb(CcapPose, 'Ccap.pdb')
    CcapOverhangPositions = [1, 2, 3, 4]
    CcapOverhangArray = get_residue_array( CcapPose, CcapOverhangPositions )

    RepEndOverhangPositions = [ Position for Position in range( NcapPlusRepeatPose.n_residue()-3, NcapPlusRepeatPose.n_residue()+1 ) ]
    RepEndOverhangArray = get_residue_array( NcapPlusRepeatPose, RepEndOverhangPositions )

    RMSD, rMtx, tVec = solenoid_tools.rmsd_2_np_arrays_rosetta( RepEndOverhangArray, CcapOverhangArray )
    rosetta.Pose.apply_transform_Rx_plus_v(CcapPose, rMtx, tVec)
    # rosetta.dump_pdb( CcapPose, 'CcapPose.pdb' )

    CappedRepeatPose, RMSD, CcapCorrespondingResidues = fuse(NcapPlusRepeatPose, CcapPose)
    print 'Ccap attachment RMSD %f'%RMSD

    CappedNamePdb = re.sub(r'(.*).pdb$', r'\1_Cap.pdb', RepeatPdb)
    assert CappedNamePdb != RepeatPdb, 'regular expression substitution failed!'
    rosetta.dump_pdb( CappedRepeatPose, CappedNamePdb )

    ''' Generate csts for cap/repeat edges '''
    CstExtrapolator = constraint_extrapolator(ReferenceCst)
    ConstraintSet = []

    # N cap constraints are easy; no shifts are needed
    Redundict = {}   # for catching constraints already considered
    for Position in range(1, SourceStart+6):
        # Skip positions w/out constraints
        try:
            PositionCstDict = CstExtrapolator.Cst[Position]
        except KeyError:
            continue
        for AtomName in PositionCstDict:
            for Constraint in PositionCstDict[AtomName]:
                # unpack tuple values
                AtomResidueCoords, ConstraintParameters, CstLineNumber, CstType = Constraint
                # if cst considered already, skip it!
                if CstLineNumber in Redundict:
                    continue
                Redundict[CstLineNumber] = 1
                if pose_has(CappedRepeatPose, AtomResidueCoords):
                    ConstraintSet.append(Constraint)

    # C cap constraints are harder; need to shift due to pose expansion
    Redundict = {}
    RepeatCcapPositionStart = CcapCorrespondingResidues[0][0]
    ShiftToRepeatPose = RepeatCcapPositionStart - Cshift
    for Position in range( Cshift, ReferencePose.n_residue()+1 ):
        # Skip positions w/out constraints
        try:
            PositionCstDict = CstExtrapolator.Cst[Position]
        except KeyError:
            continue
        for AtomName in PositionCstDict:
            for Constraint in PositionCstDict[AtomName]:
                AtomResidueCoords, ConstraintParameters, CstLineNumber, CstType = Constraint
                if CstLineNumber in Redundict:
                    continue
                Redundict[CstLineNumber] = 1
                # shift each atom's residue number into the capped pose numbering
                ExpandedPoseAtomResidueCoords = []
                for AtomResiduePair in AtomResidueCoords:
                    ExpandedPosePosition = AtomResiduePair[1] + ShiftToRepeatPose
                    ExpandedPoseAtomResidueCoords.append( ( AtomResiduePair[0], ExpandedPosePosition ) )
                ShiftedConstraint = ExpandedPoseAtomResidueCoords, ConstraintParameters, CstLineNumber, CstType
                if pose_has(CappedRepeatPose, ExpandedPoseAtomResidueCoords):
                    ConstraintSet.append(ShiftedConstraint)

    CapCstName = re.sub(r'(.*).pdb$', r'\1.cst', CappedNamePdb)
    CstExtrapolator.output_cst(ConstraintSet, CapCstName)

    ''' Idealize peptide bonds with command line subprocess '''
    subprocess.check_output(['idealize_jd2.default.linuxgccrelease', '-s', CappedNamePdb])
    IdealizedPdbOldName = re.sub(r'(.*).pdb$', r'\1_0001.pdb', CappedNamePdb)
    IdealizedPdbNewName = re.sub(r'(.*).pdb$', r'\1_Ideal.pdb', CappedNamePdb)
    subprocess.check_output(['mv', IdealizedPdbOldName, IdealizedPdbNewName])
    time.sleep(0.2)

    IdealizedCappedPose = rosetta.pose_from_pdb( IdealizedPdbNewName )

    # make constraint mover and load constraints from file
    Constrainer = rosetta.ConstraintSetMover()
    Constrainer.constraint_file(CapCstName)
    Constrainer.apply(IdealizedCappedPose)

    ''' Set up weights as decided '''
    Talaris = rosetta.getScoreFunction()
    TalarisPlusCst = rosetta.getScoreFunction()
    AtomPairCst = set_all_weights_zero( rosetta.getScoreFunction() )
    AtomPairCst.set_weight(rosetta.atom_pair_constraint, 1.0)

    # A relative weighting scheme is also possible:
    # RelativeWeight = 0.1
    # Weight = ( Talaris(IdealizedCappedPose) * RelativeWeight ) / AtomPairCst(IdealizedCappedPose)
    Weight = 1.0
    TalarisPlusCst.set_weight(rosetta.atom_pair_constraint, Weight)
    TalarisPlusCst.set_weight(rosetta.angle_constraint, Weight)
    TalarisPlusCst.set_weight(rosetta.dihedral_constraint, Weight)

    print 'relaxing %s with %s'%(IdealizedPdbNewName, CapCstName)
    print ' Weight %d '%Weight

    rosetta.relax_pose(IdealizedCappedPose, TalarisPlusCst, 'tag')
    RelaxedPdbName = re.sub(r'(.*)_Ideal.pdb$', r'\1__Relax.pdb', IdealizedPdbNewName)
    rosetta.dump_pdb(IdealizedCappedPose, RelaxedPdbName)

    rosetta.relax_pose(IdealizedCappedPose, Talaris, 'tag')
    RelaxedPdbName = re.sub(r'(.*)_Ideal.pdb$', r'\1__Relax2.pdb', IdealizedPdbNewName)
    rosetta.dump_pdb(IdealizedCappedPose, RelaxedPdbName)
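pose_has is used throughout these capping functions but defined elsewhere (in expand_cst). From its call sites, it checks that every (atom name, residue number) pair in a constraint exists in a pose. A minimal sketch under that assumption, for illustration only:

def pose_has_sketch(pose, atom_residue_coords):
    # hypothetical reimplementation; atom_residue_coords is a list of (atom, residue) pairs
    for atom_name, residue_number in atom_residue_coords:
        if residue_number < 1 or residue_number > pose.n_residue():
            return False
        if not pose.residue(residue_number).has(atom_name.strip()):
            return False
    return True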
def main(argv=None):
    if argv != None:
        sys.argv = [sys.argv[0]] + [arg for arg in argv]

    ArgParser = argparse.ArgumentParser(description=' args for optimize_repeat_structures ')
    ArgParser.add_argument('-pdb_stem', type=str, help=' pdb stem, start of globs for pdbs and csts ', required=True)
    Args = ArgParser.parse_args()

    Pdbs = glob.glob('*%s.pdb' % Args.pdb_stem)

    PdbSortTuples = []
    Skipped = []
    for Pdb in Pdbs:
        # int() raises ValueError when the regular expression extraction fails
        # (the original compared the ints against the pdb name, which never fails)
        try:
            RepeatLength = int(re.sub(r'.*rep(\d+).*pdb', r'\1', Pdb))
            SourceStart = int(re.sub(r'.*src(\d+).*pdb', r'\1', Pdb))
        except ValueError:
            Skipped.append(Pdb)
            continue
        PdbSortTuples.append((RepeatLength, SourceStart, Pdb))

    print 'Skipped:'
    print Skipped
    print

    PdbSortTuples.sort()

    LastPdb = PdbSortTuples[0][2]
    Pose = rosetta.pose_from_pdb(LastPdb)
    LastArray = np.array([ list(Pose.residue(P).xyz('CA')) for P in range(1, Pose.n_residue() + 1) ])

    subprocess.check_output(['mkdir', 'Redundant'])

    for PdbTup in PdbSortTuples[1:]:
        Pdb = PdbTup[2]
        Pose = rosetta.pose_from_pdb(Pdb)
        CA_Array = np.array([ list(Pose.residue(P).xyz('CA')) for P in range(1, Pose.n_residue() + 1) ])

        if len(CA_Array) == len(LastArray):
            RMSD, rMtx, tVec = solenoid_tools.rmsd_2_np_arrays(CA_Array, LastArray)
            print
            print 'LastPdb, Pdb'
            print LastPdb
            print Pdb
            print 'RMSD:', RMSD

            # treat essentially identical backbones as redundant and set them aside
            if RMSD < 0.001:
                PdbStem = re.sub(r'(.*).pdb$', r'\1', Pdb)
                GlobString = '%s*' % PdbStem
                PdbAssociatedFiles = glob.glob(GlobString)
                for File in PdbAssociatedFiles:
                    subprocess.check_output(['mv', File, 'Redundant/'])

        LastArray = copy.deepcopy(CA_Array)
        LastPdb = copy.deepcopy(Pdb)
def main(argv=None):
    if argv != None:
        sys.argv = [sys.argv[0]] + [arg for arg in argv]

    # Arg block
    ArgParser = argparse.ArgumentParser(description=' expand_cst.py ( -help ) %s' % InfoString)
    # Required args
    ArgParser.add_argument('-ref_pdb', type=str, help=' reference pdb ', required=True)
    ArgParser.add_argument('-ref_cst', type=str, help=' cst file corresponding to reference pdb ', required=True)
    ArgParser.add_argument('-repeat_pdb_tag', type=str, help=' input pdb tag ', required=True)
    # Optional args
    ArgParser.add_argument('-out', type=str, help=' output directory ', default='./')
    Args = ArgParser.parse_args()

    if Args.out[-1] != '/':
        Args.out = Args.out + '/'

    # default talaris 2013 score function with constraint weights turned on
    ScoreFunction = rosetta.getScoreFunction()
    ScoreFunction.set_weight(rosetta.atom_pair_constraint, 1.0)
    ScoreFunction.set_weight(rosetta.angle_constraint, 1.0)
    ScoreFunction.set_weight(rosetta.dihedral_constraint, 1.0)

    ReferencePose = rosetta.pose_from_pdb(Args.ref_pdb)
    print 'ReferencePose', ReferencePose

    # modify rosetta cst w/o rosetta
    Constrainer = constraint_extrapolator(Args.ref_cst)

    Pdbs = glob.glob('*%s*.pdb' % Args.repeat_pdb_tag)
    assert len(Pdbs), "No pdbs found with glob '*%s*.pdb'" % Args.repeat_pdb_tag

    for Pdb in Pdbs:
        print 'Pdb:', Pdb
        Pose = rosetta.pose_from_pdb(Pdb)

        try:
            SourceRangeString = re.sub(r'.*src(\d+_\d+__\d+_\d+).*pdb', r'\1', Pdb)
            SourceRanges = [[int(Number) for Number in Range.split('_')] for Range in SourceRangeString.split('__')]
        except ValueError:
            print 'No src range tag, skipping: %s ' % Pdb
            continue
        print 'SourceRanges:', SourceRanges

        RepeatLength = int(re.sub(r'.*rep(\d+).*pdb', r'\1', Pdb))
        print 'RepeatLength', RepeatLength

        PdbTag = (Pdb + '!').replace('.pdb!', '').replace('!', '')
        CstName = PdbTag + '.cst'
        ExtrapolatedConstraints = Constrainer.extrapolate_from_repeat_unit(SourceRanges[0][0], SourceRanges[0][1], RepeatLength, Pose, CstName, PdbTag)
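A hypothetical invocation of this script (the parser's own description names it expand_cst.py); the file names are placeholders following the conventions used elsewhere in this codebase:

# python expand_cst.py -ref_pdb 1EZG.pdb -ref_cst 1EZG_All.cst -repeat_pdb_tag rep24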