def construct_pose_from_matching_domains(host_pose,    # pose
                                         host_res1,    # int
                                         host_res2,    # int
                                         guest_name,   # '1aaaA.pdb'
                                         guest_res1,   # int
                                         guest_res2):  # int
    """Load a guest structure, align it onto the host and show/log both poses.

    The guest PDB is read from the 'alpha-beta-hydrolases/' directory, then
    kabsch_alignment is called with the residues flanking both host anchors
    and both guest anchors.  Both poses are sent to PyMOL, the user is asked
    to press enter, and the string form of each pose is written to ``f``.

    NOTE(review): ``pymover`` and ``f`` are not defined in this function; they
    are presumably module-level objects (a PyMOL mover and an open log file)
    -- confirm against the rest of the file.

    Returns None; building the merged pose is not implemented yet.
    """
    def flanked(anchor):
        # An anchor residue together with its two sequence neighbours.
        return [anchor - 1, anchor, anchor + 1]

    # build guest pose
    guest_pose = Pose()
    pose_from_pdb(guest_pose, 'alpha-beta-hydrolases/' + guest_name)

    # rotate guest pose to align with host's ref residues
    host_anchor_window = flanked(host_res1) + flanked(host_res2)
    guest_anchor_window = flanked(guest_res1) + flanked(guest_res2)
    kabsch_alignment(host_pose, guest_pose,
                     host_anchor_window, guest_anchor_window)

    for shown_pose in (host_pose, guest_pose):
        pymover.apply(shown_pose)
    raw_input('see pymol for match')

    # generate new pose from aligned domains
    # (placeholder: the pose is created but nothing is copied into it yet)
    new_pose = Pose()

    for logged_pose in (host_pose, guest_pose):
        f.write(str(logged_pose))
    f.flush()
def main():
    """Compare two PDB structures and report their RMSD.

    Command line: ``<wild-type pdb> <comparison pdb> [-i]``.

    Both structures are loaded with PyRosetta.  When they have the same
    number of residues they are aligned with kabsch_alignment, calculateRMS
    is called for every residue with the output file and the ``-i`` flag,
    and an RMSD derived from the module-level ``total_square`` is printed.
    The PyRosetta CA RMSD is computed and printed in every case.  The
    output file ``<wt>_vs_<compare>.txt`` is only created when the residue
    counts match.

    NOTE(review): the source indentation was lost; the nesting here is
    reconstructed from data flow (``output`` only exists when the residue
    counts match) -- confirm against the original script.
    """
    # Declared up front rather than inside a late ``if``; presumably
    # calculateRMS accumulates into this module-level value -- confirm.
    global total_square

    opts, args = getopt.getopt(sys.argv[3:], 'i')
    show_index = 0
    for o in opts:
        if '-i' in o:
            show_index = 1

    rosetta.init()
    wtName = sys.argv[1]
    compareName = sys.argv[2]
    outputName = (wtName.split('.')[0] + '_vs_' +
                  compareName.split('.')[0] + ".txt")

    pose1 = rosetta.pose_from_pdb(wtName)
    pose2 = rosetta.pose_from_pdb(compareName)

    use_me = pose1.total_residue() == pose2.total_residue()
    if not use_me:
        # Single joined argument prints the same text as the Python 2
        # ``print a, b, c`` statement.
        print(" ".join(["Residue number not equal",
                        str(pose1.total_residue()),
                        str(pose2.total_residue())]))

    output = None
    try:
        if use_me:
            output = open(outputName, 'w')
            total_residue = pose1.total_residue()
            kabsch_alignment(pose1, pose2,
                             range(1, total_residue + 1),
                             range(1, total_residue + 1))
            # RMSD calculated by my own function
            for i in range(1, total_residue + 1):
                calculateRMS(pose1, pose2, i, output, show_index)

        # RMSD calculated by PyRosetta
        ro_rmsd = rosetta.CA_rmsd(pose1, pose2)
        print("rosetta generated rmsd: " + str(ro_rmsd))

        if use_me:
            me_rmsd = math.sqrt(total_square / total_residue)
            print("me generated rmsd: " + str(me_rmsd))
            output.write(outputName.split('.')[0] + ":\t" + str(ro_rmsd))
    finally:
        # Close the report even if alignment or an RMSD call raised, so the
        # file handle is never leaked.
        if output is not None:
            output.close()
    print("Done")
def main():
    """Average the RMSD between a reference structure and a relaxation series.

    Command line: ``<wild-type pdb> <comparison name> [-i]``.

    Relaxation files are named by relax_filename(compareName, index) for
    index 1, 2, ... and processed while the file exists.  Each one is
    aligned to the reference with kabsch_alignment, calculateRMS is called
    for every residue with ``rmsdList``, and the PyRosetta CA RMSD is
    collected.  Afterwards the average RMSD is printed, outputMinMax is
    called, and the per-residue values divided by the number of structures
    are written to ``<wt>_vs_<compareName>.txt`` (prefixed with the residue
    index when ``-i`` is given).

    NOTE(review): the source indentation was lost; the nesting here is
    reconstructed from data flow -- confirm against the original script.
    """
    # Declared up front rather than inside the loop; presumably calculateRMS
    # accumulates into this module-level value -- confirm.
    global total_square

    opts, args = getopt.getopt(sys.argv[3:], 'i')
    show_index = 0
    for o in opts:
        if '-i' in o:
            show_index = 1

    rosetta.init()
    file_index = 1
    wtName = sys.argv[1]
    compareName = sys.argv[2]
    relax_name = relax_filename(compareName, file_index)
    outputName = wtName.split('.')[0] + '_vs_' + compareName + ".txt"

    output = open(outputName, 'w')
    try:
        pose1 = rosetta.Pose(wtName)
        total_residue = pose1.total_residue()
        rmsdList = [0.00] * total_residue
        totalRMSD = []

        while os.path.isfile(relax_name):
            pose2 = rosetta.Pose(relax_name)
            if total_residue != pose2.total_residue():
                print("Residue number not equal")
                break

            kabsch_alignment(pose1, pose2,
                             range(1, total_residue + 1),
                             range(1, total_residue + 1))
            for i in range(1, total_residue + 1):
                calculateRMS(pose1, pose2, i, rmsdList)

            ro_rmsd = rosetta.CA_rmsd(pose1, pose2)
            totalRMSD.append(ro_rmsd)
            print("rosetta generated rmsd: " + str(ro_rmsd))

            me_rmsd = math.sqrt(total_square / total_residue)
            print("me generated rmsd: " + str(me_rmsd))
            # reset the accumulator for the next structure
            total_square = 0.0

            file_index = file_index + 1
            relax_name = relax_filename(compareName, file_index)

        # Count the structures that were actually compared.  The original
        # derived this from file_index, which stayed at 1 when no file was
        # processed, so min/max ran on an empty list and all-zero "results"
        # were written.
        compared = len(totalRMSD)
        if compared == 0:
            print("No relaxation file")
        else:
            averageRMSD = sum(totalRMSD, 0.0) / compared
            # Single joined argument prints the same text as the Python 2
            # ``print a, b`` statement.
            print(" ".join(["average rmsd: ", str(averageRMSD)]))
            outputMinMax(totalRMSD, compareName)

            print("outputing " + outputName + "...")
            for index in range(1, total_residue + 1):
                rmsdList[index - 1] = rmsdList[index - 1] / compared
                if show_index:
                    output.write(str(index) + '\t' +
                                 str(rmsdList[index - 1]) + '\n')
                else:
                    output.write(str(rmsdList[index - 1]) + '\n')
            output.write(outputName.split('.')[0] +
                         "_relax\taverage rmsd: " + str(averageRMSD))
    finally:
        # Close the report even if loading, alignment or RMSD raised.
        output.close()
    print("Done")
def construct_pose_from_matching_domains(old_host_pose,   # pose
                                         host_res1,       # int
                                         host_res2,       # int
                                         old_guest_pose,  # pose
                                         guest_res1,      # int
                                         guest_res2):     # int
    """Align a guest loop onto a host pose and optionally graft it in.

    Works on copies of both poses.  The guest is aligned to the host with
    kabsch_alignment using the three residues around each pose's first
    anchor.  CA-CA distances below 3.0 between the guest loop
    (guest_res1..guest_res2) and the host residues outside
    host_res1..host_res2 (anchors included) are counted as clashes.

    Depending on the module-level ``args`` namespace the match is then
    shown in PyMOL (``args.visualize``), analysed after a CE alignment
    (``args.find_residues``, only inside the visualize branch because it
    needs the poses saved from PyMOL), and/or written to ``pose-dumps/``
    (``args.outfiles``).

    NOTE(review): ``args``, ``rmsd``, ``pymol`` and ``PDBN`` are not defined
    in this function and must exist at module level when it runs.
    NOTE(review): the source indentation was lost; the nesting here is
    reconstructed from data flow -- confirm against the original script.

    Returns:
        1 when the clash count exceeds ``args.clash_percentage`` percent of
        the guest loop length; otherwise None.
    """
    def report(*items):
        # Prints the same text as the Python 2 ``print a, b`` statement
        # (items converted with str() and separated by one space).
        print(' '.join(str(item) for item in items))

    # copy poses
    # TODO is this step actually necessary? any significant speed toll?
    host_pose = Pose()
    host_pose.assign(old_host_pose)
    guest_pose = Pose()
    guest_pose.assign(old_guest_pose)

    # rotate guest pose to align with host's ref residues
    # (the flanks of host_res2 / guest_res2 were commented out of the
    # alignment in the original and are still not used)
    kabsch_alignment(host_pose, guest_pose,
                     [host_res1 - 1, host_res1, host_res1 + 1],
                     [guest_res1 - 1, guest_res1, guest_res1 + 1])

    # define residues for new pose
    # Pose numbering is 1-based and inclusive, so the upper bound needs +1;
    # the original range() silently dropped the last host residue.
    host_rsds = (list(range(1, host_res1 + 1)) +
                 list(range(host_res2, host_pose.total_residue() + 1)))
    guest_rsds = list(range(guest_res1, guest_res2 + 1))

    # check clashiness
    close_ones = 0
    for gn in guest_rsds:
        for hn in host_rsds:
            dist = guest_pose.residue(gn).xyz('CA').distance(
                host_pose.residue(hn).xyz('CA'))
            if dist < 3.0:
                close_ones += 1
    if close_ones > .01 * args.clash_percentage * len(guest_rsds):
        return 1

    # too much trouble just to get pdb_id.pdb, annoying.
    # hope that pymol chooses this as the name
    host_pose_name = host_pose.pdb_info().name().split('/')[-1].split('.')[0]
    guest_pose_name = guest_pose.pdb_info().name().split('/')[-1].split('.')[0]

    report('HOST: ', host_pose_name)
    report('GUEST: ', guest_pose_name)
    print("####################")
    report('rmsd ', rmsd)  # NOTE(review): ``rmsd`` is a global, not a local
    report("CLASHES: ", close_ones)
    report('guest_res1 ', guest_res1)
    report('guest_res2 ', guest_res2)
    print("####################")

    if args.visualize:
        pymover = PyMOL_Mover()
        pymover.apply(host_pose)
        time.sleep(.1)
        # prettify the visualization
        pymol.cmd.hide('everything')
        pymol.cmd.show('cartoon')
        time.sleep(.1)
        # easiest to see is pink cartoon for host pose and yellow lines for guest loop
        pymol.cmd.color('pink')
        pymover.apply(guest_pose)
        time.sleep(.1)
        pymol.cmd.hide('everything')
        pymol.cmd.show('cartoon')
        time.sleep(.1)
        pymol.cmd.color('yellow', guest_pose_name)
        # hide guest pose residues outside of the loop of interest
        pymol.cmd.hide('( not resi ' + str(guest_res1) + '-' + str(guest_res2) +
                       ' and ' + guest_pose_name + ' )')
        # hide host pose residues outside of lame loop
        pymol.cmd.hide('( ' + host_pose_name + ' and resi ' +
                       str(host_res1) + '-' + str(host_res2) + ' )')
        # show old loop slightly transparent and gray?
        raw_input('hit enter for CE')

        # CEALIGN
        pymol.cmd.cealign(host_pose_name, guest_pose_name)

        # analyze: round-trip the CE-aligned objects through PDB files
        pymol.cmd.save('ce_host.pdb', host_pose_name)
        # time.sleep?
        pymol.cmd.save('ce_guest.pdb', guest_pose_name)
        ce_host_pose = Pose()
        ce_guest_pose = Pose()
        pose_from_pdb(ce_host_pose, 'ce_host.pdb')
        pose_from_pdb(ce_guest_pose, 'ce_guest.pdb')
        print(ce_host_pose)
        print(ce_guest_pose)

        if args.find_residues:
            # NOTE: a per-host-residue minimum-distance dump to ce_dists.csv
            # was commented out in the original and is omitted here.

            ##### SEC STRUCT #####
            print("checking secondary structures to preserve")
            # USING PDB NUMBERING
            print("\n\nUSING PDB NUMBERING\n\n")

            # populate pose secstructs
            DsspMover().apply(ce_host_pose)
            DsspMover().apply(ce_guest_pose)
            ce_host_ss = ce_host_pose.secstruct()
            ce_guest_ss = ce_guest_pose.secstruct()
            print(ce_host_ss)
            print(ce_guest_ss)

            print("# host res1")
            report('pose_num:', host_res1)
            report('pdb_num: ', PDBN(ce_host_pose, host_res1))

            # find min_dist-guest-res from manually chosen host res
            ce_res_dists = {}
            for r1 in range(1, ce_guest_pose.total_residue() + 1):
                try:
                    dist = ce_host_pose.residue(host_res1).xyz('CA').distance(
                        ce_guest_pose.residue(r1).xyz('CA'))
                    ce_res_dists[PDBN(ce_guest_pose, r1)] = dist
                except Exception:
                    # best effort: skip residues that cannot be measured, but
                    # let KeyboardInterrupt / SystemExit propagate
                    print("exception")
            closest_res = min(ce_res_dists, key=ce_res_dists.get)
            print("# dists")
            print([str(res) + ': ' + str(dist)
                   for res, dist in ce_res_dists.items()
                   if closest_res - 10 < res < closest_res + 10])
            print("# closest guest res")
            print(closest_res)

            # move to end of ss if present
            # TODO confirm accuracy in indexing
            # NOTE(review): closest_res is a key produced by PDBN() but is
            # used below as an index into the secstruct string -- confirm
            # that the two numberings agree.
            guest_res1_ss_end = None  # stays None when no 'L' residue follows
            for r1 in range(closest_res - 1, ce_guest_pose.total_residue()):  # index offset
                if ce_guest_ss[r1] == 'L':
                    guest_res1_ss_end = PDBN(ce_guest_pose, r1)
                    break
            print("# last res of guest ss")
            print(guest_res1_ss_end)

            print("# guest's regional ss")
            # clamp the +/-10 window so it can neither wrap around with a
            # negative index nor run past the end of the secstruct string
            window_start = max(0, closest_res - 10)
            window_stop = min(len(ce_guest_ss), closest_res + 10)
            for a in range(window_start, window_stop):
                report(PDBN(ce_guest_pose, a + 1), ce_guest_ss[a])

            # closest guest ss-end upstream
            # closest guest ss-end downstream

        raw_input('hit enter to continue to next match')

    if args.outfiles:
        # already working with duplicate poses, just modify in place
        # to remove residues, delete backwards; rosetta updates sequence index to keep continuity from 1
        for r in reversed(range(host_res1, host_res2 + 1)):
            # WILL SEGFAULT AT r = 1
            host_pose.delete_polymer_residue(r)
        for r in reversed(guest_rsds):
            host_pose.append_polymer_residue_after_seqpos(
                guest_pose.residue(r), host_res1, 0)

        lego_pose = Pose()
        lego_pose.assign(host_pose)

        # TODO shorten pose name after debugged and stuff
        origin_pose = host_pose_name + '-' + str(host_res1) + '-' + str(host_res2)
        match_pose = guest_pose_name + '-' + str(guest_res1) + '-' + str(guest_res2)
        #match_params = '%.4f-%i' % (rmsd, close_ones)

        try:
            os.mkdir('pose-dumps/')
        except OSError:
            pass  # dir exists
        dump_name = 'pose-dumps/' + origin_pose + '_' + match_pose + '.pdb'
        lego_pose.dump_pdb(dump_name)
        print('successfully dumped to file ' + dump_name)


def build_arg_parser():
    """Return the command-line parser for the domain-matching script.

    The tolerance options declare an explicit ``type`` so that values given
    on the command line are numbers; without it argparse hands back strings
    and the arithmetic on ``args.clash_percentage`` fails.
    """
    parser = argparse.ArgumentParser()

    # not optional
    parser.add_argument("ref_pdb", help="path to host pdb file")
    parser.add_argument("ref_res_1", type=int, help="residue 1 for reference pdb")
    parser.add_argument("ref_res_2", type=int, help="residue 2 for reference pdb")

    m_group = parser.add_mutually_exclusive_group()
    # directory of guests
    m_group.add_argument("-d", "--directory", help="directory of pdbs to check against")
    # specific guest
    m_group.add_argument("-g", "--guest_pdb", help="path to a single pdb to check against")

    # specific reses in guest
    parser.add_argument("--res1", type=int, help="use only these residues in guest_pdb")
    parser.add_argument("--res2", type=int, help="use only these residues in guest_pdb")

    # output file
    parser.add_argument("-o", "--outfiles", help="whether or not pdbs are created (in pose-dumps/ dir) from the matches", action="store_true")

    # visualize
    parser.add_argument("-V", "--visualize", help="use import-capable pymol (not MacPyMOL) to visualize matches as they are found", action="store_true")

    # tolerances
    parser.add_argument("--rmsd", type=float, default=3, help="maximum root-mean-squared deviation between transforms (default is 3)")
    parser.add_argument("--length", type=int, default=100, help="maximum number of residues in matching loop (default is 100)")
    parser.add_argument("--clash_percentage", type=float, default=10, help="maximum number of residues that clash between new loop and host, as a percentage of the length of the new loop (default is 10)")
    parser.add_argument("--loop_percentage", type=float, default=50, help="maximum percentage of new loop as 'loop' secondary structure (default is 50)")
    parser.add_argument("--open_tolerances", help="set all tolerances to maximum", action="store_true")
    # "--half-tolerances"; if passed, set all tolerances to half

    # cealign distances
    parser.add_argument("--find_residues", help="guess which residues to clip at", action="store_true")
    return parser


if __name__ == '__main__':
    # Kept at module level on purpose: ``args`` and ``pymol`` are read as
    # globals by construct_pose_from_matching_domains.
    parser = build_arg_parser()
    args = parser.parse_args()

    # maximize tolerances
    if args.open_tolerances:
        args.rmsd = 100
        args.length = 10000
        args.clash_percentage = 100
        args.loop_percentage = 100
        print('tolerances have been set to maximum')

    # start pymol if appropriate
    if args.visualize:
        print('importing pymol...')
        sys.path.append('/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/')
        import pymol
        silence = pymol.finish_launching()
        silence = pymol.cmd.do('run ~/Desktop/PyRosetta/PyMOLPyRosettaServer.py')
        time.sleep(2)

    orig_pdb_file = args.ref_pdb                 # ./path/to/aaaa.pdb
    orig_pdb_id = orig_pdb_file.split('/')[-1]   # aaaa.pdb
    residue1 = args.ref_res_1
    residue2 = args.ref_res_2

    # NOTE: the debug log ('thorp-log') and the match list
    # ('matching_domains.<timestamp>') were commented out in the original
    # and are still disabled.
    # TODO output tolerances

    find_matching_domains(args)
def construct_pose_from_matching_domains(old_host_pose,   # pose
                                         host_res1,       # int
                                         host_res2,       # int
                                         old_guest_pose,  # pose
                                         guest_res1,      # int
                                         guest_res2):     # int
    """Align a guest loop onto a host pose and optionally graft it in.

    Works on copies of both poses.  The guest is aligned to the host with
    kabsch_alignment using the three residues around each pose's first
    anchor.  CA-CA distances below 3.0 between the guest loop
    (guest_res1..guest_res2) and the host residues outside
    host_res1..host_res2 (anchors included) are counted as clashes.

    Depending on the module-level ``args`` namespace the match is then
    shown in PyMOL (``args.visualize``), analysed after a CE alignment
    (``args.find_residues``, only inside the visualize branch because it
    needs the poses saved from PyMOL), and/or written to ``pose-dumps/``
    (``args.outfiles``).

    NOTE(review): ``args``, ``rmsd``, ``pymol`` and ``PDBN`` are not defined
    in this function and must exist at module level when it runs.
    NOTE(review): the source indentation was lost; the nesting here is
    reconstructed from data flow -- confirm against the original script.

    Returns:
        1 when the clash count exceeds ``args.clash_percentage`` percent of
        the guest loop length; otherwise None.
    """
    def report(*items):
        # Prints the same text as the Python 2 ``print a, b`` statement
        # (items converted with str() and separated by one space).
        print(' '.join(str(item) for item in items))

    # copy poses
    # TODO is this step actually necessary? any significant speed toll?
    host_pose = Pose()
    host_pose.assign(old_host_pose)
    guest_pose = Pose()
    guest_pose.assign(old_guest_pose)

    # rotate guest pose to align with host's ref residues
    # (the flanks of host_res2 / guest_res2 were commented out of the
    # alignment in the original and are still not used)
    host_anchor = [host_res1 - 1, host_res1, host_res1 + 1]
    guest_anchor = [guest_res1 - 1, guest_res1, guest_res1 + 1]
    kabsch_alignment(host_pose, guest_pose, host_anchor, guest_anchor)

    # define residues for new pose
    # Pose numbering is 1-based and inclusive, so the upper bound needs +1;
    # the original range() silently dropped the last host residue.
    host_rsds = (list(range(1, host_res1 + 1)) +
                 list(range(host_res2, host_pose.total_residue() + 1)))
    guest_rsds = list(range(guest_res1, guest_res2 + 1))

    # check clashiness
    close_ones = 0
    for gn in guest_rsds:
        for hn in host_rsds:
            dist = guest_pose.residue(gn).xyz('CA').distance(
                host_pose.residue(hn).xyz('CA'))
            if dist < 3.0:
                close_ones += 1
    if close_ones > .01 * args.clash_percentage * len(guest_rsds):
        return 1

    # too much trouble just to get pdb_id.pdb, annoying.
    # hope that pymol chooses this as the name
    host_pose_name = host_pose.pdb_info().name().split('/')[-1].split('.')[0]
    guest_pose_name = guest_pose.pdb_info().name().split('/')[-1].split('.')[0]

    report('HOST: ', host_pose_name)
    report('GUEST: ', guest_pose_name)
    print("####################")
    report('rmsd ', rmsd)  # NOTE(review): ``rmsd`` is a global, not a local
    report("CLASHES: ", close_ones)
    report('guest_res1 ', guest_res1)
    report('guest_res2 ', guest_res2)
    print("####################")

    if args.visualize:
        pymover = PyMOL_Mover()
        pymover.apply(host_pose)
        time.sleep(.1)
        # prettify the visualization
        pymol.cmd.hide('everything')
        pymol.cmd.show('cartoon')
        time.sleep(.1)
        # easiest to see is pink cartoon for host pose and yellow lines for guest loop
        pymol.cmd.color('pink')
        pymover.apply(guest_pose)
        time.sleep(.1)
        pymol.cmd.hide('everything')
        pymol.cmd.show('cartoon')
        time.sleep(.1)
        pymol.cmd.color('yellow', guest_pose_name)
        # hide guest pose residues outside of the loop of interest
        pymol.cmd.hide('( not resi ' + str(guest_res1) + '-' +
                       str(guest_res2) + ' and ' + guest_pose_name + ' )')
        # hide host pose residues outside of lame loop
        pymol.cmd.hide('( ' + host_pose_name + ' and resi ' +
                       str(host_res1) + '-' + str(host_res2) + ' )')
        # show old loop slightly transparent and gray?
        raw_input('hit enter for CE')

        # CEALIGN
        pymol.cmd.cealign(host_pose_name, guest_pose_name)

        # analyze: round-trip the CE-aligned objects through PDB files
        pymol.cmd.save('ce_host.pdb', host_pose_name)
        # time.sleep?
        pymol.cmd.save('ce_guest.pdb', guest_pose_name)
        ce_host_pose = Pose()
        ce_guest_pose = Pose()
        pose_from_pdb(ce_host_pose, 'ce_host.pdb')
        pose_from_pdb(ce_guest_pose, 'ce_guest.pdb')
        print(ce_host_pose)
        print(ce_guest_pose)

        if args.find_residues:
            # NOTE: a per-host-residue minimum-distance dump to ce_dists.csv
            # was commented out in the original and is omitted here.

            ##### SEC STRUCT #####
            print("checking secondary structures to preserve")
            # USING PDB NUMBERING
            print("\n\nUSING PDB NUMBERING\n\n")

            # populate pose secstructs
            DsspMover().apply(ce_host_pose)
            DsspMover().apply(ce_guest_pose)
            ce_host_ss = ce_host_pose.secstruct()
            ce_guest_ss = ce_guest_pose.secstruct()
            print(ce_host_ss)
            print(ce_guest_ss)

            print("# host res1")
            report('pose_num:', host_res1)
            report('pdb_num: ', PDBN(ce_host_pose, host_res1))

            # find min_dist-guest-res from manually chosen host res
            ce_res_dists = {}
            for r1 in range(1, ce_guest_pose.total_residue() + 1):
                try:
                    dist = ce_host_pose.residue(host_res1).xyz('CA').distance(
                        ce_guest_pose.residue(r1).xyz('CA'))
                    ce_res_dists[PDBN(ce_guest_pose, r1)] = dist
                except Exception:
                    # best effort: skip residues that cannot be measured, but
                    # let KeyboardInterrupt / SystemExit propagate
                    print("exception")
            closest_res = min(ce_res_dists, key=ce_res_dists.get)
            print("# dists")
            print([
                str(res) + ': ' + str(dist)
                for res, dist in ce_res_dists.items()
                if closest_res - 10 < res < closest_res + 10
            ])
            print("# closest guest res")
            print(closest_res)

            # move to end of ss if present
            # TODO confirm accuracy in indexing
            # NOTE(review): closest_res is a key produced by PDBN() but is
            # used below as an index into the secstruct string -- confirm
            # that the two numberings agree.
            guest_res1_ss_end = None  # stays None when no 'L' residue follows
            for r1 in range(closest_res - 1,
                            ce_guest_pose.total_residue()):  # index offset
                if ce_guest_ss[r1] == 'L':
                    guest_res1_ss_end = PDBN(ce_guest_pose, r1)
                    break
            print("# last res of guest ss")
            print(guest_res1_ss_end)

            print("# guest's regional ss")
            # clamp the +/-10 window so it can neither wrap around with a
            # negative index nor run past the end of the secstruct string
            window_start = max(0, closest_res - 10)
            window_stop = min(len(ce_guest_ss), closest_res + 10)
            for a in range(window_start, window_stop):
                report(PDBN(ce_guest_pose, a + 1), ce_guest_ss[a])

            # closest guest ss-end upstream
            # closest guest ss-end downstream

        raw_input('hit enter to continue to next match')

    if args.outfiles:
        # already working with duplicate poses, just modify in place
        # to remove residues, delete backwards; rosetta updates sequence index to keep continuity from 1
        for r in reversed(range(host_res1, host_res2 + 1)):
            # WILL SEGFAULT AT r = 1
            host_pose.delete_polymer_residue(r)
        for r in reversed(guest_rsds):
            host_pose.append_polymer_residue_after_seqpos(
                guest_pose.residue(r), host_res1, 0)

        lego_pose = Pose()
        lego_pose.assign(host_pose)

        # TODO shorten pose name after debugged and stuff
        origin_pose = host_pose_name + '-' + str(host_res1) + '-' + str(
            host_res2)
        match_pose = guest_pose_name + '-' + str(guest_res1) + '-' + str(
            guest_res2)
        #match_params = '%.4f-%i' % (rmsd, close_ones)

        try:
            os.mkdir('pose-dumps/')
        except OSError:
            pass  # dir exists
        dump_name = 'pose-dumps/' + origin_pose + '_' + match_pose + '.pdb'
        lego_pose.dump_pdb(dump_name)
        print('successfully dumped to file ' + dump_name)
if terms[2]=='O1' and terms[3]=='TST' and terms[4]=='X': tst_o1 = [ float(terms[6]), float(terms[7]), float(terms[8]) ] if terms[2]=='C2' and terms[3]=='TST' and terms[4]=='X': tst_c2 = [ float(terms[6]), float(terms[7]), float(terms[8]) ] break except: pass pose = pose_from_pdb( tst_type_set, pathname ) ap1_tmp, pose1 = structural_alignment.kabsch_alignment( ap1_tmp, pose, [ ap1_c33, ap1_s1, ap1_o9, ap1_c29 ], [ tst_c1, tst_s1, tst_o1, tst_c2 ] ) ap1_out = None ap1_out = Pose() ap1_out.assign(ap1_tmp) pose = pose_from_pdb( tst_type_set, pathname ) ap2_tmp, pose2 = structural_alignment.kabsch_alignment( ap2_tmp, pose, [ ap2_c1, ap2_s1, ap2_o1, ap2_c3 ], [ tst_c1,