def makeFoldxRepair(name):
    """Announce and run ``cleanATOM`` on one PDB file.

    The block of commented-out PyRosetta repack/minimise code that used to
    precede these two statements was never executed and has been removed.

    Args:
        name: path of the PDB file that is passed to ``cleanATOM``.
    """
    print("cleaning:", name)
    cleanATOM(name)
def main():
    """Collect bond lengths for a fixed list of structures, save and plot them.

    Every structure is cleaned with ``cleanATOM``, loaded as a pose and passed
    to ``find_CN_lens`` together with the running list of lengths.  The final
    list is written one value per line to ``CA_N_Bond_Lens.txt`` and then
    handed to ``make_plots``.
    """
    from toolbox import cleanATOM

    # The WHATIF set was not accessible, so a set of structures from the PDB
    # is used instead.
    pdb_ids = ['EGFR', 'centuximab', '1BKR', '5P21', '1E6K', '1F21', '1R9H',
               '2HDA', '2O72', '2IT6']

    CN_len = []
    for pdb_id in pdb_ids:
        cleanATOM(pdb_id + '.pdb')
        # create pose from the cleaned PDB
        pose = pose_from_pdb(pdb_id + '.clean.pdb')
        # find_CN_lens returns the list updated with this pose's bond lengths
        CN_len = find_CN_lens(CN_len, pose)

    # Write the lengths to a file.  The context manager closes (and flushes)
    # the handle before plotting; the original never closed it.
    with open('CA_N_Bond_Lens.txt', 'w') as write_file:
        write_file.writelines(str(length) + '\n' for length in CN_len)

    # generate plots
    make_plots(CN_len)
def main():
    """Prompt for a PDB ID and print alanine propensities per structure class.

    The user is asked again until ``check_valid`` accepts the input; typing
    ``stop`` exits the program.  The accepted file is cleaned, loaded as a
    pose, and every residue except the last is classified with
    ``determine_sec_struct`` (which also receives the following residue).
    For helix, sheet and loop the share of alanine among the residues counted
    for that class is printed as a percentage.
    """
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    # input starts out invalid and must be found valid to proceed
    valid = False
    while not valid:
        # accepts XXXX or XXXX.pdb
        pdb = read_line('Please input PDB ID: ')
        # gives the user the ability to leave the loop
        if pdb == 'stop':
            print('You have chosen to leave the program. Goodbye!')
            sys.exit()
        if len(pdb) == 4:
            pdb += '.pdb'
        valid = check_valid(pdb)
        if not valid:
            print("PDB ID was not valid. Please input only the 4 character PDB code")
            print("If you would like to stop inputting ID's, type in 'stop' when prompted for ID")

    from toolbox import cleanATOM
    cleanATOM(pdb)
    # create pose from XXXX.clean.pdb
    pose = pose_from_pdb(pdb[0:4] + '.clean' + pdb[4:])
    seq = pose.sequence()

    # Tallies keyed by the code returned from determine_sec_struct:
    # 0 non-Ala helix, 1 non-Ala sheet, 2 Ala helix,
    # 3 Ala sheet,     4 Ala loop,      5 non-Ala loop
    counts = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0}
    for i in range(len(seq) - 1):
        sec_struct = determine_sec_struct(seq[i], i + 1, seq[i + 1], i + 2, pose)
        if sec_struct in counts:
            counts[sec_struct] += 1

    def percent(ala, non_ala):
        # A class with no residues at all used to raise ZeroDivisionError;
        # report 0.0 for it instead.
        total = ala + non_ala
        if not total:
            return 0.0
        return ala / float(total) * 100

    print("Helix Propensity= %s" % percent(counts[2], counts[0]))
    print("Sheet Propensity= %s" % percent(counts[3], counts[1]))
    print("Loop Propensity= %s" % percent(counts[4], counts[5]))
def main():
    """Run a Metropolis-Hastings style mutation walk on one structure.

    The PDB name (without the extension) is read from ``sys.argv[1]``.  The
    structure is cleaned, repacked and minimised, then random point mutations
    (never at or to cysteine) are proposed with ``mutate_residue`` and
    accepted with the probability returned by ``calc_prob_mh``.  After the
    burn-in, every accepted mutant is recorded and dumped as ``<i>.pdb``.
    The collected rows are written to a CSV file at the end.
    """
    # takes name of pdb file without the extension
    if len(sys.argv) < 2:
        # fail with a usage message instead of a bare IndexError
        sys.exit('usage: %s <pdb name without the .pdb extension>' % sys.argv[0])
    pdb_file = sys.argv[1]

    # set up timer to figure out how long the code took to run
    t0 = time()

    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core')

    # Constants
    PACK_RADIUS = 10.0
    # Amino acids, notice there is no C
    AAs = ("A", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q",
           "R", "S", "T", "V", "W", "Y")
    # Number of mutations to accept
    max_accept_mut = 1500
    # Accepted mutations up to and including this count are not stored;
    # set to -1 to store everything
    burn_in = 1000
    # Population size
    N = 100
    # Beta (temp term)
    beta = 1

    # Prepare data headers
    data = ['Variant,Rosetta Score,"delta-delta-G",Probability,Generation\n']

    # Load and clean up pdb file
    name = pdb_file + ".pdb"
    cleanATOM(name)
    clean_name = pdb_file + ".clean.pdb"
    initial_pose = pose_from_pdb(clean_name)

    # Set up ScoreFunction
    sf = get_fa_scorefxn()

    # Set up MoveMap; change these for more or less flexibility
    mm = MoveMap()
    mm.set_bb(True)
    mm.set_chi(True)

    # Pack and minimize initial pose to remove clashes.
    pre_pre_packing_score = sf(initial_pose)

    task = standard_packer_task(initial_pose)
    task.restrict_to_repacking()
    task.or_include_current(True)
    pack_rotamers_mover = RotamerTrialsMover(sf, task)
    pack_rotamers_mover.apply(initial_pose)

    min_mover = MinMover()
    min_mover.movemap(mm)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')
    min_mover.apply(initial_pose)

    post_pre_packing_score = sf(initial_pose)

    # Set threshold for selection (based on the score before packing)
    threshold = pre_pre_packing_score / 2

    data.append('WT,' + str(post_pre_packing_score) + ',0.0 ,0.0,0\n')

    # number of residues to select from
    n_res = initial_pose.total_residue()

    # start sim
    i = 0
    gen = 0
    while i < max_accept_mut:
        # update the number of generations that have passed
        gen += 1
        print('accepts: %s' % i)

        # pick a place to mutate and get the amino acid at that position
        mut_location = random.randint(1, n_res)
        res = initial_pose.residue(mut_location)
        # don't mess with C, just choose again
        while res.name1() == 'C':
            mut_location = random.randint(1, n_res)
            res = initial_pose.residue(mut_location)

        # choose the amino acid to mutate to
        proposed_res = AAs[random.randint(0, len(AAs) - 1)]
        # don't bother mutating to the same amino acid, it just takes more time
        while proposed_res == res.name1():
            proposed_res = AAs[random.randint(0, len(AAs) - 1)]

        # make the mutation
        # (author's note: this is a crude model; a newer version should
        # repack and reminimize the whole structure, ideally with backrub)
        mutant_pose = mutate_residue(initial_pose, mut_location, proposed_res,
                                     PACK_RADIUS, sf)

        # score mutant
        variant_score = sf(mutant_pose)

        # get the probability that the mutation will be accepted
        probability = calc_prob_mh(variant_score, post_pre_packing_score, N,
                                   beta, threshold)

        # test to see if mutation is accepted
        if random.random() < probability:
            # create a name for the mutant since it is going to be kept
            variant_name = (res.name1()
                            + str(initial_pose.pdb_info().number(mut_location))
                            + str(proposed_res))

            if i > burn_in:
                # save name and energy change
                data.append(variant_name + "," + str(variant_score) + ","
                            + str(variant_score - post_pre_packing_score) + ","
                            + str(probability) + "," + str(gen) + "\n")
                pdb_name = str(i) + ".pdb"
                mutant_pose.dump_pdb(pdb_name)

            # the accepted mutant becomes the new reference ("wildtype")
            initial_pose = mutant_pose
            post_pre_packing_score = variant_score

            # update number of accepts
            i += 1

    print('\nMutations and scoring complete.')
    t1 = time()

    # Output results.
    # NOTE(review): the slice drops the last five characters of the name given
    # on the command line; kept as in the original - confirm this is intended.
    data_filename = pdb_file[:-5] + 'mh_1500_rep3.csv'
    with open(data_filename, "w") as f:
        f.writelines(data)

    print('Data written to: %s' % data_filename)
    print('program takes %f' % (t1 - t0))
#!/usr/bin/env python
'''This python file is for Homework 2, Problem 4.
I didn't use the template. I created a torsion angle file in my code but did not use the torsion angles to calculate the L/H/E propensities.
--by Xiaotong Zuo, Feb. 2016
'''
# import
from rosetta import *
init()
from toolbox import get_secstruct
from toolbox import cleanATOM
import sys

# use 1m40.pdb as template, first, cleanATOM
cleanATOM("1m40.pdb")
# load pose
pose = pose_from_pdb("1m40.clean.pdb")

### I did not use torsion angles to calculate the propensities!
# create torsion.dat: residue name, residue number, phi, psi and phi+psi,
# one tab-separated row per CA atom record of the input file
# NOTE(review): residue numbers are read from 1yy8.pdb while the pose is
# built from 1m40 - confirm that this pairing is intended.
with open("/Users/XT/Downloads/1yy8.pdb", "r") as f, open("torsion.dat", "w+") as g:
    for line in f:
        a = line.split()
        # blank lines and records with fewer than six fields used to raise
        # IndexError; skip them together with every non-CA record
        if len(a) < 6 or a[0] != "ATOM" or a[2] != "CA":
            continue
        resi = int(a[5])
        phi = pose.phi(resi)
        psi = pose.psi(resi)
        g.write(str(a[3]) + "\t" + str(a[5]) + "\t" + str(phi) + "\t"
                + str(psi) + "\t" + str(psi + phi) + '\n')
        print(a)
## http://graylab.jhu.edu/pyrosetta/downloads/documentation/Workshop6_PyRosetta_Packing_Design.pdf
from toolbox import generate_resfile_from_pdb  # generate mutations using resfiles
from toolbox import mutate_residue  # generate mutations using mutate_residue

## Change to the directory where the PDB files (the WT cas9) are located.
## os.chdir does not expand "~", and a path starting with a backslash is not
## relative to the previous chdir, so the location is built as one path from
## the home directory.  Alter to your specific dropbox path.
os.chdir(os.path.join(os.path.expanduser("~"), "Dropbox", "Waterloo-iGEM-2015",
                      "Math Modelling", "cas9_modification"))

## initializing rosetta:
rosetta.init()

# import cleaning module for PDB to be usable
from toolbox import cleanATOM
# "\4" inside a plain string literal is an octal escape (chr(4)), so the
# original literals never named 4UN3.pdb; the backslash is dropped.
cleanATOM("4UN3.pdb")
# initial pose created from the clean pdb
var_pose = pose_from_pdb("4UN3.clean.pdb")

# raw_input only exists on Python 2; fall back to input on Python 3.
try:
    read_line = raw_input
except NameError:
    read_line = input

# inputted residue number of interest, converted from text to an integer
# position before it is passed to mutate_residue
Num = int(read_line("enter residue number:\n"))

# list of Amino Acids to substitute
AA_lst = "ACDEFGHIKLMNPQRSTVWY"
for AA_var in AA_lst:
    # NOTE(review): this loads a per-residue file "4UN3.<AA>.clean.pdb" and the
    # value returned by mutate_residue is not kept - confirm both are intended.
    var_pose = pose_from_pdb("4UN3." + AA_var + ".clean.pdb")
    mutate_residue(var_pose, Num, AA_var)  # where Num = residue number to substitute AA
# for sanity checking purposes: prints out changed 4UN3 pdb pose protein profile
# (sequence, # of res, what is located at Num residue - if the substitution occurred)
from rosetta import *
init()
import glob
from toolbox import cleanATOM

# step 1, locate the right directory, and cleanATOM
# Files that are already cleaned are skipped so that a second run does not
# clean "*.clean.pdb" files again.
for filename in glob.glob('*.pdb'):
    if filename.endswith('.clean.pdb'):
        continue
    cleanATOM(filename)

# step 2, calculate the bond length and write into a .txt file
# NOTE(review): step 1 cleans the current directory while step 2 reads from
# this fixed path - confirm the script is meant to be run from that directory.
path = '/Users/XT/Downloads/top8000_chains_70/random_10_pdb/'
filenames = glob.glob(path + '*.clean.pdb')
# the context manager closes the output even if loading a pose fails
with open("NClength.txt", "w+") as f:
    for filename in filenames:
        pose = pose_from_pdb(filename)
        for resi_num in range(1, pose.total_residue() + 1):
            residue = pose.residue(resi_num)
            # vector from the backbone N atom to the CA atom
            N_CA_vector = residue.xyz("CA") - residue.xyz("N")
            # NOTE(review): `.norm` is read as an attribute, as in the
            # original; on builds where it is a method it must be called.
            f.write(str(N_CA_vector.norm) + "\n")
def main(argv):
    """Rebuild each listed complex as protein chain A plus trimmed peptide B.

    Args:
        argv: two paths.  ``argv[0]`` is a text file with one PDB path per
            line.  ``argv[1]`` is a whitespace separated table with, per line:
            PDB file name, comma separated ids of all chains, of the protein
            chains, of the peptide chains, and the first and last PDB residue
            number of the peptide segment to keep.

    For every PDB the file is cleaned, split by chain, and a new pose is
    assembled: all protein chains renumbered consecutively as chain 'A',
    followed (after a jump) by the selected peptide residues numbered from 1
    as chain 'B'.  The result is dumped as ``<path without suffix>TrimmedPep.pdb``.

    Raises:
        ValueError: if the peptide start/end residue cannot be located in the
            peptide chain, or the start lies after the end.
    """
    # open list of pdbs; blank lines would otherwise end up as '' look-ups
    with open(argv[0]) as afile:
        pdbs = [line for line in afile if line.strip()]

    # open chain id dictionary, skipping blank lines
    d = {}
    with open(argv[1]) as afile:
        for line in afile:
            item = line.split()
            if not item:
                continue
            d[item[0]] = [item[1].split(","), item[2].split(","),
                          item[3].split(","), item[4], item[5]]

    # loop through pdbs
    for p in pdbs:
        pdb_path = p.rstrip()
        # path without its last suffix, shared by the clean and output names
        stem = pdb_path.rsplit('.', 1)[0]

        # get the pdb name of the pdb in question
        basename_p = os.path.basename(pdb_path)
        entry = d[basename_p]

        # create lists of chain ids from d
        all_chains, prot_chains, pept_chains = entry[0], entry[1], entry[2]

        # clean pdb
        cleanATOM(pdb_path)

        # input the pose, split it by chain, and create a new pose with its
        # own PDBInfo
        pose = pose_from_pdb(stem + ".clean.pdb")
        pose.update_pose_chains_from_pdb_chains()
        chains = pose.split_by_chain()
        print(len(chains))

        newpose = Pose()
        newpose.pdb_info(PDBInfo(newpose))

        # iterate through all chains; every protein chain is appended to
        # newpose (first residue by jump, the rest as polymer residues) and
        # numbered consecutively as chain 'A'.  chains is indexed from 1.
        pdb_counter = 0
        for idx, chain in enumerate(all_chains):
            if chain not in prot_chains:
                continue
            chain_pose = chains[idx + 1]
            newpose.append_residue_by_jump(chain_pose.residue(1),
                                           newpose.total_residue(), "", "", 0)
            pdb_counter += 1
            newpose.pdb_info().number(newpose.total_residue(), pdb_counter)
            newpose.pdb_info().chain(newpose.total_residue(), 'A')
            for i in range(2, chain_pose.total_residue() + 1):
                newpose.append_polymer_residue_after_seqpos(
                    chain_pose.residue(i), newpose.total_residue(), 0)
                pdb_counter += 1
                newpose.pdb_info().number(newpose.total_residue(), pdb_counter)
                newpose.pdb_info().chain(newpose.total_residue(), 'A')

        # then take the first peptide chain and append it to newpose after a jump
        pept_pose = chains[all_chains.index(pept_chains[0]) + 1]

        # determine which res to start and end from using the dict
        pdb_num_res_start = int(entry[3])
        pdb_num_res_end = int(entry[4])
        num_res_start = pept_pose.pdb_info().pdb2pose(pept_chains[0],
                                                      pdb_num_res_start)
        num_res_end = pept_pose.pdb_info().pdb2pose(pept_chains[0],
                                                    pdb_num_res_end)
        print(num_res_start)
        print(num_res_end)
        # pdb2pose yields 0 for a residue it cannot find; stop with a clear
        # message instead of indexing residue 0
        if num_res_start < 1 or num_res_end < 1 or num_res_start > num_res_end:
            raise ValueError(
                'peptide residues %s-%s of chain %s not found in %s'
                % (pdb_num_res_start, pdb_num_res_end, pept_chains[0],
                   basename_p))

        # append first residue of the trimmed peptide
        newpose.append_residue_by_jump(pept_pose.residue(num_res_start),
                                       newpose.total_residue(), "", "", 1)
        newpose.pdb_info().number(newpose.total_residue(), 1)
        newpose.pdb_info().chain(newpose.total_residue(), 'B')

        # append remaining residues of the trimmed peptide, numbered from 2
        for ind, i in enumerate(range(num_res_start + 1, num_res_end + 1), 2):
            newpose.append_polymer_residue_after_seqpos(
                pept_pose.residue(i), newpose.total_residue(), 0)
            newpose.pdb_info().number(newpose.total_residue(), ind)
            newpose.pdb_info().chain(newpose.total_residue(), 'B')

        newpose.pdb_info().obsolete(0)

        # output Trimmed pdb
        out_name = '%sTrimmedPep.pdb' % stem
        print(out_name)
        newpose.dump_pdb(out_name)
def main(argv):
    """Rebuild each listed complex as protein chain A plus trimmed peptide B.

    Args:
        argv: two paths.  ``argv[0]`` is a text file with one PDB path per
            line.  ``argv[1]`` is a whitespace separated table with, per line:
            PDB file name, comma separated ids of all chains, of the protein
            chains, of the peptide chains, and the first and last PDB residue
            number of the peptide segment to keep.

    For every PDB the file is cleaned, split by chain, and a new pose is
    assembled: all protein chains renumbered consecutively as chain 'A',
    followed (after a jump) by the selected peptide residues numbered from 1
    as chain 'B'.  The result is dumped as ``<path without suffix>TrimmedPep.pdb``.

    Raises:
        ValueError: if the peptide start/end residue cannot be located in the
            peptide chain, or the start lies after the end.
    """
    # open list of pdbs; blank lines would otherwise end up as '' look-ups
    with open(argv[0]) as afile:
        pdbs = [line for line in afile if line.strip()]

    # open chain id dictionary, skipping blank lines
    d = {}
    with open(argv[1]) as afile:
        for line in afile:
            item = line.split()
            if not item:
                continue
            d[item[0]] = [item[1].split(","), item[2].split(","),
                          item[3].split(","), item[4], item[5]]

    # loop through pdbs
    for p in pdbs:
        pdb_path = p.rstrip()
        # path without its last suffix, shared by the clean and output names
        stem = pdb_path.rsplit('.', 1)[0]

        # get the pdb name of the pdb in question
        basename_p = os.path.basename(pdb_path)
        entry = d[basename_p]

        # create lists of chain ids from d
        all_chains, prot_chains, pept_chains = entry[0], entry[1], entry[2]

        # clean pdb
        cleanATOM(pdb_path)

        # input the pose, split it by chain, and create a new pose with its
        # own PDBInfo
        pose = pose_from_pdb(stem + ".clean.pdb")
        pose.update_pose_chains_from_pdb_chains()
        chains = pose.split_by_chain()
        print(len(chains))

        newpose = Pose()
        newpose.pdb_info(PDBInfo(newpose))

        # iterate through all chains; every protein chain is appended to
        # newpose (first residue by jump, the rest as polymer residues) and
        # numbered consecutively as chain 'A'.  chains is indexed from 1.
        pdb_counter = 0
        for idx, chain in enumerate(all_chains):
            if chain not in prot_chains:
                continue
            chain_pose = chains[idx + 1]
            newpose.append_residue_by_jump(chain_pose.residue(1),
                                           newpose.total_residue(), "", "", 0)
            pdb_counter += 1
            newpose.pdb_info().number(newpose.total_residue(), pdb_counter)
            newpose.pdb_info().chain(newpose.total_residue(), 'A')
            for i in range(2, chain_pose.total_residue() + 1):
                newpose.append_polymer_residue_after_seqpos(
                    chain_pose.residue(i), newpose.total_residue(), 0)
                pdb_counter += 1
                newpose.pdb_info().number(newpose.total_residue(), pdb_counter)
                newpose.pdb_info().chain(newpose.total_residue(), 'A')

        # then take the first peptide chain and append it to newpose after a jump
        pept_pose = chains[all_chains.index(pept_chains[0]) + 1]

        # determine which res to start and end from using the dict
        pdb_num_res_start = int(entry[3])
        pdb_num_res_end = int(entry[4])
        num_res_start = pept_pose.pdb_info().pdb2pose(pept_chains[0],
                                                      pdb_num_res_start)
        num_res_end = pept_pose.pdb_info().pdb2pose(pept_chains[0],
                                                    pdb_num_res_end)
        print(num_res_start)
        print(num_res_end)
        # pdb2pose yields 0 for a residue it cannot find; stop with a clear
        # message instead of indexing residue 0
        if num_res_start < 1 or num_res_end < 1 or num_res_start > num_res_end:
            raise ValueError(
                'peptide residues %s-%s of chain %s not found in %s'
                % (pdb_num_res_start, pdb_num_res_end, pept_chains[0],
                   basename_p))

        # append first residue of the trimmed peptide
        newpose.append_residue_by_jump(pept_pose.residue(num_res_start),
                                       newpose.total_residue(), "", "", 1)
        newpose.pdb_info().number(newpose.total_residue(), 1)
        newpose.pdb_info().chain(newpose.total_residue(), 'B')

        # append remaining residues of the trimmed peptide, numbered from 2
        for ind, i in enumerate(range(num_res_start + 1, num_res_end + 1), 2):
            newpose.append_polymer_residue_after_seqpos(
                pept_pose.residue(i), newpose.total_residue(), 0)
            newpose.pdb_info().number(newpose.total_residue(), ind)
            newpose.pdb_info().chain(newpose.total_residue(), 'B')

        newpose.pdb_info().obsolete(0)

        # output Trimmed pdb
        out_name = '%sTrimmedPep.pdb' % stem
        print(out_name)
        newpose.dump_pdb(out_name)
def main():
    """Clean every PDB in a directory, keep its longest chain, write a FASTA.

    The directory is taken from the single positional command-line argument.
    For each ``*.pdb`` file that is not itself a ``.clean.pdb`` file:

    1. ``cleanATOM`` is run and the resulting ``.clean.pdb`` file is parsed.
    2. Residues of every chain are renumbered consecutively from 1.
    3. All chains except the one with the most residues are detached and the
       structure is saved back over the ``.clean.pdb`` file.
    4. The sequence of the remaining chain ('X' for non-standard residues) is
       written under the header ``>WT`` to the FASTA path obtained by
       replacing ``/PDBs/`` with ``/wt_fastas/`` and the suffix with
       ``.fasta``.
    """
    import os  # local import: only this function needs it

    parser = argparse.ArgumentParser()
    parser.add_argument('pdb_directory', action="store", type=str)
    inputs = parser.parse_args()

    # os.path.join makes the pattern work with or without a trailing slash
    for pdb_file in glob.glob(os.path.join(inputs.pdb_directory, '*.pdb')):
        print('#######################')
        print('#######################{}'.format(pdb_file))
        # Only the file suffix decides whether this is a cleaned file; the
        # old substring test also skipped any path that merely contained
        # "clean" (for example in a directory name).
        if pdb_file.endswith('.clean.pdb'):
            print('Will overwrite an existing clean pdb so am skipping')
            continue

        clean_pdb_file = os.path.splitext(pdb_file)[0] + '.clean.pdb'
        fasta_outfile_loc = os.path.splitext(
            pdb_file.replace('/PDBs/', '/wt_fastas/'))[0] + '.fasta'

        # Load and clean up pdb file
        cleanATOM(pdb_file)

        pdb_io = PDB.PDBIO()
        pdb_parser = PDB.PDBParser()
        structure = pdb_parser.get_structure(" ", clean_pdb_file)
        if len(structure) != 1:
            # despite the wording only this file is skipped
            print(
                'THERE APPEARS TO BE MORE THAN ONE MODEL IN THIS STRUCTURE BEHAVIOR OF PRORAM IS UNKNOWN ({}). EXITING'
                .format(clean_pdb_file))
            continue

        # Renumber residues 1..n per chain and remember, per chain id, the
        # next free number (residue count + 1) as a measure of its length.
        chain_counts = {}
        for model in structure:
            for chain in model:
                new_number = 1
                for residue in chain.get_residues():
                    res_id = list(residue.id)
                    if res_id[1] != new_number:
                        res_id[1] = new_number
                        residue.id = tuple(res_id)
                    new_number += 1
                chain_counts[chain.id] = new_number

        if not chain_counts:
            # nothing to keep; avoid indexing an empty list below
            print('No chains found in {}, skipping'.format(clean_pdb_file))
            continue

        # Shortest first; everything but the last (longest) chain is removed.
        chains = sorted(chain_counts.items(), key=lambda x: x[1])
        chains_to_delete = [chain_id for chain_id, _ in chains[:-1]]
        for chain_id in chains_to_delete:
            structure[0].detach_child(chain_id)
        pdb_io.set_structure(structure)
        pdb_io.save(clean_pdb_file)

        wt_seq = ''
        for model in structure:
            for chain in model:
                print('kept ID {} and deleted {}'.format(
                    chain.id, chains_to_delete))
                wt_seq = ''.join(
                    three_to_one(residue.get_resname())
                    if is_aa(residue.get_resname(), standard=True) else 'X'
                    for residue in chain)

        with open(fasta_outfile_loc, 'w') as outfile:
            outfile.write('>{}\n{}\n'.format('WT', wt_seq))
return VDW def calc_Hbond(structure): ''''Calculates hydrogen using PyRosetta scoring function''' E=0 #H_bond types H_bond_lst=[hbond_sr_bb,hbond_lr_bb,hbond_bb_sc,hbond_sc] for i in range(len(H_bond_lst)): #gets Hbond, adds to total energy E+=scorefxn.score_by_scoretype(structure,H_bond_lst[i]) return E scorefxn=get_fa_scorefxn() cleanATOM('1YY9.pdb') cleanATOM('EGFR.pdb') cleanATOM('centuximab.pdb') #Gets complexes, makes them into pose complex=pose_from_pdb('1YY9.clean.pdb') EGFR=pose_from_pdb('EGFR.clean.pdb') cent=pose_from_pdb('centuximab.clean.pdb') structures=[complex,EGFR,cent] #for labeling energies as they are printed struct_names=['1YY9','EGFR','Centuximab'] def main(): for j in range(len(structures)): #total pose energy FA score Energy=scorefxn(structures[j])
def main(): #takes name of pdb file without the extention args = sys.argv pdb_file = args[1] #set up timer to figure out how long the code took to run t0 = time() # Initialize Rosetta. init(extra_options='-mute basic -mute core') # Constants PACK_RADIUS = 10.0 #Amino acids, notice there is no C AAs = ("A", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y") #Number of mutations to accept max_accept_mut = 5000 #Population size N = 100 #Beta (temp term) beta = 1 #Prepare data headers data = ['Variant,Rosetta Score,"delta-delta-G",Probability,Generation\n'] #Load and clean up pdb file name = pdb_file + ".pdb" cleanATOM(name) clean_name = pdb_file + ".clean.pdb" initial_pose = pose_from_pdb(clean_name) #Set up ScoreFunction sf = get_fa_scorefxn() #Set up MoveMap. mm = MoveMap() mm.set_bb(True) mm.set_chi(True) #Pack and minimize initial pose to remove clashes. pre_pre_packing_score = sf(initial_pose) task = standard_packer_task(initial_pose) task.restrict_to_repacking() task.or_include_current(True) pack_rotamers_mover = RotamerTrialsMover(sf, task) pack_rotamers_mover.apply(initial_pose) min_mover = MinMover() min_mover.movemap(mm) min_mover.score_function(sf) min_mover.min_type('dfpmin_armijo_nonmonotone') min_mover.apply(initial_pose) post_pre_packing_score = sf(initial_pose) pdb_name = str(pdb_file) + "_min.pdb" initial_pose.dump_pdb(pdb_name) #Set threshold for selection #threshold = post_pre_packing_score/2 #threshold = post_pre_packing_score data.append(str(pdb_file) + str(post_pre_packing_score) + ',0.0,0.0,0\n') data_filename = pdb_file + '.score' with open(data_filename, "w") as f: f.writelines(data) print 'Data written to:', data_filename '''
from toolbox import generate_resfile_from_pdb  # generate mutations using resfiles
from toolbox import mutate_residue  # generate mutations using mutate_residue

## Change to the directory where the PDB files (the WT cas9) are located.
## os.chdir does not expand "~", and a path starting with a backslash is not
## relative to the previous chdir, so the location is built as one path from
## the home directory.  Alter to your specific dropbox path.
os.chdir(os.path.join(os.path.expanduser("~"), "Dropbox", "Waterloo-iGEM-2015",
                      "Math Modelling", "cas9_modification"))

## initializing rosetta:
rosetta.init()

# import cleaning module for PDB to be usable
from toolbox import cleanATOM
# "\4" inside a plain string literal is an octal escape (chr(4)), so the
# original literals never named 4UN3.pdb; the backslash is dropped.
cleanATOM("4UN3.pdb")
# initial pose created from the clean pdb
var_pose = pose_from_pdb("4UN3.clean.pdb")

# raw_input only exists on Python 2; fall back to input on Python 3.
try:
    read_line = raw_input
except NameError:
    read_line = input

# inputted residue number of interest, converted from text to an integer
# position before it is passed to mutate_residue
Num = int(read_line("enter residue number:\n"))

# list of Amino Acids to substitute
AA_lst = "ACDEFGHIKLMNPQRSTVWY"
for AA_var in AA_lst:
    # NOTE(review): this loads a per-residue file "4UN3.<AA>.clean.pdb" and the
    # value returned by mutate_residue is not kept - confirm both are intended.
    var_pose = pose_from_pdb("4UN3." + AA_var + ".clean.pdb")
    mutate_residue(var_pose, Num, AA_var)  # where Num = residue number to substitute AA
# for sanity checking purposes: prints out changed 4UN3 pdb pose protein profile
def makeFoldxRepair(name):
    """Report which PDB file is being processed, then clean it.

    Args:
        name: path of the PDB file; it is printed and passed unchanged to
            ``cleanATOM``.
    """
    label = "cleaning:"
    print(label, name)
    cleanATOM(name)
# File names derived from the PDB identifier.
cleanpdb = "%s.clean.pdb" % pdb
alphaName = "%s_alpha.txt" % pdb
betaName = "%s_beta.txt" % pdb
testName = "test_%s.txt" % pdb

# Both files hold comma separated residue numbers.  Each token is stripped:
# without that, the token carrying the file's trailing newline (or any space
# after a comma) never equals str(i) and the residue is labelled 0.
with open(alphaName, 'r') as fa:
    alphalst = [token.strip() for token in fa.read().split(',')]
with open(betaName, 'r') as fb:
    betalst = [token.strip() for token in fb.read().split(',')]
# sets give constant-time membership tests in the per-residue loop below
alpha_residues = set(alphalst)
beta_residues = set(betalst)

cleanATOM(pdbName)
pose = pose_from_pdb(cleanpdb)
seq = pose.sequence()
print (pose.total_residue(), len(seq))

# One "<index of the residue letter in aminoAcidList>,<label>" row per
# residue: label 1 if the residue number is in the alpha file, 2 if it is in
# the beta file, 0 otherwise.
lines = []
for i in range(1, pose.total_residue() + 1):
    key = str(i)
    if key in alpha_residues:
        label = 1
    elif key in beta_residues:
        label = 2
    else:
        label = 0
    lines.append('{0},{1}\n'.format(aminoAcidList.index(str(seq[i - 1])), label))
with open(testName, 'w') as ft:
    ft.writelines(lines)
def main(argv):
    """Rebuild each listed complex as protein chain A plus an 8-residue peptide B.

    Args:
        argv: two paths.  ``argv[0]`` is a text file with one PDB path per
            line.  ``argv[1]`` is a whitespace separated table with, per line:
            PDB file name and the comma separated ids of all chains, of the
            protein chains and of the peptide chains.

    For every PDB the file is cleaned, split by chain, and a new pose is
    assembled: all protein chains renumbered consecutively as chain 'A',
    followed (after a jump) by the central eight residues of the first
    peptide chain numbered 1-8 as chain 'B'.  The result is dumped as
    ``<path without suffix>TrimmedPep.pdb``.

    Raises:
        ValueError: if the peptide chain has fewer than eight residues.
    """
    # number of peptide residues that are kept
    peptide_length = 8

    # list of pdbs; blank lines would otherwise end up as '' look-ups
    with open(argv[0]) as afile:
        pdbs = [line for line in afile if line.strip()]

    # chain id dictionary, skipping blank lines
    d = {}
    with open(argv[1]) as afile:
        for line in afile:
            item = line.split()
            if not item:
                continue
            d[item[0]] = [item[1].split(","), item[2].split(","),
                          item[3].split(",")]

    for p in pdbs:
        pdb_path = p.rstrip()
        # Strip only the last suffix; splitting on the first '.' broke paths
        # such as './dir/x.pdb' or 'v1.2/x.pdb'.
        stem = pdb_path.rsplit('.', 1)[0]

        basename_p = os.path.basename(pdb_path)
        all_chains, prot_chains, pept_chains = d[basename_p]

        cleanATOM(pdb_path)
        pose = pose_from_pdb(stem + ".clean.pdb")
        pose.update_pose_chains_from_pdb_chains()
        chains = pose.split_by_chain()

        newpose = Pose()
        newpose.pdb_info(PDBInfo(newpose))

        # iterate through all chains; every protein chain is appended to
        # newpose (first residue by jump, the rest as polymer residues) and
        # numbered consecutively as chain 'A'.  chains is indexed from 1.
        pdb_counter = 0
        for idx, chain in enumerate(all_chains):
            if chain not in prot_chains:
                continue
            chain_pose = chains[idx + 1]
            newpose.append_residue_by_jump(chain_pose.residue(1),
                                           newpose.total_residue(), "", "", 0)
            pdb_counter += 1
            newpose.pdb_info().number(newpose.total_residue(), pdb_counter)
            newpose.pdb_info().chain(newpose.total_residue(), 'A')
            for i in range(2, chain_pose.total_residue() + 1):
                newpose.append_polymer_residue_after_seqpos(
                    chain_pose.residue(i), newpose.total_residue(), 0)
                pdb_counter += 1
                newpose.pdb_info().number(newpose.total_residue(), pdb_counter)
                newpose.pdb_info().chain(newpose.total_residue(), 'A')

        # then take the first peptide chain and append it to newpose after a jump
        pept_pose = chains[all_chains.index(pept_chains[0]) + 1]
        num_res_extra = pept_pose.total_residue() - peptide_length
        print(num_res_extra)
        if num_res_extra < 0:
            raise ValueError(
                'peptide chain %s of %s has fewer than %d residues'
                % (pept_chains[0], basename_p, peptide_length))
        # floor division keeps the offset an integer on Python 3 as well
        num_res_offset = num_res_extra // 2 + 1
        print(num_res_offset)
        print(newpose.pdb_info())

        # first residue of the trimmed peptide
        newpose.append_residue_by_jump(pept_pose.residue(num_res_offset),
                                       newpose.total_residue(), "", "", 1)
        newpose.pdb_info().number(newpose.total_residue(), 1)
        newpose.pdb_info().chain(newpose.total_residue(), 'B')

        # remaining residues of the trimmed peptide, numbered 2..8
        for ind, i in enumerate(
                range(num_res_offset + 1, num_res_offset + peptide_length), 2):
            newpose.append_polymer_residue_after_seqpos(
                pept_pose.residue(i), newpose.total_residue(), 0)
            newpose.pdb_info().number(newpose.total_residue(), ind)
            newpose.pdb_info().chain(newpose.total_residue(), 'B')

        print(pept_pose)
        print(newpose)
        newpose.pdb_info().obsolete(0)
        print(newpose.pdb_info())

        out_name = '%sTrimmedPep.pdb' % stem
        print(out_name)
        newpose.dump_pdb(out_name)
def main(argv):
    """Rebuild each listed complex as protein chain A plus an 8-residue peptide B.

    Args:
        argv: two paths.  ``argv[0]`` is a text file with one PDB path per
            line.  ``argv[1]`` is a whitespace separated table with, per line:
            PDB file name and the comma separated ids of all chains, of the
            protein chains and of the peptide chains.

    For every PDB the file is cleaned, split by chain, and a new pose is
    assembled: all protein chains renumbered consecutively as chain 'A',
    followed (after a jump) by the central eight residues of the first
    peptide chain numbered 1-8 as chain 'B'.  The result is dumped as
    ``<path without suffix>TrimmedPep.pdb``.

    Raises:
        ValueError: if the peptide chain has fewer than eight residues.
    """
    # number of peptide residues that are kept
    peptide_length = 8

    # list of pdbs; blank lines would otherwise end up as '' look-ups
    with open(argv[0]) as afile:
        pdbs = [line for line in afile if line.strip()]

    # chain id dictionary, skipping blank lines
    d = {}
    with open(argv[1]) as afile:
        for line in afile:
            item = line.split()
            if not item:
                continue
            d[item[0]] = [item[1].split(","), item[2].split(","),
                          item[3].split(",")]

    for p in pdbs:
        pdb_path = p.rstrip()
        # Strip only the last suffix; splitting on the first '.' broke paths
        # such as './dir/x.pdb' or 'v1.2/x.pdb'.
        stem = pdb_path.rsplit('.', 1)[0]

        basename_p = os.path.basename(pdb_path)
        all_chains, prot_chains, pept_chains = d[basename_p]

        cleanATOM(pdb_path)
        pose = pose_from_pdb(stem + ".clean.pdb")
        pose.update_pose_chains_from_pdb_chains()
        chains = pose.split_by_chain()

        newpose = Pose()
        newpose.pdb_info(PDBInfo(newpose))

        # iterate through all chains; every protein chain is appended to
        # newpose (first residue by jump, the rest as polymer residues) and
        # numbered consecutively as chain 'A'.  chains is indexed from 1.
        pdb_counter = 0
        for idx, chain in enumerate(all_chains):
            if chain not in prot_chains:
                continue
            chain_pose = chains[idx + 1]
            newpose.append_residue_by_jump(chain_pose.residue(1),
                                           newpose.total_residue(), "", "", 0)
            pdb_counter += 1
            newpose.pdb_info().number(newpose.total_residue(), pdb_counter)
            newpose.pdb_info().chain(newpose.total_residue(), 'A')
            for i in range(2, chain_pose.total_residue() + 1):
                newpose.append_polymer_residue_after_seqpos(
                    chain_pose.residue(i), newpose.total_residue(), 0)
                pdb_counter += 1
                newpose.pdb_info().number(newpose.total_residue(), pdb_counter)
                newpose.pdb_info().chain(newpose.total_residue(), 'A')

        # then take the first peptide chain and append it to newpose after a jump
        pept_pose = chains[all_chains.index(pept_chains[0]) + 1]
        num_res_extra = pept_pose.total_residue() - peptide_length
        print(num_res_extra)
        if num_res_extra < 0:
            raise ValueError(
                'peptide chain %s of %s has fewer than %d residues'
                % (pept_chains[0], basename_p, peptide_length))
        # floor division keeps the offset an integer on Python 3 as well
        num_res_offset = num_res_extra // 2 + 1
        print(num_res_offset)
        print(newpose.pdb_info())

        # first residue of the trimmed peptide
        newpose.append_residue_by_jump(pept_pose.residue(num_res_offset),
                                       newpose.total_residue(), "", "", 1)
        newpose.pdb_info().number(newpose.total_residue(), 1)
        newpose.pdb_info().chain(newpose.total_residue(), 'B')

        # remaining residues of the trimmed peptide, numbered 2..8
        for ind, i in enumerate(
                range(num_res_offset + 1, num_res_offset + peptide_length), 2):
            newpose.append_polymer_residue_after_seqpos(
                pept_pose.residue(i), newpose.total_residue(), 0)
            newpose.pdb_info().number(newpose.total_residue(), ind)
            newpose.pdb_info().chain(newpose.total_residue(), 'B')

        print(pept_pose)
        print(newpose)
        newpose.pdb_info().obsolete(0)
        print(newpose.pdb_info())

        out_name = '%sTrimmedPep.pdb' % stem
        print(out_name)
        newpose.dump_pdb(out_name)