def load_structures(self):
    """Load the structure file of every entry group in ``self.alignment``.

    For each entry group the structure filename is read from the group's
    master entry. Groups without a filename are skipped. Every structure
    that is loaded is stored in ``self.structures`` under the group's code.
    """
    for eg in self.alignment:
        # Look the filename up on the group being visited. The previous code
        # used ``self.alignment[nativeid]`` here; ``nativeid`` is neither a
        # parameter nor a local of this method and does not change with
        # ``eg``, so every group was paired with the same file.
        fname = eg.getMasterEntry().getStructureFilename()
        if fname:
            code = eg.getCode()
            # open() is the documented way to open a file; file() only
            # exists on Python 2.
            # NOTE(review): the handle is not closed here because it is not
            # visible from this method whether Pdb reads it eagerly - confirm.
            self.structures[code] = Pdb(code, open(fname))
def __init__(self, pdb, code=None):
    """Fill this object from a list, a tuple or a Pdb-compatible source.

    A falsy ``pdb`` adds nothing. A list or tuple is copied item by item.
    Anything else is wrapped in Pdb() unless it already is a Pdb, and each
    of its residues is appended as a Residue; a truthy ``code`` on that Pdb
    is copied to ``self.code``. An explicit ``code`` argument (anything but
    None) is applied last and therefore wins.
    """
    if pdb:
        if isinstance(pdb, (list, tuple)):
            for item in pdb:
                self.append(item)
        else:
            source = pdb if isinstance(pdb, Pdb) else Pdb(pdb)
            for raw_residue in source.xresidues():
                self.append(Residue(raw_residue))
            if source.code:
                self.code = source.code

    if code is not None:
        self.code = code
def __init__(self, pdb):
    """Fill this object from a Pdb object or a sequence of residues.

    An empty ``pdb`` adds nothing. A ResidueList, or any sequence whose
    first element is a Residue, is copied item by item. Anything else is
    passed to Pdb() first (unless it already is a Pdb) and each of its
    residues is appended as a Residue.

    ``self.code`` is taken from the ``code`` attribute of the source and is
    None when the source has no such attribute.
    """
    if len(pdb) == 0:
        pass
    elif isinstance(pdb, ResidueList) or isinstance(pdb[0], Residue):
        for r in pdb:
            self.append(r)
    else:
        if not isinstance(pdb, Pdb):
            pdb = Pdb(pdb)
        for r in pdb.xresidues():
            self.append(Residue(r))

    # Only a missing attribute should fall back to None. The previous bare
    # ``except:`` also swallowed KeyboardInterrupt, SystemExit and any
    # genuine error raised while reading the attribute.
    self.code = getattr(pdb, "code", None)
def to_pdb(self, atomfilter=lambda atom: True):
    """Collect the atoms of this ResidueList into a new Pdb object.

    ``atomfilter`` is called with every atom; only atoms for which it
    returns a true value are kept. The default keeps every atom. The new
    Pdb is created with this object's ``code``.
    """
    result = Pdb(self.code, [])
    # Lazily flatten residues into atoms; order is residue by residue.
    all_atoms = (atom for residue in self for atom in residue)
    for atom in all_atoms:
        if atomfilter(atom):
            result.data.append(atom)
    return result
def __init__(self, pdb, code=None):
    """Build the object from a Pdb, a list of ResidueLists or a Pdb() input.

    pdb  -- a Pdb object, a sequence whose first element is a ResidueList,
            or anything else accepted by Pdb() (the original documentation
            mentions filenames). A falsy value that is not a Pdb leaves
            the object empty.
    code -- optional short description (usually a PDB code). When it is
            not None it overrides whatever code was taken from the input.

    ``self.ligands`` starts as an empty list and is replaced by a Protein
    built from ``pdb.ligands`` whenever a Pdb object is processed.
    """
    self.code = ""
    self.ligands = []

    if pdb and not isinstance(pdb, Pdb):
        if isinstance(pdb[0], ResidueList):
            for chain in pdb:
                self.append(chain)
            self.code = pdb[0].code
        else:
            pdb = Pdb(pdb)

    # Reached both for a Pdb passed in directly and for one created above.
    if isinstance(pdb, Pdb):
        protein_chains = ResidueList(pdb).split_chains()
        self.extend(protein_chains)
        self.code = pdb.code
        ligand_chains = ResidueList(pdb.ligands).split_chains()
        self.ligands = Protein(ligand_chains)

    if code is not None:
        self.code = code
def superimpose(struc1_allchains, struc2_allchains, subset1=None, subset2=None,
                fname1=None, fname2=None, align_atoms=("N", "CA", "C", "O"),
                options="", modify_structures=True, normalise_by_first=False):
    """Align two structures with TM-align and optionally move the first one.

    If modify_structures=True, structure1 will be rotated/translated onto
    structure2.

    struc1_allchains, struc2_allchains
        Two Pdb objects, or two values of another (identical) type, which
        are then opened as files and parsed into Pdb objects. In the latter
        case the parsed structures are never handed back to the caller, so
        ``modify_structures`` is forced to False.
    subset1, subset2
        Optional parts of the structures to align; default to the full
        structures. If a subset has more than one chain, only its first
        chain is used.
    fname1, fname2, align_atoms
        Accepted for backward compatibility; the current implementation
        does not use them.
    options
        Extra command line options handed to TM-align.
    normalise_by_first
        Append ``-L <residue count of subset1>`` to ``options``.

    Returns ``(seq1, seq2, alignment_info)`` where the two sequences are
    ``alignment_info["seq1"]`` and ``alignment_info["seq2"]``.

    Raises PoorSuperpositionError when the structure should be modified but
    TM-align produced no transformation.
    """
    assert type(struc1_allchains) == type(struc2_allchains)
    assert type(subset1) == type(subset2)

    if not isinstance(struc1_allchains, Pdb):
        # NOTE(review): the handles are left open because it is not visible
        # here whether Pdb reads its input eagerly - confirm before closing.
        struc1_allchains = Pdb("pdb1", open(struc1_allchains))
        struc2_allchains = Pdb("pdb2", open(struc2_allchains))
        # We're not returning the structures to the caller, so no use
        # modifying them
        modify_structures = False

    # Identity test: ``None == x`` would dispatch to the comparison methods
    # of whatever object was passed in.
    if subset1 is None:
        subset1 = struc1_allchains
    if subset2 is None:
        subset2 = struc2_allchains

    # if structure has more than 1 chain, only use the first one
    if subset1.chaincount() > 1:
        subset1 = subset1.get_first_chain()
    if subset2.chaincount() > 1:
        subset2 = subset2.get_first_chain()

    if normalise_by_first:
        options += " -L %d" % (subset1.rescount())

    # FORCE PRE-PARSING AND CREATION OF TEMPORARY FILES!!!
    # This ensures predictable behaviour with regards to insertion codes,
    # etc., which TM-align just removes from input files.
    #
    # This will create temporary PDB files and run TMalign on those.
    transform, alignment_info = tmalign_objects(subset1, subset2, options)

    if modify_structures:
        if not transform:
            raise PoorSuperpositionError(
                "Poor superposition. TM-align did not generate a rotation matrix."
            )
        transform_structure(struc1_allchains, transform)

    return alignment_info["seq1"], alignment_info["seq2"], alignment_info
def getpdb(pdb_code, pdb_dir):
    """Return a Pdb object for ``pdb_code`` looked up in ``pdb_dir``.

    The lookup is delegated to get_pdb_file(); when that returns None a
    NotFoundError naming the code and the directory is raised.
    """
    handle = get_pdb_file(pdb_code, pdb_dir)
    if handle is not None:
        return Pdb(pdb_code, handle)

    message = "PDB file '%s' not found in database dir '%s'" % (pdb_code,
                                                                pdb_dir)
    raise NotFoundError(message)
def reduceToAlignable(struc1_allchains, struc2_allchains, seq1, seq2,
                      subset1=None, subset2=None,
                      atom_types=("N", "CA", "C", "O"),
                      modify_structures=True):
    """Reduce two structures to the atoms of their aligned residues.

    struc1_allchains, struc2_allchains
        Pdb objects (asserted).
    seq1, seq2
        Aligned (gapped) sequences. A value of None is replaced by the
        sequence of the corresponding subset.
    subset1, subset2
        Optional Pdb objects to take the atoms from; default to the full
        structures.
    atom_types
        Atom names requested for every aligned residue. When either residue
        does not yield exactly that many atoms, the pair is reduced with
        intersectAtomTypes().
    modify_structures
        Accepted for interface compatibility; not used in this function.

    Returns two new Pdb objects of equal length holding the selected atoms
    of the aligned residues, in alignment order.

    Raises ValueError if a sequence is empty and ParsingError if the
    alignment contains no aligned residue pair. Consistency between the
    sequences and the structure data is checked with assertions.
    """
    assert type(struc1_allchains) == type(struc2_allchains)
    assert type(subset1) == type(subset2)
    assert isinstance(struc1_allchains, Pdb)
    # Identity test, consistent with the checks below; ``None == subset1``
    # would call into the comparison methods of the Pdb object.
    assert subset1 is None or isinstance(subset1, Pdb)

    if subset1 is None:
        subset1 = struc1_allchains
    if subset2 is None:
        subset2 = struc2_allchains

    if seq1 is None:
        seq1 = subset1.get_seq()
    if seq2 is None:
        seq2 = subset2.get_seq()

    if not (seq1 and seq2):
        raise ValueError(
            "Need to have non-empty sequence to align proteins:\nseq1:%s\nseq2:%s\n"
            % (seq1, seq2))

    subset1_resbounds = subset1.residue_boundaries()
    subset2_resbounds = subset2.residue_boundaries()

    # residue count, according to the structure data
    pdb1_rescount = len(subset1_resbounds)
    pdb2_rescount = len(subset2_resbounds)

    # Make sure the residue counts coincide in sequence and structure data
    assert length_ungapped(seq1) == pdb1_rescount, \
        "length_ungapped(seq1) = %d, pdb1_rescount = %d" % (
            length_ungapped(seq1), pdb1_rescount)
    assert length_ungapped(seq2) == pdb2_rescount, \
        "length_ungapped(seq2) = %d, pdb2_rescount = %d" % (
            length_ungapped(seq2), pdb2_rescount)

    # Get the residue indices of aligned residues
    aligned_indeces1, aligned_indeces2 = find_aligned_residues(seq1, seq2)
    assert len(aligned_indeces1) == len(aligned_indeces2)

    if not aligned_indeces1:
        raise ParsingError("No aligned residues?")

    # Get the subset of atoms corresponding to the aligned residues
    aligned_pdb1 = Pdb(subset1, [])
    aligned_pdb2 = Pdb(subset2, [])

    for ix1, ix2 in zip(aligned_indeces1, aligned_indeces2):
        residue1 = subset1.get_atoms(slice=subset1_resbounds[ix1],
                                     atom_types=atom_types)
        residue2 = subset2.get_atoms(slice=subset2_resbounds[ix2],
                                     atom_types=atom_types)

        # At least one residue lacks some of the requested atoms: keep only
        # what intersectAtomTypes() returns for the pair.
        if (len(atom_types) != len(residue1)
                or len(atom_types) != len(residue2)):
            residue1, residue2 = intersectAtomTypes(residue1, residue2)

        assert len(residue1) == len(residue2)

        aligned_pdb1.append_atoms(residue1)
        aligned_pdb2.append_atoms(residue2)

    assert len(aligned_pdb1) == len(aligned_pdb2)

    return aligned_pdb1, aligned_pdb2
def _splitchains_report(fname, doprint):
    """Print ``fname`` if it exists (and printing is on), else report an error."""
    if os.path.isfile(fname):
        if doprint:
            print(fname)
    else:
        # file.write() takes exactly one argument; the message has to be
        # formatted before it is written.
        sys.stderr.write("ERROR creating file: %s\n" % fname)


def _splitchains_write_ali(ali_name, entry_code, structure, doprint,
                           annotation=""):
    """Write one sequence entry for ``structure`` to ``ali_name`` and report it."""
    text = ">%s\n%s%s\n%s\n" % (entry_code, structure.get_structure_lign(),
                                annotation, structure.get_seq())
    with open(ali_name, 'w') as handle:
        handle.write(text)
    _splitchains_report(ali_name, doprint)


def splitchains(files, options, doprint=False):
    """Split PDB files into per-chain structure and/or sequence files.

    files   -- iterable of PDB file names.
    options -- container of option letters, tested with ``in``:
               'p'  write one structure file per chain via splitstructure();
               'a'  write one ``<basename><chain>.ali`` file per chain;
               'f'  together with 'a': also write ``<basename>.ali`` for a
                    file that carries no chain information;
               'c'  add a ``CGDB{<CODE>}`` tag to that chain-less entry.
    doprint -- print the name of every file that was created.

    Files that should have been created but do not exist afterwards are
    reported on stderr. Returns None.
    """
    for pdb_file in files:
        _, basename, ext = splitpath(pdb_file)
        pdb_code = basename
        # NOTE(review): handle left open; it is not visible here whether Pdb
        # reads its input eagerly - confirm before closing.
        structure = Pdb(pdb_code, open(pdb_file))
        chains = structure.get_chain_codes()

        no_chain_info = not chains or (len(chains) == 1 and not chains[0])
        if no_chain_info:
            # Without chain information there is nothing to split; a single
            # sequence file is written only when explicitly requested.
            if ('f' in options) and ('a' in options):
                cgdb_id = ""
                if 'c' in options:
                    cgdb_id = "CGDB{%s}" % (pdb_code.upper())
                _splitchains_write_ali(basename + ".ali", pdb_code, structure,
                                       doprint, annotation=cgdb_id)
            continue

        if 'p' in options:
            outfiles = splitstructure(open(pdb_file), basename, chains, ext)
            for out_name in outfiles:
                _splitchains_report(out_name, doprint)

        if 'a' in options:
            for c in chains:
                _splitchains_write_ali(basename + c + ".ali", pdb_code + c,
                                       structure.get_chain(c), doprint)
def load_structure(self, code, fname):
    """Parse the structure file ``fname`` and store it under ``code``.

    The resulting Pdb object is stored in ``self.structures[code]``,
    replacing any structure already stored under that code.
    """
    # open() is the documented way to open a file; file() only exists on
    # Python 2.
    # NOTE(review): the handle is not closed here because it is not visible
    # from this method whether Pdb reads it eagerly - confirm.
    self.structures[code] = Pdb(code, open(fname))
def kink_finder(directory, pdb_extension, tem_extension, filename, output_path,
                jobid, soluble, display, break_angle, pymol_file_directory,
                in_out, max_loop_length, user_helices, path):
    """Find kinks in the helices of one or more PDB files.

    For every chain of every input file the helices are located, a cylinder
    is fitted to each sliding window of six residues (24 backbone atoms),
    the angle between the cylinders on either side of every residue is
    calculated and kinks are picked from those angles. Results are written
    to ``angles.csv``, ``kinks.csv`` and ``helices.csv``, optionally to
    pymol scripts, and are stored on a job document in MongoDB.

    directory, pdb_extension
        Used to glob the input files when ``filename`` is 'all'.
    tem_extension, soluble, max_loop_length
        Only used by the .tem-file helix detection, which is switched off
        by a constant inside this function.
    filename
        A single PDB file, or 'all' for every matching file in
        ``directory``.
    output_path
        Output location. The CSV file names are appended directly, so this
        has to end with a path separator.
    jobid
        Id of the document in the ``jobs`` collection that receives the
        results.
    display
        Print a table with one row per helix.
    break_angle
        A helix whose largest angle exceeds this value is split at that
        position and the two halves are analysed as separate helices.
    pymol_file_directory
        Directory for pymol scripts, or 'none' to write none.
    in_out
        Currently ignored: the value is overwritten with 'inside' below.
    user_helices
        'none' to read helix limits from the PDB file, otherwise a space
        separated list of ``start-end`` residue number pairs.
    path
        Directory containing the ``cylinder`` binary.

    Calls exit() when the cylinder binary is missing or not working, or
    when an input file does not exist. Returns None.
    """
    # Helix detection from a .tem file (produced by JOY) is switched off.
    find_helices_from_tem = False

    # NOTE(review): this overrides the ``in_out`` argument, so a caller
    # asking for 'outside' still gets 'inside'. Kept to preserve output.
    in_out = 'inside'

    # Decide if we are going to write pymol files
    if pymol_file_directory == 'none':
        pymol = False
    else:
        pymol = True
        if not os.path.exists(pymol_file_directory):
            os.makedirs(pymol_file_directory)

    num_atoms = 24
    helix_vector_length = int(math.ceil(num_atoms / 4))

    # check binary is there
    cylinder_binary = path + os.sep + 'cylinder'
    if not os.path.exists(cylinder_binary):
        print("Cylinder binary not in Kink Finder's directory. See readme.txt for further instructions")
        exit()

    # check that the cylinder binary works:
    command_string = [cylinder_binary, '6',
                      '0', '0', '0',
                      '0', '0', '1',
                      '-1', '0', '0',
                      '0', '1', '1',
                      '1', '0', '2',
                      '0', '-1', '3',
                      '-1', '0', '4',
                      '0', '1', '5']
    try:
        sub.check_output(command_string)
    except Exception:
        # Not a bare except: Ctrl-C must not be reported as a broken binary.
        print('Cylinder fitting binary not functioning properly. See readme.txt for further instructions')
        exit()

    # we can feed it all of the pdbfiles in the folder
    if filename == 'all':
        pdbfiles = glob("%s%s*%s" % (directory, os.sep, pdb_extension))
        pdbfiles = sorted(pdbfiles)
    else:
        pdbfiles = [filename]

    # make a folder for the results
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    # Open some files to write output to. The handles are kept so that they
    # can be closed explicitly once all structures have been processed.
    angle_file = open(output_path + 'angles.csv', 'w')
    kink_file = open(output_path + 'kinks.csv', 'w')
    helix_file = open(output_path + 'helices.csv', 'w')
    angleWriter = csv.writer(angle_file)
    kinkWriter = csv.writer(kink_file)
    helixWriter = csv.writer(helix_file)

    # NOTE(review): this header names 13 columns, the rows written below
    # carry 12 values (nothing is written for 'I/O Kink Pos').
    kinkWriter.writerow(["pdb_code", "Helix_Start", "Helix_End",
                         "Kink_Position", "Kink_Start", "Kink_End",
                         "Kink_Angle", "sequence", "n_radius", "n_rmsd",
                         "c_radius", "c_rmsd", 'I/O Kink Pos'])
    helixWriter.writerow(["pdb_code", "Helix_Start", "Helix_End",
                          "Kink_Position", "Kink_Angle", "sequence"])

    # format(x) without a format spec returns str(x), so these are simply
    # the template strings themselves.
    number_formatter = '{:.3f}'
    display_formatter = '{:10.3f}'
    res_number_formatter = '{:6n}'

    # One entry per analysed helix, over all chains and files. Previously
    # the list was re-created for every chain, so only the helices of the
    # last chain reached the database, and it was undefined when nothing
    # was processed.
    results = []

    # for each PDB file
    for pdbfile in pdbfiles:
        pdb_code = pdbfile.split(os.sep)[-1].split('.')[0]

        if not os.path.exists(pdbfile):
            print('File %s could not be found' % pdbfile)
            exit()

        # check for multiple chains:
        pdb_backbone_atoms = Pdb(pdbfile).get_backbone()
        chains = pdb_backbone_atoms.get_chain_codes()

        for chain in chains:
            pdb_code = pdb_code[0:4] + chain
            print('Analysing chain %s of %s' % (chain, pdb_code))
            pdb = ResidueList(Pdb(pdbfile).get_chain(chain).get_backbone())

            if find_helices_from_tem:
                temfile = pdbfile[:-len(pdb_extension)] + tem_extension
                if not os.path.exists(temfile):
                    print('no %s' % temfile)
                    exit()
                tem = Ali(temfile)
                if len(pdb) != len(tem[0][0].seq):
                    print(' '.join(str(item) for item in (
                        'pdb and tem are different lengths', len(pdb),
                        len(tem[0][0].seq), tem[0][0].seq)))
                    exit()
                # Extract sequence and secondary structure annotation from
                # the TEM file
                sequence = tem[0][0].seq
                sstruc = tem[0]["secondary structure and phi angle"].seq
                tm_helices = find_helices(pdb, sstruc, sequence, '', soluble,
                                          max_loop_length)
            else:
                if user_helices != 'none':
                    # user defined helices: parse the "start-end" pairs
                    helices = []
                    for helix in user_helices.split(' '):
                        pdbstart = int(helix.split('-')[0])
                        pdbend = int(helix.split('-')[1])
                        helices.append([pdbstart, pdbend])
                else:
                    helices = parse_file_for_helices(pdbfile, chain)

                if len(helices) == 0:
                    print('No helix definitions for chain %s in file %s . Either include header in PDB file, or manually specify helix limits' % (chain, pdbfile))
                    continue

                # now convert from the PDB residue number to the index in
                # the pdb object
                tm_helices = []
                for helix in helices:
                    start = end = None
                    for j in xrange(len(pdb)):
                        ires = pdb[j].CA.ires
                        if ires == helix[0]:
                            start = j
                        if ires == helix[1]:
                            end = j + 1
                    if start is None or end is None:
                        # Without this check a limit that is not present in
                        # the chain raised a NameError or silently reused
                        # the indices of the previous helix.
                        print('Helix %s-%s could not be located in chain %s of %s. Skipping it' % (helix[0], helix[1], chain, pdbfile))
                        continue
                    tm_helices.append([start, end])
                sequence = pdb.get_seq()

            # ================================================================
            # Loop over helices
            # ================================================================
            if display:
                print(' Largest Largest ')
                print(' First Last Kink Kink Helix')
                print('Chain Resi Resi Resi Angle Sequence')

            # tm_helices may grow while it is iterated: a helix that is cut
            # at a very large angle appends its two halves, which are then
            # visited by this same loop.
            for start, end in tm_helices:
                # stop if it is too short - i.e we cannot fit two
                # consecutive cylinders to it
                if end - start < 2 * helix_vector_length:
                    continue

                # get the coordinates of the helix
                helix = pdb[start:end]

                # check we have all backbone atoms
                if len(helix.get_coords()) != 4 * (end - start):
                    print("Only %i of %i backbone atoms in coordinate file for helix starting at residue %i of %s. Kink Finder needs all backbone atoms (CA, C, O, N)" % (
                        len(helix.get_coords()), 4 * (end - start), start,
                        pdb_code))
                    continue

                maxangle = 0.0  # the biggest angle
                maxpos = -1     # position of the biggest angle

                # One row per sliding window. A cylinder row holds a point
                # on the axis in [0:3], the axis direction in [3:6], the
                # radius in [6] and the rmsd of the fit in [7].
                number_of_windows = end - start - helix_vector_length + 1
                unitvector = np.zeros([number_of_windows, 3])
                cylinder = np.zeros([number_of_windows, 9])

                # ============================================================
                # loop over the helix, calculating initial vectors
                # ============================================================
                for i in xrange(number_of_windows):
                    # choose our fragment to fit
                    fragment = helix[i:i + helix_vector_length]
                    # simple least squares to get a sensible starting point
                    n_unitvector, n_linepoints = fit_line(
                        fragment.get_coords()[0:21])
                    # now cylinder fit, using the least squares
                    cylinder[i] = cylinder_fit_c(n_linepoints[0], n_unitvector,
                                                 fragment, num_atoms, path)

                    # Deal with situations where the initial cylinder fit is
                    # poor. This occurs where the helix is distorted, so a
                    # least squares fit to 21 atoms does not give a good
                    # approximation and the cylinder fitting routine gets
                    # stuck in a local minimum. Pi and tight turns are
                    # better approximated by 16 (tight), 26 (pi) and 28
                    # (tight) atom fits. A longer region also gives a better
                    # approximation of the helix axis, so a 36 atom least
                    # squares fit is included as well. Each of these only
                    # provides the starting point for the 24 atom cylinder
                    # fit.
                    if cylinder[i][7] > 0.31:
                        # candidate fits; rows that are never filled keep
                        # the value 10 and therefore score badly
                        cylinder2 = np.ones([6, 9]) * 10

                        # try a 26 atom starting fit, in case of pi helix
                        f_start = max(0, (i - 1))
                        f_end = f_start + 7
                        fragment2 = helix[f_start:f_end]
                        n_unitvector, n_linepoints = fit_line(
                            fragment2.get_coords()[0:26])
                        cylinder2[0] = cylinder_fit_c(
                            n_linepoints[0], n_unitvector, fragment, 24, path)

                        # try a 16 atom starting fit, in case of 3_10
                        f_start = i + 1
                        f_end = i + helix_vector_length - 1
                        fragment2 = helix[f_start:f_end]
                        n_unitvector, n_linepoints = fit_line(
                            fragment2.get_coords()[0:16])
                        cylinder2[1] = cylinder_fit_c(
                            n_linepoints[0], n_unitvector, fragment, 24, path)

                        # try a 28 atom starting fit
                        if i == 0:
                            f_start = 0
                        else:
                            f_start = min((i - 1),
                                          (end - start - helix_vector_length - 2))
                        f_end = f_start + 8
                        fragment2 = helix[f_start:f_end]
                        n_unitvector, n_linepoints = fit_line(
                            fragment2.get_coords()[0:28])
                        cylinder2[2] = cylinder_fit_c(
                            n_linepoints[0], n_unitvector, fragment, 24, path)

                        # try a 36 atom starting fit
                        if i == 0:
                            f_start = 0
                        else:
                            f_start = min((i - 1),
                                          (end - start - helix_vector_length - 3))
                        f_end = f_start + 9
                        fragment2 = helix[f_start:f_end]
                        n_unitvector, n_linepoints = fit_line(
                            fragment2.get_coords()[0:36])
                        cylinder2[3] = cylinder_fit_c(
                            n_linepoints[0], n_unitvector, fragment,
                            num_atoms, path)

                        # try the fitted axis from the previous section of
                        # the helix (if this is not the first section)
                        if i != 0:
                            cylinder2[4] = cylinder_fit_c(
                                cylinder[i - 1][0:3], cylinder[i - 1][3:6],
                                fragment, num_atoms, path)

                        # and include the initial fit, in case it is the best
                        cylinder2[5] = cylinder[i]

                        # now want the fit with the best rmsd, given that it
                        # has a sensible diameter
                        okay_radius = (abs(cylinder2[:, 6] - 2.0) < 0.3)
                        okay_rmsd = cylinder2[:, 7] < 0.38
                        # NOTE(review): adding two numpy boolean arrays
                        # yields a boolean array (logical or), so comparing
                        # the sum with 2 is False everywhere and the
                        # score-based branch below is always the one taken.
                        # ``okay_radius & okay_rmsd`` looks like the intent,
                        # but switching changes the fitted axes and thereby
                        # the reported angles - confirm before changing.
                        okay = ((okay_radius + okay_rmsd) == 2)
                        cylinder_rms_okay = cylinder2[okay]
                        if len(cylinder_rms_okay) != 0:
                            # fits within both constraints exist: pick the
                            # one with the best (lowest) rmsd
                            best_cylinder2 = cylinder_rms_okay[
                                cylinder_rms_okay[:, 7].argmin()]
                            cylinder[i] = best_cylinder2
                        else:
                            # no fit with a sensible radius, so pick the one
                            # with the best score; the score is
                            # |radius - 2| + 5*(rmsd - 0.25)
                            scores = (abs(cylinder2[:, 6] - 2.0)
                                      + 5 * (cylinder2[:, 7] - 0.25))
                            cylinder[i] = cylinder2[scores.argmin()]

                    # We now have a cylinder axis for this sliding window of
                    # 6 residues
                    unitvector[i] = cylinder[i][3:6]

                # ============================================================
                # now run back and forth down the helix, trying the
                # neighbouring fit as a starting point; do this until it has
                # been done 10 times, or there has been no change in the fits
                # ============================================================
                return_changes = 1
                iterations = 0
                while return_changes > 0 and iterations < 10:
                    iterations += 1
                    return_changes = 0

                    # backwards: start each window from the fit of the next
                    for i in xrange((number_of_windows - 2), -1, -1):
                        fragment = helix[i:i + helix_vector_length]
                        cylinder3 = cylinder_fit_c(
                            cylinder[i + 1][0:3], cylinder[i + 1][3:6],
                            fragment, num_atoms, path)
                        # if the radius is sensible, and the rmsd is better
                        # by a significant margin,
                        if (abs(cylinder3[6] - 2.0) < 0.3) and cylinder[i][7] - cylinder3[7] > 0.01:
                            cylinder[i] = cylinder3
                            return_changes += 1
                        elif (abs(cylinder[i][6] - 2.0) > 0.3) or cylinder[i][7] > 0.38:
                            # else if we have a really quite bad previous fit
                            score_old = abs(cylinder[i][6] - 2.0) + 5 * (cylinder[i][7] - 0.25)
                            score_new = abs(cylinder3[6] - 2.0) + 5 * (cylinder3[7] - 0.25)
                            if score_new < score_old:
                                cylinder[i] = cylinder3
                                return_changes += 1

                    # forwards: start each window from the fit of the previous
                    for i in xrange(1, number_of_windows):
                        fragment = helix[i:i + helix_vector_length]
                        cylinder3 = cylinder_fit_c(
                            cylinder[i - 1][0:3], cylinder[i - 1][3:6],
                            fragment, num_atoms, path)
                        if (abs(cylinder3[6] - 2.0) < 0.3) and cylinder[i][7] - cylinder3[7] > 0.01:
                            cylinder[i] = cylinder3
                            return_changes += 1
                        elif (abs(cylinder[i][6] - 2.0) > 0.3) or cylinder[i][7] > 0.38:
                            score_old = abs(cylinder[i][6] - 2.0) + 5 * (cylinder[i][7] - 0.25)
                            score_new = abs(cylinder3[6] - 2.0) + 5 * (cylinder3[7] - 0.25)
                            if score_new < score_old:
                                cylinder[i] = cylinder3
                                return_changes += 1

                # ============================================================
                # Now, have best 6 residue fits
                # Want to see if longer fits may be better
                # ============================================================
                number_of_possible_angles = len(helix) - 2 * helix_vector_length + 1
                # fits on the n side and on the c side of each possible kink
                new_cylinder_n = np.zeros([number_of_possible_angles, 9])
                new_cylinder_c = np.zeros([number_of_possible_angles, 9])

                for j in xrange(number_of_possible_angles):
                    # n side: extend the window backwards by up to 5
                    # residues, as far as the start of the helix allows
                    n_cylinders = np.zeros([min(6, j + 1), 9])
                    for i in xrange(min(6, j + 1)):
                        n_fragment = helix[(j - i):(j + helix_vector_length)]
                        # fit, using the original vector as a start
                        number_of_fragment_atoms = (i + 6) * 4
                        n_cylinders[i] = cylinder_fit_c(
                            cylinder[j][0:3], cylinder[j][3:6], n_fragment,
                            number_of_fragment_atoms, path)
                    # best is the one with smallest rmsd
                    new_cylinder_n[j] = n_cylinders[n_cylinders[:, 7].argmin()]

                    # c side: extend the window forwards by up to 5 residues
                    c_cylinders = np.zeros(
                        [min(6, number_of_possible_angles - j), 9])
                    for i in xrange(min(6, (number_of_possible_angles - j))):
                        c_fragment = helix[(j + helix_vector_length):
                                           (j + 2 * helix_vector_length + i)]
                        number_of_fragment_atoms = ((i + 6) * 4)
                        c_cylinders[i] = cylinder_fit_c(
                            cylinder[j + helix_vector_length][0:3],
                            cylinder[j + helix_vector_length][3:6],
                            c_fragment, number_of_fragment_atoms, path)
                    try:
                        new_cylinder_c[j] = c_cylinders[c_cylinders[:, 7].argmin()]
                    except (ValueError):
                        print(' '.join(str(item) for item in (
                            len(helix), j, number_of_possible_angles,
                            c_cylinders,
                            min(6, (number_of_possible_angles - j)))))
                        print('error! Should exit')

                # ============================================================
                # We now have the cylinder fits
                # Now, calculate a kink angle for each residue
                # ============================================================
                # NOTE(review): the per-residue wobble angles are computed
                # but not consumed further down; the call is kept as is.
                wobble_angles = []
                angles = []
                for i in xrange(len(new_cylinder_c)):
                    angles.append(abs(math.degrees(
                        angle(new_cylinder_n[i][3:6], new_cylinder_c[i][3:6]))))
                    wobble_angles.append(wobble2(
                        helix[i + helix_vector_length - 1].CA.xyz,
                        new_cylinder_n[i][0:3], new_cylinder_n[i][3:6],
                        new_cylinder_c[i][3:6]))
                    if maxangle < angles[i]:
                        maxangle = angles[i]
                        max_position_helix = i
                        maxpos = max_position_helix + helix_vector_length - 1 + start

                # cut the helix if it contains an angle above break_angle
                if maxangle > break_angle:
                    # make 2 new helices - start:maxpos and maxpos:end
                    tm_helices.append([start, maxpos - 1])
                    tm_helices.append([maxpos + 1, end])
                    # continue to next helix, without including this one in
                    # the written documents
                    continue

                # ============================================================
                # this is where we identify the kink point(s)
                # ============================================================
                sorted_angles = list(angles)  # make a copy of the list
                sorted_angles.sort()
                sorted_angles.reverse()       # biggest first
                helix_kink_angle = sorted_angles[0]

                # indices into ``angles``, from largest to smallest angle
                kink_indices = []
                for i in xrange(len(sorted_angles)):
                    kink_indices.append(angles.index(sorted_angles[i]))

                # the first entry is obviously a kink
                kink_pos = list([kink_indices[0]])
                k = 1
                # step down the index list
                for i in xrange(len(kink_indices) - 1):
                    # angles of 10 degrees or less are never kinks
                    if angles[kink_indices[k]] > 10.0:
                        # calculate how close this residue is to an
                        # existing kink
                        min_distance = 1000
                        for j in xrange(len(kink_pos)):
                            distance = abs(kink_indices[k] - kink_pos[j])
                            min_distance = min(min_distance, distance)
                        # providing we are not within 6 of an existing kink
                        if min_distance > 6:
                            # and there is an angle under 10 degrees in
                            # between
                            # NOTE(review): the range is taken from
                            # kink_indices[j] (the j-th largest angle), not
                            # from kink_pos[j] (the j-th accepted kink) -
                            # confirm which one is intended.
                            straight_in_between = 1
                            for j in xrange(len(kink_pos)):
                                if min(angles[min(kink_indices[j], kink_indices[k]):
                                              max(kink_indices[j], kink_indices[k])]) > 10.0:
                                    straight_in_between = 0
                            # then we have another kink!
                            if straight_in_between:
                                kink_pos.append(kink_indices[k])
                    k = k + 1

                first_resi = pdb[start].CA.ires
                last_resi = pdb[end - 1].CA.ires

                # start writing a pymol file
                if pymol:
                    start_pymol_script(pdbfile, pdb_code, chain, first_resi,
                                       last_resi,
                                       pymol_file_dir=pymol_file_directory,
                                       structure_filename=pdbfile)

                # ============================================================
                # for each kink: an index into ``angles`` refers to the
                # window starting at that residue of the helix (n side) and
                # the window starting helix_vector_length residues later
                # (c side)
                # ============================================================
                a = 0
                for maxang_helix in kink_pos:
                    best_n_cylinder = new_cylinder_n[maxang_helix]
                    best_c_cylinder = new_cylinder_c[maxang_helix]
                    n_fragment = helix[maxang_helix:
                                       (maxang_helix + helix_vector_length)]
                    c_fragment = helix[(maxang_helix + helix_vector_length):
                                       (maxang_helix + 2 * helix_vector_length)]

                    # Work out the kink_angle
                    kink_angle = math.degrees(
                        angle(best_n_cylinder[3:6], best_c_cylinder[3:6]))

                    # work out some wobble angles (+- six residues)
                    wobble_angles_kink = np.zeros([12])
                    for i in xrange(helix_vector_length):
                        if (i + maxang_helix) < 0:
                            wobble_angles_kink[i] = 400
                        else:
                            wobble_angles_kink[i] = (wobble2(
                                helix[i + maxang_helix].CA.xyz,
                                best_n_cylinder[0:3], best_n_cylinder[3:6],
                                best_c_cylinder[3:6]))
                    for i in xrange(helix_vector_length,
                                    2 * helix_vector_length):
                        # these must have the c and n vectors reversed. As
                        # now effectively looking from the other end of the
                        # helix, these are 360-angle
                        if len(helix) <= (i + maxang_helix):
                            wobble_angles_kink[i] = np.nan
                        else:
                            wobble_angles_kink[i] = (360 - wobble2(
                                helix[i + maxang_helix].CA.xyz,
                                best_c_cylinder[0:3],
                                -1 * best_c_cylinder[3:6],
                                -1 * best_n_cylinder[3:6]))

                    # ========================================================
                    # This is repositioning the kink: look at the wobble
                    # angles of the four residues around it and pick the one
                    # nearest to (outside) or furthest from (inside) 180
                    # ========================================================
                    if in_out == 'outside':
                        position_correction = np.nanargmin(
                            abs(180 - wobble_angles_kink[
                                (helix_vector_length - 2):
                                (helix_vector_length + 2)]))
                    elif in_out == 'inside':
                        position_correction = np.nanargmax(
                            abs(180 - wobble_angles_kink[
                                (helix_vector_length - 2):
                                (helix_vector_length + 2)]))

                    corrected_kink_position_protein = (
                        maxang_helix + helix_vector_length - 1 + start - 1
                        + position_correction)

                    # kink_pos starts with the largest angle, so the first
                    # kink is the one reported for the helix as a whole
                    if a == 0:
                        helix_corrected_biggest_kink_position_protein = corrected_kink_position_protein
                    a += 1

                    # ========================================================
                    # Write the information to the Kink file
                    # ========================================================
                    kink_start = max(
                        corrected_kink_position_protein - helix_vector_length, 0)
                    kink_end = min(kink_start + 2 * helix_vector_length + 1,
                                   len(pdb))

                    # general info about the helix first, then the kink
                    kinkWriter.writerow([
                        pdb_code, first_resi, last_resi,
                        pdb[corrected_kink_position_protein].CA.ires,
                        pdb[kink_start].CA.ires,
                        pdb[kink_end - 1].CA.ires,
                        number_formatter.format(kink_angle),
                        sequence[kink_start:kink_end],
                        number_formatter.format(best_n_cylinder[6]),
                        number_formatter.format(best_n_cylinder[7]),
                        number_formatter.format(best_c_cylinder[6]),
                        number_formatter.format(best_c_cylinder[7])])

                    # ========================================================
                    # Write the kink information to the pymol file
                    # ========================================================
                    if pymol:
                        write_pymol_kink(
                            best_n_cylinder, best_c_cylinder, n_fragment,
                            c_fragment, pdbfile, pdb_code, chain, first_resi,
                            last_resi,
                            pdb[corrected_kink_position_protein].CA.ires,
                            kink_angle, pymol_file_dir=pymol_file_directory)

                biggest_kink_resi = pdb[helix_corrected_biggest_kink_position_protein].CA.ires

                # ============================================================
                # collect the helix for the database
                # ============================================================
                results.append({
                    '_id': ObjectId(),
                    'chain': pdb_code,
                    'firstresi': res_number_formatter.format(first_resi),
                    'lastresi': res_number_formatter.format(last_resi),
                    'kinkresi': res_number_formatter.format(biggest_kink_resi),
                    'kinkang': display_formatter.format(helix_kink_angle),
                    'helix': sequence[start:end]
                })

                # ============================================================
                # Now, print the helix info, displaying it if desired
                # ============================================================
                # NOTE(review): the table header above is printed for any
                # truthy ``display``, the rows only for ``display == True``.
                if display == True:
                    print(' '.join(str(item) for item in (
                        pdb_code,
                        res_number_formatter.format(first_resi),
                        res_number_formatter.format(last_resi),
                        res_number_formatter.format(biggest_kink_resi),
                        display_formatter.format(helix_kink_angle),
                        ' ',
                        sequence[start:end])))

                helixWriter.writerow([pdb_code, first_resi, last_resi,
                                      biggest_kink_resi,
                                      number_formatter.format(helix_kink_angle),
                                      sequence[start:end]])

                # the code of the helix + first residue number, then 0 for
                # the first 5 residues where no angle has been calculated
                angle_strings = [pdb_code + str(first_resi),
                                 '0', '0', '0', '0', '0']
                for angle1 in angles:
                    angle_strings.append(number_formatter.format(angle1))
                # no angle for last 6 residues
                angle_strings.extend(['0', '0', '0', '0', '0', '0'])
                angleWriter.writerow(angle_strings)

                # ============================================================
                # Close the pymol file for this helix
                # ============================================================
                # NOTE(review): called even when no pymol script was started
                # (pymol is False) - confirm that end_pymol_file copes.
                end_pymol_file(pdb_code, first_resi,
                               pymol_file_dir=pymol_file_directory)

    # Make sure everything written so far is on disk.
    for handle in (angle_file, kink_file, helix_file):
        handle.close()

    # NOTE(review): the connection string embeds database credentials in the
    # source; they should come from configuration or the environment.
    mongo = pymongo.MongoClient('mongodb://*****:*****@ds035014.mlab.com:35014/kinks')
    db = mongo.kinks
    db.jobs.update({'_id': ObjectId(jobid)}, {'$set': {'results': results}})
    return