Ejemplo n.º 1
0
Archivo: tm.py Proyecto: xiongzhp/Kinks
 def load_structures(self):
     """Register a Pdb object for every entry group that has a structure file.

     Iterates over ``self.alignment``. For each entry group the structure
     filename of its master entry is looked up; groups without a filename
     are skipped. Otherwise the file is opened, wrapped in a ``Pdb`` object
     and stored in ``self.structures`` under the group's code.
     """
     for eg in self.alignment:
         # Look the filename up on the group being iterated.  The previous
         # code indexed self.alignment with `nativeid`, a name that is never
         # bound in this method, and ignored the loop variable entirely.
         # NOTE(review): intent inferred from the loop -- confirm.
         fname = eg.getMasterEntry().getStructureFilename()
         if not fname:
             continue
         code = eg.getCode()
         # open() is the portable spelling of the Python 2-only file() builtin.
         self.structures[code] = Pdb(code, open(fname))
Ejemplo n.º 2
0
 def __init__(self, pdb, code=None):
     """Fill this container from a list/tuple, a Pdb object or Pdb() input.

     A falsy ``pdb`` adds nothing.  A list or tuple has its items appended
     unchanged.  Anything else is turned into a ``Pdb`` (unless it already
     is one); each item of its ``xresidues()`` is wrapped in ``Residue`` and
     appended, and a truthy ``pdb.code`` is copied to ``self.code``.

     An explicit ``code`` argument (anything but None) always wins.
     """
     if pdb:
         if isinstance(pdb, (list, tuple)):
             for residue in pdb:
                 self.append(residue)
         else:
             source = pdb if isinstance(pdb, Pdb) else Pdb(pdb)
             for raw_residue in source.xresidues():
                 self.append(Residue(raw_residue))
             if source.code:
                 self.code = source.code

     if code is not None:
         self.code = code
Ejemplo n.º 3
0
 def __init__(self, pdb):
     """Fill this container from residues, a Pdb object or Pdb() input.

     An empty ``pdb`` (``len(pdb) == 0``) adds nothing.  A ``ResidueList``,
     or any sequence whose first item is a ``Residue``, has its items
     appended unchanged.  Anything else is turned into a ``Pdb`` (unless it
     already is one) and each item of its ``xresidues()`` is wrapped in
     ``Residue`` and appended.

     ``self.code`` is taken from ``pdb.code`` when that attribute exists and
     is None otherwise.
     """
     if len(pdb) == 0:
         pass
     elif isinstance(pdb, ResidueList) or isinstance(pdb[0], Residue):
         for r in pdb:
             self.append(r)
     else:
         if not isinstance(pdb, Pdb):
             pdb = Pdb(pdb)
         for r in pdb.xresidues():
             self.append(Residue(r))

     # Only a missing attribute should fall back to None; the former bare
     # `except:` also swallowed unrelated errors such as KeyboardInterrupt.
     self.code = getattr(pdb, "code", None)
Ejemplo n.º 4
0
 def to_pdb(self, atomfilter=lambda atom: True):
     """Collect the atoms of this container into a new Pdb object.

     Every atom of every residue is passed to ``atomfilter``; atoms for
     which it returns a true value are appended, in iteration order, to the
     ``data`` of a ``Pdb`` created with this object's code.  The default
     filter accepts every atom.
     """
     result = Pdb(self.code, [])
     for residue in self:
         for atom in residue:
             if not atomfilter(atom):
                 continue
             result.data.append(atom)
     return result
Ejemplo n.º 5
0
 def __init__(self, pdb, code=None):
   """Build the container from a Pdb object, a list of ResidueLists or Pdb() input.

   A falsy ``pdb`` adds nothing.  A non-Pdb sequence whose first item is a
   ``ResidueList`` has its items appended and supplies the code through
   ``pdb[0].code``.  Any other non-Pdb value is handed to ``Pdb()`` (which,
   per the original documentation, can deal with filenames).

   When ``pdb`` is (or has become) a ``Pdb``, the container is extended with
   ``ResidueList(pdb).split_chains()``, takes over ``pdb.code`` and stores
   the ligands as ``Protein(ResidueList(pdb.ligands).split_chains())``.

   The optional ``code`` is a short description (usually a PDB code); when
   it is not None it overrides whatever code was derived above.
   """
   self.code = ""
   self.ligands = []

   if pdb and not isinstance(pdb, Pdb):
     if isinstance(pdb[0], ResidueList):
       for chain_residues in pdb:
         self.append(chain_residues)
       self.code = pdb[0].code
     else:
       pdb = Pdb(pdb)

   if isinstance(pdb, Pdb):
     chains = ResidueList(pdb).split_chains()
     self.extend(chains)
     self.code = pdb.code
     ligand_chains = ResidueList(pdb.ligands).split_chains()
     self.ligands = Protein(ligand_chains)

   if code is not None:
     self.code = code
Ejemplo n.º 6
0
def superimpose(struc1_allchains,
                struc2_allchains,
                subset1=None,
                subset2=None,
                fname1=None,
                fname2=None,
                align_atoms=("N", "CA", "C", "O"),
                options="",
                modify_structures=True,
                normalise_by_first=False):
    """Align two structures via tmalign_objects and optionally move the first.

    Args:
        struc1_allchains, struc2_allchains: Two ``Pdb`` objects, or two
            values that can be passed to ``open()`` (file names).  Both must
            have the same type.
        subset1, subset2: Optional parts of the structures to align on; each
            defaults to the corresponding full structure.  If a subset has
            more than one chain, only its first chain is used.
        fname1, fname2: Accepted for backward compatibility; not used, because
            the alignment always runs on freshly written temporary files.
        align_atoms: Accepted for backward compatibility; not used here.
        options: Option string handed to ``tmalign_objects``.
        modify_structures: If true, ``struc1_allchains`` is transformed in
            place with the transform returned by the alignment.  Forced to
            False when file names were passed, since the loaded structures
            are not returned to the caller.
        normalise_by_first: If true, `` -L <residue count of subset1>`` is
            appended to ``options``.

    Returns:
        A tuple ``(alignment_info["seq1"], alignment_info["seq2"],
        alignment_info)``.

    Raises:
        PoorSuperpositionError: If the structures should be modified but the
            alignment produced no transform.
        AssertionError: If the two structures, or the two subsets, differ in
            type.
    """
    assert type(struc1_allchains) == type(struc2_allchains)
    assert type(subset1) == type(subset2)

    if not isinstance(struc1_allchains, Pdb):
        # File names were given: load both structures ourselves.  open() is
        # the portable spelling of the Python 2-only file() builtin.
        struc1_allchains = Pdb("pdb1", open(struc1_allchains))
        struc2_allchains = Pdb("pdb2", open(struc2_allchains))
        # We're not returning the structures to the caller, so there is no
        # use in modifying them.
        modify_structures = False

    # Identity test: never route the "not given" check through a custom __eq__.
    if subset1 is None:
        subset1 = struc1_allchains
    if subset2 is None:
        subset2 = struc2_allchains

    # If a structure has more than one chain, only use the first one.
    if subset1.chaincount() > 1:
        subset1 = subset1.get_first_chain()
    if subset2.chaincount() > 1:
        subset2 = subset2.get_first_chain()

    if normalise_by_first:
        options += " -L %d" % (subset1.rescount())

    # Always pre-parse and let tmalign_objects create temporary PDB files,
    # even when the original file names are known.  This ensures predictable
    # behaviour with regard to insertion codes etc., which TM-align just
    # removes from input files.
    transform, alignment_info = tmalign_objects(subset1, subset2, options)

    if modify_structures:
        if not transform:
            raise PoorSuperpositionError(
                "Poor superposition. TM-align did not generate a rotation matrix."
            )
        transform_structure(struc1_allchains, transform)

    return alignment_info["seq1"], alignment_info["seq2"], alignment_info
Ejemplo n.º 7
0
def getpdb(pdb_code, pdb_dir):
    """Return a Pdb object for *pdb_code*, looked up in *pdb_dir*.

    The open file is obtained from ``get_pdb_file``; a None result means the
    file is missing and raises ``NotFoundError``.
    """
    handle = get_pdb_file(pdb_code, pdb_dir)
    if handle is not None:
        return Pdb(pdb_code, handle)
    message = "PDB file '%s' not found in database dir '%s'" % (pdb_code,
                                                                pdb_dir)
    raise NotFoundError(message)
Ejemplo n.º 8
0
def reduceToAlignable(struc1_allchains,
                      struc2_allchains,
                      seq1,
                      seq2,
                      subset1=None,
                      subset2=None,
                      atom_types=("N", "CA", "C", "O"),
                      modify_structures=True):
    """Reduce two structures to the matching atoms of their aligned residues.

    Args:
        struc1_allchains, struc2_allchains: ``Pdb`` objects of the same type;
            used as the subsets when no subsets are given.
        seq1, seq2: Aligned (gapped) sequences.  A None sequence is replaced
            by the ``get_seq()`` of the corresponding subset.
        subset1, subset2: Optional ``Pdb`` subsets to work on.
        atom_types: Atom types requested from each aligned residue.
        modify_structures: Accepted for interface compatibility; not used.

    Returns:
        A tuple ``(aligned_pdb1, aligned_pdb2)`` of two ``Pdb`` objects of
        equal length holding the selected atoms of the aligned residues.

    Raises:
        ValueError: If either sequence is empty.
        ParsingError: If the sequences share no aligned residues.
        AssertionError: If argument types are inconsistent, or the ungapped
            sequence lengths disagree with the residue counts of the
            structures.
    """
    assert type(struc1_allchains) == type(struc2_allchains)
    assert type(subset1) == type(subset2)
    assert isinstance(struc1_allchains, Pdb)
    assert None == subset1 or isinstance(subset1, Pdb)

    # Fall back to the full structures and to their own sequences.
    subset1 = struc1_allchains if subset1 is None else subset1
    subset2 = struc2_allchains if subset2 is None else subset2
    seq1 = subset1.get_seq() if seq1 is None else seq1
    seq2 = subset2.get_seq() if seq2 is None else seq2

    if not seq1 or not seq2:
        raise ValueError(
            "Need to have non-empty sequence to align proteins:\nseq1:%s\nseq2:%s\n"
            % (seq1, seq2))

    bounds1 = subset1.residue_boundaries()
    bounds2 = subset2.residue_boundaries()

    # Residue counts according to the structure data.
    pdb1_rescount = len(bounds1)
    pdb2_rescount = len(bounds2)

    # Sequence and structure data must agree on the number of residues.
    assert length_ungapped(seq1) == pdb1_rescount, (
        "length_ungapped(seq1) = %d, pdb1_rescount = %d" %
        (length_ungapped(seq1), pdb1_rescount))
    assert length_ungapped(seq2) == pdb2_rescount, (
        "length_ungapped(seq2) = %d, pdb2_rescount = %d" %
        (length_ungapped(seq2), pdb2_rescount))

    # Residue indices of the aligned positions, pairwise in both structures.
    indices1, indices2 = find_aligned_residues(seq1, seq2)
    assert len(indices1) == len(indices2)

    if not indices1:
        raise ParsingError("No aligned residues?")

    # Collect the requested atoms of every aligned residue pair.
    aligned_pdb1 = Pdb(subset1, [])
    aligned_pdb2 = Pdb(subset2, [])

    for pos1, pos2 in zip(indices1, indices2):
        atoms1 = subset1.get_atoms(slice=bounds1[pos1], atom_types=atom_types)
        atoms2 = subset2.get_atoms(slice=bounds2[pos2], atom_types=atom_types)

        # If either residue lacks some requested atoms, keep only the atom
        # types returned by intersectAtomTypes for the pair.
        wanted = len(atom_types)
        if not (wanted == len(atoms1) and wanted == len(atoms2)):
            atoms1, atoms2 = intersectAtomTypes(atoms1, atoms2)

        assert len(atoms1) == len(atoms2)
        aligned_pdb1.append_atoms(atoms1)
        aligned_pdb2.append_atoms(atoms2)

    assert len(aligned_pdb1) == len(aligned_pdb2)

    return aligned_pdb1, aligned_pdb2
Ejemplo n.º 9
0
def _report_created(path, doprint):
    """Print *path* if it exists and *doprint* is set; else log to stderr."""
    if os.path.isfile(path):
        if doprint:
            print(path)
    else:
        # file.write() takes a single string, so format the message first.
        sys.stderr.write("ERROR creating file: %s\n" % path)


def _write_ali(path, text, doprint):
    """Write *text* to *path*, then report the result via _report_created."""
    with open(path, 'w') as handle:
        handle.write(text)
    _report_created(path, doprint)


def splitchains(files, options, doprint=False):
    """Split PDB files into per-chain structure and/or alignment files.

    Args:
        files: Iterable of PDB file names.  The base name returned by
            ``splitpath`` is used as the PDB code and as the output prefix.
        options: Container of single-character flags:
            'p' - split the structure per chain with ``splitstructure``;
            'a' - write one ``<basename><chain>.ali`` file per chain;
            'f' - together with 'a', also write ``<basename>.ali`` for files
                  without chain information;
            'c' - in that no-chain case, append ``CGDB{<CODE>}`` to the
                  structure line.
        doprint: If true, print the name of every file that was created.

    Files that should have been created but do not exist afterwards are
    reported on stderr.  Returns None.
    """
    for pdb_file in files:
        _path, basename, ext = splitpath(pdb_file)
        pdb_code = basename

        # open() is the portable spelling of the Python 2-only file() builtin.
        structure = Pdb(pdb_code, open(pdb_file))
        chains = structure.get_chain_codes()

        if not chains or (len(chains) == 1 and not chains[0]):
            # No chain information: nothing to split.  Only write a single
            # alignment file, and only when explicitly forced.
            if ('f' in options) and ('a' in options):
                cgdb_id = ""
                if 'c' in options:
                    cgdb_id = "CGDB{%s}" % (pdb_code.upper())
                text = ">%s\n%s%s\n%s\n" % (pdb_code,
                                            structure.get_structure_lign(),
                                            cgdb_id, structure.get_seq())
                _write_ali(basename + ".ali", text, doprint)
            continue

        if 'p' in options:
            # Keep the input open while iterating the result, in case
            # splitstructure produces its output lazily.
            with open(pdb_file) as handle:
                for outfile in splitstructure(handle, basename, chains, ext):
                    _report_created(outfile, doprint)

        if 'a' in options:
            for c in chains:
                chain = structure.get_chain(c)
                text = ">%s\n%s\n%s\n" % (pdb_code + c,
                                          chain.get_structure_lign(),
                                          chain.get_seq())
                _write_ali(basename + c + ".ali", text, doprint)
Ejemplo n.º 10
0
Archivo: tm.py Proyecto: xiongzhp/Kinks
 def load_structure(self, code, fname):
     """Open *fname*, wrap it in a Pdb object and register it under *code*.

     The resulting object is stored in ``self.structures[code]``.
     """
     # open() is the portable spelling of the Python 2-only file() builtin.
     self.structures[code] = Pdb(code, open(fname))
Ejemplo n.º 11
0
def kink_finder(directory, pdb_extension, tem_extension, filename, output_path,
                jobid, soluble, display, break_angle, pymol_file_directory, 
                in_out, max_loop_length, user_helices,path):
  
  
  if user_helices != 'none':
    pdb_helices = False
  else:
    pdb_helices = True
  
  find_helices_from_tem = False # this allows the user to input a .tem file 
  #(produced by JOY), and Kink Finder to identify helices from this .tem file
  
  
  #in_out = 'outside' #are we going for the kink to be annotated on the inside or outside?
  in_out = 'inside'
    
  if pymol_file_directory == 'none': #Decide if we are going to write a pymol file
    pymol = False
  else:
    pymol = True
    if not os.path.exists(pymol_file_directory):
      os.makedirs(pymol_file_directory)
  
  num_atoms = 24 #
  helix_vector_length = int(math.ceil(num_atoms / 4))  
  
  #check binary is there
  if not os.path.exists(path+os.sep+'cylinder'):
    print "Cylinder binary not in Kink Finder's directory. See readme.txt for further instructions"
    exit()
  
  #check that the cylinder binary works:
  command_string = [path+os.sep+'cylinder','6',
                     '0','0','0',
                     '0','0','1',
                     '-1','0','0',
                     '0','1','1',
                     '1','0','2',
                     '0','-1','3',
                     '-1','0','4',
                     '0','1','5']
  
  try:
    p = sub.check_output(command_string)
  except:
    print 'Cylinder fitting binary not functioning properly. See readme.txt for further instructions'
    exit()
  
  
  #pdbfiles # we can feed it all of the pdbfiles in the folder
  if filename == 'all':
    pdbfiles = glob("%s%s*%s" % (directory, os.sep,  pdb_extension))
    pdbfiles=sorted(pdbfiles)
  
  else:
    pdbfiles = [filename]
  
  #make a folder for the results
  if not os.path.exists(output_path):
    os.makedirs(output_path)
  
 
  ###############################
  # Open some files to write output to
  ##open a file for writing the all angles to
  angleWriter = csv.writer(open(output_path + 'angles.csv','w'))
  kinkWriter = csv.writer(open(output_path + 'kinks.csv','w'))
  helixWriter = csv.writer(open(output_path + 'helices.csv','w'))
  
  
  kinkWriter.writerow(["pdb_code","Helix_Start", "Helix_End", "Kink_Position",
          "Kink_Start", "Kink_End", "Kink_Angle", "sequence", "n_radius", 
          "n_rmsd", "c_radius","c_rmsd", 'I/O Kink Pos'])
  helixWriter.writerow(["pdb_code","Helix_Start", "Helix_End", "Kink_Position",
           "Kink_Angle", "sequence"])
  
  number_formatter = format('{:.3f}')
  display_formatter = format('{:10.3f}')
  res_number_formatter = format('{:6n}')
  
  #for each PDB file
  for pdbfile in pdbfiles:
    #print pdbfile
    pdb_code = pdbfile.split(os.sep)[-1].split('.')[0]
    try:
      assert os.path.exists(pdbfile)
    except:
      print 'File % could not be found' % pdbfile 
      exit() 
    
    #check_for_multiple chains:
    pdb_backbone_atoms = Pdb(pdbfile).get_backbone()
    chains = pdb_backbone_atoms.get_chain_codes()
    
    for chain in chains:
      pdb_code = pdb_code[0:4] + chain
      print 'Analysing chain %s of %s' % (chain, pdb_code)
      pdb = ResidueList(Pdb(pdbfile).get_chain(chain).get_backbone())
      
      
      if find_helices_from_tem:
        temfile = pdbfile[:-len(pdb_extension)] + tem_extension
        try:
          assert os.path.exists(temfile)
        except:
          print 'no',temfile 
          exit()
        tem = Ali(temfile)
        try:
          assert len(pdb) == len(tem[0][0].seq)
        except(AssertionError):
          print 'pdb and tem are different lengths', len(pdb), len(tem[0][0].seq), tem[0][0].seq
          exit()
          
          print len(pdb), tem[0][0].seq
          continue
        
        # Extract secondary structure and membrane layer annotation from TEM file
        sequence = tem[0][0].seq
        sstruc = tem[0]["secondary structure and phi angle"].seq
        #print sstruc
        tm_helices = find_helices(pdb,sstruc,sequence,'', soluble, max_loop_length)
      
      else: #user defined helices
        if user_helices != 'none':
          helix_string = user_helices.split(' ') #parse the string input by users
          helices = []
          for helix in helix_string:
            
            #print helix,
            pdbstart=int(helix.split('-')[0])
            pdbend = int(helix.split('-')[1]) # find where the residue with pbdnumber pdbend is in the list of residues
            #print pdbstart,pdbend, '##',      
            helices.append([pdbstart,pdbend])
          
        else:
          helices = parse_file_for_helices(pdbfile, chain)
          if len(helices) == 0:
            print 'No helix definitions for chain %s in file %s . Either include header in PDB file, or manually specify helix limits' % (chains[0], pdbfile)
            continue
        tm_helices=[]
        
        
        #now covert from the PDB index, to the index in the pdb object
        for helix in helices:
          for j in xrange(len(pdb)):
            if pdb[j].CA.ires == helix[0]:
              start = j
            if pdb[j].CA.ires == helix[1]:
              end = j+1     
          
          tm_helices.append([start,end])
        
        sequence = pdb.get_seq()
        sstruc = 'H' * len(sequence)
      
      
      #============================================================================
      #Loop over helices 
      #============================================================================
      if display:
        print '                   Largest    Largest       '
        print '       First   Last   Kink       Kink     Helix'
        print 'Chain   Resi   Resi   Resi      Angle     Sequence'
      # print tm_helices
      results = []
      for start, end in tm_helices:
        # stop if it is too short - i.e we cannot fit two consecutive cylinders to it
        if end-start < 2*helix_vector_length:
          continue
        #get the coordinates of the helix
        helix=pdb[start:end]
        #check we have all backbone atoms
        if len(helix.get_coords())!= 4*(end-start):
          print "Only %i of %i backbone atoms in coordinate file for helix starting at residue %i of %s. Kink Finder needs all backbone atoms (CA, C, O, N)" % ( len(helix.get_coords()),4*(end-start),start, pdb_code)
          continue 
        
        
        #lets initialise some things
        maxangle=0.0 #the biggest angle
        maxpos=-1 #position of the kinks
        angles = [] #angles of the helix
        angles2 = [] #wobble angles
        #initialize some arrays for unitvectors and cylinder results
        unitvector= np.zeros([end-start-helix_vector_length+1,3]) # an array of vectors
        cylinder = np.zeros([end-start-helix_vector_length+1,9]) # an array of fitted cylinders
        
        #==========================================================================
        # loop over the helix, calculating initial vectors
        #==========================================================================
        for i in xrange(end-start-helix_vector_length+1):
          #choose our fragment to fit
          fragment = helix[i:i+helix_vector_length]
          #simple least squares to get a sensible starting point       
          n_unitvector, n_linepoints = fit_line(fragment.get_coords()[0:21])
          #now cylinder fit, using the least squares 
          cylinder[i] = cylinder_fit_c(n_linepoints[0], n_unitvector,  fragment, num_atoms,path)
          
          #========================================================================
          # Deal with situations where the initial cylinder fit is poor. This 
          # occurs where the helix is distorted, and so a least squares fit to 21 
          # atoms does not give a good approximation, so the cylinder fitting routine
          # gets stuck in a local minimum. Pi and tight turns are better approximated 
          # by 16 (tight), 26(pi) and 28 (tight) atom fits. Using a longer region to
          # fit to also gives a better approximation of the helix axis, so a 36 atom
          # least-squares fit is also included gives
          # a 9 residue section, whic 
          #========================================================================
          
          #if the fit is not great, try a series of other starting points - this uses
          #26, 16, 28 and 36 atoms to fit a least squares line to. Then uses that for 
          #a starting point for the 24 atom cylinder fit
          if cylinder[i][7] > 0.31:
            #make a new object, to include the cylinder fits we do
            cylinder2 = np.ones([6,9])*10
            #try a 26 atom starting fit, in case of pi helix
            f_start = max(0,(i-1))
            f_end = f_start + 7
            fragment2 = helix[f_start:f_end]
            n_unitvector, n_linepoints = fit_line(fragment2.get_coords()[0:26])
            cylinder2[0] = cylinder_fit_c(n_linepoints[0], n_unitvector,  fragment, 24,path)
            
            # try a 16 atom starting fit, in case of 3_10 
            f_start = i+1
            f_end = i+helix_vector_length-1
            fragment2 = helix[f_start:f_end]
            n_unitvector, n_linepoints = fit_line(fragment2.get_coords()[0:16])
            cylinder2[1] = cylinder_fit_c(n_linepoints[0], n_unitvector,  fragment, 24,path)
            
            #try a 30 atom fit
            if i == 0:
              f_start = 0
            else:
              f_start = min((i-1),(end-start-helix_vector_length-2))
            f_end = f_start+8
            fragment2 = helix[f_start:f_end]
            n_unitvector, n_linepoints = fit_line(fragment2.get_coords()[0:28])
            cylinder2[2] = cylinder_fit_c(n_linepoints[0], n_unitvector,  fragment, 24,path)
            
            #try a 36 atom fit
            if i == 0:
              f_start = 0
            else:
              f_start = min((i-1),(end-start-helix_vector_length-3))
            f_end = f_start+9
            fragment2 = helix[f_start:f_end]
            n_unitvector, n_linepoints = fit_line(fragment2.get_coords()[0:36])
            cylinder2[3] = cylinder_fit_c(n_linepoints[0], n_unitvector,  fragment, num_atoms,path)
            
            #try the fitted axis from the previous section of the helix 
            #(if this is not the first section)
            if i !=0:
              cylinder2[4] = cylinder_fit_c(cylinder[i-1][0:3],cylinder[i-1][3:6],fragment,num_atoms,path)
            
            # and lets include the initial fit in this, in case it is the best
            cylinder2[5] = cylinder[i]
            
            #======================================================================
            # now want the fit with the best rmsd, given that it has a sensible diameter
            #======================================================================
            okay_radius = (abs(cylinder2[:,6]-2.0)<0.3) # we are within the range of good radii 
            okay_rmsd = cylinder2[:,7]<0.38 # we have a low rmsd
            okay = ((okay_radius + okay_rmsd) == 2)
            cylinder_rms_okay = cylinder2[okay]
            
            if len(cylinder_rms_okay) != 0:
              #if we have something(s) that are within both of these constraints, 
              #pick the one with the best (lowest) rmsd
              best_cylinder2 = cylinder_rms_okay[cylinder_rms_okay[:,7].argmin()]
              #now replace the original cylinder fit with this one
              cylinder[i]=best_cylinder2 
              #print best_cylinder2
            else:
              #now we have no fit with a sensible radius, so pick the one with the best score
              #the score is |radius -2 | + 5*(rmsd - 0.25)
              scores = abs(cylinder2[:,6]-2.0) + 5*(cylinder2[:,7]-0.25) 
              cylinder[i] = cylinder2[scores.argmin()]
          
          #========================================================================
          # We now have a cylinder axis for each sliding window of 6 residues
          #========================================================================
          
          
          unitvector[i] = cylinder[i][3:6] #transfer to unitvector
        
        #==========================================================================
        # now run back and forth down the helix, trying the next fit as a starting point
        # do this until it has been done 10 times, or there has been no change in the fits
        #==========================================================================
        return_changes =1
        iterations = 0
        while return_changes > 0 and iterations < 10:
          iterations +=1
          return_changes = 0
          #print return_changes, 'hi'
          #now go back and try the next fit for each
          for i in xrange((end-start-helix_vector_length+1-2),-1,-1):
            #choose our fragment to fit
            fragment = helix[i:i+helix_vector_length]
            #fit using the fit from the previous chunk
            cylinder3 = cylinder_fit_c(cylinder[i+1][0:3],cylinder[i+1][3:6],fragment,num_atoms,path)
            
            #if the r is sensible, and the rmsd is better by a significant margin,
            if (abs(cylinder3[6]-2.0) < 0.3) and cylinder[i][7] - cylinder3[7] > 0.01:
              #print return_changes, cylinder3[6:8], cylinder[i][6:8]
              cylinder[i] = cylinder3
              return_changes +=1
              
            elif (abs(cylinder[i][6]-2.0) > 0.3) or cylinder[i][7] > 0.38: 
              #else if we have a really quite bad previous fit
              score_old = abs(cylinder[i][6]-2.0) + 5*(cylinder[i][7]-0.25)
              score_new = scores = abs(cylinder3[6]-2.0) + 5*(cylinder3[7]-0.25)
              if score_new < score_old:
                cylinder[i] = cylinder3
                return_changes +=1
          
          #and try the following fit:
          for i in xrange(1,end-start-helix_vector_length+1):
            #choose our fragment to fit
            fragment = helix[i:i+helix_vector_length]
            #fit using the fit from the previous chunk
            cylinder3 = cylinder_fit_c(cylinder[i-1][0:3],cylinder[i-1][3:6],fragment,num_atoms,path)
            
            #if the r is sensible, and the rmsd is better by a significant margin,
            if (abs(cylinder3[6]-2.0) < 0.3) and cylinder[i][7] - cylinder3[7] > 0.01:
              #print return_changes, cylinder3[6:8], cylinder[i][6:8]
              cylinder[i] = cylinder3
              return_changes +=1
              
            elif (abs(cylinder[i][6]-2.0) > 0.3) or cylinder[i][7] > 0.38: 
              #else if we have a really quite bad previous fit
              score_old = abs(cylinder[i][6]-2.0) + 5*(cylinder[i][7]-0.25)
              score_new = scores = abs(cylinder3[6]-2.0) + 5*(cylinder3[7]-0.25)
              if score_new < score_old:
                cylinder[i] = cylinder3
                return_changes +=1
                
        
        #==========================================================================
        # #Now, have best 6 residue fits
        # #Want to see if longer fits may be better
        #==========================================================================
        number_of_possible_angles = len(helix)-2*helix_vector_length+1
        new_cylinder_n = np.zeros([number_of_possible_angles,9]) #for fits on the n side of the kink
        new_cylinder_c = np.zeros([number_of_possible_angles,9]) #for fits on the c side of the kink
        ############### try longer helices everywhere: 3/8/12
        
        #print start, end, len(helix), helix_vector_length, number_of_possible_angles
        for j in xrange(number_of_possible_angles): #for each possible kink point
          #initialise an array
          #want to go from 5 to 10, but only if that is possible - so take min of 6
          # and j, which is 0 if at the first point in the helix
          n_cylinders = np.zeros([min(6,j+1),9]) #so for up to 6 different fits
          #n_cylinders[1]
          
          for i in xrange(min(6,j+1)): # +1 due to python counting (if 
            # it was 0, then there would be no calculation done.
            #work out some longer vectors - 
            #take the fragment
            n_fragment = helix[(j-i):(j+helix_vector_length)]  
            
            #fit, using the original vector as a start
            #n_linepoints, n_unitvector,  fragment, num_atoms
            number_of_fragment_atoms = (i+6)*4
            n_cylinders[i]=cylinder_fit_c(cylinder[j][0:3], 
                                          cylinder[j][3:6],
                                          n_fragment, number_of_fragment_atoms,path)
            #print n_cylinders[i]
          #print n_cylinders
          new_cylinder_n[j] = n_cylinders[n_cylinders[:,7].argmin()] #best is one with smallest rmsd
          
          c_cylinders = np.zeros([min(6,number_of_possible_angles-j),9]) 
          #print i, 'c_cylinders', (len(helix)-j-1)
          for i in xrange(min(6,(number_of_possible_angles-j))):
            #work out some longer vectors
            #take the fragment
            #print i, 'c_cylinders', (len(helix)-j)
            c_fragment = helix[(j+helix_vector_length):
                              (j+2*helix_vector_length+i)]
                
            #fit, using the original vector as a start
            #c_linepoints, n_unitvector,  fragment, num_atoms
            number_of_fragment_atoms = ((i+6)*4)
            
            
            c_cylinders[i]=cylinder_fit_c(cylinder[j+helix_vector_length][0:3], 
                                          cylinder[j+helix_vector_length][3:6],
                                          c_fragment, number_of_fragment_atoms,path)
          #print c_cylinders
          #print n_cylinders
          try:
            new_cylinder_c[j] = c_cylinders[c_cylinders[:,7].argmin()]
          except(ValueError):
            print len(helix), j, number_of_possible_angles,c_cylinders,min(6,(number_of_possible_angles-j))
            print 'error! Should exit'
            #best is one with smallest rmsd
        
        
        
        #==========================================================================
        # We now have the cylinder fits
        # Now, Calculate a kink angle for each residue
        #==========================================================================
        # initialise:
        wobble_angles = []
        outside_helix =[] 
        angles = []
        for i in xrange(len(new_cylinder_c)):
          angles.append(abs(math.degrees(angle(new_cylinder_n[i][3:6], 
                                     new_cylinder_c[i][3:6]))))
          #and the 2nd (wobble) angle is given by: 
          wobble_angles.append(wobble2(helix[i+helix_vector_length-1].CA.xyz,
                                           new_cylinder_n[i][0:3],
                                           new_cylinder_n[i][3:6],
                                           new_cylinder_c[i][3:6]))
          angles2.append(wobble_angles[i])
          #near to 0 for outside the kink
          outside_helix.append(abs(wobble_angles[i]-180))
          if maxangle < angles[i]:
            maxangle = angles[i]
            max_position_helix = i
        maxpos = max_position_helix+helix_vector_length-1+start
        ## want to cut the helix if it contains a kink angle > 100
        if maxangle > break_angle:
          # make 2 new helices - start: max pos, maxpos:end
          tm_helices.append([start, maxpos-1])
          tm_helices.append([maxpos+1, end])
          #continue to next helix, without including this in the written documents
          continue 
        #===========================================================================
        # this is where we identify the kink point(s) 
        #===========================================================================
        ##list the angles, and sort them    
        sorted_angles = list(angles) # make a copy of the list
        sorted_angles.sort() # sort it
        sorted_angles.reverse() # biggest first
        
        helix_kink_angle = sorted_angles[0]
        #work out the position of the angles in decreasing order
        kink_indices = []
        for i in xrange(len(sorted_angles)):
          kink_indices.append(angles.index(sorted_angles[i]))
          #this is a list of the index of the largest to smallest kink angle
        
        #now go down this index list picking out kinks
        kink_pos = list([kink_indices[0]]) #the first entry is obviously a kink
        k=1 #counter
        #step down the index list
        for i in xrange(len(kink_indices)-1):
          #stop if the angle is less than 10 # perhaps these should be a while loop,
          #but it didn't seem to work
          if angles[kink_indices[k]] > 10.0 :
            #have to be at least n away from the last kink
            #calculate how close this residue is to an existing kink
            min_distance = 1000
            for j in xrange(len(kink_pos)):
              #calculate distance from each other kink
              distance = abs(kink_indices[k] - kink_pos[j])
              #take the minimum distance
              min_distance = min(min_distance,distance)
            #providing we are not within 6 of an existing kink     
            if min_distance > 6:
              # and there is an angle under 10 degrees in between
              straight_in_between = 1
              for j in xrange(len(kink_pos)):
                if min(angles[min(kink_indices[j],
                                  kink_indices[k]):
                              max(kink_indices[j],
                                  kink_indices[k])]) > 10.0:
                  straight_in_between = 0 # there is no angle between this proposed kink and any of the others which is under 10 degrees
                #then we have another kink!
              if straight_in_between:
                kink_pos.append(kink_indices[k])
            #add one to our counter
          k=k+1
        
        
        ##start writing a pymol file 
        if pymol:
          start_pymol_script(pdbfile, pdb_code,chain, pdb[start].CA.ires, 
                       pdb[end-1].CA.ires, pymol_file_dir=pymol_file_directory,
                       structure_filename = pdbfile)
        
        # for each kink
        #===========================================================================
        #  work out some longer vectors, and compare the rmsds - picking the fit with
        # the smallest rmsd
        # the indices for the following things are as follows:
        #
        #  RRRRRRRRRRRRRRRRRRR
        #  0123456789...      Residues, annotation
        #  000000         }
        #   111111        }
        #    222222       }
        #     333333      }cylinders
        #      ....       }
        #        666666   }
        #           ..... }
        #       0123456789...  Angles
        #  ..0123456789....  C-alpha
        #===========================================================================    
        #print start, end, kink_pos
        a = 0
        for maxang_helix in kink_pos:  #maxang_helix indices are offset from the helix indices by 6 
          #initialise an array
          best_n_cylinder = new_cylinder_n[maxang_helix]
          best_c_cylinder = new_cylinder_c[maxang_helix]
          
          n_fragment = helix[maxang_helix:(maxang_helix+helix_vector_length)]
          c_fragment = helix[(maxang_helix+helix_vector_length):(maxang_helix+2*helix_vector_length)]
          
          #print kink_indices
          #print len(helix), (maxang_helix-helix_vector_length),maxang_helix,(maxang_helix+helix_vector_length)
          #print maxang_helix, maxang_helix+2*helix_vector_length, len(helix)
          #print helix[0]
          #print n_fragment.get_coords()
          #print c_fragment.get_coords()
          
          ################
          # Work out the kink_angle
          kink_angle = math.degrees(angle
                                    (best_n_cylinder[3:6], best_c_cylinder[3:6]))
          #work out some wobble angles (+- six residues)
          wobble_angles_kink = np.zeros([12])
          for i in xrange(helix_vector_length):
            if (i+maxang_helix)<0 :
              wobble_angles_kink[i] = 400
            else:
              wobble_angles_kink[i] = (wobble2(helix[i+maxang_helix].CA.xyz,
                                        best_n_cylinder[0:3],
                                        best_n_cylinder[3:6],
                                        best_c_cylinder[3:6]))

          for i in xrange(helix_vector_length,2*helix_vector_length):
            #these must have the c and v  vectors reversed. As now effectively looking 
            #from the other end of the helix, these are 360-angle
            
            if len(helix)<= (i+maxang_helix):
              wobble_angles_kink[i] = np.nan
            else:
              wobble_angles_kink[i] = (360-wobble2(helix[i+maxang_helix].CA.xyz,
                                        best_c_cylinder[0:3],
                                        -1*best_c_cylinder[3:6],
                                        -1*best_n_cylinder[3:6]))
          
          
          #position_of_kink_in_section = min(6,maxang_helix)
          
          ## outside is wobble = 180 
          #look at the wobble angles around the kink, and pick the one nearest 180
    #      position_correction = ((abs(180 - wobble_angles_kink
    #                                           [(helix_vector_length - 2):
    #                                            (helix_vector_length + 2)])).argmin())

          #=========================================================================
          # This is repositioning the kink
          #=========================================================================
          if in_out == 'outside':
            position_correction = np.nanargmin(abs(180 - wobble_angles_kink
                                               [(helix_vector_length - 2):
                                                (helix_vector_length + 2)]))
          elif in_out == 'inside':
            position_correction = np.nanargmax(abs(180 - wobble_angles_kink
                                               [(helix_vector_length - 2):
                                                (helix_vector_length + 2)]))
          
          corrected_kink_position_protein = maxang_helix+helix_vector_length-1+start-1+position_correction
          uncorrected_kink_position_protein = maxang_helix+helix_vector_length-1+start
          
          
          if a ==0:
            helix_corrected_biggest_kink_position_protein = corrected_kink_position_protein
            a+=1
          
          #========================================================================
          # Write the information to the Kink file
          #========================================================================
          
          kink_start = max(corrected_kink_position_protein - helix_vector_length,0)
          kink_end = min(kink_start+2*helix_vector_length+1,len(pdb))
          #write kink, starting with general info about the kink, then, specifically about the kink
          
          
          kinkWriter.writerow([pdb_code, pdb[start].CA.ires, pdb[end-1].CA.ires, 
                pdb[corrected_kink_position_protein].CA.ires, pdb[kink_start].CA.ires, 
                pdb[kink_end-1].CA.ires,number_formatter.format(kink_angle),
                sequence[kink_start:kink_end],
                number_formatter.format(best_n_cylinder[6]),
                number_formatter.format(best_n_cylinder[7]), 
                number_formatter.format(best_c_cylinder[6]), 
                number_formatter.format(best_c_cylinder[7])])
          
          #========================================================================
          # Write the kink information to the pymol file
          #========================================================================      
          
          if pymol:
            write_pymol_kink(best_n_cylinder, best_c_cylinder, n_fragment, 
                           c_fragment, pdbfile, pdb_code, chain, pdb[start].CA.ires, 
                           pdb[end-1].CA.ires, 
                           pdb[corrected_kink_position_protein].CA.ires, kink_angle,
                           pymol_file_dir=pymol_file_directory)
          

        #==========================================================================
        # save to mongo
        #==========================================================================
        results.append({
          '_id': ObjectId(),
          'chain': pdb_code,
          'firstresi': res_number_formatter.format(pdb[start].CA.ires),
          'lastresi': res_number_formatter.format(pdb[end-1].CA.ires),
          'kinkresi': res_number_formatter.format(pdb[helix_corrected_biggest_kink_position_protein].CA.ires),
          'kinkang': display_formatter.format(helix_kink_angle),
          'helix': sequence[start:end]
        })

        #==========================================================================
        # Now, print the helix info, displaying it if desired
        #==========================================================================
        
        #print the info about the helix

        if display == True:
          
          #print pdb_code, maxangle, pdb[maxpos].CA.ires, angles
          print pdb_code, res_number_formatter.format(pdb[start].CA.ires), \
              res_number_formatter.format(pdb[end-1].CA.ires), \
              res_number_formatter.format(pdb[helix_corrected_biggest_kink_position_protein].CA.ires), \
              display_formatter.format(helix_kink_angle), '   ', \
              sequence[start:end]
        
        
        helixWriter.writerow([pdb_code, pdb[start].CA.ires, pdb[end-1].CA.ires, 
              pdb[helix_corrected_biggest_kink_position_protein].CA.ires, 
              number_formatter.format(helix_kink_angle),
              sequence[start:end]])
        
        angle_strings = [pdb_code+str(pdb[start].CA.ires),'0','0','0','0','0'] #the code of the helix + first residue number, then 0 for the first 5 residues where no angle has been calculated.
        for angle1 in angles:
          angle_strings.append(number_formatter.format(angle1))
         
        angle_strings.extend(['0','0','0','0','0','0']) #no angle for last 6 residues
        
        angleWriter.writerow(angle_strings)
        #==========================================================================
        # Close the pymol file for this helix
        #==========================================================================
        end_pymol_file(pdb_code, pdb[start].CA.ires,pymol_file_dir=pymol_file_directory)
        
      mongo = pymongo.MongoClient('mongodb://*****:*****@ds035014.mlab.com:35014/kinks')
      db = mongo.kinks
      db.jobs.update({'_id': ObjectId(jobid)}, {'$set':{'results': results}})
  
  return