Ejemplo n.º 1
0
    def _seqalign_pickmatches(self, alignment, window=0):
        """Find the indexes of aligned residues.

    Returns a list of lists with the indexes:
      [match index, sequence a index, sequence b index]
    """
        # Find sequence alignment matches -- "|" or "*"
        # Return the alignment index, the source index, and target index
        #   of each match.
        matches = alignment.matches()
        aligned = []
        for i, ia, ib, im, a, b in zip(itertools.count(0), alignment.i_seqs_a,
                                       alignment.i_seqs_b, matches,
                                       alignment.a, alignment.b):
            # print i, ia, ib, im, a, b
            left = i - window
            if left < 0:
                left = 0
            right = i + window + 1
            if right > len(matches):
                right = len(matches)
            w = matches[left:right]
            append = all((i == '*' or i == '|') for i in w)
            if append:
                # print "window: %s -- left, right: %s %s -- i: %s -- append? %s"%(w, left, right, i, append)
                aligned.append((i, ia, ib))
        return aligned
Ejemplo n.º 2
0
 def _seqalign_score(self, alignment):
     matches = alignment.matches()
     total = len(alignment.a) - alignment.a.count("-")
     equal = matches.count("|")
     similar = matches.count("*")
     score = 100. * (equal + similar) / max(1, total)
     return score
Ejemplo n.º 3
0
 def _seqalign_score(self, alignment):
   matches = alignment.matches()
   total = len(alignment.a) - alignment.a.count("-")
   equal = matches.count("|")
   similar = matches.count("*")
   score = 100.*(equal+similar) / max(1,total)
   return score
Ejemplo n.º 4
0
  def _seqalign_pickmatches(self, alignment, window=0):
    """Find the indexes of aligned residues.

    Returns a list of lists with the indexes:
      [match index, sequence a index, sequence b index]
    """
    # Find sequence alignment matches -- "|" or "*"
    # Return the alignment index, the source index, and target index
    #   of each match.
    matches = alignment.matches()
    aligned = []
    for i, ia, ib, im, a, b in zip(
        itertools.count(0),
        alignment.i_seqs_a,
        alignment.i_seqs_b,
        matches,
        alignment.a,
        alignment.b):
      # print i, ia, ib, im, a, b
      left = i-window
      if left < 0:
        left = 0
      right = i+window+1
      if right > len(matches):
        right = len(matches)
      w = matches[left:right]
      append = all((i == '*' or i == '|') for i in w)
      if append:
        # print "window: %s -- left, right: %s %s -- i: %s -- append? %s"%(w, left, right, i, append)
        aligned.append((i, ia, ib))
    return aligned
Ejemplo n.º 5
0
 def _print_seqalign(self, alignment, quiet=False):
     """Print a sequence alignment details."""
     matches = alignment.matches()
     self.log("Alignment details:")
     self.log("\tmatches after alignment: %s" %
              (matches.count("|") + matches.count("*")))
     self.log("\tsequence alignment:")
     # Since this prints directly, check if quiet.
     if not (quiet or self._quiet):
         # Change the labels to target.desc
         alignment.pretty_print(matches=matches,
                                block_size=50,
                                n_block=1,
                                top_name="moving",
                                bottom_name="fixed")
Ejemplo n.º 6
0
 def _print_seqalign(self, alignment, quiet=False):
   """Print a sequence alignment details."""
   matches = alignment.matches()
   self.log("Alignment details:")
   self.log("\tmatches after alignment: %s"%(matches.count("|") + matches.count("*")))
   self.log("\tsequence alignment:")
   # Since this prints directly, check if quiet.
   if not (quiet or self._quiet):
     # Change the labels to target.desc
     alignment.pretty_print(
       matches   = matches,
       block_size  = 50,
       n_block   = 1,
       top_name  = "moving",
       bottom_name = "fixed")
Ejemplo n.º 7
0
def align_chains_rigid(mov_chain, ref_chain):
    """Align two chains by sequence and fit the moving sites onto the reference.

    The sequences returned by ``extract_sites_for_alignment`` are aligned
    locally (blosum50, gap opening 20, gap extension 2) with the reference
    as sequence "a". The alignment is printed, and every column marked "|"
    (exact match) or "*" (good match) whose sites are flagged usable in both
    chains contributes one pair of sites to a least-squares superposition.

    Args:
        mov_chain: chain to be moved.
        ref_chain: chain used as the fixed reference.

    Returns:
        A tuple ``(lsq_rt, mov_sites_sel, ref_sites_sel)``: the ``rt()`` of
        the least-squares fit, followed by the paired moving and reference
        sites that were fitted.

    Raises:
        Exception: if no pair of usable matching sites is found.
    """
    mov_seq, mov_sites, mov_flags = extract_sites_for_alignment(mov_chain)
    ref_seq, ref_sites, ref_flags = extract_sites_for_alignment(ref_chain)

    align_obj = mmtbx.alignment.align(
        seq_a=ref_seq,
        seq_b=mov_seq,
        gap_opening_penalty=20,
        gap_extension_penalty=2,
        similarity_function='blosum50',
        style='local')

    # Extract the alignment
    alignment = align_obj.extract_alignment()
    # List of matches - '|' for exact match, '*' for good match
    matches = alignment.matches()
    alignment.pretty_print(
        matches=matches,
        block_size=50,
        n_block=1,
        top_name="fixed",
        bottom_name="moving")

    # Collect the site pairs that take part in the fit.
    ref_sites_sel = flex.vec3_double()
    mov_sites_sel = flex.vec3_double()
    for ia, ib, m in zip(alignment.i_seqs_a, alignment.i_seqs_b, matches):
        if m not in ("|", "*"):
            continue
        # Only use residues whose sites are flagged as usable in both chains.
        if ref_flags[ia] and mov_flags[ib]:
            ref_sites_sel.append(ref_sites[ia])
            mov_sites_sel.append(mov_sites[ib])

    if ref_sites_sel.size() == 0:
        raise Exception("No matching C-alpha atoms.")

    lsq_fit = superpose.least_squares_fit(
        reference_sites=ref_sites_sel, other_sites=mov_sites_sel)
    return lsq_fit.rt(), mov_sites_sel, ref_sites_sel
Ejemplo n.º 8
0
 def __init__(self,
              pdb_hierarchy,
              crystal_symmetry,
              angular_difference_threshold_deg=5.,
              sequence_identity_threshold=90.,
              quiet=False):
     """Find groups of chains related by translational NCS (tNCS).

     Works on the first model of the blank-altloc protein/nucleotide
     selection. Each chain is removed from a working copy and compared with
     the P1-expanded copies of the chains still left in it. Chain pairs
     passing the sequence-identity and rotation-angle thresholds become
     edges of a graph; each connected component is one tNCS group.

     Args:
         pdb_hierarchy: hierarchy supplying the chains.
         crystal_symmetry: provides the unit cell and drives expand_to_p1.
         angular_difference_threshold_deg: a pair is linked only if the
             rotation angle of its best superposition is below this value.
         sequence_identity_threshold: a pair is linked only if its percent
             identity exceeds this value.
         quiet: when true, nothing is printed.

     Attributes set:
         ncs_pairs: list of ext.pair objects, one per chain pair of every
             group.
         tncsresults: list [largest order, chain id, vectors, fracscat].
     """
     h = pdb_hierarchy
     # Looser identity cut-off, used only to decide whether a superposition
     # is attempted (80 for the default threshold of 90).
     superposition_threshold = 2 * sequence_identity_threshold - 100.
     # Atom count of the full input hierarchy, taken before the selection.
     n_atoms_all = h.atoms_size()
     s_str = "altloc ' ' and (protein or nucleotide)"
     h = h.select(h.atom_selection_cache().selection(s_str))
     # Working copy holding only the first model; chains are removed from it
     # as the outer loop advances.
     h1 = iotbx.pdb.hierarchy.root()
     h1.append_model(h.models()[0].detached_copy())
     unit_cell = crystal_symmetry.unit_cell()
     # Maps frozenset([c1, orig_c2]) to a list of
     # [p_identity, superposition-or-None], one entry per P1 copy of c2.
     result = {}
     if not quiet:
         print("Find groups of chains related by translational NCS")
     # double loop over chains to find matching pairs related by pure translation
     for c1 in h1.chains():
         # Detach c1 so that the P1 expansion below contains only the
         # chains still left in h1.
         c1.parent().remove_chain(c1)
         nchains = len(h1.models()[0].chains())
         if ([c1.is_protein(), c1.is_na()].count(True) == 0): continue
         r1 = list(c1.residues())
         c1_seq = "".join(c1.as_sequence())
         sc_1_tmp = c1.atoms().extract_xyz()
         h1_p1 = h1.expand_to_p1(crystal_symmetry=crystal_symmetry)
         for (ii, c2) in enumerate(h1_p1.chains()):
             # Map the expanded copy back to its source chain.
             # NOTE(review): assumes expand_to_p1 emits the chains in
             # repeating blocks of nchains in the original order - confirm.
             orig_c2 = h1.models()[0].chains()[ii % nchains]
             r2 = list(c2.residues())
             c2_seq = "".join(c2.as_sequence())
             sites_cart_1, sites_cart_2 = None, None
             sc_2_tmp = c2.atoms().extract_xyz()
             # chains are identical
             if (c1_seq == c2_seq and sc_1_tmp.size() == sc_2_tmp.size()):
                 sites_cart_1 = sc_1_tmp
                 sites_cart_2 = sc_2_tmp
                 p_identity = 100.
             # chains are not identical, do alignment
             else:
                 align_obj = mmtbx.alignment.align(seq_a=c1_seq,
                                                   seq_b=c2_seq)
                 alignment = align_obj.extract_alignment()
                 matches = alignment.matches()
                 equal = matches.count("|")
                 total = len(alignment.a) - alignment.a.count("-")
                 # Percent of the non-gap positions of sequence a that are
                 # exact ("|") matches.
                 p_identity = 100. * equal / max(1, total)
                 if (p_identity > superposition_threshold):
                     sites_cart_1 = flex.vec3_double()
                     sites_cart_2 = flex.vec3_double()
                     for i1, i2, match in zip(alignment.i_seqs_a,
                                              alignment.i_seqs_b, matches):
                         if (i1 is not None and i2 is not None
                                 and match == "|"):
                             r1i, r2i = r1[i1], r2[i2]
                             assert r1i.resname == r2i.resname, [
                                 r1i.resname, r2i.resname, i1, i2
                             ]
                             # Pair each atom of r1i with the first atom of
                             # r2i carrying the same name.
                             for a1 in r1i.atoms():
                                 for a2 in r2i.atoms():
                                     if (a1.name == a2.name):
                                         sites_cart_1.append(a1.xyz)
                                         sites_cart_2.append(a2.xyz)
                                         break
             # superpose two sequence-aligned chains
             if ([sites_cart_1, sites_cart_2].count(None) == 0):
                 lsq_fit_obj = superpose.least_squares_fit(
                     reference_sites=sites_cart_1, other_sites=sites_cart_2)
                 angle = lsq_fit_obj.r.rotation_angle()
                 # Mean displacement between the paired sites, in fractional
                 # coordinates.
                 t_frac = unit_cell.fractionalize(
                     (sites_cart_1 - sites_cart_2).mean())
                 # modf keeps only the fractional part (sign preserved).
                 t_frac = [math.modf(t)[0]
                           for t in t_frac]  # put into [-1,1]
                 # 4/3 of the mean distance of the sites from their centroid.
                 radius = flex.sum(
                     flex.sqrt((sites_cart_1 - sites_cart_1.mean()
                                ).dot())) / sites_cart_1.size() * 4. / 3.
                 # Size of the smaller chain relative to the whole input.
                 fracscat = min(c1.atoms_size(),
                                c2.atoms_size()) / n_atoms_all
                 result.setdefault(frozenset([c1, orig_c2]), []).append([
                     p_identity,
                     [lsq_fit_obj.r, t_frac, angle, radius, fracscat]
                 ])
             else:
                 result.setdefault(frozenset([c1, orig_c2]),
                                   []).append([p_identity, None])
     # Build graph
     g = graph.adjacency_list()
     vertex_handle = {}
     for key in result:
         # Identity of the first recorded copy of this pair.
         seqid = result[key][0][0]
         # Keep the entry with the smallest rotation angle. Entries without a
         # superposition get key 0, so a None entry wins the min().
         # NOTE(review): if sup is None while seqid passes the threshold,
         # sup[2] below raises TypeError - confirm this cannot happen.
         sup = min(result[key],
                   key=lambda s: 0 if s[1] is None else s[1][2])[1]
         # Collapse the per-copy list into a single [seqid, sup] record.
         result[key] = [seqid, sup]
         if ((seqid > sequence_identity_threshold)
                 and (sup[2] < angular_difference_threshold_deg)):
             (c1, c2) = key
             if (c1 not in vertex_handle):
                 vertex_handle[c1] = g.add_vertex(label=c1)
             if (c2 not in vertex_handle):
                 vertex_handle[c2] = g.add_vertex(label=c2)
             g.add_edge(vertex1=vertex_handle[c1],
                        vertex2=vertex_handle[c2])
     # Do connected component analysis and compose final tNCS pairs object
     components = connected_component_algorithm.connected_components(g)
     import itertools
     self.ncs_pairs = []
     # [largest order, chain id, vectors, fracscat]
     self.tncsresults = [0, "", [], 0.0]
     for (i, group) in enumerate(components):
         chains = [g.vertex_label(vertex=v) for v in group]
         fracscats = []
         radii = []
         # NOTE(review): every pair inside the component is looked up, not
         # only pairs joined by an edge; a pair whose sup is None would fail
         # on sup[-1] - confirm.
         for pair in itertools.combinations(chains, 2):
             sup = result[frozenset(pair)][1]
             fracscats.append(sup[-1])
             radii.append(sup[-2])
         # Group-wide averages shared by all pairs of the group.
         fs = sum(fracscats) / len(fracscats)
         # Overwritten for every group, so the last group's value remains.
         self.tncsresults[3] = fs  # store fracscat in array
         rad = sum(radii) / len(radii)
         #import code, traceback; code.interact(local=locals(), banner="".join( traceback.format_stack(limit=10) ) )
         maxorder = 1
         vectors = []
         # Id of the first chain of the first pair; used to count runs of
         # consecutive pairs sharing the same first chain.
         previous_id = next(itertools.combinations(chains, 2))[0].id
         for pair in itertools.combinations(chains, 2):
             sup = result[frozenset(pair)][1]
             ncs_pair = ext.pair(
                 r=sup[0],
                 t=sup[1],
                 radius=rad,
                 radius_estimate=rad,
                 fracscat=fs,
                 rho_mn=flex.double(
                 ),  # rho_mn undefined, needs to be set later
                 id=i)
             self.ncs_pairs.append(ncs_pair)
             # show tNCS pairs in group
             fmt = "group %d chains %s <> %s angle: %4.2f trans.vect.: (%s) fracscat: %5.3f"
             t = ",".join([("%6.3f" % t_).strip() for t_ in sup[1]]).strip()
             if not quiet:
                 print(fmt % (i, pair[0].id, pair[1].id, sup[2], t, fs))
             if pair[0].id == previous_id:
                 # Same first chain as before: extend the current run and
                 # record (fractional vector, orthogonalized vector, angle).
                 maxorder += 1
                 orthoxyz = unit_cell.orthogonalize(sup[1])
                 vectors.append((sup[1], orthoxyz, sup[2]))
             else:
                 # New first chain: restart the run.
                 previous_id = pair[0].id
                 maxorder = 1
                 vectors = []
             # Remember the longest run seen so far across all groups.
             if maxorder > self.tncsresults[0]:
                 self.tncsresults[0] = maxorder
                 self.tncsresults[1] = previous_id
                 self.tncsresults[2] = vectors
     if not quiet:
         print("Largest TNCS order, peptide chain, fracvector, orthvector, angle, fracscat = ", \
          str(self.tncsresults))
Ejemplo n.º 9
0
 def __init__(self,
              pdb_hierarchy,
              crystal_symmetry,
              angular_difference_threshold_deg=10.,
              sequence_identity_threshold=90.):
   """Find chain pairs related by translational NCS (tNCS).

   Each chain of a copy of the blank-altloc protein/nucleotide selection is
   removed from that copy and compared with the P1-expanded copies of the
   chains still left in it. Every pair whose superposition has a rotation
   angle below angular_difference_threshold_deg is printed and stored.

   Args:
     pdb_hierarchy: hierarchy supplying the chains.
     crystal_symmetry: provides the unit cell and drives expand_to_p1.
     angular_difference_threshold_deg: upper limit on the rotation angle
       of the superposition for a pair to be kept.
     sequence_identity_threshold: non-identical chains are superposed only
       if their percent identity exceeds this value.

   Attributes set:
     ncs_pairs: list of ext.pair objects, one per accepted pair.
   """
   h = pdb_hierarchy
   # Atom count of the full input hierarchy, taken before the selection.
   n_atoms_all = h.atoms_size()
   s_str = "altloc ' ' and (protein or nucleotide)"
   h = h.select(h.atom_selection_cache().selection(s_str))
   # Working copy; chains are removed from it as the outer loop advances.
   h1 = h.deep_copy()
   unit_cell = crystal_symmetry.unit_cell()
   # One [rotation, t_frac, angle, radius, fracscat] entry per accepted pair.
   result = []
   # double loop over chains to find matching pairs related by pure translation
   for c1 in h1.chains():
     # Detach c1 so the P1 expansion below contains only the other chains
     # still left in h1.
     c1.parent().remove_chain(c1)
     if([c1.is_protein(), c1.is_na()].count(True)==0): continue
     r1 = list(c1.residues())
     c1_seq = "".join(c1.as_sequence())
     sc_1_tmp = c1.atoms().extract_xyz()
     h1_p1 = h1.expand_to_p1(crystal_symmetry=crystal_symmetry)
     for c2 in h1_p1.chains():
       r2 = list(c2.residues())
       c2_seq = "".join(c2.as_sequence())
       sites_cart_1, sites_cart_2 = None,None
       sc_2_tmp = c2.atoms().extract_xyz()
       # chains are identical
       if(c1_seq==c2_seq and sc_1_tmp.size()==sc_2_tmp.size()):
         sites_cart_1 = sc_1_tmp
         sites_cart_2 = sc_2_tmp
       # chains are not identical, do alignment
       else:
         align_obj = mmtbx.alignment.align(seq_a = c1_seq, seq_b = c2_seq)
         alignment = align_obj.extract_alignment()
         matches = alignment.matches()
         equal = matches.count("|")
         total = len(alignment.a) - alignment.a.count("-")
         # Percent of the non-gap positions of sequence a that are exact
         # ("|") matches.
         p_identity = 100.*equal/max(1,total)
         if(p_identity>sequence_identity_threshold):
           sites_cart_1 = flex.vec3_double()
           sites_cart_2 = flex.vec3_double()
           for i1, i2, match in zip(alignment.i_seqs_a, alignment.i_seqs_b,
                                    matches):
             if(i1 is not None and i2 is not None and match=="|"):
               r1i, r2i = r1[i1], r2[i2]
               assert r1i.resname==r2i.resname, [r1i.resname,r2i.resname,i1,i2]
               # Pair each atom of r1i with the first atom of r2i carrying
               # the same name.
               for a1 in r1i.atoms():
                 for a2 in r2i.atoms():
                   if(a1.name == a2.name):
                     sites_cart_1.append(a1.xyz)
                     sites_cart_2.append(a2.xyz)
                     break
       # superpose two sequence-aligned chains
       if([sites_cart_1,sites_cart_2].count(None)==0):
         lsq_fit_obj = superpose.least_squares_fit(
           reference_sites = sites_cart_1,
           other_sites     = sites_cart_2)
         angle = lsq_fit_obj.r.rotation_angle()
         if(angle < angular_difference_threshold_deg):
           # Mean displacement between the paired sites, in fractional
           # coordinates; modf keeps only the fractional part.
           t_frac = unit_cell.fractionalize((sites_cart_1-sites_cart_2).mean())
           t_frac = [math.modf(t)[0] for t in t_frac] # put into [-1,1]
           # 4/3 of the mean distance of the sites from their centroid.
           radius = flex.sum(flex.sqrt((sites_cart_1-
             sites_cart_1.mean()).dot()))/sites_cart_1.size()*4./3.
           # NOTE(review): with Python 2 integer operands this is floor
           # division unless the file enables true division - confirm.
           fracscat = c1.atoms_size()/n_atoms_all
           result.append([lsq_fit_obj.r, t_frac, angle, radius, fracscat])
           # show tNCS group
           fmt="chains %s <> %s angle: %4.2f trans.vect.: (%s) fracscat: %5.3f"
           t = ",".join([("%6.3f"%t_).strip() for t_ in t_frac]).strip()
           print fmt%(c1.id, c2.id, angle, t, fracscat)
   # compose final tNCS pairs object
   self.ncs_pairs = []
   for _ in result:
     # angle is unpacked but not passed on to ext.pair.
     r, t, angle, rad, fs = _
     ncs_pair = ext.pair(
       r = r,
       t = t,
       radius=rad,
       radius_estimate=rad,
       fracscat=fs,
       rho_mn=flex.double()) # rho_mn undefined, needs to be set later
     self.ncs_pairs.append(ncs_pair)
Ejemplo n.º 10
0
def run(args, command_name="mmtbx.super"):
  """Command-line driver: superpose a moving structure onto a fixed one.

  Args:
    args: command-line arguments. Arguments naming existing files are taken
      as the fixed and then the moving PDB file; anything else is parsed as
      a phil parameter.
    command_name: name shown in the usage line and the banner.

  Raises:
    Sorry: on more than two file names, an argument that cannot be parsed,
      a missing fixed or moving file name, or no matching C-alpha pairs.
  """
  if (len(args) == 0):
    print "usage: %s fixed.pdb moving.pdb [parameter=value ...]" % command_name
    return

  print "#"
  print "#                       ", command_name
  print "#"
  print "# A lightweight sequence-based structure superposition tool."
  print "#"
  print "#"

  # Split the arguments into file names (first fixed, then moving) and
  # phil parameter assignments.
  phil_objects = []
  argument_interpreter = master_params.command_line_argument_interpreter(
    home_scope="super")
  fixed_pdb_file_name = None
  moving_pdb_file_name = None
  for arg in args:
    if (os.path.isfile(arg)):
      if (fixed_pdb_file_name is None): fixed_pdb_file_name = arg
      elif (moving_pdb_file_name is None): moving_pdb_file_name = arg
      else: raise Sorry("Too many file names.")
    else:
      # Any parsing failure is reported as an unknown argument;
      # KeyboardInterrupt is passed through untouched.
      try: command_line_params = argument_interpreter.process(arg=arg)
      except KeyboardInterrupt: raise
      except Exception: raise Sorry("Unknown file or keyword: %s" % arg)
      else: phil_objects.append(command_line_params)

  working_params = master_params.fetch(sources=phil_objects)
  params = working_params.extract()

  def raise_missing(what):
      # vars() supplies the local name `what` to the %(what)s fields.
      raise Sorry("""\
Missing file name for %(what)s structure:
  Please add
    %(what)s=file_name
  to the command line to specify the %(what)s structure.""" % vars())

  # A file name given positionally overrides the phil parameter; otherwise
  # the phil parameter must have been set.
  if (fixed_pdb_file_name is None):
    if (params.super.fixed is None): raise_missing("fixed")
  else:
    params.super.fixed = fixed_pdb_file_name
  if (moving_pdb_file_name is None):
    if (params.super.moving is None): raise_missing("moving")
  else:
    params.super.moving = moving_pdb_file_name

  # Echo the effective parameters between the phil on/off markers.
  print "#Parameters used:"
  print "#phil __ON__"
  print
  working_params = master_params.format(python_object=params)
  working_params.show()
  print
  print "#phil __OFF__"
  print

  print "Reading fixed structure:", params.super.fixed
  fixed_pdb = iotbx.pdb.input(file_name=params.super.fixed)
  print
  print "Reading moving structure:", params.super.moving
  moving_pdb = iotbx.pdb.input(file_name=params.super.moving)
  print

  fixed_seq, fixed_sites, fixed_site_flags = extract_sequence_and_sites(
    pdb_input=fixed_pdb)
  moving_seq, moving_sites, moving_site_flags = extract_sequence_and_sites(
    pdb_input=moving_pdb)

  print "Computing sequence alignment..."
  align_obj = mmtbx.alignment.align(
    seq_a=fixed_seq,
    seq_b=moving_seq,
    gap_opening_penalty=params.super.gap_opening_penalty,
    gap_extension_penalty=params.super.gap_extension_penalty,
    similarity_function=params.super.similarity_matrix,
    style=params.super.alignment_style)
  print "done."
  print

  alignment = align_obj.extract_alignment()
  matches = alignment.matches()
  # "|" marks identical residues, "*" similar ones; total is the number of
  # non-gap positions of the fixed sequence.
  equal = matches.count("|")
  similar = matches.count("*")
  total = len(alignment.a) - alignment.a.count("-")
  alignment.pretty_print(
    matches=matches,
    block_size=50,
    n_block=1,
    top_name="fixed",
    bottom_name="moving",
    comment="""\
The alignment used in the superposition is shown below.

The sequence identity (fraction of | symbols) is %4.1f%%
of the aligned length of the fixed molecule sequence.

The sequence similarity (fraction of | and * symbols) is %4.1f%%
of the aligned length of the fixed molecule sequence.
""" % (100.*equal/max(1,total), 100.*(equal+similar)/max(1,total)))

  # Keep the site pairs of matching columns that are flagged usable in both
  # structures.
  fixed_sites_sel = flex.vec3_double()
  moving_sites_sel = flex.vec3_double()
  for ia,ib,m in zip(alignment.i_seqs_a, alignment.i_seqs_b, matches):
    if (m not in ["|", "*"]): continue
    if (fixed_site_flags[ia] and moving_site_flags[ib]):
      fixed_sites_sel.append(fixed_sites[ia])
      moving_sites_sel.append(moving_sites[ib])

  print "Performing least-squares superposition of C-alpha atom pairs:"
  print "  Number of C-alpha atoms pairs in matching residues"
  print "  indicated by | or * above:", fixed_sites_sel.size()
  if (fixed_sites_sel.size() == 0):
    raise Sorry("No matching C-alpha atoms.")
  lsq_fit = superpose.least_squares_fit(
    reference_sites=fixed_sites_sel,
    other_sites=moving_sites_sel)
  rmsd = fixed_sites_sel.rms_difference(lsq_fit.other_sites_best_fit())
  print "  RMSD between the aligned C-alpha atoms: %.3f" % rmsd
  print

  # Apply the fitted rotation and translation to every atom of the moving
  # structure and write the result.
  print "Writing moved pdb to file: %s" % params.super.moved
  pdb_hierarchy = moving_pdb.construct_hierarchy()
  for atom in pdb_hierarchy.atoms():
    atom.xyz = lsq_fit.r * matrix.col(atom.xyz) + lsq_fit.t
  pdb_hierarchy.write_pdb_file(file_name=params.super.moved, append_end=True)
  print
Ejemplo n.º 11
0
def run(args, command_name="mmtbx.super"):
    """Command-line driver: superpose a moving structure onto a fixed one.

    Args:
        args: command-line arguments. Arguments naming existing files are
            taken as the fixed and then the moving PDB file; anything else
            is parsed as a phil parameter.
        command_name: name shown in the usage line and the banner.

    Raises:
        Sorry: on more than two file names, an argument that cannot be
            parsed, a missing fixed or moving file name, or no matching
            C-alpha pairs.
    """
    if len(args) == 0:
        print "usage: %s fixed.pdb moving.pdb [parameter=value ...]" % command_name
        return

    print "#"
    print "#                       ", command_name
    print "#"
    print "# A lightweight sequence-based structure superposition tool."
    print "#"
    print "#"

    # Split the arguments into file names (first fixed, then moving) and
    # phil parameter assignments.
    phil_objects = []
    argument_interpreter = master_params.command_line_argument_interpreter(home_scope="super")
    fixed_pdb_file_name = None
    moving_pdb_file_name = None
    for arg in args:
        if os.path.isfile(arg):
            if fixed_pdb_file_name is None:
                fixed_pdb_file_name = arg
            elif moving_pdb_file_name is None:
                moving_pdb_file_name = arg
            else:
                raise Sorry("Too many file names.")
        else:
            # Any parsing failure is reported as an unknown argument;
            # KeyboardInterrupt is passed through untouched.
            try:
                command_line_params = argument_interpreter.process(arg=arg)
            except KeyboardInterrupt:
                raise
            except Exception:
                raise Sorry("Unknown file or keyword: %s" % arg)
            else:
                phil_objects.append(command_line_params)

    working_params = master_params.fetch(sources=phil_objects)
    params = working_params.extract()

    def raise_missing(what):
        # vars() supplies the local name `what` to the %(what)s fields.
        raise Sorry(
            """\
Missing file name for %(what)s structure:
  Please add
    %(what)s=file_name
  to the command line to specify the %(what)s structure."""
            % vars()
        )

    # A file name given positionally overrides the phil parameter; otherwise
    # the phil parameter must have been set.
    if fixed_pdb_file_name is None:
        if params.super.fixed is None:
            raise_missing("fixed")
    else:
        params.super.fixed = fixed_pdb_file_name
    if moving_pdb_file_name is None:
        if params.super.moving is None:
            raise_missing("moving")
    else:
        params.super.moving = moving_pdb_file_name

    # Echo the effective parameters between the phil on/off markers.
    print "#Parameters used:"
    print "#phil __ON__"
    print
    working_params = master_params.format(python_object=params)
    working_params.show()
    print
    print "#phil __OFF__"
    print

    print "Reading fixed structure:", params.super.fixed
    fixed_pdb = iotbx.pdb.input(file_name=params.super.fixed)
    print
    print "Reading moving structure:", params.super.moving
    moving_pdb = iotbx.pdb.input(file_name=params.super.moving)
    print

    fixed_seq, fixed_sites, fixed_site_flags = extract_sequence_and_sites(pdb_input=fixed_pdb)
    moving_seq, moving_sites, moving_site_flags = extract_sequence_and_sites(pdb_input=moving_pdb)

    print "Computing sequence alignment..."
    align_obj = mmtbx.alignment.align(
        seq_a=fixed_seq,
        seq_b=moving_seq,
        gap_opening_penalty=params.super.gap_opening_penalty,
        gap_extension_penalty=params.super.gap_extension_penalty,
        similarity_function=params.super.similarity_matrix,
        style=params.super.alignment_style,
    )
    print "done."
    print

    alignment = align_obj.extract_alignment()
    matches = alignment.matches()
    # "|" marks identical residues, "*" similar ones; total is the number of
    # non-gap positions of the fixed sequence.
    equal = matches.count("|")
    similar = matches.count("*")
    total = len(alignment.a) - alignment.a.count("-")
    alignment.pretty_print(
        matches=matches,
        block_size=50,
        n_block=1,
        top_name="fixed",
        bottom_name="moving",
        comment="""\
The alignment used in the superposition is shown below.

The sequence identity (fraction of | symbols) is %4.1f%%
of the aligned length of the fixed molecule sequence.

The sequence similarity (fraction of | and * symbols) is %4.1f%%
of the aligned length of the fixed molecule sequence.
"""
        % (100.0 * equal / max(1, total), 100.0 * (equal + similar) / max(1, total)),
    )

    # Keep the site pairs of matching columns that are flagged usable in
    # both structures.
    fixed_sites_sel = flex.vec3_double()
    moving_sites_sel = flex.vec3_double()
    for ia, ib, m in zip(alignment.i_seqs_a, alignment.i_seqs_b, matches):
        if m not in ["|", "*"]:
            continue
        if fixed_site_flags[ia] and moving_site_flags[ib]:
            fixed_sites_sel.append(fixed_sites[ia])
            moving_sites_sel.append(moving_sites[ib])

    print "Performing least-squares superposition of C-alpha atom pairs:"
    print "  Number of C-alpha atoms pairs in matching residues"
    print "  indicated by | or * above:", fixed_sites_sel.size()
    if fixed_sites_sel.size() == 0:
        raise Sorry("No matching C-alpha atoms.")
    lsq_fit = superpose.least_squares_fit(reference_sites=fixed_sites_sel, other_sites=moving_sites_sel)
    rmsd = fixed_sites_sel.rms_difference(lsq_fit.other_sites_best_fit())
    print "  RMSD between the aligned C-alpha atoms: %.3f" % rmsd
    print

    # Apply the fitted rotation and translation to every atom of the moving
    # structure and write the result.
    print "Writing moved pdb to file: %s" % params.super.moved
    pdb_hierarchy = moving_pdb.construct_hierarchy()
    for atom in pdb_hierarchy.atoms():
        atom.xyz = lsq_fit.r * matrix.col(atom.xyz) + lsq_fit.t
    pdb_hierarchy.write_pdb_file(file_name=params.super.moved, append_end=True)
    print
Ejemplo n.º 12
0
 def __init__(self,
              pdb_hierarchy,
              crystal_symmetry,
              angular_difference_threshold_deg=5.,
              sequence_identity_threshold=90.):
   """Find groups of chains related by translational NCS (tNCS).

   Works on the first model of the blank-altloc protein/nucleotide
   selection. Each chain is removed from a working copy and compared with
   the P1-expanded copies of the chains still left in it. Chain pairs
   passing the sequence-identity and rotation-angle thresholds become edges
   of a graph; each connected component is one tNCS group, which is printed
   and stored pair by pair.

   Args:
     pdb_hierarchy: hierarchy supplying the chains.
     crystal_symmetry: provides the unit cell and drives expand_to_p1.
     angular_difference_threshold_deg: a pair is linked only if the
       rotation angle of its best superposition is below this value.
     sequence_identity_threshold: a pair is linked only if its percent
       identity exceeds this value.

   Attributes set:
     ncs_pairs: list of ext.pair objects, one per chain pair of every group.
   """
   h = pdb_hierarchy
   # Looser identity cut-off, used only to decide whether a superposition is
   # attempted (80 for the default threshold of 90).
   superposition_threshold = 2*sequence_identity_threshold - 100.
   # Atom count of the full input hierarchy, taken before the selection.
   n_atoms_all = h.atoms_size()
   s_str = "altloc ' ' and (protein or nucleotide)"
   h = h.select(h.atom_selection_cache().selection(s_str))
   # Working copy holding only the first model; chains are removed from it
   # as the outer loop advances.
   h1 = iotbx.pdb.hierarchy.root()
   h1.append_model(h.models()[0].detached_copy())
   unit_cell = crystal_symmetry.unit_cell()
   # Maps frozenset([c1, orig_c2]) to a list of
   # [p_identity, superposition-or-None], one entry per P1 copy of c2.
   result = {}
   print "Find groups of chains related by translational NCS"
   # double loop over chains to find matching pairs related by pure translation
   for c1 in h1.chains():
     # Detach c1 so that the P1 expansion below contains only the chains
     # still left in h1.
     c1.parent().remove_chain(c1)
     nchains = len(h1.models()[0].chains())
     if([c1.is_protein(), c1.is_na()].count(True)==0): continue
     r1 = list(c1.residues())
     c1_seq = "".join(c1.as_sequence())
     sc_1_tmp = c1.atoms().extract_xyz()
     h1_p1 = h1.expand_to_p1(crystal_symmetry=crystal_symmetry)
     for (ii,c2) in enumerate(h1_p1.chains()):
       # Map the expanded copy back to its source chain.
       # NOTE(review): assumes expand_to_p1 emits the chains in repeating
       # blocks of nchains in the original order - confirm.
       orig_c2 = h1.models()[0].chains()[ii%nchains]
       r2 = list(c2.residues())
       c2_seq = "".join(c2.as_sequence())
       sites_cart_1, sites_cart_2 = None,None
       sc_2_tmp = c2.atoms().extract_xyz()
       # chains are identical
       if(c1_seq==c2_seq and sc_1_tmp.size()==sc_2_tmp.size()):
         sites_cart_1 = sc_1_tmp
         sites_cart_2 = sc_2_tmp
         p_identity = 100.
       # chains are not identical, do alignment
       else:
         align_obj = mmtbx.alignment.align(seq_a = c1_seq, seq_b = c2_seq)
         alignment = align_obj.extract_alignment()
         matches = alignment.matches()
         equal = matches.count("|")
         total = len(alignment.a) - alignment.a.count("-")
         # Percent of the non-gap positions of sequence a that are exact
         # ("|") matches.
         p_identity = 100.*equal/max(1,total)
         if(p_identity>superposition_threshold):
           sites_cart_1 = flex.vec3_double()
           sites_cart_2 = flex.vec3_double()
           for i1, i2, match in zip(alignment.i_seqs_a, alignment.i_seqs_b,
                                    matches):
             if(i1 is not None and i2 is not None and match=="|"):
               r1i, r2i = r1[i1], r2[i2]
               assert r1i.resname==r2i.resname, [r1i.resname,r2i.resname,i1,i2]
               # Pair each atom of r1i with the first atom of r2i carrying
               # the same name.
               for a1 in r1i.atoms():
                 for a2 in r2i.atoms():
                   if(a1.name == a2.name):
                     sites_cart_1.append(a1.xyz)
                     sites_cart_2.append(a2.xyz)
                     break
       # superpose two sequence-aligned chains
       if([sites_cart_1,sites_cart_2].count(None)==0):
         lsq_fit_obj = superpose.least_squares_fit(
           reference_sites = sites_cart_1,
           other_sites     = sites_cart_2)
         angle = lsq_fit_obj.r.rotation_angle()
         # Mean displacement between the paired sites, in fractional
         # coordinates; modf keeps only the fractional part.
         t_frac = unit_cell.fractionalize((sites_cart_1-sites_cart_2).mean())
         t_frac = [math.modf(t)[0] for t in t_frac] # put into [-1,1]
         # 4/3 of the mean distance of the sites from their centroid.
         radius = flex.sum(flex.sqrt((sites_cart_1-
           sites_cart_1.mean()).dot()))/sites_cart_1.size()*4./3.
         # NOTE(review): with Python 2 integer operands this is floor
         # division unless the file enables true division - confirm.
         fracscat = min(c1.atoms_size(),c2.atoms_size())/n_atoms_all
         result.setdefault( frozenset([c1,orig_c2]), [] ).append( [p_identity,[lsq_fit_obj.r, t_frac, angle, radius, fracscat]] )
       else:
         result.setdefault( frozenset([c1,orig_c2]), [] ).append( [p_identity,None] )
   # Build graph
   g = graph.adjacency_list()
   vertex_handle = {}
   for key in result:
     # Identity of the first recorded copy of this pair.
     seqid = result[key][0][0]
     # Keep the entry with the smallest rotation angle. Entries without a
     # superposition get key 0, so a None entry wins the min().
     # NOTE(review): if sup is None while seqid passes the threshold, sup[2]
     # below raises TypeError - confirm this cannot happen.
     sup = min( result[key],key=lambda s:0 if s[1] is None else s[1][2])[1]
     # Collapse the per-copy list into a single [seqid, sup] record.
     result[key] = [seqid,sup]
     if ((seqid > sequence_identity_threshold) and (sup[2] < angular_difference_threshold_deg)):
       (c1,c2) = key
       if (c1 not in vertex_handle):
         vertex_handle[c1] = g.add_vertex(label=c1)
       if (c2 not in vertex_handle):
         vertex_handle[c2] = g.add_vertex(label=c2)
       g.add_edge(vertex1=vertex_handle[c1],vertex2=vertex_handle[c2])
   # Do connected component analysis and compose final tNCS pairs object
   components = connected_component_algorithm.connected_components(g)
   import itertools
   self.ncs_pairs = []
   for (i,group) in enumerate(components):
     chains = [g.vertex_label(vertex=v) for v in group]
     fracscats = []
     radii = []
     # NOTE(review): every pair inside the component is looked up, not only
     # pairs joined by an edge; a pair whose sup is None would fail on
     # sup[-1] - confirm.
     for pair in itertools.combinations(chains,2):
       sup = result[frozenset(pair)][1]
       fracscats.append(sup[-1])
       radii.append(sup[-2])
     # Group-wide averages shared by all pairs of the group.
     fs = sum(fracscats)/len(fracscats)
     rad = sum(radii)/len(radii)
     for pair in itertools.combinations(chains,2):
       sup = result[frozenset(pair)][1]
       ncs_pair = ext.pair(
         r = sup[0],
         t = sup[1],
         radius = rad,
         radius_estimate = rad,
         fracscat = fs,
         rho_mn = flex.double(), # rho_mn undefined, needs to be set later
         id = i)
       self.ncs_pairs.append(ncs_pair)
       # show tNCS pairs in group
       fmt="group %d chains %s <> %s angle: %4.2f trans.vect.: (%s) fracscat: %5.3f"
       t = ",".join([("%6.3f"%t_).strip() for t_ in sup[1]]).strip()
       print fmt%(i, pair[0].id, pair[1].id, sup[2], t, fs)