Example #1
0
 def get_clusters(self):
   """Cluster the graph and return the resulting groups of vertex indices.

   The graph is (re)built with ``self.build_graph()``, then betweenness
   centrality clustering is applied to ``self.g`` with thresholds 9, 8, ... 4
   until no connected component is larger than
   ``self.maxnum_residues_in_cluster`` (or the lowest threshold has been
   tried).

   Returns:
     A list of lists.  Every vertex index is shifted by +1 (presumably to
     turn 0-based vertices into 1-based residue numbers -- confirm against
     the caller).  A component that is still larger than the limit is sorted
     and cut into two halves.  An empty list is returned when the graph has
     no components.
   """
   self.build_graph()
   limit = self.maxnum_residues_in_cluster
   components = []
   threshold = 9
   while threshold >= 4:
     # Only the effect on self.g is used here; the returned edge centrality
     # map is not needed.
     clustering_algorithm.betweenness_centrality_clustering(
       graph=self.g, threshold=threshold)
     components = cca.connected_components(graph=self.g)
     # An empty graph has nothing to split; max() of an empty sequence would
     # raise ValueError, so treat that case as finished.
     if not components or max(len(c) for c in components) <= limit:
       break
     threshold -= 1
   final_components = []
   for component in components:
     shifted = [x + 1 for x in component]
     if len(shifted) > limit:
       # NOTE(review): a single halving does not guarantee that both halves
       # respect the limit when the component is more than twice as large.
       shifted.sort()
       half = len(shifted) // 2
       final_components.append(shifted[:half])
       final_components.append(shifted[half:])
     else:
       final_components.append(shifted)
   return final_components
Example #2
0
    def connected_segments(self):
        """Return the connected components of ``self.graph`` as atom lists.

        Each component reported by the connected-component algorithm is
        translated vertex by vertex through ``self.atom_for``.
        """
        from boost_adaptbx.graph import connected_component_algorithm as cca

        components = cca.connected_components(graph=self.graph)
        lookup = self.atom_for

        segments = []
        for component in components:
            segments.append([lookup[vertex] for vertex in component])
        return segments
    def manipulation(self, g):
        """Check component detection after adding three vertices and one edge.

        Three vertices are added to ``g`` and the first two are joined.  The
        connected-component analysis is then asserted to report exactly two
        components: the joined pair and the third vertex on its own.
        """
        linked_a = g.add_vertex()
        linked_b = g.add_vertex()
        loner = g.add_vertex()
        g.add_edge(vertex1=linked_a, vertex2=linked_b)

        found = cca.connected_components(graph=g)
        self.assertEqual(len(found), 2)

        found_sets = {frozenset(members) for members in found}
        expected_sets = {frozenset((linked_a, linked_b)), frozenset((loner,))}
        self.assertEqual(found_sets, expected_sets)
    def build_and_test(self, g, params):
        """Build a test graph in ``g``, cluster it and verify the outcome.

        ``params`` is a triple ``(threshold, exp_ecs, exp_comps)``: the
        clustering threshold, the expected centrality values for every edge
        except the fourth one built, and optionally (may be ``None``) the
        expected components given as collections of vertex positions.
        """
        threshold, expected_centralities, expected_components = params
        vertices, edges = self.graph_build(g)
        centrality = clustering_algorithm.betweenness_centrality_clustering(
            graph=g, threshold=threshold)

        # The fourth edge must be missing from the returned centrality map.
        absent_edge = edges[3]
        self.assertTrue(absent_edge not in centrality)
        self.assertEqual(len(centrality), len(expected_centralities))
        remaining = [centrality[e] for e in edges if e != absent_edge]
        self.assertEqual(expected_centralities, remaining)

        if expected_components is None:
            return

        from boost_adaptbx.graph import connected_component_algorithm as cca
        found = {frozenset(c) for c in cca.connected_components(graph=g)}
        wanted = {
            frozenset(vertices[i] for i in c) for c in expected_components
        }
        self.assertEqual(found, wanted)
  def build_and_test(self, g, params):
    """Populate ``g`` via ``graph_build``, run clustering, check the results.

    ``params`` unpacks to ``( threshold, exp_ecs, exp_comps )``.  The edge
    centrality map must not contain the fourth edge and must match
    ``exp_ecs`` for all other edges; when ``exp_comps`` is not ``None`` the
    connected components of ``g`` must match it as well.
    """
    threshold, exp_ecs, exp_comps = params
    vds, eds = self.graph_build( g )
    ecmap = clustering_algorithm.betweenness_centrality_clustering(
      threshold = threshold,
      graph = g,
      )

    skipped = eds[3]
    self.assertTrue( skipped not in ecmap )
    self.assertEqual( len( ecmap ), len( exp_ecs ) )

    observed = []
    for ed in eds:
      if ed != skipped:
        observed.append( ecmap[ ed ] )
    self.assertEqual( exp_ecs, observed )

    if exp_comps is not None:
      from boost_adaptbx.graph import connected_component_algorithm as cca

      actual = set()
      for comp in cca.connected_components( graph = g ):
        actual.add( frozenset( comp ) )

      expected = set()
      for comp in exp_comps:
        expected.add( frozenset( [ vds[i] for i in comp ] ) )

      self.assertEqual( actual, expected )
Example #6
0
 def __init__(self,
              pdb_hierarchy,
              crystal_symmetry,
              angular_difference_threshold_deg=5.,
              sequence_identity_threshold=90.,
              quiet=False):
     """Find groups of chains related by translational NCS.

     Every chain of the first model is compared with the chains that come
     after it (expanded to P1).  Pairs that pass the sequence identity and
     rotation angle thresholds become edges of a graph, and each connected
     component of that graph is reported as one tNCS group.

     Args:
       pdb_hierarchy: input hierarchy; only atoms selected by
         "altloc ' ' and (protein or nucleotide)" in its first model are used.
       crystal_symmetry: provides the unit cell and is used to expand the
         remaining chains to P1.
       angular_difference_threshold_deg: a pair is linked only if the
         rotation angle of its least-squares superposition is below this.
       sequence_identity_threshold: a pair is linked only if its percent
         sequence identity is above this.
       quiet: when true nothing is printed.

     Attributes set:
       ncs_pairs: list of ext.pair objects, one per chain pair of each group.
       tncsresults: [largest order, chain id, vectors, fracscat], as labelled
         in the final printed summary.
     """
     h = pdb_hierarchy
     # Looser identity cutoff used to decide whether a superposition is
     # attempted at all (80 for the default threshold of 90).
     superposition_threshold = 2 * sequence_identity_threshold - 100.
     n_atoms_all = h.atoms_size()
     s_str = "altloc ' ' and (protein or nucleotide)"
     h = h.select(h.atom_selection_cache().selection(s_str))
     # Work on a detached copy of the first model so chains can be removed
     # from it below.
     h1 = iotbx.pdb.hierarchy.root()
     h1.append_model(h.models()[0].detached_copy())
     unit_cell = crystal_symmetry.unit_cell()
     # Maps frozenset([chain, chain]) -> list of [p_identity, superposition]
     # entries; superposition is None when no fit was attempted.
     result = {}
     if not quiet:
         print("Find groups of chains related by translational NCS")
     # double loop over chains to find matching pairs related by pure translation
     for c1 in h1.chains():
         # Remove c1 from h1 so it is only compared with the chains still left.
         c1.parent().remove_chain(c1)
         nchains = len(h1.models()[0].chains())
         if ([c1.is_protein(), c1.is_na()].count(True) == 0): continue
         r1 = list(c1.residues())
         c1_seq = "".join(c1.as_sequence())
         sc_1_tmp = c1.atoms().extract_xyz()
         h1_p1 = h1.expand_to_p1(crystal_symmetry=crystal_symmetry)
         for (ii, c2) in enumerate(h1_p1.chains()):
             # Map the P1 copy back to a chain of h1; assumes expand_to_p1
             # emits the nchains chains repeatedly in the same order
             # -- TODO confirm.
             orig_c2 = h1.models()[0].chains()[ii % nchains]
             r2 = list(c2.residues())
             c2_seq = "".join(c2.as_sequence())
             sites_cart_1, sites_cart_2 = None, None
             sc_2_tmp = c2.atoms().extract_xyz()
             # chains are identical
             if (c1_seq == c2_seq and sc_1_tmp.size() == sc_2_tmp.size()):
                 sites_cart_1 = sc_1_tmp
                 sites_cart_2 = sc_2_tmp
                 p_identity = 100.
             # chains are not identical, do alignment
             else:
                 align_obj = mmtbx.alignment.align(seq_a=c1_seq,
                                                   seq_b=c2_seq)
                 alignment = align_obj.extract_alignment()
                 matches = alignment.matches()
                 equal = matches.count("|")
                 total = len(alignment.a) - alignment.a.count("-")
                 p_identity = 100. * equal / max(1, total)
                 if (p_identity > superposition_threshold):
                     sites_cart_1 = flex.vec3_double()
                     sites_cart_2 = flex.vec3_double()
                     # Collect coordinates of equally named atoms from
                     # residues the alignment marks as matching.
                     for i1, i2, match in zip(alignment.i_seqs_a,
                                              alignment.i_seqs_b, matches):
                         if (i1 is not None and i2 is not None
                                 and match == "|"):
                             r1i, r2i = r1[i1], r2[i2]
                             assert r1i.resname == r2i.resname, [
                                 r1i.resname, r2i.resname, i1, i2
                             ]
                             for a1 in r1i.atoms():
                                 for a2 in r2i.atoms():
                                     if (a1.name == a2.name):
                                         sites_cart_1.append(a1.xyz)
                                         sites_cart_2.append(a2.xyz)
                                         break
             # superpose two sequence-aligned chains
             if ([sites_cart_1, sites_cart_2].count(None) == 0):
                 lsq_fit_obj = superpose.least_squares_fit(
                     reference_sites=sites_cart_1, other_sites=sites_cart_2)
                 angle = lsq_fit_obj.r.rotation_angle()
                 # Translation taken as the mean coordinate difference,
                 # expressed in fractional coordinates.
                 t_frac = unit_cell.fractionalize(
                     (sites_cart_1 - sites_cart_2).mean())
                 t_frac = [math.modf(t)[0]
                           for t in t_frac]  # put into [-1,1]
                 # Mean distance of the sites from their centroid, scaled
                 # by 4/3.
                 radius = flex.sum(
                     flex.sqrt((sites_cart_1 - sites_cart_1.mean()
                                ).dot())) / sites_cart_1.size() * 4. / 3.
                 fracscat = min(c1.atoms_size(),
                                c2.atoms_size()) / n_atoms_all
                 result.setdefault(frozenset([c1, orig_c2]), []).append([
                     p_identity,
                     [lsq_fit_obj.r, t_frac, angle, radius, fracscat]
                 ])
             else:
                 result.setdefault(frozenset([c1, orig_c2]),
                                   []).append([p_identity, None])
     # Build graph
     g = graph.adjacency_list()
     vertex_handle = {}
     for key in result:
         # Identity is read from the first recorded entry; the superposition
         # kept is the one with the smallest angle (entries without a
         # superposition sort as angle 0).
         seqid = result[key][0][0]
         sup = min(result[key],
                   key=lambda s: 0 if s[1] is None else s[1][2])[1]
         result[key] = [seqid, sup]
         # NOTE(review): sup[2] would raise if sup is None here -- confirm
         # that a pair above the identity threshold always has a fit.
         if ((seqid > sequence_identity_threshold)
                 and (sup[2] < angular_difference_threshold_deg)):
             (c1, c2) = key
             if (c1 not in vertex_handle):
                 vertex_handle[c1] = g.add_vertex(label=c1)
             if (c2 not in vertex_handle):
                 vertex_handle[c2] = g.add_vertex(label=c2)
             g.add_edge(vertex1=vertex_handle[c1],
                        vertex2=vertex_handle[c2])
     # Do connected component analysis and compose final tNCS pairs object
     components = connected_component_algorithm.connected_components(g)
     import itertools
     self.ncs_pairs = []
     self.tncsresults = [0, "", [], 0.0]
     for (i, group) in enumerate(components):
         chains = [g.vertex_label(vertex=v) for v in group]
         # Average fracscat and radius over all chain pairs of the group.
         fracscats = []
         radii = []
         for pair in itertools.combinations(chains, 2):
             sup = result[frozenset(pair)][1]
             fracscats.append(sup[-1])
             radii.append(sup[-2])
         fs = sum(fracscats) / len(fracscats)
         # Overwritten for every group, so the last group's value remains.
         self.tncsresults[3] = fs  # store fracscat in array
         rad = sum(radii) / len(radii)
         #import code, traceback; code.interact(local=locals(), banner="".join( traceback.format_stack(limit=10) ) )
         # maxorder and vectors track the current run of consecutive pairs
         # that share the same first chain id.
         maxorder = 1
         vectors = []
         previous_id = next(itertools.combinations(chains, 2))[0].id
         for pair in itertools.combinations(chains, 2):
             sup = result[frozenset(pair)][1]
             ncs_pair = ext.pair(
                 r=sup[0],
                 t=sup[1],
                 radius=rad,
                 radius_estimate=rad,
                 fracscat=fs,
                 rho_mn=flex.double(
                 ),  # rho_mn undefined, needs to be set later
                 id=i)
             self.ncs_pairs.append(ncs_pair)
             # show tNCS pairs in group
             fmt = "group %d chains %s <> %s angle: %4.2f trans.vect.: (%s) fracscat: %5.3f"
             t = ",".join([("%6.3f" % t_).strip() for t_ in sup[1]]).strip()
             if not quiet:
                 print(fmt % (i, pair[0].id, pair[1].id, sup[2], t, fs))
             if pair[0].id == previous_id:
                 maxorder += 1
                 orthoxyz = unit_cell.orthogonalize(sup[1])
                 vectors.append((sup[1], orthoxyz, sup[2]))
             else:
                 # A different first chain starts a new run.
                 previous_id = pair[0].id
                 maxorder = 1
                 vectors = []
             # Remember the longest run seen so far across all groups.
             if maxorder > self.tncsresults[0]:
                 self.tncsresults[0] = maxorder
                 self.tncsresults[1] = previous_id
                 self.tncsresults[2] = vectors
     if not quiet:
         print("Largest TNCS order, peptide chain, fracvector, orthvector, angle, fracscat = ", \
          str(self.tncsresults))
Example #7
0
def connected_components(miller_array: cctbx.miller.array) -> tuple:
    """
    Identify connected regions of missing reflections in the asymmetric unit.

    This is achieved by first generating the complete set of possible miller indices,
    then performing connected components analysis on a graph of nearest neighbours in
    the list of missing reflections.

    Args:
        miller_array:  The input list of reflections.

    Returns:
        A tuple ``(complete_set, missing_regions)``. ``complete_set`` is the
        non-anomalous complete set of possible miller indices, mapped back to the
        setting of the input. ``missing_regions`` is a list of miller sets, one per
        distinct connected region of more than one missing reflection, sorted by
        decreasing size and also in the input setting. The list is empty when no
        reflections are missing.
    """

    # Map to primitive setting for centred cells, otherwise true missing reflections
    # won't be identified as connected as a result of being separated by systematically
    # absent reflections.
    cb_op_to_primitive = miller_array.change_of_basis_op_to_primitive_setting()
    cb_op_primitive_inp = cb_op_to_primitive.inverse()
    miller_array = miller_array.change_basis(cb_op_to_primitive)

    # First generate the missing_set of reflections. We want the full sphere of missing
    # reflections to allow us to find connected regions that cross the boundary of the
    # asu.
    unique = miller_array.unique_under_symmetry().map_to_asu()
    unique = unique.generate_bijvoet_mates()
    complete_set = unique.complete_set()
    missing_set = complete_set.lone_set(unique)
    missing_set = missing_set.expand_to_p1().customized_copy(
        crystal_symmetry=missing_set.crystal_symmetry())

    # The complete set handed back to the caller is the same object on every return
    # path: non-anomalous and mapped back to the input setting.
    complete_set_inp = (
        unique.as_non_anomalous_set().complete_set().change_basis(
            cb_op_primitive_inp))

    if missing_set.size() == 0:
        return complete_set_inp, []

    # Now find the nearest neighbours.
    mi = missing_set.indices().as_vec3_double().as_double()
    k = 6
    ann = AnnAdaptor(data=mi, dim=3, k=k)
    ann.query(mi)

    # Construct the graph of connected missing reflections
    g = graph.adjacency_list(
        graph_type="undirected",
        vertex_type="vector",
        edge_type="set",
    )
    # Neighbours count as connected up to a face-diagonal step in index space.
    distance_cutoff = 2**0.5
    for i in range(missing_set.size()):
        # Results for reflection i occupy the k consecutive slots from i * k.
        ik = i * k
        for i_ann in range(k):
            if ann.distances[ik + i_ann] <= distance_cutoff:
                j = ann.nn[ik + i_ann]
                g.add_edge(i, j)

    # Now do the connected components analysis, filtering out lone missing reflections
    components = [c for c in cca.connected_components(graph=g) if len(c) > 1]

    # Determine the unique miller indices for each component within the asu.
    # Components that map onto the same asu indices are kept only once.
    unique_mi = []
    unique_ms = []
    for c in components:
        ms = (missing_set.select(flex.size_t(list(c))).customized_copy(
            crystal_symmetry=miller_array).as_non_anomalous_set().map_to_asu())
        ms = ms.unique_under_symmetry()
        asu_indices = set(ms.indices())
        if asu_indices not in unique_mi:
            unique_ms.append(ms)
            unique_mi.append(asu_indices)

    # Sort connected regions by size
    unique_ms.sort(key=lambda region: region.size(), reverse=True)

    # Map indices back to input setting
    return (
        complete_set_inp,
        [ms.change_basis(cb_op_primitive_inp) for ms in unique_ms],
    )
Example #8
0
def Test():
    """Test function for all functions provided above.

    Returns:
      Empty string on success, string describing the problem on failure.
    """

    def _componentProblem(g, expectedCount, expectedMax, case, prefix):
        # Compare the connected components of g with the expected number of
        # components and the expected size of the largest one.  Returns ""
        # when both match, otherwise a message starting with prefix.
        components = cca.connected_components(graph=g)
        if len(components) != expectedCount:
            return (prefix + "Expected " + str(expectedCount) +
                    " components, found " + str(len(components)) +
                    " for case " + str(case))
        # -1 is reported when there are no components at all.
        maxLen = max([len(c) for c in components] or [-1])
        if maxLen != expectedMax:
            return (prefix + "Expected max sized component of " +
                    str(expectedMax) + ", found " + str(maxLen) +
                    " for case " + str(case))
        return ""

    # Construct a set of Mover/atoms that will be used to test the routines.  They will all be part of the
    # same residue and they will all have the same unit radius in the extraAtomInfo associated with them.
    # There will be a set of five along the X axis, with one pair overlapping slightly and the others
    # spaced 2.1 units centre to centre (surfaces 0.1 apart) so that they will overlap when using a probe
    # radius of 0.25 (diameter 0.5).
    # There will be another one that is obliquely located away from the first such that it will overlap
    # in a bounding-box test but not in a true atom-comparison test for a probe with radius 0.25.  There
    # will be a final one 10 units above the origin.
    rad = 1.0
    probeRad = 0.25
    locs = [[0.0, 0.0, 0.0], [1.9, 0.0, 0.0]]
    locs.extend([1.9 + 2.1 * i, 0.0, 0.0] for i in range(1, 4))
    # Oblique atom: just out of reach along the diagonal (0.1 short of
    # touching distance plus probe diameter), placed at 45 degrees.
    delta = 2 * rad + 2 * probeRad - 0.1
    dist = -delta * math.sin(math.pi / 4)
    locs.append([dist, dist, 0.0])
    locs.append([0.0, 0.0, 10.0])

    name = " H  "
    ag = pdb.hierarchy.atom_group()
    ag.resname = "LYS"
    atoms = pdb.hierarchy.af_shared_atom()
    extras = []
    movers = []
    baseAtom = pdb.hierarchy.atom()
    for loc in locs:
        a = pdb.hierarchy.atom(parent=ag, other=baseAtom)
        a.name = name
        a.xyz = loc
        atoms.append(a)
        extras.append(probe.ExtraAtomInfo(rad))
        # The map is rebuilt on every pass so each Mover is constructed with a
        # map covering the atoms created so far; the map from the last pass
        # (covering all atoms) is the one used by the graph tests below.
        extrasMap = probeExt.ExtraAtomInfoMap(atoms, extras)
        movers.append(Movers.MoverNull(a, extrasMap))
    # Fix the sequence numbers, which are otherwise all 0
    atoms.reset_i_seq()

    # Generate a table of parameters and expected results.  The first entry in each row is
    # the probe radius.  The second is the expected number of connected components.
    # The third is the size of the largest connected component.
    _expectedCases = [[0.0, 5, 3], [probeRad, 2, 6], [100, 1, 7]]

    # Specify the probe radius and run the test.  Compare the results to what we expect.
    for i, e in enumerate(_expectedCases):
        probeRadius = e[0]
        g = _InteractionGraphAABB(movers, extrasMap, probeRadius)

        # Find the connected components of the graph and compare their counts and maximum size to
        # what is expected.
        problem = _componentProblem(g, e[1], e[2], i, "AABB ")
        if problem:
            return problem

    # Generate a table of parameters and expected results.  The first entry in each row is
    # the probe radius.  The second is the expected number of connected components.
    # The third is the size of the largest connected component.
    # The fourth (not present in the AABB table above) is the set of expected sizes of
    # atomMoverSets across all atoms; not one per atom but across all atoms what answers are
    # expected.  The easiest to explain is the 100-radius entry, which should have all atoms interacting
    # with all Movers so the only answer across all atoms is 7.  The 0-radius case has only one pair
    # of overlaps, so only up to 2 Movers per atom.  The middle case has some Movers overlapping with
    # two neighbors, so up to 3 Movers associated with a given atom.
    _expectedCases = [
        # One of the pairs actually does not overlap for the all-pairs test.  Other conditions are the same
        # as the AABB tests.
        [0.0, 6, 2, {1, 2}],
        [probeRad, 2, 6, {1, 2, 3}],
        [100, 1, 7, {7}]
    ]

    # Specify the probe radius and run the test.  Compare the results to what we expect.
    for i, e in enumerate(_expectedCases):
        probeRadius = e[0]
        g, am = InteractionGraphAllPairs(movers, extrasMap, probeRadius)

        # Find the connected components of the graph and compare their counts and maximum size to
        # what is expected.
        problem = _componentProblem(g, e[1], e[2], i, "")
        if problem:
            return problem

        # Check atom/Mover overlaps by finding the set of lengths that are present across all atoms.
        lengths = {len(am[a]) for a in atoms}
        if lengths != e[3]:
            return "Expected set of overlap counts " + str(
                e[3]) + ", found " + str(lengths) + " for case " + str(i)

    return ""
Example #9
0
 def __init__(self,
              pdb_hierarchy,
              crystal_symmetry,
              angular_difference_threshold_deg=5.,
              sequence_identity_threshold=90.,
              quiet=False):
   """Find groups of chains related by translational NCS.

   Every chain of the first model is compared with the chains that come after
   it (expanded to P1).  Pairs that pass the sequence identity and rotation
   angle thresholds become edges of a graph, and each connected component of
   that graph is reported as one tNCS group.

   Args:
     pdb_hierarchy: input hierarchy; only atoms selected by
       "altloc ' ' and (protein or nucleotide)" in its first model are used.
     crystal_symmetry: provides the unit cell and is used to expand the
       remaining chains to P1.
     angular_difference_threshold_deg: a pair is linked only if the rotation
       angle of its least-squares superposition is below this.
     sequence_identity_threshold: a pair is linked only if its percent
       sequence identity is above this.
     quiet: when true nothing is printed (default False keeps the original
       output).

   Attributes set:
     ncs_pairs: list of ext.pair objects, one per chain pair of each group.
   """
   h = pdb_hierarchy
   # Looser identity cutoff used to decide whether a superposition is
   # attempted at all (80 for the default threshold of 90).
   superposition_threshold = 2*sequence_identity_threshold - 100.
   n_atoms_all = h.atoms_size()
   s_str = "altloc ' ' and (protein or nucleotide)"
   h = h.select(h.atom_selection_cache().selection(s_str))
   # Work on a detached copy of the first model so chains can be removed.
   h1 = iotbx.pdb.hierarchy.root()
   h1.append_model(h.models()[0].detached_copy())
   unit_cell = crystal_symmetry.unit_cell()
   # Maps frozenset([chain, chain]) -> list of [p_identity, superposition]
   # entries; superposition is None when no fit was attempted.
   result = {}
   if not quiet:
     print("Find groups of chains related by translational NCS")
   # double loop over chains to find matching pairs related by pure translation
   for c1 in h1.chains():
     # Remove c1 from h1 so it is only compared with the chains still left.
     c1.parent().remove_chain(c1)
     nchains = len(h1.models()[0].chains())
     if([c1.is_protein(), c1.is_na()].count(True)==0): continue
     r1 = list(c1.residues())
     c1_seq = "".join(c1.as_sequence())
     sc_1_tmp = c1.atoms().extract_xyz()
     h1_p1 = h1.expand_to_p1(crystal_symmetry=crystal_symmetry)
     for (ii,c2) in enumerate(h1_p1.chains()):
       # Map the P1 copy back to a chain of h1; assumes expand_to_p1 emits the
       # nchains chains repeatedly in the same order -- TODO confirm.
       orig_c2 = h1.models()[0].chains()[ii%nchains]
       r2 = list(c2.residues())
       c2_seq = "".join(c2.as_sequence())
       sites_cart_1, sites_cart_2 = None,None
       sc_2_tmp = c2.atoms().extract_xyz()
       # chains are identical
       if(c1_seq==c2_seq and sc_1_tmp.size()==sc_2_tmp.size()):
         sites_cart_1 = sc_1_tmp
         sites_cart_2 = sc_2_tmp
         p_identity = 100.
       # chains are not identical, do alignment
       else:
         align_obj = mmtbx.alignment.align(seq_a = c1_seq, seq_b = c2_seq)
         alignment = align_obj.extract_alignment()
         matches = alignment.matches()
         equal = matches.count("|")
         total = len(alignment.a) - alignment.a.count("-")
         p_identity = 100.*equal/max(1,total)
         if(p_identity>superposition_threshold):
           sites_cart_1 = flex.vec3_double()
           sites_cart_2 = flex.vec3_double()
           # Collect coordinates of equally named atoms from residues the
           # alignment marks as matching.
           for i1, i2, match in zip(alignment.i_seqs_a, alignment.i_seqs_b,
                                    matches):
             if(i1 is not None and i2 is not None and match=="|"):
               r1i, r2i = r1[i1], r2[i2]
               assert r1i.resname==r2i.resname, [r1i.resname,r2i.resname,i1,i2]
               for a1 in r1i.atoms():
                 for a2 in r2i.atoms():
                   if(a1.name == a2.name):
                     sites_cart_1.append(a1.xyz)
                     sites_cart_2.append(a2.xyz)
                     break
       # superpose two sequence-aligned chains
       if([sites_cart_1,sites_cart_2].count(None)==0):
         lsq_fit_obj = superpose.least_squares_fit(
           reference_sites = sites_cart_1,
           other_sites     = sites_cart_2)
         angle = lsq_fit_obj.r.rotation_angle()
         # Translation taken as the mean coordinate difference, expressed in
         # fractional coordinates.
         t_frac = unit_cell.fractionalize((sites_cart_1-sites_cart_2).mean())
         t_frac = [math.modf(t)[0] for t in t_frac] # put into [-1,1]
         # Mean distance of the sites from their centroid, scaled by 4/3.
         radius = flex.sum(flex.sqrt((sites_cart_1-
           sites_cart_1.mean()).dot()))/sites_cart_1.size()*4./3.
         # float() forces true division: two integer atom counts would
         # otherwise truncate to 0 under Python 2 without
         # "from __future__ import division".
         fracscat = min(c1.atoms_size(),c2.atoms_size())/float(n_atoms_all)
         result.setdefault( frozenset([c1,orig_c2]), [] ).append(
           [p_identity,[lsq_fit_obj.r, t_frac, angle, radius, fracscat]] )
       else:
         result.setdefault( frozenset([c1,orig_c2]), [] ).append(
           [p_identity,None] )
   # Build graph
   g = graph.adjacency_list()
   vertex_handle = {}
   for key in result:
     # Identity is read from the first recorded entry; the superposition kept
     # is the one with the smallest angle (entries without a superposition
     # sort as angle 0).
     seqid = result[key][0][0]
     sup = min( result[key],key=lambda s:0 if s[1] is None else s[1][2])[1]
     result[key] = [seqid,sup]
     # NOTE(review): sup[2] would raise if sup is None here -- confirm that a
     # pair above the identity threshold always has a fit.
     if ((seqid > sequence_identity_threshold) and (sup[2] < angular_difference_threshold_deg)):
       (c1,c2) = key
       if (c1 not in vertex_handle):
         vertex_handle[c1] = g.add_vertex(label=c1)
       if (c2 not in vertex_handle):
         vertex_handle[c2] = g.add_vertex(label=c2)
       g.add_edge(vertex1=vertex_handle[c1],vertex2=vertex_handle[c2])
   # Do connected component analysis and compose final tNCS pairs object
   components = connected_component_algorithm.connected_components(g)
   import itertools
   self.ncs_pairs = []
   for (i,group) in enumerate(components):
     chains = [g.vertex_label(vertex=v) for v in group]
     # Average fracscat and radius over all chain pairs of the group.
     fracscats = []
     radii = []
     for pair in itertools.combinations(chains,2):
       sup = result[frozenset(pair)][1]
       fracscats.append(sup[-1])
       radii.append(sup[-2])
     fs = sum(fracscats)/len(fracscats)
     rad = sum(radii)/len(radii)
     for pair in itertools.combinations(chains,2):
       sup = result[frozenset(pair)][1]
       ncs_pair = ext.pair(
         r = sup[0],
         t = sup[1],
         radius = rad,
         radius_estimate = rad,
         fracscat = fs,
         rho_mn = flex.double(), # rho_mn undefined, needs to be set later
         id = i)
       self.ncs_pairs.append(ncs_pair)
       # show tNCS pairs in group
       if not quiet:
         fmt="group %d chains %s <> %s angle: %4.2f trans.vect.: (%s) fracscat: %5.3f"
         t = ",".join([("%6.3f"%t_).strip() for t_ in sup[1]]).strip()
         # Single-argument print(...) behaves the same on Python 2 and 3.
         print(fmt%(i, pair[0].id, pair[1].id, sup[2], t, fs))