Esempio n. 1
 def get_clusters(self):
   """Split the graph ``self.g`` into connected components of bounded size.

   Betweenness-centrality clustering is run on ``self.g`` with a threshold
   that starts at 9 and is lowered by one after every unsuccessful pass
   (down to 4) until the largest connected component has at most
   ``self.maxnum_residues_in_cluster`` members. Components that are still
   larger than that limit afterwards are cut into two halves.

   Returns:
     list of lists: every cluster, with each vertex index increased by one.
     Empty when the graph has no components.
   """
   limit = self.maxnum_residues_in_cluster
   threshold = 9
   clustering = True
   while clustering and threshold >= 4:
     # The returned edge-centrality map is not needed here; the call is made
     # for its effect on self.g (presumably edges are removed in place, since
     # the components are recomputed from self.g right afterwards).
     clustering_algorithm.betweenness_centrality_clustering(
       graph=self.g, threshold=threshold)
     components = cca.connected_components(graph=self.g)
     if not components or max(len(c) for c in components) <= limit:
       clustering = False
     else:
       # Some component is still too large: retry with a lower threshold.
       # Decrementing on this branch is what lets the loop terminate.
       threshold -= 1
   final_components = []
   for component in components:
     # Shift every vertex index up by one.
     shifted = [x + 1 for x in component]
     if len(shifted) > limit:
       # Clustering could not shrink this component; halve it instead.
       index = len(shifted) // 2
       final_components.append(shifted[:index])
       final_components.append(shifted[index:])
     else:
       final_components.append(shifted)
   return final_components
Esempio n. 2
    def connected_segments(self):
        """Return the connected components of ``self.graph`` as atom lists.

        Each component produced by the connected-component analysis is
        translated vertex by vertex through the ``self.atom_for`` lookup.
        """
        from boost_adaptbx.graph import connected_component_algorithm as cca

        vertex_groups = cca.connected_components(graph=self.graph)
        lookup = self.atom_for

        segments = []
        for vertex_group in vertex_groups:
            segments.append([lookup[vertex] for vertex in vertex_group])
        return segments
    def manipulation(self, g):
        """Check connected components on a three-vertex graph with one edge.

        Adds three vertices to *g*, joins the first two with an edge and
        asserts that the analysis yields exactly two components: the joined
        pair and the isolated third vertex.

        Args:
            g: Graph object offering ``add_vertex`` and ``add_edge``.
        """
        vd1 = g.add_vertex()
        vd2 = g.add_vertex()
        vd3 = g.add_vertex()

        g.add_edge(vertex1=vd1, vertex2=vd2)
        components = cca.connected_components(graph=g)

        self.assertEqual(len(components), 2)
        # Compare as sets of frozensets so that neither the order of the
        # components nor the order of vertices inside a component matters.
        self.assertEqual(
            set([frozenset(c) for c in components]),
            set([frozenset([vd1, vd2]), frozenset([vd3])]))
    def build_and_test(self, g, params):
        """Build the test graph in *g*, cluster it and verify the outcome.

        Args:
            g: Graph object passed to ``self.graph_build``.
            params: ``(threshold, exp_ecs, exp_comps)`` - the clustering
                threshold, the expected edge-centrality values (in the order
                of the built edges, skipping ``eds[3]``) and the expected
                connected components given as collections of indices into
                the built vertices, or ``None`` to skip the component check.
        """

        (threshold, exp_ecs, exp_comps) = params
        (vds, eds) = self.graph_build(g)
        ecmap = clustering_algorithm.betweenness_centrality_clustering(
            graph=g,
            threshold=threshold,
        )

        # Edge 3 must be absent from the centrality map; every other edge
        # must carry its expected value.
        self.assertTrue(eds[3] not in ecmap)
        self.assertEqual(len(ecmap), len(exp_ecs))
        self.assertEqual(exp_ecs, [ecmap[ed] for ed in eds if ed != eds[3]])

        if exp_comps is not None:
            from boost_adaptbx.graph import connected_component_algorithm as cca
            comps = cca.connected_components(graph=g)
            # Order-insensitive comparison of the component partition.
            self.assertEqual(
                set(frozenset(c) for c in comps),
                set(frozenset(vds[i] for i in c) for c in exp_comps))
  def build_and_test(self, g, params):
    """Build the test graph in *g*, cluster it and verify the outcome.

    Args:
      g: Graph object passed to ``self.graph_build``.
      params: ``( threshold, exp_ecs, exp_comps )`` - the clustering
        threshold, the expected edge-centrality values (in the order of the
        built edges, skipping ``eds[3]``) and the expected connected
        components given as collections of indices into the built vertices,
        or ``None`` to skip the component check.
    """

    ( threshold, exp_ecs, exp_comps ) = params
    ( vds, eds ) = self.graph_build( g )
    ecmap = clustering_algorithm.betweenness_centrality_clustering(
      graph = g,
      threshold = threshold,
      )

    # Edge 3 must be absent from the centrality map; every other edge must
    # carry its expected value.
    self.assertTrue( eds[3] not in ecmap )
    self.assertEqual( len( ecmap ), len( exp_ecs ) )
    self.assertEqual( exp_ecs, [ ecmap[ ed ] for ed in eds if ed != eds[3] ] )

    if exp_comps is not None:
      from boost_adaptbx.graph import connected_component_algorithm as cca
      comps = cca.connected_components( graph = g )
      # Order-insensitive comparison of the component partition.
      self.assertEqual(
        set( frozenset( c ) for c in comps ),
        set( frozenset( vds[i] for i in c ) for c in exp_comps )
        )
Esempio n. 6
 def __init__(self,
     # NOTE(review): the rest of the signature is not visible in this listing. The
     # body reads pdb_hierarchy, crystal_symmetry, sequence_identity_threshold,
     # angular_difference_threshold_deg and quiet - presumably the parameters.
     #
     # Overview of the visible body: chains are compared pairwise (sequence
     # identity plus least-squares superposition), the chains of qualifying pairs
     # become graph vertices, and the connected components of that graph are
     # reported as tNCS groups. self.ncs_pairs is initialised and
     # self.tncsresults is filled along the way.
     h = pdb_hierarchy
     superposition_threshold = 2 * sequence_identity_threshold - 100.
     n_atoms_all = h.atoms_size()
     s_str = "altloc ' ' and (protein or nucleotide)"
     # NOTE(review): the right-hand side of the next assignment is missing in this
     # view (s_str above is otherwise unused, so it was presumably consumed here).
     h =
     h1 = iotbx.pdb.hierarchy.root()
     unit_cell = crystal_symmetry.unit_cell()
     # result maps frozenset({chain, chain}) -> list of [p_identity, superposition-or-None]
     result = {}
     if not quiet:
         print("Find groups of chains related by translational NCS")
     # double loop over chains to find matching pairs related by pure translation
     for c1 in h1.chains():
         nchains = len(h1.models()[0].chains())
         # skip chains that are neither protein nor nucleic acid
         if ([c1.is_protein(), c1.is_na()].count(True) == 0): continue
         r1 = list(c1.residues())
         c1_seq = "".join(c1.as_sequence())
         sc_1_tmp = c1.atoms().extract_xyz()
         h1_p1 = h1.expand_to_p1(crystal_symmetry=crystal_symmetry)
         for (ii, c2) in enumerate(h1_p1.chains()):
             # map the P1-expanded chain back onto a chain of h1 by position
             orig_c2 = h1.models()[0].chains()[ii % nchains]
             r2 = list(c2.residues())
             c2_seq = "".join(c2.as_sequence())
             sites_cart_1, sites_cart_2 = None, None
             sc_2_tmp = c2.atoms().extract_xyz()
             # chains are identical
             if (c1_seq == c2_seq and sc_1_tmp.size() == sc_2_tmp.size()):
                 sites_cart_1 = sc_1_tmp
                 sites_cart_2 = sc_2_tmp
                 p_identity = 100.
             # chains are not identical, do alignment
             # NOTE(review): the lines below are indented like a branch body, but the
             # line introducing it (presumably an else:) is not visible, and the
             # align() call is cut off after its first argument.
                 align_obj = mmtbx.alignment.align(seq_a=c1_seq,
                 alignment = align_obj.extract_alignment()
                 matches = alignment.matches()
                 equal = matches.count("|")
                 total = len(alignment.a) - alignment.a.count("-")
                 # percentage of identical positions; max(1, total) avoids division by zero
                 p_identity = 100. * equal / max(1, total)
                 if (p_identity > superposition_threshold):
                     sites_cart_1 = flex.vec3_double()
                     sites_cart_2 = flex.vec3_double()
                     for i1, i2, match in zip(alignment.i_seqs_a,
                                              alignment.i_seqs_b, matches):
                         if (i1 is not None and i2 is not None
                                 and match == "|"):
                             r1i, r2i = r1[i1], r2[i2]
                             assert r1i.resname == r2i.resname, [
                                 r1i.resname, r2i.resname, i1, i2
                             for a1 in r1i.atoms():
                                 for a2 in r2i.atoms():
                                     # NOTE(review): both operands of this comparison and
                                     # the body of the if are missing in this view.
                                     if ( ==
             # superpose two sequence-aligned chains
             if ([sites_cart_1, sites_cart_2].count(None) == 0):
                 lsq_fit_obj = superpose.least_squares_fit(
                     reference_sites=sites_cart_1, other_sites=sites_cart_2)
                 angle = lsq_fit_obj.r.rotation_angle()
                 t_frac = unit_cell.fractionalize(
                     (sites_cart_1 - sites_cart_2).mean())
                 t_frac = [math.modf(t)[0]
                           for t in t_frac]  # put into [-1,1]
                 radius = flex.sum(
                     flex.sqrt((sites_cart_1 - sites_cart_1.mean()
                                ).dot())) / sites_cart_1.size() * 4. / 3.
                 fracscat = min(c1.atoms_size(),
                                c2.atoms_size()) / n_atoms_all
                 # NOTE(review): the next append is truncated (its p_identity element and
                 # closing brackets are missing), and the second append below was
                 # presumably the body of a lost else: branch - confirm.
                 result.setdefault(frozenset([c1, orig_c2]), []).append([
                     [lsq_fit_obj.r, t_frac, angle, radius, fracscat]
                 result.setdefault(frozenset([c1, orig_c2]),
                                   []).append([p_identity, None])
     # Build graph
     g = graph.adjacency_list()
     vertex_handle = {}
     for key in result:
         seqid = result[key][0][0]
         # keep the entry with the smallest key value; entries without a
         # superposition count as 0, otherwise element [2] of the superposition
         # list is used (the rotation angle, judging by the list built above)
         sup = min(result[key],
                   key=lambda s: 0 if s[1] is None else s[1][2])[1]
         result[key] = [seqid, sup]
         if ((seqid > sequence_identity_threshold)
                 and (sup[2] < angular_difference_threshold_deg)):
             (c1, c2) = key
             if (c1 not in vertex_handle):
                 vertex_handle[c1] = g.add_vertex(label=c1)
             if (c2 not in vertex_handle):
                 vertex_handle[c2] = g.add_vertex(label=c2)
             # NOTE(review): no edge between the two vertices is added in the visible
             # code; presumably an add_edge call is missing here - confirm.
     # Do connected component analysis and compose final tNCS pairs object
     components = connected_component_algorithm.connected_components(g)
     import itertools
     self.ncs_pairs = []
     # tncsresults layout: [largest order, chain id, vectors, fracscat]
     self.tncsresults = [0, "", [], 0.0]
     for (i, group) in enumerate(components):
         chains = [g.vertex_label(vertex=v) for v in group]
         fracscats = []
         radii = []
         # NOTE(review): nothing is appended to fracscats or radii in the visible
         # loop body, so the two averages below would divide by zero as shown;
         # the appends are presumably among the missing lines.
         for pair in itertools.combinations(chains, 2):
             sup = result[frozenset(pair)][1]
         fs = sum(fracscats) / len(fracscats)
         self.tncsresults[3] = fs  # store fracscat in array
         rad = sum(radii) / len(radii)
         #import code, traceback; code.interact(local=locals(), banner="".join( traceback.format_stack(limit=10) ) )
         maxorder = 1
         vectors = []
         # id of the first chain of the first pair in this group
         previous_id = next(itertools.combinations(chains, 2))[0].id
         for pair in itertools.combinations(chains, 2):
             sup = result[frozenset(pair)][1]
             # NOTE(review): the keyword arguments of ext.pair(...) are missing in
             # this view; only the tail of the call is left.
             ncs_pair = ext.pair(
                 ),  # rho_mn undefined, needs to be set later
             # show tNCS pairs in group
             fmt = "group %d chains %s <> %s angle: %4.2f trans.vect.: (%s) fracscat: %5.3f"
             t = ",".join([("%6.3f" % t_).strip() for t_ in sup[1]]).strip()
             if not quiet:
                 print(fmt % (i, pair[0].id, pair[1].id, sup[2], t, fs))
             # NOTE(review): as shown, the counter is incremented and then reset
             # within the same branch; presumably an else: line before the
             # "previous_id = ..." assignment was lost - confirm.
             if pair[0].id == previous_id:
                 maxorder += 1
                 orthoxyz = unit_cell.orthogonalize(sup[1])
                 vectors.append((sup[1], orthoxyz, sup[2]))
                 previous_id = pair[0].id
                 maxorder = 1
                 vectors = []
             # remember the largest order seen so far together with its chain id
             # and translation vectors
             if maxorder > self.tncsresults[0]:
                 self.tncsresults[0] = maxorder
                 self.tncsresults[1] = previous_id
                 self.tncsresults[2] = vectors
     # NOTE(review): the final print call below is cut off after its first argument.
     if not quiet:
         print("Largest TNCS order, peptide chain, fracvector, orthvector, angle, fracscat = ", \
Esempio n. 7
def connected_components(miller_array: cctbx.miller.array, ) -> [{}]:
    # NOTE(review): the prose lines that follow read like the body of a docstring
    # whose triple-quote delimiters (and apparently its section headers) were
    # lost; as written they are not valid Python. Several calls further down are
    # also cut off - see the NOTE(review) markers.
    Identify connected regions of missing reflections in the asymmetric unit.

    This is achieved by first generating the complete set of possible miller indices,
    then performing connected components analysis on a graph of nearest neighbours in
    the list of missing reflections.

        miller_array:  The input list of reflections.

        The list of miller sets for each connected region of missing reflections. The
        first item in the list will be the complete set of all possible miller indices.

    # Map to primitive setting for centred cells, otherwise true missing reflections
    # won't be identified as connected as a result of being separated by systematically
    # absent reflections.
    cb_op_to_primitive = miller_array.change_of_basis_op_to_primitive_setting()
    miller_array = miller_array.change_basis(cb_op_to_primitive)

    # First generate the missing_set of reflections. We want the full sphere of missing
    # reflections to allow us to find connected regions that cross the boundary of the
    # asu.
    unique = miller_array.unique_under_symmetry().map_to_asu()
    unique = unique.generate_bijvoet_mates()
    complete_set = unique.complete_set()
    missing_set = complete_set.lone_set(unique)
    # NOTE(review): the arguments and closing parenthesis of customized_copy(...)
    # are missing in this view.
    missing_set = missing_set.expand_to_p1().customized_copy(

    # Nothing is missing: return early with an empty list of regions.
    # NOTE(review): unlike the final return, this path hands back complete_set
    # without applying the inverse change of basis - confirm this is intended.
    if missing_set.size() == 0:
        return complete_set, []

    # Now find the nearest neighbours.
    mi = missing_set.indices().as_vec3_double().as_double()
    k = 6
    ann = AnnAdaptor(data=mi, dim=3, k=k)
    # NOTE(review): ann.distances and ann.nn are read below, but no query call is
    # visible here; presumably one was dropped from this listing - confirm.

    # Construct the graph of connected missing reflections
    # NOTE(review): the arguments and closing parenthesis of adjacency_list(...)
    # are missing in this view.
    g = graph.adjacency_list(
    distance_cutoff = 2**0.5
    for i in range(missing_set.size()):
        # results are stored flat: entries ik .. ik + k - 1 belong to reflection i
        ik = i * k
        for i_ann in range(k):
            if ann.distances[ik + i_ann] <= distance_cutoff:
                j = ann.nn[ik + i_ann]
                g.add_edge(i, j)

    # Now do the connected components analysis, filtering out lone missing reflections
    components = [c for c in cca.connected_components(graph=g) if len(c) > 1]

    # Determine the unique miller indices for each component within the asu
    unique_mi = []
    unique_ms = []
    for i, c in enumerate(components):
        # NOTE(review): the parenthesised expression assigned to ms is missing in
        # this view, as is the body of the "if mi not in unique_mi" test below
        # (presumably it records ms and mi in the two lists above).
        ms = (
        ms = ms.unique_under_symmetry()
        mi = set(ms.indices())
        if mi not in unique_mi:

    # Sort connected regions by size
    unique_ms = sorted(unique_ms, key=lambda ms: ms.size(), reverse=True)

    # Map indices back to input setting
    cb_op_primitive_inp = cb_op_to_primitive.inverse()
    # NOTE(review): the first element of the returned tuple and the closing
    # parenthesis are missing in this view.
    return (
        [ms.change_basis(cb_op_primitive_inp) for ms in unique_ms],
Esempio n. 8
def Test():
    # NOTE(review): the docstring opened on the next line is never closed in this
    # listing, and several statements further down are visibly truncated (see the
    # NOTE(review) markers). The function cannot run as shown.
    """Test function for all functions provided above.
  returns: Empty string on success, string describing the problem on failure.

    # Construct a set of Mover/atoms that will be used to test the routines.  They will all be part of the
    # same residue and they will all have the same unit radius in the extraAtomInfo associated with them.
    # There will be a set of five along the X axis, with one pair overlapping slightly and the others
    # spaced 0.45 units apart so that they will overlap when using a probe radius of 0.25 (diameter 0.5).
    # There will be another one that is obliquely located away from the first such that it will overlap
    # in a bounding-box test but not in a true atom-comparison test for a probe with radius 0.25.  There
    # will be a final one 10 units above the origin.
    rad = 1.0
    probeRad = 0.25
    locs = [[0.0, 0.0, 0.0], [1.9, 0.0, 0.0]]
    # NOTE(review): loc is computed but never stored in the visible code. The
    # expected cases below assume seven atoms, so presumably a locs.append(loc)
    # line was dropped from this listing.
    for i in range(1, 4):
        loc = [1.9 + 2.1 * i, 0.0, 0.0]
    delta = 2 * rad + 2 * probeRad - 0.1
    # NOTE(review): dist is assigned twice in a row, so the cosine value is
    # overwritten by the sine value before it is used.
    dist = -delta * math.cos(math.pi / 4)
    dist = -delta * math.sin(math.pi / 4)
    locs.append([dist, dist, 0.0])
    locs.append([0.0, 0.0, 10.0])

    name = " H  "
    ag = pdb.hierarchy.atom_group()
    ag.resname = "LYS"
    atoms = pdb.hierarchy.af_shared_atom()
    extras = []
    movers = []
    baseAtom = pdb.hierarchy.atom()
    for i in range(len(locs)):
        # NOTE(review): the next line is malformed (it assigns to a call result);
        # it looks like several statements that set the atom's name and position
        # were fused together. Nothing is appended to atoms or extras in the
        # visible code either.
        a = pdb.hierarchy.atom(parent=ag, other=baseAtom) = name = locs[i]
        e = probe.ExtraAtomInfo(rad)
        # the map is rebuilt on every iteration from the current atoms and extras
        extrasMap = probeExt.ExtraAtomInfoMap(atoms, extras)
        movers.append(Movers.MoverNull(a, extrasMap))
    # Fix the sequence numbers, which are otherwise all 0
    # NOTE(review): no statement follows the comment above in this view.

    # Generate a table of parameters and expected results.  The first entry in each row is
    # the probe radius.  The second is the expected number of connected components.
    # The third is the size of the largest connected component.
    _expectedCases = [[0.0, 5, 3], [probeRad, 2, 6], [100, 1, 7]]

    # Specify the probe radius and run the test.  Compare the results to what we expect.
    for i, e in enumerate(_expectedCases):
        probeRadius = e[0]
        g = _InteractionGraphAABB(movers, extrasMap, probeRadius)

        # Find the connected components of the graph and compare their counts and maximum size to
        # what is expected.
        components = cca.connected_components(graph=g)
        if len(components) != e[1]:
            return "AABB Expected " + str(e[1]) + " components, found " + str(
                len(components)) + " for case " + str(i)
        maxLen = -1
        for c in components:
            if len(c) > maxLen:
                maxLen = len(c)
        if maxLen != e[2]:
            return "AABB Expected max sized component of " + str(
                e[2]) + ", found " + str(maxLen) + " for case " + str(i)

    # Generate a table of parameters and expected results.  The first entry in each row is
    # the probe radius.  The second is the expected number of connected components.
    # The third is the size of the largest connected component.
    # The fourth (not present in the AABB table above) is the set of expected sizes of
    # atomMoverSets across all atoms; not one per atom but across all atoms what answers are
    # expected.  The easiest to explain is the 100-radius entry, which should have all atoms interacting
    # with all Movers so the only answer across all atoms is 7.  The 0-radius case has only one pair
    # of overlaps, so only up to 2 Movers per atom.  The middle case has some Movers overlapping with
    # two neighbors, so up to 3 Movers associated with a given atom.
    # NOTE(review): the closing bracket of the list below is missing in this view.
    _expectedCases = [
        # One of the pairs actually does not overlap for the all-pairs test.  Other conditions are the same
        # as the AABB tests.
        [0.0, 6, 2, {1, 2}],
        [probeRad, 2, 6, {1, 2, 3}],
        [100, 1, 7, {7}]

    # Specify the probe radius and run the test.  Compare the results to what we expect.
    for i, e in enumerate(_expectedCases):
        probeRadius = e[0]
        g, am = InteractionGraphAllPairs(movers, extrasMap, probeRadius)

        # Find the connected components of the graph and compare their counts and maximum size to
        # what is expected.
        components = cca.connected_components(graph=g)
        if len(components) != e[1]:
            return "Expected " + str(e[1]) + " components, found " + str(
                len(components)) + " for case " + str(i)
        maxLen = -1
        for c in components:
            if len(c) > maxLen:
                maxLen = len(c)
        if maxLen != e[2]:
            return "Expected max sized component of " + str(
                e[2]) + ", found " + str(maxLen) + " for case " + str(i)

        # Check atom/Mover overlaps by finding the set of lengths that are present accross all atoms.
        lengths = set()
        # NOTE(review): the body of the loop below is missing in this view, so
        # lengths is never filled (and am, returned above, is never used);
        # presumably the per-atom Mover counts were collected here.
        for a in atoms:
        if lengths != e[3]:
            return "Expected set of overlap counts " + str(
                e[3]) + ", found " + str(lengths) + " for case " + str(i)

    # An empty string signals that every case passed.
    return ""
Esempio n. 9
 def __init__(self,
   # NOTE(review): the rest of the signature is not visible in this listing. The
   # body reads pdb_hierarchy, crystal_symmetry, sequence_identity_threshold and
   # angular_difference_threshold_deg - presumably the parameters.
   #
   # Overview of the visible body: chains are compared pairwise (sequence
   # identity plus least-squares superposition), the chains of qualifying pairs
   # become graph vertices, and the connected components of that graph are
   # printed as tNCS groups. The print statements use Python 2 syntax.
   h = pdb_hierarchy
   superposition_threshold = 2*sequence_identity_threshold - 100.
   n_atoms_all = h.atoms_size()
   s_str = "altloc ' ' and (protein or nucleotide)"
   # NOTE(review): the right-hand side of the next assignment is missing in this
   # view (s_str above is otherwise unused, so it was presumably consumed here).
   h =
   h1 = iotbx.pdb.hierarchy.root()
   unit_cell = crystal_symmetry.unit_cell()
   # result maps frozenset({chain, chain}) -> list of [p_identity, superposition-or-None]
   result = {}
   print "Find groups of chains related by translational NCS"
   # double loop over chains to find matching pairs related by pure translation
   for c1 in h1.chains():
     nchains = len(h1.models()[0].chains())
     # skip chains that are neither protein nor nucleic acid
     if([c1.is_protein(), c1.is_na()].count(True)==0): continue
     r1 = list(c1.residues())
     c1_seq = "".join(c1.as_sequence())
     sc_1_tmp = c1.atoms().extract_xyz()
     h1_p1 = h1.expand_to_p1(crystal_symmetry=crystal_symmetry)
     for (ii,c2) in enumerate(h1_p1.chains()):
       # map the P1-expanded chain back onto a chain of h1 by position
       orig_c2 = h1.models()[0].chains()[ii%nchains]
       r2 = list(c2.residues())
       c2_seq = "".join(c2.as_sequence())
       sites_cart_1, sites_cart_2 = None,None
       sc_2_tmp = c2.atoms().extract_xyz()
       # chains are identical
       if(c1_seq==c2_seq and sc_1_tmp.size()==sc_2_tmp.size()):
         sites_cart_1 = sc_1_tmp
         sites_cart_2 = sc_2_tmp
         p_identity = 100.
       # chains are not identical, do alignment
       # NOTE(review): the lines below are indented like a branch body, but the
       # line introducing it (presumably an else:) is not visible.
         align_obj = mmtbx.alignment.align(seq_a = c1_seq, seq_b = c2_seq)
         alignment = align_obj.extract_alignment()
         matches = alignment.matches()
         equal = matches.count("|")
         total = len(alignment.a) - alignment.a.count("-")
         # percentage of identical positions; max(1,total) avoids division by zero
         p_identity = 100.*equal/max(1,total)
         # NOTE(review): the deeper indentation below implies a missing condition
         # line (presumably a test of p_identity against superposition_threshold);
         # the zip(...) call and the innermost comparison are also cut off.
           sites_cart_1 = flex.vec3_double()
           sites_cart_2 = flex.vec3_double()
           for i1, i2, match in zip(alignment.i_seqs_a, alignment.i_seqs_b,
             if(i1 is not None and i2 is not None and match=="|"):
               r1i, r2i = r1[i1], r2[i2]
               assert r1i.resname==r2i.resname, [r1i.resname,r2i.resname,i1,i2]
               for a1 in r1i.atoms():
                 for a2 in r2i.atoms():
                   if( ==
       # superpose two sequence-aligned chains
       # NOTE(review): the condition guarding the superposition is not visible, the
       # radius expression is cut off, and the second append below was presumably
       # the body of a lost else: branch - confirm.
         lsq_fit_obj = superpose.least_squares_fit(
           reference_sites = sites_cart_1,
           other_sites     = sites_cart_2)
         angle = lsq_fit_obj.r.rotation_angle()
         t_frac = unit_cell.fractionalize((sites_cart_1-sites_cart_2).mean())
         t_frac = [math.modf(t)[0] for t in t_frac] # put into [-1,1]
         radius = flex.sum(flex.sqrt((sites_cart_1-
         fracscat = min(c1.atoms_size(),c2.atoms_size())/n_atoms_all
         result.setdefault( frozenset([c1,orig_c2]), [] ).append( [p_identity,[lsq_fit_obj.r, t_frac, angle, radius, fracscat]] )
         result.setdefault( frozenset([c1,orig_c2]), [] ).append( [p_identity,None] )
   # Build graph
   g = graph.adjacency_list()
   vertex_handle = {}
   for key in result:
     seqid = result[key][0][0]
     # keep the entry with the smallest rotation angle (element [2] of the
     # superposition list); entries without a superposition count as 0
     sup = min( result[key],key=lambda s:0 if s[1] is None else s[1][2])[1]
     result[key] = [seqid,sup]
     if ((seqid > sequence_identity_threshold) and (sup[2] < angular_difference_threshold_deg)):
       (c1,c2) = key
       if (c1 not in vertex_handle):
         vertex_handle[c1] = g.add_vertex(label=c1)
       if (c2 not in vertex_handle):
         vertex_handle[c2] = g.add_vertex(label=c2)
       # NOTE(review): no edge between the two vertices is added in the visible
       # code; presumably an add_edge call is missing here - confirm.
   # Do connected component analysis and compose final tNCS pairs object
   components = connected_component_algorithm.connected_components(g)
   import itertools
   self.ncs_pairs = []
   for (i,group) in enumerate(components):
     chains = [g.vertex_label(vertex=v) for v in group]
     fracscats = []
     radii = []
     # NOTE(review): nothing is appended to fracscats or radii in the visible
     # loop body, so the two averages below would divide by zero as shown;
     # the appends are presumably among the missing lines.
     for pair in itertools.combinations(chains,2):
       sup = result[frozenset(pair)][1]
     fs = sum(fracscats)/len(fracscats)
     rad = sum(radii)/len(radii)
     for pair in itertools.combinations(chains,2):
       sup = result[frozenset(pair)][1]
       # sup is [rotation, fractional translation, angle, radius, fracscat],
       # as stored in result above
       ncs_pair = ext.pair(
         r = sup[0],
         t = sup[1],
         radius = rad,
         radius_estimate = rad,
         fracscat = fs,
         rho_mn = flex.double(), # rho_mn undefined, needs to be set later
         id = i)
       # show tNCS pairs in group
       fmt="group %d chains %s <> %s angle: %4.2f trans.vect.: (%s) fracscat: %5.3f"
       t = ",".join([("%6.3f"%t_).strip() for t_ in sup[1]]).strip()
       print fmt%(i, pair[0].id, pair[1].id, sup[2], t, fs)