Ejemplo n.º 1
0
def cluster_hits(hits, radius1, radius2=None, samedir=False):
    """
    Cluster hits using windows

    hits    -- iterable of tuples (region1, region2, extra)
    radius1 -- radius of window in query genome
    radius2 -- radius of window in subject genome; passed through unchanged
               to find_syntenic_neighbors (None presumably means "use
               radius1" -- confirm against that function)
    samedir -- whether or not to require genes in same direction

    hits must be sorted by query region species, chrom, and start

    Returns the set of block roots (b.root()) over every block that was
    created, so hits that were merged together contribute a single entry.
    """

    # maps each hit to the UnionFind block it currently belongs to
    comps = {}

    for hit, syntenic in find_syntenic_neighbors(hits, radius1, radius2):

        # get block of hit, starting a singleton block the first time
        # the hit is seen
        block = comps.get(hit)
        if block is None:
            block = UnionFind([hit])
            comps[hit] = block

        # union block with syntenic hits
        for hit2 in syntenic:
            block2 = comps.get(hit2)

            # Hits in different directions are never merged, but hit2 is
            # still registered in its own block so that it is not lost
            # from the final result.
            if samedir and not samedir_hits(hit, hit2):
                if block2 is None:
                    comps[hit2] = UnionFind([hit2])
                continue

            if block2 is None:
                # hit2 is new: it simply joins the current block
                comps[hit2] = block
                block.add(hit2)
            else:
                # hit2 already has a block: merge the two blocks
                block2.union(block)

    # Many hits map to the same block, so reduce to the distinct roots.
    # values() is used instead of the Python 2-only itervalues() so this
    # runs unchanged on both Python 2 and Python 3.
    return set(b.root() for b in comps.values())
Ejemplo n.º 2
0
    def test_union_find(self):
        """Exercise UnionFind add/len/membership, same() and union()."""
        set1 = UnionFind()
        set2 = UnionFind()
        set3 = UnionFind()

        set1.add(1)
        set1.add(2)

        self.assertEqual(len(set1), 2)
        self.assertTrue(1 in set1)
        self.assertFalse(set1.has(-1))

        set2.add(3)
        set2.add(4)
        set2.add(5)
        self.assertEqual(len(set2), 3)

        # set3 shares the item 5 with set2
        set3.add(5)
        set3.add(6)
        set3.add(7)
        self.assertEqual(len(set3), 3)

        self.assertFalse(set1.same(set2))
        set1.union(set2)
        self.assertTrue(set1.same(set2))

        set1.union(set3)
        # assertEqual, not assertTrue: assertTrue(a, b) treats b as the
        # failure message and would pass for any truthy a.
        # set() makes the comparison independent of the container type
        # that members() returns.
        self.assertEqual(set(set1.members()), set([1, 2, 3, 4, 5, 6, 7]))
        self.assertEqual(len(set1), len(set2))
    def test_union_find(self):
        """Check UnionFind size, membership, same() and union() behavior."""
        set1 = UnionFind()
        set2 = UnionFind()
        set3 = UnionFind()

        set1.add(1)
        set1.add(2)

        self.assertEqual(len(set1), 2)
        self.assertTrue(1 in set1)
        self.assertFalse(set1.has(-1))

        set2.add(3)
        set2.add(4)
        set2.add(5)
        self.assertEqual(len(set2), 3)

        # item 5 is deliberately present in both set2 and set3
        set3.add(5)
        set3.add(6)
        set3.add(7)
        self.assertEqual(len(set3), 3)

        self.assertFalse(set1.same(set2))
        set1.union(set2)
        self.assertTrue(set1.same(set2))

        set1.union(set3)
        # These two checks must use assertEqual: with assertTrue the
        # second argument is only the failure message, so nothing was
        # actually being compared.
        # Wrapping members() in set() avoids depending on the container
        # type it returns.
        self.assertEqual(
            set(set1.members()),
            set([1, 2, 3, 4, 5, 6, 7]))
        self.assertEqual(len(set1), len(set2))