Ejemplo n.º 1
0
 def test_num_obs_linkage_multi_matrix(self):
     # Tests num_obs_linkage with observation matrices of multiple sizes.
     for n in xrange(2, 10):
         X = np.random.rand(n, 4)
         Y = pdist(X)
         Z = linkage(Y)
         assert_equal(num_obs_linkage(Z), n)
Ejemplo n.º 2
0
 def test_num_obs_linkage_multi_matrix(self):
     # Tests num_obs_linkage with observation matrices of multiple sizes.
     for n in xrange(2, 10):
         X = np.random.rand(n, 4)
         Y = pdist(X)
         Z = linkage(Y)
         self.assertTrue(num_obs_linkage(Z) == n)
Ejemplo n.º 3
0
 def test_num_obs_linkage_4_and_up(self):
     # Tests num_obs_linkage(Z) on linkage on observation sets between sizes
     # 4 and 15 (step size 3).
     for i in xrange(4, 15, 3):
         y = np.random.rand(i * (i - 1) // 2)
         Z = linkage(y)
         assert_equal(num_obs_linkage(Z), i)
Ejemplo n.º 4
0
    def isNoiseCluster(self, Z):
        """Return a boolean array flagging which nodes count as noise.

        Z is converted to an array and treated as a linkage matrix over n
        observations. A node is flagged when it fails every configured
        threshold: its weight is below ``self._minSize`` (if set) and its
        member count is below ``self._minPts`` (if set). When neither
        threshold is set, an all-False array of length ``2 * n - 1`` is
        returned.
        """
        Z = np.asarray(Z)
        n = sp_hierarchy.num_obs_linkage(Z)
        flat_ids = hierarchy.flatten_nodes(Z)

        use_size = self._minSize is not None
        use_pts = self._minPts is not None
        if not (use_size or use_pts):
            # No threshold configured: nothing is considered noise.
            return np.zeros(2 * n - 1, dtype=bool)

        if use_size:
            # The first n entries hold the per-contig lengths; the remaining
            # n - 1 entries are filled from hierarchy.maxscoresbelow using
            # addition, then re-indexed through flat_ids.
            # NOTE(review): presumably this yields the total contig length
            # under each internal node -- confirm against the hierarchy
            # module.
            total_lengths = np.concatenate(
                (self._profile.contigLengths, np.zeros(n - 1)))
            total_lengths[n:] = hierarchy.maxscoresbelow(Z,
                                                         total_lengths,
                                                         fun=operator.add)
            total_lengths[n:] = total_lengths[flat_ids + n]
            undersized = total_lengths < self._minSize
            if not use_pts:
                return undersized

        # The first n entries are compared as having a single member; the
        # rest use the fourth column of Z for the rows selected by flat_ids.
        leaf_flags = np.full(n, 1 < self._minPts, dtype=bool)
        node_flags = Z[flat_ids, 3] < self._minPts
        underpopulated = np.concatenate((leaf_flags, node_flags))
        if use_size:
            return np.logical_and(undersized, underpopulated)
        return underpopulated
Ejemplo n.º 5
0
 def test_num_obs_linkage_4_and_up(self):
     # Tests num_obs_linkage(Z) on linkage on observation sets between sizes
     # 4 and 15 (step size 3).
     for i in xrange(4, 15, 3):
         y = np.random.rand(i*(i-1)//2)
         Z = linkage(y)
         self.assertTrue(num_obs_linkage(Z) == i)
Ejemplo n.º 6
0
 def __init__(self, Z, indices, leaf_labeller, node_labeller):
     """Store the linkage, node index maps derived from it, and labellers.

     Z is converted with np.asarray; the observation count comes from
     scipy's num_obs_linkage, and the flat / embedded node ids come from
     the project hierarchy helpers (embed_nodes also receives indices).
     """
     linkage_matrix = np.asarray(Z)
     self._Z = linkage_matrix
     self._n = sp_hierarchy.num_obs_linkage(linkage_matrix)
     self._flat_ids = hierarchy.flatten_nodes(linkage_matrix)
     self._embed_ids = hierarchy.embed_nodes(linkage_matrix, indices)

     # Plain references kept for later use; nothing is computed from them
     # here.
     self._node_labeller = node_labeller
     self._leaf_labeller = leaf_labeller
     self._indices = indices
Ejemplo n.º 7
0
 def __init__(self, Z, indices, leaf_labeller, node_labeller):
     """Keep the linkage array, derived node ids, and the two labellers.

     The linkage is normalised with np.asarray before anything is derived
     from it. indices is stored and also passed to hierarchy.embed_nodes.
     """
     Z_arr = np.asarray(Z)
     num_obs = sp_hierarchy.num_obs_linkage(Z_arr)
     flat = hierarchy.flatten_nodes(Z_arr)
     embedded = hierarchy.embed_nodes(Z_arr, indices)

     self._Z = Z_arr
     self._n = num_obs
     self._flat_ids = flat
     self._embed_ids = embedded
     self._indices = indices
     self._leaf_labeller = leaf_labeller
     self._node_labeller = node_labeller
Ejemplo n.º 8
0
 def __init__(self, profile):
     """Build the linkage for a profile and cache per-node statistics.

     The linkage is derived from the profile's reachability order and
     distances. Scores and noise flags come from a MarkerCheckFCE created
     with minPts=20 and minSize=1000000. Per-node weights and counts are
     stored with the n leaf entries first, followed by the internal nodes.
     """
     self._profile = profile
     linkage_matrix = hierarchy.linkage_from_reachability(
         profile.reachOrder, profile.reachDists)
     self._Z = linkage_matrix
     num_obs = sp_hierarchy.num_obs_linkage(linkage_matrix)
     self._n = num_obs

     evaluator = MarkerCheckFCE(profile, minPts=20, minSize=1000000)
     self._scores = evaluator.getScores(linkage_matrix)
     self._is_noise = evaluator.isNoiseCluster(linkage_matrix)

     # Leaf weights are the contig lengths; internal-node weights are
     # filled from hierarchy.maxscoresbelow using np.add.
     # NOTE(review): a follow-up re-indexing of the internal-node weights
     # through hierarchy.flatten_nodes was commented out in the original
     # and is not applied here -- confirm that this is intended.
     node_weights = np.concatenate(
         (profile.contigLengths, np.zeros(num_obs - 1)))
     node_weights[num_obs:] = hierarchy.maxscoresbelow(
         linkage_matrix, node_weights, fun=np.add)
     self._weights = node_weights

     # One per leaf, then the fourth column of the linkage matrix.
     self._counts = np.concatenate(
         (np.ones(num_obs), linkage_matrix[:, 3]))
Ejemplo n.º 9
0
 def __init__(self, profile):
     """Derive the linkage from a profile and precompute node statistics.

     Stores the profile, the linkage built from its reachability order and
     distances, the observation count, the scores and noise flags from a
     MarkerCheckFCE (minPts=20, minSize=1000000), and per-node weight and
     count arrays.
     """
     self._profile = profile
     reach_order = profile.reachOrder
     reach_dists = profile.reachDists
     Z = hierarchy.linkage_from_reachability(reach_order, reach_dists)
     self._Z = Z
     self._n = sp_hierarchy.num_obs_linkage(Z)

     checker = MarkerCheckFCE(profile, minPts=20, minSize=1000000)
     self._scores = checker.getScores(Z)
     self._is_noise = checker.isNoiseCluster(Z)

     leaf_count = self._n
     # Contig lengths for the leaves, zero placeholders for the
     # leaf_count - 1 internal nodes, which are then filled from
     # hierarchy.maxscoresbelow using np.add.
     padding = np.zeros(leaf_count - 1)
     weights = np.concatenate((profile.contigLengths, padding))
     weights[leaf_count:] = hierarchy.maxscoresbelow(Z, weights, fun=np.add)
     # NOTE(review): the original carried a commented-out step that
     # re-indexed weights[n:] through hierarchy.flatten_nodes(Z); it stays
     # disabled here -- confirm that this is intended.
     self._weights = weights

     # Each leaf counts as one; internal nodes take the fourth column of Z.
     leaf_counts = np.ones(leaf_count)
     self._counts = np.concatenate((leaf_counts, Z[:, 3]))
Ejemplo n.º 10
0
 def isNoiseCluster(self, Z):
     """Flag the nodes of linkage Z that fail every configured threshold.

     Returns a boolean array. With neither ``self._minSize`` nor
     ``self._minPts`` set, the result is all False with length
     ``2 * n - 1``, where n is the number of observations in Z. Otherwise
     a node is flagged only if it is below each threshold that is set.
     """
     Z = np.asarray(Z)
     n = sp_hierarchy.num_obs_linkage(Z)
     flat_ids = hierarchy.flatten_nodes(Z)

     # Collect one boolean mask per configured threshold; a node is noise
     # only when every collected mask flags it.
     masks = []

     if self._minSize is not None:
         # Leaves carry their contig length; internal nodes are filled
         # from hierarchy.maxscoresbelow with addition and then re-indexed
         # through flat_ids.
         # NOTE(review): presumably the summed contig length per node --
         # confirm against the hierarchy module.
         sizes = np.concatenate(
             (self._profile.contigLengths, np.zeros(n - 1)))
         sizes[n:] = hierarchy.maxscoresbelow(Z, sizes, fun=operator.add)
         sizes[n:] = sizes[flat_ids + n]
         masks.append(sizes < self._minSize)

     if self._minPts is not None:
         # Leaves are compared as having one member; internal nodes use
         # the fourth column of Z for the rows selected by flat_ids.
         leaves_below = np.full(n, 1 < self._minPts, dtype=bool)
         nodes_below = Z[flat_ids, 3] < self._minPts
         masks.append(np.concatenate((leaves_below, nodes_below)))

     if not masks:
         return np.zeros(2 * n - 1, dtype=bool)

     is_noise = masks[0]
     for extra in masks[1:]:
         is_noise = np.logical_and(is_noise, extra)
     return is_noise
Ejemplo n.º 11
0
 def test_num_obs_linkage_2x4(self):
     # Tests num_obs_linkage(Z) on linkage over 3 observations.
     Z = np.asarray([[0, 1, 3.0, 2], [3, 2, 4.0, 3]], dtype=np.double)
     assert_equal(num_obs_linkage(Z), 3)
Ejemplo n.º 12
0
 def test_num_obs_linkage_1x4(self):
     # Tests num_obs_linkage(Z) on linkage over 2 observations.
     Z = np.asarray([[0, 1, 3.0, 2]], dtype=np.double)
     assert_equal(num_obs_linkage(Z), 2)
Ejemplo n.º 13
0
 def test_num_obs_linkage_2x4(self):
     # Tests num_obs_linkage(Z) on linkage over 3 observations.
     Z = np.asarray([[0, 1, 3.0, 2],
                     [3, 2, 4.0, 3]], dtype=np.double)
     self.assertTrue(num_obs_linkage(Z) == 3)
Ejemplo n.º 14
0
 def test_num_obs_linkage_1x4(self):
     # Tests num_obs_linkage(Z) on linkage over 2 observations.
     Z = np.asarray([[0, 1, 3.0, 2]], dtype=np.double)
     self.assertTrue(num_obs_linkage(Z) == 2)