Example #1
0
def reshape(readlen_dist, edges, heights):
    """Re-bin ``readlen_dist`` into 40 bins and return its edges and heights.

    A shaper is built with ``dist_shaper`` from the single reference
    histogram ``(heights, edges)`` using ``nbins=40`` and
    ``trim_excess=False``.  That shaper is bound as the first argument of
    ``_cont_dist_shaper`` and the resulting callable is applied to
    ``readlen_dist``.

    Args:
        readlen_dist: Distribution object handed to ``_cont_dist_shaper``.
            The result must expose ``numBins``, ``bins`` and ``binWidth``.
        edges: Labels of the reference histogram (second tuple element
            given to ``dist_shaper``).
        heights: Counts of the reference histogram (first tuple element
            given to ``dist_shaper``).

    Returns:
        A tuple ``(edges, heights, bin_width)`` where ``edges`` is a list of
        floats ``index * bin_width`` for each bin index, ``heights`` is the
        re-binned distribution's ``bins`` and ``bin_width`` is its
        ``binWidth``.
    """
    shape_len_dist = functools.partial(
        _cont_dist_shaper,
        dist_shaper([(heights, edges)], nbins=40, trim_excess=False))
    shaped = shape_len_dist(readlen_dist)
    nbins = shaped.numBins
    heights = shaped.bins
    bin_width = shaped.binWidth
    # ``range`` instead of ``xrange``: identical result here, and it does not
    # raise NameError on Python 3.
    # NOTE(review): edges always start at 0.0 (no minimum offset is added);
    # confirm the shaped distribution is meant to be zero-based.
    edges = [float(index) * bin_width for index in range(nbins)]
    return edges, heights, bin_width
 def test_abstract_dist_shaper_float_bwidth(self):
     """Re-bin two float-labelled distributions at every bin count in 1..99.

     Both inputs use a label spacing of 0.2.  The first covers labels
     0.0 to 1.8 and the second starts at 1.0, so their label ranges
     overlap.  For each requested ``nbins`` the shaper output must have
     exactly ``nbins`` bins, one label per bin, and the same total count
     as the distribution that was passed in.
     """
     bins1 = [0, 2, 3, 4, 3, 2, 0, 0, 0, 0]
     labels1 = [i * 0.2 for i in range(len(bins1))]
     bins2 = [0, 2, 3, 4, 3, 2, 0, 0, 0, 0, 0, 0, 0]
     labels2 = [1.0 + i * 0.2 for i in range(len(bins2))]
     dist_list = [(bins1, labels1), (bins2, labels2)]
     for nbins in range(1, 100):
         # One shaper is built from both distributions together and then
         # applied to each distribution on its own.
         shaper = dist_shaper(dist_list, nbins=nbins)
         for dist in dist_list:
             bins, labels = shaper(dist)
             self.assertEqual(len(bins), len(labels))
             self.assertEqual(len(bins), nbins)
             # Re-binning must not gain or lose counts.
             self.assertEqual(sum(bins), sum(dist[0]))
             """
 def test_abstract_dist_shaper_float_bwidth(self):
     """Re-bin two float-labelled distributions at every bin count in 1..99.

     Both inputs use a label spacing of 0.2.  The first covers labels
     0.0 to 1.8 and the second starts at 1.0, so their label ranges
     overlap.  For each requested ``nbins`` the shaper output must have
     exactly ``nbins`` bins, one label per bin, and the same total count
     as the distribution that was passed in.
     """
     bins1 = [0, 2, 3, 4, 3, 2, 0, 0, 0, 0]
     labels1 = [i * 0.2 for i in range(len(bins1))]
     bins2 = [0, 2, 3, 4, 3, 2, 0, 0, 0, 0, 0, 0, 0]
     labels2 = [1.0 + i * 0.2 for i in range(len(bins2))]
     dist_list = [(bins1, labels1), (bins2, labels2)]
     for nbins in range(1, 100):
         # One shaper is built from both distributions together and then
         # applied to each distribution on its own.
         shaper = dist_shaper(dist_list, nbins=nbins)
         for dist in dist_list:
             bins, labels = shaper(dist)
             self.assertEqual(len(bins), len(labels))
             self.assertEqual(len(bins), nbins)
             # Re-binning must not gain or lose counts.
             self.assertEqual(sum(bins), sum(dist[0]))
             """
 def test_abstract_dist_shaper(self):
     """Re-bin two integer-labelled distributions at every bin count in 1..99.

     Both inputs use a label spacing of 5.  The first covers labels 0 to
     45 and the second starts at 25, so their label ranges overlap.  For
     each requested ``nbins`` the shaper output must have exactly
     ``nbins`` bins, one label per bin, and the same total count as the
     distribution that was passed in.
     """
     bins1 = [0, 2, 3, 4, 3, 2, 0, 0, 0, 0]
     labels1 = [i * 5 for i in range(len(bins1))]
     bins2 = [0, 2, 3, 4, 3, 2, 0, 0, 0, 0, 0, 0, 0]
     labels2 = [25 + i * 5 for i in range(len(bins2))]
     dist_list = [(bins1, labels1), (bins2, labels2)]
     for nbins in range(1, 100):
         # One shaper is built from both distributions together and then
         # applied to each distribution on its own.
         shaper = dist_shaper(dist_list, nbins=nbins)
         for dist in dist_list:
             bins, labels = shaper(dist)
             self.assertEqual(len(bins), len(labels))
             self.assertEqual(len(bins), nbins)
             # Re-binning must not gain or lose counts.
             self.assertEqual(sum(bins), sum(dist[0]))
             """
 def test_abstract_dist_shaper(self):
     """Re-bin two integer-labelled distributions at every bin count in 1..99.

     Both inputs use a label spacing of 5.  The first covers labels 0 to
     45 and the second starts at 25, so their label ranges overlap.  For
     each requested ``nbins`` the shaper output must have exactly
     ``nbins`` bins, one label per bin, and the same total count as the
     distribution that was passed in.
     """
     bins1 = [0, 2, 3, 4, 3, 2, 0, 0, 0, 0]
     labels1 = [i * 5 for i in range(len(bins1))]
     bins2 = [0, 2, 3, 4, 3, 2, 0, 0, 0, 0, 0, 0, 0]
     labels2 = [25 + i * 5 for i in range(len(bins2))]
     dist_list = [(bins1, labels1), (bins2, labels2)]
     for nbins in range(1, 100):
         # One shaper is built from both distributions together and then
         # applied to each distribution on its own.
         shaper = dist_shaper(dist_list, nbins=nbins)
         for dist in dist_list:
             bins, labels = shaper(dist)
             self.assertEqual(len(bins), len(labels))
             self.assertEqual(len(bins), nbins)
             # Re-binning must not gain or lose counts.
             self.assertEqual(sum(bins), sum(dist[0]))
             """
    def test_dist_shaper_leading_and_trailing(self):
        """Check re-binning of histograms padded with leading/trailing zeros.

        Two fixtures are used, each with 12 bins labelled 0, 5, ..., 55 and
        four leading empty bins.  For every case the requested bin count is
        the number of non-zero bins plus an offset, optionally with
        ``trim_excess=True``, and both the returned bins and the returned
        labels are compared against literal expectations.
        """

        def run_cases(src_bins, src_labels, cases):
            # Each case is (offset, extra dist_shaper kwargs, expected bins,
            # expected labels).  The non-zero count is re-evaluated for
            # every case, exactly as the hand-written stanzas did.
            for offset, options, want_bins, want_labels in cases:
                nbins = np.count_nonzero(src_bins) + offset
                shaper = dist_shaper(
                    [(src_bins, src_labels)], nbins=nbins, **options)
                got_bins, got_labels = shaper((src_bins, src_labels))
                self.assertEqual(want_bins, got_bins)
                self.assertEqual(want_labels, got_labels)

        # --- basic fixture: four populated bins in the middle ---
        bins = [0, 0, 0, 0, 1, 2, 3, 4, 0, 0, 0, 0]
        labels = [5 * idx for idx in range(len(bins))]
        self.assertEqual(len(bins), len(labels))
        self.assertEqual(labels[-1], 55)

        # Asking for as many bins as the input has must change nothing.
        shaper = dist_shaper([(bins, labels)], nbins=len(bins))
        got_bins, got_labels = shaper((bins, labels))
        self.assertEqual(bins, got_bins)
        self.assertEqual(labels, got_labels)

        run_cases(bins, labels, [
            # one bin fewer than populated columns
            (-1, {}, [3, 7, 0], [20, 30, 40]),
            # one bin fewer, excess trimmed
            (-1, {'trim_excess': True}, [3, 7], [20, 30]),
            # exactly the populated columns
            (0, {}, [1, 2, 3, 4], [20, 25, 30, 35]),
            # one bin more than populated columns
            (1, {}, [1, 2, 3, 4, 0], [20, 25, 30, 35, 40]),
            # one bin more, excess trimmed
            (1, {'trim_excess': True}, [1, 2, 3, 4], [20, 25, 30, 35]),
        ])

        # --- larger fixture: five populated bins ---
        bins = [0, 0, 0, 0, 1, 2, 3, 4, 5, 0, 0, 0]
        labels = [5 * idx for idx in range(len(bins))]
        self.assertEqual(len(bins), len(labels))
        self.assertEqual(labels[-1], 55)

        run_cases(bins, labels, [
            # one bin fewer than populated columns
            (-1, {}, [3, 7, 5, 0], [20, 30, 40, 50]),
            # one bin fewer, excess trimmed
            (-1, {'trim_excess': True}, [3, 7, 5], [20, 30, 40]),
        ])
    def test_dist_shaper_leading_and_trailing(self):
        """Check re-binning of histograms padded with leading/trailing zeros.

        Two fixtures are used, each with 12 bins labelled 0, 5, ..., 55 and
        four leading empty bins.  For every case the requested bin count is
        the number of non-zero bins plus an offset, optionally with
        ``trim_excess=True``, and both the returned bins and the returned
        labels are compared against literal expectations.
        """

        def run_cases(src_bins, src_labels, cases):
            # Each case is (offset, extra dist_shaper kwargs, expected bins,
            # expected labels).  The non-zero count is re-evaluated for
            # every case, exactly as the hand-written stanzas did.
            for offset, options, want_bins, want_labels in cases:
                nbins = np.count_nonzero(src_bins) + offset
                shaper = dist_shaper(
                    [(src_bins, src_labels)], nbins=nbins, **options)
                got_bins, got_labels = shaper((src_bins, src_labels))
                self.assertEqual(want_bins, got_bins)
                self.assertEqual(want_labels, got_labels)

        # --- basic fixture: four populated bins in the middle ---
        bins = [0, 0, 0, 0, 1, 2, 3, 4, 0, 0, 0, 0]
        labels = [5 * idx for idx in range(len(bins))]
        self.assertEqual(len(bins), len(labels))
        self.assertEqual(labels[-1], 55)

        # Asking for as many bins as the input has must change nothing.
        shaper = dist_shaper([(bins, labels)], nbins=len(bins))
        got_bins, got_labels = shaper((bins, labels))
        self.assertEqual(bins, got_bins)
        self.assertEqual(labels, got_labels)

        run_cases(bins, labels, [
            # one bin fewer than populated columns
            (-1, {}, [3, 7, 0], [20, 30, 40]),
            # one bin fewer, excess trimmed
            (-1, {'trim_excess': True}, [3, 7], [20, 30]),
            # exactly the populated columns
            (0, {}, [1, 2, 3, 4], [20, 25, 30, 35]),
            # one bin more than populated columns
            (1, {}, [1, 2, 3, 4, 0], [20, 25, 30, 35, 40]),
            # one bin more, excess trimmed
            (1, {'trim_excess': True}, [1, 2, 3, 4], [20, 25, 30, 35]),
        ])

        # --- larger fixture: five populated bins ---
        bins = [0, 0, 0, 0, 1, 2, 3, 4, 5, 0, 0, 0]
        labels = [5 * idx for idx in range(len(bins))]
        self.assertEqual(len(bins), len(labels))
        self.assertEqual(labels[-1], 55)

        run_cases(bins, labels, [
            # one bin fewer than populated columns
            (-1, {}, [3, 7, 5, 0], [20, 30, 40, 50]),
            # one bin fewer, excess trimmed
            (-1, {'trim_excess': True}, [3, 7, 5], [20, 30, 40]),
        ])