def create_test_array():
     'It builds the IntsStats fixture shared by the statistics tests'
     # Stem-and-leaf layout: every key is the leading part of a number and
     # every character of its value is the last digit of one number, so
     # '10': '288' stands for 102, 108 and 108. There are 92 numbers in
     # total, between 95 and 215.
     leaves_by_stem = {'9': '5',
                       '10': '288',
                       '11': '002556688',
                       '12': '00012355555',
                       '13': '0000013555688',
                       '14': '00002555558',
                       '15': '0000000000355555555557',
                       '16': '000045',
                       '17': '000055',
                       '18': '0005',
                       '19': '00005',
                       '21': '5'}
     stats = IntsStats()
     for stem in leaves_by_stem:
         for leaf in leaves_by_stem[stem]:
             stats.append(int(stem + leaf))
     return stats
    def test_distribution(self):
        'It tests the histogram function'
#        ints_array = IntsStats(1)
#        num_integers = 1000000
#        max_value    = 100000
#        for i in xrange(num_integers):
#            ints_array.append(random.randint(0, max_value))

#        (distrib, bin_edges) = ints_array.histogram(25)
#
#        (distrib, bin_edges) = ints_array.histogram(25, range_=None)
#
#        (distrib, bin_edges) = ints_array.histogram(25, range_=(None, 1000))
#
#        (distrib, bin_edges) = ints_array.histogram(25, range_=(1000, None))
#
#        (distrib, bin_edges) = ints_array.histogram(25, range_=(1000, 2000))

        ints_array = self.create_test_array()
        distrib = ints_array.calculate_distribution(bins=10,
                                                        remove_outliers=5)

        assert distrib['distrib'] == [7L, 13L, 7L, 10L, 7L, 22L, 6L, 4L, 5L, 5L]
        assert distrib['bin_edges'] == [110, 118, 126, 134, 142, 150, 158, 166,
                                        174, 182, 190]

        general_stats = '''Statistics for histogram
-------------------------
minimum: 95
maximum: 215
average: 145.1522
variance: 557.4334
sum: 13354
items: 92'''
        result_fhand = tempfile.NamedTemporaryFile()
        ints_array.write_general_stats(result_fhand)
        assert general_stats in open(result_fhand.name).read()

        ints_array = IntsStats([0, 0, 1, 3])

        assert [2, 1, 1]  == ints_array.calculate_distribution(bins=3)['distrib']
Example #3
0
 def _get_lengths_quals_from_file(seq_fpath):
     '''Given a sequence file path it returns the lengths and quals.

     seq_fpath -- path of the sequence file, opened here and handed to
                  seqs_in_file.

     It returns a (lengths, quals) tuple of IntsStats: lengths holds
     len(seq) for every sequence read and quals accumulates the qual
     values of the sequences that have a non-empty qual.
     '''
     lengths = IntsStats(init_len=1000)
     quals   = IntsStats(init_len=100)
     # The with block closes the file once every sequence has been consumed,
     # instead of leaving the handle open until garbage collection.
     with open(seq_fpath) as seq_fhand:
         for seq in seqs_in_file(seq_fhand):
             lengths.append(len(seq))
             qual = seq.qual
             # Sequences with a missing or empty qual add nothing to quals
             if qual:
                 quals.extend(qual)
     return lengths, quals
    def test_array():
        'It checks min, max and median on an extensible array'
        # An array created with an initial length and filled by appending
        stats = IntsStats(init_len=5)
        for value in (6, 2):
            stats.append(value)
        assert stats.min == 2
        assert stats.max == 6

        # The maximum has to follow a later, bigger item
        stats.append(200)
        assert stats.max == 200

        # An array created directly from a sequence of values
        stats = IntsStats((3, 5, 7, 7, 38))
        assert stats.median == 7
    def test_stats_functs(self):
        'It checks the median, average, sum, count and variance properties'
        # Odd number of items: the expected median is the middle one
        stats = IntsStats()
        for value in (3, 5, 7, 7, 38):
            stats.append(value)
        assert stats.median == 7

        # Even number of items: the expected median, 6, lies between 5 and 7
        stats = IntsStats()
        for value in (3, 5, 7, 7):
            stats.append(value)
        assert stats.median == 6

        # The shared fixture, checked against precomputed figures
        stats = self.create_test_array()
        assert stats.median == 145
        assert stats.sum == 13354
        assert stats.count == 92
        # The floating point results are compared rounded to two decimals
        assert round(stats.average, 2) == 145.15
        assert round(stats.variance, 2) == 557.43