Esempio n. 1
0
    def run_sample_statistics(self):
        """Accumulate resampled observations and evaluate the configured statistics.

        Calls ``self.random_with_replacement()`` ``self.sample_size`` times and
        feeds every value it yields into a single ``FrequencyTable``.

        Returns the result of ``map`` over ``self.functions``: each entry is
        treated as the name of a zero-argument method on the table, and that
        method's return value is produced in the same order.
        """
        # TODO: add an option to swap in a streaming accumulator here.
        table = FrequencyTable()
        record = table.add

        for _ in range(self.sample_size):
            for observation in self.random_with_replacement():
                record(observation)

        def evaluate(statistic_name):
            # Look the statistic up by name on the table and call it.
            return getattr(table, statistic_name)()

        return map(evaluate, self.functions)
Esempio n. 2
0
    def setUp(self):
        # Build three fresh fixtures before each test:
        #   empty_ft - a table nothing has been added to,
        #   range_ft - the values 10 down to 1, each added once,
        #   bucketed - 1 added once, 2 added with count 2, 3 added with count 3.
        self.empty_ft = FrequencyTable()

        descending = FrequencyTable()
        for value in range(10, 0, -1):
            descending.add(value)
        self.range_ft = descending

        weighted = FrequencyTable()
        weighted.add(1)
        for value, count in ((2, 2), (3, 3)):
            weighted.add(value, count)
        self.bucketed = weighted
Esempio n. 3
0
class TestHistogram(unittest.TestCase):

    def setUp(self):
        self.ranged_fa = FrequencyArray()
        self.ranged_ft = FrequencyTable()
        self.expected_all = {}

        for i in range(10, 0, -1):
            self.ranged_fa.add(i)
            self.ranged_ft.add(i)
            self.expected_all[i] = 1

    def testRangeToHistogram(self):
        histogram = Histogram(1)
        for i in range(10, 0, -1):
            histogram.add(i)

        self.assertEqual(histogram.data, self.expected_all)

    def testArrayToHistogram(self):
        histogram = Histogram(1)
        for i in self.ranged_fa.explode():
            histogram.add(i)

        self.assertEqual(histogram.data, self.expected_all)

    def testTableToHistogram(self):
        histogram = Histogram(1)
        for i in self.ranged_ft.explode():
            histogram.add(i)

        self.assertEqual(histogram.data, self.expected_all)

    def testHistogramBucketWidthOfTwo(self):
        histogram = Histogram(2)
        for i in range(10, 0, -1):
            histogram.add(i)

        self.assertEqual(histogram.data, {0:1, 1:2, 2:2, 3:2, 4:2, 5:1})

    def testHistogramBucketWidthOfTwo(self):
        histogram = Histogram(2)
        for i in range(10, 0, -1):
            histogram.add(i)

        self.assertEqual(histogram.orderedData(), [(0,1), (2,2), (4,2), (6,2), (8,2), (10,1)])

    def testHistogramBucketWidthOfThree(self):
        histogram = Histogram(3)
        for i in range(10, 0, -1):
            histogram.add(i)

        self.assertEqual(histogram.orderedData(), [(0,2), (3,3), (6,3), (9,2)])
Esempio n. 4
0
    def setUp(self):
        # Fixtures: a FrequencyArray and a FrequencyTable that each receive the
        # values 10 down to 1 once, plus the matching {value: 1} mapping.
        descending = range(10, 0, -1)

        self.ranged_fa = FrequencyArray()
        self.ranged_ft = FrequencyTable()
        self.expected_all = dict.fromkeys(descending, 1)

        for value in descending:
            self.ranged_fa.add(value)
            self.ranged_ft.add(value)
Esempio n. 5
0
    def testAddingCounts(self):
        self.empty_ft.add(1, 5)

        empty_2 = FrequencyTable()
        empty_2.add(1)
        empty_2.add(1)
        empty_2.add(1)
        empty_2.add(1)
        empty_2.add(1)

        self.assertEquals(self.empty_ft.data, {1:5})
        self.assertEquals(empty_2.data, {1:5})
Esempio n. 6
0
class TestFrequencyTable(unittest.TestCase):

    def setUp(self):
        self.empty_ft = FrequencyTable()

        self.range_ft = FrequencyTable()
        for i in range(10, 0, -1):
            self.range_ft.add(i)

        self.bucketed = FrequencyTable()
        self.bucketed.add(1)
        self.bucketed.add(2, 2)
        self.bucketed.add(3, 3)

    def testNoDataToStart(self):
        self.assertEqual(self.empty_ft.data, {})

    def testAddData(self):
        self.empty_ft.add(1)
        self.empty_ft.add(3)

        self.assertEqual(self.empty_ft.data, {1:1, 3:1})

    def testAddDataWithCounts(self):
        self.empty_ft.add(1, 2)
        self.empty_ft.add(3)

        self.assertEqual(self.empty_ft.data, {1:2, 3:1})

    def testOrderedData(self):
        self.assertEqual(self.range_ft.orderedData(), [(1, 1), (2, 1), (3, 1), (4, 1), (5, 1), (6, 1), (7, 1), (8, 1), (9, 1), (10, 1)])

    def testOrderedDataBucketed(self):
        self.assertEqual(self.bucketed.orderedData(), [(1,1), (2,2), (3,3)])

    def testMax(self):
        self.assertEqual(self.range_ft.max, 10)

    def testMin(self):
        self.assertEqual(self.range_ft.min, 1)

    def testAverage(self):
        self.assertEqual(self.bucketed.mean(), 14/6.)

    def testModeWithObviousMode(self):
        self.assertEqual(self.bucketed.mode(), 3)

    def testModeWithNoMode(self):
        # first item
        self.assertEqual(self.range_ft.mode(), 1)

    def testExplodedData(self):
        exploded = [i for i in self.bucketed.explode()]
        self.assertEqual(exploded, [1,2,2,3,3,3])

    def testMedianUneven(self):
        self.bucketed.add(1)
        self.assertEqual(self.bucketed.median(), 2)

    def testMedianEven(self):
        self.assertEqual(self.bucketed.median(), 2)
        self.assertEqual(self.range_ft.median(), 5)

    def testFirstPercentile(self):
        self.assertEqual(self.range_ft.percentile(0), 1)

    def testLastPercentile(self):
        self.assertEqual(self.range_ft.percentile(1), 10)

    def testFirstQuantile(self):
        self.assertEqual(self.range_ft.firstQuartile(), 3)

    def testThirdQuantile(self):
        self.assertEqual(self.range_ft.thirdQuartile(), 8)

    def testInterquartileRange(self):
        self.assertEqual(self.range_ft.iqr(), 5)

    def testAddingCounts(self):
        self.empty_ft.add(1, 5)

        empty_2 = FrequencyTable()
        empty_2.add(1)
        empty_2.add(1)
        empty_2.add(1)
        empty_2.add(1)
        empty_2.add(1)

        self.assertEquals(self.empty_ft.data, {1:5})
        self.assertEquals(empty_2.data, {1:5})

    def testPdfAsMap(self):
        self.assertEquals(self.bucketed.pdfAsMap(), {1:1/6.,2:2/6.,3:.5})

    def testCdfAsMap(self):
        self.assertEquals(self.bucketed.cdfAsMap(), {1:1/6.,2:3/6.,3:1})