예제 #1
0
from odps.df.tools.lib import HyperLogLog
from odps.tests.core import TestBase

class Test(TestBase):
    """Accuracy checks for the ``HyperLogLog`` distinct-count estimator."""

    def testHLL(self):
        """Check estimates after inserts, further inserts, and a merge.

        Three stages are verified, each comparing the ratio
        ``estimate / expected_distinct_count`` against 1:

        1. 10000 distinct strings are added (tolerance 0.1).
        2. 100000 further distinct strings are added, for 110000 in
           total (tolerance 0.2).
        3. A second buffer holding the first 10000 strings again is
           merged in; every one of them is already present, so the
           expected count stays at 110000 (tolerance 0.2).
        """
        # NOTE(review): 0.05 is presumably the target error rate of the
        # estimator -- confirm against the HyperLogLog constructor.
        hll = HyperLogLog(0.05)
        buf = hll.buffer()

        # ``range`` is available on both Python 2 and 3, whereas
        # ``xrange`` only exists on Python 2.
        for i in range(10000):
            hll(buf, str(i))

        self.assertAlmostEqual(hll.getvalue(buf) / float(10000), 1, delta=0.1)

        for i in range(100000, 200000):
            hll(buf, str(i))

        # Divide by a float: if ``getvalue`` returns an integer, Python 2
        # would floor the ratio to 0 or 1 and the tolerance check would
        # no longer measure anything.
        self.assertAlmostEqual(hll.getvalue(buf) / float(110000), 1, delta=0.2)

        buf2 = hll.buffer()

        for i in range(10000):
            hll(buf2, str(i))

        hll.merge(buf, buf2)

        # ``buf2`` only repeats values already counted in ``buf``.
        self.assertAlmostEqual(hll.getvalue(buf) / float(110000), 1, delta=0.2)


if __name__ == '__main__':
    # Imported here because none of the module-level imports shown bind
    # ``unittest``; without it, running this file as a script would fail
    # with a NameError before any test runs.
    import unittest

    unittest.main()
        self.assertTrue(expr._sort_fields[0]._ascending)
        self.assertFalse(expr._sort_fields[1]._ascending)

        expr = self.expr.map_reduce(mapper,
                                    reducer,
                                    group='name',
                                    sort=['rating', 'id'],
                                    ascending=[False, True])

        self.assertEqual(expr.schema.names, ['name', 'rating'])
        self.assertEqual(len(expr._sort_fields), 3)
        self.assertTrue(expr._sort_fields[0]._ascending)
        self.assertFalse(expr._sort_fields[1]._ascending)
        self.assertTrue(expr._sort_fields[2]._ascending)

        expr = self.expr.map_reduce(mapper,
                                    reducer,
                                    group='name',
                                    sort=['rating', 'id'],
                                    ascending=False)

        self.assertEqual(expr.schema.names, ['name', 'rating'])
        self.assertEqual(len(expr._sort_fields), 3)
        self.assertTrue(expr._sort_fields[0]._ascending)
        self.assertFalse(expr._sort_fields[1]._ascending)
        self.assertFalse(expr._sort_fields[2]._ascending)


# Run this module's tests when the file is executed directly as a script.
# NOTE(review): relies on ``unittest`` being imported at the top of the
# module this snippet came from -- that import is not visible here; confirm.
if __name__ == '__main__':
    unittest.main()