import unittest

from odps.df.tools.lib import HyperLogLog
from odps.tests.core import TestBase


class Test(TestBase):
    """Tests for the ``HyperLogLog`` helper from ``odps.df.tools.lib``."""

    def testHLL(self):
        """Check that HyperLogLog estimates stay close to the true distinct count.

        Three phases are exercised against the same buffer:

        1. 10,000 distinct strings are fed in; the estimate must be within
           10% of 10,000.
        2. A further 100,000 distinct strings are fed in; the estimate must
           be within 20% of 110,000.
        3. A second buffer holding the first 10,000 strings again is merged
           in; since those values were already counted, the estimate must
           still be within 20% of 110,000.
        """
        # NOTE(review): 0.05 is presumably the target error rate -- confirm
        # against the HyperLogLog constructor.
        hll = HyperLogLog(0.05)
        buf = hll.buffer()

        # ``range`` (not ``xrange``) keeps the test runnable on Python 2 and 3.
        for i in range(10000):
            hll(buf, str(i))
        # Divide by float literals throughout so the ratio is never
        # floor-divided under Python 2 if ``getvalue`` returns an integer.
        self.assertAlmostEqual(hll.getvalue(buf) / 10000.0, 1, delta=0.1)

        for i in range(100000, 200000):
            hll(buf, str(i))
        self.assertAlmostEqual(hll.getvalue(buf) / 110000.0, 1, delta=0.2)

        # Merge a buffer containing only values that ``buf`` has already
        # seen; the expected distinct count is unchanged.
        buf2 = hll.buffer()
        for i in range(10000):
            hll(buf2, str(i))
        hll.merge(buf, buf2)
        self.assertAlmostEqual(hll.getvalue(buf) / 110000.0, 1, delta=0.2)


if __name__ == '__main__':
    unittest.main()
self.assertTrue(expr._sort_fields[0]._ascending) self.assertFalse(expr._sort_fields[1]._ascending) expr = self.expr.map_reduce(mapper, reducer, group='name', sort=['rating', 'id'], ascending=[False, True]) self.assertEqual(expr.schema.names, ['name', 'rating']) self.assertEqual(len(expr._sort_fields), 3) self.assertTrue(expr._sort_fields[0]._ascending) self.assertFalse(expr._sort_fields[1]._ascending) self.assertTrue(expr._sort_fields[2]._ascending) expr = self.expr.map_reduce(mapper, reducer, group='name', sort=['rating', 'id'], ascending=False) self.assertEqual(expr.schema.names, ['name', 'rating']) self.assertEqual(len(expr._sort_fields), 3) self.assertTrue(expr._sort_fields[0]._ascending) self.assertFalse(expr._sort_fields[1]._ascending) self.assertFalse(expr._sort_fields[2]._ascending) if __name__ == '__main__': unittest.main()