def h_min(col):
    """
    Return an aggregation for the minimum of the given column.  Like the SQL min() function::

        select(h_min(employee.salary), employee.department, where=employee)

    returns the lowest salary in each department.

    :type col: :class:`hustle.core.marble.Column`
    :param col: the column to aggregate
    """
    import mdb

    if col.is_numeric:
        # Seed with 2**63 - 1 so any smaller numeric value replaces it.
        seed = lambda: 9223372036854775807
        result_type = mdb.MDB_INT_32
    else:
        # Same value as unichr(0xFFFF) on Python 2, but spelled as a literal
        # because ``unichr`` does not exist on Python 3.
        seed = lambda: u'\uffff'
        result_type = mdb.MDB_STR

    # Both branches reduce the same way: keep the smaller of accumulator/value.
    return Aggregation("min",
                       col,
                       f=lambda a, v: a if a < v else v,
                       default=seed,
                       result_spec=Column('_min_type', type_indicator=result_type))
def test_is_trie(self):
    """Check ``is_trie`` for three type/compression combinations."""
    # (type indicator, compression indicator, assertion applied to is_trie)
    scenarios = (
        (mdb.MDB_STR, 0, self.assertTrue),
        (mdb.MDB_INT_16, 0, self.assertFalse),
        (mdb.MDB_STR, 1, self.assertFalse),
    )
    for type_code, compression, check in scenarios:
        column = Column(_NAME,
                        None,
                        index_indicator=False,
                        partition=False,
                        type_indicator=type_code,
                        compression_indicator=compression,
                        rtrie_indicator=mdb.MDB_UINT_16)
        check(column.is_trie)
def test_get_effective_inttype(self):
    """Check ``get_effective_inttype`` before and after switching the column to MDB_STR."""
    settings = dict(index_indicator=False,
                    partition=False,
                    type_indicator=mdb.MDB_UINT_16,
                    compression_indicator=0,
                    rtrie_indicator=mdb.MDB_INT_16)
    column = Column(_NAME, None, **settings)

    # With an integer type indicator the type indicator itself is expected back.
    effective = column.get_effective_inttype()
    self.assertEqual(effective, mdb.MDB_UINT_16)

    # Once the column is a string column the rtrie indicator is expected back.
    column.type_indicator = mdb.MDB_STR
    effective = column.get_effective_inttype()
    self.assertEqual(effective, mdb.MDB_INT_16)
def h_count():
    """
    Build the aggregation that counts rows for each grouped key, in the
    manner of SQL's count()::

        select(h_count(), employee.department, where=employee)

    yields the number of employees in every department.
    """
    import mdb

    counted = Column(name='_count', type_indicator=1)
    outcome = Column('_count_type', type_indicator=mdb.MDB_UINT_32)
    # The accumulator starts at zero and every incoming value is added to it.
    return Aggregation("count",
                       counted,
                       f=lambda total, value: total + value,
                       default=lambda: 0,
                       result_spec=outcome)
def test_is_boolean(self):
    """Check the type flags reported by a column created with ``boolean=True``."""
    flag_column = Column(_NAME, None, boolean=True)

    # Flags expected to be set on a boolean column.
    for attr in ('is_int', 'is_index', 'is_boolean'):
        self.assertTrue(getattr(flag_column, attr))

    # Flags expected to be clear on a boolean column.
    for attr in ('is_wide', 'is_trie'):
        self.assertFalse(getattr(flag_column, attr))
def test_check_range_query(self):
    """Apply every comparison operator to an indexed MDB_INT_16 column.

    There are no assertions; the test passes as long as none of the
    comparisons raises.
    """
    column = Column(_NAME,
                    None,
                    index_indicator=True,
                    partition=False,
                    type_indicator=mdb.MDB_INT_16,
                    compression_indicator=1,
                    rtrie_indicator=None)
    comparisons = (
        lambda col: col < 1,
        lambda col: col <= 1,
        lambda col: col > 1,
        lambda col: col >= 1,
        lambda col: col == 1,
        lambda col: col != 1,
    )
    for compare in comparisons:
        compare(column)
def test_check_range_query_for_partition(self):
    """Apply every comparison operator to a partition MDB_STR column.

    There are no assertions; the test passes as long as none of the
    comparisons raises.
    """
    column = Column(_NAME,
                    None,
                    index_indicator=1,
                    partition=True,
                    type_indicator=mdb.MDB_STR,
                    compression_indicator=1,
                    rtrie_indicator=None)
    comparisons = (
        lambda col: col < "foo",
        lambda col: col <= "foo",
        lambda col: col > "foo",
        lambda col: col >= "foo",
        lambda col: col == "foo",
        lambda col: col != "foo",
    )
    for compare in comparisons:
        compare(column)
def test_column_errors(self):
    """Comparing a column created with ``index_indicator=0`` must raise TypeError."""
    text_column = Column(_NAME,
                         None,
                         index_indicator=0,
                         partition=False,
                         type_indicator=mdb.MDB_STR,
                         compression_indicator=0,
                         rtrie_indicator=mdb.MDB_UINT_16)
    rejected = (
        lambda: text_column > 'hello',
        lambda: text_column == 'hello',
    )
    for attempt in rejected:
        with self.assertRaises(TypeError):
            attempt()
def h_count():
    """
    Build the aggregation that counts rows for each grouped key, in the
    manner of SQL's count()::

        select(h_count(), employee.department, where=employee)

    yields the number of employees in every department.
    """
    def tally(total, value):
        # A falsy value (e.g. None or 0) is counted as a single occurrence.
        return total + (value or 1)

    everything = Column('all', None, type_indicator=1)
    return Aggregation("count", everything, f=tally, default=lambda: 0)
def test_expr_without_partitions(self):
    """Evaluate comparison, AND, OR and NOT expressions on a non-partition column."""
    cee_vals = Tablet([1, 5, 7, 9, 12, 13, 14, 19, 27, 38])
    cee = Column('cee', None, type_indicator=1, index_indicator=1, partition=False)

    def check(expr, expected, normalize=list):
        # Run the expression against the tablet and compare the result;
        # ``normalize=sorted`` is used where result order is not asserted.
        self.assertEqual(normalize(expr(cee_vals)), expected)

    # simple comparisons
    check(cee < 8, [1, 5, 7])
    check(cee > 7, [9, 12, 13, 14, 19, 27, 38])
    check(cee <= 7, [1, 5, 7])
    check(cee >= 7, [7, 9, 12, 13, 14, 19, 27, 38])
    check(cee == 7, [7])
    check(cee != 7, [1, 5, 9, 12, 13, 14, 19, 27, 38])

    # AND
    check((cee > 7) & (cee < 20), [9, 12, 13, 14, 19])
    check((cee > 7) & (cee < 20) & (cee > 13), [14, 19])

    # OR
    check((cee < 7) | (cee > 20), [1, 5, 27, 38], normalize=sorted)
    check((cee == 7) | (cee == 20) | (cee == 13), [7, 13])

    # NOT
    check(~((cee >= 7) & (cee <= 20)), [1, 5, 27, 38], normalize=sorted)
    check(~((cee < 7) | (cee == 19)), [7, 9, 12, 13, 14, 27, 38], normalize=sorted)
def test_check_range_query_for_lz4(self):
    """Ordering comparisons must raise TypeError when ``compression_indicator=2``.

    Equality and inequality are evaluated afterwards without any assertion,
    so they only need to complete without raising.
    """
    column = Column(_NAME,
                    None,
                    index_indicator=1,
                    partition=False,
                    type_indicator=mdb.MDB_STR,
                    compression_indicator=2,
                    rtrie_indicator=None)
    ordering_queries = (
        lambda: column < "foo",
        lambda: column <= "foo",
        lambda: column > "foo",
        lambda: column >= "foo",
    )
    for query in ordering_queries:
        self.assertRaises(TypeError, query)

    column == "foo"
    column != "foo"
def wrap(column):
    """Return a new Column derived from ``column``.

    Each setting held on the enclosing ``self`` takes precedence when it is
    not ``None``; otherwise the value is read from ``column``. The enclosing
    ``fn`` is attached as ``column_fn``.
    """
    if self.index_indicator is not None:
        index_indicator = self.index_indicator
    else:
        index_indicator = column.index_indicator

    if self.type_indicator is not None:
        type_indicator = self.type_indicator
    else:
        type_indicator = column.type_indicator

    if self.rtrie_indicator is not None:
        rtrie_indicator = self.rtrie_indicator
    else:
        rtrie_indicator = column.rtrie_indicator

    if self.compression_indicator is not None:
        compression_indicator = self.compression_indicator
    else:
        compression_indicator = column.compression_indicator

    if self.boolean is not None:
        is_boolean = self.boolean
    else:
        is_boolean = column.is_boolean

    return Column(column.name,
                  column.table,
                  index_indicator,
                  column.partition,
                  type_indicator,
                  compression_indicator,
                  rtrie_indicator,
                  alias=column.alias,
                  boolean=is_boolean,
                  column_fn=fn)
def h_sum(col):
    """
    Build the aggregation that totals the given column, in the manner of
    SQL's sum().  It is passed to :func:`hustle.select` to request a sum
    over a column::

        select(h_sum(employee.salary), employee.department, where=employee.age > 25)

    yields the total salary per department for employees over 25 years old.

    :type col: :class:`hustle.core.marble.Column`
    :param col: the column to aggregate
    """
    import mdb

    outcome = Column('_sum_type', type_indicator=mdb.MDB_INT_32)
    # The accumulator starts at zero and every incoming value is added to it.
    return Aggregation("sum",
                       col,
                       f=lambda total, value: total + value,
                       default=lambda: 0,
                       result_spec=outcome)
def h_avg(col):
    """
    Return an aggregation for the average of the given column.  Like the SQL avg() function::

        select(h_avg(employee.salary), employee.department, where=employee)

    returns the average salary in each department

    :type col: :class:`hustle.core.marble.Column`
    :param col: the column to aggregate
    """
    import mdb

    # The accumulator is a (running_sum, count) pair.  It is indexed rather
    # than unpacked in the lambda signature because tuple-parameter unpacking
    # is a SyntaxError on Python 3 (PEP 3113); indexing works on 2 and 3.
    return Aggregation("avg",
                       col,
                       f=lambda acc, v: (acc[0] + v, acc[1] + 1),
                       # Final step: divide the sum by the count as a float.
                       g=lambda acc: float(acc[0]) / acc[1],
                       default=lambda: (0, 0),
                       result_spec=Column('_avg_type', type_indicator=mdb.MDB_INT_32))
def h_combine(col, separator=','):
    """
    Build the aggregation that combines the values of the given column,
    much like a string join.

    :type col: :class:`hustle.core.marble.Column`
    :param col: the column to combine

    :type separator: string
    :param separator: the separator of the combination
    """
    import mdb
    import functools

    # Bind the separator onto _h_combine, then copy _h_combine's metadata
    # onto the resulting partial object.
    combiner = functools.update_wrapper(
        functools.partial(_h_combine, separator=separator),
        _h_combine)
    outcome = Column('_combine_type', type_indicator=mdb.MDB_STR)
    return Aggregation("combine",
                       col,
                       f=combiner,
                       default=lambda: None,
                       result_spec=outcome)
def test_schema_string(self):
    """Check the ``schema_string`` prefix for each type/compression setting."""
    column = Column(_NAME,
                    None,
                    index_indicator=False,
                    partition=False,
                    type_indicator=mdb.MDB_UINT_16,
                    compression_indicator=0,
                    rtrie_indicator=mdb.MDB_UINT_16)

    def expect(prefix):
        # The schema string is expected to be the prefix followed by the name.
        self.assertEqual(column.schema_string(), "%s%s" % (prefix, _NAME))

    # The column as constructed (MDB_UINT_16).
    expect('@2')

    # Remaining integer types.
    integer_prefixes = (
        (mdb.MDB_INT_16, '#2'),
        (mdb.MDB_INT_32, '#4'),
        (mdb.MDB_UINT_32, '@4'),
        (mdb.MDB_INT_64, '#8'),
        (mdb.MDB_UINT_64, '@8'),
    )
    for type_code, prefix in integer_prefixes:
        column.type_indicator = type_code
        expect(prefix)

    # String column: vary the rtrie indicator, then the compression indicator.
    column.type_indicator = mdb.MDB_STR
    column.compression_indicator = 0
    expect('%2')

    column.rtrie_indicator = mdb.MDB_UINT_32
    expect('%4')

    column.compression_indicator = 1
    expect('$')

    column.compression_indicator = 2
    expect('*')
def test_expr_with_partitions(self):
    """Check which partition tags each expression selects via ``partition()``.

    ``pee`` is a partition column and ``cee`` is a non-partition column; the
    cases cover pure partition expressions and mixes of the two.
    """
    pee = Column('pee', None, type_indicator=1, index_indicator=1, partition=True)
    pee_tags = [1, 5, 7, 9, 12, 13, 14, 19, 27, 38]
    cee = Column('cee', None, type_indicator=1, index_indicator=1, partition=False)

    def selects(expr, expected):
        # Compare the selected tags in the order partition() yields them.
        self.assertEqual(list(expr.partition(pee_tags)), expected)

    def selects_sorted(expr, expected):
        # Compare the selected tags without asserting on their order.
        self.assertEqual(sorted(expr.partition(pee_tags)), expected)

    # simple comparisons on the partition column
    selects(pee < 7, [1, 5])
    selects(pee > 7, [9, 12, 13, 14, 19, 27, 38])
    selects(pee == 7, [7])
    selects(pee != 7, [1, 5, 9, 12, 13, 14, 19, 27, 38])
    selects(pee >= 7, [7, 9, 12, 13, 14, 19, 27, 38])
    selects(pee <= 7, [1, 5, 7])
    selects(~(pee > 7), [1, 5, 7])

    # test pure partition combination
    selects_sorted((pee > 5) | (pee == 1), [1, 7, 9, 12, 13, 14, 19, 27, 38])
    selects(~((pee <= 5) | (pee > 14)), [7, 9, 12, 13, 14])
    selects((pee == 5) | (pee == 99), [5])
    selects((pee > 5) & (pee <= 14) & (pee > 12), [13, 14])
    selects_sorted(((pee > 5) & (pee <= 14)) | (pee == 5), [5, 7, 9, 12, 13, 14])
    selects_sorted(~(~(((pee > 5) & (pee <= 14))) & (pee != 5)), [5, 7, 9, 12, 13, 14])
    selects_sorted(~(((pee <= 5) | (pee > 14)) & (pee != 5)), [5, 7, 9, 12, 13, 14])

    # test combined partition/index combinations
    # p & c == p
    selects((pee > 5) & (cee <= 14), [7, 9, 12, 13, 14, 19, 27, 38])

    # test combined partition/index combinations
    # p & ~c == p
    selects((pee > 5) & ~(cee <= 14), [7, 9, 12, 13, 14, 19, 27, 38])
    selects((pee > 5) & ~~(cee <= 14), [7, 9, 12, 13, 14, 19, 27, 38])

    # p & ~c & ~c == p
    selects((pee > 5) & ~(cee <= 14) & ~(cee >= 5), [7, 9, 12, 13, 14, 19, 27, 38])

    selects((cee > 5) & ~((pee > 5) & ~(cee <= 14)), [1, 5])
    selects((cee > 5) & (~(pee > 5) & ~(cee <= 14)), [1, 5])
    selects((cee > 5) & ~(~(pee > 5) & ~(cee <= 14)), [7, 9, 12, 13, 14, 19, 27, 38])
    selects((cee > 5) & ~~((pee > 5) & ~(cee <= 14)), [7, 9, 12, 13, 14, 19, 27, 38])
    selects((cee > 5) & ~((pee > 5) | ~(cee <= 14)), pee_tags)
    selects((cee > 5) & ~~((pee > 5) | ~(cee <= 14)), pee_tags)

    # p & ~c | ~c == all
    selects((pee > 5) & ~(cee <= 14) | ~(cee >= 5), pee_tags)

    # ~c & ~c & p == p
    selects(~(cee <= 14) & ~(cee >= 5) & (pee > 5), [7, 9, 12, 13, 14, 19, 27, 38])

    # p | c == universe
    selects((pee == 5) | (pee == 8) | (cee == 99), pee_tags)
    selects((pee == 5) | (pee == 8) | (cee == 99), pee_tags)

    # p | c == universe
    selects(((pee == 5) | (pee > 14)) | (cee > 12), pee_tags)

    # c & p == p ==> p | p
    selects_sorted(((pee == 5) | (pee > 14)) | ((cee > 12) & (pee == 1)), [1, 5, 19, 27, 38])
def test_schema_string(self):
    """Check the ``schema_string`` prefix for each type/compression setting."""
    column = Column(
        _NAME,
        None,
        index_indicator=False,
        partition=False,
        type_indicator=mdb.MDB_UINT_16,
        compression_indicator=0,
        rtrie_indicator=mdb.MDB_UINT_16,
    )

    # Each step applies its attribute changes in order and then checks the
    # prefix; the first step checks the column exactly as constructed.
    steps = (
        ((), "@2"),
        ((("type_indicator", mdb.MDB_INT_16),), "#2"),
        ((("type_indicator", mdb.MDB_INT_32),), "#4"),
        ((("type_indicator", mdb.MDB_UINT_32),), "@4"),
        ((("type_indicator", mdb.MDB_INT_64),), "#8"),
        ((("type_indicator", mdb.MDB_UINT_64),), "@8"),
        (
            (
                ("type_indicator", mdb.MDB_STR),
                ("compression_indicator", 0),
            ),
            "%2",
        ),
        ((("rtrie_indicator", mdb.MDB_UINT_32),), "%4"),
        ((("compression_indicator", 1),), "$"),
        ((("compression_indicator", 2),), "*"),
    )
    for changes, prefix in steps:
        for attribute, value in changes:
            setattr(column, attribute, value)
        self.assertEqual(column.schema_string(), "%s%s" % (prefix, _NAME))