def test_expression_column_names(self):
    """Check which operands ``Expression.column_names`` reports.

    With two column operands both are expected in the set; when the
    right operand is ``'1'`` only the left column is expected.
    """
    # Both operands name a column.
    column_vs_column = Expression('a.a', '==', 'b.b')
    self.assertEqual(
        {ColumnName('a.a'), ColumnName('b.b')},
        column_vs_column.column_names,
    )

    # The operand '1' is not expected to show up as a column name.
    column_vs_value = Expression('a.a', '==', '1')
    self.assertEqual(
        {ColumnName('a.a')},
        column_vs_value.column_names,
    )
def test_condition_column_names(self):
    """Check that an ``AndList`` reports the columns of all its members.

    The expected set holds every distinct column operand used by the
    member expressions; the ``'1'`` operands are not expected in it.
    """
    operand_pairs = (
        ('a.a', 'b.b'),
        ('a.a', '1'),
        ('1', 'c.c'),
        ('d.d', '1'),
    )
    and_list = AndList(
        [Expression(left, '==', right) for left, right in operand_pairs]
    )

    wanted = {ColumnName(name) for name in ('a.a', 'b.b', 'c.c', 'd.d')}

    self.assertEqual(wanted, and_list.column_names)
def test_subset_rows(self):
    """Check the commands held by ``table_a`` after ``subset_rows``.

    A single condition group ``col_b == 1 or col_a == 2`` is applied and
    ``table_a.cmds`` is compared against an ``echo`` command followed by
    an ``awk`` filter.
    """
    # NOTE(review): the leading echo command presumably comes from the
    # fixture that creates self.table_a -- confirm against setUp.
    or_group = [
        Expression('col_b', '==', '1'),
        'or',
        Expression('col_a', '==', '2'),
    ]
    self.table_a.subset_rows([or_group])

    wanted_cmds = [
        'echo -e "1,1\n2,3\n3,2"',
        "awk -F',' 'OFS=\",\" { if (($2 == 1 || $1 == 2)) { print $1,$2 } }'",
    ]
    self.assertEqual(self.table_a.cmds, wanted_cmds)
def test_classify_conditions(self):
    """Check how ``classify_conditions`` splits join and where conditions.

    The input is a list of condition dicts separated by ``'and'``. The
    conditions comparing ``a.col1`` with a column of ``b`` or ``c`` are
    expected in the join list; the comparison with the value ``5`` and
    the comparison between two columns of ``a`` are expected in the
    where list. Both lists are expected in input order.
    """
    conditions = [
        {
            'left_operand': 'a.col1',
            'operator': '=',
            'right_operand': 'b.col1',
        },
        'and',
        {
            'left_operand': 'a.col1',
            'operator': '=',
            'right_operand': 5,
        },
        'and',
        {
            'left_operand': 'a.col1',
            'operator': '=',
            'right_operand': 'c.col1',
        },
        'and',
        {
            'left_operand': 'a.col1',
            'operator': '=',
            'right_operand': 'a.col2',
        },
    ]

    expected_join_conditions = [
        Expression(ColumnName('a.col1'), '=', ColumnName('b.col1')),
        Expression(ColumnName('a.col1'), '=', ColumnName('c.col1')),
    ]
    expected_where_conditions = [
        Expression(ColumnName('a.col1'), '=', 5),
        Expression(ColumnName('a.col1'), '=', ColumnName('a.col2')),
    ]

    actual_join_conditions, actual_where_conditions = classify_conditions(
        conditions)

    self.assertEqual(actual_join_conditions, expected_join_conditions)
    self.assertEqual(actual_where_conditions, expected_where_conditions)
def test_build_graph(self):
    """Check the adjacency structure produced by ``build_graph``.

    Three tables aliased ``a``, ``b`` and ``c`` are joined through two
    conditions that both involve ``a``. The expected graph maps each
    alias to the table's position in the input list (``idx``) and to the
    set of aliases it shares a condition with (``neighbors``).
    """
    tables = [
        Table(path, alias=alias)
        for path, alias in (('a.txt', 'a'), ('b.txt', 'b'), ('c.txt', 'c'))
    ]
    conditions = [
        Expression(ColumnName('a.col1'), '=', ColumnName('b.col1')),
        Expression(ColumnName('a.col1'), '=', ColumnName('c.col1')),
    ]

    expected_graph = {
        'a': {'idx': 0, 'neighbors': {'b', 'c'}},
        'b': {'idx': 1, 'neighbors': {'a'}},
        'c': {'idx': 2, 'neighbors': {'a'}},
    }

    actual_graph = build_graph(tables, conditions)

    self.assertEqual(actual_graph, expected_graph)
def test_stage_conditions(self):
    """Check how ``stage_conditions`` groups conditions into stages.

    The condition that only references ``table_a`` is expected alone in
    the first stage; the two conditions that also reference ``table_b``
    are expected together in the second stage, in input order.
    """
    # Factories return a fresh object on every call so the input and
    # the expected value never share instances.
    def cross_table():
        return Expression('table_a.col_a', '==', 'table_b.col_z')

    def single_table():
        return Expression('table_a.col_a', '==', 'table_a.col_b')

    def either():
        return OrList([cross_table(), single_table()])

    given = [cross_table(), single_table(), either()]

    wanted_stages = [
        [single_table()],
        [cross_table(), either()],
    ]

    staged = stage_conditions([self.table_a, self.table_b], given)

    self.assertEqual(wanted_stages, staged)
def test_condition_applies(self):
    """Exercise ``condition_applies`` across several condition/table mixes.

    Each case builds an ``AndList`` or ``OrList`` of two equality
    expressions and asserts whether it applies to the given tables; the
    last case expects ``AmbiguousColumnNameError``.
    """
    def equals(left, right):
        return Expression(left, '==', right)

    # AndList over columns of table_a and table_b, both tables supplied.
    cond = AndList([
        equals('table_a.col_a', 'table_b.col_z'),
        equals('table_a.col_b', 'table_b.col_a'),
    ])
    self.assertTrue(condition_applies(cond, self.table_a, self.table_b))

    # Same expressions as an OrList, both tables supplied.
    cond = OrList([
        equals('table_a.col_a', 'table_b.col_z'),
        equals('table_a.col_b', 'table_b.col_a'),
    ])
    self.assertTrue(condition_applies(cond, self.table_a, self.table_b))

    # Same OrList, but only table_a supplied.
    cond = OrList([
        equals('table_a.col_a', 'table_b.col_z'),
        equals('table_a.col_b', 'table_b.col_a'),
    ])
    self.assertFalse(condition_applies(cond, self.table_a))

    # One expression references table_c, which is not supplied.
    cond = AndList([
        equals('table_a.col_a', 'table_b.col_z'),
        equals('table_c.col_b', 'table_b.col_a'),
    ])
    self.assertFalse(condition_applies(cond, self.table_a, self.table_b))

    # NOTE(review): presumably table_b has no col_j -- confirm against
    # the fixture that defines self.table_b.
    cond = OrList([
        equals('table_a.col_a', 'table_b.col_j'),
        equals('table_a.col_b', 'table_b.col_a'),
    ])
    self.assertFalse(condition_applies(cond, self.table_a, self.table_b))

    # One expression compares a column with the operand '1'.
    cond = AndList([
        equals('table_a.col_a', '1'),
        equals('table_a.col_b', 'table_b.col_a'),
    ])
    self.assertTrue(condition_applies(cond, self.table_a, self.table_b))

    # The unqualified 'col_a' operand is expected to be rejected.
    # Construction stays inside the block so the set of statements that
    # may raise is unchanged.
    with self.assertRaises(AmbiguousColumnNameError):
        cond = AndList([
            equals('table_a.col_a', '1'),
            equals('table_a.col_b', 'col_a'),
        ])
        condition_applies(cond, self.table_a, self.table_b)