Beispiel #1
0
    def test_expression_column_names(self):
        """Verify ``Expression.column_names`` for column and literal operands.

        When both operands are written as qualified columns, both are
        expected in the result; when the right operand is the literal '1',
        only the left column is expected.
        """
        # (constructor arguments, operands expected to be reported as columns)
        cases = [
            (('a.a', '==', 'b.b'), ('a.a', 'b.b')),
            (('a.a', '==', '1'), ('a.a',)),
        ]
        for operands, columns in cases:
            expr = Expression(*operands)
            expected_column_names = {ColumnName(name) for name in columns}
            self.assertEqual(expected_column_names, expr.column_names)
Beispiel #2
0
 def test_condition_column_names(self):
     """Verify that ``AndList.column_names`` covers all member expressions.

     The list mixes column-to-column and column-to-literal comparisons, with
     the literal '1' appearing on either side. The expected result is the
     set of the four distinct columns, with 'a.a' counted once and no entry
     for the literal.
     """
     operand_pairs = [
         ('a.a', 'b.b'),
         ('a.a', '1'),
         ('1', 'c.c'),
         ('d.d', '1'),
     ]
     boolean_condition = AndList(
         [Expression(left, '==', right) for left, right in operand_pairs]
     )
     expected_column_names = {
         ColumnName(name) for name in ('a.a', 'b.b', 'c.c', 'd.d')
     }
     self.assertEqual(expected_column_names, boolean_condition.column_names)
Beispiel #3
0
 def test_subset_rows(self):
     """Verify the command list after ``subset_rows`` with an OR condition.

     A single condition group ``col_b == 1 or col_a == 2`` is passed to
     ``self.table_a.subset_rows``; afterwards ``self.table_a.cmds`` must
     equal the expected two-command list.
     """
     either_match = [
         Expression('col_b', '==', '1'),
         'or',
         Expression('col_a', '==', '2'),
     ]
     self.table_a.subset_rows([either_match])

     # NOTE(review): the leading ``echo`` entry is presumably queued by the
     # fixture that creates ``self.table_a`` outside this method -- confirm.
     cmds_expected = [
         'echo -e "1,1\n2,3\n3,2"',
         "awk -F',' 'OFS=\",\" { if (($2 == 1 || $1 == 2)) { print $1,$2 } }'",
     ]
     self.assertEqual(self.table_a.cmds, cmds_expected)
Beispiel #4
0
    def test_classify_conditions(self):
        """Verify how ``classify_conditions`` splits a flat condition list.

        The input is a list of condition dicts (``left_operand``,
        ``operator``, ``right_operand``) separated by the string 'and'.
        The function is expected to return a ``(join, where)`` pair of
        ``Expression`` lists:

        * comparisons between columns of two different tables
          (``a.col1 = b.col1``, ``a.col1 = c.col1``) as join conditions;
        * the comparison with the literal ``5`` and the comparison between
          two columns of the same table (``a.col1 = a.col2``) as where
          conditions.
        """
        conditions = [
            {
                'left_operand': 'a.col1',
                'operator': '=',
                'right_operand': 'b.col1'
            },
            'and',
            {
                'left_operand': 'a.col1',
                'operator': '=',
                'right_operand': 5
            },
            'and',
            {
                'left_operand': 'a.col1',
                'operator': '=',
                'right_operand': 'c.col1'
            },
            'and',
            {
                'left_operand': 'a.col1',
                'operator': '=',
                'right_operand': 'a.col2'
            },
        ]

        # Expected output keeps the relative order of the input conditions.
        expected_join_conditions = [
            Expression(ColumnName('a.col1'), '=', ColumnName('b.col1')),
            Expression(ColumnName('a.col1'), '=', ColumnName('c.col1')),
        ]
        expected_where_conditions = [
            Expression(ColumnName('a.col1'), '=', 5),
            Expression(ColumnName('a.col1'), '=', ColumnName('a.col2')),
        ]

        actual_join_conditions, actual_where_conditions = classify_conditions(
            conditions)
        self.assertEqual(actual_join_conditions, expected_join_conditions)
        self.assertEqual(actual_where_conditions, expected_where_conditions)
Beispiel #5
0
    def test_build_graph(self):
        """Verify the graph ``build_graph`` derives from join conditions.

        Three tables aliased 'a', 'b' and 'c' are connected through 'a' by
        the conditions ``a.col1 = b.col1`` and ``a.col1 = c.col1``. The
        expected result maps each alias to a dict with:

        * ``idx`` -- in this fixture, the table's position in the input list;
        * ``neighbors`` -- the set of aliases it shares a condition with.
        """
        relations = [
            {
                'path': 'a.txt',
                'alias': 'a'
            },
            {
                'path': 'b.txt',
                'alias': 'b'
            },
            {
                'path': 'c.txt',
                'alias': 'c'
            },
        ]
        tables = [Table(r['path'], alias=r['alias']) for r in relations]
        conditions = [
            Expression(ColumnName('a.col1'), '=', ColumnName('b.col1')),
            Expression(ColumnName('a.col1'), '=', ColumnName('c.col1')),
        ]

        expected_graph = {
            'a': {
                'idx': 0,
                'neighbors': {'b', 'c'},
            },
            'b': {
                'idx': 1,
                'neighbors': {'a'},
            },
            'c': {
                'idx': 2,
                'neighbors': {'a'},
            },
        }

        actual_graph = build_graph(tables, conditions)
        self.assertEqual(actual_graph, expected_graph)
Beispiel #6
0
    def test_stage_conditions(self):
        """Verify the grouping produced by ``stage_conditions``.

        Three conditions are supplied for ``self.table_a`` and
        ``self.table_b``: one comparing columns across both tables, one
        comparing two ``table_a`` columns, and an ``OrList`` of both. The
        expected result is a list of two groups: first the ``table_a``-only
        comparison, then the cross-table comparison followed by the
        ``OrList``.
        """
        # Each factory returns a fresh object per call, so the assertion
        # compares by equality and never by shared identity.
        def cross_table():
            return Expression('table_a.col_a', '==', 'table_b.col_z')

        def single_table():
            return Expression('table_a.col_a', '==', 'table_a.col_b')

        def either():
            return OrList([cross_table(), single_table()])

        conditions = [cross_table(), single_table(), either()]

        expected_condition_order = [
            [single_table()],
            [cross_table(), either()],
        ]
        actual_condition_order = stage_conditions(
            [self.table_a, self.table_b], conditions)
        self.assertEqual(expected_condition_order, actual_condition_order)
Beispiel #7
0
    def test_condition_applies(self):
        """Verify ``condition_applies`` for several conditions and table sets.

        Covers ``AndList`` and ``OrList`` conditions checked against
        ``self.table_a`` alone or together with ``self.table_b``, plus one
        case with an unqualified column name that is expected to raise
        ``AmbiguousColumnNameError``.
        """
        def build(list_type, *triples):
            # Fresh Expression objects are created for every condition.
            return list_type([Expression(*triple) for triple in triples])

        a_to_z = ('table_a.col_a', '==', 'table_b.col_z')
        b_to_a = ('table_a.col_b', '==', 'table_b.col_a')
        a_is_one = ('table_a.col_a', '==', '1')

        # Same pair of comparisons, as AND and as OR, against both tables.
        condition = build(AndList, a_to_z, b_to_a)
        self.assertTrue(
            condition_applies(condition, self.table_a, self.table_b))

        condition = build(OrList, a_to_z, b_to_a)
        self.assertTrue(
            condition_applies(condition, self.table_a, self.table_b))

        # The same OR condition must not apply when only table_a is given.
        condition = build(OrList, a_to_z, b_to_a)
        self.assertFalse(condition_applies(condition, self.table_a))

        # One expression refers to table_c, which is not among the arguments.
        condition = build(
            AndList, a_to_z, ('table_c.col_b', '==', 'table_b.col_a'))
        self.assertFalse(
            condition_applies(condition, self.table_a, self.table_b))

        # NOTE(review): presumably table_b has no column col_j -- confirm
        # against the fixture defined outside this method.
        condition = build(
            OrList, ('table_a.col_a', '==', 'table_b.col_j'), b_to_a)
        self.assertFalse(
            condition_applies(condition, self.table_a, self.table_b))

        # A literal operand does not prevent the condition from applying.
        condition = build(AndList, a_is_one, b_to_a)
        self.assertTrue(
            condition_applies(condition, self.table_a, self.table_b))

        # The unqualified 'col_a' is expected to be rejected as ambiguous;
        # construction stays inside the context manager, as in the original.
        with self.assertRaises(AmbiguousColumnNameError):
            condition = build(
                AndList, a_is_one, ('table_a.col_b', '==', 'col_a'))
            condition_applies(condition, self.table_a, self.table_b)