Ejemplo n.º 1
0
    def test_data_source_join_on_field_comparison(self, condition,
                                                  expected_result):
        # type: (str, List[List[int]]) -> None
        """Join two tables on a field comparison and check rows and columns.

        Args:
            condition: Text placed after ``on`` in the join expression.
            expected_result: Rows the joined table is expected to contain.
        """
        left_frame = TypedDataFrame(
            pd.DataFrame([[1, 9], [2, 8], [2, 1]], columns=['a', 'b']),
            types=[BQScalarType.INTEGER, BQScalarType.INTEGER])
        right_frame = TypedDataFrame(
            pd.DataFrame([[1, 2], [3, 4]], columns=['c', 'd']),
            types=[BQScalarType.INTEGER, BQScalarType.INTEGER])
        table_context = DatasetTableContext({
            'my_project': {
                'my_dataset': {
                    'my_table': left_frame,
                    'my_table2': right_frame,
                },
            },
        })

        join_text = (
            'my_project.my_dataset.my_table join my_project.my_dataset.my_table2 on {}'
            .format(condition))
        data_source_node, leftover = data_source(tokenize(join_text))
        self.assertFalse(leftover)
        assert isinstance(data_source_node, DataSource)
        context = data_source_node.create_context(table_context)

        # Rows first, then the fully-qualified column names of the join.
        actual_rows = context.table.to_list_of_lists()
        self.assertEqual(actual_rows, expected_result)
        actual_columns = list(context.table.dataframe)
        self.assertEqual(
            actual_columns,
            ['my_table.a', 'my_table.b', 'my_table2.c', 'my_table2.d'])
 def test_complex_type_errors(self, query, error):
     # type: (str, str) -> None
     """Check that evaluating the given select raises a matching ValueError.

     Args:
         query: Select statement text to tokenize and parse.
         error: Regular expression the ValueError message must match.
     """
     node, leftover = select_rule(tokenize(query))
     self.assertFalse(leftover)
     assert isinstance(node, Select)
     # assertRaisesRegexp is a deprecated alias that was removed in
     # Python 3.12; six.assertRaisesRegex resolves to whichever spelling the
     # running interpreter provides.
     with six.assertRaisesRegex(self, ValueError, error):
         node.get_dataframe(self.small_table_context)
Ejemplo n.º 3
0
    def test_data_source_join_on_arbitrary_bool(
            self,
            join_type,  # type: Union[_EmptyNode, str]
            result  # type: List[List[int]]
    ):
        # type: (...) -> None
        """Join two single-column tables on ``MOD(a + b, 3) = 0``.

        Args:
            join_type: Join keyword(s) formatted between the two table names.
            result: Rows the joined table is expected to contain.
        """
        left_frame = TypedDataFrame(pd.DataFrame([[1], [2]], columns=['a']),
                                    types=[BQScalarType.INTEGER])
        right_frame = TypedDataFrame(pd.DataFrame([[2], [0]], columns=['b']),
                                     types=[BQScalarType.INTEGER])
        table_context = DatasetTableContext({
            'my_project': {
                'my_dataset': {
                    'my_table': left_frame,
                    'my_table2': right_frame,
                },
            },
        })

        join_text = 'my_table {} my_table2 ON MOD(a + b, 3) = 0'.format(
            join_type)
        data_source_node, leftover = apply_rule(data_source,
                                                tokenize(join_text))
        self.assertFalse(leftover)
        assert isinstance(data_source_node, DataSource)

        context = data_source_node.create_context(table_context)
        joined_rows = context.table.to_list_of_lists()
        self.assertEqual(joined_rows, result)
    def test_exists_reference_outer(self):
        """Evaluate EXISTS over a subquery whose WHERE names an outer table.

        The subquery selects from my_table, while its WHERE clause compares
        against my_table2, which is only added to the evaluation context.
        """
        inner_frame = TypedDataFrame(pd.DataFrame([[1], [4]], columns=['a']),
                                     types=[BQScalarType.INTEGER])
        outer_frame = TypedDataFrame(pd.DataFrame([[4], [2]], columns=['b']),
                                     types=[BQScalarType.INTEGER])
        table_context = DatasetTableContext({
            'my_project': {
                'my_dataset': {
                    'my_table': inner_frame,
                    'my_table2': outer_frame,
                },
            },
        })

        select_query = ("select a from `my_project.my_dataset.my_table` where "
                        "my_table.a = my_table2.b")
        select_node, leftover = apply_rule(select_rule, tokenize(select_query))
        self.assertFalse(leftover)

        exists = Exists(select_node)

        # Register the outer table so the subquery's WHERE clause can see it.
        context = EvaluationContext(table_context)
        outer_reference = TableReference(
            ('my_project', 'my_dataset', 'my_table2'))
        context.add_table_from_node(outer_reference, EMPTY_NODE)

        exists_result = exists.evaluate(context)
        self.assertEqual(list(exists_result.series), [True, False])
Ejemplo n.º 5
0
 def test_match_order(self):
     """Check tokenizing of operators that share leading characters."""
     sql = "SELECT f1 <> 3, f2 << 2, f3 < 5 AND f3 > -1 FROM t"
     expected_tokens = [
         'SELECT',
         'f1', '<>', '3', ',',
         'f2', '<<', '2', ',',
         'f3', '<', '5', 'AND', 'f3', '>', '-', '1',
         'FROM', 't',
     ]
     self.assertEqual(tokenize(sql), expected_tokens)
Ejemplo n.º 6
0
 def test_negatives(self):
     """Check that a leading sign is split off as its own token."""
     sql = "SELECT -1.23e1, -1e1, -.23e2, 4.0e-7, +5, +3. FROM t"
     expected_tokens = [
         'SELECT',
         '-', '1.23e1', ',',
         '-', '1e1', ',',
         '-', '.23e2', ',',
         '4.0e-7', ',',
         '+', '5', ',',
         '+', '3.',
         'FROM', 't',
     ]
     self.assertEqual(tokenize(sql), expected_tokens)
 def test_bigquery_statement(self, statement, type_):
     # type: (str, type) -> None
     """Parse a statement and check the type of the node it produces.

     Args:
         statement: Statement text to tokenize and parse.
         type_: Class the parsed node is expected to be an instance of.
     """
     parsed, leftover = apply_rule(bigquery_statement, tokenize(statement))
     self.assertFalse(leftover)
     assert isinstance(parsed, tuple)
     # The parse result is unpacked as a pair; only the first element is
     # checked (the second is presumably the trailing semicolon).
     statement_node, _unused_semicolon = parsed
     self.assertIsInstance(statement_node, type_)
Ejemplo n.º 8
0
    def test_data_source_joins(
            self,
            join_type,  # type: Union[_EmptyNode, str]
            table1,  # type: List[List[int]]
            table2,  # type: List[List[int]]
            result  # type: List[List[int]]
    ):
        # type: (...) -> None
        """Join two two-column tables and check the rows and column names.

        A ``USING (a)`` clause is appended unless the join type is ``,`` or
        ``CROSS JOIN``.

        Args:
            join_type: Join keyword(s) formatted between the two table names.
            table1: Rows for my_table (columns a, b).
            table2: Rows for my_table2 (columns a, c).
            result: Rows the joined table is expected to contain.
        """
        left_frame = TypedDataFrame(
            pd.DataFrame(table1, columns=['a', 'b']),
            types=[BQScalarType.INTEGER, BQScalarType.INTEGER])
        right_frame = TypedDataFrame(
            pd.DataFrame(table2, columns=['a', 'c']),
            types=[BQScalarType.INTEGER, BQScalarType.INTEGER])
        table_context = DatasetTableContext({
            'my_project': {
                'my_dataset': {
                    'my_table': left_frame,
                    'my_table2': right_frame,
                },
            },
        })

        if join_type in (',', 'CROSS JOIN'):
            join_condition = ''
        else:
            join_condition = 'USING (a)'
        tokens = tokenize(
            'my_table {} my_table2 {}'.format(join_type, join_condition))
        data_source_node, leftover = apply_rule(data_source, tokens)
        self.assertFalse(leftover)
        assert isinstance(data_source_node, DataSource)
        context = data_source_node.create_context(table_context)

        actual_rows = context.table.to_list_of_lists()
        self.assertEqual(actual_rows, result)
        actual_columns = list(context.table.dataframe)
        self.assertEqual(
            actual_columns,
            ['my_table.a', 'my_table.b', 'my_table2.a', 'my_table2.c'])
Ejemplo n.º 9
0
    def test_select_star(self, select, expected_result):
        # type: (str, List[List[int]]) -> None
        """Evaluate a select over two three-column tables and check its rows.

        Args:
            select: Select statement text to tokenize and parse.
            expected_result: Rows the resulting dataframe should contain.
        """
        group_table_context = DatasetTableContext({
            'p': {
                'd': {
                    'table1':
                    TypedDataFrame(
                        pd.DataFrame([[2, 8, 4], [6, 3, 0], [12, 10, 1]],
                                     columns=['a', 'b', 'c']),
                        types=[
                            BQScalarType.INTEGER, BQScalarType.INTEGER,
                            BQScalarType.INTEGER
                        ]),
                    'table2':
                    TypedDataFrame(
                        pd.DataFrame([[2, 7, 3], [6, 2, -1], [12, 9, 0]],
                                     columns=['a', 'd', 'e']),
                        types=[
                            BQScalarType.INTEGER, BQScalarType.INTEGER,
                            BQScalarType.INTEGER
                        ]),
                }
            }
        })

        select_node, leftover = select_rule(tokenize(select))
        # Check for unparsed tokens before evaluating, so an incomplete parse
        # is reported as such rather than as a downstream evaluation failure.
        self.assertFalse(leftover)
        assert isinstance(select_node, Select)
        dataframe, unused_table_name = select_node.get_dataframe(
            group_table_context)
        self.assertEqual(dataframe.to_list_of_lists(), expected_result)
 def test_create_table_with_select_mismatched_types(self, query, error):
     # type: (str, str) -> None
     """Check that executing the given statement raises a matching ValueError.

     Args:
         query: Statement text to tokenize and parse.
         error: Regular expression the ValueError message must match.
     """
     node, leftover = apply_rule(statement_rule, tokenize(query))
     self.assertFalse(leftover)
     table_context = DatasetTableContext({'project': {'dataset': {}}})
     assert isinstance(node, Statement)
     # assertRaisesRegexp is a deprecated alias that was removed in
     # Python 3.12; six.assertRaisesRegex resolves to whichever spelling the
     # running interpreter provides.
     with six.assertRaisesRegex(self, ValueError, error):
         node.execute(table_context)
Ejemplo n.º 11
0
 def test_with_clause_error(self, query_expression, error):
     # type: (str, str) -> None
     """Check that evaluating the query expression raises a ValueError.

     Args:
         query_expression: Query expression text to tokenize and parse.
         error: Regular expression the ValueError message must match.
     """
     query_expression_node, leftover = query_expression_rule(
         tokenize(query_expression))
     self.assertFalse(leftover)
     assert isinstance(query_expression_node, QueryExpression)
     # assertRaisesRegexp is a deprecated alias that was removed in
     # Python 3.12; six.assertRaisesRegex resolves to whichever spelling the
     # running interpreter provides.
     with six.assertRaisesRegex(self, ValueError, error):
         query_expression_node.get_dataframe(self.table_context)
Ejemplo n.º 12
0
 def test_struct_field_and_constant(self, query, expected_result, expected_types):
     # type: (str, List[List[Tuple[Optional[int], ...]]], Sequence[BQStructType]) -> None
     """Evaluate a select and check both its rows and its column types.

     Args:
         query: Select statement text to tokenize and parse.
         expected_result: Rows the resulting dataframe should contain.
         expected_types: Column types the resulting dataframe should report.
     """
     select_node, leftover = select_rule(tokenize(query))
     self.assertFalse(leftover)
     assert isinstance(select_node, Select)
     typed_frame, _unused_table_name = select_node.get_dataframe(
         self.small_table_context)
     actual_rows = typed_frame.to_list_of_lists()
     self.assertEqual(actual_rows, expected_result)
     self.assertEqual(typed_frame.types, expected_types)
Ejemplo n.º 13
0
 def test_with_clause(self, query_expression, expected_result):
     # type: (str, List[List[int]]) -> None
     """Evaluate a query expression and check the rows it produces.

     Args:
         query_expression: Query expression text to tokenize and parse.
         expected_result: Rows the resulting dataframe should contain.
     """
     tokens = tokenize(query_expression)
     query_expression_node, leftover = query_expression_rule(tokens)
     self.assertFalse(leftover)
     assert isinstance(query_expression_node, QueryExpression)
     typed_frame, _unused_name = query_expression_node.get_dataframe(
         self.table_context)
     actual_rows = typed_frame.to_list_of_lists()
     self.assertEqual(actual_rows, expected_result)
Ejemplo n.º 14
0
    def test_unnest_error(self, query, expected_error):
        # type: (str, str) -> None
        """Check that evaluating the query raises a matching ValueError.

        Args:
            query: Query expression text to tokenize and parse.
            expected_error: Regular expression the ValueError message must
                match.
        """
        node, leftover = query_expression_rule(tokenize(query))

        self.assertFalse(leftover)
        assert isinstance(node, QueryExpression)

        # assertRaisesRegexp is a deprecated alias that was removed in
        # Python 3.12; six.assertRaisesRegex resolves to whichever spelling
        # the running interpreter provides.
        with six.assertRaisesRegex(self, ValueError, expected_error):
            node.get_dataframe(TableContext())
Ejemplo n.º 15
0
 def test_like_order_by(self):
     """Check tokenizing of a query using WHERE, LIKE and ORDER BY."""
     sql = "SELECT * FROM mytable WHERE field1 LIKE 'a%' ORDER BY field2"
     expected_tokens = [
         'SELECT', '*',
         'FROM', 'mytable',
         'WHERE', 'field1', 'LIKE', "'a%'",
         'ORDER', 'BY', 'field2',
     ]
     self.assertEqual(tokenize(sql), expected_tokens)
 def test_complex_types(self, query, expected_result, expected_type):
     # type: (str, Tuple[Optional[int], ...], BQType) -> None
     """Evaluate a select with no tables and check its single value and type.

     Args:
         query: Select statement text to tokenize and parse.
         expected_result: The one value expected in the one-row, one-column
             result.
         expected_type: The type expected for that single column.
     """
     empty_context = DatasetTableContext({})
     select_node, leftover = select_rule(tokenize(query))
     self.assertFalse(leftover)
     assert isinstance(select_node, Select)
     typed_frame, _unused_table_name = select_node.get_dataframe(
         empty_context)
     # The result is compared as a single row holding a single cell.
     actual_rows = typed_frame.to_list_of_lists()
     self.assertEqual(actual_rows, [[expected_result]])
     self.assertEqual(typed_frame.types, [expected_type])
 def test_create_table_with_select(self, statement, columns):
     # type: (str, List[str]) -> None
     """Execute a statement and check the table stored at its result path.

     Args:
         statement: Statement text to tokenize and parse.
         columns: Column names the stored table is expected to have.
     """
     tokens = tokenize(statement)
     statement_node, leftover = apply_rule(statement_rule, tokens)
     self.assertFalse(leftover)
     table_context = DatasetTableContext({'project': {'dataset': {}}})
     assert isinstance(statement_node, Statement)
     outcome = statement_node.execute(table_context)
     self.assertEqual(outcome.path, ('project', 'dataset', 'table'))
     # Look the table up again through the context to see what was stored.
     stored_table, _unused_name = table_context.lookup(outcome.path)
     stored_columns = list(stored_table.dataframe.columns)
     self.assertEqual(stored_columns, columns)
     self.assertEqual(stored_table.types,
                      [BQScalarType.INTEGER, BQScalarType.STRING])
Ejemplo n.º 18
0
    def test_unnest(self, query, result, result_columns):
        # type: (str, List[List[Any]], List[str]) -> None
        """Evaluate a query with an empty table context; check rows and columns.

        Args:
            query: Query expression text to tokenize and parse.
            result: Rows the resulting dataframe should contain.
            result_columns: Column names the resulting dataframe should have.
        """
        query_node, leftover = query_expression_rule(tokenize(query))
        self.assertFalse(leftover)
        assert isinstance(query_node, QueryExpression)

        typed_frame, _unused_name = query_node.get_dataframe(TableContext())

        actual_rows = typed_frame.to_list_of_lists()
        self.assertEqual(actual_rows, result)
        actual_columns = list(typed_frame.dataframe.columns)
        self.assertEqual(actual_columns, result_columns)
Ejemplo n.º 19
0
    def test_scalar_expressions(self, expression, expected_result):
        # type: (str, Any) -> None
        """Parse and evaluate an expression and check its single value.

        Args:
            expression: Expression text to tokenize and parse.
            expected_result: The one value the evaluated series should hold.
        """
        parsed, leftover = apply_rule(expression_rule, tokenize(expression))
        # Include the unparsed tokens in the failure message.
        leftover_message = 'leftover {}'.format(leftover)
        self.assertFalse(leftover, leftover_message)

        assert isinstance(parsed, EvaluatableNode)
        evaluated = parsed.evaluate(EMPTY_CONTEXT)
        assert isinstance(evaluated, TypedSeries)
        actual_values = evaluated.to_list()
        self.assertEqual(actual_values, [expected_result])
Ejemplo n.º 20
0
    def test_array_agg_arguments(self, query, expected_result):
        # type: (str, Tuple[Optional[int], ...]) -> None
        """Evaluate a select over table p.d.t and check its single value.

        Args:
            query: Select text without a FROM clause; `` FROM p.d.t`` is
                appended before parsing.
            expected_result: The one value expected in the one-row, one-column
                result.
        """
        source_frame = TypedDataFrame(
            pd.DataFrame([[1], [1], [2], [None]], columns=['a']),
            types=[BQScalarType.INTEGER])
        table_context = DatasetTableContext({'p': {'d': {'t': source_frame}}})

        full_query = query + ' FROM p.d.t'
        select_node, leftover = select_rule(tokenize(full_query))
        self.assertFalse(leftover)
        assert isinstance(select_node, Select)
        typed_frame, _unused_table_name = select_node.get_dataframe(
            table_context)
        actual_rows = typed_frame.to_list_of_lists()
        self.assertEqual(actual_rows, [[expected_result]])
Ejemplo n.º 21
0
    def test_non_aggregate_function_in_group_by(self):
        """Evaluate ``max(concat(b, "hi"))`` grouped by b and check the rows."""
        source_rows = [['one', '1'], ['two', '1'], ['three', '2'],
                       ['four', '2']]
        source_frame = TypedDataFrame(
            pd.DataFrame(source_rows, columns=['a', 'b']),
            types=[BQScalarType.STRING, BQScalarType.INTEGER])
        table_context = DatasetTableContext(
            {'my_project': {'my_dataset': {'my_table': source_frame}}})

        sql = 'select max(concat(b, "hi")) from my_table group by b'
        select_node, leftover = select_rule(tokenize(sql))
        self.assertFalse(leftover)
        typed_frame, _unused_table_name = select_node.get_dataframe(
            table_context)
        actual_rows = typed_frame.to_list_of_lists()
        self.assertEqual(actual_rows, [['1hi'], ['2hi']])
Ejemplo n.º 22
0
 def test_analytic_function_with_group_by(self, selectors, expected_result):
     """Evaluate the selectors grouped by b and check the rows, in any order.

     Args:
         selectors: Select-list text formatted into
             ``select {} from my_table group by b``.
         expected_result: Rows the result should contain, order ignored.
     """
     table_context = DatasetTableContext(
         {'my_project': {'my_dataset': {'my_table': TypedDataFrame(
             pd.DataFrame([[20, 2], [10, 2], [30, 3], [31, 3], [32, 3]], columns=['a', 'b']),
             types=[BQScalarType.INTEGER, BQScalarType.INTEGER])}}})
     tokens = tokenize('select {} from my_table group by b'.format(selectors))
     node, leftover = select_rule(tokens)
     # Check for unparsed tokens before evaluating, so an incomplete parse is
     # reported as such rather than as a downstream evaluation failure.
     self.assertFalse(leftover)
     result, unused_table_name = node.get_dataframe(table_context)
     # Note: BQ docs say if ORDER BY clause (for the select as a whole) is not present, order of
     # results is undefined, so we do not assert on the order.
     six.assertCountEqual(self, result.to_list_of_lists(), expected_result)
Ejemplo n.º 23
0
 def test_join(self):
     """Check tokenizing of a JOIN between two aliased subqueries."""
     sql = '''SELECT * FROM (SELECT field1, field2 FROM table1) AS t1
                JOIN (SELECT field3, field4 FROM table2) AS t2 ON
                t1.field1=t2.field2
             '''
     expected_tokens = [
         'SELECT', '*', 'FROM',
         '(', 'SELECT', 'field1', ',', 'field2', 'FROM', 'table1', ')',
         'AS', 't1',
         'JOIN',
         '(', 'SELECT', 'field3', ',', 'field4', 'FROM', 'table2', ')',
         'AS', 't2',
         'ON', 't1', '.', 'field1', '=', 't2', '.', 'field2',
     ]
     self.assertEqual(tokenize(sql), expected_tokens)
Ejemplo n.º 24
0
    def test_aggregate_functions_in_group_by(self, selectors, expected_result):
        # type: (str, List[List[int]]) -> None
        """Evaluate the selectors grouped by b and check the resulting rows.

        Args:
            selectors: Select-list text formatted into
                ``select {} from my_table group by b``.
            expected_result: Rows the resulting dataframe should contain.
        """
        table_context = DatasetTableContext(
            {'my_project': {'my_dataset': {'my_table': TypedDataFrame(
                pd.DataFrame([[2, 1], [4, 1], [5, 2], [np.nan, 2]], columns=['a', 'b']),
                types=[BQScalarType.INTEGER, BQScalarType.INTEGER])}}})

        tokens = tokenize('select {} from my_table group by b'.format(selectors))
        node, leftover = select_rule(tokens)
        # Check for unparsed tokens before evaluating, so an incomplete parse
        # is reported as such rather than as a downstream evaluation failure.
        self.assertFalse(leftover)
        assert isinstance(node, Select)
        result, unused_table_name = node.get_dataframe(table_context)
        self.assertEqual(result.to_list_of_lists(), expected_result)
Ejemplo n.º 25
0
    def test_exists(self, select_query, result):
        # type: (str, List[bool]) -> None
        """Evaluate EXISTS over a parsed subquery and check the boolean series.

        Args:
            select_query: Subquery text to tokenize and parse.
            result: Values the evaluated series is expected to hold.
        """
        tokens = tokenize(select_query)
        subquery_node, leftover = apply_rule(query_expression, tokens)
        assert isinstance(subquery_node, QueryExpression)
        self.assertFalse(leftover)

        exists = Exists(subquery_node)

        # Evaluate with my_table added to the context.
        context = EvaluationContext(self.small_table_context)
        table_reference = TableReference(
            ('my_project', 'my_dataset', 'my_table'))
        context.add_table_from_node(table_reference, EMPTY_NODE)

        typed_series = exists.evaluate(context)
        assert isinstance(typed_series, TypedSeries)
        actual_values = list(typed_series.series)
        self.assertEqual(actual_values, result)
 def test_create_table_already_exists(self):
     # type: () -> None
     """Check that CREATE TABLE raises when the target table is already set."""
     node, leftover = apply_rule(statement_rule, tokenize(
         'CREATE TABLE project.dataset.table (a int64, b string);'))
     self.assertFalse(leftover)
     table_context = DatasetTableContext({'project': {'dataset': {}}})
     original_table = TypedDataFrame(pd.DataFrame([], columns=['x', 'y', 'z']),
                                     [BQScalarType.STRING, BQScalarType.INTEGER,
                                      BQScalarType.BOOLEAN])
     # Put a table at the target path before executing the statement.
     table_context.set(('project', 'dataset', 'table'), original_table)
     assert isinstance(node, Statement)
     # assertRaisesRegexp is a deprecated alias that was removed in
     # Python 3.12; six.assertRaisesRegex resolves to whichever spelling the
     # running interpreter provides. The former trailing `return` inside this
     # block was dead code and has been dropped.
     with six.assertRaisesRegex(self, ValueError, 'Already Exists'):
         node.execute(table_context)
 def test_create_table_if_not_exists_and_it_does(self):
     # type: () -> None
     """Check that CREATE TABLE IF NOT EXISTS keeps the table already set."""
     sql = 'CREATE TABLE IF NOT EXISTS project.dataset.table (a int64, b string);'
     statement_node, leftover = apply_rule(statement_rule, tokenize(sql))
     self.assertFalse(leftover)

     table_context = DatasetTableContext({'project': {'dataset': {}}})
     original_table = TypedDataFrame(
         pd.DataFrame([], columns=['x', 'y', 'z']),
         [BQScalarType.STRING, BQScalarType.INTEGER, BQScalarType.BOOLEAN])
     table_context.set(('project', 'dataset', 'table'), original_table)

     assert isinstance(statement_node, Statement)
     outcome = statement_node.execute(table_context)
     self.assertEqual(outcome.path, ('project', 'dataset', 'table'))
     # The very same table object must still be stored at that path.
     stored_table, _unused_name = table_context.lookup(outcome.path)
     self.assertIs(stored_table, original_table)
Ejemplo n.º 28
0
 def test_general_tokenization(self):
     """Check tokenizing of numbers, strings, names, comments and arrays."""
     sql = '''SELECT 34.25, .92, 14., 78.23e-24, 12+23* hello as foo_bar0234, "nope", -- first
                ARRAY<INT64>[1,2]
                FROM `myproject.mydataset.mytable` -- more stuff
                WHERE goodbye <= False
             '''
     # The expected tokens contain none of the ``--`` comment text.
     expected_tokens = [
         'SELECT',
         '34.25', ',', '.92', ',', '14.', ',', '78.23e-24', ',',
         '12', '+', '23', '*', 'hello', 'as', 'foo_bar0234', ',',
         '"nope"', ',',
         'ARRAY', '<', 'INT64', '>', '[', '1', ',', '2', ']',
         'FROM', '`myproject.mydataset.mytable`',
         'WHERE', 'goodbye', '<=', 'False',
     ]
     self.assertEqual(tokenize(sql), expected_tokens)
 def test_create_table(self, statement, already_exists):
     # type: (str, bool) -> None
     """Execute a statement and check the table stored at its result path.

     Args:
         statement: Statement text to tokenize and parse.
         already_exists: Whether to set a different table at the target path
             before executing the statement.
     """
     statement_node, leftover = apply_rule(statement_rule,
                                           tokenize(statement))
     self.assertFalse(leftover)

     table_context = DatasetTableContext({'project': {'dataset': {}}})
     original_table = TypedDataFrame(
         pd.DataFrame([], columns=['x', 'y', 'z']),
         [BQScalarType.STRING, BQScalarType.INTEGER, BQScalarType.BOOLEAN])
     if already_exists:
         table_context.set(('project', 'dataset', 'table'), original_table)

     assert isinstance(statement_node, Statement)
     outcome = statement_node.execute(table_context)
     self.assertEqual(outcome.path, ('project', 'dataset', 'table'))
     stored_table, _unused_name = table_context.lookup(outcome.path)
     stored_columns = list(stored_table.dataframe.columns)
     self.assertEqual(stored_columns, ['a', 'b'])
     self.assertEqual(stored_table.types,
                      [BQScalarType.INTEGER, BQScalarType.STRING])
Ejemplo n.º 30
0
 def test_select_distinct(self, select, expected_result):
     # type: (str, List[List[int]]) -> None
     """Evaluate a select over a two-row table and check the resulting rows.

     Args:
         select: Select statement text to tokenize and parse.
         expected_result: Rows the resulting dataframe should contain.
     """
     table_context = DatasetTableContext({
         'my_project': {
             'my_dataset': {
                 'my_table':
                 TypedDataFrame(
                     pd.DataFrame([[1, 2], [1, 3]], columns=['a', 'b']),
                     types=[BQScalarType.INTEGER, BQScalarType.INTEGER])
             }
         }
     })
     select_node, leftover = select_rule(tokenize(select))
     # Check for unparsed tokens before evaluating, so an incomplete parse is
     # reported as such rather than as a downstream evaluation failure.
     self.assertFalse(leftover)
     assert isinstance(select_node, Select)
     dataframe, unused_table_name = select_node.get_dataframe(table_context)
     self.assertEqual(dataframe.to_list_of_lists(), expected_result)