Exemple #1
0
    def test_data_source_joins(
            self,
            join_type,  # type: Union[_EmptyNode, str]
            table1,  # type: List[List[int]]
            table2,  # type: List[List[int]]
            result  # type: List[List[int]]
    ):
        # type: (...) -> None
        """Join two integer tables and check the joined rows and column names.

        Args:
            join_type: Join operator formatted between the two table names.
            table1: Rows for my_table, whose columns are a and b.
            table2: Rows for my_table2, whose columns are a and c.
            result: Rows expected in the joined table.
        """
        left_table = TypedDataFrame(
            pd.DataFrame(table1, columns=['a', 'b']),
            types=[BQScalarType.INTEGER, BQScalarType.INTEGER])
        right_table = TypedDataFrame(
            pd.DataFrame(table2, columns=['a', 'c']),
            types=[BQScalarType.INTEGER, BQScalarType.INTEGER])
        table_context = DatasetTableContext({
            'my_project': {
                'my_dataset': {
                    'my_table': left_table,
                    'my_table2': right_table
                }
            }
        })

        # The comma and CROSS JOIN forms are written without a join condition;
        # every other join type is joined on the shared column a.
        if join_type in (',', 'CROSS JOIN'):
            join_condition = ''
        else:
            join_condition = 'USING (a)'
        source_text = 'my_table {} my_table2 {}'.format(join_type, join_condition)

        data_source_node, leftover = apply_rule(data_source, tokenize(source_text))
        self.assertFalse(leftover)
        assert isinstance(data_source_node, DataSource)
        joined_context = data_source_node.create_context(table_context)

        self.assertEqual(joined_context.table.to_list_of_lists(), result)
        # Columns of the joined table are qualified with their source table name.
        expected_columns = ['my_table.a', 'my_table.b', 'my_table2.a', 'my_table2.c']
        self.assertEqual(list(joined_context.table.dataframe), expected_columns)
 def test_bigquery_statement(self, statement, type_):
     # type: (str, type) -> None
     """Parse a full statement and check the class of the resulting node.

     Args:
         statement: Statement text to tokenize and parse.
         type_: Class the parsed node is expected to be an instance of.
     """
     tokens = tokenize(statement)
     parsed, leftover = apply_rule(bigquery_statement, tokens)
     self.assertFalse(leftover)
     # The rule yields a pair: the statement node and a trailing element that
     # this test does not inspect.
     assert isinstance(parsed, tuple)
     statement_node, _unused_semicolon = parsed
     self.assertIsInstance(statement_node, type_)
    def test_exists_reference_outer(self):
        """Evaluate EXISTS over a subquery whose WHERE clause references my_table2.

        my_table2 is not in the subquery's FROM clause; it is added to the
        evaluation context before the Exists node is evaluated.
        """
        inner_table = TypedDataFrame(pd.DataFrame([[1], [4]], columns=['a']),
                                     types=[BQScalarType.INTEGER])
        outer_table = TypedDataFrame(pd.DataFrame([[4], [2]], columns=['b']),
                                     types=[BQScalarType.INTEGER])
        table_context = DatasetTableContext({
            'my_project': {
                'my_dataset': {
                    'my_table': inner_table,
                    'my_table2': outer_table,
                }
            }
        })
        select_query = ("select a from `my_project.my_dataset.my_table` where "
                        "my_table.a = my_table2.b")
        select_node, leftover = apply_rule(select_rule, tokenize(select_query))
        self.assertFalse(leftover)

        exists_node = Exists(select_node)

        # Make my_table2 visible to the subquery through the evaluation context.
        outer_reference = TableReference(('my_project', 'my_dataset', 'my_table2'))
        context = EvaluationContext(table_context)
        context.add_table_from_node(outer_reference, EMPTY_NODE)
        evaluated = exists_node.evaluate(context)

        self.assertEqual(list(evaluated.series), [True, False])
Exemple #4
0
    def test_data_source_join_on_arbitrary_bool(
            self,
            join_type,  # type: Union[_EmptyNode, str]
            result  # type: List[List[int]]
    ):
        # type: (...) -> None
        """Join two single-column tables with an ON clause that is not a plain equality.

        Args:
            join_type: Join operator formatted between the two table names.
            result: Rows expected in the joined table.
        """
        first_table = TypedDataFrame(pd.DataFrame([[1], [2]], columns=['a']),
                                     types=[BQScalarType.INTEGER])
        second_table = TypedDataFrame(pd.DataFrame([[2], [0]], columns=['b']),
                                      types=[BQScalarType.INTEGER])
        table_context = DatasetTableContext({
            'my_project': {
                'my_dataset': {
                    'my_table': first_table,
                    'my_table2': second_table
                }
            }
        })

        # The join condition is an expression over columns from both tables.
        source_text = 'my_table {} my_table2 ON MOD(a + b, 3) = 0'.format(join_type)
        data_source_node, leftover = apply_rule(data_source, tokenize(source_text))
        self.assertFalse(leftover)
        assert isinstance(data_source_node, DataSource)
        joined_context = data_source_node.create_context(table_context)

        self.assertEqual(joined_context.table.to_list_of_lists(), result)
Exemple #5
0
 def test_apply_rule(self, rule,  # type: RuleType
                     tokens,  # type: List[str]
                     result,  # type: AppliedRuleOutputType
                     comment  # type: str
                     ):
     # type: (...) -> None
     """Check that applying a rule to a token list gives the expected output.

     Args:
         rule: Rule passed to apply_rule.
         tokens: Tokens the rule is applied to.
         result: Value apply_rule is expected to return for these inputs.
         comment: Not used in the body of this test.
     """
     self.assertEqual(apply_rule(rule, tokens), result)
 def test_create_table_with_select_mismatched_types(self, query, error):
     # type: (str, str) -> None
     """Check that executing the statement raises ValueError matching a pattern.

     Args:
         query: Statement text to tokenize, parse and execute.
         error: Regular expression the ValueError message must match.
     """
     node, leftover = apply_rule(statement_rule, tokenize(query))
     self.assertFalse(leftover)
     table_context = DatasetTableContext({'project': {'dataset': {}}})
     assert isinstance(node, Statement)
     # assertRaisesRegexp is a deprecated alias that Python 3.12 removed; use
     # the current name and fall back only where it does not exist.
     assert_raises_regex = (getattr(self, 'assertRaisesRegex', None)
                            or self.assertRaisesRegexp)
     with assert_raises_regex(ValueError, error):
         node.execute(table_context)
 def test_create_table_with_select(self, statement, columns):
     # type: (str, List[str]) -> None
     """Execute a statement against an empty dataset and check the table it creates.

     Args:
         statement: Statement text to tokenize, parse and execute.
         columns: Column names expected on the created table.
     """
     tokens = tokenize(statement)
     node, leftover = apply_rule(statement_rule, tokens)
     self.assertFalse(leftover)
     # Start from a dataset that contains no tables.
     table_context = DatasetTableContext({'project': {'dataset': {}}})
     assert isinstance(node, Statement)
     result = node.execute(table_context)

     expected_path = ('project', 'dataset', 'table')
     self.assertEqual(result.path, expected_path)
     created_table, _unused_name = table_context.lookup(result.path)
     self.assertEqual(list(created_table.dataframe.columns), columns)
     expected_types = [BQScalarType.INTEGER, BQScalarType.STRING]
     self.assertEqual(created_table.types, expected_types)
 def test_create_table_already_exists(self):
     # type: () -> None
     """Check that CREATE TABLE on an existing table path raises ValueError."""
     node, leftover = apply_rule(statement_rule, tokenize(
         'CREATE TABLE project.dataset.table (a int64, b string);'))
     self.assertFalse(leftover)
     table_context = DatasetTableContext({'project': {'dataset': {}}})
     original_table = TypedDataFrame(pd.DataFrame([], columns=['x', 'y', 'z']),
                                     [BQScalarType.STRING, BQScalarType.INTEGER,
                                      BQScalarType.BOOLEAN])
     # Register a table at the path the statement is about to create.
     table_context.set(('project', 'dataset', 'table'), original_table)
     assert isinstance(node, Statement)
     # assertRaisesRegexp is a deprecated alias that Python 3.12 removed; use
     # the current name and fall back only where it does not exist.
     assert_raises_regex = (getattr(self, 'assertRaisesRegex', None)
                            or self.assertRaisesRegexp)
     with assert_raises_regex(ValueError, 'Already Exists'):
         node.execute(table_context)
Exemple #9
0
    def test_exists(self, select_query, result):
        # type: (str, List[bool]) -> None
        """Evaluate EXISTS over a parsed subquery and compare the boolean results.

        Args:
            select_query: Text of the subquery wrapped in the Exists node.
            result: Booleans expected in the evaluated series.
        """
        tokens = tokenize(select_query)
        subquery_node, leftover = apply_rule(query_expression, tokens)
        assert isinstance(subquery_node, QueryExpression)
        self.assertFalse(leftover)

        exists_node = Exists(subquery_node)

        # Evaluate with my_table added to the context built from the fixture.
        outer_reference = TableReference(('my_project', 'my_dataset', 'my_table'))
        context = EvaluationContext(self.small_table_context)
        context.add_table_from_node(outer_reference, EMPTY_NODE)
        evaluated = exists_node.evaluate(context)
        assert isinstance(evaluated, TypedSeries)
        self.assertEqual(list(evaluated.series), result)
 def test_create_table_if_not_exists_and_it_does(self):
     # type: () -> None
     """Check that CREATE TABLE IF NOT EXISTS leaves an existing table in place."""
     statement_text = (
         'CREATE TABLE IF NOT EXISTS project.dataset.table (a int64, b string);')
     node, leftover = apply_rule(statement_rule, tokenize(statement_text))
     self.assertFalse(leftover)

     table_path = ('project', 'dataset', 'table')
     table_context = DatasetTableContext({'project': {'dataset': {}}})
     existing_types = [BQScalarType.STRING, BQScalarType.INTEGER, BQScalarType.BOOLEAN]
     original_table = TypedDataFrame(pd.DataFrame([], columns=['x', 'y', 'z']),
                                     existing_types)
     # Register a table at the target path before running the statement.
     table_context.set(table_path, original_table)

     assert isinstance(node, Statement)
     result = node.execute(table_context)
     self.assertEqual(result.path, table_path)
     # The very same table object must still be stored at that path.
     found_table, _unused_name = table_context.lookup(result.path)
     self.assertIs(found_table, original_table)
 def test_create_table(self, statement, already_exists):
     # type: (str, bool) -> None
     """Execute a statement and check that the table ends up with columns a and b.

     Args:
         statement: Statement text to tokenize, parse and execute.
         already_exists: When true, a different table is registered at the
             target path before the statement runs.
     """
     node, leftover = apply_rule(statement_rule, tokenize(statement))
     self.assertFalse(leftover)

     table_path = ('project', 'dataset', 'table')
     table_context = DatasetTableContext({'project': {'dataset': {}}})
     preexisting_types = [BQScalarType.STRING, BQScalarType.INTEGER, BQScalarType.BOOLEAN]
     original_table = TypedDataFrame(pd.DataFrame([], columns=['x', 'y', 'z']),
                                     preexisting_types)
     if already_exists:
         table_context.set(table_path, original_table)

     assert isinstance(node, Statement)
     result = node.execute(table_context)
     self.assertEqual(result.path, table_path)
     # Whatever was stored before, the table found now has the new schema.
     found_table, _unused_name = table_context.lookup(result.path)
     self.assertEqual(list(found_table.dataframe.columns), ['a', 'b'])
     self.assertEqual(found_table.types, [BQScalarType.INTEGER, BQScalarType.STRING])
 def test_current_timestamp(self):
     # type: () -> None
     """Check that CURRENT_TIMESTAMP() is recent and identical on every row.

     The query selects current_timestamp() alongside three unnested struct rows.
     """
     node, leftover = apply_rule(
         query_expression,
         tokenize(
             'select current_timestamp(), a from unnest([struct(1 as a), struct(2), struct(3)])'
         ))
     assert isinstance(node, QueryExpression)
     self.assertFalse(leftover)
     result, _ = node.get_dataframe(DatasetTableContext({}))
     table = cast(List[List[datetime.datetime]], result.to_list_of_lists())
     self.assertEqual(len(table), 3)
     # CURRENT_TIMESTAMP() returns a very recent timestamp.  Compare the whole
     # elapsed interval: timedelta.seconds is only the seconds component, so it
     # ignores whole days and wraps to ~86399 for a slightly negative difference.
     elapsed = datetime.datetime.now() - table[0][0]
     self.assertLess(abs(elapsed.total_seconds()), 2)
     # All rows have the same timestamp value.
     self.assertEqual(table[0][0], table[1][0])
     self.assertEqual(table[0][0], table[2][0])
    def test_exists_index(self):
        """Compare a boolean column with an EXISTS expression inside a select."""
        bool_table = TypedDataFrame(
            pd.DataFrame([[True], [False]], columns=['a']),
            types=[BQScalarType.BOOLEAN])
        table_context = DatasetTableContext({
            'my_project': {
                'my_dataset': {
                    'bool_table': bool_table
                }
            }
        })
        select_query = 'select a = exists(select 1) from `my_project.my_dataset.bool_table`'
        select_node, leftover = apply_rule(select_rule, tokenize(select_query))
        self.assertFalse(leftover)

        selected, _unused_table_name = select_node.get_dataframe(table_context)

        # One output row per input row, in the same order as the input.
        expected_rows = [[True], [False]]
        self.assertEqual(selected.to_list_of_lists(), expected_rows)
 def test_create_table_grammar(self, statement):
     # type: (str) -> None
     """Check that the statement parses completely into a CreateTable node.

     Args:
         statement: Statement text to tokenize and parse.
     """
     tokens = tokenize(statement)
     parsed_node, leftover = apply_rule(statement_rule, tokens)
     # Every token must have been consumed by the rule.
     self.assertFalse(leftover)
     self.assertIsInstance(parsed_node, CreateTable)