def test_data_source_joins(
        self,
        join_type,  # type: Union[_EmptyNode, str]
        table1,  # type: List[List[int]]
        table2,  # type: List[List[int]]
        result  # type: List[List[int]]
):
    # type: (...) -> None
    """Join my_table and my_table2 with `join_type` and check the joined table.

    Args:
        join_type: The join operator written between the two table names.
        table1: Rows for my_table, whose columns are named a and b.
        table2: Rows for my_table2, whose columns are named a and c.
        result: The rows the joined table is expected to contain.
    """
    left_frame = TypedDataFrame(
        pd.DataFrame(table1, columns=['a', 'b']),
        types=[BQScalarType.INTEGER, BQScalarType.INTEGER])
    right_frame = TypedDataFrame(
        pd.DataFrame(table2, columns=['a', 'c']),
        types=[BQScalarType.INTEGER, BQScalarType.INTEGER])
    table_context = DatasetTableContext({
        'my_project': {
            'my_dataset': {
                'my_table': left_frame,
                'my_table2': right_frame,
            }
        }
    })

    # The comma and CROSS JOIN forms are written without a join condition;
    # every other join type is joined on the shared column a.
    if join_type in (',', 'CROSS JOIN'):
        join_condition = ''
    else:
        join_condition = 'USING (a)'
    join_sql = 'my_table {} my_table2 {}'.format(join_type, join_condition)

    data_source_node, remaining_tokens = apply_rule(data_source, tokenize(join_sql))
    self.assertFalse(remaining_tokens)
    assert isinstance(data_source_node, DataSource)

    context = data_source_node.create_context(table_context)
    self.assertEqual(context.table.to_list_of_lists(), result)
    # Column names of the joined table are qualified by their source table.
    expected_columns = ['my_table.a', 'my_table.b', 'my_table2.a', 'my_table2.c']
    self.assertEqual(list(context.table.dataframe), expected_columns)
def test_bigquery_statement(self, statement, type_):
    # type: (str, type) -> None
    """Parse `statement` with the bigquery_statement rule and check the node type.

    Args:
        statement: The statement text to tokenize and parse.
        type_: The class the parsed node is expected to be an instance of.
    """
    parsed, remaining_tokens = apply_rule(bigquery_statement, tokenize(statement))
    self.assertFalse(remaining_tokens)
    assert isinstance(parsed, tuple)
    # The rule yields a two-element tuple; only the first element is checked.
    # (The second is presumably the trailing semicolon -- it is never used.)
    statement_node, _ = parsed
    self.assertIsInstance(statement_node, type_)
def test_exists_reference_outer(self):
    """Evaluate EXISTS over a subquery that refers to a table from the outer context.

    The subquery selects from my_table but its WHERE clause compares against
    my_table2.b, and my_table2 is only registered on the evaluation context.
    """
    dataset = {
        'my_table': TypedDataFrame(pd.DataFrame([[1], [4]], columns=['a']),
                                   types=[BQScalarType.INTEGER]),
        'my_table2': TypedDataFrame(pd.DataFrame([[4], [2]], columns=['b']),
                                    types=[BQScalarType.INTEGER]),
    }
    table_context = DatasetTableContext({'my_project': {'my_dataset': dataset}})

    select_query = ('select a from `my_project.my_dataset.my_table` where '
                    'my_table.a = my_table2.b')
    select_node, remaining_tokens = apply_rule(select_rule, tokenize(select_query))
    self.assertFalse(remaining_tokens)

    # Register my_table2 as the outer table before evaluating the EXISTS node.
    outer_context = EvaluationContext(table_context)
    outer_table = TableReference(('my_project', 'my_dataset', 'my_table2'))
    outer_context.add_table_from_node(outer_table, EMPTY_NODE)

    exists_result = Exists(select_node).evaluate(outer_context)
    # Presumably one entry per row of my_table2: b=4 has a match in my_table,
    # b=2 does not.
    self.assertEqual(list(exists_result.series), [True, False])
def test_data_source_join_on_arbitrary_bool(
        self,
        join_type,  # type: Union[_EmptyNode, str]
        result  # type: List[List[int]]
):
    # type: (...) -> None
    """Join two single-column tables with an ON clause that is not a plain equality.

    Args:
        join_type: The join operator written between the two table names.
        result: The rows the joined table is expected to contain.
    """
    left_frame = TypedDataFrame(pd.DataFrame([[1], [2]], columns=['a']),
                                types=[BQScalarType.INTEGER])
    right_frame = TypedDataFrame(pd.DataFrame([[2], [0]], columns=['b']),
                                 types=[BQScalarType.INTEGER])
    table_context = DatasetTableContext({
        'my_project': {
            'my_dataset': {
                'my_table': left_frame,
                'my_table2': right_frame,
            }
        }
    })

    # The join condition is an arbitrary boolean expression over both tables.
    join_sql = 'my_table {} my_table2 ON MOD(a + b, 3) = 0'.format(join_type)
    data_source_node, remaining_tokens = apply_rule(data_source, tokenize(join_sql))
    self.assertFalse(remaining_tokens)
    assert isinstance(data_source_node, DataSource)

    joined_context = data_source_node.create_context(table_context)
    self.assertEqual(joined_context.table.to_list_of_lists(), result)
def test_apply_rule(self, rule,  # type: RuleType
                    tokens,  # type: List[str]
                    result,  # type: AppliedRuleOutputType
                    comment  # type: str
                    ):
    # type: (...) -> None
    """Check that applying `rule` to `tokens` yields exactly `result`.

    Args:
        rule: The grammar rule handed to apply_rule.
        tokens: The token list handed to apply_rule.
        result: The value apply_rule is expected to return.
        comment: Not used by the test body; presumably a human-readable
            description of the parameterized case.
    """
    self.assertEqual(apply_rule(rule, tokens), result)
def test_create_table_with_select_mismatched_types(self, query, error):
    # type: (str, str) -> None
    """Check that executing `query` raises a ValueError matching `error`.

    Args:
        query: The statement text to tokenize, parse and execute.
        error: A regular expression searched for in the ValueError's message.
    """
    # Local import keeps this block self-contained.
    import re

    node, leftover = apply_rule(statement_rule, tokenize(query))
    self.assertFalse(leftover)
    table_context = DatasetTableContext({'project': {'dataset': {}}})
    assert isinstance(node, Statement)

    # assertRaisesRegexp is a deprecated alias that was removed in Python 3.12.
    # assertRaises plus an explicit regex search performs the same check and is
    # available on every interpreter version the alias ever existed on.
    with self.assertRaises(ValueError) as raised:
        node.execute(table_context)
    message = str(raised.exception)
    self.assertIsNotNone(
        re.search(error, message),
        '{!r} does not match {!r}'.format(error, message))
def test_create_table_with_select(self, statement, columns):
    # type: (str, List[str]) -> None
    """Execute `statement` and check the table it creates.

    Args:
        statement: The statement text to tokenize, parse and execute.
        columns: The column names the created table is expected to have.
    """
    table_context = DatasetTableContext({'project': {'dataset': {}}})

    statement_node, remaining_tokens = apply_rule(statement_rule, tokenize(statement))
    self.assertFalse(remaining_tokens)
    assert isinstance(statement_node, Statement)

    execution_result = statement_node.execute(table_context)
    self.assertEqual(execution_result.path, ('project', 'dataset', 'table'))

    # Look the new table up again through the context to inspect its schema.
    created_table, _ = table_context.lookup(execution_result.path)
    self.assertEqual(list(created_table.dataframe.columns), columns)
    expected_types = [BQScalarType.INTEGER, BQScalarType.STRING]
    self.assertEqual(created_table.types, expected_types)
def test_create_table_already_exists(self):
    # type: () -> None
    """Check that a plain CREATE TABLE fails when the table path is already set."""
    # Local import keeps this block self-contained.
    import re

    node, leftover = apply_rule(statement_rule, tokenize(
        'CREATE TABLE project.dataset.table (a int64, b string);'))
    self.assertFalse(leftover)
    table_context = DatasetTableContext({'project': {'dataset': {}}})
    # Pre-populate the target path with a table of a different schema.
    original_table = TypedDataFrame(
        pd.DataFrame([], columns=['x', 'y', 'z']),
        [BQScalarType.STRING, BQScalarType.INTEGER, BQScalarType.BOOLEAN])
    table_context.set(('project', 'dataset', 'table'), original_table)
    assert isinstance(node, Statement)

    # assertRaisesRegexp is a deprecated alias that was removed in Python 3.12.
    # assertRaises plus an explicit regex search performs the same check and is
    # available on every interpreter version the alias ever existed on.
    with self.assertRaises(ValueError) as raised:
        node.execute(table_context)
    message = str(raised.exception)
    self.assertIsNotNone(
        re.search('Already Exists', message),
        "'Already Exists' not found in {!r}".format(message))
def test_exists(self, select_query, result):
    # type: (str, List[bool]) -> None
    """Evaluate EXISTS over `select_query` against self.small_table_context.

    Args:
        select_query: The subquery text to tokenize and parse.
        result: The boolean values the evaluated EXISTS is expected to produce.
    """
    subquery_node, remaining_tokens = apply_rule(query_expression,
                                                 tokenize(select_query))
    assert isinstance(subquery_node, QueryExpression)
    self.assertFalse(remaining_tokens)

    # my_table is registered on the context before the EXISTS node is evaluated.
    evaluation_context = EvaluationContext(self.small_table_context)
    table_reference = TableReference(('my_project', 'my_dataset', 'my_table'))
    evaluation_context.add_table_from_node(table_reference, EMPTY_NODE)

    exists_result = Exists(subquery_node).evaluate(evaluation_context)
    assert isinstance(exists_result, TypedSeries)
    self.assertEqual(list(exists_result.series), result)
def test_create_table_if_not_exists_and_it_does(self):
    # type: () -> None
    """Check that CREATE TABLE IF NOT EXISTS leaves an already-set table in place."""
    create_sql = 'CREATE TABLE IF NOT EXISTS project.dataset.table (a int64, b string);'
    statement_node, remaining_tokens = apply_rule(statement_rule, tokenize(create_sql))
    self.assertFalse(remaining_tokens)

    # Pre-populate the target path with a table whose schema differs from the
    # one named in the statement.
    table_context = DatasetTableContext({'project': {'dataset': {}}})
    existing_table = TypedDataFrame(
        pd.DataFrame([], columns=['x', 'y', 'z']),
        [BQScalarType.STRING, BQScalarType.INTEGER, BQScalarType.BOOLEAN])
    table_context.set(('project', 'dataset', 'table'), existing_table)

    assert isinstance(statement_node, Statement)
    execution_result = statement_node.execute(table_context)
    self.assertEqual(execution_result.path, ('project', 'dataset', 'table'))

    # The context must still hold the very same table object.
    found_table, _ = table_context.lookup(execution_result.path)
    self.assertIs(found_table, existing_table)
def test_create_table(self, statement, already_exists):
    # type: (str, bool) -> None
    """Execute `statement` and check that the table ends up with columns a and b.

    Args:
        statement: The statement text to tokenize, parse and execute.
        already_exists: When true, a table with a different schema is stored
            at the target path before the statement runs.
    """
    statement_node, remaining_tokens = apply_rule(statement_rule, tokenize(statement))
    self.assertFalse(remaining_tokens)

    table_context = DatasetTableContext({'project': {'dataset': {}}})
    target_path = ('project', 'dataset', 'table')
    preexisting_table = TypedDataFrame(
        pd.DataFrame([], columns=['x', 'y', 'z']),
        [BQScalarType.STRING, BQScalarType.INTEGER, BQScalarType.BOOLEAN])
    if already_exists:
        table_context.set(target_path, preexisting_table)

    assert isinstance(statement_node, Statement)
    execution_result = statement_node.execute(table_context)
    self.assertEqual(execution_result.path, target_path)

    # Whether or not a table was there before, the schema must now be the one
    # expected by this test: integer column a and string column b.
    final_table, _ = table_context.lookup(execution_result.path)
    self.assertEqual(list(final_table.dataframe.columns), ['a', 'b'])
    self.assertEqual(final_table.types, [BQScalarType.INTEGER, BQScalarType.STRING])
def test_current_timestamp(self):
    # type: () -> None
    """Check that current_timestamp() is recent and identical on every row."""
    node, leftover = apply_rule(
        query_expression,
        tokenize(
            'select current_timestamp(), a from '
            'unnest([struct(1 as a), struct(2), struct(3)])'
        ))
    assert isinstance(node, QueryExpression)
    self.assertFalse(leftover)
    result, _ = node.get_dataframe(DatasetTableContext({}))
    table = cast(List[List[datetime.datetime]], result.to_list_of_lists())
    self.assertEqual(len(table), 3)

    # CURRENT_TIMESTAMP() returns a very recent timestamp.
    # timedelta.seconds is only the seconds *component* of the difference (it
    # ignores whole days), so a timestamp stale by a multiple of 24 hours would
    # slip through.  total_seconds() measures the whole difference, and abs()
    # also rejects a timestamp that lies well in the future.
    elapsed = datetime.datetime.now() - table[0][0]
    self.assertLess(abs(elapsed.total_seconds()), 2)

    # All rows have the same timestamp value.
    self.assertEqual(table[0][0], table[1][0])
    self.assertEqual(table[0][0], table[2][0])
def test_exists_index(self):
    """Compare a boolean column against EXISTS inside a select list."""
    bool_table = TypedDataFrame(pd.DataFrame([[True], [False]], columns=['a']),
                                types=[BQScalarType.BOOLEAN])
    table_context = DatasetTableContext({
        'my_project': {'my_dataset': {'bool_table': bool_table}}
    })

    select_query = 'select a = exists(select 1) from `my_project.my_dataset.bool_table`'
    select_node, remaining_tokens = apply_rule(select_rule, tokenize(select_query))
    self.assertFalse(remaining_tokens)

    # Only the resulting table is inspected; the returned table name is ignored.
    query_result, _ = select_node.get_dataframe(table_context)
    # The expected rows equal column a itself: [True] and [False].
    self.assertEqual(query_result.to_list_of_lists(), [[True], [False]])
def test_create_table_grammar(self, statement):
    # type: (str) -> None
    """Check that `statement` parses completely into a CreateTable node.

    Args:
        statement: The statement text to tokenize and parse.
    """
    parsed_node, remaining_tokens = apply_rule(statement_rule, tokenize(statement))
    # Nothing may be left unparsed.
    self.assertFalse(remaining_tokens)
    self.assertIsInstance(parsed_node, CreateTable)