def setUp(self):
    """Build a mock connection and cache one column of each wire type."""
    self.con = MockConnection()
    self.table = self.con.table('functional_alltypes')
    # Bind each typed column of the alltypes table to a short attribute.
    for attr, column in [
        ('i8', 'tinyint_col'),
        ('i16', 'smallint_col'),
        ('i32', 'int_col'),
        ('i64', 'bigint_col'),
        ('d', 'double_col'),
        ('f', 'float_col'),
        ('s', 'string_col'),
        ('b', 'bool_col'),
        ('t', 'timestamp_col'),
    ]:
        setattr(self, attr, getattr(self.table, column))
    # The decimal column comes from a different mock table.
    self.dec = self.con.table('tpch_customer').c_acctbal
    self.all_cols = [
        self.i8,
        self.i16,
        self.i32,
        self.i64,
        self.d,
        self.f,
        self.dec,
        self.s,
        self.b,
        self.t,
    ]
def test_ctas_ddl(self):
    """CTASKudu compiles to CREATE EXTERNAL TABLE ... AS SELECT DDL."""
    con = MockConnection()
    query = build_ast(con.table('test1')).queries[0]
    ctas = ksupport.CTASKudu(
        'another_table',
        'kudu_name',
        ['dom.d.com:7051'],
        query,
        ['string_col'],
        external=True,
        can_exist=False,
        database='foo',
    )
    sql = ctas.compile()

    expected = """\
CREATE EXTERNAL TABLE foo.`another_table`
TBLPROPERTIES (
  'kudu.key_columns'='string_col',
  'kudu.master_addresses'='dom.d.com:7051',
  'kudu.table_name'='kudu_name',
  'storage_handler'='com.cloudera.kudu.hive.KuduStorageHandler'
) AS
SELECT *
FROM test1"""
    assert sql == expected
class TestOps(unittest.TestCase):
    """Operator-level expression tests against the mock backend."""

    def setUp(self):
        self.con = MockConnection()

    def test_join_column_count(self):
        left = self.con.table('star1')
        right = self.con.table('star2')

        cond = left['foo_id'] == right['foo_id']
        # Project the whole left table plus only those right-side columns
        # whose names do not collide with a left-side column.
        extra = [c for c in right.columns if c not in left.columns]
        joined = left.inner_join(right, cond)[
            [left] + right.get_columns(extra)
        ]
        assert len(joined.columns) == 6
def setUp(self):
    """Create the mock backend and grab the shared alltypes table."""
    connection = MockConnection()
    self.con = connection
    self.alltypes = connection.table('functional_alltypes')
class TestAnalytics(unittest.TestCase):
    """Bucketing, histogram and top-k analytics expressions."""

    def setUp(self):
        self.con = MockConnection()
        self.alltypes = self.con.table('functional_alltypes')

    def test_category_project(self):
        table = self.alltypes
        tier = table.double_col.bucket([0, 50, 100]).name('tier')
        projected = table[tier, table]
        assert isinstance(projected.tier, ir.CategoryColumn)

    def test_bucket(self):
        col = self.alltypes.double_col
        edges = [0, 10, 50, 100]

        expr = col.bucket(edges)
        assert isinstance(expr, ir.CategoryColumn)
        assert expr.op().nbuckets == 3

        # Each open side adds one more bucket.
        expr = col.bucket(edges, include_over=True)
        assert expr.op().nbuckets == 4

        expr = col.bucket(edges, include_over=True, include_under=True)
        assert expr.op().nbuckets == 5

    def test_bucket_error_cases(self):
        col = self.alltypes.double_col

        with self.assertRaises(ValueError):
            col.bucket([])
        with self.assertRaises(ValueError):
            col.bucket([1, 2], closed='foo')

        # A single bucket edge is only valid with both open sides -- it works!
        col.bucket([10], include_under=True, include_over=True)

        with self.assertRaises(ValueError):
            col.bucket([10])
        with self.assertRaises(ValueError):
            col.bucket([10], include_under=True)
        with self.assertRaises(ValueError):
            col.bucket([10], include_over=True)

    def test_histogram(self):
        col = self.alltypes.double_col

        with self.assertRaises(ValueError):
            col.histogram(nbins=10, binwidth=5)
        with self.assertRaises(ValueError):
            col.histogram()
        with self.assertRaises(ValueError):
            col.histogram(10, closed='foo')

    def test_topk_analysis_bug(self):
        # GH #398: filtering through a topk must preserve the predicate.
        airlines = ibis.table(
            [('dest', 'string'), ('origin', 'string'), ('arrdelay', 'int32')],
            'airlines',
        )

        dests = ['ORD', 'JFK', 'SFO']
        t = airlines[airlines.dest.isin(dests)]
        delay_filter = t.origin.topk(10, by=t.arrdelay.mean())
        filtered = t.filter([delay_filter])

        post_pred = filtered.op().predicates[0]
        assert delay_filter.to_filter().equals(post_pred)

    def test_topk_function_late_bind(self):
        # GH #520: a lambda passed as ``by`` must produce the same
        # aggregation as an explicitly constructed aggregate expression.
        airlines = ibis.table(
            [('dest', 'string'), ('origin', 'string'), ('arrdelay', 'int32')],
            'airlines',
        )
        lazy = airlines.dest.topk(5, by=lambda x: x.arrdelay.mean())
        eager = airlines.dest.topk(5, by=airlines.arrdelay.mean())

        assert_equal(lazy.to_aggregation(), eager.to_aggregation())
def mockcon():
    """Return a fresh MockConnection (shared test fixture)."""
    return MockConnection()
def setUp(self):
    """Describe a two-string-argument, int64-returning test function."""
    self.con = MockConnection()
    self.output = 'int64'
    self.inputs = ['string', 'string']
    self.name = 'test_name'
class TestWrapping(unittest.TestCase):
    """Tests for wrapping Impala UDFs/UDAs as ibis expressions."""

    def setUp(self):
        # Fixture: one column of each primitive type from the mock tables.
        self.con = MockConnection()
        self.table = self.con.table('functional_alltypes')
        self.i8 = self.table.tinyint_col
        self.i16 = self.table.smallint_col
        self.i32 = self.table.int_col
        self.i64 = self.table.bigint_col
        self.d = self.table.double_col
        self.f = self.table.float_col
        self.s = self.table.string_col
        self.b = self.table.bool_col
        self.t = self.table.timestamp_col
        # The decimal column lives on a different mock table.
        self.dec = self.con.table('tpch_customer').c_acctbal
        # Order matters: prefixes of this list form the "valid implicit
        # cast" sets used by test_udf_invalid_typecasting below.
        self.all_cols = [
            self.i8,
            self.i16,
            self.i32,
            self.i64,
            self.d,
            self.f,
            self.dec,
            self.s,
            self.b,
            self.t,
        ]

    def test_sql_generation(self):
        # A registered scalar UDF compiles to a database-qualified call.
        func = api.scalar_function(['string'], 'string', name='Tester')
        func.register('identity', 'udf_testing')
        result = func('hello world')
        assert (
            ibis.impala.compile(result)
            == "SELECT udf_testing.identity('hello world') AS `tmp`"
        )

    def test_sql_generation_from_infoclass(self):
        func = api.wrap_udf('test.so', ['string'], 'string', 'info_test')
        repr(func)  # smoke-test the repr of the wrapper object
        func.register('info_test', 'udf_testing')
        result = func('hello world')
        assert (
            ibis.impala.compile(result)
            == "SELECT udf_testing.info_test('hello world') AS `tmp`"
        )

    def test_udf_primitive_output_types(self):
        # For each primitive type: a scalar argument yields a scalar
        # expression and a column argument yields a column expression,
        # both of the UDF's declared output type.
        types = [
            ('boolean', True, self.b),
            ('int8', 1, self.i8),
            ('int16', 1, self.i16),
            ('int32', 1, self.i32),
            ('int64', 1, self.i64),
            ('float', 1.0, self.f),
            ('double', 1.0, self.d),
            ('string', '1', self.s),
            ('timestamp', ibis.timestamp('1961-04-10'), self.t),
        ]
        for t, sv, av in types:
            func = self._register_udf([t], t, 'test')
            ibis_type = dt.validate_type(t)
            expr = func(sv)
            # Exact type identity is intentional here, not isinstance.
            assert type(expr) == type(  # noqa: E501, E721
                ibis_type.scalar_type()(expr.op())
            )
            expr = func(av)
            assert type(expr) == type(  # noqa: E501, E721
                ibis_type.column_type()(expr.op())
            )

    def test_uda_primitive_output_types(self):
        types = [
            ('boolean', True, self.b),
            ('int8', 1, self.i8),
            ('int16', 1, self.i16),
            ('int32', 1, self.i32),
            ('int64', 1, self.i64),
            ('float', 1.0, self.f),
            ('double', 1.0, self.d),
            ('string', '1', self.s),
            ('timestamp', ibis.timestamp('1961-04-10'), self.t),
        ]
        for t, sv, av in types:
            func = self._register_uda([t], t, 'test')
            ibis_type = dt.validate_type(t)
            # NOTE(review): ``av`` is unused -- expr2 repeats func(sv);
            # possibly func(av) was intended. Confirm before changing.
            expr1 = func(sv)
            expr2 = func(sv)
            expected_type1 = type(ibis_type.scalar_type()(expr1.op()))
            expected_type2 = type(ibis_type.scalar_type()(expr2.op()))
            assert isinstance(expr1, expected_type1)
            assert isinstance(expr2, expected_type2)

    def test_decimal(self):
        func = self._register_udf(['decimal(9,0)'], 'decimal(9,0)', 'test')
        expr = func(1.0)
        assert type(expr) == ir.DecimalScalar
        expr = func(self.dec)
        assert type(expr) == ir.DecimalColumn

    def test_udf_invalid_typecasting(self):
        # Each case: (declared input type, columns accepted via implicit
        # cast, columns that must be rejected with IbisTypeError).
        cases = [
            ('int8', self.all_cols[:1], self.all_cols[1:]),
            ('int16', self.all_cols[:2], self.all_cols[2:]),
            ('int32', self.all_cols[:3], self.all_cols[3:]),
            ('int64', self.all_cols[:4], self.all_cols[4:]),
            ('boolean', [], self.all_cols[:8] + self.all_cols[9:]),
            # allowing double here for now
            ('float', self.all_cols[:6], [self.s, self.b, self.t]),
            ('double', self.all_cols[:6], [self.s, self.b, self.t]),
            ('string', [], self.all_cols[:7] + self.all_cols[8:]),
            ('timestamp', [], self.all_cols[:-1]),
            ('decimal', self.all_cols[:7], self.all_cols[7:]),
        ]
        for t, valid_casts, invalid_casts in cases:
            func = self._register_udf([t], 'int32', 'typecast')
            for expr in valid_casts:
                func(expr)
            for expr in invalid_casts:
                self.assertRaises(IbisTypeError, func, expr)

    def test_mult_args(self):
        # Multi-argument UDF: column args give a column expression,
        # all-scalar args give a scalar expression.
        func = self._register_udf(
            ['int32', 'double', 'string', 'boolean', 'timestamp'],
            'int64',
            'mult_types',
        )
        expr = func(self.i32, self.d, self.s, self.b, self.t)
        assert issubclass(type(expr), ir.ColumnExpr)
        expr = func(1, 1.0, 'a', True, ibis.timestamp('1961-04-10'))
        assert issubclass(type(expr), ir.ScalarExpr)

    def _register_udf(self, inputs, output, name):
        # Helper: declare and register a scalar UDF under ibis_testing.
        func = api.scalar_function(inputs, output, name=name)
        func.register(name, 'ibis_testing')
        return func

    def _register_uda(self, inputs, output, name):
        # Helper: declare and register an aggregate UDA under ibis_testing.
        func = api.aggregate_function(inputs, output, name=name)
        func.register(name, 'ibis_testing')
        return func
def con():
    """Return a fresh MockConnection (shared test fixture)."""
    return MockConnection()
def setUp(self):
    """Create a mock backend connection for each test."""
    self.con = MockConnection()
class TestInteractiveUse(unittest.TestCase):
    """Behaviour of expression repr() under interactive mode."""

    def setUp(self):
        self.con = MockConnection()

    def test_interactive_execute_on_repr(self):
        tbl = self.con.table('functional_alltypes')
        total = tbl.bigint_col.sum()
        with config.option_context('interactive', True):
            repr(total)

        # repr() in interactive mode must have hit the backend.
        assert len(self.con.executed_queries) > 0

    def test_repr_png_is_none_in_interactive(self):
        tbl = self.con.table('functional_alltypes')
        with config.option_context('interactive', True):
            assert tbl._repr_png_() is None

    def test_repr_png_is_not_none_in_not_interactive(self):
        pytest.importorskip('ibis.expr.visualize')
        tbl = self.con.table('functional_alltypes')
        with config.option_context(
            'interactive', False
        ), config.option_context('graphviz_repr', True):
            assert tbl._repr_png_() is not None

    # XXX This test is failing in the OmniSciDB/Spark build, and working
    # in the rest, even if does not seem to depend on the backend.
    # For some reason in that build the statement does not contain
    # the LIMIT. Xfailing with `strict=False` since in the other backends
    # it does work. See #2337
    @pytest.mark.xfail(
        reason='Not obvious why this is failing for omnisci/spark, and this '
        'was incorrectly skipped until now. Xfailing to restore the CI',
        strict=False,
    )
    def test_default_limit(self):
        tbl = self.con.table('functional_alltypes')
        with config.option_context('interactive', True):
            repr(tbl)

        expected = """\
SELECT *
FROM functional_alltypes
LIMIT {0}""".format(
            config.options.sql.default_limit
        )
        assert self.con.executed_queries[0] == expected

    def test_respect_set_limit(self):
        tbl = self.con.table('functional_alltypes').limit(10)
        with config.option_context('interactive', True):
            repr(tbl)

        expected = """\
SELECT *
FROM functional_alltypes
LIMIT 10"""
        assert self.con.executed_queries[0] == expected

    def test_disable_query_limit(self):
        tbl = self.con.table('functional_alltypes')
        # With the default limit unset, the emitted SQL has no LIMIT.
        with config.option_context(
            'interactive', True
        ), config.option_context('sql.default_limit', None):
            repr(tbl)

        expected = """\
SELECT *
FROM functional_alltypes"""
        assert self.con.executed_queries[0] == expected

    def test_interactive_non_compilable_repr_not_fail(self):
        # #170: repr of a non-compilable expression must not raise.
        tbl = self.con.table('functional_alltypes')
        top = tbl.string_col.topk(3)
        # it works!
        with config.option_context('interactive', True):
            repr(top)

    def test_histogram_repr_no_query_execute(self):
        tbl = self.con.table('functional_alltypes')
        bucket = tbl.double_col.histogram(10).name('bucket')
        counts = tbl.group_by(bucket).size()
        with config.option_context('interactive', True):
            counts._repr()
        assert self.con.executed_queries == []

    def test_compile_no_execute(self):
        tbl = self.con.table('functional_alltypes')
        tbl.double_col.sum().compile()
        # Compilation alone must not execute anything.
        assert self.con.executed_queries == []

    def test_isin_rule_supressed_exception_repr_not_fail(self):
        with config.option_context('interactive', True):
            tbl = self.con.table('functional_alltypes')
            keep = tbl['string_col'].notin(['1', '4', '7'])
            counts = tbl[keep]['string_col'].value_counts()
            repr(counts)