Esempio n. 1
0
    def setUp(self):
        """Bind a mock connection and one column of each primitive type.

        ``all_cols`` lists the columns in a deliberate order (integers,
        floats, decimal, string, boolean, timestamp); keep that order
        stable since tests may slice it positionally.
        """
        self.con = MockConnection()
        self.table = self.con.table('functional_alltypes')

        t = self.table
        self.i8, self.i16 = t.tinyint_col, t.smallint_col
        self.i32, self.i64 = t.int_col, t.bigint_col
        self.d, self.f = t.double_col, t.float_col
        self.s, self.b = t.string_col, t.bool_col
        self.t = t.timestamp_col
        # The decimal column comes from a different mock table.
        self.dec = self.con.table('tpch_customer').c_acctbal
        self.all_cols = [
            self.i8, self.i16, self.i32, self.i64,
            self.d, self.f, self.dec,
            self.s, self.b, self.t,
        ]
Esempio n. 2
0
    def test_ctas_ddl(self):
        con = MockConnection()

        select = build_ast(con.table('test1')).queries[0]
        statement = ksupport.CTASKudu(
            'another_table',
            'kudu_name',
            ['dom.d.com:7051'],
            select,
            ['string_col'],
            external=True,
            can_exist=False,
            database='foo',
        )
        result = statement.compile()

        expected = """\
CREATE EXTERNAL TABLE foo.`another_table`
TBLPROPERTIES (
  'kudu.key_columns'='string_col',
  'kudu.master_addresses'='dom.d.com:7051',
  'kudu.table_name'='kudu_name',
  'storage_handler'='com.cloudera.kudu.hive.KuduStorageHandler'
) AS
SELECT *
FROM test1"""
        assert result == expected
Esempio n. 3
0
class TestOps(unittest.TestCase):
    """Operator-level checks against the mock backend."""

    def setUp(self):
        self.con = MockConnection()

    def test_join_column_count(self):
        """Inner join keeps all of t1 plus t2's non-overlapping columns."""
        left = self.con.table('star1')
        right = self.con.table('star2')

        cond = left['foo_id'] == right['foo_id']

        # Project the whole left table plus only the right-hand columns
        # whose names do not collide with the left-hand ones.
        extra = [c for c in right.columns if c not in left.columns]
        joined = left.inner_join(right, cond)[[left] + right.get_columns(extra)]
        assert len(joined.columns) == 6
Esempio n. 4
0
 def setUp(self):
     """Connect to the mock backend and cache the all-types test table."""
     connection = MockConnection()
     self.con = connection
     self.alltypes = connection.table('functional_alltypes')
Esempio n. 5
0
class TestAnalytics(unittest.TestCase):
    """Bucketing, histogram and top-k analytics helpers."""

    def setUp(self):
        self.con = MockConnection()
        self.alltypes = self.con.table('functional_alltypes')

    def test_category_project(self):
        """Projecting a bucketed column yields a CategoryColumn."""
        table = self.alltypes
        tier = table.double_col.bucket([0, 50, 100]).name('tier')
        projected = table[tier, table]
        assert isinstance(projected.tier, ir.CategoryColumn)

    def test_bucket(self):
        """Bucket count grows by one per include_over/include_under flag."""
        col = self.alltypes.double_col
        edges = [0, 10, 50, 100]

        closed = col.bucket(edges)
        assert isinstance(closed, ir.CategoryColumn)
        assert closed.op().nbuckets == 3

        with_over = col.bucket(edges, include_over=True)
        assert with_over.op().nbuckets == 4

        with_both = col.bucket(edges, include_over=True, include_under=True)
        assert with_both.op().nbuckets == 5

    def test_bucket_error_cases(self):
        col = self.alltypes.double_col

        with self.assertRaises(ValueError):
            col.bucket([])
        with self.assertRaises(ValueError):
            col.bucket([1, 2], closed='foo')

        # A single edge is valid only when both open-ended bins exist.
        col.bucket([10], include_under=True, include_over=True)

        for kwargs in ({}, {'include_under': True}, {'include_over': True}):
            with self.assertRaises(ValueError):
                col.bucket([10], **kwargs)

    def test_histogram(self):
        col = self.alltypes.double_col

        # nbins and binwidth are mutually exclusive, and one is required.
        with self.assertRaises(ValueError):
            col.histogram(nbins=10, binwidth=5)
        with self.assertRaises(ValueError):
            col.histogram()
        with self.assertRaises(ValueError):
            col.histogram(10, closed='foo')

    def test_topk_analysis_bug(self):
        # GH #398: the top-k filter must survive predicate analysis.
        airlines = ibis.table(
            [('dest', 'string'), ('origin', 'string'), ('arrdelay', 'int32')],
            'airlines',
        )
        subset = airlines[airlines.dest.isin(['ORD', 'JFK', 'SFO'])]
        delay_filter = subset.origin.topk(10, by=subset.arrdelay.mean())

        filtered = subset.filter([delay_filter])
        assert delay_filter.to_filter().equals(filtered.op().predicates[0])

    def test_topk_function_late_bind(self):
        # GH #520: a lambda passed as ``by`` binds to the table lazily and
        # must produce the same aggregation as the eagerly bound form.
        airlines = ibis.table(
            [('dest', 'string'), ('origin', 'string'), ('arrdelay', 'int32')],
            'airlines',
        )
        lazy = airlines.dest.topk(5, by=lambda x: x.arrdelay.mean())
        eager = airlines.dest.topk(5, by=airlines.arrdelay.mean())
        assert_equal(lazy.to_aggregation(), eager.to_aggregation())
Esempio n. 6
0
def mockcon():
    """Return a fresh MockConnection instance."""
    connection = MockConnection()
    return connection
Esempio n. 7
0
 def setUp(self):
     """Stash a mock connection plus the UDF signature used by the tests."""
     self.con = MockConnection()
     # Signature under test: (string, string) -> int64, named 'test_name'.
     self.name, self.inputs, self.output = (
         'test_name',
         ['string', 'string'],
         'int64',
     )
Esempio n. 8
0
class TestWrapping(unittest.TestCase):
    """UDF/UDA wrapping: SQL generation, output typing and typecasting."""

    def setUp(self):
        self.con = MockConnection()
        self.table = self.con.table('functional_alltypes')

        t = self.table
        self.i8 = t.tinyint_col
        self.i16 = t.smallint_col
        self.i32 = t.int_col
        self.i64 = t.bigint_col
        self.d = t.double_col
        self.f = t.float_col
        self.s = t.string_col
        self.b = t.bool_col
        self.t = t.timestamp_col
        self.dec = self.con.table('tpch_customer').c_acctbal
        # Order matters: test_udf_invalid_typecasting slices positionally.
        self.all_cols = [
            self.i8, self.i16, self.i32, self.i64,
            self.d, self.f, self.dec,
            self.s, self.b, self.t,
        ]

    def _primitive_cases(self):
        """(type name, scalar sample, column expr) for each primitive type."""
        return [
            ('boolean', True, self.b),
            ('int8', 1, self.i8),
            ('int16', 1, self.i16),
            ('int32', 1, self.i32),
            ('int64', 1, self.i64),
            ('float', 1.0, self.f),
            ('double', 1.0, self.d),
            ('string', '1', self.s),
            ('timestamp', ibis.timestamp('1961-04-10'), self.t),
        ]

    def test_sql_generation(self):
        fn = api.scalar_function(['string'], 'string', name='Tester')
        fn.register('identity', 'udf_testing')

        compiled = ibis.impala.compile(fn('hello world'))
        assert compiled == (
            "SELECT udf_testing.identity('hello world') AS `tmp`")

    def test_sql_generation_from_infoclass(self):
        fn = api.wrap_udf('test.so', ['string'], 'string', 'info_test')
        repr(fn)  # smoke-test the repr

        fn.register('info_test', 'udf_testing')
        compiled = ibis.impala.compile(fn('hello world'))
        assert compiled == (
            "SELECT udf_testing.info_test('hello world') AS `tmp`")

    def test_udf_primitive_output_types(self):
        """A UDF call yields the exact scalar/column type of its output."""
        for name, scalar_val, column_val in self._primitive_cases():
            fn = self._register_udf([name], name, 'test')
            ibis_type = dt.validate_type(name)

            scalar_expr = fn(scalar_val)
            assert type(scalar_expr) == type(  # noqa: E721
                ibis_type.scalar_type()(scalar_expr.op()))

            column_expr = fn(column_val)
            assert type(column_expr) == type(  # noqa: E721
                ibis_type.column_type()(column_expr.op()))

    def test_uda_primitive_output_types(self):
        """A UDA call yields the scalar type of its declared output."""
        # NOTE(review): both calls use the scalar sample (as in the
        # original); the column sample is unused here — confirm intended.
        for name, scalar_val, _ in self._primitive_cases():
            fn = self._register_uda([name], name, 'test')
            ibis_type = dt.validate_type(name)

            first = fn(scalar_val)
            second = fn(scalar_val)
            assert isinstance(
                first, type(ibis_type.scalar_type()(first.op())))
            assert isinstance(
                second, type(ibis_type.scalar_type()(second.op())))

    def test_decimal(self):
        fn = self._register_udf(['decimal(9,0)'], 'decimal(9,0)', 'test')
        assert type(fn(1.0)) == ir.DecimalScalar  # noqa: E721
        assert type(fn(self.dec)) == ir.DecimalColumn  # noqa: E721

    def test_udf_invalid_typecasting(self):
        """Implicit input casts: widening allowed, everything else rejected."""
        cases = [
            ('int8', self.all_cols[:1], self.all_cols[1:]),
            ('int16', self.all_cols[:2], self.all_cols[2:]),
            ('int32', self.all_cols[:3], self.all_cols[3:]),
            ('int64', self.all_cols[:4], self.all_cols[4:]),
            ('boolean', [], self.all_cols[:8] + self.all_cols[9:]),
            # allowing double here for now
            ('float', self.all_cols[:6], [self.s, self.b, self.t]),
            ('double', self.all_cols[:6], [self.s, self.b, self.t]),
            ('string', [], self.all_cols[:7] + self.all_cols[8:]),
            ('timestamp', [], self.all_cols[:-1]),
            ('decimal', self.all_cols[:7], self.all_cols[7:]),
        ]

        for name, accepted, rejected in cases:
            fn = self._register_udf([name], 'int32', 'typecast')

            for expr in accepted:
                fn(expr)  # must not raise

            for expr in rejected:
                with self.assertRaises(IbisTypeError):
                    fn(expr)

    def test_mult_args(self):
        fn = self._register_udf(
            ['int32', 'double', 'string', 'boolean', 'timestamp'],
            'int64',
            'mult_types',
        )

        column_result = fn(self.i32, self.d, self.s, self.b, self.t)
        assert isinstance(column_result, ir.ColumnExpr)

        scalar_result = fn(1, 1.0, 'a', True, ibis.timestamp('1961-04-10'))
        assert isinstance(scalar_result, ir.ScalarExpr)

    def _register_udf(self, inputs, output, name):
        """Create and register a scalar UDF in the ibis_testing database."""
        fn = api.scalar_function(inputs, output, name=name)
        fn.register(name, 'ibis_testing')
        return fn

    def _register_uda(self, inputs, output, name):
        """Create and register an aggregate UDF in the ibis_testing database."""
        fn = api.aggregate_function(inputs, output, name=name)
        fn.register(name, 'ibis_testing')
        return fn
Esempio n. 9
0
def con():
    """Return a fresh MockConnection instance."""
    connection = MockConnection()
    return connection
Esempio n. 10
0
 def setUp(self):
     """Create a fresh mock connection for each test."""
     self.con = MockConnection()
Esempio n. 11
0
class TestInteractiveUse(unittest.TestCase):
    """Behaviour of expression repr under the ``interactive`` option."""

    def setUp(self):
        self.con = MockConnection()

    def test_interactive_execute_on_repr(self):
        expr = self.con.table('functional_alltypes').bigint_col.sum()
        with config.option_context('interactive', True):
            repr(expr)
        # repr in interactive mode must actually run a query
        assert len(self.con.executed_queries) > 0

    def test_repr_png_is_none_in_interactive(self):
        table = self.con.table('functional_alltypes')
        with config.option_context('interactive', True):
            assert table._repr_png_() is None

    def test_repr_png_is_not_none_in_not_interactive(self):
        pytest.importorskip('ibis.expr.visualize')
        table = self.con.table('functional_alltypes')
        with config.option_context(
            'interactive', False
        ), config.option_context('graphviz_repr', True):
            assert table._repr_png_() is not None

    # XXX This test is failing in the OmniSciDB/Spark build, and working
    # in the rest, even if does not seem to depend on the backend.
    # For some reason in that build the statement does not contain
    # the LIMIT. Xfailing with `strict=False` since in the other backends
    # it does work. See #2337
    @pytest.mark.xfail(
        reason='Not obvious why this is failing for omnisci/spark, and this '
        'was incorrectly skipped until now. Xfailing to restore the CI',
        strict=False,
    )
    def test_default_limit(self):
        table = self.con.table('functional_alltypes')
        with config.option_context('interactive', True):
            repr(table)

        limit = config.options.sql.default_limit
        expected = """\
SELECT *
FROM functional_alltypes
LIMIT {0}""".format(limit)
        assert self.con.executed_queries[0] == expected

    def test_respect_set_limit(self):
        table = self.con.table('functional_alltypes').limit(10)
        with config.option_context('interactive', True):
            repr(table)

        expected = """\
SELECT *
FROM functional_alltypes
LIMIT 10"""
        assert self.con.executed_queries[0] == expected

    def test_disable_query_limit(self):
        table = self.con.table('functional_alltypes')
        with config.option_context(
            'interactive', True
        ), config.option_context('sql.default_limit', None):
            repr(table)

        # With no default limit the query must have no LIMIT clause.
        expected = """\
SELECT *
FROM functional_alltypes"""
        assert self.con.executed_queries[0] == expected

    def test_interactive_non_compilable_repr_not_fail(self):
        # Regression test for #170: repr of a non-compilable expression
        # must not raise in interactive mode.
        expr = self.con.table('functional_alltypes').string_col.topk(3)
        with config.option_context('interactive', True):
            repr(expr)

    def test_histogram_repr_no_query_execute(self):
        table = self.con.table('functional_alltypes')
        bucket = table.double_col.histogram(10).name('bucket')
        grouped = table.group_by(bucket).size()
        with config.option_context('interactive', True):
            grouped._repr()
        # Building the repr of a histogram must not hit the backend.
        assert self.con.executed_queries == []

    def test_compile_no_execute(self):
        table = self.con.table('functional_alltypes')
        table.double_col.sum().compile()
        # compile() alone must never execute a query
        assert self.con.executed_queries == []

    def test_isin_rule_supressed_exception_repr_not_fail(self):
        with config.option_context('interactive', True):
            table = self.con.table('functional_alltypes')
            keep = table['string_col'].notin(['1', '4', '7'])
            counts = table[keep]['string_col'].value_counts()
            repr(counts)