コード例 #1
0
ファイル: test_kudu_support.py プロジェクト: cloudera/ibis
    def test_kudu_schema_convert(self):
        """Check the Kudu-to-ibis schema conversion against a known schema."""
        # Each row: column name, expected ibis type, Kudu type name,
        # nullable flag, primary-key flag
        columns = [
            ('a', dt.Int8(False), 'int8', False, True),
            ('b', dt.Int16(False), 'int16', False, True),
            ('c', dt.Int32(False), 'int32', False, False),
            ('d', dt.Int64(True), 'int64', True, False),
            ('e', dt.String(True), 'string', True, False),
            ('f', dt.Boolean(False), 'bool', False, False),
            ('g', dt.Float(False), 'float', False, False),
            ('h', dt.Double(True), 'double', True, False),
            # TODO: add coverage for a 'binary' column named 'i'
            ('j', dt.Timestamp(True), 'timestamp', True, False),
        ]

        schema_builder = kudu.schema_builder()
        for col_name, _, kudu_type, nullable, _ in columns:
            schema_builder.add_column(col_name, kudu_type, nullable=nullable)

        # Only the rows flagged as keys make up the primary key.
        schema_builder.set_primary_keys(
            [row[0] for row in columns if row[4]]
        )
        kudu_schema = schema_builder.build()

        converted = ksupport.schema_kudu_to_ibis(kudu_schema)
        wanted = ibis.schema([(row[0], row[1]) for row in columns])

        assert_equal(converted, wanted)
コード例 #2
0
ファイル: test_client.py プロジェクト: cloudera/ibis
def test_database_layer(con, alltypes):
    """Check table access and table listing through the database object."""
    database = con.database()
    via_database = database.functional_alltypes

    assert_equal(via_database, alltypes)
    assert database.list_tables() == con.list_tables()
コード例 #3
0
ファイル: test_window_functions.py プロジェクト: wesm/ibis
def test_window_bind_to_table(t):
    """A window built from column names binds to the table's columns."""
    unbound = ibis.window(group_by="g", order_by=ibis.desc("f"))
    bound = unbound.bind(t)

    wanted = ibis.window(group_by=t.g, order_by=ibis.desc(t.f))
    assert_equal(bound, wanted)
コード例 #4
0
ファイル: test_sqlalchemy.py プロジェクト: thekingofhero/ibis
    def test_sqla_schema_conversion(self):
        """Check that a SQLAlchemy table maps to the expected ibis schema."""
        # Each row: column name, SQLAlchemy type, nullable flag,
        # ibis type (called below with the nullable flag)
        typespec = [
            ("smallint", sat.SmallInteger, False, dt.int16),
            ("int", sat.Integer, True, dt.int32),
            ("integer", sat.INTEGER(), True, dt.int64),
            ("bigint", sat.BigInteger, False, dt.int64),
            ("real", sat.REAL, True, dt.double),
            ("bool", sat.Boolean, True, dt.boolean),
            ("timestamp", sat.DateTime, True, dt.timestamp),
        ]

        columns = [
            sa.Column(col_name, sa_type, nullable=nullable)
            for col_name, sa_type, nullable, _ in typespec
        ]
        wanted_fields = [
            (col_name, make_type(nullable))
            for col_name, _, nullable, make_type in typespec
        ]

        sa_table = sa.Table("tname", self.meta, *columns)

        converted = alch.schema_from_table(sa_table)
        assert_equal(converted, ibis.schema(wanted_fields))
コード例 #5
0
ファイル: test_table.py プロジェクト: cloudera/ibis
def test_mutate(table):
    """mutate() with mixed arguments equals the explicit projection."""
    doubled_f = table.f * 2
    a_plus_b = (table.a + table.b).name('foo')

    mutated = table.mutate(a_plus_b, one=doubled_f, two=2)

    # Index with a tuple, exactly as `table[x, y, ...]` would.
    selection = (
        table,
        a_plus_b,
        doubled_f.name('one'),
        ibis.literal(2).name('two'),
    )
    assert_equal(mutated, table[selection])
コード例 #6
0
ファイル: test_partition.py プロジェクト: cloudera/ibis
def test_create_table_with_partition_column(con, temp_table_db):
    """Create a partitioned table and check both of its schemas."""
    tmp_db, name = temp_table_db
    partition_keys = ['year', 'month']

    declared = ibis.schema(
        [
            ('year', 'int32'),
            ('month', 'string'),
            ('day', 'int8'),
            ('value', 'double'),
        ]
    )
    con.create_table(
        name, schema=declared, database=tmp_db, partition=partition_keys
    )

    # the partition columns get put at the end of the table
    reordered = ibis.schema(
        [
            ('day', 'int8'),
            ('value', 'double'),
            ('year', 'int32'),
            ('month', 'string'),
        ]
    )
    assert_equal(con.get_schema(name, database=tmp_db), reordered)

    created = con.database(tmp_db).table(name)
    assert_equal(
        created.partition_schema(),
        ibis.schema([('year', 'int32'), ('month', 'string')]),
    )
コード例 #7
0
ファイル: test_table.py プロジェクト: deepfield/ibis
def test_group_by_kwargs(table):
    """Keyword arguments to group_by() match explicitly named keys."""
    with_kwargs = table.group_by(['f', table.h], z='g', z2=table.d)
    result = with_kwargs.aggregate(table.d.mean().name('foo'))

    named_keys = ['f', table.h, table.g.name('z'), table.d.name('z2')]
    wanted = table.group_by(named_keys).aggregate(
        table.d.mean().name('foo')
    )

    assert_equal(result, wanted)
コード例 #8
0
ファイル: test_table.py プロジェクト: zuxfoucault/ibis
    def test_set_column(self):
        """set_column() given a callable matches passing the expression."""
        def double_f(tbl):
            return tbl.f * 2

        from_callable = self.table.set_column('f', double_f)
        from_expr = self.table.set_column('f', self.table.f * 2)
        assert_equal(from_callable, from_expr)
コード例 #9
0
ファイル: test_table.py プロジェクト: zuxfoucault/ibis
    def test_add_column(self):
        """add_column() with a callable matches mutate() with the same one."""
        def double_f(tbl):
            return tbl.f * 2

        added = self.table.add_column(double_f, name='foo')
        mutated = self.table.mutate(foo=double_f)
        assert_equal(added, mutated)
コード例 #10
0
ファイル: test_table.py プロジェクト: zuxfoucault/ibis
    def test_summary_expand_list(self):
        """aggregate() with a summary equals passing summary.exprs() inline."""
        summary = self.table.f.summary()
        concat = self.table.g.group_concat().name('bar')

        with_summary = self.table.aggregate([concat, summary])
        with_exprs = self.table.aggregate([concat] + summary.exprs())

        assert_equal(with_summary, with_exprs)
コード例 #11
0
ファイル: test_table.py プロジェクト: zuxfoucault/ibis
    def test_rewrite_join_projection_without_other_ops(self):
        """Check parent substitution on a projection over a filtered join."""
        # Drop out filters and other commutative table operations. Join
        # predicates are "lifted" to reference the base, unmodified join roots

        # Star schema with fact table
        star1 = self.con.table('star1')
        star2 = self.con.table('star2')
        star3 = self.con.table('star3')

        positive_f = star1[star1['f'] > 0]

        # The first predicate refers to the base table, the second to the
        # filtered one.
        foo_pred = star1['foo_id'] == star2['foo_id']
        bar_pred = positive_f['bar_id'] == star3['bar_id']

        joined = (positive_f.left_join(star2, [foo_pred])
                  .inner_join(star3, [bar_pred]))

        # Project out the desired fields
        view = joined[[positive_f, star2['value1'], star3['value2']]]

        # Construct the thing we expect to obtain
        lifted_bar_pred = star1['bar_id'] == star3['bar_id']
        wanted_join = (star1.left_join(star2, [foo_pred])
                       .inner_join(star3, [lifted_bar_pred]))

        rewritten_op = L.substitute_parents(view).op()
        assert_equal(rewritten_op.table, wanted_join)

        # Ensure that filtered table has been substituted with the base table
        assert rewritten_op.selections[0] is star1
コード例 #12
0
ファイル: test_table.py プロジェクト: zuxfoucault/ibis
    def test_value_counts_convenience(self):
        """value_counts() equals grouping by the column and counting rows."""
        # #152
        row_count = self.table.count().name('count')
        wanted = self.table.group_by('g').aggregate(row_count)

        assert_equal(self.table.g.value_counts(), wanted)
コード例 #13
0
ファイル: test_table.py プロジェクト: cloudera/ibis
def test_unravel_compound_equijoin(table):
    """An AND-ed join predicate equals passing the predicates as a list."""
    left = ibis.table(
        [
            ('key1', 'string'),
            ('key2', 'string'),
            ('key3', 'string'),
            ('value1', 'double'),
        ],
        'foo_table',
    )
    right = ibis.table(
        [
            ('key1', 'string'),
            ('key2', 'string'),
            ('key3', 'string'),
            ('value2', 'double'),
        ],
        'bar_table',
    )

    on_key1 = left.key1 == right.key1
    on_key2 = left.key2 == right.key2
    on_key3 = left.key3 == right.key3
    compound = on_key1 & on_key2 & on_key3

    from_compound = left.inner_join(right, [compound])
    from_list = left.inner_join(right, [on_key1, on_key2, on_key3])
    assert_equal(from_compound, from_list)
コード例 #14
0
ファイル: test_partition.py プロジェクト: koverholt/ibis
    def test_create_table_with_partition_column(self):
        """Create a partitioned table and check both of its schemas."""
        partition_keys = ['year', 'month']
        declared = ibis.schema([
            ('year', 'int32'),
            ('month', 'int8'),
            ('day', 'int8'),
            ('value', 'double'),
        ])

        name = _tmp_name()
        self.con.create_table(
            name,
            schema=declared,
            database=self.tmp_db,
            partition=partition_keys,
            location=self._temp_location()
        )
        # Recorded only after the table was created successfully.
        self.temp_tables.append(name)

        # the partition columns get put at the end of the table
        reordered = ibis.schema([
            ('day', 'int8'),
            ('value', 'double'),
            ('year', 'int32'),
            ('month', 'int8'),
        ])
        stored = self.con.get_schema(name, database=self.tmp_db)
        assert_equal(stored, reordered)

        partition_schema = self.db.table(name).partition_schema()
        wanted = ibis.schema([('year', 'int32'), ('month', 'int8')])
        assert_equal(partition_schema, wanted)
コード例 #15
0
ファイル: test_table.py プロジェクト: cloudera/ibis
def test_having(table):
    """having() given a callable matches passing the bound expression."""
    mutated = table.mutate(foo=table.f * 2, bar=table.e / 2)

    from_callable = (
        mutated.group_by('foo')
        .having(lambda tbl: tbl.foo.sum() > 10)
        .size()
    )
    from_expr = (
        mutated.group_by('foo')
        .having(mutated.foo.sum() > 10)
        .size()
    )

    assert_equal(from_callable, from_expr)
コード例 #16
0
ファイル: test_string.py プロジェクト: cloudera/ibis
def test_contains(table):
    """contains() equals `find(...) >= 0`; the `in` operator raises."""
    column = table.g
    assert_equal(column.contains('foo'), column.find('foo') >= 0)

    # Python's `in` operator is expected to be rejected on the column.
    with pytest.raises(TypeError):
        'foo' in column
コード例 #17
0
ファイル: test_table.py プロジェクト: zuxfoucault/ibis
    def test_self_join(self):
        """Join a table to a view of itself, then project and aggregate."""
        # Self-joins are problematic with this design because column
        # expressions may reference either the left or right self. For example:
        #
        # SELECT left.key, sum(left.value - right.value) as total_deltas
        # FROM table left
        #  INNER JOIN table right
        #    ON left.current_period = right.previous_period + 1
        # GROUP BY 1
        #
        # One way around the self-join issue is to force the user to add
        # prefixes to the joined fields, then project using those. Not that
        # satisfying, though.
        base = self.table
        alias = self.table.view()
        delta_mean = (base['a'] - alias['b']).mean().name('metric')

        joined = base.inner_join(alias, [alias['g'] == base['g']])

        # basic check there's no referential problems
        joined_repr = repr(joined)
        for ref in ('ref_0', 'ref_1'):
            assert ref in joined_repr

        # Cannot be immediately materialized because of the schema overlap
        self.assertRaises(RelationError, joined.materialize)

        # Project out left table schema
        left_only = joined[[base]]
        assert_equal(left_only.schema(), base.schema())

        # Try aggregating on top of joined
        by_g = joined.aggregate([delta_mean], by=[base['g']])
        wanted_schema = api.Schema(['g', 'metric'], ['string', 'double'])
        assert_equal(by_g.schema(), wanted_schema)
コード例 #18
0
ファイル: test_client.py プロジェクト: teamclairvoyant/ibis
    def test_database_layer(self):
        """Check table access and table listing through the database object."""
        database = self.con.database()
        via_database = database.functional_alltypes

        assert_equal(via_database, self.alltypes)
        assert database.list_tables() == self.con.list_tables()
コード例 #19
0
ファイル: test_table.py プロジェクト: deepfield/ibis
def test_set_column(table):
    """set_column() given a callable matches passing the expression."""
    def double_f(tbl):
        return tbl.f * 2

    from_callable = table.set_column('f', double_f)
    from_expr = table.set_column('f', table.f * 2)
    assert_equal(from_callable, from_expr)
コード例 #20
0
ファイル: test_value_exprs.py プロジェクト: nataliaking/ibis
    def test_null(self):
        """literal(None) is a null scalar and equals ibis.null()."""
        from_literal = ibis.literal(None)

        assert isinstance(from_literal, ir.NullScalar)
        assert isinstance(from_literal.op(), ir.NullLiteral)

        assert_equal(from_literal, ibis.null())
コード例 #21
0
ファイル: test_value_exprs.py プロジェクト: wesm/ibis
def test_null():
    """literal(None) is a null scalar wrapping None and equals ibis.null()."""
    from_literal = ibis.literal(None)

    assert isinstance(from_literal, ir.NullScalar)
    assert isinstance(from_literal.op(), ir.NullLiteral)
    assert from_literal._arg.value is None

    assert_equal(from_literal, ibis.null())
コード例 #22
0
ファイル: test_table.py プロジェクト: cloudera/ibis
def test_groupby_mutate(table):
    """Grouped mutate() given callables matches passing the expressions."""
    grouped = table.group_by('g').order_by('f')

    from_callables = grouped.mutate(
        foo=lambda tbl: tbl.f.lag(),
        bar=lambda tbl: tbl.f.rank(),
    )
    from_exprs = grouped.mutate(foo=table.f.lag(), bar=table.f.rank())

    assert_equal(from_callables, from_exprs)
コード例 #23
0
ファイル: test_table.py プロジェクト: cloudera/ibis
def test_replace_column(table):
    """set_column() on an existing column keeps the column order."""
    source = api.table([('a', 'int32'), ('b', 'double'), ('c', 'string')])
    b_as_int = source.b.cast('int32')

    replaced = source.set_column('b', b_as_int)

    # Tuple indexing, exactly as `source[x, y, z]` would.
    selection = (source.a, b_as_int.name('b'), source.c)
    assert_equal(replaced, source[selection])
コード例 #24
0
    def test_coalesce_instance_method(self):
        """The coalesce() method matches the top-level ibis.coalesce()."""
        first = self.table.v7
        second = self.table.v5.cast('string')
        third = self.table.v8.cast('string')

        via_method = first.coalesce(second, third, 'foo')
        via_function = ibis.coalesce(first, second, third, 'foo')

        assert_equal(via_method, via_function)
コード例 #25
0
ファイル: test_client.py プロジェクト: koverholt/ibis
    def test_sql_with_limit(self):
        """A raw SQL query with LIMIT keeps the source table's schema."""
        # Same three-line statement, written as adjacent literals so it
        # can stay indented with the method body.
        query = (
            'SELECT *\n'
            'FROM functional_alltypes\n'
            'LIMIT 10'
        )
        limited = self.con.sql(query)
        wanted_schema = self.con.get_schema('functional_alltypes')
        assert_equal(limited.schema(), wanted_schema)
コード例 #26
0
ファイル: test_impala_e2e.py プロジェクト: megvuyyuru/ibis
    def test_query_parquet_file_like_table(self):
        """parquet_file(like_table=...) yields the expected schema."""
        region_dir = pjoin(self.test_data_dir, "parquet/tpch_region")
        wanted_schema = ibis.schema([
            ("r_regionkey", "int16"),
            ("r_name", "string"),
            ("r_comment", "string"),
        ])

        region = self.con.parquet_file(region_dir, like_table="tpch_region")

        assert_equal(region.schema(), wanted_schema)
コード例 #27
0
ファイル: test_table.py プロジェクト: zuxfoucault/ibis
    def test_mutate(self):
        """mutate() with mixed arguments equals the explicit projection."""
        tbl = self.table
        doubled_f = tbl.f * 2
        a_plus_b = (tbl.a + tbl.b).name('foo')

        mutated = tbl.mutate(a_plus_b, one=doubled_f, two=2)

        # Index with a tuple, exactly as `tbl[x, y, ...]` would.
        selection = (
            tbl,
            a_plus_b,
            doubled_f.name('one'),
            ibis.literal(2).name('two'),
        )
        assert_equal(mutated, tbl[selection])
コード例 #28
0
ファイル: test_table.py プロジェクト: zuxfoucault/ibis
    def test_join_no_predicate_list(self):
        """A bare join predicate equals the same predicate in a list."""
        region = self.con.table('tpch_region')
        nation = self.con.table('tpch_nation')
        same_region = region.r_regionkey == nation.n_regionkey

        bare = region.inner_join(nation, same_region)
        listed = region.inner_join(nation, [same_region])

        assert_equal(bare, listed)
コード例 #29
0
    def test_window_bind_to_table(self):
        """A window built from column names binds to the table's columns."""
        unbound = ibis.window(group_by='g', order_by=ibis.desc('f'))
        bound = unbound.bind(self.t)

        wanted = ibis.window(
            group_by=self.t.g, order_by=ibis.desc(self.t.f)
        )
        assert_equal(bound, wanted)
コード例 #30
0
ファイル: test_string.py プロジェクト: BabelTower/ibis
    def test_getitem_slice(self):
        """Slicing a string column equals the corresponding substr() call."""
        column = self.table.g
        # Pairs of (sliced expression, equivalent substr expression).
        pairs = [
            (column[:3], column.substr(0, 3)),
            (column[2:6], column.substr(2, 4)),
        ]

        for sliced, wanted in pairs:
            assert_equal(sliced, wanted)
コード例 #31
0
ファイル: test_table.py プロジェクト: qwshy/ibis
 def test_projection_array_expr(self):
     """Indexing by a single column equals indexing by a one-item list."""
     column = self.table.a
     bare = self.table[column]
     listed = self.table[[column]]
     assert_equal(bare, listed)
コード例 #32
0
def test_sql_with_limit(con):
    """A raw SQL query with LIMIT keeps the source table's schema."""
    limited = con.sql("SELECT * FROM functional_alltypes LIMIT 10")
    wanted_schema = con.get_schema('functional_alltypes')
    assert_equal(limited.schema(), wanted_schema)
コード例 #33
0
ファイル: test_table.py プロジェクト: tonyfast/ibis
def test_add_column_proxies_to_mutate(table):
    """add_column(expr, name=...) equals mutate(name=expr)."""
    added = table.add_column(ibis.now().cast('date'), name='date')
    mutated = table.mutate(date=ibis.now().cast('date'))
    assert_equal(added, mutated)
コード例 #34
0
ファイル: test_client.py プロジェクト: wkusnierczyk/ibis
def test_get_schema(con, test_data_db):
    """get_schema() equals the schema of the table expression."""
    table_name = 'tpch_lineitem'
    lineitem = con.table(table_name)
    from_client = con.get_schema(table_name, database=test_data_db)
    assert_equal(lineitem.schema(), from_client)
コード例 #35
0
ファイル: test_table.py プロジェクト: qwshy/ibis
 def test_projection_convenient_syntax(self):
     """Indexing with several arguments equals indexing with one list."""
     tbl = self.table
     from_args = tbl[tbl, tbl['a'].name('foo')]
     from_list = tbl[[tbl, tbl['a'].name('foo')]]
     assert_equal(from_args, from_list)
コード例 #36
0
def test_projection_self(table):
    """Indexing a table by itself equals projecting the table."""
    indexed = table[table]
    projected = table.projection(table)
    assert_equal(indexed, projected)
コード例 #37
0
ファイル: test_analytics.py プロジェクト: ibis-project/ibis
def test_topk_function_late_bind(airlines):
    """topk() given a callable `by` matches passing the expression."""
    # GH #520
    late_bound = airlines.dest.topk(5, by=lambda tbl: tbl.arrdelay.mean())
    eager = airlines.dest.topk(5, by=airlines.arrdelay.mean())

    assert_equal(late_bound.to_aggregation(), eager.to_aggregation())
コード例 #38
0
ファイル: test_value_exprs.py プロジェクト: Sahanduiuc/ibis
def test_distinct_count(dtable):
    """distinct().count() equals nunique() named 'count'."""
    counted = dtable.string_col.distinct().count()
    wanted = dtable.string_col.nunique().name('count')

    assert_equal(counted, wanted)
    # The rewritten expression must be a CountDistinct operation.
    assert isinstance(counted.op(), ops.CountDistinct)
コード例 #39
0
def test_projection_convenient_syntax(table):
    """Indexing with several arguments equals indexing with one list."""
    from_args = table[table, table['a'].name('foo')]
    from_list = table[[table, table['a'].name('foo')]]
    assert_equal(from_args, from_list)
コード例 #40
0
def test_projection_no_list(table):
    """select() with a bare expression equals projection() with a list."""
    doubled_f = (table.f * 2).name('bar')

    selected = table.select(doubled_f)
    projected = table.projection([doubled_f])

    assert_equal(selected, projected)
コード例 #41
0
 def test_get_schema(self):
     """get_schema() equals the schema of the table expression."""
     table_name = 'tpch_lineitem'
     lineitem = self.con.table(table_name)
     from_client = self.con.get_schema(
         table_name, database=self.test_data_db
     )
     assert_equal(lineitem.schema(), from_client)
コード例 #42
0
def test_value_counts_unnamed_expr(con):
    """value_counts() on an unnamed expression uses the name 'unnamed'."""
    nation = con.table('tpch_nation')

    implicit = nation.n_name.lower().value_counts()
    explicit = nation.n_name.lower().name('unnamed').value_counts()

    assert_equal(implicit, explicit)
コード例 #43
0
def test_value_counts_convenience(table):
    """value_counts() equals grouping by the column and counting rows."""
    # #152
    row_count = table.count().name('count')
    wanted = table.group_by('g').aggregate(row_count)

    assert_equal(table.g.value_counts(), wanted)
コード例 #44
0
ファイル: test_lineage.py プロジェクト: ibis-project/ibis
def test_lineage(companies):
    """Check lin.lineage() output through mutate, filter and group-by."""

    def check(expr, wanted, same_length=True):
        # Compare the lineage of `expr` pairwise against `wanted`.
        found = list(lin.lineage(expr))
        if same_length:
            assert len(found) == len(wanted)
        for got, want in zip(found, wanted):
            assert_equal(got, want)

    # single table dependency
    funding_buckets = [
        0,
        1000000,
        10000000,
        50000000,
        100000000,
        500000000,
        1000000000,
    ]

    bucket = companies.funding_total_usd.bucket(
        funding_buckets, include_over=True
    )
    mutated = companies.mutate(
        bucket=bucket, status=companies.status.fillna('Unknown')
    )
    filtered = mutated[
        (companies.founded_at > '2010-01-01') | companies.founded_at.isnull()
    ]
    grouped = filtered.group_by(['bucket', 'status']).size()

    # Lineage shared by every expression derived from the named bucket.
    bucket_tail = [
        bucket.name('bucket'),
        bucket,
        companies.funding_total_usd,
        companies,
    ]

    # The bare bucket expression is compared without a length check.
    check(
        bucket,
        [bucket, companies.funding_total_usd, companies],
        same_length=False,
    )
    check(mutated.bucket, [mutated.bucket, mutated] + bucket_tail)
    check(filtered.bucket, [filtered.bucket, filtered] + bucket_tail)
    check(
        grouped.bucket,
        [grouped.bucket, grouped, filtered.bucket, filtered] + bucket_tail,
    )
コード例 #45
0
ファイル: test_table.py プロジェクト: qwshy/ibis
    def test_projection_self(self):
        """Indexing a table by itself equals projecting the table."""
        indexed = self.table[self.table]
        projected = self.table.projection(self.table)
        assert_equal(indexed, projected)
コード例 #46
0
ファイル: test_string.py プロジェクト: wangxiong2015/ibis
    def test_contains(self):
        """contains() equals a '%foo%' LIKE; the `in` operator raises."""
        column = self.table.g
        assert_equal(column.contains('foo'), column.like('%foo%'))

        # Python's `in` operator is expected to raise on the column.
        with self.assertRaises(Exception):
            'foo' in self.table.g
コード例 #47
0
def test_projection_array_expr(table):
    """Indexing by a single column equals indexing by a one-item list."""
    column = table.a
    bare = table[column]
    listed = table[[column]]
    assert_equal(bare, listed)
コード例 #48
0
def test_groupby_alias(table):
    """groupby() gives the same result as group_by()."""
    via_alias = table.groupby('g').size()
    via_group_by = table.group_by('g').size()
    assert_equal(via_alias, via_group_by)
コード例 #49
0
def test_lineage(companies):
    """Check lin.lineage() output through mutate, filter and group-by."""

    def check(expr, wanted):
        # Pairwise comparison only; zip() stops at the shorter sequence.
        found = list(lin.lineage(expr))
        for got, want in zip(found, wanted):
            assert_equal(got, want)

    # single table dependency
    funding_buckets = [
        0,
        1000000,
        10000000,
        50000000,
        100000000,
        500000000,
        1000000000,
    ]
    bucket_names = [
        '0 to 1m',
        '1m to 10m',
        '10m to 50m',
        '50m to 100m',
        '100m to 500m',
        '500m to 1b',
        'Over 1b',
    ]

    bucket = companies.funding_total_usd.bucket(
        funding_buckets, include_over=True
    )
    mutated = companies.mutate(
        bucket=bucket, status=companies.status.fillna('Unknown')
    )
    filtered = mutated[
        (companies.founded_at > '2010-01-01') | companies.founded_at.isnull()
    ]
    grouped = filtered.group_by(['bucket', 'status']).size()

    # TODO(cpcloud): Should this be used?
    joined = grouped.mutate(  # noqa
        bucket_name=lambda x: x.bucket.label(bucket_names).fillna('Unknown')
    )

    # Lineage shared by every expression derived from the named bucket.
    bucket_tail = [
        bucket.name('bucket'),
        companies.funding_total_usd,
        companies,
    ]

    check(bucket, [bucket, companies.funding_total_usd, companies])
    check(mutated.bucket, [mutated.bucket, mutated] + bucket_tail)
    check(filtered.bucket, [filtered.bucket, filtered] + bucket_tail)
    check(
        grouped.bucket,
        [grouped.bucket, grouped, filtered.bucket, filtered] + bucket_tail,
    )
コード例 #50
0
def test_filter_no_list(table):
    """filter() with a bare predicate equals indexing by that predicate."""
    a_over_five = table.a > 5

    filtered = table.filter(a_over_five)
    indexed = table[a_over_five]

    assert_equal(filtered, indexed)
コード例 #51
0
def test_distinct_count(functional_alltypes):
    """distinct().count() equals nunique() named 'count'."""
    column = functional_alltypes.string_col
    counted = column.distinct().count()
    wanted = functional_alltypes.string_col.nunique().name('count')

    assert_equal(counted, wanted)
    # The rewritten expression must be a CountDistinct operation.
    assert isinstance(counted.op(), ops.CountDistinct)