Example #1
def test_create_table_with_partition_column(con, temp_table_db):
    schema = ibis.schema(
        [
            ('year', 'int32'),
            ('month', 'string'),
            ('day', 'int8'),
            ('value', 'double'),
        ]
    )

    tmp_db, name = temp_table_db
    con.create_table(
        name, schema=schema, database=tmp_db, partition=['year', 'month']
    )

    # the partition columns are put at the end of the table
    ex_schema = ibis.schema(
        [
            ('day', 'int8'),
            ('value', 'double'),
            ('year', 'int32'),
            ('month', 'string'),
        ]
    )
    table_schema = con.get_schema(name, database=tmp_db)
    assert_equal(table_schema, ex_schema)

    partition_schema = con.database(tmp_db).table(name).partition_schema()

    expected = ibis.schema([('year', 'int32'), ('month', 'string')])
    assert_equal(partition_schema, expected)
Example #2
def create_parquet_tables(con):
    parquet_files = con.hdfs.ls(pjoin(ENV.test_data_dir, 'parquet'))
    schemas = {
        'functional_alltypes': ibis.schema(
            [('id', 'int32'),
             ('bool_col', 'boolean'),
             ('tinyint_col', 'int8'),
             ('smallint_col', 'int16'),
             ('int_col', 'int32'),
             ('bigint_col', 'int64'),
             ('float_col', 'float'),
             ('double_col', 'double'),
             ('date_string_col', 'string'),
             ('string_col', 'string'),
             ('timestamp_col', 'timestamp'),
             ('year', 'int32'),
             ('month', 'int32')]),
        'tpch_region': ibis.schema(
            [('r_regionkey', 'int16'),
             ('r_name', 'string'),
             ('r_comment', 'string')])}
    tables = []
    for path in parquet_files:
        head, table_name = osp.split(path)
        print('Creating {0}'.format(table_name))
        # if no schema was provided, infer it from the file
        schema = schemas.get(table_name)
        table = con.parquet_file(path, schema=schema, name=table_name,
                                 database=ENV.test_data_db, persist=True)
        tables.append(table)
    return tables
Example #3
    def test_create_table_with_partition_column(self):
        schema = ibis.schema([('year', 'int32'),
                              ('month', 'int8'),
                              ('day', 'int8'),
                              ('value', 'double')])

        name = _tmp_name()
        self.con.create_table(name, schema=schema,
                              database=self.tmp_db,
                              partition=['year', 'month'],
                              location=self._temp_location())
        self.temp_tables.append(name)

        # the partition columns are put at the end of the table
        ex_schema = ibis.schema([('day', 'int8'),
                                 ('value', 'double'),
                                 ('year', 'int32'),
                                 ('month', 'int8')])
        table_schema = self.con.get_schema(name, database=self.tmp_db)
        assert_equal(table_schema, ex_schema)

        partition_schema = self.db.table(name).partition_schema()

        expected = ibis.schema([('year', 'int32'),
                                ('month', 'int8')])
        assert_equal(partition_schema, expected)
Example #4
def create_parquet_tables(con):
    parquet_files = con.hdfs.ls(pjoin(ENV.test_data_dir, "parquet"))
    schemas = {
        "functional_alltypes": ibis.schema(
            [
                ("id", "int32"),
                ("bool_col", "boolean"),
                ("tinyint_col", "int8"),
                ("smallint_col", "int16"),
                ("int_col", "int32"),
                ("bigint_col", "int64"),
                ("float_col", "float"),
                ("double_col", "double"),
                ("date_string_col", "string"),
                ("string_col", "string"),
                ("timestamp_col", "timestamp"),
                ("year", "int32"),
                ("month", "int32"),
            ]
        ),
        "tpch_region": ibis.schema([("r_regionkey", "int16"), ("r_name", "string"), ("r_comment", "string")]),
    }

    tables = []

    for path in parquet_files:
        head, table_name = posixpath.split(path)
        print("Creating {0}".format(table_name))
        # if no schema was provided, infer it from the file
        schema = schemas.get(table_name)
        t = con.parquet_file(path, schema=schema, name=table_name, database=ENV.test_data_db, persist=True)
        tables.append(t)

    return tables
Example #5
def test_schema_subset():
    s1 = ibis.schema([('a', dt.int64), ('b', dt.int32), ('c', dt.string)])

    s2 = ibis.schema([('a', dt.int64), ('c', dt.string)])

    assert s1 > s2
    assert s2 < s1

    assert s1 >= s2
    assert s2 <= s1
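
The ordering operators above behave like subset tests on the schema's (name, type) pairs. A small hedged sketch of the boundary case, assuming equal schemas satisfy the non-strict comparisons but not the strict ones (same imports as the example above):

def test_schema_subset_equal_sketch():
    # hedged assumption: an identical schema is <= and >= but not < or >
    s1 = ibis.schema([('a', dt.int64), ('b', dt.int32), ('c', dt.string)])
    s3 = ibis.schema([('a', dt.int64), ('b', dt.int32), ('c', dt.string)])

    assert s1 >= s3 and s1 <= s3
    assert not s1 > s3 and not s1 < s3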
Example #6
    def test_create_table_with_partition_column(self):
        schema = ibis.schema([("year", "int32"), ("month", "int8"), ("day", "int8"), ("value", "double")])

        name = util.guid()
        self.con.create_table(name, schema=schema, partition=["year", "month"])
        self.temp_tables.append(name)

        # the partition column get put at the end of the table
        ex_schema = ibis.schema([("day", "int8"), ("value", "double"), ("year", "int32"), ("month", "int8")])
        table_schema = self.con.get_schema(name)
        assert_equal(table_schema, ex_schema)

        partition_schema = self.con.get_partition_schema(name)
        expected = ibis.schema([("year", "int32"), ("month", "int8")])
        assert_equal(partition_schema, expected)
Example #7
def pandas_to_ibis_schema(frame):
    # no analog for decimal in pandas
    pairs = []
    for col_name in frame:
        ibis_type = pandas_col_to_ibis_type(frame[col_name])
        pairs.append((col_name, ibis_type))
    return ibis.schema(pairs)
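
A minimal usage sketch for the helper above. The DataFrame and the expected result are illustrative; the sketch assumes pandas_col_to_ibis_type maps int64 columns to int64 and object (string) columns to string, consistent with the dtype tests elsewhere on this page:

import pandas as pd

df = pd.DataFrame({'x': [1, 2, 3], 'y': ['a', 'b', 'c']})
inferred = pandas_to_ibis_schema(df)
# assumed mapping: int64 -> int64, object of str -> string
assert inferred == ibis.schema([('x', 'int64'), ('y', 'string')])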
Example #8
def test_create_table_schema(con):
    t_name = 'mytable'

    con.drop_table(t_name, force=True)

    schema = ibis.schema(
        [
            ('a', 'float'),
            ('b', 'double'),
            ('c', 'int32'),
            ('d', 'int64'),
            ('x', 'point'),
            ('y', 'linestring'),
            ('z', 'polygon'),
            ('w', 'multipolygon'),
        ]
    )

    con.create_table(t_name, schema=schema)

    try:
        t = con.table(t_name)

        assert isinstance(t.a, ir.FloatingColumn)
        assert isinstance(t.b, ir.FloatingColumn)
        assert isinstance(t.c, ir.IntegerColumn)
        assert isinstance(t.d, ir.IntegerColumn)
        assert isinstance(t.x, ir.PointColumn)
        assert isinstance(t.y, ir.LineStringColumn)
        assert isinstance(t.z, ir.PolygonColumn)
        assert isinstance(t.w, ir.MultiPolygonColumn)
    finally:
        con.drop_table(t_name)
Example #9
def test_create_table_parquet_with_schema():
    directory = '/path/to/'

    schema = ibis.schema(
        [('foo', 'string'), ('bar', 'int8'), ('baz', 'int16')]
    )

    statement = ddl.CreateTableParquet(
        'new_table',
        directory,
        schema=schema,
        external=True,
        can_exist=True,
        database='foo',
    )

    result = statement.compile()
    expected = """\
CREATE EXTERNAL TABLE IF NOT EXISTS foo.`new_table`
(`foo` string,
 `bar` tinyint,
 `baz` smallint)
STORED AS PARQUET
LOCATION '{0}'""".format(
        directory
    )

    assert result == expected
Example #10
    def test_create_external_ddl(self):
        schema = ibis.schema(
            [('key1', 'int32'), ('key2', 'int64'), ('value1', 'double')]
        )

        stmt = ksupport.CreateTableKudu(
            'impala_name',
            'kudu_name',
            ['master1.d.com:7051', 'master2.d.com:7051'],
            schema,
            ['key1', 'key2'],
        )

        result = stmt.compile()
        expected = """\
CREATE EXTERNAL TABLE `impala_name`
(`key1` int,
 `key2` bigint,
 `value1` double)
TBLPROPERTIES (
  'kudu.key_columns'='key1, key2',
  'kudu.master_addresses'='master1.d.com:7051, master2.d.com:7051',
  'kudu.table_name'='kudu_name',
  'storage_handler'='com.cloudera.kudu.hive.KuduStorageHandler'
)"""
        assert result == expected
Example #11
def test_add_partition_string_key():
    part_schema = ibis.schema([('foo', 'int32'), ('bar', 'string')])
    stmt = ddl.AddPartition('tbl', {'foo': 5, 'bar': 'qux'}, part_schema)

    result = stmt.compile()
    expected = 'ALTER TABLE tbl ADD PARTITION (foo=5, bar="qux")'
    assert result == expected
Example #12
def test_create_table_with_location_compile():
    path = '/path/to/table'
    schema = ibis.schema(
        [('foo', 'string'), ('bar', 'int8'), ('baz', 'int16')]
    )
    statement = ddl.CreateTableWithSchema(
        'another_table',
        schema,
        can_exist=False,
        format='parquet',
        path=path,
        database='foo',
    )
    result = statement.compile()

    expected = """\
CREATE TABLE foo.`another_table`
(`foo` string,
 `bar` tinyint,
 `baz` smallint)
STORED AS PARQUET
LOCATION '{0}'""".format(
        path
    )
    assert result == expected
Example #13
    def create_table(self, name, expr=None, schema=None, database=None):
        if database is not None and database != self.engine.url.database:
            raise NotImplementedError(
                'Creating tables from a different database is not yet '
                'implemented'
            )

        if expr is None and schema is None:
            raise ValueError('You must pass either an expression or a schema')

        if expr is not None and schema is not None:
            if not expr.schema().equals(ibis.schema(schema)):
                raise TypeError(
                    'Expression schema is not equal to passed schema. '
                    'Try passing the expression without the schema'
                )
        if schema is None:
            schema = expr.schema()

        self._schemas[self._fully_qualified_name(name, database)] = schema
        t = table_from_schema(name, self.meta, schema)

        with self.begin() as bind:
            t.create(bind=bind)
            if expr is not None:
                bind.execute(
                    t.insert().from_select(list(expr.columns), expr.compile())
                )
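
The schema-mismatch guard above relies on Schema.equals, which compares column names and types. A small sketch of that check in isolation (schemas and names are illustrative):

s_expr = ibis.schema([('a', 'int64'), ('b', 'string')])
s_passed = ibis.schema([('a', 'int64'), ('b', 'string')])
assert s_expr.equals(s_passed)  # identical names and types: accepted

s_wrong = ibis.schema([('a', 'int32'), ('b', 'string')])
assert not s_expr.equals(s_wrong)  # type mismatch: TypeError raised above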
Example #14
    def test_kudu_schema_convert(self):
        spec = [
            # name, ibis type, kudu type, is_nullable, is_primary_key
            ('a', dt.Int8(False), 'int8', False, True),
            ('b', dt.Int16(False), 'int16', False, True),
            ('c', dt.Int32(False), 'int32', False, False),
            ('d', dt.Int64(True), 'int64', True, False),
            ('e', dt.String(True), 'string', True, False),
            ('f', dt.Boolean(False), 'bool', False, False),
            ('g', dt.Float(False), 'float', False, False),
            ('h', dt.Double(True), 'double', True, False),
            # TODO
            # ('i', 'binary', False, False),
            ('j', dt.Timestamp(True), 'timestamp', True, False),
        ]

        builder = kudu.schema_builder()
        primary_keys = []
        ibis_types = []
        for name, itype, type_, is_nullable, is_primary_key in spec:
            builder.add_column(name, type_, nullable=is_nullable)

            if is_primary_key:
                primary_keys.append(name)

            ibis_types.append((name, itype))

        builder.set_primary_keys(primary_keys)
        kschema = builder.build()

        ischema = ksupport.schema_kudu_to_ibis(kschema)
        expected = ibis.schema(ibis_types)

        assert_equal(ischema, expected)
Example #15
def test_add_drop_partition_owned_by_impala(hdfs, con, temp_table):
    schema = ibis.schema(
        [('foo', 'string'), ('year', 'int32'), ('month', 'int16')]
    )
    name = temp_table
    con.create_table(name, schema=schema, partition=['year', 'month'])

    table = con.table(name)

    part = {'year': 2007, 'month': 4}

    subdir = util.guid()
    basename = util.guid()
    path = '/tmp/{}/{}'.format(subdir, basename)

    hdfs.mkdir('/tmp/{}'.format(subdir))
    hdfs.chown('/tmp/{}'.format(subdir), owner='impala', group='supergroup')

    table.add_partition(part, location=path)

    assert len(table.partitions()) == 2

    table.drop_partition(part)

    assert len(table.partitions()) == 1
Example #16
def test_is_partitioned(con, temp_table):
    schema = ibis.schema(
        [('foo', 'string'), ('year', 'int32'), ('month', 'string')]
    )
    name = temp_table
    con.create_table(name, schema=schema, partition=['year', 'month'])
    assert con.table(name).is_partitioned
Example #17
def test_apply_to_schema_with_timezone():
    data = {'time': pd.date_range('2018-01-01', '2018-01-02', freq='H')}
    df = pd.DataFrame(data)
    expected = df.assign(time=df.time.astype('datetime64[ns, EST]'))
    desired_schema = ibis.schema([('time', 'timestamp("EST")')])
    result = desired_schema.apply_to(df.copy())
    tm.assert_frame_equal(expected, result)
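
The string type 'timestamp("EST")' above should be equivalent to constructing the datatype directly, as the dt.Timestamp('US/Eastern') comparison in Example #22 suggests. A hedged one-liner, assuming ibis.expr.datatypes is imported as dt:

assert ibis.schema([('time', 'timestamp("EST")')]).equals(
    ibis.schema([('time', dt.Timestamp('EST'))])
)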
Example #18
    def test_create_table_delimited(self):
        path = '/path/to/files/'
        schema = ibis.schema([('a', 'string'),
                              ('b', 'int32'),
                              ('c', 'double'),
                              ('d', 'decimal(12,2)')])

        stmt = ddl.CreateTableDelimited('new_table', path, schema,
                                        delimiter='|',
                                        escapechar='\\',
                                        lineterminator='\0',
                                        database='foo',
                                        can_exist=True)

        result = stmt.compile()
        expected = """\
CREATE EXTERNAL TABLE IF NOT EXISTS foo.`new_table`
(`a` string,
 `b` int,
 `c` double,
 `d` decimal(12,2))
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
ESCAPED BY '\\'
LINES TERMINATED BY '\0'
LOCATION '{0}'""".format(path)
        assert result == expected
Example #19
    def test_create_table_delimited(self):
        path = "/path/to/files/"
        schema = ibis.schema([("a", "string"), ("b", "int32"), ("c", "double"), ("d", "decimal(12,2)")])

        stmt = ddl.CreateTableDelimited(
            "new_table",
            path,
            schema,
            delimiter="|",
            escapechar="\\",
            lineterminator="\0",
            database="foo",
            can_exist=True,
        )

        result = stmt.compile()
        expected = """\
CREATE EXTERNAL TABLE IF NOT EXISTS foo.`new_table`
(`a` string,
 `b` int,
 `c` double,
 `d` decimal(12,2))
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
ESCAPED BY '\\'
LINES TERMINATED BY '\0'
LOCATION '{0}'""".format(
            path
        )
        assert result == expected
Example #20
def test_filter_with_analytic():
    x = ibis.table(ibis.schema([('col', 'int32')]), 'x')
    with_filter_col = x[x.columns + [ibis.null().name('filter')]]
    filtered = with_filter_col[with_filter_col['filter'].isnull()]
    subquery = filtered[filtered.columns]

    with_analytic = subquery[['col', subquery.count().name('analytic')]]
    expr = with_analytic[with_analytic.columns]

    result = ibis.impala.compile(expr)
    expected = """\
SELECT `col`, `analytic`
FROM (
  SELECT `col`, count(*) OVER () AS `analytic`
  FROM (
    SELECT `col`, `filter`
    FROM (
      SELECT *
      FROM (
        SELECT `col`, NULL AS `filter`
        FROM x
      ) t3
      WHERE `filter` IS NULL
    ) t2
  ) t1
) t0"""

    assert result == expected
Example #21
    def test_sqla_schema_conversion(self):
        typespec = [
            # name, sqlalchemy type, nullable, ibis type
            ("smallint", sat.SmallInteger, False, dt.int16),
            ("int", sat.Integer, True, dt.int32),
            ("integer", sat.INTEGER(), True, dt.int64),
            ("bigint", sat.BigInteger, False, dt.int64),
            ("real", sat.REAL, True, dt.double),
            ("bool", sat.Boolean, True, dt.boolean),
            ("timestamp", sat.DateTime, True, dt.timestamp),
        ]

        sqla_types = []
        ibis_types = []
        for name, t, nullable, ibis_type in typespec:
            sqla_type = sa.Column(name, t, nullable=nullable)
            sqla_types.append(sqla_type)
            ibis_types.append((name, ibis_type(nullable)))

        table = sa.Table("tname", self.meta, *sqla_types)

        schema = alch.schema_from_table(table)
        expected = ibis.schema(ibis_types)

        assert_equal(schema, expected)
Example #22
def test_timestamp_with_timezone():
    df = pd.DataFrame(
        {'A': pd.date_range('20130101', periods=3, tz='US/Eastern')}
    )
    schema = sch.infer(df)
    expected = ibis.schema([('A', "timestamp('US/Eastern')")])
    assert schema.equals(expected)
    assert schema.types[0].equals(dt.Timestamp('US/Eastern'))
Example #23
    def test_dtype_datetime64(self):
        df = pd.DataFrame({
            'col': [pd.Timestamp('2010-11-01 00:01:00'),
                    pd.Timestamp('2010-11-01 00:02:00.1000'),
                    pd.Timestamp('2010-11-01 00:03:00.300000')]})
        inferred = pandas_to_ibis_schema(df)
        expected = ibis.schema([('col', 'timestamp')])
        assert inferred == expected
Example #24
    def test_dtype_timedelta64(self):
        df = pd.DataFrame({
            'col': [pd.Timedelta('1 days'),
                    pd.Timedelta('-1 days 2 min 3us'),
                    pd.Timedelta('-2 days +23:57:59.999997')]})
        inferred = pandas_to_ibis_schema(df)
        expected = ibis.schema([('col', 'int64')])
        assert inferred == expected
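
The int64 result above is presumably the underlying nanosecond representation: pandas stores timedelta64[ns] as 64-bit nanosecond counts, which a quick check makes concrete:

import pandas as pd

one_day = pd.Series([pd.Timedelta('1 days')])
# 86400 seconds per day, 10**9 nanoseconds per second
assert one_day.astype('int64')[0] == 86400 * 10**9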
Example #25
    def test_query_parquet_file_like_table(self):
        hdfs_path = pjoin(self.test_data_dir, "parquet/tpch_region")

        ex_schema = ibis.schema([("r_regionkey", "int16"), ("r_name", "string"), ("r_comment", "string")])

        table = self.con.parquet_file(hdfs_path, like_table="tpch_region")

        assert_equal(table.schema(), ex_schema)
Example #26
    def test_is_partitioned(self):
        schema = ibis.schema([('foo', 'string'),
                              ('year', 'int32'),
                              ('month', 'int16')])
        name = _tmp_name()
        self.db.create_table(name, schema=schema,
                             partition=['year', 'month'])
        assert self.db.table(name).is_partitioned
Example #27
def create_parquet_tables(con, executor):
    def create_table(table_name):
        logger.info('Creating %s', table_name)
        schema = schemas.get(table_name)
        path = os.path.join(ENV.test_data_dir, 'parquet', table_name)
        table = con.parquet_file(
            path,
            schema=schema,
            name=table_name,
            database=ENV.test_data_db,
            persist=True,
        )
        return table

    parquet_files = con.hdfs.ls(os.path.join(ENV.test_data_dir, 'parquet'))
    schemas = {
        'functional_alltypes': ibis.schema(
            [
                ('id', 'int32'),
                ('bool_col', 'boolean'),
                ('tinyint_col', 'int8'),
                ('smallint_col', 'int16'),
                ('int_col', 'int32'),
                ('bigint_col', 'int64'),
                ('float_col', 'float'),
                ('double_col', 'double'),
                ('date_string_col', 'string'),
                ('string_col', 'string'),
                ('timestamp_col', 'timestamp'),
                ('year', 'int32'),
                ('month', 'int32'),
            ]
        ),
        'tpch_region': ibis.schema(
            [
                ('r_regionkey', 'int16'),
                ('r_name', 'string'),
                ('r_comment', 'string'),
            ]
        ),
    }
    return (
        executor.submit(create_table, table_name)
        for table_name in parquet_files
    )
Example #28
    def test_query_parquet_file_like_table(self):
        hdfs_path = pjoin(self.test_data_dir, 'parquet/tpch_region')

        ex_schema = ibis.schema([('r_regionkey', 'int16'),
                                 ('r_name', 'string'),
                                 ('r_comment', 'string')])

        table = self.con.parquet_file(hdfs_path, like_table='tpch_region')

        assert_equal(table.schema(), ex_schema)
Example #29
    def test_query_parquet_infer_schema(self):
        hdfs_path = pjoin(self.test_data_dir, "parquet/tpch_region")
        table = self.con.parquet_file(hdfs_path)

        # NOTE: the actual schema should have an int16, but because this is
        # inferred from a parquet file, which has no notion of int16, the
        # inferred schema will have an int32 instead.
        ex_schema = ibis.schema([("r_regionkey", "int32"), ("r_name", "string"), ("r_comment", "string")])

        assert_equal(table.schema(), ex_schema)
Example #30
    def test_create_partitioned_separate_schema(self):
        schema = ibis.schema([('day', 'int8'),
                              ('value', 'double')])
        part_schema = ibis.schema([('year', 'int32'),
                                   ('month', 'int8')])

        name = _tmp_name()
        self.con.create_table(name, schema=schema, partition=part_schema)
        self.temp_tables.append(name)

        # the partition columns are put at the end of the table
        ex_schema = ibis.schema([('day', 'int8'),
                                 ('value', 'double'),
                                 ('year', 'int32'),
                                 ('month', 'int8')])
        table_schema = self.con.get_schema(name)
        assert_equal(table_schema, ex_schema)

        partition_schema = self.con.table(name).partition_schema()
        assert_equal(partition_schema, part_schema)
Example #31
    def test_create_table_with_location(self):
        path = '/path/to/table'
        schema = ibis.schema([('foo', 'string'),
                              ('bar', 'int8'),
                              ('baz', 'int16')])
        statement = ddl.CreateTableWithSchema('another_table', schema,
                                              can_exist=False,
                                              format='parquet',
                                              path=path, database='foo')
        result = statement.compile()

        expected = """\
CREATE TABLE foo.`another_table`
(`foo` string,
 `bar` tinyint,
 `baz` smallint)
STORED AS PARQUET
LOCATION '{0}'""".format(path)
        assert result == expected
Example #32
def test_load_data_sqlalchemy(backend, con, temp_table):
    sch = ibis.schema([
        ('first_name', 'string'),
        ('last_name', 'string'),
        ('department_name', 'string'),
        ('salary', 'float64'),
    ])

    df = pd.DataFrame({
        'first_name': ['A', 'B', 'C'],
        'last_name': ['D', 'E', 'F'],
        'department_name': ['AA', 'BB', 'CC'],
        'salary': [100.0, 200.0, 300.0],
    })
    con.create_table(temp_table, schema=sch)
    con.load_data(temp_table, df, if_exists='append')
    result = con.table(temp_table).execute()

    backend.assert_frame_equal(df, result)
Example #33
    def batting(self) -> ir.TableExpr:
        schema = ibis.schema(
            [
                ('lgID', dt.string),
                ('G', dt.float64),
                ('AB', dt.float64),
                ('R', dt.float64),
                ('H', dt.float64),
                ('X2B', dt.float64),
                ('X3B', dt.float64),
                ('HR', dt.float64),
                ('RBI', dt.float64),
                ('SB', dt.float64),
                ('CS', dt.float64),
                ('BB', dt.float64),
                ('SO', dt.float64),
            ]
        )
        return self.connection.table('batting', schema=schema)
Example #34
def test_nullable_input_output(con, backend, temp_table):
    # - Impala, PySpark and Spark non-nullable issues #2138 and #2137
    if not hasattr(con, 'create_table') or not hasattr(con, 'drop_table'):
        pytest.xfail(
            '{} backend does not have create_table or drop_table '
            'methods'.format(type(backend).__name__)
        )

    sch = ibis.schema([
        ('foo', 'int64'),
        ('bar', ibis.expr.datatypes.int64(nullable=False)),
        ('baz', 'boolean*'),
    ])

    con.create_table(temp_table, schema=sch)

    t = con.table(temp_table)

    assert t.schema().types[0].nullable
    assert not t.schema().types[1].nullable
    assert t.schema().types[2].nullable
Example #35
    def test_add_drop_partition_no_location(self):
        schema = ibis.schema([('foo', 'string'), ('year', 'int32'),
                              ('month', 'int16')])
        name = _tmp_name()
        self.db.create_table(name, schema=schema, partition=['year', 'month'])

        table = self.db.table(name)

        part = {'year': 2007, 'month': 4}

        table.add_partition(part)

        assert len(table.partitions()) == 2

        table.drop_partition(part)

        assert len(table.partitions()) == 1

        table.drop()
Example #36
    def test_query_delimited_file_directory(self):
        hdfs_path = pjoin(self.test_data_dir, 'csv')

        schema = ibis.schema([('foo', 'string'),
                              ('bar', 'double'),
                              ('baz', 'int8')])
        name = 'delimited_table_test1'
        table = self.con.delimited_file(hdfs_path, schema, name=name,
                                        database=self.tmp_db,
                                        delimiter=',')
        try:
            expr = (table
                    [table.bar > 0]
                    .group_by('foo')
                    .aggregate([table.bar.sum().name('sum(bar)'),
                                table.baz.sum().name('mean(baz)')]))
            expr.execute()
        finally:
            self.con.drop_table(name, database=self.tmp_db)
Example #37
    def test_add_drop_partition(self):
        pytest.skip('HIVE-12613')
        schema = ibis.schema([('foo', 'string'), ('year', 'int32'),
                              ('month', 'int16')])
        name = _tmp_name()
        self.db.create_table(name, schema=schema, partition=['year', 'month'])

        table = self.db.table(name)

        part = {'year': 2007, 'month': 4}

        path = '/tmp/tmp-{0}'.format(util.guid())
        table.add_partition(part, location=path)

        assert len(table.partitions()) == 2

        table.drop_partition(part)

        assert len(table.partitions()) == 1
Example #38
def test_add_drop_partition_hive_bug(con, temp_table):
    schema = ibis.schema([('foo', 'string'), ('year', 'int32'),
                          ('month', 'int16')])
    name = temp_table
    con.create_table(name, schema=schema, partition=['year', 'month'])

    table = con.table(name)

    part = {'year': 2007, 'month': 4}

    path = '/tmp/{}'.format(util.guid())

    table.add_partition(part, location=path)

    assert len(table.partitions()) == 2

    table.drop_partition(part)

    assert len(table.partitions()) == 1
Example #39
def test_query_parquet_file_with_schema(con, test_data_dir):
    hdfs_path = pjoin(test_data_dir, 'parquet/tpch_region')

    schema = ibis.schema([
        ('r_regionkey', 'int16'),
        ('r_name', 'string'),
        ('r_comment', 'string'),
    ])

    table = con.parquet_file(hdfs_path, schema=schema)

    name = table.op().name

    # table exists
    con.table(name)

    expr = table.r_name.value_counts()
    expr.execute()

    assert table.count().execute() == 5
Example #40
def test_persist_parquet_file_with_name(con, test_data_dir, temp_table_db):
    import gc

    hdfs_path = pjoin(test_data_dir, 'parquet/tpch_region')

    tmp_db, name = temp_table_db
    schema = ibis.schema([
        ('r_regionkey', 'int16'),
        ('r_name', 'string'),
        ('r_comment', 'string'),
    ])
    con.parquet_file(hdfs_path,
                     schema=schema,
                     name=name,
                     database=tmp_db,
                     persist=True)
    gc.collect()

    # table still exists
    con.table(name, database=tmp_db)
Example #41
def test_mutation_fusion_no_overwrite():
    """Test fusion with chained mutation that doesn't overwrite existing
    columns.
    """
    t = ibis.table(ibis.schema([('col', 'int32')]), 't')

    result = t
    result = result.mutate(col1=t['col'] + 1)
    result = result.mutate(col2=t['col'] + 2)
    result = result.mutate(col3=t['col'] + 3)

    first_selection = result

    assert len(result.op().selections) == 4
    assert (first_selection.op().selections[1].equals(
        (t['col'] + 1).name('col1')))
    assert (first_selection.op().selections[2].equals(
        (t['col'] + 2).name('col2')))
    assert (first_selection.op().selections[3].equals(
        (t['col'] + 3).name('col3')))
Example #42
    def create_table(self, name, expr=None, schema=None, database=None):
        if database is not None and database != self.engine.url.database:
            raise NotImplementedError(
                'Creating tables from a different database is not yet '
                'implemented')

        if expr is None and schema is None:
            raise ValueError('You must pass either an expression or a schema')

        if expr is not None and schema is not None:
            if not expr.schema().equals(ibis.schema(schema)):
                raise TypeError(
                    'Expression schema is not equal to passed schema. '
                    'Try passing the expression without the schema')
        t = table_from_schema(name, self.meta, schema or expr.schema())
        with self.con.begin() as bind:
            t.create(bind=bind)
            if expr is not None:
                bind.execute(t.insert().from_select(list(expr.columns),
                                                    expr.compile()))
Example #43
def impala_create_test_database(con, env):
    con.drop_database(env.test_data_db, force=True)
    con.create_database(env.test_data_db)
    con.create_table(
        'alltypes',
        schema=ibis.schema(
            [
                ('a', 'int8'),
                ('b', 'int16'),
                ('c', 'int32'),
                ('d', 'int64'),
                ('e', 'float'),
                ('f', 'double'),
                ('g', 'string'),
                ('h', 'boolean'),
                ('i', 'timestamp'),
            ]
        ),
        database=env.test_data_db,
    )
Example #44
def test_create_table_schema(con, temp_table, properties):
    schema = ibis.schema([
        ('a', 'float'),
        ('b', 'double'),
        ('c', 'int8'),
        ('d', 'int16'),
        ('e', 'int32'),
        ('f', 'int64'),
        ('x', 'point'),
        ('y', 'linestring'),
        ('z', 'polygon'),
        ('w', 'multipolygon'),
    ])

    con.create_table(temp_table, schema=schema, **properties)

    t = con.table(temp_table)

    for k, i_type in t.schema().items():
        assert schema[k] == i_type
Example #45
def get_type(expr):
    try:
        return str(expr.type())
    except (AttributeError, NotImplementedError):
        pass

    try:
        schema = expr.schema()
    except (AttributeError, NotImplementedError):
        try:
            # As a last resort, try to get the name of the output_type class
            return expr.op().output_type().__name__
        except (AttributeError, NotImplementedError):
            return '\u2205'  # empty set character
    except com.IbisError:
        op = expr.op()
        assert isinstance(op, ops.Join)
        left_table_name = getattr(op.left.op(), 'name', None) or ops.genname()
        left_schema = op.left.schema()
        right_table_name = (
            getattr(op.right.op(), 'name', None) or ops.genname()
        )
        right_schema = op.right.schema()
        pairs = [
            ('{}.{}'.format(left_table_name, left_column), type)
            for left_column, type in left_schema.items()
        ] + [
            ('{}.{}'.format(right_table_name, right_column), type)
            for right_column, type in right_schema.items()
        ]
        schema = ibis.schema(pairs)

    return (
        ''.join(
            '<BR ALIGN="LEFT" />  <I>{}</I>: {}'.format(
                escape(name), escape(str(type))
            )
            for name, type in zip(schema.names, schema.types)
        )
        + '<BR ALIGN="LEFT" />'
    )
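
A usage sketch for the fall-through path above: a table expression has no .type(), so the AttributeError branch is taken, expr.schema() succeeds, and one <BR ALIGN="LEFT" />-separated entry per column comes back. The table here is illustrative:

t = ibis.table(ibis.schema([('col', 'int32')]), 'x')
label = get_type(t)
# expected shape (hedged):
# '<BR ALIGN="LEFT" />  <I>col</I>: int32<BR ALIGN="LEFT" />'
assert 'col' in label and 'int32' in label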
Example #46
    def test_load_data_partitioned(self):
        path = '/path/to/data'
        part = {'year': 2007, 'month': 7}
        part_schema = ibis.schema([('year', 'int32'), ('month', 'int32')])
        stmt = ddl.LoadData('functional_alltypes', path,
                            database='foo',
                            partition=part,
                            partition_schema=part_schema)

        result = stmt.compile()
        expected = """\
LOAD DATA INPATH '/path/to/data' INTO TABLE foo.`functional_alltypes`
PARTITION (year=2007, month=7)"""
        assert result == expected

        stmt.overwrite = True
        result = stmt.compile()
        expected = """\
LOAD DATA INPATH '/path/to/data' OVERWRITE INTO TABLE foo.`functional_alltypes`
PARTITION (year=2007, month=7)"""
        assert result == expected
Example #47
def test_convert_parquet(parquet_schema):
    strings = [dt.string, dt.string, dt.string]

    # uint32, int8, int16 stored as upcasted types
    types = ([
        dt.uint8,
        dt.uint16,
        dt.int64,
        dt.uint64,
        dt.int16,
        dt.int16,
        dt.int32,
        dt.int64,
        dt.float32,
        dt.float64,
        dt.boolean,
        dt.timestamp,
    ] + strings + [dt.binary, dt.int64])
    names = [
        'uint8',
        'uint16',
        'uint32',
        'uint64',
        'int8',
        'int16',
        'int32',
        'int64',
        'float32',
        'float64',
        'bool',
        'datetime',
        'str',
        'str_with_nulls',
        'empty_str',
        'bytes',
    ]
    expected = ibis.schema(zip(names, types))

    result = ibis.infer_schema(parquet_schema)
    assert result == expected
Example #48
def create_test_database(con):
    if con.exists_database(ENV.test_data_db):
        con.drop_database(ENV.test_data_db, force=True)
    con.create_database(ENV.test_data_db)
    logger.info('Created database %s', ENV.test_data_db)

    con.create_table(
        'alltypes',
        schema=ibis.schema([
            ('a', 'int8'),
            ('b', 'int16'),
            ('c', 'int32'),
            ('d', 'int64'),
            ('e', 'float'),
            ('f', 'double'),
            ('g', 'string'),
            ('h', 'boolean'),
            ('i', 'timestamp'),
        ]),
        database=ENV.test_data_db,
    )
    logger.info('Created empty table %s.`alltypes`', ENV.test_data_db)
Example #49
def test_query_delimited_file_directory(con, test_data_dir, tmp_db):
    hdfs_path = pjoin(test_data_dir, 'csv')

    schema = ibis.schema(
        [('foo', 'string'), ('bar', 'double'), ('baz', 'int8')]
    )
    name = 'delimited_table_test1'
    table = con.delimited_file(
        hdfs_path, schema, name=name, database=tmp_db, delimiter=','
    )

    expr = (
        table[table.bar > 0]
        .group_by('foo')
        .aggregate(
            [
                table.bar.sum().name('sum(bar)'),
                table.baz.sum().name('mean(baz)'),
            ]
        )
    )
    assert expr.execute() is not None
Example #50
def test_load_data_sqlalchemy(backend, con, temp_table):
    if not isinstance(con.dialect(), ibis.sql.alchemy.AlchemyDialect):
        pytest.skip('{} is not a SQL Alchemy Client.'.format(backend.name))

    sch = ibis.schema([
        ('first_name', 'string'),
        ('last_name', 'string'),
        ('department_name', 'string'),
        ('salary', 'float64'),
    ])

    df = pd.DataFrame({
        'first_name': ['A', 'B', 'C'],
        'last_name': ['D', 'E', 'F'],
        'department_name': ['AA', 'BB', 'CC'],
        'salary': [100.0, 200.0, 300.0],
    })
    con.create_table(temp_table, schema=sch)
    con.load_data(temp_table, df, if_exists='append')
    result = con.table(temp_table).execute()

    backend.assert_frame_equal(df, result)
Example #51
    def test_create_external_ddl(self):
        schema = ibis.schema([('key1', 'int32'), ('key2', 'int64'),
                              ('value1', 'double')])

        stmt = ksupport.CreateTableKudu(
            'impala_name', 'kudu_name',
            ['master1.d.com:7051', 'master2.d.com:7051'], schema,
            ['key1', 'key2'])

        result = stmt.compile()
        expected = """\
CREATE EXTERNAL TABLE `impala_name`
(`key1` int,
 `key2` bigint,
 `value1` double)
TBLPROPERTIES (
  'kudu.key_columns'='key1, key2',
  'kudu.master_addresses'='master1.d.com:7051, master2.d.com:7051',
  'kudu.table_name'='kudu_name',
  'storage_handler'='com.cloudera.kudu.hive.KuduStorageHandler'
)"""
        assert result == expected
Example #52
def test_convert_parquet(parquet_schema):
    # TODO(jreback)
    # not entirely sure this is correct
    # should these be strings in py2?
    if PY2:
        strings = [dt.binary, dt.binary, dt.binary]
    else:
        strings = [dt.string, dt.string, dt.string]

    # uint32, int8, int16 stored as upcasted types
    types = [
        dt.uint8, dt.uint16, dt.int64, dt.uint64, dt.int16, dt.int16, dt.int32,
        dt.int64, dt.float32, dt.float64, dt.boolean, dt.timestamp
    ] + strings + [dt.binary, dt.int64]
    names = [
        'uint8', 'uint16', 'uint32', 'uint64', 'int8', 'int16', 'int32',
        'int64', 'float32', 'float64', 'bool', 'datetime', 'str',
        'str_with_nulls', 'empty_str', 'bytes', '__index_level_0__'
    ]
    expected = ibis.schema(zip(names, types))

    result = ibis.infer_schema(parquet_schema)
    assert result == expected
Example #53
    def test_create_table_parquet_with_schema(self):
        directory = '/path/to/'

        schema = ibis.schema([('foo', 'string'), ('bar', 'int8'),
                              ('baz', 'int16')])

        statement = ddl.CreateTableParquet('new_table',
                                           directory,
                                           schema=schema,
                                           external=True,
                                           can_exist=True,
                                           database='foo')

        result = statement.compile()
        expected = """\
CREATE EXTERNAL TABLE IF NOT EXISTS foo.`new_table`
(`foo` string,
 `bar` tinyint,
 `baz` smallint)
STORED AS PARQUET
LOCATION '{0}'""".format(directory)

        assert result == expected
Example #54
def test_create_table_delimited():
    path = '/path/to/files/'
    schema = ibis.schema(
        [
            ('a', 'string'),
            ('b', 'int32'),
            ('c', 'double'),
            ('d', 'decimal(12, 2)'),
        ]
    )

    stmt = ddl.CreateTableDelimited(
        'new_table',
        path,
        schema,
        delimiter='|',
        escapechar='\\',
        lineterminator='\0',
        database='foo',
        can_exist=True,
    )

    result = stmt.compile()
    expected = """\
CREATE EXTERNAL TABLE IF NOT EXISTS foo.`new_table`
(`a` string,
 `b` int,
 `c` double,
 `d` decimal(12, 2))
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
ESCAPED BY '\\'
LINES TERMINATED BY '\0'
LOCATION '{0}'""".format(
        path
    )
    assert result == expected
Example #55
def test_sa_default_numeric_precision_and_scale(con, backend, dialects,
                                                default_precisions,
                                                default_scales):
    # TODO: find a better way to access ibis.sql.alchemy
    import ibis.sql.alchemy as alch

    dialect = dialects[backend.name]
    default_precision = default_precisions[backend.name]
    default_scale = default_scales[backend.name]

    typespec = [
        # name, sqlalchemy type, ibis type
        ('n1', dialect.NUMERIC, dt.Decimal(default_precision, default_scale)),
        ('n2', dialect.NUMERIC(5), dt.Decimal(5, default_scale)),
        ('n3', dialect.NUMERIC(None, 4), dt.Decimal(default_precision, 4)),
        ('n4', dialect.NUMERIC(10, 2), dt.Decimal(10, 2)),
    ]

    sqla_types = []
    ibis_types = []
    for name, t, ibis_type in typespec:
        sqla_type = sa.Column(name, t, nullable=True)
        sqla_types.append(sqla_type)
        ibis_types.append((name, ibis_type(nullable=True)))

    # Create a table with the numeric types.
    table_name = 'test_sa_default_param_decimal'
    engine = con.con
    table = sa.Table(table_name, sa.MetaData(bind=engine), *sqla_types)

    # Check that we can correctly recover the default precision and scale.
    schema = alch.schema_from_table(table)
    expected = ibis.schema(ibis_types)

    assert_equal(schema, expected)
    con.drop_table(table_name, force=True)
Example #56
def test_add_drop_partition_owned_by_impala(hdfs, con, temp_table):
    schema = ibis.schema([('foo', 'string'), ('year', 'int32'),
                          ('month', 'int16')])
    name = temp_table
    con.create_table(name, schema=schema, partition=['year', 'month'])

    table = con.table(name)

    part = {'year': 2007, 'month': 4}

    subdir = util.guid()
    basename = util.guid()
    path = f'/tmp/{subdir}/{basename}'

    hdfs.mkdir(f'/tmp/{subdir}')
    hdfs.chown(f'/tmp/{subdir}', owner='impala', group='supergroup')

    table.add_partition(part, location=path)

    assert len(table.partitions()) == 2

    table.drop_partition(part)

    assert len(table.partitions()) == 1
Example #57
def test_create_table_schema(con):
    t_name = 'mytable'

    con.drop_table(t_name, force=True)

    schema = ibis.schema([('a', 'float'), ('b', 'double'), ('c', 'int32'),
                          ('d', 'int64'), ('x', 'point'), ('y', 'linestring'),
                          ('z', 'polygon'), ('w', 'multipolygon')])

    con.create_table(t_name, schema=schema)

    try:
        t = con.table(t_name)

        assert isinstance(t.a, ir.FloatingColumn)
        assert isinstance(t.b, ir.FloatingColumn)
        assert isinstance(t.c, ir.IntegerColumn)
        assert isinstance(t.d, ir.IntegerColumn)
        assert isinstance(t.x, ir.PointColumn)
        assert isinstance(t.y, ir.LineStringColumn)
        assert isinstance(t.z, ir.PolygonColumn)
        assert isinstance(t.w, ir.MultiPolygonColumn)
    finally:
        con.drop_table(t_name)
Example #58
def test_read_csv(con, temp_table, filename):
    schema = ibis.schema(
        [
            ('index', 'int64'),
            ('Unnamed__0', 'int64'),
            ('id', 'int32'),
            ('bool_col', 'bool'),
            ('tinyint_col', 'int16'),
            ('smallint_col', 'int16'),
            ('int_col', 'int32'),
            ('bigint_col', 'int64'),
            ('float_col', 'float32'),
            ('double_col', 'double'),
            ('date_string_col', 'string'),
            ('string_col', 'string'),
            ('timestamp_col', 'timestamp'),
            ('year_', 'int32'),
            ('month_', 'int32'),
        ]
    )
    con.create_table(temp_table, schema=schema)

    # prepare the csv file inside the omnisci docker container;
    # if the file already exists, it will be overwritten
    con._execute(
        "COPY (SELECT * FROM functional_alltypes) TO '{}'".format(filename)
    )

    db = con.database()
    table = db.table(temp_table)
    table.read_csv(filename, header=False, quotechar='"', delimiter=",")

    df_read_csv = table.execute()
    df_expected = db.table("functional_alltypes").execute()

    pd.testing.assert_frame_equal(df_expected, df_read_csv)
Example #59
def test_sqla_schema_conversion(con):
    typespec = [
        # name, sqlalchemy type, nullable, ibis type
        ('smallint', sat.SmallInteger, False, dt.int16),
        ('int', sat.Integer, True, dt.int32),
        ('integer', sat.INTEGER(), True, dt.int32),
        ('bigint', sat.BigInteger, False, dt.int64),
        ('real', sat.REAL, True, dt.float32),
        ('bool', sat.Boolean, True, dt.bool),
        ('timestamp', sat.DateTime, True, dt.timestamp),
    ]

    sqla_types = []
    ibis_types = []
    for name, t, nullable, ibis_type in typespec:
        sqla_types.append(sa.Column(name, t, nullable=nullable))
        ibis_types.append((name, ibis_type(nullable=nullable)))

    table = sa.Table('tname', con.meta, *sqla_types)

    schema = schema_from_table(table)
    expected = ibis.schema(ibis_types)

    assert_equal(schema, expected)
Example #60
def test_query_schema(backend, con, alltypes, expr_fn, expected):
    if not hasattr(con, '_build_ast'):
        pytest.skip(
            '{} backend has no _build_ast method'.format(
                type(backend).__name__
            )
        )

    expr = expr_fn(alltypes)

    # we might need a public API for it
    ast = con._build_ast(expr, backend.make_context())
    query = con.query_class(con, ast)
    schema = query.schema()

    # clickhouse columns have been defined as non-nullable, whereas other
    # backends don't support non-nullable columns yet
    expected = ibis.schema(
        [
            (name, dtype(nullable=schema[name].nullable))
            for name, dtype in expected
        ]
    )
    assert query.schema().equals(expected)
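
The dtype(nullable=...) calls above rely on ibis datatypes being callable, returning a copy with the requested nullability; the same pattern appears in Examples #21 and #55. A minimal hedged check, assuming ibis.expr.datatypes is imported as dt:

assert not dt.int64(nullable=False).nullable
assert dt.int64(nullable=True).nullable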