Python infer Examples, ibis.expr.schema.infer Python Examples

Example #1

0

Show file

File: test_pandas_interop.py Project: cloudera/ibis

def test_timestamp_with_timezone():
    """Check that a tz-aware datetime column infers a tz-aware timestamp."""
    eastern_range = pd.date_range('20130101', periods=3, tz='US/Eastern')
    frame = pd.DataFrame({'A': eastern_range})

    inferred = sch.infer(frame)

    # Compare the whole schema first, then the single column's data type.
    wanted = ibis.schema([('A', "timestamp('US/Eastern')")])
    assert inferred.equals(wanted)
    first_type = inferred.types[0]
    assert first_type.equals(dt.Timestamp('US/Eastern'))

Example #2

0

Show file

File: hdf5.py Project: cloudera/ibis

    def table(self, name, path):
        """Return a table expression for ``name`` stored in the HDF5 file.

        Raises ``AttributeError`` when ``name`` is not among
        ``self.list_tables(path)``.  On success ``path`` is recorded in
        ``self.dictionary`` under ``name``.
        """
        available = self.list_tables(path)
        if name not in available:
            raise AttributeError(name)

        # Ask for the row range [0, 0) so only the column layout is needed
        # to infer the schema.
        with pd.HDFStore(str(path), mode='r') as store:
            empty_frame = store.select(name, start=0, stop=0)
            inferred = sch.infer(empty_frame)

        expr = self.table_class(name, inferred, self).to_expr()
        self.dictionary[name] = path
        return expr

Example #3

0

Show file

File: test_pandas_interop.py Project: cloudera/ibis

def test_insert(con, temp_table_db, exhaustive_df):
    """Insert ``exhaustive_df`` in two pieces and read the table back."""
    tmp_db, table_name = temp_table_db

    inferred = sch.infer(exhaustive_df)
    con.create_table(table_name, database=tmp_db, schema=inferred)

    # Load the first four rows, then everything after them.
    first_rows = exhaustive_df.iloc[:4]
    con.insert(table_name, first_rows, database=tmp_db)
    remaining_rows = exhaustive_df.iloc[4:]
    con.insert(table_name, remaining_rows, database=tmp_db)

    stored = con.table(table_name, database=tmp_db)

    # Order by 'tinyint_col' and renumber the index before comparing.
    fetched = stored.execute()
    ordered = fetched.sort_values(by='tinyint_col')
    result = ordered.reset_index(drop=True)
    assert_frame_equal(result, exhaustive_df)

Example #4

0

Show file

File: client.py Project: cloudera/ibis

    def get_schema(self, table_name, database=None):
        """
        Infer the schema of the object registered under ``table_name``

        Parameters
        ----------
        table_name : string
          Key looked up in ``self.dictionary``
        database : string, default None
          Not used by this implementation

        Returns
        -------
        schema : ibis Schema
        """
        registered = self.dictionary[table_name]
        return sch.infer(registered)

Example #5

0

Show file

File: parquet.py Project: cloudera/ibis

    def table(self, name, path):
        """Return a table expression for the parquet file backing ``name``.

        Raises ``AttributeError`` when ``name`` is not among
        ``self.list_tables(path)``.  When ``path`` is ``None`` the file is
        looked up under ``self.root``.  The resolved file location is recorded
        in ``self.dictionary`` under ``name``.
        """
        if name not in self.list_tables(path):
            raise AttributeError(name)

        directory = self.root if path is None else path

        # Locate "<name>.parquet" inside the chosen directory.
        filename = "{}.parquet".format(name)
        location = directory / filename

        # Only the file's schema object is handed to the inference routine.
        inferred = sch.infer(pq.ParquetFile(str(location)).schema)

        expr = self.table_class(name, inferred, self).to_expr()
        self.dictionary[name] = location
        return expr

Example #6

0

Show file

File: csv.py Project: cloudera/ibis

    def table(self, name, path=None, schema=None, **kwargs):
        """Return a table expression for the delimited file backing ``name``.

        Raises ``AttributeError`` when ``name`` is not among
        ``self.list_tables(path)``.  When ``path`` is ``None`` the file is
        looked up under ``self.root``.  ``kwargs`` are forwarded both to the
        sample read and to ``self.table_class``.  The resolved file location is
        recorded in ``self.dictionary`` under ``name``.
        """
        if name not in self.list_tables(path):
            raise AttributeError(name)

        directory = self.root if path is None else path

        # File name is "<name>.<extension>" inside the chosen directory.
        location = directory / "{}.{}".format(name, self.extension)

        # Read up to 50 rows, using an empty schema when the caller gave none.
        hint = schema if schema else sch.schema([])
        sample = _read_csv(
            location, schema=hint, header=0, nrows=50, **kwargs
        )

        # The table's schema is whatever the sample infers to.
        inferred = sch.infer(sample)
        expr = self.table_class(name, inferred, self, **kwargs).to_expr()

        self.dictionary[name] = location
        return expr

Example #7

0

Show file

File: client.py Project: martint/ibis

    def get_schema(self, table_name, database=None):
        """
        Infer the schema of the session table called ``table_name``

        Parameters
        ----------
        table_name : string
          Name passed to ``self._session.table``
        database : string
          Must be None; any other value is rejected

        Returns
        -------
        schema : ibis Schema

        Raises
        ------
        com.UnsupportedArgumentError
          If ``database`` is not None
        """
        if database is None:
            return sch.infer(self._session.table(table_name))

        raise com.UnsupportedArgumentError(
            'Spark does not support database param for table')

Example #8

0

Show file

File: __init__.py Project: jelitox/ibis

    def table(
        self,
        name: str,
        schema: sch.Schema | None = None,
    ) -> ir.TableExpr:
        """Get an ibis expression representing a DataFusion table.

        Parameters
        ----------
        name
            The name of the table to retrieve
        schema
            An optional schema for the table.
            NOTE(review): the value passed in is never read; the schema
            inferred from the DataFusion table is always used instead.
            Confirm whether that is intended.

        Returns
        -------
        TableExpr
            A table expression
        """
        # Tables are looked up in the 'public' database of the catalog.
        public_db = self._context.catalog().database('public')
        inferred = sch.infer(public_db.table(name).schema)
        return self.table_class(name, inferred, self).to_expr()

Example #9

0

Show file

File: client.py Project: descarteslabs/ibis-bigquery

 def get_schema(self, name, database=None):
     """Fetch the table ``name`` through the client and infer its schema.

     ``database`` is split into a project and a dataset by
     ``self._parse_project_and_dataset``.
     """
     project, dataset = self._parse_project_and_dataset(database)
     dataset_ref = self.client.dataset(dataset, project=project)
     table_ref = dataset_ref.table(name)
     return sch.infer(self.client.get_table(table_ref))

Example #10

0

Show file

File: pandas_interop.py Project: cloudera/ibis

 def get_schema(self):
     """Return the schema inferred from ``self.df``."""
     frame = self.df
     return sch.infer(frame)

Example #11

0

Show file

File: test_datatypes.py Project: wkusnierczyk/ibis

def test_dtype_categorical():
    """A category-dtype column should infer as ``dt.Category()``."""
    labels = ['a', 'b', 'c', 'a']
    frame = pd.DataFrame({'col': labels}, dtype='category')
    assert sch.infer(frame) == ibis.schema([('col', dt.Category())])

Example #12

0

Show file

File: test_datatypes.py Project: wkusnierczyk/ibis

def test_dtype_uint64():
    """uint64 NumPy data should infer as a ``uint64`` column."""
    frame = pd.DataFrame({'col': np.uint64([666, 2, 3])})
    assert sch.infer(frame) == ibis.schema([('col', 'uint64')])

Example #13

0

Show file

File: test_datatypes.py Project: wkusnierczyk/ibis

def test_dtype_uint16():
    """uint16 NumPy data should infer as a ``uint16`` column."""
    values = np.uint16([5569, 1, 33])
    actual = sch.infer(pd.DataFrame({'col': values}))
    wanted = ibis.schema([('col', 'uint16')])
    assert actual == wanted

Example #14

0

Show file

File: test_datatypes.py Project: wkusnierczyk/ibis

def test_dtype_float32():
    """float32 NumPy data should infer as a ``float`` column."""
    frame = pd.DataFrame({'col': np.float32([45e-3, -0.4, 99.0])})
    assert sch.infer(frame) == ibis.schema([('col', 'float')])

Example #15

0

Show file

 def get_schema(self, name, database=None):
     """Fetch the table ``name`` through the client and infer its schema.

     The table id comes from ``self._fully_qualified_name(name, database)``.
     """
     qualified = self._fully_qualified_name(name, database)
     reference = bq.TableReference.from_string(qualified)
     fetched = self.client.get_table(reference)
     return sch.infer(fetched)

Example #16

0

Show file

def test_schema_infer(col_data, schema_type):
    """A one-column frame built from ``col_data`` infers ``schema_type``."""
    frame = pd.DataFrame({'col': col_data})
    assert sch.infer(frame) == ibis.schema([('col', schema_type)])

Example #17

0

Show file

File: test_schema.py Project: cloudera/ibis

def test_infer_exhaustive_dataframe():
    """Infer a schema from a frame that mixes many column dtypes.

    Columns are built from explicitly typed NumPy arrays, an object array
    and plain Python lists; several of them contain ``None`` or ``np.nan``.
    """
    # The same 0..9 values feed the int32, int16 and int8 columns.
    digits = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

    # Timestamp column source; the ``None`` entry stays ``None``.
    stamp_texts = [
        '2010-11-01 00:00:00',
        None,
        '2010-11-01 00:02:00.100000',
        '2010-11-01 00:03:00.300000',
        '2010-11-01 00:04:00.600000',
        '2010-11-01 00:05:00.100000',
        '2010-11-01 00:06:00.150000',
        '2010-11-01 00:07:00.210000',
        '2010-11-01 00:08:00.280000',
        '2010-11-01 00:09:00.360000',
    ]

    df = pd.DataFrame(
        {
            'bigint_col': np.array(
                [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], dtype='i8'
            ),
            'bool_col': np.array(
                [True, False, True, False, True,
                 None, True, False, True, False],
                dtype=np.bool_,
            ),
            'bool_obj_col': np.array(
                [True, False, np.nan, False, True,
                 np.nan, True, np.nan, True, False],
                dtype=np.object_,
            ),
            'date_string_col': [
                '11/01/10', None, '11/01/10', '11/01/10', '11/01/10',
                '11/01/10', '11/01/10', '11/01/10', '11/01/10', '11/01/10',
            ],
            'double_col': np.array(
                [
                    0.0, 10.1, np.nan,
                    30.299999999999997, 40.399999999999999, 50.5,
                    60.599999999999994, 70.700000000000003,
                    80.799999999999997, 90.899999999999991,
                ],
                dtype=np.float64,
            ),
            'float_col': np.array(
                [
                    np.nan, 1.1000000238418579, 2.2000000476837158,
                    3.2999999523162842, 4.4000000953674316, 5.5,
                    6.5999999046325684, 7.6999998092651367,
                    8.8000001907348633, 9.8999996185302734,
                ],
                dtype='f4',
            ),
            'int_col': np.array(digits, dtype='i4'),
            'month': [11, 11, 11, 11, 2, 11, 11, 11, 11, 11],
            'smallint_col': np.array(digits, dtype='i2'),
            'string_col': [
                '0', '1', None, 'double , whammy', '4',
                '5', '6', '7', '8', '9',
            ],
            'timestamp_col': [
                None if text is None else pd.Timestamp(text)
                for text in stamp_texts
            ],
            'tinyint_col': np.array(digits, dtype='i1'),
            'year': [2010] * 5 + [2009] * 5,
        }
    )

    # One (column name, ibis type) pair per column, in column order.
    expected = [
        ('bigint_col', dt.int64),
        ('bool_col', dt.boolean),
        ('bool_obj_col', dt.boolean),
        ('date_string_col', dt.string),
        ('double_col', dt.double),
        ('float_col', dt.float),
        ('int_col', dt.int32),
        ('month', dt.int64),
        ('smallint_col', dt.int16),
        ('string_col', dt.string),
        ('timestamp_col', dt.timestamp),
        ('tinyint_col', dt.int8),
        ('year', dt.int64),
    ]

    assert sch.infer(df) == ibis.schema(expected)

Example #18

0

Show file

File: test_schema.py Project: cloudera/ibis

def test_infer_simple_dataframe(column, expected_dtype):
    """A one-column frame built from ``column`` infers ``expected_dtype``."""
    inferred = sch.infer(pd.DataFrame({'col': column}))
    wanted = ibis.schema([('col', expected_dtype)])
    assert inferred == wanted

Example #19

0

Show file

File: __init__.py Project: gerrymanoim/ibis

 def get_schema(self, table_name, database=None):
     """Infer the schema of the object stored under ``table_name``.

     ``database`` is accepted but not used here.
     """
     stored = self.dictionary[table_name]
     return sch.infer(stored)

Example #20

0

Show file

File: alchemy.py Project: cloudera/ibis

 def __init__(self, table, source, schema=None):
     """Initialise from ``table``, keeping a reference to it.

     The schema handed to the parent initialiser is inferred from ``table``,
     with ``schema`` forwarded to ``sch.infer``.
     """
     resolved = sch.infer(table, schema=schema)
     super().__init__(table.name, resolved, source)
     self.sqla_table = table

Example #21

0

Show file

File: test_datatypes.py Project: wkusnierczyk/ibis

def test_dtype_int64():
    """int64 NumPy data should infer as an ``int64`` column."""
    values = np.int64([102, 67_228_734, -0])
    frame = pd.DataFrame({'col': values})
    assert sch.infer(frame) == ibis.schema([('col', 'int64')])

Example #22

0

Show file

File: test_schema.py Project: zdog234/ibis

def test_infer_simple_dataframe(column, expected_dtype):
    """Inferring a frame holding only ``column`` yields ``expected_dtype``."""
    frame = pd.DataFrame({'col': column})
    actual = sch.infer(frame)
    assert actual == ibis.schema([('col', expected_dtype)])

Example #23

0

Show file

File: test_datatypes.py Project: wkusnierczyk/ibis

def test_dtype_float64():
    """float64 NumPy data should infer as a ``double`` column."""
    frame = pd.DataFrame({'col': np.float64([-3e43, 43.0, 10_000_000.0])})
    assert sch.infer(frame) == ibis.schema([('col', 'double')])

Example #24

0

Show file

File: pandas_interop.py Project: zbrookle/ibis

 def get_schema(self):
     """Infer and return the schema of the frame held in ``self.df``."""
     inferred = sch.infer(self.df)
     return inferred

Example #25

0

Show file

File: test_datatypes.py Project: wkusnierczyk/ibis

def test_dtype_uint32():
    """uint32 NumPy data should infer as a ``uint32`` column."""
    values = np.uint32([100, 0, 6])
    actual = sch.infer(pd.DataFrame({'col': values}))
    wanted = ibis.schema([('col', 'uint32')])
    assert actual == wanted

Example #26

0

Show file

File: test_pandas_interop.py Project: xmnlab/ibis

 def test_dtype_uint8(self):
     """uint8 NumPy data should infer as a ``uint8`` column."""
     frame = pd.DataFrame({'col': np.uint8([3, 0, 16])})
     assert sch.infer(frame) == ibis.schema([('col', 'uint8')])

Example #27

0

Show file

File: test_datatypes.py Project: wkusnierczyk/ibis

def test_dtype_string():
    """A column of Python strings should infer as a ``string`` column."""
    words = ['foo', 'bar', 'hello']
    frame = pd.DataFrame({'col': words})
    assert sch.infer(frame) == ibis.schema([('col', 'string')])

Example #28

0

Show file

 def table(self, name, schema=None):
     """Return a table expression for the object registered as ``name``.

     The schema is inferred from that object, with ``schema`` forwarded to
     ``sch.infer``.
     """
     registered = self.dictionary[name]
     inferred = sch.infer(registered, schema=schema)
     return self.table_class(name, inferred, self).to_expr()

Example #29

0

Show file

File: test_datatypes.py Project: wkusnierczyk/ibis

def test_dtype_bool():
    """A column of Python booleans should infer as a ``boolean`` column."""
    flags = [True, False, False]
    actual = sch.infer(pd.DataFrame({'col': flags}))
    wanted = ibis.schema([('col', 'boolean')])
    assert actual == wanted

Example #30

0

Show file

 def __init__(self, table, source, schema=None):
     """Set up the instance from ``table`` and remember the table itself.

     ``schema`` is passed to ``sch.infer`` together with ``table``; the
     result, ``table.name`` and ``source`` go to the parent initialiser.
     """
     inferred = sch.infer(table, schema=schema)
     super().__init__(table.name, inferred, source)
     self.sqla_table = table

Example #31

0

Show file

File: __init__.py Project: cpcloud/ibis

 def table(self, name: str, schema: sch.Schema = None):
     """Return a table expression for the object registered as ``name``.

     When ``schema`` is falsy, the entry for ``name`` in ``self.schemas``
     (or ``None``) is forwarded to ``sch.infer`` instead.
     """
     registered = self.dictionary[name]
     hint = schema if schema else self.schemas.get(name, None)
     inferred = sch.infer(registered, schema=hint)
     return self.table_class(name, inferred, self).to_expr()

Example #32

0

Show file

File: client.py Project: cloudera/ibis

 def table(self, name, schema=None):
     """Return a ``PandasTable`` expression for the frame stored as ``name``.

     ``schema`` is forwarded to ``sch.infer`` along with the stored frame.
     """
     frame = self.dictionary[name]
     inferred = sch.infer(frame, schema=schema)
     return PandasTable(name, inferred, self).to_expr()

Example #33

0

Show file

def test_schema_infer(col_data, schema_type):
    """A single-partition dask frame of ``col_data`` infers ``schema_type``."""
    pandas_frame = pd.DataFrame({'col': col_data})
    dask_frame = dd.from_pandas(pandas_frame, npartitions=1)
    assert sch.infer(dask_frame) == ibis.schema([('col', schema_type)])

Example #34

0

Show file

File: client.py Project: shshe/ibis

 def get_schema(self, name, database=None):
     """Fetch the table through ``self._proxy`` and infer its schema.

     ``_ensure_split(name, database)`` supplies the table and dataset ids.
     """
     table_id, dataset_id = _ensure_split(name, database)
     fetched = self._proxy.get_table(table_id, dataset_id)
     return sch.infer(fetched)

Example #35

0

Show file

File: client.py Project: esloch/ibis

 def table(self, name, schema=None):
     """Wrap the frame registered as ``name`` in a ``PandasTable`` expression.

     The schema comes from ``sch.infer``, which also receives ``schema``.
     """
     stored = self.dictionary[name]
     resolved = sch.infer(stored, schema=schema)
     return PandasTable(name, resolved, self).to_expr()

Example #36

0

Show file

File: test_datatypes.py Project: wkusnierczyk/ibis

def test_dtype_int8():
    """int8 NumPy data should infer as an ``int8`` column."""
    frame = pd.DataFrame({'col': np.int8([-3, 9, 17])})
    assert sch.infer(frame) == ibis.schema([('col', 'int8')])

Example #37

0

Show file

 def table(self, name: str, schema: sch.Schema = None) -> DaskTable:
     """Return a ``DaskTable`` expression for the frame stored as ``name``.

     ``schema`` is forwarded to ``sch.infer`` along with the stored frame.
     """
     frame = self.dictionary[name]
     inferred = sch.infer(frame, schema=schema)
     return DaskTable(name, inferred, self).to_expr()

Example #38

0

Show file

File: test_datatypes.py Project: wkusnierczyk/ibis

def test_dtype_int16():
    """int16 NumPy data should infer as an ``int16`` column."""
    values = np.int16([-5, 0, 12])
    actual = sch.infer(pd.DataFrame({'col': values}))
    wanted = ibis.schema([('col', 'int16')])
    assert actual == wanted

Example #39

0

Show file

def test_infer_exhaustive_dataframe():
    """Infer a schema from a dask frame that mixes many column dtypes.

    A pandas frame is assembled from explicitly typed NumPy arrays, an
    object array and plain Python lists (several containing ``None`` or
    ``np.nan``) and then converted to a single-partition dask frame.
    """
    # The same 0..9 values feed the int32, int16 and int8 columns.
    zero_to_nine = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

    # Timestamp column source; the ``None`` entry stays ``None``.
    timestamp_texts = [
        '2010-11-01 00:00:00',
        None,
        '2010-11-01 00:02:00.100000',
        '2010-11-01 00:03:00.300000',
        '2010-11-01 00:04:00.600000',
        '2010-11-01 00:05:00.100000',
        '2010-11-01 00:06:00.150000',
        '2010-11-01 00:07:00.210000',
        '2010-11-01 00:08:00.280000',
        '2010-11-01 00:09:00.360000',
    ]

    pandas_frame = pd.DataFrame({
        'bigint_col': np.array(
            [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], dtype='i8'
        ),
        'bool_col': np.array(
            [True, False, True, False, True,
             None, True, False, True, False],
            dtype=np.bool_,
        ),
        'bool_obj_col': np.array(
            [True, False, np.nan, False, True,
             np.nan, True, np.nan, True, False],
            dtype=np.object_,
        ),
        'date_string_col': [
            '11/01/10', None, '11/01/10', '11/01/10', '11/01/10',
            '11/01/10', '11/01/10', '11/01/10', '11/01/10', '11/01/10',
        ],
        'double_col': np.array(
            [
                0.0, 10.1, np.nan,
                30.299999999999997, 40.399999999999999, 50.5,
                60.599999999999994, 70.700000000000003,
                80.799999999999997, 90.899999999999991,
            ],
            dtype=np.float64,
        ),
        'float_col': np.array(
            [
                np.nan, 1.1000000238418579, 2.2000000476837158,
                3.2999999523162842, 4.4000000953674316, 5.5,
                6.5999999046325684, 7.6999998092651367,
                8.8000001907348633, 9.8999996185302734,
            ],
            dtype='f4',
        ),
        'int_col': np.array(zero_to_nine, dtype='i4'),
        'month': [11, 11, 11, 11, 2, 11, 11, 11, 11, 11],
        'smallint_col': np.array(zero_to_nine, dtype='i2'),
        'string_col': [
            '0', '1', None, 'double , whammy', '4',
            '5', '6', '7', '8', '9',
        ],
        'timestamp_col': [
            None if text is None else pd.Timestamp(text)
            for text in timestamp_texts
        ],
        'tinyint_col': np.array(zero_to_nine, dtype='i1'),
        'year': [2010] * 5 + [2009] * 5,
    })
    df = dd.from_pandas(pandas_frame, npartitions=1)

    # One (column name, ibis type) pair per column, in column order.
    expected = [
        ('bigint_col', dt.int64),
        ('bool_col', dt.boolean),
        ('bool_obj_col', dt.boolean),
        ('date_string_col', dt.string),
        ('double_col', dt.double),
        ('float_col', dt.float),
        ('int_col', dt.int32),
        ('month', dt.int64),
        ('smallint_col', dt.int16),
        ('string_col', dt.string),
        ('timestamp_col', dt.timestamp),
        ('tinyint_col', dt.int8),
        ('year', dt.int64),
    ]

    assert sch.infer(df) == ibis.schema(expected)

Example #40

0

Show file

File: test_datatypes.py Project: wkusnierczyk/ibis

def test_dtype_int32():
    """int32 NumPy data should infer as an ``int32`` column."""
    frame = pd.DataFrame({'col': np.int32([-12, 3, 25000])})
    assert sch.infer(frame) == ibis.schema([('col', 'int32')])