Beispiel #1
0
def test_compound_primary_key_with_single_reference():
    """A foreign key may reference one column of a compound primary key.

    Creates ``products`` with a two-column primary key and ``orders`` with a
    foreign key on ``product_no`` only, then checks the discovered datashape
    of ``orders``.
    """
    with tmpfile('db') as db_path:
        products_uri = 'sqlite:///%s::products' % db_path
        orders_uri = 'sqlite:///%s::orders' % db_path
        products = resource(products_uri,
                            dshape="""
                                var * {
                                    product_no: int32,
                                    product_sku: string,
                                    name: ?string,
                                    price: ?float64
                                }
                            """, primary_key=['product_no', 'product_sku'])
        # TODO: should this fail everywhere? e.g., this fails in postgres, but
        # not in sqlite because postgres doesn't allow partial foreign keys
        # might be best to let the backend handle this
        orders_shape = dshape("""var * {
                          order_id: int32,
                          product_no: map[int32, T],
                          quantity: ?int32
                        }""")
        orders = resource(orders_uri, dshape=orders_shape,
                          foreign_keys={'product_no': products.c.product_no},
                          primary_key=['order_id'])
        expected = dshape(
            """var * {
                order_id: int32,
                product_no: map[int32, {product_no: int32, product_sku: string, name: ?string, price: ?float64}],
                quantity: ?int32
            }
            """
        )
        assert discover(orders) == expected
Beispiel #2
0
def test_schema_of_complex_interaction():
    """(x + y) / z over int columns has a float64 schema, labelled or not."""
    a = TableSymbol("a", "{x: int, y: int, z: int}")
    numerator = a["x"] + a["y"]
    ratio = numerator / a["z"]
    assert ratio.schema == dshape("float64")

    labelled = ratio.label("foo")
    assert labelled.schema == dshape("float64")
Beispiel #3
0
def test_dtype():
    """Columns and comparisons between columns report the expected dtype."""
    accounts = TableSymbol('accounts',
                           '{name: string, balance: int32, id: int32}')
    name_col = accounts['name']
    balance_col = accounts['balance']
    id_col = accounts['id']

    assert name_col.dtype == dshape('string')
    assert balance_col.dtype == dshape('int32')
    assert (balance_col > id_col).dtype == dshape('bool')
Beispiel #4
0
def test_compound_primary_key_with_fkey():
    """Foreign keys on both columns of a compound primary key are discovered.

    ``orders`` references both ``product_no`` and ``product_sku`` of
    ``products``; each foreign-key column is expected to be discovered as a
    ``map`` onto the full ``products`` record.
    """
    with tmpfile('db') as db_path:
        products_uri = 'sqlite:///%s::products' % db_path
        orders_uri = 'sqlite:///%s::orders' % db_path
        products = resource(products_uri,
                            dshape="""
                                var * {
                                    product_no: int32,
                                    product_sku: string,
                                    name: ?string,
                                    price: ?float64
                                }
                            """,
                            primary_key=['product_no', 'product_sku'])
        orders_shape = dshape("""var * {
                          order_id: int32,
                          product_no: map[int32, T],
                          product_sku: map[int32, U],
                          quantity: ?int32
                        }""")
        fkeys = dict(product_no=products.c.product_no,
                     product_sku=products.c.product_sku)
        orders = resource(orders_uri, dshape=orders_shape,
                          primary_key=['order_id'],
                          foreign_keys=fkeys)
        expected = dshape(
            """var * {
                order_id: int32,
                product_no: map[int32, {product_no: int32, product_sku: string, name: ?string, price: ?float64}],
                product_sku: map[int32, {product_no: int32, product_sku: string, name: ?string, price: ?float64}],
                quantity: ?int32
            }
            """
        )
        assert discover(orders) == expected
Beispiel #5
0
 def test_jit_promotion(self):
     """JIT evaluation of an int32/float32 expression matches float64 data."""
     lhs_shape = dshape('10, int32')
     rhs_shape = dshape('10, float32')
     expr = make_expr(lhs_shape, rhs_shape)
     result = blaze.eval(expr, strategy='jit')

     reference = [0, 2, 6, 12, 20, 30, 42, 56, 72, 90]
     expected = blaze.array(reference, dshape=dshape('10, float64'))

     self.assertEqual(type(result), blaze.Array)
     self.assertTrue(np.all(result == expected))
Beispiel #6
0
    def __init__(self, data, name=None, columns=None, schema=None,
            iscolumn=False):
        """Wrap ``data`` as a table, inferring the schema when not given.

        Parameters
        ----------
        data : object
            The underlying data; passed to ``discover`` when ``schema`` is
            falsy.
        name : str, optional
            Table name; when falsy the next value from ``names`` is used.
        columns : list, optional
            Column names. Inferred from record field names, or positional
            indices for tuples, when not supplied.
        schema : optional
            Explicit schema; anything accepted by ``dshape``.
        iscolumn : bool, optional
            Stored unchanged on the instance.

        Raises
        ------
        TypeError
            If column names or column types cannot be inferred from ``data``.
        """
        if not schema:
            schema = discover(data).subshape[0]
            types = None
            if isinstance(schema[0], Tuple):
                # Tuples carry no names: fall back to positional indices.
                columns = columns or list(range(len(schema[0].dshapes)))
                types = schema[0].dshapes
            if isinstance(schema[0], Record):
                columns = columns or schema[0].names
                types = schema[0].types
            if isinstance(schema[0], Fixed):
                # Homogeneous fixed-width rows: one type repeated per column.
                # Column names cannot be inferred here and must be supplied.
                types = (schema[1],) * int(schema[0])
            if not columns:
                # The keyword is `columns=`; the message previously said
                # `column=`, which does not exist.
                raise TypeError("Could not infer column names from data. "
                                "Please specify column names with `columns=` "
                                "keyword")
            if not types:
                raise TypeError("Could not infer data types from data. "
                                "Please specify schema with `schema=` keyword")

            schema = dshape(Record(list(zip(columns, types))))
        self.schema = dshape(schema)

        self.data = data
        self.name = name or next(names)
        self.iscolumn = iscolumn
Beispiel #7
0
    def __init__(self, engine, tablename, primary_key='', schema=None):
        """Bind to the table ``tablename`` on ``engine``, creating it if needed.

        ``engine`` may be an engine object or a string handed to
        ``sql.create_engine``. When the table already exists its schema is
        reflected and, if ``schema`` was given, checked against it. Otherwise
        ``schema`` is required and a new table is defined from it, marking
        the column named ``primary_key`` as the primary key.

        Raises
        ------
        ValueError
            If the given schema differs from the one in the database, or if
            the table does not exist and no usable schema was provided.
        """
        if isinstance(engine, _strtypes):
            engine = sql.create_engine(engine)
        self.engine = engine
        self.tablename = tablename
        metadata = sql.MetaData()

        table_exists = engine.has_table(tablename)
        schema_usable = isinstance(schema, (_strtypes, datashape.DataShape))
        if not table_exists and not schema_usable:
            raise ValueError('Must provide schema or point to valid table. '
                             'Table %s does not exist' % tablename)

        if table_exists:
            metadata.reflect(engine)
            table = metadata.tables[tablename]
            reflected = discover(table).subshape[0]
            if schema and dshape(schema) != reflected:
                raise ValueError("Mismatched schemas:\n"
                                 "\tIn database: %s\n"
                                 "\nGiven: %s" % (reflected, schema))
            schema = reflected
        else:
            columns = dshape_to_alchemy(schema)
            for col in columns:
                if col.name == primary_key:
                    col.primary_key = True
            table = sql.Table(tablename, metadata, *columns)

        self._schema = datashape.dshape(schema)
        self.table = table
        metadata.create_all(engine)
 def __init__(self, url, dshape=None):
     """Store ``url`` and its datashape.

     When ``dshape`` is None the datashape is obtained via
     ``requests.get_remote_datashape(url)``; otherwise the given value is
     used. Either way it is normalised with ``datashape.dshape``.
     """
     from ..io.client import requests
     self.url = url
     shape_source = (requests.get_remote_datashape(url)
                     if dshape is None else dshape)
     self._dshape = datashape.dshape(shape_source)
Beispiel #9
0
def test_keepdims():
    """Reductions with ``keepdims=True`` keep reduced axes with length 1."""
    x = Symbol('x', '5 * 3 * float32')
    sum_cases = [
        (0, '1 * 3 * float32'),
        (1, '5 * 1 * float32'),
        ((0, 1), '1 * 1 * float32'),
    ]
    for axis, expected in sum_cases:
        assert x.sum(axis=axis, keepdims=True).dshape == dshape(expected)

    assert x.std(axis=0, keepdims=True).shape == (1, 3)
Beispiel #10
0
def test_concat_arr():
    """Concatenating arrays adds fixed lengths; ``var`` makes the result var."""
    a = symbol('a', '3 * int32')
    b = symbol('b', '5 * int32')
    v = symbol('v', 'var * int32')

    fixed_result = concat(a, b)
    var_result = concat(a, v)

    assert fixed_result.dshape == dshape('8 * int32')
    assert var_result.dshape == dshape('var * int32')
Beispiel #11
0
def test_concat_table():
    """Concatenating tables adds fixed lengths; ``var`` makes the result var."""
    record = '{a: int32, b: int32}'
    a = symbol('a', '3 * ' + record)
    b = symbol('a', '5 * ' + record)
    v = symbol('v', 'var * ' + record)

    fixed_result = concat(a, b)
    var_result = concat(a, v)

    assert fixed_result.dshape == dshape('8 * {a: int32, b: int32}')
    assert var_result.dshape == dshape('var * {a: int32, b: int32}')
Beispiel #12
0
 def test_init(self):
     """A JSON_Streaming descriptor yields the expected rows and datashape.

     The datashape may be reported with either a ``var`` or a fixed leading
     dimension; both are accepted.
     """
     with filetext(self.text) as fn:
         dd = JSON_Streaming(fn, schema=self.schema)
         # assertEquals is a deprecated alias that was removed in
         # Python 3.12; assertEqual is the supported spelling.
         self.assertEqual(tuple(dd), self.tuples)
         acceptable = {
             datashape.dshape('var * {name: string, amount: int32}'),
             datashape.dshape('5 * {name: string, amount: int32}'),
         }
         assert dd.dshape in acceptable
Beispiel #13
0
def test_merge_options():
    """Merging optional columns preserves their option types."""
    s = symbol('s', 'var * {a: ?A, b: ?B}')

    merged = merge(a=s.a, b=s.b)
    assert_dshape_equal(merged.dshape, dshape('var * {a: ?A, b: ?B}'))
    for field, expected in (('a', 'var * ?A'), ('b', 'var * ?B')):
        assert_dshape_equal(getattr(merged, field).dshape, dshape(expected))
Beispiel #14
0
 def test_datetime(self):
     """The ``tz`` parameter of ``datetime`` is parsed and printed back."""
     tz_cases = [
         ('datetime', None),
         ('datetime[tz="UTC"]', 'UTC'),
         ('datetime[tz="America/Vancouver"]', 'America/Vancouver'),
     ]
     for text, expected_tz in tz_cases:
         self.assertEqual(dshape(text)[0].tz, expected_tz)

     printed = str(dshape('datetime[tz="UTC"]'))
     self.assertEqual(printed, "datetime[tz='UTC']")
Beispiel #15
0
def test_schema_of_complex_interaction():
    """(x + y) / z over int columns has a float64 schema, labelled or not."""
    a = symbol('a', 'var * {x: int, y: int, z: int}')
    numerator = a['x'] + a['y']
    ratio = numerator / a['z']
    assert ratio.schema == dshape('float64')

    labelled = ratio.label('foo')
    assert labelled.schema == dshape('float64')
Beispiel #16
0
def test_schema_of_complex_interaction():
    """(x + y) / z over int columns has a ``real`` schema, labelled or not."""
    a = TableSymbol('a', '{x: int, y: int, z: int}')
    numerator = a['x'] + a['y']
    ratio = numerator / a['z']
    assert ratio.schema == dshape('real')

    labelled = ratio.label('foo')
    assert labelled.schema == dshape('real')
 def test_best_match_ellipses(self):
     """``best_match`` on ``g`` resolves a signature containing an ellipsis."""
     arg_shapes = [dshape('10, T1, int32'), dshape('..., float32')]
     match = best_match(g, arg_shapes)

     expected_sig = 'X, Y, float32 -> ..., float32 -> X, int32'
     expected_resolved = '10, T1, float32 -> ..., float32 -> 10, int32'
     self.assertEqual(str(match.sig), expected_sig)
     self.assertEqual(str(match.resolved_sig), expected_resolved)
Beispiel #18
0
def discover_typeengine(typ):
    """Map a SQLAlchemy type (class or instance) to a datashape type.

    Lookup order:

    1. ``sa.types.Interval`` instances are converted to ``TimeDelta`` with a
       unit derived from their second/day precision.
    2. ``typ`` itself, then ``type(typ)``, is looked up in ``revtypes``.
    3. String/Unicode instances become ``datashape.String`` carrying the
       column length and collation.
    4. Otherwise ``revtypes`` is scanned for a class that ``typ`` (or its
       ``impl`` attribute) is an instance of, or a key equal to ``typ``.

    Raises
    ------
    ValueError
        For an Interval whose precision combination cannot be mapped.
    NotImplementedError
        When no entry matches ``typ``.
    """
    if isinstance(typ, sa.types.Interval):
        second_precision = typ.second_precision
        day_precision = typ.day_precision
        if second_precision is None and day_precision is None:
            return datashape.TimeDelta(unit='us')
        elif second_precision == 0 and day_precision == 0:
            return datashape.TimeDelta(unit='s')

        if second_precision in units_of_power and not day_precision:
            units = units_of_power[second_precision]
        # Guard against None: `None > 0` raises TypeError on Python 3.
        elif day_precision is not None and day_precision > 0:
            units = 'D'
        else:
            # %r rather than %d so a None precision is reported instead of
            # raising TypeError while building the message.
            raise ValueError('Cannot infer INTERVAL type with parameters '
                             'second_precision=%r, day_precision=%r' %
                             (second_precision, day_precision))
        return datashape.TimeDelta(unit=units)
    if typ in revtypes:
        return dshape(revtypes[typ])[0]
    if type(typ) in revtypes:
        return dshape(revtypes[type(typ)])[0]
    if isinstance(typ, (sa.String, sa.Unicode)):
        return datashape.String(typ.length, typ.collation)
    else:
        # NOTE(review): this fallback returns the raw ``revtypes`` value,
        # whereas the direct lookups above pass it through ``dshape(...)[0]``
        # - confirm the difference is intentional.
        for k, v in revtypes.items():
            if isinstance(k, type) and (isinstance(typ, k) or
                                        hasattr(typ, 'impl') and
                                        isinstance(typ.impl, k)):
                return v
            if k == typ:
                return v
    raise NotImplementedError("No SQL-datashape match for type %s" % typ)
Beispiel #19
0
def test_integrative():
    """Numeric strings inside records are discovered as int64."""
    records = [
        {'name': 'Alice', 'amount': '100'},
        {'name': 'Bob', 'amount': '200'},
        {'name': 'Charlie', 'amount': '300'},
    ]
    expected = dshape('3 * {amount: int64, name: string}')

    assert dshape(discover(records)) == expected
Beispiel #20
0
    def __init__(self, path, mode='rt', schema=None, dshape=None, open=open,
                 nrows_discovery=50):
        """Describe a line-delimited JSON file at ``path``.

        Parameters
        ----------
        path : str
            Location of the file.
        mode : str, optional
            Stored on the instance; discovery always opens with ``'r'``.
        schema, dshape : optional
            Record schema / full datashape. When only one is given the other
            is derived from it. When neither is given, both are discovered
            from the file contents.
        open : callable, optional
            Function used to open ``path``.
        nrows_discovery : int, optional
            Stop index of the line slice sampled during discovery.

        Raises
        ------
        ValueError
            If discovery is required but the file cannot be opened.
        """
        self.path = path
        self.mode = mode
        self.open = open
        if dshape:
            dshape = datashape.dshape(dshape)
        if schema:
            schema = datashape.dshape(schema)
        if dshape and not schema and isdimension(dshape[0]):
            schema = dshape.subshape[0]
        if schema and not dshape:
            dshape = var * schema

        if not schema and not dshape:
            try:
                f = open(self.path, 'r')
            except Exception:
                # `Exception` rather than a bare except, so KeyboardInterrupt
                # and SystemExit are not turned into a ValueError.
                raise ValueError('No schema detected')
            try:
                # NOTE(review): islice starts at index 1, so the first line
                # of the file is not sampled - confirm this is intended.
                data = list(map(json.loads,
                                islice(f, 1, nrows_discovery)))
            finally:
                # Close the handle even when a line fails to parse.
                f.close()
            dshape = discover(data)
            schema = dshape.subshape[0]
        # Initially the array is not loaded (is this necessary?)
        self._cache_arr = None

        self._schema = schema
        self._dshape = dshape
 def test_from_numpy_fields(self):
     """A NumPy record dtype converts to matching field names and types."""
     import numpy as np
     record_dtype = np.dtype('i4,i8,f8')
     ds = datashape.from_numpy((), record_dtype)

     expected_names = ['f0', 'f1', 'f2']
     expected_types = [dshape('int32'), dshape('int64'), dshape('float64')]
     self.assertEqual(ds.names, expected_names)
     self.assertEqual(ds.types, expected_types)
Beispiel #22
0
    def __init__(self, data, name=None, columns=None, schema=None,
            iscolumn=False):
        """Wrap ``data`` as a table, inferring the schema when not given.

        Parameters
        ----------
        data : object
            The underlying data; passed to ``discover`` when ``schema`` is
            falsy.
        name : str, optional
            Table name; when falsy the next value from ``names`` is used.
        columns : list, optional
            Column names. Inferred from record field names, or positional
            indices for tuples, when not supplied.
        schema : optional
            Explicit schema; anything accepted by ``dshape``.
        iscolumn : bool, optional
            Stored unchanged on the instance.

        Raises
        ------
        TypeError
            If column names or types cannot be inferred, or if ``data``
            exposes its own ``schema`` that differs from the resolved one.
        """
        if not schema:
            schema = discover(data).subshape[0]
            types = None
            if isinstance(schema[0], Tuple):
                # Tuples carry no names: fall back to positional indices.
                columns = columns or list(range(len(schema[0].dshapes)))
                types = schema[0].dshapes
            if isinstance(schema[0], Record):
                columns = columns or schema[0].names
                types = schema[0].types
            if isinstance(schema[0], Fixed):
                # Homogeneous fixed-width rows: one type repeated per column.
                # Column names cannot be inferred here and must be supplied.
                types = (schema[1],) * int(schema[0])
            if not columns:
                # The keyword is `columns=`; the message previously said
                # `column=`, which does not exist.
                raise TypeError("Could not infer column names from data. "
                                "Please specify column names with `columns=` "
                                "keyword")
            if not types:
                raise TypeError("Could not infer data types from data. "
                                "Please specify schema with `schema=` keyword")

            schema = dshape(Record(list(zip(columns, types))))
        self.schema = dshape(schema)

        self.data = data

        # Reject data whose own declared schema disagrees with ours.
        if (hasattr(data, 'schema')
             and isinstance(data.schema, (DataShape, str))
             and self.schema != data.schema):
            raise TypeError('%s schema %s does not match %s schema %s' %
                            (type(data).__name__, data.schema,
                             type(self).__name__, self.schema))

        self._name = name or next(names)
        self.iscolumn = iscolumn
 def test_match_equation_dtype(self):
     """``_match_equation`` pairs the source dtype with the target dtype."""
     # A simple coercion
     coercion = _match_equation(dshape('int32'), dshape('int64'))
     self.assertEqual(coercion, [(T.int32, T.int64)])

     # Matching a data type variable
     typevar_match = _match_equation(dshape('int32'), dshape('D'))
     self.assertEqual(typevar_match, [(T.int32, T.TypeVar('D'))])
    def test_mixed_quotes_01(self):
        """Field names mixing quote styles survive a str/parse round trip."""
        quotes_dshape = """{ 'field \" with \\' quotes' : string, 'doublequote \" field \\'' : int64 }"""

        parsed = dshape(quotes_dshape)
        reparsed = dshape(str(parsed))

        assert str(parsed) == str(reparsed)
Beispiel #25
0
 def test_cat_dshapes(self):
     """``cat_dshapes`` sums the leading dimension of its inputs."""
     # concatenating 1 dshape is a no-op
     single = dshape("3 * 10 * int32")
     self.assertEqual(datashape.cat_dshapes([single]), single)

     # two dshapes
     pair = [dshape("3 * 10 * int32"), dshape("7 * 10 * int32")]
     combined = datashape.cat_dshapes(pair)
     self.assertEqual(combined, dshape("10 * 10 * int32"))
    def test_spaces_02(self):
        big_space_dshape = """{ 'Unique Key' : ?int64, 'Created Date' : string,
'Closed Date' : string, Agency : string, 'Agency Name' : string,
'Complaint Type' : string, Descriptor : string, 'Location Type' : string,
'Incident Zip' : ?int64, 'Incident Address' : ?string, 'Street Name' : ?string,
'Cross Street 1' : ?string, 'Cross Street 2' : ?string,
'Intersection Street 1' : ?string, 'Intersection Street 2' : ?string,
'Address Type' : string, City : string, Landmark : string,
'Facility Type' : string, Status : string, 'Due Date' : string,
'Resolution Action Updated Date' : string, 'Community Board' : string,
Borough : string, 'X Coordinate (State Plane)' : ?int64,
'Y Coordinate (State Plane)' : ?int64, 'Park Facility Name' : string,
'Park Borough' : string, 'School Name' : string, 'School Number' : string,
'School Region' : string, 'School Code' : string,
'School Phone Number' : string, 'School Address' : string,
'School City' : string, 'School State' : string, 'School Zip' : string,
'School Not Found' : string, 'School or Citywide Complaint' : string,
'Vehicle Type' : string, 'Taxi Company Borough' : string,
'Taxi Pick Up Location' : string, 'Bridge Highway Name' : string,
'Bridge Highway Direction' : string, 'Road Ramp' : string,
'Bridge Highway Segment' : string, 'Garage Lot Name' : string,
'Ferry Direction' : string, 'Ferry Terminal Name' : string,
Latitude : ?float64, Longitude : ?float64, Location : string }"""

        ds1 = dshape(big_space_dshape)
        ds2 = dshape(str(ds1))

        assert str(ds1) == str(ds2)
Beispiel #27
0
    def __init__(self, path, mode='rt', schema=None, dshape=None, open=open,
            **kwargs):
        """Describe a JSON file at ``path``.

        Parameters
        ----------
        path : str
            Location of the file; its absolute form is kept in ``_abspath``.
        mode : str, optional
            Stored on the instance; discovery always opens with ``'r'``.
        schema, dshape : optional
            Record schema / full datashape. When only ``dshape`` is given and
            its first element is a dimension, ``schema`` is derived from it.
            When neither is given, ``dshape`` is discovered from the file.
        open : callable, optional
            Function used to open ``path``.
        **kwargs
            Accepted and ignored by this constructor.

        Raises
        ------
        ValueError
            If discovery is required but the file cannot be opened.
        """
        self.path = path
        self._abspath = os.path.abspath(path)
        self.mode = mode
        self.open = open
        if dshape:
            dshape = datashape.dshape(dshape)
        if schema:
            schema = datashape.dshape(schema)
        if dshape and not schema and isdimension(dshape[0]):
            schema = dshape.subarray(1)

        if not schema and not dshape:
            try:
                f = open(self.path, 'r')
            except Exception:
                # `Exception` rather than a bare except, so KeyboardInterrupt
                # and SystemExit are not turned into a ValueError.
                raise ValueError('No schema detected')
            try:
                dshape = discover(json.load(f))
            finally:
                # Close the handle even when parsing or discovery fails.
                f.close()
        # Initially the array is not loaded (is this necessary?)
        self._cache_arr = None

        self._schema = schema
        self._dshape = dshape
Beispiel #28
0
def test_foreign_keys_auto_construct():
    """A foreign-key column is discovered as a ``map`` onto the target row."""
    with tmpfile('db') as db_path:
        products_uri = 'sqlite:///%s::products' % db_path
        orders_uri = 'sqlite:///%s::orders' % db_path
        products = resource(products_uri,
                            dshape="""
                                var * {
                                    product_no: int32,
                                    name: ?string,
                                    price: ?float64
                                }
                            """,
                            primary_key=['product_no'])
        orders_shape = dshape("""var * {
                          order_id: int32,
                          product_no: map[int32, T],
                          quantity: ?int32
                        }""")
        orders = resource(orders_uri, dshape=orders_shape,
                          foreign_keys={'product_no': products.c.product_no},
                          primary_key=['order_id'])
        expected = dshape("""
            var * {
                order_id: int32,
                product_no: map[int32, {
                                    product_no: int32,
                                    name: ?string,
                                    price: ?float64
                                }],
                quantity: ?int32
            }
        """)
        assert discover(orders) == expected
Beispiel #29
0
def Data(data, dshape=None, name=None, fields=None, columns=None, schema=None,
         **kwargs):
    """Build an ``InteractiveSymbol`` around ``data``.

    Parameters
    ----------
    data : object or str
        The data to wrap. A string is treated as a resource URI and loaded
        with ``resource``; a trailing ``::sub/uri`` part is split off and
        applied as successive field lookups on the result.
    dshape : optional
        Full datashape. Discovered from ``data`` when not given.
    name : optional
        Passed through to ``InteractiveSymbol``.
    fields : list, optional
        Field names applied to the discovered datashape.
    columns : list, optional
        Deprecated alias for ``fields``.
    schema : optional
        Record schema; converted to ``var * schema``. Mutually exclusive
        with ``dshape``.
    **kwargs
        Forwarded to ``resource`` when ``data`` is a string.

    Raises
    ------
    ValueError
        If both ``schema`` and ``dshape`` are given, or if ``fields`` differs
        from the field names discovered on record data.
    """
    sub_uri = ''
    if isinstance(data, _strtypes):
        if '::' in data:
            # NOTE(review): this unpacking assumes exactly one '::' in the
            # string - confirm against URIs that contain more.
            data, sub_uri = data.split('::')
        data = resource(data, schema=schema, dshape=dshape, columns=columns,
                        **kwargs)
    # Materialise one-shot iterators, except types listed in not_an_iterator.
    if (isinstance(data, Iterator) and
            not isinstance(data, tuple(not_an_iterator))):
        data = tuple(data)
    if columns:
        warnings.warn("columns kwarg deprecated.  Use fields instead",
                      DeprecationWarning)
    if columns and not fields:
        fields = columns
    # NOTE(review): this check runs after resource() has already been called
    # with both values for string inputs.
    if schema and dshape:
        raise ValueError("Please specify one of schema= or dshape= keyword"
                         " arguments")
    if schema and not dshape:
        dshape = var * schema
    if dshape and isinstance(dshape, _strtypes):
        dshape = datashape.dshape(dshape)
    if not dshape:
        dshape = discover(data)
        types = None
        # With `fields` given, rebuild the measure as a Record using them.
        if isinstance(dshape.measure, Tuple) and fields:
            types = dshape[1].dshapes
            schema = Record(list(zip(fields, types)))
            dshape = DataShape(*(dshape.shape + (schema,)))
        elif isscalar(dshape.measure) and fields:
            # The innermost dimension becomes the record: one field per slot.
            types = (dshape.measure,) * int(dshape[-2])
            schema = Record(list(zip(fields, types)))
            dshape = DataShape(*(dshape.shape[:-1] + (schema,)))
        elif isrecord(dshape.measure) and fields:
            # Records already have names; `fields` must match them exactly.
            ds = discover(data)
            assert isrecord(ds.measure)
            names = ds.measure.names
            if names != fields:
                raise ValueError('data column names %s\n'
                                 '\tnot equal to fields parameter %s,\n'
                                 '\tuse Data(data).relabel(%s) to rename fields'
                                 % (names,
                                    fields,
                                    ', '.join('%s=%r' % (k, v)
                                              for k, v in zip(names, fields))))
            types = dshape.measure.types
            schema = Record(list(zip(fields, types)))
            dshape = DataShape(*(dshape.shape + (schema,)))

    ds = datashape.dshape(dshape)
    result = InteractiveSymbol(data, ds, name)

    # Walk the '/'-separated sub-URI, indexing into the result at each step.
    if sub_uri:
        for field in sub_uri.split('/'):
            if field:
                result = result[field]

    return result
def test_discover_mixed():
    """Mixed int/float rows are discovered as a tuple or a float array."""
    int_type = discover(1)
    float_type = discover(1.0)

    # Every row has the same int, int, float, float layout.
    uniform_rows = [[1, 2, 1.0, 2.0]] * 10
    expected_tuple = 10 * Tuple([int_type, int_type, float_type, float_type])
    assert dshape(discover(uniform_rows)) == expected_tuple

    # Rows disagree on which positions hold ints and which hold floats.
    alternating_rows = [[1, 2, 1.0, 2.0], [1.0, 2.0, 1, 2]] * 5
    expected_array = 10 * (4 * float_type)
    assert dshape(discover(alternating_rows)) == expected_array
Beispiel #31
0
def test_create_with_raw_data():
    """``Data`` built from raw rows plus field names gets schema and name."""
    field_names = ['name', 'amount']
    t = Data(data, fields=field_names)

    expected_schema = dshape('{name: string, amount: int64}')
    assert t.schema == expected_schema
    assert t.name
    assert t.data == data
Beispiel #32
0
 def out_dshape(self, in_dshape):
     """Return a record datashape pairing each key with its value's output.

     Each entry of ``self.values`` is asked for its own ``out_dshape`` given
     ``in_dshape``; results are paired with ``self.keys`` in order.
     """
     record_fields = []
     for key, value in zip(self.keys, self.values):
         record_fields.append((key, value.out_dshape(in_dshape)))
     return dshape(Record(record_fields))
Beispiel #33
0
 def _schema(self):
     """Return ``self._dtype`` converted with ``dshape``."""
     schema = dshape(self._dtype)
     return schema
Beispiel #34
0
def test_discover_on_data():
    """``discover`` on the module-level ``t`` yields the expected datashape."""
    expected = dshape("2 * {name: string, amount: int64}")
    assert discover(t) == expected
Beispiel #35
0
from __future__ import absolute_import, division, print_function

import os
from itertools import product
import pytest
pytest.importorskip('sqlalchemy')

from datashape import dshape, discover
from odo import resource, odo
from odo.utils import tmpfile, filetext

# Datashape shared by the CSV fixture and the SQL table in the tests below.
ds = dshape('var *  {a: int32, b: int32}')
# Sample rows written to the CSV fixture: three (a, b) pairs.
data = [(1, 2), (10, 20), (100, 200)]


# pytest.yield_fixture is deprecated; plain pytest.fixture supports yield.
@pytest.fixture
def csv():
    """Yield a header-less CSV resource holding ``data`` in a temporary file.

    The file is created with ``tmpfile`` and only exists while the fixture
    is active, i.e. for the duration of the requesting test.
    """
    with tmpfile('csv') as filename:
        yield odo(data, filename, dshape=ds, has_header=False)


def test_simple_into(csv):
    """Load the CSV fixture into a table of a temporary SQLite database.

    NOTE(review): the visible body stops after obtaining a cursor and makes
    no assertions - presumably the snippet is truncated; confirm against the
    full source.
    """
    tbl = 'testtable'
    with tmpfile('db') as filename:
        engine = resource('sqlite:///' + filename)
        t = resource('sqlite:///' + filename + '::' + tbl, dshape=ds)

        # Copy the CSV rows into the freshly declared table.
        odo(csv, t, dshape=ds)
        conn = engine.raw_connection()
        cursor = conn.cursor()
Beispiel #36
0
def create_from_datashape(o, ds, **kwargs):
    """Convert ``ds`` with ``dshape`` and call ``create_from_datashape`` again.

    NOTE(review): no dispatch decorator is visible here; presumably this is
    one overload of a multiple-dispatch function - confirm in the full file.
    """
    parsed = dshape(ds)
    return create_from_datashape(o, parsed, **kwargs)
Beispiel #37
0
def test_Field():
    """Record fields are exposed as attributes with the right shapes."""
    e = symbol('e', '3 * 5 * {name: string, amount: int}')
    assert 'name' in dir(e)

    name_field = e.name
    assert name_field.dshape == dshape('3 * 5 * string')
    assert name_field.schema == dshape('string')
    assert e.amount._name == 'amount'
Beispiel #38
0
def test_point_validate():
    """``Point.validate`` accepts one schema and rejects a string ``x``."""
    point = Point('x', 'y')
    accepted = dshape("{x: int32, y: float32}")
    point.validate(accepted)

    with pytest.raises(ValueError):
        point.validate(dshape("{x: string, y: float32}"))
Beispiel #39
0
 def dshape(self):
     """Return the stored datashape, or ``Var() * self.schema`` if unset."""
     shape = self._dshape
     if not shape:
         shape = datashape.Var() * self.schema
     return datashape.dshape(shape)
Beispiel #40
0
def test_table_raises_on_inconsistent_inputs():
    """Passing both ``schema`` and ``dshape`` to ``Data`` raises ValueError."""
    record = "{name: string, amount: float32}"
    with pytest.raises(ValueError):
        Data(data,
             schema='{name: string, amount: float32}',
             dshape=dshape(record))
Beispiel #41
0
def test_scalar_sql_compute():
    """A scalar reduction over an in-memory SQLite table reprs as its value."""
    table_shape = dshape('var * {name: string, amount: int}')
    t = into('sqlite:///:memory:::t', data, dshape=table_shape)
    d = Data(t)

    total = d.amount.sum()
    assert repr(total) == '300'
Beispiel #42
0
def test_create_with_schema():
    """An explicit ``schema`` string is reflected in the resulting schema."""
    schema_text = '{name: string, amount: float32}'
    t = Data(data, schema=schema_text)
    assert t.schema == dshape('{name: string, amount: float32}')
Beispiel #43
0
 def out_dshape(self, input_dshape):
     """Return a record with one ``int32`` field per category of the column.

     The categories are read from the ``self.column`` field of
     ``input_dshape.measure``.
     """
     column_type = input_dshape.measure[self.column]
     record_fields = []
     for category in column_type.categories:
         record_fields.append((category, ct.int32))
     return dshape(Record(record_fields))
Beispiel #44
0
    def _schema(self):
        """Compute the record schema of the join result.

        Joined-on fields come first (typed by promoting the left and right
        field types), followed by the remaining left fields, then the
        remaining right fields.

        Examples
        --------
        >>> from blaze import symbol
        >>> t = symbol('t', 'var * {name: string, amount: int}')
        >>> s = symbol('t', 'var * {name: string, id: int}')

        >>> join(t, s).schema
        dshape("{name: string, amount: int32, id: int32}")

        >>> join(t, s, how='left').schema
        dshape("{name: string, amount: int32, id: ?int32}")

        Overlapping but non-joined fields append _left, _right

        >>> a = symbol('a', 'var * {x: int, y: int}')
        >>> b = symbol('b', 'var * {x: int, y: int}')
        >>> join(a, b, 'x').fields
        ['x', 'y_left', 'y_right']
        """

        # Wrap a type in Option unless it already is one.
        option = lambda dt: dt if isinstance(dt, Option) else Option(dt)

        # Normalise the join keys: a non-list value becomes a 1-tuple.
        on_left = self.on_left
        if not isinstance(on_left, list):
            on_left = on_left,

        on_right = self.on_right
        if not isinstance(on_right, list):
            on_right = on_right,

        # Re-key the right-hand field types through the right->left key
        # mapping, so they can be looked up by left-hand field name.
        # NOTE(review): keys that are not join keys map to None via `.get`.
        right_types = keymap(
            dict(zip(on_right, on_left)).get,
            self.rhs.dshape.measure.dict,
        )
        # Left-hand fields whose name is in on_left, each promoted against
        # the matching right-hand type (presumably `op.contains` is a curried
        # membership test - confirm). The enumerate index `n` is unused.
        joined = ((name, promote(dt, right_types[name], promote_option=False))
                  for n, (name, dt) in enumerate(
                      filter(
                          compose(op.contains(on_left), first),
                          self.lhs.dshape.measure.fields,
                      )))

        # Fields of each side that are not join keys.
        left = [(name, dt) for name, dt in zip(
            self.lhs.fields, types_of_fields(self.lhs.fields, self.lhs))
                if name not in on_left]

        right = [(name, dt) for name, dt in zip(
            self.rhs.fields, types_of_fields(self.rhs.fields, self.rhs))
                 if name not in on_right]

        # Handle the overlapping but non-joined case: names present on both
        # sides that are not join keys get a suffix to disambiguate them.
        left_other = set(name for name, dt in left if name not in on_left)
        right_other = set(name for name, dt in right if name not in on_right)
        overlap = left_other & right_other

        left_suffix, right_suffix = self.suffixes
        left = ((name + left_suffix if name in overlap else name, dt)
                for name, dt in left)
        right = ((name + right_suffix if name in overlap else name, dt)
                 for name, dt in right)

        # For 'right'/'outer' the left fields become optional; for
        # 'left'/'outer' the right fields become optional.
        if self.how in ('right', 'outer'):
            left = ((name, option(dt)) for name, dt in left)
        if self.how in ('left', 'outer'):
            right = ((name, option(dt)) for name, dt in right)

        return dshape(Record(chain(joined, left, right)))
Beispiel #45
0
def test_discover_dataframe():
    """A DataFrame of strings and ints is discovered as ?string / int64."""
    rows = [('Alice', 100), ('Bob', 200)]
    df = pd.DataFrame(rows, columns=['name', 'amount'])

    expected = dshape('2 * {name: ?string, amount: int64}')
    assert discover(df) == expected
Beispiel #46
0
def test_reduction_dshape():
    """``sum`` drops the reduced axes and promotes float32 to float64."""
    x = symbol('x', '5 * 3 * float32')
    assert x.sum().dshape == dshape('float64')

    axis_cases = [
        (0, '3 * float64'),
        (1, '5 * float64'),
        ((0, 1), 'float64'),
    ]
    for axis, expected in axis_cases:
        assert x.sum(axis=axis).dshape == dshape(expected)
Beispiel #47
0
def test_Symbol():
    """A symbol exposes its datashape, its shape and its name as ``str``."""
    shape_text = '3 * 5 * {name: string, amount: int}'
    e = symbol('e', shape_text)

    assert e.dshape == dshape('3 * 5 * {name: string, amount: int}')
    assert e.shape == (3, 5)
    assert str(e) == 'e'
Beispiel #48
0
def test_summary_keepdims():
    """``summary`` is scalar by default and keeps unit dims with keepdims."""
    x = symbol('x', '5 * 3 * float32')

    collapsed = summary(a=x.min(), b=x.max())
    assert collapsed.dshape == dshape('{a: float32, b: float32}')

    kept = summary(a=x.min(), b=x.max(), keepdims=True)
    assert kept.dshape == dshape('1 * 1 * {a: float32, b: float32}')
Beispiel #49
0
def test_csv_infer_header():
    """Loading a CSV with a header row into SQLite uses it for column names."""
    csv_text = 'a,b\n1,2\n3,4'
    with tmpfile('db') as dbfilename:
        with filetext(csv_text, extension='csv') as csvfilename:
            target = 'sqlite:///%s::mytable' % dbfilename
            t = odo(csvfilename, target)

            assert discover(t) == dshape('var * {a: int64, b: int64}')
            assert odo(t, set) == {(1, 2), (3, 4)}
Beispiel #50
0
def _make_sig(tplist):
    """Convert a sequence of types into a datashape function signature.

    Each type is rendered as ``"A..., <type>"``. The first entry of
    ``tplist`` is moved to the end of the argument list passed to
    ``datashape.Function``.
    """
    shapes = []
    for tp in tplist:
        shapes.append(datashape.dshape("A..., " + str(tp)))
    first_shape = shapes[0]
    reordered = shapes[1:] + [first_shape]
    return datashape.Function(*reordered)
Beispiel #51
0
def test_table_raises_on_inconsistent_inputs():
    """Passing both ``schema`` and ``dshape`` raises a descriptive error."""
    conflicting_dshape = dshape("{name: string, amount: float32}")
    with pytest.raises(ValueError) as excinfo:
        data(tdata, schema='{name: string, amount: float32}',
             dshape=conflicting_dshape)

    message = str(excinfo.value)
    assert "specify one of schema= or dshape= keyword" in message
Beispiel #52
0
def test_apply():
    """``apply`` takes its datashape and schema from the ``dshape`` argument."""
    t = Symbol('t', 'var * {name: string, amount: int32, id: int32}')
    amount = t['amount']

    scalar_result = amount.apply(sum, dshape='real')
    array_result = amount.apply(sum, dshape='3 * real')

    assert scalar_result.dshape == dshape('real')
    assert array_result.schema == dshape('real')
Beispiel #53
0
    expr = tdata.name[tdata.balance > a]
    assert expr_repr(expr) == 'data[data.balance > a].name'


def test_isidentical_regr():
    """Field access on NaN-valued record data is identical to itself."""
    # regression test for #1387
    nan_rows = [(np.nan,), (np.nan,)]
    tdata = np.array(nan_rows, dtype=[('a', 'float64')])
    ds = data(tdata)

    assert ds.a.isidentical(ds.a)


@pytest.mark.parametrize(
    'data,dshape,exp_type',
    [
        (1, symbol('x', 'int').dshape, int),
        # test 1-d to series
        (
            into(da.core.Array, [1, 2], chunks=(10,)),
            dshape('2 * int'),
            pd.Series,
        ),
        # test 2-d tabular to dataframe
        (
            into(da.core.Array,
                 [{'a': 1, 'b': 2}, {'a': 3, 'b': 4}],
                 chunks=(10, 10)),
            dshape('2 * {a: int, b: int}'),
            pd.DataFrame,
        ),
        # test 2-d non tabular to ndarray
        (
            into(da.core.Array, [[1, 2], [3, 4]], chunks=(10, 10)),
            dshape('2 *  2 * int'),
            np.ndarray,
        ),
    ],
)
def test_coerce_core(data, dshape, exp_type):
    """``coerce_core`` returns an instance of the expected in-memory type."""
    coerced = coerce_core(data, dshape)
    assert isinstance(coerced, exp_type)
Beispiel #54
0
def test_joined_column_first_in_schema():
    """The shared join column is listed first in the joined schema."""
    t = TableSymbol('t', '{x: int, y: int, z: int}')
    s = TableSymbol('s', '{w: int, y: int}')

    joined = join(t, s)
    assert joined.schema == dshape('{y: int, x: int, z: int, w: int}')
Beispiel #55
0
def test_dshape():
    """A ``TableSymbol`` built from a record schema has a ``var`` dimension."""
    t = TableSymbol('t', '{name: string, amount: int}')
    expected = dshape('var * {name: string, amount: int}')
    assert t.dshape == expected
Beispiel #56
0
def test_Projection_retains_shape():
    """Projecting columns keeps the table's fixed leading dimension."""
    t = TableSymbol('t', '5 * {name: string, amount: int, id: int32}')

    projected = t[['name', 'amount']]
    assert projected.dshape == dshape('5 * {name: string, amount: int}')
Beispiel #57
0
def test_resource_with_variable_length():
    """A ``var`` leading dimension creates a resource with zero rows."""
    with tmpfile('.hdf5') as path:
        var_shape = datashape.dshape('var * 4 * int32')
        r = resource(path + '::/data', dshape=var_shape)

        assert r.shape == (0, 4)
Beispiel #58
0
    h = hash(e)
    assert isinstance(h, int)
    assert h == hash(e)

    assert hash(symbol('e', 'int')) == hash(symbol('e', 'int'))

    f = symbol('f', 'int')
    assert hash(e) != hash(f)

    assert hash(e._subs({'e': 'f'})) != hash(e)
    assert hash(e._subs({'e': 'f'})) == hash(f)


@pytest.mark.parametrize(
    'dshape',
    [var * float32, dshape('var * float32'), 'var * float32'],
)
def test_coerce(dshape):
    """``coerce`` changes the measure and records the target type in ``to``."""
    # The ``dshape`` parameter shadows the module-level function here, hence
    # the qualified ``datashape.dshape`` call below.
    s = symbol('s', dshape)
    coerced = s.coerce('int64')

    assert str(coerced) == "s.coerce(to='int64')"
    assert coerced.dshape == var * int64
    assert coerced.schema == datashape.dshape('int64')
    assert coerced.schema == coerced.to


@pytest.mark.xfail(raises=AttributeError, reason='Should this be valid?')
def test_coerce_record():
    """Coercing to a record type; currently expected to fail (xfail)."""
    s = symbol('s', 'var * {a: int64, b: float64}')
    target = '{a: float64, b: float32}'
    coerced = s.coerce(target)
    assert str(coerced) == "s.coerce(to='{a: float64, b: float32}')"
Beispiel #59
0
def test_coalesce():
    """``coalesce`` simplifies trivial cases and types the rest correctly."""

    def check_simplifies_to(lhs, rhs, chosen):
        # coalesce should collapse to exactly one of its arguments
        expr = coalesce(lhs, rhs)
        assert expr.isidentical(chosen)

    def check_binary(lhs, rhs, expected):
        # coalesce should keep both operands and have the expected dshape
        expr = coalesce(lhs, rhs)
        assert_dshape_equal(expr.dshape, dshape(expected))
        assert expr.lhs.isidentical(lhs)
        assert expr.rhs.isidentical(rhs)

    # check case where lhs is not optional
    s = symbol('s', 'int32')
    t = symbol('t', 'int32')
    check_simplifies_to(s, t, s)

    s_expr = s + s
    t_expr = t * 3
    check_simplifies_to(s_expr, t_expr, s_expr)

    a = symbol('a', 'string')
    b = symbol('b', 'string')
    check_simplifies_to(a, b, a)

    a_expr = a + a
    b_expr = b * 3
    check_simplifies_to(a_expr, b_expr, a_expr)

    c = symbol('c', '{a: int32, b: int32}')
    d = symbol('d', '{a: int32, b: int32}')
    check_simplifies_to(c, d, c)

    c_expr = transform(c, a=c.a + 1)
    d_expr = transform(d, a=d.a * 3)
    check_simplifies_to(c_expr, d_expr, c_expr)

    # check case where lhs is null dshape
    u = symbol('u', 'null')
    check_simplifies_to(u, s, s)
    check_simplifies_to(u, a, a)
    check_simplifies_to(u, c, c)

    # check optional lhs non-optional rhs
    # rhs is not optional so the expression cannot be null
    v = symbol('v', '?int32')
    check_binary(v, s, 'int32')

    e = symbol('e', '?string')
    check_binary(e, a, 'string')

    f = symbol('f', '?{a: int32, b: int32}')
    check_binary(f, c, '{a: int32, b: int32}')

    # check optional lhs non-optional rhs with promotion
    # rhs is not optional so the expression cannot be null
    # there are no either types in datashape so we use a type large enough
    # to hold either result
    w = symbol('w', 'int64')
    check_binary(v, w, 'int64')

    # check optional lhs and rhs
    # rhs and lhs are optional so this might be null
    x = symbol('x', '?int32')
    check_binary(v, x, '?int32')

    # check optional lhs and rhs with promotion
    # rhs and lhs are optional so this might be null
    # there are no either types in datashape so we use a type large enough
    # to hold either result
    y = symbol('y', '?int64')
    check_binary(v, y, '?int64')
Beispiel #60
0
def test_discover():
    """``discover`` reports the shape of the whole table and of one column."""
    table_shape = dshape('3 * {a: int64, b: float64, date: date}')
    column_shape = dshape('3 * int64')

    assert discover(b) == table_shape
    assert discover(b['a']) == column_shape