def test_compound_primary_key_with_single_reference():
    """Reference one column of a compound primary key from a second table.

    ``products`` is keyed on (product_no, product_sku); ``orders`` declares a
    foreign key on ``product_no`` only.  The discovered datashape of
    ``orders`` must expand that key into a map onto the products record.
    """
    with tmpfile('db') as fn:
        products_uri = 'sqlite:///%s::products' % fn
        products = resource(
            products_uri,
            dshape=""" var * { product_no: int32, product_sku: string, name: ?string, price: ?float64 } """,
            primary_key=['product_no', 'product_sku'],
        )
        # TODO: should this fail everywhere? e.g., this fails in postgres, but
        # not in sqlite because postgres doesn't allow partial foreign keys
        # might be best to let the backend handle this
        orders_ds = dshape("""var * { order_id: int32, product_no: map[int32, T], quantity: ?int32 }""")
        orders_uri = 'sqlite:///%s::orders' % fn
        orders = resource(
            orders_uri,
            dshape=orders_ds,
            foreign_keys=dict(product_no=products.c.product_no),
            primary_key=['order_id'],
        )
        discovered = discover(orders)
        expected = dshape("""var * { order_id: int32, product_no: map[int32, {product_no: int32, product_sku: string, name: ?string, price: ?float64}], quantity: ?int32 } """)
        assert discovered == expected
def test_schema_of_complex_interaction():
    """(x + y) / z has a float64 schema, with or without a label."""
    a = TableSymbol("a", "{x: int, y: int, z: int}")
    total = a["x"] + a["y"]
    ratio = total / a["z"]
    assert ratio.schema == dshape("float64")

    labeled = ratio.label("foo")
    assert labeled.schema == dshape("float64")
def test_dtype():
    """Column dtypes follow the table schema; comparisons are boolean."""
    accounts = TableSymbol('accounts',
                           '{name: string, balance: int32, id: int32}')

    name_column = accounts['name']
    assert name_column.dtype == dshape('string')

    balance_column = accounts['balance']
    assert balance_column.dtype == dshape('int32')

    comparison = accounts['balance'] > accounts['id']
    assert comparison.dtype == dshape('bool')
def test_compound_primary_key_with_fkey():
    """Reference both columns of a compound primary key from a second table.

    Each foreign-key column of ``orders`` must be discovered as a map onto
    the full ``products`` record.
    """
    with tmpfile('db') as fn:
        products_uri = 'sqlite:///%s::products' % fn
        products = resource(
            products_uri,
            dshape=""" var * { product_no: int32, product_sku: string, name: ?string, price: ?float64 } """,
            primary_key=['product_no', 'product_sku'],
        )
        orders_ds = dshape("""var * { order_id: int32, product_no: map[int32, T], product_sku: map[int32, U], quantity: ?int32 }""")
        orders_uri = 'sqlite:///%s::orders' % fn
        key_columns = {
            'product_no': products.c.product_no,
            'product_sku': products.c.product_sku
        }
        orders = resource(
            orders_uri,
            dshape=orders_ds,
            primary_key=['order_id'],
            foreign_keys=key_columns,
        )
        discovered = discover(orders)
        expected = dshape("""var * { order_id: int32, product_no: map[int32, {product_no: int32, product_sku: string, name: ?string, price: ?float64}], product_sku: map[int32, {product_no: int32, product_sku: string, name: ?string, price: ?float64}], quantity: ?int32 } """)
        assert discovered == expected
def test_jit_promotion(self):
    """Evaluating an int32/float32 expression with the jit strategy gives
    a blaze.Array equal to the float64 expected values."""
    int_shape = dshape('10, int32')
    float_shape = dshape('10, float32')
    expr = make_expr(int_shape, float_shape)

    result = blaze.eval(expr, strategy='jit')

    values = [0, 2, 6, 12, 20, 30, 42, 56, 72, 90]
    expected = blaze.array(values, dshape=dshape('10, float64'))

    self.assertEqual(type(result), blaze.Array)
    elementwise = result == expected
    self.assertTrue(np.all(elementwise))
def __init__(self, data, name=None, columns=None, schema=None,
             iscolumn=False):
    """Wrap ``data`` together with a record schema.

    Parameters
    ----------
    data : object
        The underlying data; passed to ``discover`` when no schema is given.
    name : optional
        Name of the table; when falsy, the next value of ``names`` is used.
    columns : optional
        Column names.  Used only when ``schema`` is not given.
    schema : optional
        Datashape of one row.  When falsy it is inferred from ``data``.
    iscolumn : bool
        Stored unchanged on the instance.

    Raises
    ------
    TypeError
        If the schema has to be inferred and either the column names or the
        column types cannot be determined.
    """
    if not schema:
        schema = discover(data).subshape[0]
        types = None
        # The first element of the discovered row shape decides how names
        # and types are extracted.
        if isinstance(schema[0], Tuple):
            columns = columns or list(range(len(schema[0].dshapes)))
            types = schema[0].dshapes
        if isinstance(schema[0], Record):
            columns = columns or schema[0].names
            types = schema[0].types
        if isinstance(schema[0], Fixed):
            # Homogeneous fixed-length row: one copy of the element type per
            # position.  Column names must come from the caller.
            types = (schema[1],) * int(schema[0])
        if not columns:
            # The keyword is ``columns=``; the message used to say
            # ``column=``, which does not exist.
            raise TypeError("Could not infer column names from data. "
                            "Please specify column names with `columns=` "
                            "keyword")
        if not types:
            raise TypeError("Could not infer data types from data. "
                            "Please specify schema with `schema=` keyword")

        schema = dshape(Record(list(zip(columns, types))))

    self.schema = dshape(schema)
    self.data = data
    self.name = name or next(names)
    self.iscolumn = iscolumn
def __init__(self, engine, tablename, primary_key='', schema=None):
    """Bind to (or create) the table ``tablename`` on ``engine``.

    ``engine`` may be a connection string, in which case an engine is
    created from it.  If the table already exists its schema is discovered
    and must equal ``schema`` when one is supplied.  Otherwise ``schema`` is
    required and is used to build the table, marking the column named
    ``primary_key`` as primary key.

    Raises
    ------
    ValueError
        On a schema mismatch with an existing table, or when the table does
        not exist and no usable schema was given.
    """
    if isinstance(engine, _strtypes):
        engine = sql.create_engine(engine)
    self.engine = engine
    self.tablename = tablename

    metadata = sql.MetaData()
    if engine.has_table(tablename):
        metadata.reflect(engine)
        table = metadata.tables[tablename]
        engine_schema = discover(table).subshape[0]
        mismatched = schema and dshape(schema) != engine_schema
        if mismatched:
            raise ValueError("Mismatched schemas:\n"
                             "\tIn database: %s\n"
                             "\nGiven: %s" % (engine_schema, schema))
        schema = engine_schema
    else:
        if not isinstance(schema, (_strtypes, datashape.DataShape)):
            raise ValueError('Must provide schema or point to valid table. '
                             'Table %s does not exist' % tablename)
        columns = dshape_to_alchemy(schema)
        for col in columns:
            if col.name == primary_key:
                col.primary_key = True
        table = sql.Table(tablename, metadata, *columns)

    self._schema = datashape.dshape(schema)
    self.table = table
    metadata.create_all(engine)
def __init__(self, url, dshape=None):
    """Remember ``url`` and its datashape.

    When ``dshape`` is None the datashape is requested from the remote end
    via ``requests.get_remote_datashape(url)``.
    """
    from ..io.client import requests

    self.url = url
    if dshape is not None:
        shape_source = dshape
    else:
        shape_source = requests.get_remote_datashape(url)
    self._dshape = datashape.dshape(shape_source)
def test_keepdims():
    """keepdims=True leaves reduced axes in place with length one."""
    x = Symbol('x', '5 * 3 * float32')

    expectations = (
        (0, '1 * 3 * float32'),
        (1, '5 * 1 * float32'),
        ((0, 1), '1 * 1 * float32'),
    )
    for axis, expected in expectations:
        reduced = x.sum(axis=axis, keepdims=True)
        assert reduced.dshape == dshape(expected)

    deviation = x.std(axis=0, keepdims=True)
    assert deviation.shape == (1, 3)
def test_concat_arr():
    """Concatenating arrays adds fixed lengths; a var operand gives var."""
    a = symbol('a', '3 * int32')
    b = symbol('b', '5 * int32')
    v = symbol('v', 'var * int32')

    fixed = concat(a, b)
    assert fixed.dshape == dshape('8 * int32')

    variable = concat(a, v)
    assert variable.dshape == dshape('var * int32')
def test_concat_table():
    """Concatenating tables adds fixed lengths; a var operand gives var."""
    a = symbol('a', '3 * {a: int32, b: int32}')
    # This symbol was previously named 'a' as well (copy-paste slip); give it
    # its own name so the two operands are distinguishable.
    b = symbol('b', '5 * {a: int32, b: int32}')
    v = symbol('v', 'var * {a: int32, b: int32}')

    assert concat(a, b).dshape == dshape('8 * {a: int32, b: int32}')
    assert concat(a, v).dshape == dshape('var * {a: int32, b: int32}')
def test_init(self):
    """A JSON_Streaming descriptor built with an explicit schema yields the
    expected tuples and reports either a var or a 5-row datashape."""
    with filetext(self.text) as fn:
        dd = JSON_Streaming(fn, schema=self.schema)
        # assertEquals is a deprecated alias that was removed in Python 3.12;
        # assertEqual is the supported spelling.
        self.assertEqual(tuple(dd), self.tuples)
        assert dd.dshape in set((
            datashape.dshape('var * {name: string, amount: int32}'),
            datashape.dshape('5 * {name: string, amount: int32}')))
def test_merge_options():
    """Merging optional columns keeps their option types."""
    s = symbol('s', 'var * {a: ?A, b: ?B}')
    merged = merge(a=s.a, b=s.b)

    whole = dshape('var * {a: ?A, b: ?B}')
    assert_dshape_equal(merged.dshape, whole)

    column_a = dshape('var * ?A')
    assert_dshape_equal(merged.a.dshape, column_a)

    column_b = dshape('var * ?B')
    assert_dshape_equal(merged.b.dshape, column_b)
def test_datetime(self):
    """The tz parameter of datetime is parsed and printed back."""
    naive = dshape('datetime')[0]
    self.assertEqual(naive.tz, None)

    zoned_cases = (
        ('datetime[tz="UTC"]', 'UTC'),
        ('datetime[tz="America/Vancouver"]', 'America/Vancouver'),
    )
    for text, zone in zoned_cases:
        self.assertEqual(dshape(text)[0].tz, zone)

    rendered = str(dshape('datetime[tz="UTC"]'))
    self.assertEqual(rendered, "datetime[tz='UTC']")
def test_schema_of_complex_interaction():
    """(x + y) / z has a float64 schema, with or without a label."""
    a = symbol('a', 'var * {x: int, y: int, z: int}')
    numerator = a['x'] + a['y']
    quotient = numerator / a['z']
    assert quotient.schema == dshape('float64')

    labeled = quotient.label('foo')
    assert labeled.schema == dshape('float64')
def test_schema_of_complex_interaction():
    """(x + y) / z has a ``real`` schema, with or without a label."""
    a = TableSymbol('a', '{x: int, y: int, z: int}')
    numerator = a['x'] + a['y']
    quotient = numerator / a['z']
    assert quotient.schema == dshape('real')

    labeled = quotient.label('foo')
    assert labeled.schema == dshape('real')
def test_best_match_ellipses(self):
    """best_match resolves a signature when one argument uses an ellipsis."""
    argument_shapes = [dshape('10, T1, int32'), dshape('..., float32')]
    match = best_match(g, argument_shapes)

    signature = str(match.sig)
    self.assertEqual(signature,
                     'X, Y, float32 -> ..., float32 -> X, int32')

    resolved = str(match.resolved_sig)
    self.assertEqual(resolved,
                     '10, T1, float32 -> ..., float32 -> 10, int32')
def discover_typeengine(typ):
    """Return the datashape type matching the SQLAlchemy type ``typ``.

    Interval types are mapped to ``datashape.TimeDelta`` with a unit chosen
    from their ``second_precision`` / ``day_precision``.  Other types are
    looked up in ``revtypes`` (by instance, by class, then by subclass or by
    the type's ``impl``); string types become ``datashape.String``.

    Raises
    ------
    ValueError
        If an Interval's precisions do not identify a unit.
    NotImplementedError
        If no datashape equivalent is known for ``typ``.
    """
    if isinstance(typ, sa.types.Interval):
        second_precision = typ.second_precision
        day_precision = typ.day_precision

        if second_precision is None and day_precision is None:
            return datashape.TimeDelta(unit='us')
        elif second_precision == 0 and day_precision == 0:
            return datashape.TimeDelta(unit='s')

        if second_precision in units_of_power and not day_precision:
            units = units_of_power[second_precision]
        elif day_precision is not None and day_precision > 0:
            # Guard against None: ``None > 0`` raises TypeError on Python 3.
            units = 'D'
        else:
            # %r rather than %d: either precision may be None here, and the
            # caller should get this ValueError, not a formatting TypeError.
            raise ValueError('Cannot infer INTERVAL type with parameters '
                             'second_precision=%r, day_precision=%r' %
                             (second_precision, day_precision))
        return datashape.TimeDelta(unit=units)

    if typ in revtypes:
        return dshape(revtypes[typ])[0]
    if type(typ) in revtypes:
        return dshape(revtypes[type(typ)])[0]
    if isinstance(typ, (sa.String, sa.Unicode)):
        return datashape.String(typ.length, typ.collation)
    else:
        for k, v in revtypes.items():
            # Match subclasses of a registered class, and wrapper types whose
            # ``impl`` is an instance of a registered class.
            if isinstance(k, type) and (isinstance(typ, k) or
                                        hasattr(typ, 'impl') and
                                        isinstance(typ.impl, k)):
                return v
            if k == typ:
                return v
    raise NotImplementedError("No SQL-datashape match for type %s" % typ)
def test_integrative():
    """Numeric strings inside records are discovered as int64."""
    records = [{'name': 'Alice', 'amount': '100'},
               {'name': 'Bob', 'amount': '200'},
               {'name': 'Charlie', 'amount': '300'}]

    discovered = dshape(discover(records))
    expected = dshape('3 * {amount: int64, name: string}')
    assert discovered == expected
def __init__(self, path, mode='rt', schema=None, dshape=None, open=open,
             nrows_discovery=50):
    """Describe a line-delimited JSON file at ``path``.

    Parameters
    ----------
    path : str
        Location of the file.
    mode : str
        Stored on the instance; discovery always opens the file with 'r'.
    schema, dshape : optional
        Row schema / full datashape.  Each is derived from the other when
        only one is given.  When neither is given they are discovered from a
        sample of the file.
    open : callable
        Function used to open ``path``.
    nrows_discovery : int
        Upper bound passed to ``islice`` when sampling lines for discovery.

    Raises
    ------
    ValueError
        If neither schema nor dshape is given and the file cannot be opened.
    """
    self.path = path
    self.mode = mode
    self.open = open
    if dshape:
        dshape = datashape.dshape(dshape)
    if schema:
        schema = datashape.dshape(schema)
    if dshape and not schema and isdimension(dshape[0]):
        schema = dshape.subshape[0]
    if schema and not dshape:
        dshape = var * schema

    if not schema and not dshape:
        try:
            f = open(self.path, 'r')
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer converted into ValueError.
            raise ValueError('No schema detected')
        try:
            # NOTE(review): islice starts at 1, so the first line of the file
            # is not part of the sample - confirm this is intended.
            data = list(map(json.loads, islice(f, 1, nrows_discovery)))
        finally:
            # Close the handle even when a sampled line is not valid JSON.
            f.close()
        dshape = discover(data)
        schema = dshape.subshape[0]

    # Initially the array is not loaded (is this necessary?)
    self._cache_arr = None

    self._schema = schema
    self._dshape = dshape
def test_from_numpy_fields(self):
    """A structured numpy dtype converts to a record with default names."""
    import numpy as np

    structured = np.dtype('i4,i8,f8')
    ds = datashape.from_numpy((), structured)

    expected_names = ['f0', 'f1', 'f2']
    self.assertEqual(ds.names, expected_names)

    expected_types = [dshape('int32'), dshape('int64'), dshape('float64')]
    self.assertEqual(ds.types, expected_types)
def __init__(self, data, name=None, columns=None, schema=None,
             iscolumn=False):
    """Wrap ``data`` together with a record schema.

    Parameters
    ----------
    data : object
        The underlying data; passed to ``discover`` when no schema is given.
    name : optional
        Name of the table; when falsy, the next value of ``names`` is used.
    columns : optional
        Column names.  Used only when ``schema`` is not given.
    schema : optional
        Datashape of one row.  When falsy it is inferred from ``data``.
    iscolumn : bool
        Stored unchanged on the instance.

    Raises
    ------
    TypeError
        If column names or types cannot be inferred, or if ``data`` carries
        its own ``schema`` (a DataShape or str) that differs from the schema
        established here.
    """
    if not schema:
        schema = discover(data).subshape[0]
        types = None
        # The first element of the discovered row shape decides how names
        # and types are extracted.
        if isinstance(schema[0], Tuple):
            columns = columns or list(range(len(schema[0].dshapes)))
            types = schema[0].dshapes
        if isinstance(schema[0], Record):
            columns = columns or schema[0].names
            types = schema[0].types
        if isinstance(schema[0], Fixed):
            # Homogeneous fixed-length row: one copy of the element type per
            # position.  Column names must come from the caller.
            types = (schema[1],) * int(schema[0])
        if not columns:
            # The keyword is ``columns=``; the message used to say
            # ``column=``, which does not exist.
            raise TypeError("Could not infer column names from data. "
                            "Please specify column names with `columns=` "
                            "keyword")
        if not types:
            raise TypeError("Could not infer data types from data. "
                            "Please specify schema with `schema=` keyword")

        schema = dshape(Record(list(zip(columns, types))))

    self.schema = dshape(schema)
    self.data = data

    # Refuse data whose own declared schema disagrees with ours.
    if (hasattr(data, 'schema')
            and isinstance(data.schema, (DataShape, str))
            and self.schema != data.schema):
        raise TypeError('%s schema %s does not match %s schema %s' %
                        (type(data).__name__, data.schema,
                         type(self).__name__, self.schema))

    self._name = name or next(names)
    self.iscolumn = iscolumn
def test_match_equation_dtype(self):
    """_match_equation pairs up dtypes, including dtype type variables."""
    source = dshape('int32')

    # A simple coercion
    coercion = _match_equation(source, dshape('int64'))
    self.assertEqual(coercion, [(T.int32, T.int64)])

    # Matching a data type variable
    source = dshape('int32')
    with_typevar = _match_equation(source, dshape('D'))
    self.assertEqual(with_typevar, [(T.int32, T.TypeVar('D'))])
def test_mixed_quotes_01(self):
    """Field names mixing both quote characters survive a str round trip."""
    source_text = """{ 'field \" with \\' quotes' : string, 'doublequote \" field \\'' : int64 }"""
    parsed = dshape(source_text)
    reparsed = dshape(str(parsed))
    assert str(parsed) == str(reparsed)
def test_cat_dshapes(self):
    """cat_dshapes sums the leading dimension of its inputs."""
    # concatenating 1 dshape is a no-op
    single = [dshape("3 * 10 * int32")]
    self.assertEqual(datashape.cat_dshapes(single), single[0])

    # two dshapes
    pair = [dshape("3 * 10 * int32"),
            dshape("7 * 10 * int32")]
    combined = datashape.cat_dshapes(pair)
    self.assertEqual(combined, dshape("10 * 10 * int32"))
def test_spaces_02(self):
    """A large record whose field names contain spaces survives a str
    round trip through dshape."""
    source_text = """{ 'Unique Key' : ?int64, 'Created Date' : string, 'Closed Date' : string, Agency : string, 'Agency Name' : string, 'Complaint Type' : string, Descriptor : string, 'Location Type' : string, 'Incident Zip' : ?int64, 'Incident Address' : ?string, 'Street Name' : ?string, 'Cross Street 1' : ?string, 'Cross Street 2' : ?string, 'Intersection Street 1' : ?string, 'Intersection Street 2' : ?string, 'Address Type' : string, City : string, Landmark : string, 'Facility Type' : string, Status : string, 'Due Date' : string, 'Resolution Action Updated Date' : string, 'Community Board' : string, Borough : string, 'X Coordinate (State Plane)' : ?int64, 'Y Coordinate (State Plane)' : ?int64, 'Park Facility Name' : string, 'Park Borough' : string, 'School Name' : string, 'School Number' : string, 'School Region' : string, 'School Code' : string, 'School Phone Number' : string, 'School Address' : string, 'School City' : string, 'School State' : string, 'School Zip' : string, 'School Not Found' : string, 'School or Citywide Complaint' : string, 'Vehicle Type' : string, 'Taxi Company Borough' : string, 'Taxi Pick Up Location' : string, 'Bridge Highway Name' : string, 'Bridge Highway Direction' : string, 'Road Ramp' : string, 'Bridge Highway Segment' : string, 'Garage Lot Name' : string, 'Ferry Direction' : string, 'Ferry Terminal Name' : string, Latitude : ?float64, Longitude : ?float64, Location : string }"""
    parsed = dshape(source_text)
    reparsed = dshape(str(parsed))
    assert str(parsed) == str(reparsed)
def __init__(self, path, mode='rt', schema=None, dshape=None, open=open,
             **kwargs):
    """Describe a JSON file at ``path``.

    Parameters
    ----------
    path : str
        Location of the file; its absolute form is kept in ``_abspath``.
    mode : str
        Stored on the instance; discovery always opens the file with 'r'.
    schema, dshape : optional
        Row schema / full datashape.  When only a dimensioned ``dshape`` is
        given the schema is taken from ``dshape.subarray(1)``.  When neither
        is given the datashape is discovered by loading the whole file.
    open : callable
        Function used to open ``path``.
    **kwargs
        Accepted and ignored by this constructor.

    Raises
    ------
    ValueError
        If neither schema nor dshape is given and the file cannot be opened.
    """
    self.path = path
    self._abspath = os.path.abspath(path)
    self.mode = mode
    self.open = open
    if dshape:
        dshape = datashape.dshape(dshape)
    if schema:
        schema = datashape.dshape(schema)
    if dshape and not schema and isdimension(dshape[0]):
        schema = dshape.subarray(1)

    if not schema and not dshape:
        try:
            f = open(self.path, 'r')
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer converted into ValueError.
            raise ValueError('No schema detected')
        try:
            dshape = discover(json.load(f))
        finally:
            # Close the handle even when the content is not valid JSON.
            f.close()

    # Initially the array is not loaded (is this necessary?)
    self._cache_arr = None

    self._schema = schema
    self._dshape = dshape
def test_foreign_keys_auto_construct():
    """A declared foreign key is discovered as a map onto the referenced
    table's record."""
    with tmpfile('db') as fn:
        products_uri = 'sqlite:///%s::products' % fn
        products = resource(
            products_uri,
            dshape=""" var * { product_no: int32, name: ?string, price: ?float64 } """,
            primary_key=['product_no'],
        )
        orders_ds = dshape("""var * { order_id: int32, product_no: map[int32, T], quantity: ?int32 }""")
        orders_uri = 'sqlite:///%s::orders' % fn
        orders = resource(
            orders_uri,
            dshape=orders_ds,
            foreign_keys=dict(product_no=products.c.product_no),
            primary_key=['order_id'],
        )
        discovered = discover(orders)
        expected = dshape(""" var * { order_id: int32, product_no: map[int32, { product_no: int32, name: ?string, price: ?float64 }], quantity: ?int32 } """)
        assert discovered == expected
def Data(data, dshape=None, name=None, fields=None, columns=None, schema=None,
         **kwargs):
    """Build an ``InteractiveSymbol`` around ``data``.

    Parameters
    ----------
    data : object or str
        The data to wrap.  A string is handed to ``resource``; if it
        contains ``::`` the part after it is treated as a ``/``-separated
        path of fields to select from the result.
    dshape : optional
        Datashape of the data, as a string or datashape object.  Discovered
        from ``data`` when omitted.
    name : optional
        Passed through to ``InteractiveSymbol``.
    fields : optional
        Field names to apply to the discovered datashape.
    columns : optional
        Deprecated spelling of ``fields``; emits a ``DeprecationWarning``.
    schema : optional
        Row schema; turned into ``var * schema``.  Mutually exclusive with
        ``dshape``.
    **kwargs
        Forwarded to ``resource`` when ``data`` is a string.

    Raises
    ------
    ValueError
        If both ``schema`` and ``dshape`` are given, or if the discovered
        record field names differ from ``fields``.
    """
    sub_uri = ''
    if isinstance(data, _strtypes):
        if '::' in data:
            # NOTE(review): a plain split() unpacked into two names fails if
            # the string contains more than one '::' - confirm callers never
            # pass such strings.
            data, sub_uri = data.split('::')
        data = resource(data, schema=schema, dshape=dshape, columns=columns,
                        **kwargs)
    # One-shot iterators are materialised so the data can be read repeatedly.
    if (isinstance(data, Iterator) and
            not isinstance(data, tuple(not_an_iterator))):
        data = tuple(data)
    if columns:
        warnings.warn("columns kwarg deprecated. Use fields instead",
                      DeprecationWarning)
    if columns and not fields:
        fields = columns
    if schema and dshape:
        raise ValueError("Please specify one of schema= or dshape= keyword"
                         " arguments")
    if schema and not dshape:
        dshape = var * schema
    if dshape and isinstance(dshape, _strtypes):
        dshape = datashape.dshape(dshape)
    if not dshape:
        dshape = discover(data)
        types = None
        # When field names were supplied, rebuild the measure as a Record
        # that uses them.  The three branches handle tuple rows, scalar
        # elements and existing records respectively.
        if isinstance(dshape.measure, Tuple) and fields:
            types = dshape[1].dshapes
            schema = Record(list(zip(fields, types)))
            dshape = DataShape(*(dshape.shape + (schema,)))
        elif isscalar(dshape.measure) and fields:
            # The innermost dimension becomes the record's fields.
            types = (dshape.measure,) * int(dshape[-2])
            schema = Record(list(zip(fields, types)))
            dshape = DataShape(*(dshape.shape[:-1] + (schema,)))
        elif isrecord(dshape.measure) and fields:
            ds = discover(data)
            assert isrecord(ds.measure)
            names = ds.measure.names
            # Existing record names are not silently overridden.
            if names != fields:
                raise ValueError('data column names %s\n'
                                 '\tnot equal to fields parameter %s,\n'
                                 '\tuse Data(data).relabel(%s) to rename fields'
                                 % (names,
                                    fields,
                                    ', '.join('%s=%r' % (k, v)
                                              for k, v in zip(names, fields))))
            types = dshape.measure.types
            schema = Record(list(zip(fields, types)))
            dshape = DataShape(*(dshape.shape + (schema,)))

    ds = datashape.dshape(dshape)
    result = InteractiveSymbol(data, ds, name)

    # Descend into the fields named after '::', skipping empty segments.
    if sub_uri:
        for field in sub_uri.split('/'):
            if field:
                result = result[field]

    return result
def test_discover_mixed():
    """Rows mixing ints and floats are discovered as a tuple when every row
    has the same layout, and as a float array when the layouts differ."""
    int_type = discover(1)
    float_type = discover(1.0)

    uniform_rows = [[1, 2, 1.0, 2.0]] * 10
    expected_tuple = 10 * Tuple([int_type, int_type, float_type, float_type])
    assert dshape(discover(uniform_rows)) == expected_tuple

    alternating_rows = [[1, 2, 1.0, 2.0], [1.0, 2.0, 1, 2]] * 5
    expected_array = 10 * (4 * float_type)
    assert dshape(discover(alternating_rows)) == expected_array
def test_create_with_raw_data():
    """Data built from raw rows plus field names gets a schema, a name and
    keeps the original data."""
    field_names = ['name', 'amount']
    t = Data(data, fields=field_names)

    expected_schema = dshape('{name: string, amount: int64}')
    assert t.schema == expected_schema
    assert t.name
    assert t.data == data
def out_dshape(self, in_dshape):
    """Return a record datashape pairing each key with the output datashape
    of the corresponding value for ``in_dshape``."""
    fields = []
    for key, value in zip(self.keys, self.values):
        fields.append((key, value.out_dshape(in_dshape)))
    return dshape(Record(fields))
def _schema(self):
    """Return ``self._dtype`` converted to a datashape."""
    dtype = self._dtype
    return dshape(dtype)
def test_discover_on_data():
    """discover on the module-level ``t`` reports two name/amount rows."""
    expected = dshape("2 * {name: string, amount: int64}")
    discovered = discover(t)
    assert discovered == expected
from __future__ import absolute_import, division, print_function

import os
from itertools import product
import pytest
# Skip every test in this module when sqlalchemy is not installed.
pytest.importorskip('sqlalchemy')

from datashape import dshape, discover
from odo import resource, odo
from odo.utils import tmpfile, filetext


# Datashape and rows shared by the fixtures and tests below.
ds = dshape('var * {a: int32, b: int32}')
data = [(1, 2), (10, 20), (100, 200)]


@pytest.yield_fixture
def csv():
    """Yield the result of writing ``data`` to a temporary headerless CSV.

    The temporary file only exists while the fixture is active.
    """
    with tmpfile('csv') as filename:
        csv = odo(data, filename, dshape=ds, has_header=False)
        yield csv


def test_simple_into(csv):
    tbl = 'testtable'
    with tmpfile('db') as filename:
        engine = resource('sqlite:///' + filename)
        t = resource('sqlite:///' + filename + '::' + tbl,
                     dshape=ds)

        odo(csv, t, dshape=ds)
        conn = engine.raw_connection()
        cursor = conn.cursor()
def create_from_datashape(o, ds, **kwargs):
    """Convert ``ds`` with ``dshape`` and call ``create_from_datashape``
    again with the converted value.

    NOTE(review): presumably this is one overload of a dispatched function
    (the decorator is not visible here) - confirm, since a plain function
    would call itself without end.
    """
    parsed = dshape(ds)
    return create_from_datashape(o, parsed, **kwargs)
def test_Field():
    """Record fields are exposed as attributes that keep the outer shape."""
    e = symbol('e', '3 * 5 * {name: string, amount: int}')
    assert 'name' in dir(e)

    name_field = e.name
    assert name_field.dshape == dshape('3 * 5 * string')
    assert name_field.schema == dshape('string')

    amount_field = e.amount
    assert amount_field._name == 'amount'
def test_point_validate():
    """Point.validate accepts numeric x/y fields and rejects a string x."""
    p = Point('x', 'y')

    numeric = dshape("{x: int32, y: float32}")
    p.validate(numeric)

    with_string = dshape("{x: string, y: float32}")
    with pytest.raises(ValueError):
        p.validate(with_string)
def dshape(self):
    """Return the stored datashape, or ``Var() * schema`` when none is set."""
    shape = self._dshape
    if not shape:
        shape = datashape.Var() * self.schema
    return datashape.dshape(shape)
def test_table_raises_on_inconsistent_inputs():
    """Passing both schema= and dshape= to Data raises ValueError."""
    record = "{name: string, amount: float32}"
    with pytest.raises(ValueError):
        Data(data,
             schema='{name: string, amount: float32}',
             dshape=dshape(record))
def test_scalar_sql_compute():
    """The repr of a scalar reduction over a SQL-backed Data is its value."""
    table_shape = dshape('var * {name: string, amount: int}')
    t = into('sqlite:///:memory:::t', data, dshape=table_shape)
    d = Data(t)

    total = d.amount.sum()
    assert repr(total) == '300'
def test_create_with_schema():
    """An explicit schema= is kept as the Data object's schema."""
    schema_text = '{name: string, amount: float32}'
    t = Data(data, schema=schema_text)
    expected = dshape('{name: string, amount: float32}')
    assert t.schema == expected
def out_dshape(self, input_dshape):
    """Return a record datashape with one ``int32`` field per category of
    the configured column in ``input_dshape``."""
    column_type = input_dshape.measure[self.column]
    fields = []
    for category in column_type.categories:
        fields.append((category, ct.int32))
    return dshape(Record(fields))
def _schema(self):
    """Compute the record schema of the joined result.

    The joined-on fields come first, followed by the remaining fields of
    the left side and then those of the right side.

    Examples
    --------

    >>> from blaze import symbol
    >>> t = symbol('t', 'var * {name: string, amount: int}')
    >>> s = symbol('t', 'var * {name: string, id: int}')

    >>> join(t, s).schema
    dshape("{name: string, amount: int32, id: int32}")

    >>> join(t, s, how='left').schema
    dshape("{name: string, amount: int32, id: ?int32}")

    Overlapping but non-joined fields append _left, _right

    >>> a = symbol('a', 'var * {x: int, y: int}')
    >>> b = symbol('b', 'var * {x: int, y: int}')
    >>> join(a, b, 'x').fields
    ['x', 'y_left', 'y_right']
    """
    # Wrap a type in Option unless it already is one.
    option = lambda dt: dt if isinstance(dt, Option) else Option(dt)

    # Normalise the join keys to sequences: a single key becomes a 1-tuple.
    on_left = self.on_left
    if not isinstance(on_left, list):
        on_left = on_left,

    on_right = self.on_right
    if not isinstance(on_right, list):
        on_right = on_right,

    # Right-hand field types re-keyed through the on_right -> on_left
    # mapping, so a right join key can be looked up by its left-hand name.
    # NOTE(review): keys that are not join keys map to None via ``.get`` -
    # confirm against keymap's semantics.
    right_types = keymap(
        dict(zip(on_right, on_left)).get,
        self.rhs.dshape.measure.dict,
    )
    # Joined fields, in left-hand field order, with the left and right key
    # types promoted together.  This is a lazy generator, consumed by
    # ``chain`` in the return statement.
    joined = (
        (name, promote(dt, right_types[name], promote_option=False))
        for n, (name, dt) in enumerate(
            filter(
                compose(op.contains(on_left), first),
                self.lhs.dshape.measure.fields,
            ))
    )

    # Non-key fields of each side, paired with their types.
    left = [
        (name, dt) for name, dt in zip(
            self.lhs.fields,
            types_of_fields(self.lhs.fields, self.lhs))
        if name not in on_left
    ]

    right = [
        (name, dt) for name, dt in zip(
            self.rhs.fields,
            types_of_fields(self.rhs.fields, self.rhs))
        if name not in on_right
    ]

    # Handle overlapping but non-joined case: a name present on both sides
    # without being a join key gets the configured suffixes appended.
    left_other = set(name for name, dt in left if name not in on_left)
    right_other = set(name for name, dt in right if name not in on_right)
    overlap = left_other & right_other

    left_suffix, right_suffix = self.suffixes
    left = ((name + left_suffix if name in overlap else name, dt)
            for name, dt in left)
    right = ((name + right_suffix if name in overlap else name, dt)
             for name, dt in right)

    # For 'right' and 'outer' joins the left-only fields become optional;
    # for 'left' and 'outer' joins the right-only fields do.
    if self.how in ('right', 'outer'):
        left = ((name, option(dt)) for name, dt in left)
    if self.how in ('left', 'outer'):
        right = ((name, option(dt)) for name, dt in right)

    return dshape(Record(chain(joined, left, right)))
def test_discover_dataframe():
    """A DataFrame's string column is discovered as optional string."""
    rows = [('Alice', 100), ('Bob', 200)]
    df = pd.DataFrame(rows, columns=['name', 'amount'])

    expected = dshape('2 * {name: ?string, amount: int64}')
    assert discover(df) == expected
def test_reduction_dshape():
    """sum promotes float32 to float64 and drops the reduced axes."""
    x = symbol('x', '5 * 3 * float32')

    full = x.sum()
    assert full.dshape == dshape('float64')

    per_axis = (
        (0, '3 * float64'),
        (1, '5 * float64'),
        ((0, 1), 'float64'),
    )
    for axis, expected in per_axis:
        reduced = x.sum(axis=axis)
        assert reduced.dshape == dshape(expected)
def test_Symbol():
    """A symbol exposes its datashape, its shape, and prints as its name."""
    shape_text = '3 * 5 * {name: string, amount: int}'
    e = symbol('e', shape_text)

    expected = dshape('3 * 5 * {name: string, amount: int}')
    assert e.dshape == expected
    assert e.shape == (3, 5)
    assert str(e) == 'e'
def test_summary_keepdims():
    """summary yields a bare record by default and keeps length-one
    dimensions when keepdims=True."""
    x = symbol('x', '5 * 3 * float32')

    collapsed = summary(a=x.min(), b=x.max())
    assert collapsed.dshape == dshape('{a: float32, b: float32}')

    kept = summary(a=x.min(), b=x.max(), keepdims=True)
    assert kept.dshape == dshape('1 * 1 * {a: float32, b: float32}')
def test_csv_infer_header():
    """Loading a CSV with a header row into sqlite infers the column names
    and leaves the header out of the data."""
    csv_text = 'a,b\n1,2\n3,4'
    with tmpfile('db') as dbfilename, \
            filetext(csv_text, extension='csv') as csvfilename:
        target = 'sqlite:///%s::mytable' % dbfilename
        t = odo(csvfilename, target)

        expected_shape = dshape('var * {a: int64, b: int64}')
        assert discover(t) == expected_shape

        expected_rows = set([(1, 2), (3, 4)])
        assert odo(t, set) == expected_rows
def _make_sig(tplist):
    """Build a datashape function signature from a sequence of types.

    Every type is prefixed with the ``A...`` dimension ellipsis.  The first
    entry of ``tplist`` is placed last in the resulting ``Function``.
    """
    shapes = [datashape.dshape("A..., " + str(tp)) for tp in tplist]
    first_shape = shapes[0]
    remaining_shapes = shapes[1:]
    return datashape.Function(*(remaining_shapes + [first_shape]))
def test_table_raises_on_inconsistent_inputs():
    """Passing both schema= and dshape= raises ValueError with a message
    telling the caller to pick one of them."""
    record = dshape("{name: string, amount: float32}")
    with pytest.raises(ValueError) as excinfo:
        data(tdata,
             schema='{name: string, amount: float32}',
             dshape=record)

    message = str(excinfo.value)
    assert "specify one of schema= or dshape= keyword" in message
def test_apply():
    """apply reports the datashape it was told to produce."""
    t = Symbol('t', 'var * {name: string, amount: int32, id: int32}')
    amount = t['amount']
    s = amount.apply(sum, dshape='real')
    r = t['amount'].apply(sum, dshape='3 * real')

    real = dshape('real')
    assert s.dshape == real
    assert r.schema == dshape('real')
expr = tdata.name[tdata.balance > a]
assert expr_repr(expr) == 'data[data.balance > a].name'


def test_isidentical_regr():
    """An expression over NaN-containing data is identical to itself."""
    # regression test for #1387
    tdata = np.array([(np.nan,), (np.nan,)], dtype=[('a', 'float64')])
    ds = data(tdata)
    assert ds.a.isidentical(ds.a)


@pytest.mark.parametrize('data,dshape,exp_type',
                         [(1, symbol('x', 'int').dshape, int),
                          # test 1-d to series
                          (into(da.core.Array, [1, 2], chunks=(10,)),
                           dshape('2 * int'), pd.Series),
                          # test 2-d tabular to dataframe
                          (into(da.core.Array,
                                [{'a': 1, 'b': 2}, {'a': 3, 'b': 4}],
                                chunks=(10, 10)),
                           dshape('2 * {a: int, b: int}'), pd.DataFrame),
                          # test 2-d non tabular to ndarray
                          (into(da.core.Array,
                                [[1, 2], [3, 4]],
                                chunks=(10, 10)),
                           dshape('2 * 2 * int'), np.ndarray)])
def test_coerce_core(data, dshape, exp_type):
    """coerce_core returns an instance of the type expected for each
    (data, dshape) pair listed in the parametrization."""
    assert isinstance(coerce_core(data, dshape), exp_type)
def test_joined_column_first_in_schema():
    """The shared join column comes first in the joined schema."""
    t = TableSymbol('t', '{x: int, y: int, z: int}')
    s = TableSymbol('s', '{w: int, y: int}')

    joined = join(t, s)
    expected = dshape('{y: int, x: int, z: int, w: int}')
    assert joined.schema == expected
def test_dshape():
    """A TableSymbol built from a bare record has a ``var`` leading
    dimension."""
    t = TableSymbol('t', '{name: string, amount: int}')
    expected = dshape('var * {name: string, amount: int}')
    assert t.dshape == expected
def test_Projection_retains_shape():
    """Projecting columns keeps the table's fixed length."""
    t = TableSymbol('t', '5 * {name: string, amount: int, id: int32}')
    projected = t[['name', 'amount']]
    expected = dshape('5 * {name: string, amount: int}')
    assert projected.dshape == expected
def test_resource_with_variable_length():
    """A var-length leading dimension creates a dataset with zero rows."""
    with tmpfile('.hdf5') as fn:
        variable_shape = datashape.dshape('var * 4 * int32')
        target = fn + '::/data'
        r = resource(target, dshape=variable_shape)
        assert r.shape == (0, 4)
h = hash(e)
assert isinstance(h, int)
assert h == hash(e)

assert hash(symbol('e', 'int')) == hash(symbol('e', 'int'))

f = symbol('f', 'int')
assert hash(e) != hash(f)

assert hash(e._subs({'e': 'f'})) != hash(e)
assert hash(e._subs({'e': 'f'})) == hash(f)


@pytest.mark.parametrize(
    'dshape',
    [var * float32, dshape('var * float32'), 'var * float32'])
def test_coerce(dshape):
    """coerce('int64') changes the measure to int64, keeps the var
    dimension and exposes the target type as ``to``.

    The same checks run for a datashape given as an expression, as a parsed
    datashape and as a string.
    """
    s = symbol('s', dshape)
    expr = s.coerce('int64')
    assert str(expr) == "s.coerce(to='int64')"
    assert expr.dshape == var * int64
    assert expr.schema == datashape.dshape('int64')
    assert expr.schema == expr.to


@pytest.mark.xfail(raises=AttributeError, reason='Should this be valid?')
def test_coerce_record():
    # Expected to fail with AttributeError for now (see the xfail marker).
    s = symbol('s', 'var * {a: int64, b: float64}')
    expr = s.coerce('{a: float64, b: float32}')
    assert str(expr) == "s.coerce(to='{a: float64, b: float32}')"
def test_coalesce():
    """Exercise coalesce across optional and non-optional operands.

    Covers: a non-optional lhs (result is the lhs itself), a null lhs
    (result is the rhs itself), an optional lhs with a non-optional rhs,
    and optional operands on both sides, with and without type promotion.
    """
    # check case where lhs is not optional
    s = symbol('s', 'int32')
    t = symbol('t', 'int32')
    expr = coalesce(s, t)
    assert expr.isidentical(s)

    s_expr = s + s
    t_expr = t * 3
    expr = coalesce(s_expr, t_expr)
    assert expr.isidentical(s_expr)

    a = symbol('a', 'string')
    b = symbol('b', 'string')
    expr = coalesce(a, b)
    assert expr.isidentical(a)

    a_expr = a + a
    b_expr = b * 3
    expr = coalesce(a_expr, b_expr)
    assert expr.isidentical(a_expr)

    c = symbol('c', '{a: int32, b: int32}')
    d = symbol('d', '{a: int32, b: int32}')
    expr = coalesce(c, d)
    assert expr.isidentical(c)

    c_expr = transform(c, a=c.a + 1)
    d_expr = transform(d, a=d.a * 3)
    expr = coalesce(c_expr, d_expr)
    assert expr.isidentical(c_expr)

    # check case where lhs is null dshape
    u = symbol('u', 'null')
    expr = coalesce(u, s)
    assert expr.isidentical(s)

    expr = coalesce(u, a)
    assert expr.isidentical(a)

    expr = coalesce(u, c)
    assert expr.isidentical(c)

    # check optional lhs non-optional rhs
    v = symbol('v', '?int32')
    expr = coalesce(v, s)
    # rhs is not optional so the expression cannot be null
    assert_dshape_equal(expr.dshape, dshape('int32'))
    assert expr.lhs.isidentical(v)
    assert expr.rhs.isidentical(s)

    e = symbol('e', '?string')
    expr = coalesce(e, a)
    assert_dshape_equal(expr.dshape, dshape('string'))
    assert expr.lhs.isidentical(e)
    assert expr.rhs.isidentical(a)

    f = symbol('f', '?{a: int32, b: int32}')
    expr = coalesce(f, c)
    assert_dshape_equal(expr.dshape, dshape('{a: int32, b: int32}'))
    assert expr.lhs.isidentical(f)
    assert expr.rhs.isidentical(c)

    # check optional lhs non-optional rhs with promotion
    w = symbol('w', 'int64')
    expr = coalesce(v, w)
    # rhs is not optional so the expression cannot be null
    # there are no either types in datashape so we use a type large enough
    # to hold either result
    assert_dshape_equal(expr.dshape, dshape('int64'))
    assert expr.lhs.isidentical(v)
    assert expr.rhs.isidentical(w)

    # check optional lhs and rhs
    x = symbol('x', '?int32')
    expr = coalesce(v, x)
    # rhs and lhs are optional so this might be null
    assert_dshape_equal(expr.dshape, dshape('?int32'))
    assert expr.lhs.isidentical(v)
    assert expr.rhs.isidentical(x)

    # check optional lhs and rhs with promotion
    y = symbol('y', '?int64')
    expr = coalesce(v, y)
    # rhs and lhs are optional so this might be null
    # there are no either types in datashape so we use a type large enough
    # to hold either result
    assert_dshape_equal(expr.dshape, dshape('?int64'))
    assert expr.lhs.isidentical(v)
    assert expr.rhs.isidentical(y)
def test_discover():
    """discover reports the record shape of ``b`` and of its ``a`` column."""
    table_shape = dshape('3 * {a: int64, b: float64, date: date}')
    assert discover(b) == table_shape

    column_shape = dshape('3 * int64')
    assert discover(b['a']) == column_shape