def promote(lhs, rhs):
    """Return a scalar dshape able to hold values of both ``lhs`` and ``rhs``.

    Equal inputs are returned unchanged.  Otherwise the ``ty`` attribute of
    each side (when present) is used in place of the side itself, both are
    converted to numpy dtypes and combined with ``numpy.result_type``, and
    the combined type is handed to ``optionify`` together with the original
    operands.

    Examples
    --------
    >>> from datashape import int32, int64, Option
    >>> x = Option(int32)
    >>> y = int64
    >>> promote(x, y)
    ?int64
    >>> promote(int64, int64)
    ctype("int64")

    Notes
    -----
    Type promotion is delegated to ``numpy.result_type``; see
    http://docs.scipy.org/doc/numpy/reference/generated/numpy.result_type.html
    """
    if lhs == rhs:
        return lhs

    # Work on the wrapped type when a side carries one in ``.ty``.
    unwrapped = [getattr(side, "ty", side) for side in (lhs, rhs)]
    np_dtypes = [datashape.to_numpy_dtype(ty) for ty in unwrapped]
    promoted = datashape.CType.from_numpy_dtype(np.result_type(*np_dtypes))
    return optionify(lhs, rhs, promoted)
def into(a, b):
    """Compute expression ``b`` and convert the result into a numpy array.

    The schema of ``b`` is re-parsed from its string form with every ``?``
    removed before the numpy dtype is derived.  For a column expression the
    dtype comes from the first field type of the record; otherwise it comes
    from the whole schema.  ``a`` is not used by this implementation.
    """
    schema = dshape(str(b.schema).replace('?', ''))
    is_column = b.iscolumn
    target = np.ndarray(0)
    computed = compute(b)
    measure = schema[0].types[0] if is_column else schema
    return into(target, computed, dtype=to_numpy_dtype(measure))
def promote(lhs, rhs, promote_option=True):
    """Return a scalar dshape able to hold values of both ``lhs`` and ``rhs``.

    Parameters
    ----------
    lhs, rhs : scalar dshapes
        When a side has a ``ty`` attribute, that attribute is what gets
        promoted.
    promote_option : bool, optional
        When true (the default) the promoted type is passed through
        ``optionify`` with the original operands; when false the bare
        promoted type is returned.

    Examples
    --------
    >>> from datashape import int32, int64, Option
    >>> x = Option(int32)
    >>> y = int64
    >>> promote(x, y)
    Option(ty=ctype("int64"))
    >>> promote(int64, int64)
    ctype("int64")

    Don't promote to option types.

    >>> promote(x, y, promote_option=False)
    ctype("int64")

    Notes
    -----
    Type promotion is delegated to ``numpy.result_type``; see
    http://docs.scipy.org/doc/numpy/reference/generated/numpy.result_type.html
    """
    if lhs == rhs:
        return lhs

    left = getattr(lhs, 'ty', lhs)
    right = getattr(rhs, 'ty', rhs)
    np_result = np.result_type(
        datashape.to_numpy_dtype(left),
        datashape.to_numpy_dtype(right),
    )
    promoted = datashape.CType.from_numpy_dtype(np_result)
    return optionify(lhs, rhs, promoted) if promote_option else promoted
def compute_up(t, x, **kwargs):
    """Apply the numpy reduction named by ``t.symbol`` to ``x``.

    ``t.axis`` and ``t.keepdims`` are always forwarded.  A ``dtype`` derived
    from ``t.schema`` is forwarded only when the numpy function lists a
    ``dtype`` keyword.
    """
    # can't use the method here, as they aren't Python functions
    reducer = getattr(np, t.symbol)
    accepts_dtype = 'dtype' in keywords(reducer)
    options = {'axis': t.axis, 'keepdims': t.keepdims}
    if accepts_dtype:
        options['dtype'] = to_numpy_dtype(t.schema)
    return reducer(x, **options)
def test_uints(self):
    """Unsigned integer dshapes round-trip through ``blaze.array``."""
    for type_ in ('uint8', 'uint16', 'uint32', 'uint64'):
        arr = blaze.array(np.arange(3), dshape=type_)
        # The array's dshape must map back to the numpy dtype it was built with.
        self.assertEqual(to_numpy_dtype(arr.dshape), np.dtype(type_))
        self.assertEqual(dd_as_py(arr._data), [0, 1, 2])
def resource_bcolz(uri, dshape=None, expected_dshape=None, **kwargs):
    """Open the bcolz container at ``uri`` or create an empty one from ``dshape``.

    If ``uri`` exists it is opened as a ``ctable``, falling back to a
    ``carray`` when that raises ``IOError``.  Otherwise ``dshape`` is
    required: an empty numpy array with the matching dtype and shape (a
    leading ``var`` dimension becomes length 0) seeds a new ``ctable`` for
    record measures or a new ``carray`` for anything else.

    ``expectedlen`` is taken from the filtered ``kwargs`` when present, else
    from the first dimension of ``expected_dshape`` when that is a
    ``datashape.Fixed``, else ``None``.

    Raises
    ------
    ValueError
        If ``uri`` does not exist and ``dshape`` is falsy.
    """
    if os.path.exists(uri):
        try:
            return ctable(rootdir=uri)
        except IOError:
            # __rootdirs__ doesn't exist because we aren't a ctable
            return carray(rootdir=uri)

    if not dshape:
        raise ValueError("Must specify either existing bcolz directory or"
                         " valid datashape")

    dshape = datashape.dshape(dshape)
    dt = datashape.to_numpy_dtype(dshape)

    trailing = tuple(map(int, dshape.shape[1:]))  # tail of shape
    if dshape.shape[0] == datashape.var:
        leading = (0,)
    else:
        leading = (int(dshape.shape[0]),)
    seed = np.empty(shape=leading + trailing, dtype=dt)

    kwargs = keyfilter(keywords.__contains__, kwargs)

    fallback_len = None
    if (expected_dshape is not None and
            isinstance(expected_dshape[0], datashape.Fixed)):
        fallback_len = int(expected_dshape[0])
    expectedlen = kwargs.pop('expectedlen', fallback_len)

    if datashape.predicates.isrecord(dshape.measure):
        factory = ctable
    else:
        factory = carray
    return factory(seed, rootdir=uri, expectedlen=expectedlen, **kwargs)
def into(a, b, **kwargs):
    """Compute expression ``b`` and convert the result into ``a``'s kind.

    When the computed value is a list, tuple or iterator, the numpy dtypes of
    the record fields and the column names of ``b`` are added to ``kwargs``
    as ``types`` and ``names`` before delegating.
    """
    c = compute(b)
    if isinstance(c, (list, tuple, Iterator)):
        field_types = b.schema[0].types
        kwargs['types'] = list(map(datashape.to_numpy_dtype, field_types))
        kwargs['names'] = b.columns
    return into(a, c, **kwargs)
def PyTables(path, datapath, dshape=None, **kwargs):
    """Create or open a ``tables.Table`` object.

    Parameters
    ----------
    path : str
        Path to a PyTables HDF5 file.
    datapath : str
        The name of the node in the ``tables.File``.
    dshape : str or datashape.DataShape
        DataShape to use to create the ``Table``.  A leading ``var``
        dimension is dropped before the table description is derived.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    t : tables.Table
        The node at ``datapath``.  The file it belongs to is opened in
        append mode and is not closed by this function.

    Raises
    ------
    ValueError
        If ``datapath`` is not present in the file and no ``dshape`` was
        given.

    Examples
    --------
    >>> from blaze.utils import tmpfile
    >>> # create from scratch
    >>> with tmpfile('.h5') as f:
    ...     t = PyTables(f, '/bar',
    ...                  dshape='var * {volume: float64, planet: string[10, "A"]}')
    ...     data = [(100.3, 'mars'), (100.42, 'jupyter')]
    ...     t.append(data)
    ...     t[:]  # doctest: +SKIP
    ...
    array([(100.3, b'mars'), (100.42, b'jupyter')],
          dtype=[('volume', '<f8'), ('planet', 'S10')])
    """
    def possibly_create_table(filename, dtype):
        # Create the table at ``datapath`` inside ``filename`` if it is not
        # already there; the file is always closed again afterwards.
        f = tb.open_file(filename, mode='a')
        try:
            if datapath not in f:
                if dtype is None:
                    raise ValueError('dshape cannot be None and datapath not'
                                     ' in file')
                else:
                    f.create_table('/', datapath.lstrip('/'), description=dtype)
        finally:
            f.close()

    if dshape:
        if isinstance(dshape, str):
            dshape = datashape.dshape(dshape)
        # Only the per-row measure describes the table, so strip a leading
        # ``var`` dimension.
        if dshape[0] == datashape.var:
            dshape = dshape.subshape[0]
        dtype = dtype_to_pytables(datashape.to_numpy_dtype(dshape))
    else:
        dtype = None

    if os.path.exists(path):
        possibly_create_table(path, dtype)
    else:
        # The file does not exist yet: build it at a temporary location and
        # copy it to ``path`` once the table has been created.
        with tmpfile('.h5') as filename:
            possibly_create_table(filename, dtype)
            shutil.copyfile(filename, path)
    return tb.open_file(path, mode='a').get_node(datapath)
def into(a, b, **kwargs):
    """Compute expression ``b`` and convert the result into ``a``'s kind.

    For list, tuple or iterator results the per-field numpy dtypes
    (``types``) and the column names (``names``) of ``b`` are injected into
    ``kwargs`` before the computed value is passed on.
    """
    computed = compute(b)
    needs_schema = isinstance(computed, (list, tuple, Iterator))
    if needs_schema:
        dtypes = []
        for field_type in b.schema[0].types:
            dtypes.append(datashape.to_numpy_dtype(field_type))
        kwargs['types'] = dtypes
        kwargs['names'] = b.columns
    return into(a, computed, **kwargs)
def compute_up(expr, data, **kwargs):
    """Cast ``data`` to the type requested by ``expr``.

    String targets (optional or not) are cast with ``str`` and datetime
    targets (optional or not) with ``np.datetime64``.  Any other target uses
    the numpy dtype derived from ``expr.schema``.
    """
    measure = expr.to.measure

    string_targets = {datashape.string, datashape.Option(datashape.string)}
    if measure in string_targets:
        return data.astype(str)

    datetime_targets = {datashape.datetime_,
                        datashape.Option(datashape.datetime_)}
    if measure in datetime_targets:
        return data.astype(np.datetime64)

    return data.astype(to_numpy_dtype(expr.schema))
def compute_up(t, x, **kwargs):
    """Sum a per-element indicator array built from ``x``.

    The indicator depends on the element type of ``x``: ``pd.notnull`` for
    floating point and object arrays, a comparison of the int64 view against
    ``inat`` for datetime64 arrays, and an array of ones for everything
    else.  The sum uses ``t.axis``, ``t.keepdims`` and the numpy dtype
    derived from ``t.dshape``.
    """
    result_dtype = to_numpy_dtype(t.dshape)
    element_type = x.dtype.type

    if issubclass(element_type, (np.floating, np.object_)):
        present = pd.notnull(x)
    elif issubclass(element_type, np.datetime64):
        present = x.view("int64") != inat
    else:
        present = np.ones(x.shape, dtype=result_dtype)

    return present.sum(keepdims=t.keepdims, axis=t.axis, dtype=result_dtype)
def test_complex(self):
    """Complex dshapes round-trip through ``blaze.array``."""
    for type_ in ('complex64', 'complex128'):
        arr = blaze.array(np.arange(3), dshape=type_)
        self.assertEqual(to_numpy_dtype(arr.dshape), np.dtype(type_))
        # dd_as_py does not support complexes yet..
        # NOTE(review): the comment above predates this rewrite, yet the
        # assertion below is live code - confirm which of the two is current.
        self.assertEqual(dd_as_py(arr._data), [0, 1, 2])
def into(a, b, **kwargs):
    """Build a ``bcolz.ctable`` from the named fields of ``b``.

    Field names come from the second element of ``b``'s datashape.  A field
    whose numpy dtype is object is converted through ``nd.as_py`` and
    ``np.asarray``; every other field goes through ``into`` with an empty
    ndarray as the target.  ``a`` is not used; ``kwargs`` are forwarded to
    ``bcolz.ctable``.
    """
    names = dshape(nd.dshape_of(b))[1].names
    fields = [getattr(b, name) for name in names]

    columns = []
    for field in fields:
        if to_numpy_dtype(dshape(nd.dshape_of(field))) == np.dtype('O'):
            columns.append(np.asarray(nd.as_py(field)))
        else:
            columns.append(into(np.ndarray(0), field))

    return bcolz.ctable(columns, names=names, **kwargs)
def into(a, df, **kwargs):
    """Convert ``df`` to a record array without its index.

    When a ``dshape`` keyword is supplied and the numpy dtype of its measure
    differs from the dtype of the records, the records are cast to it.
    ``a`` is not used.
    """
    records = df.to_records(index=False)
    if 'dshape' not in kwargs:
        return records

    wanted = to_numpy_dtype(dshape(kwargs['dshape']).measure)
    if records.dtype != wanted:
        records = records.astype(wanted)
    return records
def compute_up(expr, data, **kwargs):
    """Cast ``data`` to the type requested by ``expr.to``.

    String targets (optional or not) are cast with ``str`` and datetime
    targets (optional or not) with ``np.datetime64``.  Any other target uses
    the numpy dtype derived from ``expr.schema``.
    """
    string_targets = {datashape.string, datashape.Option(datashape.string)}
    if datashape.dshape(expr.to) in string_targets:
        return data.astype(str)

    datetime_targets = {datashape.datetime_,
                        datashape.Option(datashape.datetime_)}
    if datashape.dshape(expr.to) in datetime_targets:
        return data.astype(np.datetime64)

    return data.astype(to_numpy_dtype(expr.schema))
def promote(lhs, rhs, promote_option=True):
    """Return a scalar dshape able to hold values of both ``lhs`` and ``rhs``.

    Parameters
    ----------
    lhs, rhs : scalar dshapes
        When a side has a ``ty`` attribute, that attribute is what gets
        promoted.
    promote_option : bool, optional
        When true (the default) the promoted type is passed through
        ``optionify`` with the original operands.

    Examples
    --------
    >>> from datashape import int32, int64, Option, string
    >>> x = Option(int32)
    >>> y = int64
    >>> promote(x, y)
    Option(ty=ctype("int64"))
    >>> promote(int64, int64)
    ctype("int64")

    Don't promote to option types.

    >>> promote(x, y, promote_option=False)
    ctype("int64")

    Strings are handled differently than NumPy, which promotes to
    ctype("object")

    >>> x = string
    >>> y = Option(string)
    >>> promote(x, y) == promote(y, x) == Option(string)
    True
    >>> promote(x, y, promote_option=False)
    ctype("string")

    Notes
    -----
    Except for ``datashape.string`` types, this uses ``numpy.result_type``
    for type promotion logic.  See the numpy documentation at:
    http://docs.scipy.org/doc/numpy/reference/generated/numpy.result_type.html
    """
    if lhs == rhs:
        return lhs

    left = getattr(lhs, 'ty', lhs)
    right = getattr(rhs, 'ty', rhs)

    if left == right == datashape.string:
        # Special case string promotion, since numpy promotes to `object`.
        promoted = datashape.string
    else:
        promoted = datashape.CType.from_numpy_dtype(
            np.result_type(datashape.to_numpy_dtype(left),
                           datashape.to_numpy_dtype(right))
        )

    return optionify(lhs, rhs, promoted) if promote_option else promoted
def test_floats(self):
    """Floating point dshapes round-trip through ``blaze.array``."""
    for type_ in ('float16', 'float32', 'float64'):
        arr = blaze.array(np.arange(3), dshape=type_)
        self.assertEqual(to_numpy_dtype(arr.dshape), np.dtype(type_))
        if type_ == 'float16':
            # dd_as_py does not support this yet
            continue
        self.assertEqual(dd_as_py(arr._data), [0, 1, 2])
def into(a, b, **kwargs):
    """Build a ``bcolz.ctable`` from the named fields of ``b``.

    Each field named in ``b``'s datashape becomes one column.  Fields whose
    numpy dtype is object go through ``nd.as_py`` and ``np.asarray``; the
    rest go through ``into`` with an empty ndarray target.  ``a`` is unused
    and ``kwargs`` are forwarded to ``bcolz.ctable``.
    """
    def as_column(field):
        # Object-typed fields cannot take the ndarray route used for the rest.
        if to_numpy_dtype(dshape(nd.dshape_of(field))) == np.dtype('O'):
            return np.asarray(nd.as_py(field))
        return into(np.ndarray(0), field)

    names = dshape(nd.dshape_of(b))[1].names
    fields = [getattr(b, name) for name in names]
    columns = [as_column(field) for field in fields]
    return bcolz.ctable(columns, names=names, **kwargs)
def promote(lhs, rhs, promote_option=True):
    """Promote two scalar dshapes to a single compatible type.

    Equal inputs are returned as-is.  Otherwise each side is replaced by its
    ``ty`` attribute when it has one; two strings promote to
    ``datashape.string`` and anything else is promoted through
    ``numpy.result_type``.  With ``promote_option`` true (the default) the
    result is additionally passed through ``optionify`` with the original
    operands.

    Examples
    --------
    >>> from datashape import int32, int64, Option, string
    >>> x = Option(int32)
    >>> y = int64
    >>> promote(x, y)
    Option(ty=ctype("int64"))
    >>> promote(int64, int64)
    ctype("int64")

    Don't promote to option types.

    >>> promote(x, y, promote_option=False)
    ctype("int64")

    Strings are handled differently than NumPy, which promotes to
    ctype("object")

    >>> x = string
    >>> y = Option(string)
    >>> promote(x, y) == promote(y, x) == Option(string)
    True
    >>> promote(x, y, promote_option=False)
    ctype("string")

    Notes
    -----
    Except for ``datashape.string`` types, this uses ``numpy.result_type``
    for type promotion logic.  See the numpy documentation at:
    http://docs.scipy.org/doc/numpy/reference/generated/numpy.result_type.html
    """
    if lhs == rhs:
        return lhs

    left, right = (getattr(side, "ty", side) for side in (lhs, rhs))

    if left == right == datashape.string:
        # Special case string promotion, since numpy promotes to `object`.
        result = datashape.string
    else:
        np_left = datashape.to_numpy_dtype(left)
        np_right = datashape.to_numpy_dtype(right)
        result = datashape.CType.from_numpy_dtype(
            np.result_type(np_left, np_right))

    if not promote_option:
        return result
    return optionify(lhs, rhs, result)
def resource_bcolz(rootdir, **kwargs):
    """Open the ``ctable`` at ``rootdir`` or create an empty one.

    Only keyword arguments contained in ``carray_keywords`` are forwarded to
    ``ctable``.  When ``rootdir`` does not exist, a ``dshape`` keyword is
    required and an empty array of the matching numpy dtype seeds the new
    table.

    Raises
    ------
    ValueError
        If ``rootdir`` does not exist and no ``dshape`` keyword was given.
    """
    if os.path.exists(rootdir):
        accepted = keyfilter(carray_keywords.__contains__, kwargs)
        return ctable(rootdir=rootdir, **accepted)

    if 'dshape' not in kwargs:
        raise ValueError("File does not exist and no `dshape=` given")

    dtype = to_numpy_dtype(kwargs['dshape'])
    accepted = keyfilter(carray_keywords.__contains__, kwargs)
    return ctable(np.empty(0, dtype), rootdir=rootdir, **accepted)
def resource_bcolz(rootdir, **kwargs):
    """Open the ``ctable`` at ``rootdir`` or create an empty one.

    Only keyword arguments reported by ``keywords(ctable)`` are forwarded to
    ``ctable``.  When ``rootdir`` does not exist, a ``dshape`` keyword is
    required and an empty array of the matching numpy dtype seeds the new
    table.

    Raises
    ------
    ValueError
        If ``rootdir`` does not exist and no ``dshape`` keyword was given.
    """
    if os.path.exists(rootdir):
        accepted = keyfilter(keywords(ctable).__contains__, kwargs)
        return ctable(rootdir=rootdir, **accepted)

    if 'dshape' not in kwargs:
        raise ValueError("File does not exist and no `dshape=` given")

    dtype = to_numpy_dtype(kwargs['dshape'])
    accepted = keyfilter(keywords(ctable).__contains__, kwargs)
    return ctable(np.empty(0, dtype), rootdir=rootdir, **accepted)
def compute_up(t, x, **kwargs):
    """Sum a per-element indicator array built from ``x``.

    Floating point and object arrays use ``pd.notnull``; datetime64 arrays
    compare their int64 view against ``inat``; all other arrays use an array
    of ones.  The sum is taken with ``t.axis``, ``t.keepdims`` and the numpy
    dtype derived from ``t.dshape``.
    """
    result_dtype = to_numpy_dtype(t.dshape)
    sum_options = dict(keepdims=t.keepdims, axis=t.axis, dtype=result_dtype)
    element_type = x.dtype.type

    if issubclass(element_type, (np.floating, np.object_)):
        return pd.notnull(x).sum(**sum_options)
    if issubclass(element_type, np.datetime64):
        return (x.view('int64') != inat).sum(**sum_options)
    return np.ones(x.shape, dtype=result_dtype).sum(**sum_options)
def series_to_array(s, dshape=None, **kwargs):
    """Return the values of series ``s``, cast to ``dshape``'s dtype if possible.

    The raw values are returned untouched when the series is datetime64 with
    a unit listed in ``higher_precision_freqs``, when its dtype already
    equals the target, or when the cast raises ``ValueError``.
    """
    target = datashape.to_numpy_dtype(datashape.dshape(dshape))
    source = s.dtype
    values = s.values

    # don't lose precision of datetime64 more precise than microseconds
    keep_precision = (issubclass(source.type, np.datetime64) and
                      np.datetime_data(source)[0] in higher_precision_freqs)
    if keep_precision or s.dtype == target:
        return values

    try:
        converted = values.astype(target)
    except ValueError:
        # object series and record dshape, e.g., a frame row
        return values
    return converted
def __init__(self, path, datapath, mode='r', schema=None, dshape=None, **kwargs):
    """Bind this object to a dataset in an HDF5 file, creating it if absent.

    Parameters
    ----------
    path : path of the file handed to ``h5py.File``.
    datapath : name of the dataset inside the file.
    mode : file mode handed to ``h5py.File`` (default ``'r'``).
    schema : optional record schema; when given without ``dshape`` the
        datashape becomes ``'var * ' + str(schema)``.
    dshape : optional datashape used to create a missing dataset.
    **kwargs : forwarded to ``create_dataset`` when the dataset is created.

    Raises
    ------
    ValueError
        If the dataset does not exist and ``dshape`` is ``None``.
    """
    self.path = path
    self.datapath = datapath
    self.mode = mode

    if schema and not dshape:
        dshape = 'var * ' + str(schema)

    # TODO: provide sane defaults for kwargs
    # Notably chunks and maxshape
    if dshape:
        dshape = datashape.dshape(dshape)
        shape = dshape.shape
        dtype = datashape.to_numpy_dtype(dshape[-1])
        if shape[0] == datashape.Var():
            # A variable-length leading dimension starts empty and growable;
            # any caller-supplied ``chunks`` value is overwritten here.
            kwargs['chunks'] = True
            kwargs['maxshape'] = kwargs.get('maxshape', (None, ) + shape[1:])
            shape = (0, ) + tuple(map(int, shape[1:]))

    with h5py.File(path, mode) as f:
        dset = f.get(datapath)
        if dset is None:
            # NOTE(review): ``shape``/``dtype`` are only bound when ``dshape``
            # was truthy above, while this guard tests ``is None`` - confirm
            # that a falsy, non-None dshape cannot reach this point.
            if dshape is None:
                raise ValueError('No dataset or dshape provided')
            else:
                f.create_dataset(datapath, shape, dtype=dtype, **kwargs)
        else:
            # An existing dataset wins: the datashape is rebuilt from what
            # is stored and any dshape passed in is discarded.
            dshape2 = datashape.from_numpy(dset.shape, dset.dtype)
            dshape = dshape2
            # TODO: test provided dshape against given dshape
            # if dshape and dshape != dshape2:
            #     raise ValueError('Inconsistent datashapes.'
            #             '\nGiven: %s\nFound: %s' % (dshape, dshape2))

    attributes = self.attributes()
    if attributes['chunks']:
        # Replace the leading dimension of the datashape with ``var`` by
        # editing its string form.
        # is there a better way to do this?
        words = str(dshape).split(' * ')
        dshape = 'var * ' + ' * '.join(words[1:])
        dshape = datashape.dshape(dshape)

    self._dshape = dshape
    self._schema = schema
def dataset_from_dshape(file, datapath, ds, **kwargs):
    """Require a dataset in ``file`` at ``datapath`` described by ``ds``.

    A datashape without ``var`` uses its numpy shape.  A one-dimensional
    datashape containing ``var`` starts with shape ``(0,)``.  For a non-empty
    shape, ``chunks`` and ``maxshape`` get defaults when the caller did not
    supply them.  Only keyword arguments found in ``h5py_attributes`` reach
    ``require_dataset``.

    Raises
    ------
    ValueError
        If ``ds`` contains ``var`` and has more than one dimension.
    """
    dtype = varlen_dtype(to_numpy_dtype(ds))

    if datashape.var not in list(ds):
        shape = to_numpy(ds)[0]
    else:
        if len(ds.shape) != 1:
            raise ValueError("Don't know how to handle varlen nd shapes")
        shape = (0,)

    if shape:
        kwargs.setdefault('chunks', True)
        kwargs.setdefault('maxshape', (None,) + shape[1:])

    accepted = keyfilter(h5py_attributes.__contains__, kwargs)
    return file.require_dataset(datapath, shape=shape, dtype=dtype, **accepted)
def promote(lhs, rhs):
    """Promote two scalar dshapes to a possibly larger, but compatible type.

    Each side is replaced by its ``ty`` attribute when it has one, the two
    are combined with ``numpy.promote_types``, and the resulting zero
    dimensional datashape is handed to ``optionify`` together with the
    original operands.

    Examples
    --------
    >>> from datashape import int32, int64, Option
    >>> x = Option(int32)
    >>> y = int64
    >>> promote(x, y)
    ?int64

    Notes
    -----
    This uses ``numpy.promote_types`` for type promotion logic.  See the
    numpy documentation at
    http://docs.scipy.org/doc/numpy/reference/generated/numpy.promote_types.html
    """
    left = getattr(lhs, 'ty', lhs)
    right = getattr(rhs, 'ty', rhs)
    np_left = datashape.to_numpy_dtype(left)
    np_right = datashape.to_numpy_dtype(right)
    promoted = datashape.from_numpy((), np.promote_types(np_left, np_right))
    return optionify(lhs, rhs, promoted)
def dataset_from_dshape(file, datapath, ds, **kwargs):
    """Require a dataset in ``file`` at ``datapath`` described by ``ds``.

    A datashape without ``var`` uses its numpy shape.  When ``var`` appears
    only as the first element, the shape is a leading 0 followed by the
    numpy shape of ``ds.subshape[0]``.  For a non-empty shape, ``chunks``
    and ``maxshape`` get defaults when the caller did not supply them.  Only
    keyword arguments found in ``h5py_attributes`` reach
    ``require_dataset``.

    Raises
    ------
    ValueError
        If ``var`` appears anywhere after the first element of ``ds``.
    """
    dtype = varlen_dtype(to_numpy_dtype(ds))

    if datashape.var not in list(ds):
        shape = to_numpy(ds)[0]
    else:
        if datashape.var in list(ds)[1:]:
            raise ValueError("Don't know how to handle varlen nd shapes")
        shape = (0, ) + to_numpy(ds.subshape[0])[0]

    if shape:
        kwargs.setdefault('chunks', True)
        kwargs.setdefault('maxshape', (None, ) + shape[1:])

    accepted = keyfilter(h5py_attributes.__contains__, kwargs)
    return file.require_dataset(datapath, shape=shape, dtype=dtype, **accepted)
def resource_bcolz(uri, dshape=None, **kwargs):
    """Open the bcolz ``ctable`` at ``uri`` or create an empty container.

    Parameters
    ----------
    uri : str
        Root directory of the bcolz container.
    dshape : str or datashape, optional
        Required when ``uri`` does not exist; its numpy dtype is used for
        the empty seed array.
    **kwargs
        Filtered through ``keywords`` before being forwarded to the
        container constructor.

    Returns
    -------
    The existing ``ctable`` when ``uri`` exists; otherwise a new ``ctable``
    for record measures or a new ``carray`` for any other measure.

    Raises
    ------
    ValueError
        If ``uri`` does not exist and ``dshape`` is falsy.
    """
    if os.path.exists(uri):
        return ctable(rootdir=uri)

    if not dshape:
        # The two literals are concatenated, so the second one must start
        # with a space for the message to read correctly.
        raise ValueError("Must specify either existing bcolz directory or"
                         " valid datashape")

    dshape = datashape.dshape(dshape)
    dt = datashape.to_numpy_dtype(dshape)
    x = np.empty(shape=(0,), dtype=dt)

    accepted = keyfilter(keywords.__contains__, kwargs)
    if datashape.predicates.isrecord(dshape.measure):
        return ctable(x, rootdir=uri, **accepted)
    return carray(x, rootdir=uri, **accepted)
def compute_up(expr, ob, **kwargs):
    """Coerce the scalar ``ob`` to the scalar type ``expr.to``.

    For an ``Option`` target a null ``ob`` (per ``pd.isnull``) yields
    ``None``; otherwise the wrapped type is used.  The value is then
    converted with the scalar constructor of the matching numpy dtype.

    Raises
    ------
    TypeError
        If the target type has a non-empty shape.
    """
    target = expr.to
    shape = target.shape
    if shape:
        message = 'cannot convert scalar object %r to array or matrix shape %r'
        raise TypeError(message % (ob, shape))

    measure = target.measure
    if isinstance(measure, Option):
        if pd.isnull(ob):
            return None
        measure = measure.ty

    return to_numpy_dtype(measure).type(ob)
def __init__(self, path, datapath, mode='r', schema=None, dshape=None, **kwargs):
    """Bind this object to a dataset in an HDF5 file, creating it if absent.

    Parameters
    ----------
    path : path of the file handed to ``h5py.File``.
    datapath : name of the dataset inside the file.
    mode : file mode handed to ``h5py.File`` (default ``'r'``).
    schema : optional record schema; when given without ``dshape`` the
        datashape becomes ``'var * ' + str(schema)``.
    dshape : optional datashape used to create a missing dataset.
    **kwargs : forwarded to ``create_dataset`` when the dataset is created.

    Raises
    ------
    ValueError
        If the dataset does not exist and ``dshape`` is ``None``.
    """
    self.path = path
    self.datapath = datapath
    self.mode = mode

    if schema and not dshape:
        dshape = 'var * ' + str(schema)

    # TODO: provide sane defaults for kwargs
    # Notably chunks and maxshape
    if dshape:
        dshape = datashape.dshape(dshape)
        shape = dshape.shape
        dtype = datashape.to_numpy_dtype(dshape[-1])
        if shape[0] == datashape.Var():
            # A variable-length leading dimension starts empty and growable;
            # any caller-supplied ``chunks`` value is overwritten here.
            kwargs['chunks'] = True
            kwargs['maxshape'] = kwargs.get('maxshape', (None,) + shape[1:])
            shape = (0,) + tuple(map(int, shape[1:]))

    with h5py.File(path, mode) as f:
        dset = f.get(datapath)
        if dset is None:
            # NOTE(review): ``shape``/``dtype`` are only bound when ``dshape``
            # was truthy above, while this guard tests ``is None`` - confirm
            # that a falsy, non-None dshape cannot reach this point.
            if dshape is None:
                raise ValueError('No dataset or dshape provided')
            else:
                f.create_dataset(datapath, shape, dtype=dtype, **kwargs)
        else:
            # An existing dataset wins: the datashape is rebuilt from what
            # is stored and any dshape passed in is discarded.
            dshape2 = datashape.from_numpy(dset.shape, dset.dtype)
            dshape = dshape2
            # TODO: test provided dshape against given dshape
            # if dshape and dshape != dshape2:
            #     raise ValueError('Inconsistent datashapes.'
            #             '\nGiven: %s\nFound: %s' % (dshape, dshape2))

    attributes = self.attributes()
    if attributes['chunks']:
        # Replace the leading dimension of the datashape with ``var`` by
        # editing its string form.
        # is there a better way to do this?
        words = str(dshape).split(' * ')
        dshape = 'var * ' + ' * '.join(words[1:])
        dshape = datashape.dshape(dshape)

    self._dshape = dshape
    self._schema = schema
def unit_to_dtype(ds):
    """Convert a datashape Unit instance into a numpy dtype.

    Strings are parsed into datashapes first and a ``DataShape`` is reduced
    to its measure.  Optional numeric scalars are mapped by rewriting
    ``int`` to ``float`` in the type name (optional decimals go through
    their own numpy dtype, with ``float8`` bumped to ``float16``).  Optional
    date, datetime, string and timedelta types are unwrapped.  ``string``
    maps to the object dtype.

    Parameters
    ----------
    ds : DataShape
        The DataShape instance to convert

    Returns
    -------
    np.dtype

    Examples
    --------
    >>> unit_to_dtype('int32')
    dtype('int32')
    >>> unit_to_dtype('float64')
    dtype('float64')
    >>> unit_to_dtype('?int64')
    dtype('float64')
    >>> unit_to_dtype('string')
    dtype('O')
    >>> unit_to_dtype('?datetime')
    dtype('<M8[us]')
    """
    if isinstance(ds, str):
        ds = dshape(ds)
    if isinstance(ds, DataShape):
        ds = ds.measure

    if isinstance(ds, Option):
        if isscalar(ds) and isnumeric(ds):
            if isinstance(ds.ty, Decimal):
                type_name = str(ds.ty.to_numpy_dtype()).replace('int', 'float')
                if type_name == 'float8':  # not a valid dtype, so increase
                    type_name = 'float16'
            else:
                type_name = str(ds).replace('int', 'float').replace('?', '')
            return unit_to_dtype(type_name)
        if isinstance(ds.ty, (Date, DateTime, String, TimeDelta)):
            ds = ds.ty

    if ds == string:
        return np.dtype('O')
    return to_numpy_dtype(ds)
def into(a, b, **kwargs):
    """Convert the iterable ``b`` into a numpy array.

    The first element decides the conversion.  List or tuple rows produce a
    record array whose dtype comes from the ``dtype`` keyword, else the
    ``dshape`` keyword, else ``dtype_from_tuple`` on the first row.
    Elements with a ``values`` attribute are converted row by row through
    ``tuple(x.values())``.  Everything else is passed to ``np.asarray`` as a
    list, with datetime elements converted to ``np.datetime64`` first.
    ``a`` is not used.
    """
    stream = iter(b)
    first = next(stream)
    # Put the peeked element back in front of the remaining ones.
    stream = toolz.concat([[first], stream])

    if isinstance(first, datetime):
        stream = map(np.datetime64, stream)

    if isinstance(first, (list, tuple)):
        if 'dtype' in kwargs:
            dtype = kwargs.pop('dtype')
        elif 'dshape' in kwargs:
            dtype = to_numpy_dtype(dshape(kwargs.pop('dshape')))
        else:
            dtype = dtype_from_tuple(first)
        rows = [tuple(row) for row in stream]
        return np.rec.fromrecords(rows, dtype=dtype, **kwargs)

    if hasattr(first, 'values'):
        # detecting sqlalchemy.engine.result.RowProxy types and similar
        rows = [tuple(row.values()) for row in stream]
        return np.asarray(rows, **kwargs)

    return np.asarray(list(stream), **kwargs)
def compute_up(expr, data, **kwargs):
    """Cast ``data`` to the numpy dtype derived from ``expr.schema``."""
    target_dtype = to_numpy_dtype(expr.schema)
    return data.astype(target_dtype)
def test_string(self):
    """A ``string`` measure maps to the numpy object dtype."""
    actual = to_numpy_dtype(dshape('2 * string'))
    self.assertEqual(actual, np.dtype('O'))
def into(a, b):
    """Compute expression ``b`` and convert the result into a numpy array.

    For a column expression the dtype comes from the first field type of the
    schema's record; otherwise it comes from the whole schema.  ``a`` is not
    used by this implementation.
    """
    is_column = b.iscolumn
    target = np.ndarray(0)
    computed = compute(b)
    measure = b.schema[0].types[0] if is_column else b.schema
    return into(target, computed, dtype=to_numpy_dtype(measure))
def array(obj, dshape=None, caps={'efficient-write': True}, storage=None):
    """Create a Blaze array.

    The first matching rule below decides which data descriptor backs the
    array: an existing ``Array`` is returned as-is, an ``IDataDescriptor``
    is wrapped directly, a given ``storage`` selects a BLZ descriptor rooted
    at ``storage.path``, then the ``caps`` flags are consulted, and finally
    the concrete type of ``obj``.

    Parameters
    ----------
    obj : array_like
        Initial contents for the array.

    dshape : datashape
        The datashape for the resulting array. By default the
        datashape will be inferred from data. If an explicit dshape is
        provided, the input data will be coerced into the provided
        dshape.

    caps : capabilities dictionary
        A dictionary containing the desired capabilities of the array.
        The default dict is shared between calls; it is only read here.

    storage : Storage instance
        A Storage object with the necessary info for storing the data.

    Returns
    -------
    out : a concrete blaze array.

    Raises
    ------
    TypeError
        If no rule applies to ``obj``.

    Bugs
    ----
    Right now the explicit dshape is ignored. This needs to be
    corrected. When the data cannot be coerced to an explicit dshape
    an exception should be raised.

    NOTE(review): the storage, 'efficient-write' and 'compress' branches
    below do pass a type derived from ``dshape`` - confirm whether the
    statement above is still accurate.
    """
    dshape = _normalize_dshape(dshape)

    storage = _storage_convert(storage)

    if isinstance(obj, Array):
        return obj
    elif isinstance(obj, IDataDescriptor):
        # TODO: Validate the 'caps', convert to another kind
        #       of data descriptor if necessary
        # Note by Francesc: but if it is already an IDataDescriptor I wonder
        # if `caps` should be ignored.  Hmm, probably not...
        #
        # Note by Oscar: Maybe we shouldn't accept a datadescriptor at
        #   all at this level. If you've got a DataDescriptor you are
        #   playing with internal datastructures anyways, go to the
        #   Array constructor directly. If you want to transform to
        #   another datadescriptor... convert it yourself (you are
        #   playing with internal datastructures, remember? you should
        #   be able to do it in your own).
        dd = obj
    elif storage is not None:
        dt = None if dshape is None else to_numpy_dtype(dshape)
        if inspect.isgenerator(obj):
            # TODO: Generator logic can go inside barray
            # NOTE(review): this passes ``count=-1`` to ``blz.barray`` while
            # the 'compress' branch uses ``blz.fromiter`` for generators -
            # confirm ``barray`` accepts a generator and ``count``.
            dd = BLZDataDescriptor(blz.barray(obj, dtype=dt, count=-1,
                                              rootdir=storage.path))
        else:
            dd = BLZDataDescriptor(
                blz.barray(obj, dtype=dt, rootdir=storage.path))
    elif 'efficient-write' in caps and caps['efficient-write'] is True:
        # In-Memory array
        if dshape is None:
            dd = DyNDDataDescriptor(nd.asarray(obj, access='rw'))
        else:
            # Use the uniform/full dtype specification in dynd depending
            # on whether the datashape has a uniform dim
            dt = ndt.type(str(dshape))
            if dt.ndim > 0:
                dd = DyNDDataDescriptor(nd.array(obj, type=dt, access='rw'))
            else:
                dd = DyNDDataDescriptor(nd.array(obj, dtype=dt, access='rw'))
    elif 'compress' in caps and caps['compress'] is True:
        dt = None if dshape is None else to_numpy_dtype(dshape)
        # BLZ provides compression
        if inspect.isgenerator(obj):
            # TODO: Generator logic can go inside barray
            dd = BLZDataDescriptor(blz.fromiter(obj, dtype=dt, count=-1))
        else:
            dd = BLZDataDescriptor(blz.barray(obj, dtype=dt))

    elif isinstance(obj, np.ndarray):
        dd = DyNDDataDescriptor(nd.view(obj))
    elif isinstance(obj, nd.array):
        dd = DyNDDataDescriptor(obj)
    elif isinstance(obj, blz.barray):
        dd = BLZDataDescriptor(obj)
    else:
        raise TypeError(('Failed to construct blaze array from '
                        'object of type %r') % type(obj))
    return Array(dd)
def into(a, b, **kwargs):
    """Compute expression ``b`` and convert the result into a numpy array.

    The dtype is derived from ``b``'s schema after every ``?`` has been
    removed from its string form.  Neither ``a`` nor ``kwargs`` is used.
    """
    plain_schema = dshape(str(b.schema).replace('?', ''))
    target = np.ndarray(0)
    computed = compute(b)
    return into(target, computed, dtype=to_numpy_dtype(plain_schema))
def test_timedelta(self):
    """Timedelta measures map to ``m8`` dtypes carrying the right unit."""
    default_unit = to_numpy_dtype(dshape('2 * timedelta'))
    assert default_unit == np.dtype('m8[us]')

    seconds = to_numpy_dtype(dshape("2 * timedelta[unit='s']"))
    assert seconds == np.dtype('m8[s]')
def test_string(self):
    """A ``string`` measure maps to the numpy object dtype."""
    actual = to_numpy_dtype(dshape('2 * string'))
    assert actual == np.dtype('O')
def test_date(self):
    """A ``date`` measure maps to the day-resolution datetime64 dtype."""
    actual = to_numpy_dtype(dshape('2 * date'))
    assert actual == np.dtype('M8[D]')
def test_decimal(self):
    """Decimal measures map to numpy types; ``decimal[21]`` is rejected."""
    expectations = [
        ('decimal[18,0]', np.int64),
        ('decimal[7,2]', np.float64),
        ('decimal[4]', np.int16),
    ]
    for text, numpy_type in expectations:
        assert to_numpy_dtype(dshape(text)) == numpy_type

    with pytest.raises(TypeError):
        to_numpy_dtype(dshape('decimal[21]'))
def test_simple(self):
    """Scalar and record measures map to the matching numpy dtypes."""
    scalar = to_numpy_dtype(dshape('2 * int32'))
    self.assertEqual(scalar, np.int32)

    record = to_numpy_dtype(dshape('2 * {x: int32, y: int32}'))
    expected_record = np.dtype([('x', '<i4'), ('y', '<i4')])
    self.assertEqual(record, expected_record)
def test_date(self):
    """A ``date`` measure maps to the day-resolution datetime64 dtype."""
    actual = to_numpy_dtype(dshape('2 * date'))
    self.assertEqual(actual, np.dtype('M8[D]'))
def array(obj, dshape=None, caps={'efficient-write': True}, storage=None):
    """Create a Blaze array.

    The first matching rule below decides which data descriptor backs the
    array: an existing ``Array`` is returned as-is, an ``IDataDescriptor``
    is wrapped directly, a given ``storage`` selects a BLZ descriptor rooted
    at ``storage.path``, then the ``caps`` flags are consulted, and finally
    the concrete type of ``obj``.

    Parameters
    ----------
    obj : array_like
        Initial contents for the array.

    dshape : datashape
        The datashape for the resulting array. By default the
        datashape will be inferred from data. If an explicit dshape is
        provided, the input data will be coerced into the provided
        dshape.

    caps : capabilities dictionary
        A dictionary containing the desired capabilities of the array.
        The default dict is shared between calls; it is only read here.

    storage : Storage instance
        A Storage object with the necessary info for storing the data.

    Returns
    -------
    out : a concrete blaze array.

    Raises
    ------
    TypeError
        If no rule applies to ``obj``.

    Bugs
    ----
    Right now the explicit dshape is ignored. This needs to be
    corrected. When the data cannot be coerced to an explicit dshape
    an exception should be raised.

    NOTE(review): the storage, 'efficient-write' and 'compress' branches
    below do pass a type derived from ``dshape`` - confirm whether the
    statement above is still accurate.
    """
    dshape = _normalize_dshape(dshape)

    storage = _storage_convert(storage)

    if isinstance(obj, Array):
        return obj
    elif isinstance(obj, IDataDescriptor):
        # TODO: Validate the 'caps', convert to another kind
        #       of data descriptor if necessary
        # Note by Francesc: but if it is already an IDataDescriptor I wonder
        # if `caps` should be ignored.  Hmm, probably not...
        #
        # Note by Oscar: Maybe we shouldn't accept a datadescriptor at
        #   all at this level. If you've got a DataDescriptor you are
        #   playing with internal datastructures anyways, go to the
        #   Array constructor directly. If you want to transform to
        #   another datadescriptor... convert it yourself (you are
        #   playing with internal datastructures, remember? you should
        #   be able to do it in your own).
        dd = obj
    elif storage is not None:
        dt = None if dshape is None else to_numpy_dtype(dshape)
        if inspect.isgenerator(obj):
            # TODO: Generator logic can go inside barray
            # NOTE(review): this passes ``count=-1`` to ``blz.barray`` while
            # the 'compress' branch uses ``blz.fromiter`` for generators -
            # confirm ``barray`` accepts a generator and ``count``.
            dd = BLZDataDescriptor(
                blz.barray(obj, dtype=dt, count=-1, rootdir=storage.path))
        else:
            dd = BLZDataDescriptor(
                blz.barray(obj, dtype=dt, rootdir=storage.path))
    elif 'efficient-write' in caps and caps['efficient-write'] is True:
        # In-Memory array
        if dshape is None:
            dd = DyNDDataDescriptor(nd.asarray(obj, access='rw'))
        else:
            # Use the uniform/full dtype specification in dynd depending
            # on whether the datashape has a uniform dim
            dt = ndt.type(str(dshape))
            if dt.ndim > 0:
                dd = DyNDDataDescriptor(nd.array(obj, type=dt, access='rw'))
            else:
                dd = DyNDDataDescriptor(nd.array(obj, dtype=dt, access='rw'))
    elif 'compress' in caps and caps['compress'] is True:
        dt = None if dshape is None else to_numpy_dtype(dshape)
        # BLZ provides compression
        if inspect.isgenerator(obj):
            # TODO: Generator logic can go inside barray
            dd = BLZDataDescriptor(blz.fromiter(obj, dtype=dt, count=-1))
        else:
            dd = BLZDataDescriptor(blz.barray(obj, dtype=dt))

    elif isinstance(obj, np.ndarray):
        dd = DyNDDataDescriptor(nd.view(obj))
    elif isinstance(obj, nd.array):
        dd = DyNDDataDescriptor(obj)
    elif isinstance(obj, blz.barray):
        dd = BLZDataDescriptor(obj)
    else:
        raise TypeError(('Failed to construct blaze array from '
                        'object of type %r') % type(obj))
    return Array(dd)
def test_dimensions(self):
    """A leading ``var`` dimension does not change the measure's numpy dtype."""
    # This must be an assertion: returning the comparison result would let
    # the test pass no matter what ``to_numpy_dtype`` produced.
    assert to_numpy_dtype(dshape('var * int32')) == np.int32