def test_syntax(self):
    """Check that ``*`` composes dimensions and measures into datashapes."""
    fixed_int32 = dshape('3 * int32')
    var_int32 = dshape('var * int32')

    # Fixed leading dimension: explicit Fixed object, then a bare integer.
    self.assertEqual(datashape.Fixed(3) * dshape('int32'), fixed_int32)
    self.assertEqual(3 * dshape('int32'), fixed_int32)

    # Variable leading dimension against a parsed dshape, a type object
    # and a plain string measure.
    self.assertEqual(datashape.Var() * dshape('int32'), var_int32)
    self.assertEqual(datashape.Var() * datashape.int32, var_int32)
    self.assertEqual(datashape.Var() * 'int32', var_int32)

    # Bare integer against a type object.
    self.assertEqual(3 * datashape.int32, fixed_int32)
def test_extend(tmpcsv, schema):
    """Extend a space-delimited CSV and check the file rows and the dshape."""
    csv_store = CSV(tmpcsv, 'w', schema=schema, delimiter=' ')
    csv_store.extend(data)

    with open(tmpcsv) as handle:
        written = handle.readlines()

    wanted_rows = ('Alice 100', 'Bob 200', 'Alice 50')
    for position, wanted in enumerate(wanted_rows):
        assert written[position].strip() == wanted

    # Compare string forms of the datashapes rather than the objects.
    leading_dim = datashape.Var()
    row_type = datashape.dshape(schema)
    wanted_dshape = datashape.DataShape(leading_dim, row_type)
    assert str(csv_store.dshape) == str(wanted_dshape)
def __init__(self, path, datapath, mode='r', schema=None, dshape=None,
             **kwargs):
    """Open, or create, the dataset at ``datapath`` inside the file ``path``.

    When only ``schema`` is supplied the datashape is taken to be
    ``'var * ' + str(schema)``.  If the file already holds a dataset at
    ``datapath`` its on-disk shape and dtype replace any supplied
    datashape; otherwise a new dataset is created from the supplied
    datashape, with ``kwargs`` forwarded to ``create_dataset``.

    Raises
    ------
    ValueError
        If the dataset does not exist and ``dshape`` is ``None``.
    """
    self.path = path
    self.datapath = datapath
    self.mode = mode

    if schema and not dshape:
        dshape = 'var * ' + str(schema)

    # TODO: provide sane defaults for kwargs
    # Notably chunks and maxshape
    if dshape:
        dshape = datashape.dshape(dshape)
        shape = dshape.shape
        dtype = datashape.to_numpy_dtype(dshape[-1])
        if shape[0] == datashape.Var():
            # A variable leading dimension: start empty and request a
            # chunked dataset with an unbounded first axis unless the
            # caller already supplied a maxshape.
            kwargs['chunks'] = True
            kwargs.setdefault('maxshape', (None, ) + shape[1:])
            shape = (0, ) + tuple(int(dim) for dim in shape[1:])

    with h5py.File(path, mode) as h5file:
        existing = h5file.get(datapath)
        if existing is None:
            if dshape is None:
                raise ValueError('No dataset or dshape provided')
            h5file.create_dataset(datapath, shape, dtype=dtype, **kwargs)
        else:
            # TODO: validate a caller-supplied dshape against the one
            # found on disk; for now the on-disk one always wins.
            dshape = datashape.from_numpy(existing.shape, existing.dtype)

    # NOTE(review): self.attributes() is defined outside this block;
    # presumably it reports properties of the stored dataset -- confirm.
    if self.attributes()['chunks']:
        # is there a better way to do this?
        # Swap the leading dimension for ``var`` via the string form.
        parts = str(dshape).split(' * ')
        dshape = datashape.dshape('var * ' + ' * '.join(parts[1:]))

    self._dshape = dshape
    self._schema = schema
def test_extend(self):
    """Extend a space-delimited CSV and check the file rows and the dshape."""
    csv_store = CSV(self.filename, 'w', schema=self.schema, delimiter=' ')
    csv_store.extend(self.data)

    with open(self.filename) as handle:
        written = handle.readlines()

    for position, wanted in enumerate(('Alice 100', 'Bob 200', 'Alice 50')):
        self.assertEqual(written[position].strip(), wanted)

    wanted_dshape = datashape.DataShape(datashape.Var(), self.schema)

    # TODO: datashape comparison is broken
    # Until then, compare the string forms with all spaces removed.
    def squash(shape):
        return str(shape).replace(' ', '')

    self.assertEqual(squash(csv_store.dshape), squash(wanted_dshape))
def __init__(self, path, datapath, schema=None, dshape=None, **kwargs):
    """Bind to the dataset at ``datapath`` inside the HDF5 file ``path``.

    ``schema`` and ``dshape`` may be given as strings, in which case they
    are parsed with ``datashape.dshape``.  With only ``schema`` the
    datashape becomes ``var * schema``; with neither, it is discovered
    from the dataset already stored in the file.  If looking up
    ``datapath`` afterwards yields nothing truthy, a dataset is created
    with ``kwargs`` forwarded to ``create_dataset``.

    Raises
    ------
    ValueError
        If no datashape is supplied and none is found in the file.
    """
    self.path = path
    self.datapath = datapath

    # Accept textual schema/dshape and normalise them to datashape objects.
    if isinstance(schema, _strtypes):
        schema = datashape.dshape(schema)
    if isinstance(dshape, _strtypes):
        dshape = datashape.dshape(dshape)

    if schema and not dshape:
        dshape = var * datashape.dshape(schema)

    if not dshape:
        # Nothing supplied: fall back to whatever is stored in the file.
        with h5py.File(path, 'r') as h5file:
            stored = h5file.get(datapath)
            if not stored:
                raise ValueError("No datashape given or found. "
                                 "Please specify dshape or schema keyword args")
            dshape = discover(stored)

    # TODO: provide sane defaults for kwargs
    # Notably chunks and maxshape
    shape = dshape.shape
    dtype = varlen_dtype(dshape[-1].to_numpy_dtype())
    if shape[0] == datashape.Var():
        # A variable leading dimension: start empty and request a chunked
        # dataset with an unbounded first axis unless the caller already
        # supplied a maxshape.
        kwargs['chunks'] = True
        kwargs.setdefault('maxshape', (None,) + shape[1:])
        shape = (0,) + tuple(int(dim) for dim in shape[1:])

    # NOTE(review): no mode is passed here, so creating the dataset relies
    # on h5py.File's default mode permitting writes -- confirm against the
    # h5py version in use.
    with h5py.File(path) as h5file:
        if not h5file.get(datapath):
            h5file.create_dataset(datapath, shape, dtype=dtype, **kwargs)

    # NOTE(review): self.attributes() is defined outside this block;
    # presumably it reports properties of the stored dataset -- confirm.
    if self.attributes()['chunks']:
        dshape = var * dshape.subshape[0]

    self._dshape = dshape
    self._schema = schema
def dshape(self):
    """Return the datashape ``var * schema`` built from ``self.schema``."""
    leading_dim = datashape.Var()
    return leading_dim * self.schema
def dshape(self):
    """Return a ``DataShape`` made of a ``Var`` dimension and ``self.schema``."""
    leading_dim = datashape.Var()
    record_type = self.schema
    return datashape.DataShape(leading_dim, record_type)
def test_python_containers(self):
    """Check that ``dshape`` accepts tuples and lists of dimensions and a measure."""
    var = datashape.Var()
    int32 = datashape.int32

    # (textual datashape, equivalent Python container)
    cases = (
        ('3 * int32', (3, int32)),
        ('3 * int32', [3, int32]),
        ('var * 3 * int32', (var, 3, int32)),
    )
    for text, container in cases:
        self.assertEqual(dshape(text), dshape(container))