Example #1
0
 def test_syntax(self):
     """Dimension ``*`` measure must equal the corresponding parsed dshape.

     Covers a fixed leading dimension (``Fixed(3)`` and a bare ``3``) and a
     variable one (``Var()``), multiplied by a parsed dshape, by the
     ``datashape.int32`` object and by the plain string ``'int32'``.
     """
     check = self.assertEqual

     # Fixed-length leading dimension against a parsed measure.
     check(datashape.Fixed(3) * dshape('int32'), dshape('3 * int32'))
     check(3 * dshape('int32'), dshape('3 * int32'))

     # Variable-length leading dimension, once per accepted measure form.
     for measure in (dshape('int32'), datashape.int32, 'int32'):
         check(datashape.Var() * measure, dshape('var * int32'))

     # A bare integer multiplied by the type object itself.
     check(3 * datashape.int32, dshape('3 * int32'))
Example #2
0
def test_extend(tmpcsv, schema):
    """Extending a write-mode CSV persists the rows and reports a var dshape.

    Pushes the externally defined ``data`` through ``CSV.extend`` using a
    space delimiter, checks the first three lines found on disk, then
    compares the descriptor's dshape with ``Var()`` over ``schema`` by
    string form.
    """
    descriptor = CSV(tmpcsv, 'w', schema=schema, delimiter=' ')
    descriptor.extend(data)

    with open(tmpcsv) as handle:
        written = handle.readlines()

    # Index explicitly so that a file with too few lines raises instead of
    # being silently skipped.
    for position, wanted in enumerate(('Alice 100', 'Bob 200', 'Alice 50')):
        assert written[position].strip() == wanted

    wanted_dshape = datashape.DataShape(
        datashape.Var(), datashape.dshape(schema))

    assert str(descriptor.dshape) == str(wanted_dshape)
Example #3
0
    def __init__(self,
                 path,
                 datapath,
                 mode='r',
                 schema=None,
                 dshape=None,
                 **kwargs):
        """Bind to the dataset ``datapath`` inside the HDF5 file ``path``.

        When the dataset is missing it is created from ``dshape`` (or from
        ``'var * ' + str(schema)`` when only ``schema`` is given).  When it
        already exists, the datashape derived from its on-disk shape and
        dtype replaces whatever datashape was passed in.

        Parameters
        ----------
        path :
            File name handed to ``h5py.File``.
        datapath :
            Name of the dataset looked up (and possibly created) in the file.
        mode :
            Mode handed to ``h5py.File``; defaults to ``'r'``.
        schema :
            Optional record description; stored as ``self._schema`` and used
            to derive ``dshape`` when ``dshape`` is not given.
        dshape :
            Optional datashape (anything ``datashape.dshape`` accepts).
        **kwargs :
            Forwarded to ``create_dataset``.  For a datashape whose leading
            dimension is ``Var()``, ``chunks`` is forced to ``True`` and
            ``maxshape`` gets a default with an unlimited first axis.

        Raises
        ------
        ValueError
            If the dataset does not exist and no usable ``dshape``/``schema``
            was supplied.
        """
        self.path = path
        self.datapath = datapath
        self.mode = mode

        if schema and not dshape:
            dshape = 'var * ' + str(schema)

        # TODO: provide sane defaults for kwargs
        # Notably chunks and maxshape
        if dshape:
            dshape = datashape.dshape(dshape)
            shape = dshape.shape
            dtype = datashape.to_numpy_dtype(dshape[-1])
            if shape[0] == datashape.Var():
                # A variable leading dimension is stored as an initially
                # empty, chunked dataset that may grow along axis 0.
                kwargs['chunks'] = True
                kwargs['maxshape'] = kwargs.get('maxshape',
                                                (None, ) + shape[1:])
                shape = (0, ) + tuple(map(int, shape[1:]))

        with h5py.File(path, mode) as f:
            dset = f.get(datapath)
            if dset is None:
                # ``shape`` and ``dtype`` are only bound when a truthy dshape
                # was supplied above, so every falsy value (not just None)
                # has to be rejected here.  Testing ``is None`` alone let
                # e.g. dshape='' fall through to create_dataset and fail
                # with UnboundLocalError instead of this ValueError.
                if not dshape:
                    raise ValueError('No dataset or dshape provided')
                f.create_dataset(datapath, shape, dtype=dtype, **kwargs)
            else:
                # The dataset already on disk wins over the given dshape.
                dshape2 = datashape.from_numpy(dset.shape, dset.dtype)
                dshape = dshape2
                # TODO: test provided dshape against given dshape
                # if dshape and dshape != dshape2:
                #     raise ValueError('Inconsistent datashapes.'
                #             '\nGiven: %s\nFound: %s' % (dshape, dshape2))

        attributes = self.attributes()
        if attributes['chunks']:
            # Report a chunked dataset with a ``var`` leading dimension by
            # swapping the first ' * '-separated term of the dshape string.
            # is there a better way to do this?
            words = str(dshape).split(' * ')
            dshape = 'var * ' + ' * '.join(words[1:])
            dshape = datashape.dshape(dshape)

        self._dshape = dshape
        self._schema = schema
    def test_extend(self):
        """Extending a write-mode CSV writes the rows and keeps a var dshape.

        Sends ``self.data`` through ``CSV.extend`` with a space delimiter,
        checks the first three lines of ``self.filename``, then compares the
        descriptor's dshape with ``Var()`` over ``self.schema``.
        """
        csv_dd = CSV(self.filename, 'w', schema=self.schema, delimiter=' ')
        csv_dd.extend(self.data)

        with open(self.filename) as handle:
            written = handle.readlines()
            for position, wanted in enumerate(
                    ('Alice 100', 'Bob 200', 'Alice 50')):
                self.assertEqual(written[position].strip(), wanted)

        def compact(shape):
            # Compare string forms with all spaces removed.
            return str(shape).replace(' ', '')

        wanted_dshape = datashape.DataShape(datashape.Var(), self.schema)
        # TODO: datashape comparison is broken
        self.assertEqual(compact(csv_dd.dshape), compact(wanted_dshape))
Example #5
0
File: hdf5.py Project: vitan/blaze
    def __init__(self, path, datapath,
                 schema=None, dshape=None, **kwargs):
        """Bind to the dataset ``datapath`` inside the HDF5 file ``path``.

        The datashape comes from ``dshape``, else from ``var * schema``,
        else from ``discover`` applied to the dataset already in the file.
        If the dataset does not exist it is created from that datashape.

        Parameters
        ----------
        path :
            File name handed to ``h5py.File``.
        datapath :
            Name of the dataset looked up (and possibly created) in the file.
        schema :
            Optional record description; strings are parsed with
            ``datashape.dshape``.  Stored as ``self._schema``.
        dshape :
            Optional datashape; strings are parsed with ``datashape.dshape``.
        **kwargs :
            Forwarded to ``create_dataset``.  For a datashape whose leading
            dimension is ``Var()``, ``chunks`` is forced to ``True`` and
            ``maxshape`` gets a default with an unlimited first axis.

        Raises
        ------
        ValueError
            If neither ``dshape`` nor ``schema`` is given and the file has no
            dataset at ``datapath``.
        """
        self.path = path
        self.datapath = datapath

        if isinstance(schema, _strtypes):
            schema = datashape.dshape(schema)
        if isinstance(dshape, _strtypes):
            dshape = datashape.dshape(dshape)
        if schema and not dshape:
            dshape = var * datashape.dshape(schema)

        if not dshape:
            with h5py.File(path, 'r') as f:
                dset = f.get(datapath)
                # ``get`` signals a missing name with None; test for exactly
                # that instead of relying on the dataset's truth value.
                if dset is None:
                    raise ValueError("No datashape given or found. "
                             "Please specify dshape or schema keyword args")
                dshape = discover(dset)

        # TODO: provide sane defaults for kwargs
        # Notably chunks and maxshape
        shape = dshape.shape
        dtype = varlen_dtype(dshape[-1].to_numpy_dtype())
        if shape[0] == datashape.Var():
            # A variable leading dimension is stored as an initially empty,
            # chunked dataset that may grow along axis 0.
            kwargs['chunks'] = True
            kwargs['maxshape'] = kwargs.get('maxshape', (None,) + shape[1:])
            shape = (0,) + tuple(map(int, shape[1:]))

        # Probe read-only first so that an existing dataset on read-only
        # storage can still be used; write access is only requested when the
        # dataset really has to be created.
        try:
            with h5py.File(path, 'r') as f:
                exists = f.get(datapath) is not None
        except (IOError, OSError):
            # Presumably the file itself does not exist yet; any other
            # problem resurfaces from the append-mode open below.
            exists = False

        if not exists:
            # The mode is spelled out because h5py's default changed from
            # 'a' to 'r' in 3.0; with the implicit default, create_dataset
            # fails on current h5py.
            with h5py.File(path, 'a') as f:
                if f.get(datapath) is None:
                    f.create_dataset(datapath, shape, dtype=dtype, **kwargs)

        attributes = self.attributes()
        if attributes['chunks']:
            # Report a chunked dataset with a ``var`` leading dimension.
            dshape = var * dshape.subshape[0]

        self._dshape = dshape
        self._schema = schema
Example #6
0
 def dshape(self):
     """Return ``self.schema`` prefixed with a variable-length dimension."""
     leading_dim = datashape.Var()
     return leading_dim * self.schema
 def dshape(self):
     """Return a ``DataShape`` built from ``Var()`` and ``self.schema``."""
     make_shape = datashape.DataShape
     leading_dim = datashape.Var()
     return make_shape(leading_dim, self.schema)
Example #8
0
 def test_python_containers(self):
     """``dshape`` must accept tuples and lists of dimensions and a measure.

     Each container form is compared against the equivalent parsed string.
     """
     variable = datashape.Var()
     i32 = datashape.int32

     # The same fixed shape, given once as a tuple and once as a list.
     for container in ((3, i32), [3, i32]):
         self.assertEqual(dshape('3 * int32'), dshape(container))

     # A variable leading dimension inside a tuple.
     self.assertEqual(dshape('var * 3 * int32'), dshape((variable, 3, i32)))