コード例 #1
0
ファイル: promote.py プロジェクト: telefunkenvf14/datashape
def promote(lhs, rhs):
    """Return a scalar dshape that both ``lhs`` and ``rhs`` can be promoted to.

    Equal inputs are returned unchanged.  Otherwise each input is replaced by
    its ``ty`` attribute when it has one, both are converted to numpy dtypes,
    combined with ``numpy.result_type``, and the resulting ``CType`` is passed
    to ``optionify`` together with the original inputs.

    Examples
    --------
    >>> from datashape import int32, int64, Option
    >>> x = Option(int32)
    >>> y = int64
    >>> promote(x, y)
    ?int64
    >>> promote(int64, int64)
    ctype("int64")

    Notes
    -----
    The promotion rules are those of ``numpy.result_type``; see
    http://docs.scipy.org/doc/numpy/reference/generated/numpy.result_type.html
    """
    if lhs == rhs:
        return lhs

    # Objects carrying a ``ty`` attribute (presumably Option) are unwrapped.
    unwrapped = [getattr(side, "ty", side) for side in (lhs, rhs)]
    np_dtypes = [datashape.to_numpy_dtype(ty) for ty in unwrapped]
    promoted = np.result_type(*np_dtypes)
    return optionify(lhs, rhs, datashape.CType.from_numpy_dtype(promoted))
コード例 #2
0
ファイル: table.py プロジェクト: ChrisBeaumont/blaze
def into(a, b):
    """Compute ``b`` and convert the result into a numpy array.

    Every ``?`` is stripped from the textual form of ``b.schema`` before it is
    turned into a numpy dtype.  When ``b.iscolumn`` is true, only the first
    field type of the record is used as the dtype.  ``a`` is not read here.
    """
    stripped = dshape(str(b.schema).replace('?', ''))
    if not b.iscolumn:
        return into(np.ndarray(0), compute(b), dtype=to_numpy_dtype(stripped))
    return into(np.ndarray(0), compute(b),
                dtype=to_numpy_dtype(stripped[0].types[0]))
コード例 #3
0
def promote(lhs, rhs, promote_option=True):
    """Return a scalar dshape that both ``lhs`` and ``rhs`` can be promoted to.

    Equal inputs are returned unchanged.  Otherwise each input is replaced by
    its ``ty`` attribute when it has one, and the numpy dtypes of both are
    combined with ``numpy.result_type``.  When ``promote_option`` is true the
    result is additionally passed through ``optionify`` with the original
    inputs.

    Examples
    --------
    >>> from datashape import int32, int64, Option
    >>> x = Option(int32)
    >>> y = int64
    >>> promote(x, y)
    Option(ty=ctype("int64"))
    >>> promote(int64, int64)
    ctype("int64")

    Don't promote to option types.
    >>> promote(x, y, promote_option=False)
    ctype("int64")

    Notes
    -----
    The promotion rules are those of ``numpy.result_type``; see
    http://docs.scipy.org/doc/numpy/reference/generated/numpy.result_type.html
    """
    if lhs == rhs:
        return lhs

    left = getattr(lhs, 'ty', lhs)
    right = getattr(rhs, 'ty', rhs)
    np_result = np.result_type(
        datashape.to_numpy_dtype(left),
        datashape.to_numpy_dtype(right),
    )
    promoted = datashape.CType.from_numpy_dtype(np_result)
    return optionify(lhs, rhs, promoted) if promote_option else promoted
コード例 #4
0
ファイル: table.py プロジェクト: pgnepal/blaze
def into(a, b):
    """Compute ``b`` and convert the result into a numpy array.

    The schema of ``b`` is rendered as text, every ``?`` is removed, and the
    result is parsed back into a dshape.  For a column expression the dtype
    comes from the first field type of that schema; otherwise from the whole
    schema.  ``a`` is not read here.
    """
    cleaned = dshape(str(b.schema).replace('?', ''))
    if not b.iscolumn:
        return into(np.ndarray(0), compute(b), dtype=to_numpy_dtype(cleaned))
    return into(np.ndarray(0), compute(b),
                dtype=to_numpy_dtype(cleaned[0].types[0]))
コード例 #5
0
def compute_up(t, x, **kwargs):
    """Apply the numpy reduction named ``t.symbol`` to ``x``.

    ``axis`` and ``keepdims`` are taken from ``t``.  A ``dtype`` derived from
    ``t.schema`` is passed only when ``keywords(reducer)`` lists ``dtype``.
    """
    # Look the function up on the numpy module: the array methods aren't
    # Python functions, so they can't be used here.
    func = getattr(np, t.symbol)
    accepts_dtype = 'dtype' in keywords(func)
    options = dict(axis=t.axis, keepdims=t.keepdims)
    if accepts_dtype:
        options['dtype'] = to_numpy_dtype(t.schema)
    return func(x, **options)
コード例 #6
0
ファイル: test_types.py プロジェクト: mwiebe/blaze
 def test_uints(self):
     """Unsigned integer dshapes round-trip to the matching numpy dtype."""
     for name in ('uint8', 'uint16', 'uint32', 'uint64'):
         arr = blaze.array(np.arange(3), dshape=name)
         self.assertEqual(to_numpy_dtype(arr.dshape), np.dtype(name))
         # The stored values must survive the conversion unchanged.
         self.assertEqual(dd_as_py(arr._data), [0, 1, 2])
コード例 #7
0
ファイル: bcolz.py プロジェクト: quasiben/odo
def resource_bcolz(uri, dshape=None, expected_dshape=None, **kwargs):
    """Open the bcolz container at ``uri`` or create an empty one there.

    If ``uri`` exists it is opened as a ``ctable``, falling back to ``carray``
    when that raises ``IOError``.  Otherwise ``dshape`` is required: an empty
    numpy array of the matching dtype and shape is built (a leading ``var``
    dimension becomes length 0) and wrapped in a ``ctable`` for record
    measures or a ``carray`` otherwise.

    Raises
    ------
    ValueError
        If ``uri`` does not exist and ``dshape`` is falsy.
    """
    if os.path.exists(uri):
        try:
            return ctable(rootdir=uri)
        except IOError:  # __rootdirs__ doesn't exist because we aren't a ctable
            return carray(rootdir=uri)

    if not dshape:
        raise ValueError("Must specify either existing bcolz directory or"
                         " valid datashape")
    dshape = datashape.dshape(dshape)

    np_dtype = datashape.to_numpy_dtype(dshape)
    trailing = tuple(map(int, dshape.shape[1:]))  # tail of shape
    if dshape.shape[0] == datashape.var:
        leading = 0
    else:
        leading = int(dshape.shape[0])
    empty = np.empty(shape=(leading,) + trailing, dtype=np_dtype)

    kwargs = keyfilter(keywords.__contains__, kwargs)
    # Use the first dimension of ``expected_dshape`` as the size hint, but
    # only when it is a Fixed dimension; an explicit kwarg takes precedence.
    if (expected_dshape is not None and
            isinstance(expected_dshape[0], datashape.Fixed)):
        fallback_len = int(expected_dshape[0])
    else:
        fallback_len = None
    expectedlen = kwargs.pop('expectedlen', fallback_len)

    if datashape.predicates.isrecord(dshape.measure):
        factory = ctable
    else:
        factory = carray
    return factory(empty, rootdir=uri, expectedlen=expectedlen, **kwargs)
コード例 #8
0
ファイル: into.py プロジェクト: dalejung/blaze
def into(a, b, **kwargs):
    """Compute ``b`` and convert the result into ``a``.

    When the computed value is a list, tuple or ``Iterator``, the numpy
    dtypes of the schema's field types and the column names of ``b`` are
    added to ``kwargs`` as ``types`` and ``names`` before delegating.
    """
    computed = compute(b)
    if not isinstance(computed, (list, tuple, Iterator)):
        return into(a, computed, **kwargs)

    field_types = b.schema[0].types
    kwargs['types'] = [datashape.to_numpy_dtype(ft) for ft in field_types]
    kwargs['names'] = b.columns
    return into(a, computed, **kwargs)
コード例 #9
0
ファイル: numpy.py プロジェクト: NeilBryant/blaze
def compute_up(t, x, **kwargs):
    """Run the numpy reduction named by ``t.symbol`` over ``x``.

    ``t.axis`` and ``t.keepdims`` are always forwarded.  ``dtype`` (derived
    from ``t.schema``) is forwarded only if ``keywords(reducer)`` contains it.
    """
    # The module-level function is used because the array methods aren't
    # Python functions.
    reduce_fn = getattr(np, t.symbol)
    if 'dtype' not in keywords(reduce_fn):
        return reduce_fn(x, axis=t.axis, keepdims=t.keepdims)
    return reduce_fn(x, axis=t.axis, keepdims=t.keepdims,
                     dtype=to_numpy_dtype(t.schema))
コード例 #10
0
def PyTables(path, datapath, dshape=None, **kwargs):
    """Create or open a ``tables.Table`` object.

    Parameters
    ----------
    path : str
        Path to a PyTables HDF5 file.
    datapath : str
        The name of the node in the ``tables.File``.
    dshape : str or datashape.DataShape
        DataShape to use to create the ``Table``.  A leading ``var``
        dimension is dropped before conversion to a table description.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    t : tables.Table

    Raises
    ------
    ValueError
        If ``datapath`` is missing from the file and no ``dshape`` was given.

    Examples
    --------
    >>> from blaze.utils import tmpfile
    >>> # create from scratch
    >>> with tmpfile('.h5') as filename:
    ...     t = PyTables(filename, '/bar',
    ...                  dshape='var * {volume: float64, planet: string[10, "A"]}')
    ...     data = [(100.3, 'mars'), (100.42, 'jupyter')]
    ...     t.append(data)
    ...     t[:]  # doctest: +SKIP
    ...
    array([(100.3, b'mars'), (100.42, b'jupyter')],
          dtype=[('volume', '<f8'), ('planet', 'S10')])
    """
    def possibly_create_table(filename, dtype):
        # Append mode keeps an existing file; the handle is always closed.
        handle = tb.open_file(filename, mode='a')
        try:
            if datapath in handle:
                return
            if dtype is None:
                raise ValueError('dshape cannot be None and datapath not'
                                 ' in file')
            handle.create_table('/',
                                datapath.lstrip('/'),
                                description=dtype)
        finally:
            handle.close()

    dtype = None
    if dshape:
        if isinstance(dshape, str):
            dshape = datashape.dshape(dshape)
        if dshape[0] == datashape.var:
            dshape = dshape.subshape[0]
        dtype = dtype_to_pytables(datashape.to_numpy_dtype(dshape))

    if not os.path.exists(path):
        # Build the table in a temporary file first, then copy it into place.
        with tmpfile('.h5') as scratch:
            possibly_create_table(scratch, dtype)
            shutil.copyfile(scratch, path)
    else:
        possibly_create_table(path, dtype)
    return tb.open_file(path, mode='a').get_node(datapath)
コード例 #11
0
 def test_uints(self):
     """Each unsigned integer dshape maps to the numpy dtype of that name."""
     for uint_name in ('uint8', 'uint16', 'uint32', 'uint64'):
         built = blaze.array(np.arange(3), dshape=uint_name)
         observed = to_numpy_dtype(built.dshape)
         self.assertEqual(observed, np.dtype(uint_name))
         self.assertEqual(dd_as_py(built._data), [0, 1, 2])
コード例 #12
0
def into(a, b, **kwargs):
    """Compute ``b`` and convert the result into ``a``.

    For list, tuple or ``Iterator`` results, ``kwargs`` gains ``types`` (the
    numpy dtypes of ``b.schema[0].types``) and ``names`` (``b.columns``)
    before the call is delegated.
    """
    result = compute(b)
    needs_metadata = isinstance(result, (list, tuple, Iterator))
    if needs_metadata:
        np_types = []
        for field_type in b.schema[0].types:
            np_types.append(datashape.to_numpy_dtype(field_type))
        kwargs['types'] = np_types
        kwargs['names'] = b.columns
    return into(a, result, **kwargs)
コード例 #13
0
ファイル: pandas.py プロジェクト: jai2033shankar/blaze
def compute_up(expr, data, **kwargs):
    """Cast ``data`` to the type requested by ``expr``.

    String measures (optional or not) cast to ``str`` and datetime measures
    to ``np.datetime64``.  Any other measure is cast to the numpy dtype of
    ``expr.schema``.
    """
    target = expr.to.measure

    string_measures = {datashape.string, datashape.Option(datashape.string)}
    if target in string_measures:
        return data.astype(str)

    datetime_measures = {datashape.datetime_,
                         datashape.Option(datashape.datetime_)}
    if target in datetime_measures:
        return data.astype(np.datetime64)

    return data.astype(to_numpy_dtype(expr.schema))
コード例 #14
0
ファイル: numpy.py プロジェクト: hkkrishna20/blaze
def compute_up(t, x, **kwargs):
    """Sum an indicator array built from ``x`` along ``t.axis``.

    The indicator is ``pd.notnull(x)`` for floating/object arrays,
    ``x.view("int64") != inat`` for datetime64 arrays, and an all-ones array
    otherwise.  The result dtype is derived from ``t.dshape``.
    """
    out_dtype = to_numpy_dtype(t.dshape)
    element_type = x.dtype.type
    if issubclass(element_type, (np.floating, np.object_)):
        indicator = pd.notnull(x)
    elif issubclass(element_type, np.datetime64):
        indicator = x.view("int64") != inat
    else:
        indicator = np.ones(x.shape, dtype=out_dtype)
    return indicator.sum(keepdims=t.keepdims, axis=t.axis, dtype=out_dtype)
コード例 #15
0
 def test_complex(self):
     """Complex dshapes map to the numpy dtype of the same name."""
     for name in ('complex64', 'complex128'):
         arr = blaze.array(np.arange(3), dshape=name)
         self.assertEqual(to_numpy_dtype(arr.dshape), np.dtype(name))
         # dd_as_py does not support complexes yet..
         self.assertEqual(dd_as_py(arr._data), [0, 1, 2])
コード例 #16
0
ファイル: test_types.py プロジェクト: mwiebe/blaze
 def test_complex(self):
     """``complex64``/``complex128`` dshapes convert to equal numpy dtypes."""
     for complex_name in ('complex64', 'complex128'):
         built = blaze.array(np.arange(3), dshape=complex_name)
         observed = to_numpy_dtype(built.dshape)
         self.assertEqual(observed, np.dtype(complex_name))
         # dd_as_py does not support complexes yet..
         self.assertEqual(dd_as_py(built._data), [0, 1, 2])
コード例 #17
0
ファイル: into.py プロジェクト: dalejung/blaze
def into(a, b, **kwargs):
    """Build a ``bcolz.ctable`` from the named fields of ``b``.

    Field names come from the second element of ``b``'s dshape.  A field
    whose numpy dtype is object is converted via ``nd.as_py``; every other
    field is converted with ``into(np.ndarray(0), field)``.  ``kwargs`` are
    forwarded to ``bcolz.ctable``.
    """
    names = dshape(nd.dshape_of(b))[1].names
    fields = [getattr(b, name) for name in names]

    columns = []
    for field in fields:
        if to_numpy_dtype(dshape(nd.dshape_of(field))) == np.dtype('O'):
            columns.append(np.asarray(nd.as_py(field)))
        else:
            columns.append(into(np.ndarray(0), field))

    return bcolz.ctable(columns, names=names, **kwargs)
コード例 #18
0
ファイル: into.py プロジェクト: leolujuyi/blaze
def into(a, df, **kwargs):
    """Convert ``df`` to a record array, without its index.

    When a ``dshape`` keyword is supplied, the records are cast to the numpy
    dtype of that dshape's measure if their dtype differs from it.  ``a`` is
    not read here.
    """
    records = df.to_records(index=False)
    if 'dshape' not in kwargs:
        return records

    wanted = to_numpy_dtype(dshape(kwargs['dshape']).measure)
    return records.astype(wanted) if records.dtype != wanted else records
コード例 #19
0
ファイル: pandas.py プロジェクト: glasnoster/blaze
def compute_up(expr, data, **kwargs):
    """Cast ``data`` to the type named by ``expr.to``.

    A string target (optional or not) casts to ``str``, a datetime target to
    ``np.datetime64``.  Anything else is cast to the numpy dtype of
    ``expr.schema``.
    """
    target = datashape.dshape(expr.to)

    string_targets = {datashape.string, datashape.Option(datashape.string)}
    if target in string_targets:
        return data.astype(str)

    datetime_targets = {datashape.datetime_,
                        datashape.Option(datashape.datetime_)}
    if target in datetime_targets:
        return data.astype(np.datetime64)

    return data.astype(to_numpy_dtype(expr.schema))
コード例 #20
0
ファイル: pytables.py プロジェクト: leolujuyi/blaze
def PyTables(path, datapath, dshape=None, **kwargs):
    """Create or open a ``tables.Table`` object.

    Parameters
    ----------
    path : str
        Path to a PyTables HDF5 file.
    datapath : str
        The name of the node in the ``tables.File``.
    dshape : str or datashape.DataShape
        DataShape to use to create the ``Table``.  A leading ``var``
        dimension is dropped before conversion to a table description.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    t : tables.Table

    Raises
    ------
    ValueError
        If ``datapath`` is missing from the file and no ``dshape`` was given.

    Examples
    --------
    >>> from blaze.utils import tmpfile
    >>> # create from scratch
    >>> with tmpfile('.h5') as filename:
    ...     t = PyTables(filename, '/bar',
    ...                  dshape='var * {volume: float64, planet: string[10, "A"]}')
    ...     data = [(100.3, 'mars'), (100.42, 'jupyter')]
    ...     t.append(data)
    ...     t[:]  # doctest: +SKIP
    ...
    array([(100.3, b'mars'), (100.42, b'jupyter')],
          dtype=[('volume', '<f8'), ('planet', 'S10')])
    """
    def possibly_create_table(filename, dtype):
        # Create the node only when it is absent; always close the file.
        h5file = tb.open_file(filename, mode='a')
        try:
            if datapath in h5file:
                return
            if dtype is None:
                raise ValueError('dshape cannot be None and datapath not'
                                 ' in file')
            h5file.create_table('/', datapath.lstrip('/'), description=dtype)
        finally:
            h5file.close()

    dtype = None
    if dshape:
        if isinstance(dshape, str):
            dshape = datashape.dshape(dshape)
        if dshape[0] == datashape.var:
            dshape = dshape.subshape[0]
        dtype = dtype_to_pytables(datashape.to_numpy_dtype(dshape))

    if not os.path.exists(path):
        # Populate a temporary file, then copy it to the requested path.
        with tmpfile('.h5') as scratch:
            possibly_create_table(scratch, dtype)
            shutil.copyfile(scratch, path)
    else:
        possibly_create_table(path, dtype)
    return tb.open_file(path, mode='a').get_node(datapath)
コード例 #21
0
ファイル: promote.py プロジェクト: pskyp/shareapplication
def promote(lhs, rhs, promote_option=True):
    """Return a scalar dshape that both ``lhs`` and ``rhs`` can be promoted to.

    Equal inputs are returned unchanged.  Otherwise each input is replaced by
    its ``ty`` attribute when it has one.  Two ``datashape.string`` types
    promote to ``datashape.string``; anything else goes through
    ``numpy.result_type``.  When ``promote_option`` is true the result is
    passed through ``optionify`` with the original inputs.

    Examples
    --------
    >>> from datashape import int32, int64, Option, string
    >>> x = Option(int32)
    >>> y = int64
    >>> promote(x, y)
    Option(ty=ctype("int64"))
    >>> promote(int64, int64)
    ctype("int64")

    Don't promote to option types.
    >>> promote(x, y, promote_option=False)
    ctype("int64")

    Strings are handled differently than NumPy, which promotes to ctype("object")
    >>> x = string
    >>> y = Option(string)
    >>> promote(x, y) == promote(y, x) == Option(string)
    True
    >>> promote(x, y, promote_option=False)
    ctype("string")

    Notes
    -----
    Except for ``datashape.string`` types, this uses ``numpy.result_type`` for
    type promotion logic.  See the numpy documentation at:

    http://docs.scipy.org/doc/numpy/reference/generated/numpy.result_type.html
    """
    if lhs == rhs:
        return lhs

    left = getattr(lhs, 'ty', lhs)
    right = getattr(rhs, 'ty', rhs)
    if left == right == datashape.string:
        # Special case string promotion, since numpy promotes to `object`.
        promoted = datashape.string
    else:
        promoted = datashape.CType.from_numpy_dtype(
            np.result_type(datashape.to_numpy_dtype(left),
                           datashape.to_numpy_dtype(right)))
    return optionify(lhs, rhs, promoted) if promote_option else promoted
コード例 #22
0
ファイル: test_types.py プロジェクト: imclab/blaze
 def test_floats(self):
     """Floating-point dshapes map to the numpy dtype of the same name."""
     for name in ('float16', 'float32', 'float64'):
         arr = blaze.array(np.arange(3), dshape=name)
         self.assertEqual(to_numpy_dtype(arr.dshape), np.dtype(name))
         if name == 'float16':
             # dd_as_py does not support this yet
             continue
         self.assertEqual(dd_as_py(arr._data), [0, 1, 2])
コード例 #23
0
def into(a, b, **kwargs):
    """Build a ``bcolz.ctable`` from the named fields of ``b``.

    Object-dtype fields are materialised through ``nd.as_py``; all other
    fields are converted with ``into(np.ndarray(0), field)``.  ``kwargs``
    are forwarded to ``bcolz.ctable``.
    """
    def as_column(field):
        # Object-dtype fields go through Python objects; the rest through
        # the ndarray conversion.
        if to_numpy_dtype(dshape(nd.dshape_of(field))) == np.dtype('O'):
            return np.asarray(nd.as_py(field))
        return into(np.ndarray(0), field)

    names = dshape(nd.dshape_of(b))[1].names
    fields = [getattr(b, name) for name in names]
    columns = [as_column(field) for field in fields]

    return bcolz.ctable(columns, names=names, **kwargs)
コード例 #24
0
 def test_floats(self):
     """``float16``/``float32``/``float64`` convert to equal numpy dtypes."""
     for float_name in ('float16', 'float32', 'float64'):
         built = blaze.array(np.arange(3), dshape=float_name)
         observed = to_numpy_dtype(built.dshape)
         self.assertEqual(observed, np.dtype(float_name))
         # dd_as_py does not support float16 yet, so skip the data check.
         if float_name != 'float16':
             self.assertEqual(dd_as_py(built._data), [0, 1, 2])
コード例 #25
0
ファイル: promote.py プロジェクト: quantopian/datashape
def promote(lhs, rhs, promote_option=True):
    """Return a scalar dshape that both ``lhs`` and ``rhs`` can be promoted to.

    Equal inputs are returned unchanged.  Otherwise each input is replaced by
    its ``ty`` attribute when it has one.  A pair of ``datashape.string``
    types stays ``datashape.string``; every other pair is combined with
    ``numpy.result_type``.  ``optionify`` is applied to the result only when
    ``promote_option`` is true.

    Examples
    --------
    >>> from datashape import int32, int64, Option, string
    >>> x = Option(int32)
    >>> y = int64
    >>> promote(x, y)
    Option(ty=ctype("int64"))
    >>> promote(int64, int64)
    ctype("int64")

    Don't promote to option types.
    >>> promote(x, y, promote_option=False)
    ctype("int64")

    Strings are handled differently than NumPy, which promotes to ctype("object")
    >>> x = string
    >>> y = Option(string)
    >>> promote(x, y) == promote(y, x) == Option(string)
    True
    >>> promote(x, y, promote_option=False)
    ctype("string")

    Notes
    -----
    Except for ``datashape.string`` types, this uses ``numpy.result_type`` for
    type promotion logic.  See the numpy documentation at:

    http://docs.scipy.org/doc/numpy/reference/generated/numpy.result_type.html
    """
    if lhs == rhs:
        return lhs

    left, right = (getattr(side, "ty", side) for side in (lhs, rhs))
    if left == right == datashape.string:
        # Special case string promotion, since numpy promotes to `object`.
        result = datashape.string
    else:
        np_left = datashape.to_numpy_dtype(left)
        np_right = datashape.to_numpy_dtype(right)
        result = datashape.CType.from_numpy_dtype(
            np.result_type(np_left, np_right))

    if not promote_option:
        return result
    return optionify(lhs, rhs, result)
コード例 #26
0
ファイル: bcolz.py プロジェクト: leolujuyi/blaze
def resource_bcolz(rootdir, **kwargs):
    """Open the ``ctable`` at ``rootdir`` or create an empty one there.

    ``kwargs`` are passed through ``keyfilter`` with
    ``carray_keywords.__contains__`` before being handed to ``ctable``.  A
    new table needs a ``dshape`` keyword, from which the numpy dtype of the
    empty initial array is derived.

    Raises
    ------
    ValueError
        If ``rootdir`` does not exist and no ``dshape`` keyword was given.
    """
    if os.path.exists(rootdir):
        allowed = keyfilter(carray_keywords.__contains__, kwargs)
        return ctable(rootdir=rootdir, **allowed)

    if 'dshape' not in kwargs:
        raise ValueError("File does not exist and no `dshape=` given")

    dtype = to_numpy_dtype(kwargs['dshape'])
    allowed = keyfilter(carray_keywords.__contains__, kwargs)
    return ctable(np.empty(0, dtype), rootdir=rootdir, **allowed)
コード例 #27
0
def resource_bcolz(rootdir, **kwargs):
    """Open the ``ctable`` at ``rootdir`` or create an empty one there.

    ``kwargs`` are passed through ``keyfilter`` with
    ``keywords(ctable).__contains__`` before being handed to ``ctable``.  A
    new table needs a ``dshape`` keyword, from which the numpy dtype of the
    empty initial array is derived.

    Raises
    ------
    ValueError
        If ``rootdir`` does not exist and no ``dshape`` keyword was given.
    """
    if os.path.exists(rootdir):
        accepted = keyfilter(keywords(ctable).__contains__, kwargs)
        return ctable(rootdir=rootdir, **accepted)

    if 'dshape' not in kwargs:
        raise ValueError("File does not exist and no `dshape=` given")

    dtype = to_numpy_dtype(kwargs['dshape'])
    accepted = keyfilter(keywords(ctable).__contains__, kwargs)
    return ctable(np.empty(0, dtype), rootdir=rootdir, **accepted)
コード例 #28
0
def compute_up(t, x, **kwargs):
    """Sum an indicator array built from ``x`` along ``t.axis``.

    Floating/object arrays use ``pd.notnull(x)``, datetime64 arrays use
    ``x.view('int64') != inat``, and everything else uses an all-ones array.
    The sum is computed in the numpy dtype derived from ``t.dshape``.
    """
    out_dtype = to_numpy_dtype(t.dshape)
    scalar_type = x.dtype.type

    if issubclass(scalar_type, (np.floating, np.object_)):
        present = pd.notnull(x)
    elif issubclass(scalar_type, np.datetime64):
        present = x.view('int64') != inat
    else:
        present = np.ones(x.shape, dtype=out_dtype)

    return present.sum(keepdims=t.keepdims, axis=t.axis, dtype=out_dtype)
コード例 #29
0
def series_to_array(s, dshape=None, **kwargs):
    """Return the values of ``s`` as an array of the dtype given by ``dshape``.

    The raw values are returned unchanged when the series already has the
    target dtype, when it is a datetime64 series whose unit is listed in
    ``higher_precision_freqs``, or when the cast raises ``ValueError``.
    """
    target = datashape.to_numpy_dtype(datashape.dshape(dshape))
    source = s.dtype
    raw = s.values

    # don't lose precision of datetime64 more precise than microseconds
    keeps_precision = (issubclass(source.type, np.datetime64) and
                       np.datetime_data(source)[0] in higher_precision_freqs)
    if keeps_precision or s.dtype == target:
        return raw

    try:
        converted = raw.astype(target)
    except ValueError:  # object series and record dshape, e.g., a frame row
        return raw
    return converted
コード例 #30
0
    def __init__(self,
                 path,
                 datapath,
                 mode='r',
                 schema=None,
                 dshape=None,
                 **kwargs):
        """Bind this object to the dataset ``datapath`` inside the HDF5 file ``path``.

        Parameters
        ----------
        path :
            Passed to ``h5py.File``.
        datapath :
            Key of the dataset inside the file.
        mode :
            Mode passed to ``h5py.File``; defaults to ``'r'``.
        schema :
            Used to build ``'var * <schema>'`` as the dshape when no
            ``dshape`` is given; stored unchanged as ``self._schema``.
        dshape :
            Datashape used to create the dataset when it does not exist yet.
        **kwargs :
            Forwarded to ``create_dataset``; ``chunks`` and ``maxshape`` are
            set here when the leading dimension is variable.

        Raises
        ------
        ValueError
            If the dataset does not exist and ``dshape`` is ``None``.
        """
        self.path = path
        self.datapath = datapath
        self.mode = mode

        # A bare schema implies a variable-length leading dimension.
        if schema and not dshape:
            dshape = 'var * ' + str(schema)

        # TODO: provide sane defaults for kwargs
        # Notably chunks and maxshape
        # NOTE(review): ``shape`` and ``dtype`` are only bound inside this
        # branch; the creation path below relies on them.
        if dshape:
            dshape = datashape.dshape(dshape)
            shape = dshape.shape
            dtype = datashape.to_numpy_dtype(dshape[-1])
            if shape[0] == datashape.Var():
                # Variable leading dimension: start with zero rows and leave
                # that axis unbounded unless the caller gave a ``maxshape``.
                kwargs['chunks'] = True
                kwargs['maxshape'] = kwargs.get('maxshape',
                                                (None, ) + shape[1:])
                shape = (0, ) + tuple(map(int, shape[1:]))

        with h5py.File(path, mode) as f:
            dset = f.get(datapath)
            if dset is None:
                if dshape is None:
                    raise ValueError('No dataset or dshape provided')
                else:
                    f.create_dataset(datapath, shape, dtype=dtype, **kwargs)
            else:
                # An existing dataset wins: its on-disk shape and dtype
                # replace whatever dshape was passed in.
                dshape2 = datashape.from_numpy(dset.shape, dset.dtype)
                dshape = dshape2
                # TODO: test provided dshape against given dshape
                # if dshape and dshape != dshape2:
                #     raise ValueError('Inconsistent datashapes.'
                #             '\nGiven: %s\nFound: %s' % (dshape, dshape2))

        # ``attributes`` is defined outside this block.
        attributes = self.attributes()
        if attributes['chunks']:
            # is there a better way to do this?
            # Replace the first dimension of the textual dshape with ``var``.
            words = str(dshape).split(' * ')
            dshape = 'var * ' + ' * '.join(words[1:])
            dshape = datashape.dshape(dshape)

        self._dshape = dshape
        self._schema = schema
コード例 #31
0
ファイル: h5py.py プロジェクト: leolujuyi/blaze
def dataset_from_dshape(file, datapath, ds, **kwargs):
    """Require a dataset at ``datapath`` in ``file`` matching dshape ``ds``.

    A dshape without ``var`` uses its numpy shape.  A one-dimensional dshape
    containing ``var`` starts with shape ``(0,)``.  For a non-empty shape,
    ``chunks`` and ``maxshape`` get defaults unless the caller supplied them.

    Raises
    ------
    ValueError
        If ``ds`` contains ``var`` and has more than one dimension.
    """
    dtype = varlen_dtype(to_numpy_dtype(ds))

    if datashape.var in list(ds):
        if len(ds.shape) != 1:
            raise ValueError("Don't know how to handle varlen nd shapes")
        shape = (0,)
    else:
        shape = to_numpy(ds)[0]

    if shape:
        kwargs.setdefault('chunks', True)
        kwargs.setdefault('maxshape', (None,) + shape[1:])

    # Only keys contained in ``h5py_attributes`` are forwarded.
    forwarded = keyfilter(h5py_attributes.__contains__, kwargs)
    return file.require_dataset(datapath, shape=shape, dtype=dtype,
                                **forwarded)
コード例 #32
0
ファイル: promote.py プロジェクト: nevermindewe/datashape
def promote(lhs, rhs):
    """Promote two scalar dshapes to a possibly larger, but compatible type.

    Each input is replaced by its ``ty`` attribute when it has one, both are
    converted to numpy dtypes and combined with ``numpy.promote_types``.  The
    resulting dshape is passed to ``optionify`` with the original inputs.

    Examples
    --------
    >>> from datashape import int32, int64, Option
    >>> x = Option(int32)
    >>> y = int64
    >>> promote(x, y)
    ?int64

    Notes
    -----
    This uses ``numpy.promote_types`` for type promotion logic.  See the numpy
    documentation at
    http://docs.scipy.org/doc/numpy/reference/generated/numpy.promote_types.html
    """
    unwrapped = (getattr(lhs, 'ty', lhs), getattr(rhs, 'ty', rhs))
    np_left, np_right = [datashape.to_numpy_dtype(ty) for ty in unwrapped]
    promoted = np.promote_types(np_left, np_right)
    return optionify(lhs, rhs, datashape.from_numpy((), promoted))
コード例 #33
0
ファイル: h5py.py プロジェクト: quasiben/odo
def dataset_from_dshape(file, datapath, ds, **kwargs):
    """Require a dataset at ``datapath`` in ``file`` matching dshape ``ds``.

    A dshape without ``var`` uses its numpy shape.  When ``var`` appears only
    as the leading element, the shape is ``(0,)`` followed by the numpy shape
    of ``ds.subshape[0]``.  For a non-empty shape, ``chunks`` and
    ``maxshape`` get defaults unless the caller supplied them.

    Raises
    ------
    ValueError
        If ``var`` appears anywhere after the first element of ``ds``.
    """
    dtype = varlen_dtype(to_numpy_dtype(ds))

    parts = list(ds)
    if datashape.var in parts[1:]:
        raise ValueError("Don't know how to handle varlen nd shapes")
    if datashape.var in parts:
        # Only the leading dimension is variable: start with zero rows.
        shape = (0, ) + to_numpy(ds.subshape[0])[0]
    else:
        shape = to_numpy(ds)[0]

    if shape:
        kwargs.setdefault('chunks', True)
        kwargs.setdefault('maxshape', (None, ) + shape[1:])

    # Only keys contained in ``h5py_attributes`` are forwarded.
    forwarded = keyfilter(h5py_attributes.__contains__, kwargs)
    return file.require_dataset(datapath, shape=shape, dtype=dtype,
                                **forwarded)
コード例 #34
0
ファイル: bcolz.py プロジェクト: jreback/into
def resource_bcolz(uri, dshape=None, **kwargs):
    """Open the bcolz ``ctable`` at ``uri`` or create an empty container there.

    Parameters
    ----------
    uri : str
        Directory of the bcolz container.
    dshape : str or DataShape, optional
        Required when ``uri`` does not exist; determines the numpy dtype of
        the empty initial array and whether a ``ctable`` (record measure) or
        a ``carray`` (any other measure) is created.
    **kwargs
        Filtered with ``keywords.__contains__`` and forwarded to the
        constructor when creating a new container.

    Raises
    ------
    ValueError
        If ``uri`` does not exist and ``dshape`` is falsy.
    """
    if os.path.exists(uri):
        return ctable(rootdir=uri)

    if not dshape:
        # The leading space on the second literal matters: without it the
        # message read "...directory orvalid datashape".
        raise ValueError("Must specify either existing bcolz directory or"
                         " valid datashape")
    dshape = datashape.dshape(dshape)

    dt = datashape.to_numpy_dtype(dshape)
    x = np.empty(shape=(0,), dtype=dt)

    # Record measures become a column table, everything else a plain array.
    if datashape.predicates.isrecord(dshape.measure):
        factory = ctable
    else:
        factory = carray
    return factory(x, rootdir=uri, **keyfilter(keywords.__contains__, kwargs))
コード例 #35
0
ファイル: python.py プロジェクト: jai2033shankar/blaze
def compute_up(expr, ob, **kwargs):
    """Coerce the scalar ``ob`` to the scalar type described by ``expr.to``.

    For an ``Option`` measure, a null ``ob`` yields ``None``; otherwise the
    wrapped type is used.  The value is converted by calling the numpy
    scalar type of the measure.

    Raises
    ------
    TypeError
        If ``expr.to`` has a non-empty shape.
    """
    target = expr.to
    dims = target.shape
    if dims:
        message = (
            'cannot convert scalar object %r to array or matrix shape %r'
            % (ob, dims)
        )
        raise TypeError(message)

    scalar_type = target.measure
    if isinstance(scalar_type, Option):
        if pd.isnull(ob):
            return None
        scalar_type = scalar_type.ty
    return to_numpy_dtype(scalar_type).type(ob)
コード例 #36
0
ファイル: python.py プロジェクト: blaze/blaze
def compute_up(expr, ob, **kwargs):
    """Coerce the scalar ``ob`` to the scalar type described by ``expr.to``.

    A null ``ob`` with an ``Option`` measure returns ``None``; a non-null one
    is converted with the type wrapped by the ``Option``.  Conversion calls
    the numpy scalar type of the measure on ``ob``.

    Raises
    ------
    TypeError
        If ``expr.to`` has a non-empty shape.
    """
    target_type = expr.to
    target_shape = target_type.shape
    if target_shape:
        details = (ob, target_shape)
        raise TypeError(
            'cannot convert scalar object %r to array or matrix shape %r'
            % details
        )

    measure = target_type.measure
    if isinstance(measure, Option):
        if pd.isnull(ob):
            return None
        measure = measure.ty
    np_scalar = to_numpy_dtype(measure).type
    return np_scalar(ob)
コード例 #37
0
    def __init__(self, path, datapath, mode='r', schema=None, dshape=None, **kwargs):
        """Bind this object to the dataset ``datapath`` inside the HDF5 file ``path``.

        Parameters
        ----------
        path :
            Passed to ``h5py.File``.
        datapath :
            Key of the dataset inside the file.
        mode :
            Mode passed to ``h5py.File``; defaults to ``'r'``.
        schema :
            Used to build ``'var * <schema>'`` as the dshape when no
            ``dshape`` is given; stored unchanged as ``self._schema``.
        dshape :
            Datashape used to create the dataset when it does not exist yet.
        **kwargs :
            Forwarded to ``create_dataset``; ``chunks`` and ``maxshape`` are
            set here when the leading dimension is variable.

        Raises
        ------
        ValueError
            If the dataset does not exist and ``dshape`` is ``None``.
        """
        self.path = path
        self.datapath = datapath
        self.mode = mode

        # A bare schema implies a variable-length leading dimension.
        if schema and not dshape:
            dshape = 'var * ' + str(schema)

        # TODO: provide sane defaults for kwargs
        # Notably chunks and maxshape
        # NOTE(review): ``shape`` and ``dtype`` are only bound inside this
        # branch; the creation path below relies on them.
        if dshape:
            dshape = datashape.dshape(dshape)
            shape = dshape.shape
            dtype = datashape.to_numpy_dtype(dshape[-1])
            if shape[0] == datashape.Var():
                # Variable leading dimension: start with zero rows and leave
                # that axis unbounded unless the caller gave a ``maxshape``.
                kwargs['chunks'] = True
                kwargs['maxshape'] = kwargs.get('maxshape', (None,) + shape[1:])
                shape = (0,) + tuple(map(int, shape[1:]))

        with h5py.File(path, mode) as f:
            dset = f.get(datapath)
            if dset is None:
                if dshape is None:
                    raise ValueError('No dataset or dshape provided')
                else:
                    f.create_dataset(datapath, shape, dtype=dtype, **kwargs)
            else:
                # An existing dataset wins: its on-disk shape and dtype
                # replace whatever dshape was passed in.
                dshape2 = datashape.from_numpy(dset.shape, dset.dtype)
                dshape = dshape2
                # TODO: test provided dshape against given dshape
                # if dshape and dshape != dshape2:
                #     raise ValueError('Inconsistent datashapes.'
                #             '\nGiven: %s\nFound: %s' % (dshape, dshape2))

        # ``attributes`` is defined outside this block.
        attributes = self.attributes()
        if attributes['chunks']:
            # is there a better way to do this?
            # Replace the first dimension of the textual dshape with ``var``.
            words = str(dshape).split(' * ')
            dshape = 'var * ' + ' * '.join(words[1:])
            dshape = datashape.dshape(dshape)

        self._dshape = dshape
        self._schema = schema
コード例 #38
0
def unit_to_dtype(ds):
    """ Convert a datashape Unit instance into a numpy dtype

    Optional numeric scalars are converted through a float dtype (``int`` is
    replaced by ``float`` in the type name), optional date, datetime, string
    and timedelta types are unwrapped, and ``string`` maps to the object
    dtype.

    Parameters
    ----------
    ds : DataShape
        The DataShape instance to convert; a string is parsed with
        ``dshape`` first.

    Returns
    -------
    np.dtype

    Examples
    --------
    >>> unit_to_dtype('int32')
    dtype('int32')
    >>> unit_to_dtype('float64')
    dtype('float64')
    >>> unit_to_dtype('?int64')
    dtype('float64')
    >>> unit_to_dtype('string')
    dtype('O')
    >>> unit_to_dtype('?datetime')
    dtype('<M8[us]')
    """
    measure = dshape(ds) if isinstance(ds, str) else ds
    if isinstance(measure, DataShape):
        measure = measure.measure

    if isinstance(measure, Option):
        if isscalar(measure) and isnumeric(measure):
            if not isinstance(measure.ty, Decimal):
                float_name = str(measure).replace('int', 'float')
                return unit_to_dtype(float_name.replace('?', ''))
            float_name = str(measure.ty.to_numpy_dtype()).replace(
                'int', 'float')
            if float_name == 'float8':  # not a valid dtype, so increase
                float_name = 'float16'
            return unit_to_dtype(float_name)
        if isinstance(measure.ty, (Date, DateTime, String, TimeDelta)):
            measure = measure.ty

    if measure == string:
        return np.dtype('O')
    return to_numpy_dtype(measure)
コード例 #39
0
ファイル: into.py プロジェクト: leolujuyi/blaze
def into(a, b, **kwargs):
    """Convert the iterable ``b`` into a numpy array, guided by its first item.

    The first element is peeked at and then put back in front of the stream.
    Datetime items are mapped through ``np.datetime64``.  Lists and tuples
    build a record array whose dtype comes from the ``dtype`` keyword, else
    the ``dshape`` keyword, else ``dtype_from_tuple(first)``.  Items with a
    ``values`` attribute are converted row-wise via ``.values()``.  Anything
    else is handed to ``np.asarray`` as a list.  ``a`` is not read here.
    """
    rows = iter(b)
    head = next(rows)
    rows = toolz.concat([[head], rows])  # put the peeked item back
    if isinstance(head, datetime):
        rows = map(np.datetime64, rows)

    if isinstance(head, (list, tuple)):
        if 'dtype' in kwargs:
            dtype = kwargs.pop('dtype')
        elif 'dshape' in kwargs:
            dtype = to_numpy_dtype(dshape(kwargs.pop('dshape')))
        else:
            dtype = dtype_from_tuple(head)
        records = [tuple(row) for row in rows]
        return np.rec.fromrecords(records, dtype=dtype, **kwargs)

    if hasattr(head, 'values'):
        #detecting sqlalchemy.engine.result.RowProxy types and similar
        return np.asarray([tuple(row.values()) for row in rows], **kwargs)

    return np.asarray(list(rows), **kwargs)
コード例 #40
0
ファイル: numpy.py プロジェクト: blaze/blaze
def compute_up(expr, data, **kwargs):
    """Cast ``data`` to the numpy dtype derived from ``expr.schema``."""
    cast = data.astype
    target_dtype = to_numpy_dtype(expr.schema)
    return cast(target_dtype)
コード例 #41
0
ファイル: test_coretypes.py プロジェクト: aterrel/datashape
 def test_string(self):
     """``2 * string`` converts to numpy's object dtype."""
     converted = to_numpy_dtype(dshape('2 * string'))
     self.assertEqual(converted, np.dtype('O'))
コード例 #42
0
ファイル: table.py プロジェクト: holdenk/blaze
def into(a, b):
    """Compute ``b`` and convert the result into a numpy array.

    For a column expression the dtype comes from the first field type of
    ``b.schema``; otherwise from the whole schema.  ``a`` is not read here.
    """
    if not b.iscolumn:
        return into(np.ndarray(0), compute(b), dtype=to_numpy_dtype(b.schema))
    return into(np.ndarray(0), compute(b),
                dtype=to_numpy_dtype(b.schema[0].types[0]))
コード例 #43
0
ファイル: constructors.py プロジェクト: aaronmartin0303/blaze
def array(obj, dshape=None, caps={'efficient-write': True},
          storage=None):
    """Create a Blaze array.

    Parameters
    ----------
    obj : array_like
        Initial contents for the array.

    dshape : datashape
        The datashape for the resulting array. By default the
        datashape will be inferred from data. If an explicit dshape is
        provided, the input data will be coerced into the provided
        dshape.

    caps : capabilities dictionary
        A dictionary containing the desired capabilities of the array.
        The default dictionary is only read here, never modified.

    storage : Storage instance
        A Storage object with the necessary info for storing the data.

    Returns
    -------
    out : a concrete blaze array.

    Raises
    ------
    TypeError
        If no branch below knows how to wrap ``obj``.

    Bugs
    ----
    Right now the explicit dshape is ignored. This needs to be
    corrected. When the data cannot be coerced to an explicit dshape
    an exception should be raised.

    """
    dshape = _normalize_dshape(dshape)

    storage = _storage_convert(storage)

    if isinstance(obj, Array):
        return obj

    if isinstance(obj, IDataDescriptor):
        # TODO: Validate the 'caps', convert to another kind of data
        #       descriptor if necessary.  Open question from the original
        #       authors: should a data descriptor be accepted at this level
        #       at all, given that callers holding one could use the Array
        #       constructor directly?
        descriptor = obj
    elif storage is not None:
        # Persistent BLZ array rooted at the storage path.
        np_dtype = to_numpy_dtype(dshape) if dshape is not None else None
        if inspect.isgenerator(obj):
            # TODO: Generator logic can go inside barray
            backing = blz.barray(obj, dtype=np_dtype, count=-1,
                                 rootdir=storage.path)
        else:
            backing = blz.barray(obj, dtype=np_dtype, rootdir=storage.path)
        descriptor = BLZDataDescriptor(backing)
    elif caps.get('efficient-write') is True:
        # In-Memory array
        if dshape is None:
            dynd_arr = nd.asarray(obj, access='rw')
        else:
            # Use the uniform/full dtype specification in dynd depending
            # on whether the datashape has a uniform dim
            dynd_type = ndt.type(str(dshape))
            if dynd_type.ndim > 0:
                dynd_arr = nd.array(obj, type=dynd_type, access='rw')
            else:
                dynd_arr = nd.array(obj, dtype=dynd_type, access='rw')
        descriptor = DyNDDataDescriptor(dynd_arr)
    elif caps.get('compress') is True:
        np_dtype = to_numpy_dtype(dshape) if dshape is not None else None
        # BLZ provides compression
        if inspect.isgenerator(obj):
            # TODO: Generator logic can go inside barray
            backing = blz.fromiter(obj, dtype=np_dtype, count=-1)
        else:
            backing = blz.barray(obj, dtype=np_dtype)
        descriptor = BLZDataDescriptor(backing)
    elif isinstance(obj, np.ndarray):
        descriptor = DyNDDataDescriptor(nd.view(obj))
    elif isinstance(obj, nd.array):
        descriptor = DyNDDataDescriptor(obj)
    elif isinstance(obj, blz.barray):
        descriptor = BLZDataDescriptor(obj)
    else:
        raise TypeError(('Failed to construct blaze array from '
                        'object of type %r') % type(obj))
    return Array(descriptor)
コード例 #44
0
def into(a, b, **kwargs):
    """Materialise ``b`` into a NumPy array using its non-optional schema.

    Every ``'?'`` character is removed from the string form of ``b.schema``
    before it is re-parsed with ``dshape``; the resulting datashape is
    converted with ``to_numpy_dtype`` and passed on as ``dtype``.

    ``a`` and ``**kwargs`` are accepted but not used here.

    NOTE(review): the nested ``into`` call presumably resolves to a different
    implementation through a dispatch mechanism that is not visible in this
    snippet -- confirm against the decorator on the real definition.
    """
    stripped_schema = str(b.schema).replace('?', '')
    schema = dshape(stripped_schema)
    # The empty ndarray is handed over as the target-side argument; the
    # actual data comes from computing ``b``.
    target = np.ndarray(0)
    computed = compute(b)
    return into(target, computed, dtype=to_numpy_dtype(schema))
コード例 #45
0
 def test_timedelta(self):
     """Check the timedelta mapping with and without an explicit unit."""
     # No unit given: the expected NumPy dtype is microsecond timedelta64.
     default_unit = to_numpy_dtype(dshape('2 * timedelta'))
     assert default_unit == np.dtype('m8[us]')

     # An explicit unit must be carried over to the NumPy dtype.
     seconds_unit = to_numpy_dtype(dshape("2 * timedelta[unit='s']"))
     assert seconds_unit == np.dtype('m8[s]')
コード例 #46
0
 def test_string(self):
     """Check that a ``string`` measure maps to NumPy's object dtype."""
     converted = to_numpy_dtype(dshape('2 * string'))
     assert converted == np.dtype('O')
コード例 #47
0
 def test_date(self):
     """Check that a ``date`` measure maps to day-resolution datetime64."""
     converted = to_numpy_dtype(dshape('2 * date'))
     assert converted == np.dtype('M8[D]')
コード例 #48
0
 def test_decimal(self):
     """Check the NumPy type chosen for several decimal specifications."""
     # (datashape spec, NumPy type the conversion is expected to equal)
     expectations = (
         ('decimal[18,0]', np.int64),
         ('decimal[7,2]', np.float64),
         ('decimal[4]', np.int16),
     )
     for spec, numpy_type in expectations:
         assert to_numpy_dtype(dshape(spec)) == numpy_type

     # A 21-digit decimal is expected to be rejected with TypeError.
     with pytest.raises(TypeError):
         to_numpy_dtype(dshape('decimal[21]'))
コード例 #49
0
ファイル: test_coretypes.py プロジェクト: aterrel/datashape
 def test_simple(self):
     """Check scalar and record measures; the leading dimension is dropped."""
     scalar_dtype = to_numpy_dtype(dshape('2 * int32'))
     self.assertEqual(scalar_dtype, np.int32)

     # A record measure should become a structured dtype with one
     # little-endian 4-byte integer field per record field.
     record_dtype = to_numpy_dtype(dshape('2 * {x: int32, y: int32}'))
     expected_record = np.dtype([('x', '<i4'), ('y', '<i4')])
     self.assertEqual(record_dtype, expected_record)
コード例 #50
0
ファイル: test_coretypes.py プロジェクト: aterrel/datashape
 def test_date(self):
     """Check that a ``date`` measure maps to day-resolution datetime64."""
     converted = to_numpy_dtype(dshape('2 * date'))
     self.assertEqual(converted, np.dtype('M8[D]'))
コード例 #51
0
ファイル: constructors.py プロジェクト: xsixing/blaze
def array(obj, dshape=None, caps={'efficient-write': True}, storage=None):
    """Build a concrete Blaze ``Array`` around ``obj``.

    Parameters
    ----------
    obj : array_like
        Initial contents for the array. An existing ``Array`` is returned
        unchanged and an ``IDataDescriptor`` is wrapped as-is.

    dshape : datashape, optional
        Requested datashape; it is passed through ``_normalize_dshape``
        first. When it is None the backend infers the element type.

    caps : capabilities dictionary
        Desired capabilities of the array. Only the keys
        ``'efficient-write'`` and ``'compress'`` are inspected, and each
        one counts only when its value is exactly ``True``.

    storage : Storage instance, optional
        Passed through ``_storage_convert``. When the converted value is
        not None the data is stored in a BLZ barray rooted at
        ``storage.path``.

    Returns
    -------
    out : a concrete blaze array.

    Raises
    ------
    TypeError
        When no storage or capability branch applies and ``obj`` is not a
        NumPy ndarray, a dynd array or a BLZ barray.

    Notes
    -----
    The first matching rule wins, in this order: existing ``Array``,
    ``IDataDescriptor``, explicit storage, ``'efficient-write'``,
    ``'compress'``, then the type-based fallbacks. ``dshape`` is consulted
    only by the storage and capability rules; the type-based fallbacks
    wrap ``obj`` without looking at it.

    """
    dshape = _normalize_dshape(dshape)
    storage = _storage_convert(storage)

    def wants(capability):
        # A capability is requested only when it is present and literally
        # True; other truthy values do not count.
        return capability in caps and caps[capability] is True

    if isinstance(obj, Array):
        return obj

    if isinstance(obj, IDataDescriptor):
        # TODO: Validate the 'caps', convert to another kind of data
        #       descriptor if necessary.
        # Open design question kept from earlier review notes: should
        # `caps` be honoured for an existing descriptor, or should
        # descriptors not be accepted here at all, leaving callers who work
        # with these internals to use the Array constructor directly?
        return Array(obj)

    if storage is not None:
        # Persistent BLZ array stored under the given root directory.
        barray_kwargs = {
            'dtype': None if dshape is None else to_numpy_dtype(dshape),
            'rootdir': storage.path,
        }
        if inspect.isgenerator(obj):
            # TODO: Generator logic can go inside barray
            barray_kwargs['count'] = -1
        return Array(BLZDataDescriptor(blz.barray(obj, **barray_kwargs)))

    if wants('efficient-write'):
        # In-memory dynd array.
        if dshape is None:
            return Array(DyNDDataDescriptor(nd.asarray(obj, access='rw')))
        # dynd takes the full type through ``type=`` when the datashape
        # carries dimensions and through ``dtype=`` when it does not.
        dynd_type = ndt.type(str(dshape))
        type_keyword = 'type' if dynd_type.ndim > 0 else 'dtype'
        in_memory = nd.array(obj, access='rw', **{type_keyword: dynd_type})
        return Array(DyNDDataDescriptor(in_memory))

    if wants('compress'):
        # BLZ provides compression
        np_dtype = None if dshape is None else to_numpy_dtype(dshape)
        if inspect.isgenerator(obj):
            # TODO: Generator logic can go inside barray
            compressed = blz.fromiter(obj, dtype=np_dtype, count=-1)
        else:
            compressed = blz.barray(obj, dtype=np_dtype)
        return Array(BLZDataDescriptor(compressed))

    # No capability requested: wrap by the concrete type of ``obj``.
    if isinstance(obj, np.ndarray):
        return Array(DyNDDataDescriptor(nd.view(obj)))
    if isinstance(obj, nd.array):
        return Array(DyNDDataDescriptor(obj))
    if isinstance(obj, blz.barray):
        return Array(BLZDataDescriptor(obj))

    raise TypeError(('Failed to construct blaze array from '
                     'object of type %r') % type(obj))
コード例 #52
0
 def test_date(self):
     """Check that a ``date`` measure maps to day-resolution datetime64."""
     converted = to_numpy_dtype(dshape('2 * date'))
     assert converted == np.dtype('M8[D]')
コード例 #53
0
 def test_dimensions(self):
     """Check that a ``var`` dimension is ignored when taking the dtype.

     The comparison must be asserted: a test that merely returns the
     boolean passes no matter what the conversion produces.
     """
     assert to_numpy_dtype(dshape('var * int32')) == np.int32
コード例 #54
0
 def test_timedelta(self):
     """Check the timedelta mapping with and without an explicit unit."""
     # No unit given: the expected NumPy dtype is microsecond timedelta64.
     default_unit = to_numpy_dtype(dshape('2 * timedelta'))
     assert default_unit == np.dtype('m8[us]')

     # An explicit unit must be carried over to the NumPy dtype.
     seconds_unit = to_numpy_dtype(dshape("2 * timedelta[unit='s']"))
     assert seconds_unit == np.dtype('m8[s]')
コード例 #55
0
 def test_dimensions(self):
     """Check that a ``var`` dimension is ignored when taking the dtype.

     The comparison must be asserted: a test that merely returns the
     boolean passes no matter what the conversion produces.
     """
     assert to_numpy_dtype(dshape('var * int32')) == np.int32
コード例 #56
0
ファイル: pandas.py プロジェクト: nkhuyu/blaze
def compute_up(expr, data, **kwargs):
    """Cast ``data`` to the NumPy dtype derived from ``expr.schema``.

    The cast is delegated to ``data.astype``; extra keyword arguments are
    accepted and ignored.
    """
    cast = data.astype
    return cast(to_numpy_dtype(expr.schema))
コード例 #57
0
 def test_string(self):
     """Check that a ``string`` measure maps to NumPy's object dtype."""
     converted = to_numpy_dtype(dshape('2 * string'))
     assert converted == np.dtype('O')