Exemple #1
0
    def __init__(self, obj, dshape=None, index=None, metadata=None):
        """
        Build the table from a ``Space`` or from an iterable of sources.

        Parameters
        ----------
        obj : Space or iterable
            A ``Space`` is used directly; anything else is passed through
            ``injest_iterable`` and the result is wrapped in a new ``Space``.
        dshape : str or datashape, optional
            Datashape to overlay on the data. A ``str`` is parsed with
            ``_dshape``. When falsy, the datashape is inferred from ``obj``.
        index : optional
            Accepted but not used by this constructor.
        metadata : list, optional
            Entries appended to ``NDTable._metaheader``.

        Raises
        ------
        ValueError
            If ``CArraySource.check_datashape`` reports that the given
            datashape does not match ``obj``.
        """
        self._datashape = dshape
        self._metadata = NDTable._metaheader + (metadata or [])

        # A textual datashape goes through the parser first
        if isinstance(dshape, str):
            dshape = _dshape(dshape)

        # Values
        # ------
        if not isinstance(obj, Space):
            self.space = Space(*injest_iterable(obj))
        else:
            self.space = obj
        self.children = set(self.space.subspaces)

        # Shape
        # -----
        if dshape:
            # A user-supplied datashape must agree with the data
            if not CArraySource.check_datashape(obj, given_dshape=dshape):
                raise ValueError("Datashape is inconsistent with source")
            self._datashape = dshape
        else:
            # Only raw data was given: infer the datashape from it
            self._datashape = CArraySource.infer_datashape(obj)

        self._layout = None
Exemple #2
0
def open(uri=None, mode='a'):
    """Open a Blaze object via a `uri` (Uniform Resource Identifier).

    Parameters
    ----------
    uri : str, optional
        Specifies the URI for the Blaze object.  It can be a regular file
        too.  The scheme selects the source:

          * 'carray' opens a ``CArraySource``
          * 'ctable' opens a ``CTableSource``
          * 'sqlite' opens a ``SqliteSource`` (an empty path is passed on
            as ``None``)

        Any other scheme (or none) treats ``uri`` as a regular path backed
        by a ``CArraySource``.  When `uri` is None a ``CArraySource`` with
        no parameters is used.

    mode : the open mode (string)
        Specifies the mode in which the object is opened.  The supported
        values are:

          * 'r' for read-only
          * 'w' for emptying the previous underlying data
          * 'a' for allowing read/write on top of existing data

        NOTE: this argument is accepted but not consulted by this function.

    Returns
    -------
    out : an ``Array`` for array sources, an ``NDTable`` for table sources.

    """
    ARRAY = 1
    TABLE = 2

    if uri is None:
        source = CArraySource()
        # Previously left unset, which raised UnboundLocalError below
        structure = ARRAY
    else:
        uri = urlparse(uri)

        # The branches must be mutually exclusive: with a plain `if` on
        # 'ctable', a 'carray' URI fell through to the final `else` and had
        # its source replaced by one built from `uri.path` alone.
        if uri.scheme == 'carray':
            path = os.path.join(uri.netloc, uri.path[1:])
            source = CArraySource(params=params(storage=path))
            structure = ARRAY

        elif uri.scheme == 'ctable':
            path = os.path.join(uri.netloc, uri.path[1:])
            source = CTableSource(params=params(storage=path))
            structure = TABLE

        elif uri.scheme == 'sqlite':
            path = os.path.join(uri.netloc, uri.path[1:])
            source = SqliteSource(params=params(storage=path or None))
            structure = TABLE

        else:
            # Default is to treat the URI as a regular path
            source = CArraySource(params=params(storage=uri.path))
            structure = ARRAY

    # Don't want a deferred array (yet), so arrays come back manifest
    if structure == ARRAY:
        return Array(source)
    return NDTable(source)
Exemple #3
0
    def __init__(self,
                 obj,
                 dshape=None,
                 metadata=None,
                 layout=None,
                 params=None):
        """
        Set up datashape, data provider, space, layout and metadata.

        Parameters
        ----------
        obj : ByteProvider or object
            A ``ByteProvider`` is used as the data directly; anything else
            is wrapped in ``CArraySource(obj, params=params)``.
        dshape : str or datashape, optional
            A string is parsed with ``_dshape``. When falsy, the datashape
            is inferred from ``obj``; otherwise it is passed to
            ``CArraySource.check_datashape`` (whose return value is not
            inspected here).
        metadata : list, optional
            Entries appended to ``NDArray._metaheader``.
        layout : optional
            Layout to use; when falsy the data's ``default_layout()`` is
            used instead.
        params : optional
            Stored on the instance and forwarded to ``CArraySource``.
        """
        # Datashape
        # ---------
        if isinstance(dshape, basestring):
            dshape = _dshape(dshape)

        if dshape:
            # User-supplied datashape overlaid on the data
            CArraySource.check_datashape(obj, given_dshape=dshape)
        else:
            # Raw data only: infer how it should be laid out
            dshape = CArraySource.infer_datashape(obj)
        self._datashape = dshape

        # Values
        # ------
        provider = obj
        if not isinstance(provider, ByteProvider):
            provider = CArraySource(obj, params=params)
        self.data = provider

        # children graph nodes
        self.children = []

        self.space = Space(self.data)

        # Layout
        # ------
        self._layout = layout if layout else self.data.default_layout()

        # Metadata
        # --------
        self._metadata = NDArray._metaheader + (metadata or [])

        # Parameters
        # ----------
        self.params = params
Exemple #4
0
    def __init__(self, obj, dshape=None, metadata=None, layout=None,
            params=None):
        """
        Set up datashape, data source, space, layout and metadata.

        Parameters
        ----------
        obj : object
            Raw data; always wrapped in ``CArraySource(obj, params)``.
        dshape : datashape, optional
            When falsy, the datashape is inferred from ``obj``; otherwise it
            is passed to ``CArraySource.check_datashape`` (whose return
            value is not inspected here).
        metadata : list, optional
            Entries appended to ``NDArray._metaheader``.
        layout : optional
            Layout to use; when falsy a ``ChunkedL`` over the data with
            ``cdimension=0`` is created.
        params : optional
            Stored on the instance and forwarded to ``CArraySource``.
        """
        # Datashape
        # ---------

        if not dshape:
            # The user just passed in a raw data source, try
            # and infer how it should be laid out or fall
            # back on dynamic types.
            self._datashape = dshape = CArraySource.infer_datashape(obj)
        else:
            # The user overlaid their custom dshape on this
            # data, check if it makes sense
            CArraySource.check_datashape(obj, given_dshape=dshape)
            self._datashape = dshape

        # children graph nodes
        self.children = []

        # Values
        # ------
        self.data = CArraySource(obj, params)
        self.space = Space(self.data)

        # Layout
        # ------

        if layout:
            self._layout = layout
        else:
            self._layout = ChunkedL(self.data, cdimension=0)

        # Metadata
        # --------

        self._metadata = NDArray._metaheader + (metadata or [])

        # Parameters
        # ----------
        self.params = params
Exemple #5
0
def zeros(dshape, params=None):
    """ Build a zero-filled Array of the requested datashape.

    Parameters
    ----------
    dshape : str, blaze.dshape instance
        Datashape of the resulting object; a string is parsed first.
    params : blaze.params object
        Any parameter supported by the backend library.  When omitted,
        default parameters are used to derive the backend settings.

    Returns
    -------
    out : an Array object.  When the parameters resolve to a root
        directory, the data is written there and the result of
        ``open(rootdir)`` is returned.

    """
    parsed = _dshape(dshape) if isinstance(dshape, basestring) else dshape
    shape, dtype = to_numpy(parsed)
    cparams, rootdir, _flavor = to_cparams(params or _params())

    if rootdir is None:
        # In-memory container wrapped directly in a source
        in_memory = carray.zeros(shape, dtype, cparams=cparams)
        return Array(CArraySource(in_memory, params=params))

    # Persistent container: create it on disk, then reopen it
    carray.zeros(shape, dtype, rootdir=rootdir, cparams=cparams)
    return open(rootdir)
Exemple #6
0
def fromiter(iterable, dshape, params=None):
    """ Create an Array and fill it with values from `iterable`.

    Parameters
    ----------
    iterable : iterable object
        An iterable object providing data for the carray.
    dshape : str, blaze.dshape instance
        Specifies the datashape of the outcome object.  Only 1d shapes
        are supported right now. When the `iterable` should return an
        unknown number of items, a ``TypeVar`` can be used.
    params : blaze.params object
        Any parameter supported by the backend library.

    Returns
    -------
    out : an Array object.  When the parameters resolve to a root
        directory, the data is written there and the result of
        ``open(rootdir)`` is returned.

    Raises
    ------
    ValueError
        If the datashape does not have exactly one dimension.

    """
    if isinstance(dshape, basestring):
        dshape = _dshape(dshape)
    shape, dtype = dshape.parameters[:-1], dshape.parameters[-1]

    # Check the shape part.  A datashape with no dimensions is rejected
    # here as well; it used to slip through and fail on `shape[0]` with
    # an IndexError.
    if len(shape) != 1:
        raise ValueError("shape can be only 1-dimensional")
    length = shape[0]

    # A fixed dimension gives the item count; anything else (e.g. a
    # TypeVar) is passed on as -1.
    count = length.val if isinstance(length, Fixed) else -1

    dtype = dtype.to_dtype()
    # Now, create the Array itself (using the carray backend)
    cparams, rootdir, _format_flavor = to_cparams(params or _params())
    if rootdir is not None:
        carray.fromiter(iterable,
                        dtype,
                        count=count,
                        rootdir=rootdir,
                        cparams=cparams)
        return open(rootdir)

    ica = carray.fromiter(iterable, dtype, count=count, cparams=cparams)
    source = CArraySource(ica, params=params)
    return Array(source)
Exemple #7
0
class Array(Indexable):
    """
    Manifest array, does not create a graph. Forces evaluation on every
    call.

    Parameters
    ----------

        obj : A list of byte providers, other NDTables or a Python object.

    Optional
    --------

        dshape : str or dshape
            Manual datashape specification for the array, if None then
            the shape will be inferred if possible. A string is parsed
            into a datashape first.
        metadata : list
            Extra metadata entries appended to the class ``_metaheader``.
        layout :
            Layout to use; if not given, the data's ``default_layout()``
            is used.
        params :
            Stored on the instance and forwarded to ``CArraySource`` when
            ``obj`` is not already a ``ByteProvider``.

    Usage
    -----

        >>> Array([1,2,3])
        >>> Array([1,2,3], dshape='3, int32')
        >>> Array([1,2,3], dshape('3, int32'))
        >>> Array([1,2,3], params=params(clevel=3, storage='file'))

    """

    eclass = MANIFEST
    _metaheader = [
        md.manifest,
        md.arraylike,
    ]

    def __init__(self,
                 obj,
                 dshape=None,
                 metadata=None,
                 layout=None,
                 params=None):

        # Datashape
        # ---------

        if isinstance(dshape, basestring):
            dshape = _dshape(dshape)

        if not dshape:
            # The user just passed in a raw data source, try
            # and infer how it should be laid out or fall
            # back on dynamic types.
            self._datashape = dshape = CArraySource.infer_datashape(obj)
        else:
            # The user overlaid their custom dshape on this
            # data, check if it makes sense
            CArraySource.check_datashape(obj, given_dshape=dshape)
            self._datashape = dshape

        # Values
        # ------
        # Mimic NumPy behavior in that we have a variety of
        # possible arguments to the first argument which result
        # in different behavior for the values.

        if isinstance(obj, ByteProvider):
            self.data = obj
        else:
            self.data = CArraySource(obj, params=params)

        # children graph nodes
        self.children = []

        self.space = Space(self.data)

        # Layout
        # ------

        if layout:
            self._layout = layout
        else:
            self._layout = self.data.default_layout()

        # Metadata
        # --------
        # Use this class's own header (manifest, arraylike) rather than
        # the one belonging to NDArray.

        self._metadata = Array._metaheader + (metadata or [])

        # Parameters
        # ----------
        self.params = params

    #------------------------------------------------------------------------
    # Properties
    #------------------------------------------------------------------------

    @property
    def datashape(self):
        """
        Type deconstructor
        """
        return self._datashape

    @property
    def size(self):
        """
        Size of the Array.
        """
        # TODO: need to generalize, not every Array will look
        # like Numpy
        # NOTE(review): this adds up the `.val` of every dimension, which
        # is not the element count for multi-dimensional shapes -- confirm
        # whether a product was intended.
        return sum(i.val for i in self._datashape.parameters[:-1])

    @property
    def backends(self):
        """
        The storage backends that make up the space behind the
        Array.
        """
        return iter(self.space)

    #------------------------------------------------------------------------
    # Basic Slicing
    #------------------------------------------------------------------------

    # Immediate slicing
    def __getitem__(self, indexer):
        cc = self._layout.change_coordinates
        return retrieve(cc, indexer)

    # Immediate slicing ( Side-effectful )
    def __setitem__(self, indexer, value):
        cc = self._layout.change_coordinates
        write(cc, indexer, value)

    def __iter__(self):
        raise NotImplementedError

    def __eq__(self, other):
        # Takes `other` so that `a == b` reaches this body instead of
        # failing with a TypeError on the argument count.
        raise NotImplementedError

    def __str__(self):
        return generic_str(self, deferred=False)

    def __repr__(self):
        return generic_repr('Array', self, deferred=False)
Exemple #8
0
class Array(Indexable):
    """
    Manifest array, does not create a graph. Forces evaluation on every
    call.

    Parameters
    ----------

        obj : A list of byte providers, other NDTables or a Python object.

    Optional
    --------

        dshape : str or dshape
            Manual datashape specification for the array, if None then
            the shape will be inferred if possible. A string is parsed
            into a datashape first.
        metadata : list
            Extra metadata entries appended to the class ``_metaheader``.
        layout :
            Layout to use; if not given, the data's ``default_layout()``
            is used.
        params :
            Stored on the instance and forwarded to ``CArraySource`` when
            ``obj`` is not already a ``ByteProvider``.

    Usage
    -----

        >>> Array([1,2,3])
        >>> Array([1,2,3], dshape='3, int32')
        >>> Array([1,2,3], dshape('3, int32'))
        >>> Array([1,2,3], params=params(clevel=3, storage='file'))

    """

    eclass = MANIFEST
    _metaheader = [
        md.manifest,
        md.arraylike,
    ]

    def __init__(self, obj, dshape=None, metadata=None, layout=None,
            params=None):

        # Datashape
        # ---------

        if isinstance(dshape, basestring):
            dshape = _dshape(dshape)

        if not dshape:
            # The user just passed in a raw data source, try
            # and infer how it should be laid out or fall
            # back on dynamic types.
            self._datashape = dshape = CArraySource.infer_datashape(obj)
        else:
            # The user overlaid their custom dshape on this
            # data, check if it makes sense
            CArraySource.check_datashape(obj, given_dshape=dshape)
            self._datashape = dshape

        # Values
        # ------
        # Mimic NumPy behavior in that we have a variety of
        # possible arguments to the first argument which result
        # in different behavior for the values.

        if isinstance(obj, ByteProvider):
            self.data = obj
        else:
            self.data = CArraySource(obj, params=params)

        # children graph nodes
        self.children = []

        self.space = Space(self.data)

        # Layout
        # ------

        if layout:
            self._layout = layout
        else:
            self._layout = self.data.default_layout()

        # Metadata
        # --------
        # Use this class's own header (manifest, arraylike) rather than
        # the one belonging to NDArray.

        self._metadata = Array._metaheader + (metadata or [])

        # Parameters
        # ----------
        self.params = params


    #------------------------------------------------------------------------
    # Properties
    #------------------------------------------------------------------------

    @property
    def datashape(self):
        """
        Type deconstructor
        """
        return self._datashape

    @property
    def size(self):
        """
        Size of the Array.
        """
        # TODO: need to generalize, not every Array will look
        # like Numpy
        # NOTE(review): this adds up the `.val` of every dimension, which
        # is not the element count for multi-dimensional shapes -- confirm
        # whether a product was intended.
        return sum(i.val for i in self._datashape.parameters[:-1])

    @property
    def backends(self):
        """
        The storage backends that make up the space behind the
        Array.
        """
        return iter(self.space)

    #------------------------------------------------------------------------
    # Basic Slicing
    #------------------------------------------------------------------------

    # Immediate slicing
    def __getitem__(self, indexer):
        cc = self._layout.change_coordinates
        return retrieve(cc, indexer)

    # Immediate slicing ( Side-effectful )
    def __setitem__(self, indexer, value):
        cc = self._layout.change_coordinates
        write(cc, indexer, value)

    def __iter__(self):
        raise NotImplementedError

    def __eq__(self, other):
        # Takes `other` so that `a == b` reaches this body instead of
        # failing with a TypeError on the argument count.
        raise NotImplementedError

    def __str__(self):
        return generic_str(self, deferred=False)

    def __repr__(self):
        return generic_repr('Array', self, deferred=False)
Exemple #9
0
def open(uri, mode='a', eclass=_eclass.manifest):
    """Open a Blaze object via a `uri` (Uniform Resource Identifier).

    Parameters
    ----------
    uri : str
        Specifies the URI for the Blaze object.  It can be a regular file too.
        The URL scheme indicates the storage type:

          * carray: Chunked array
          * ctable: Chunked table
          * sqlite: SQLite table (the URI 'sqlite://' creates in-memory table)

        If no URI scheme is given, carray is assumed.

    mode : the open mode (string)
        Specifies the mode in which the object is opened.  The supported
        values are:

          * 'r' for read-only
          * 'w' for emptying the previous underlying data
          * 'a' for allowing read/write on top of existing data

        NOTE: this argument is accepted but not consulted by this function.

    eclass : evaluation class
        ``_eclass.manifest`` (the default) returns an ``Array`` or
        ``Table``; ``_eclass.delayed`` returns an ``NDArray`` or
        ``NDTable``.

    Returns
    -------
    out : an Array or Table object (or their delayed counterparts).

    Raises
    ------
    ValueError
        If `eclass` is neither ``_eclass.manifest`` nor ``_eclass.delayed``.
        Such a value used to make this function return ``None`` silently.

    """
    # Reject an unsupported evaluation class before touching any storage
    if eclass is not _eclass.manifest and eclass is not _eclass.delayed:
        raise ValueError("Unsupported eclass: %r" % (eclass,))

    parsed = urlparse(uri)
    path = parsed.netloc + parsed.path

    if parsed.scheme == 'ctable':
        source = CTableSource(params=params(storage=path))
        is_table = True

    elif parsed.scheme == 'sqlite':
        # Empty path means memory storage
        source = SqliteSource(params=params(storage=path or None))
        is_table = True

    else:
        # 'carray', or by default treat the URI as a regular path
        source = CArraySource(params=params(storage=path))
        is_table = False

    if eclass is _eclass.manifest:
        return Table(source) if is_table else Array(source)
    return NDTable(source) if is_table else NDArray(source)