Example #1
0
def construct_dataarray(dim_num, dtype, contains_nan, dask):
    # dimnum <= 3
    rng = np.random.RandomState(0)
    shapes = [16, 8, 4][:dim_num]
    dims = ('x', 'y', 'z')[:dim_num]

    if np.issubdtype(dtype, np.floating):
        array = rng.randn(*shapes).astype(dtype)
    elif np.issubdtype(dtype, np.integer):
        array = rng.randint(0, 10, size=shapes).astype(dtype)
    elif np.issubdtype(dtype, np.bool_):
        array = rng.randint(0, 1, size=shapes).astype(dtype)
    elif dtype == str:
        array = rng.choice(['a', 'b', 'c', 'd'], size=shapes)
    else:
        raise ValueError

    if contains_nan:
        inds = rng.choice(range(array.size), int(array.size * 0.2))
        dtype, fill_value = dtypes.maybe_promote(array.dtype)
        array = array.astype(dtype)
        array.flat[inds] = fill_value

    da = DataArray(array, dims=dims, coords={'x': np.arange(16)}, name='da')

    if dask and has_dask:
        chunks = {d: 4 for d in dims}
        da = da.chunk(chunks)

    return da
Example #2
0
def construct_dataarray(dim_num, dtype, contains_nan, dask):
    # dimnum <= 3
    rng = np.random.RandomState(0)
    shapes = [16, 8, 4][:dim_num]
    dims = ('x', 'y', 'z')[:dim_num]

    if np.issubdtype(dtype, np.floating):
        array = rng.randn(*shapes).astype(dtype)
    elif np.issubdtype(dtype, np.integer):
        array = rng.randint(0, 10, size=shapes).astype(dtype)
    elif np.issubdtype(dtype, np.bool_):
        array = rng.randint(0, 1, size=shapes).astype(dtype)
    elif dtype == str:
        array = rng.choice(['a', 'b', 'c', 'd'], size=shapes)
    else:
        raise ValueError

    if contains_nan:
        inds = rng.choice(range(array.size), int(array.size * 0.2))
        dtype, fill_value = dtypes.maybe_promote(array.dtype)
        array = array.astype(dtype)
        array.flat[inds] = fill_value

    da = DataArray(array, dims=dims, coords={'x': np.arange(16)}, name='da')

    if dask and has_dask:
        chunks = {d: 4 for d in dims}
        da = da.chunk(chunks)

    return da
Example #3
0
def resample2D(array, weights, Nx, xdim, Ntheta):
    """ Indexed of resampled particles
    (deterministic scheme) """
    code = \
    """
    for(int i = 0; i < Ntheta; i++)
    {
        int j = 0;
        float csw = weights(0, i);
        for(int k = 0; k < Nx; k++)
        {
            while(csw < u(i))
            {
                j++;
                csw += weights(j, i);
            }
            for (int l = 0; l < xdim; l++){
                newarray(k, l, i) = array(j, l, i);
            }
            u(i) = u(i) + 1.;
        }
    }
    """
    u = random.uniform(size = Ntheta, low = 0, high = 1).astype(float32)
    print u[0:10],"..."
    weights = double(weights * Nx / sum(weights, axis = 0))
    weights = weights.astype(float32)
    array = array.astype(float32)
    newarray = zeros_like(array).astype(float32)
    weave.inline(code,['u','Nx', 'Ntheta','weights', 'array', 'newarray', 'xdim'], type_converters=weave.converters.blitz)
    return newarray
Example #4
0
 def select_uint_dtype(array):
     """choose appropriate dtype based on values of an array"""
     max_val = np.max(array)
     for dtype in [np.uint8, np.uint16, np.uint32, np.uint64]:
         if max_val <= dtype(-1):
             return array.astype(dtype)
     raise ValueError('value exceeds dynamic range of largest numpy type')
Example #5
0
def resample2D(array, weights, Nx, xdim, Ntheta):
    """ Indexed of resampled particles
    (deterministic scheme) """
    code = \
    """
    for(int i = 0; i < Ntheta; i++)
    {
        int j = 0;
        float csw = weights(0, i);
        for(int k = 0; k < Nx; k++)
        {
            while(csw < u(i))
            {
                j++;
                csw += weights(j, i);
            }
            for (int l = 0; l < xdim; l++){
                newarray(k, l, i) = array(j, l, i);
            }
            u(i) = u(i) + 1.;
        }
    }
    """
    u = random.uniform(size=Ntheta, low=0, high=1).astype(float32)
    print u[0:10], "..."
    weights = double(weights * Nx / sum(weights, axis=0))
    weights = weights.astype(float32)
    array = array.astype(float32)
    newarray = zeros_like(array).astype(float32)
    weave.inline(code,
                 ['u', 'Nx', 'Ntheta', 'weights', 'array', 'newarray', 'xdim'],
                 type_converters=weave.converters.blitz)
    return newarray
Example #6
0
def generate_palette(array, centroids):
    '''Given an array and a number of centroids, returns a list of colors.'''
    palette = []
    result = kmeans(array.astype(float), centroids)
    for centroid in numpy.array(result[0]).tolist():
        centroid = (int(centroid[0]), int(centroid[1]), int(centroid[2]))
        palette.append(tuple(centroid))
    return palette
Example #7
0
def as_labelarray(initial_dtype, missing_value, array):
    """
    Curried wrapper around LabelArray, that round-trips the input data through
    `initial_dtype` first.
    """
    return LabelArray(
        array.astype(initial_dtype),
        missing_value=initial_dtype.type(missing_value),
    )
Example #8
0
def as_labelarray(initial_dtype, missing_value, array):
    """
    Curried wrapper around LabelArray, that round-trips the input data through
    `initial_dtype` first.
    """
    return LabelArray(
        array.astype(initial_dtype),
        missing_value=initial_dtype.type(missing_value),
    )
Example #9
0
 def to_uint32(self, array, colname):
     arrmax = array.max()
     if colname in OHLC:
         self.check_uint_safe(arrmax * 1000, colname)
         return (array * 1000).astype(uint32)
     elif colname == 'volume':
         self.check_uint_safe(arrmax, colname)
         return array.astype(uint32)
     elif colname == 'day':
         nanos_per_second = (1000 * 1000 * 1000)
         self.check_uint_safe(arrmax.view(int) / nanos_per_second, colname)
         return (array.view(int) / nanos_per_second).astype(uint32)
Example #10
0
 def to_uint32(self, array, colname):
     arrmax = array.max()
     if colname in OHLC:
         self.check_uint_safe(arrmax * 1000, colname)
         return (array * 1000).astype(uint32)
     elif colname == 'volume':
         self.check_uint_safe(arrmax, colname)
         return array.astype(uint32)
     elif colname == 'day':
         nanos_per_second = (1000 * 1000 * 1000)
         self.check_uint_safe(arrmax.view(int) / nanos_per_second, colname)
         return (array.view(int) / nanos_per_second).astype(uint32)
Example #11
0
def raster():
    m, n = 21, 11
    array = (zeros((m, n)) + linspace(0, 1, m).reshape(-1, 1)**2 +
             linspace(0, 1, n).reshape(1, -1))
    print(array.shape)
    print(f"delta_x={1/(n - 1)}")
    print(f"delta_y={1/(m - 1)}")
    return Rasterdata(shape=(m, n),
                      x_min=0,
                      y_max=1,
                      delta_x=1 / (n - 1),
                      delta_y=1 / (m - 1),
                      array=array.astype(float32),
                      coordinate_system="+init=epsg:32633",
                      info={})
Example #12
0
    def array_hash(self, array):
        """Return a short string hash of an ndarray's contents.

        For simple types we can just hash array.tobytes() but this can give different
        results on repeat runs with object dtypes, or complex dtype containing objects
        (e.g. references). For these, first casting the array to bytes seems to work.
        For structured dtypes, we need to cast the individual entries to bytes separately.
        """
        if array.dtype.kind in ['O', 'V']:
            if array.dtype.fields:
                return self.value_hash(
                    hstack([
                        array[name].astype(bytes) for name in array.dtype.names
                    ]).tobytes())
            else:
                return self.value_hash(array.astype(bytes).tobytes())
        else:
            return self.value_hash(array.tobytes())
Example #13
0
def check_array(array, accept_sparse=None, dtype="numeric", order=None,
                copy=False, force_all_finite=True, ensure_2d=True,
                allow_nd=False, ensure_min_samples=1, ensure_min_features=1,
                warn_on_dtype=False, estimator=None):
    """Input validation on an array, list, sparse matrix or similar.

    By default, the input is converted to an at least 2nd numpy array.
    If the dtype of the array is object, attempt converting to float,
    raising on failure.

    Parameters
    ----------
    array : object
        Input object to check / convert.

    accept_sparse : string, list of string or None (default=None)
        String[s] representing allowed sparse matrix formats, such as 'csc',
        'csr', etc.  None means that sparse matrix input will raise an error.
        If the input is sparse but not in the allowed format, it will be
        converted to the first listed format.

    dtype : string, type, list of types or None (default="numeric")
        Data type of result. If None, the dtype of the input is preserved.
        If "numeric", dtype is preserved unless array.dtype is object.
        If dtype is a list of types, conversion on the first type is only
        performed if the dtype of the input is not in the list.

    order : 'F', 'C' or None (default=None)
        Whether an array will be forced to be fortran or c-style.

    copy : boolean (default=False)
        Whether a forced copy will be triggered. If copy=False, a copy might
        be triggered by a conversion.

    force_all_finite : boolean (default=True)
        Whether to raise an error on np.inf and np.nan in X.

    ensure_2d : boolean (default=True)
        Whether to make X at least 2d.

    allow_nd : boolean (default=False)
        Whether to allow X.ndim > 2.

    ensure_min_samples : int (default=1)
        Make sure that the array has a minimum number of samples in its first
        axis (rows for a 2D array). Setting to 0 disables this check.

    ensure_min_features : int (default=1)
        Make sure that the 2D array has some minimum number of features
        (columns). The default value of 1 rejects empty datasets.
        This check is only enforced when the input data has effectively 2
        dimensions or is originally 1D and ``ensure_2d`` is True. Setting to 0
        disables this check.

    warn_on_dtype : boolean (default=False)
        Raise DataConversionWarning if the dtype of the input data structure
        does not match the requested dtype, causing a memory copy.

    estimator : str or estimator instance (default=None)
        If passed, include the name of the estimator in warning messages.

    Returns
    -------
    X_converted : object
        The converted and validated X.
    """
    if isinstance(accept_sparse, str):
        accept_sparse = [accept_sparse]

    # store whether originally we wanted numeric dtype
    dtype_numeric = dtype == "numeric"

    dtype_orig = getattr(array, "dtype", None)
    if not hasattr(dtype_orig, 'kind'):
        # not a data type (e.g. a column named dtype in a pandas DataFrame)
        dtype_orig = None

    if dtype_numeric:
        if dtype_orig is not None and dtype_orig.kind == "O":
            # if input is object, convert to float.
            dtype = np.float64
        else:
            dtype = None

    if isinstance(dtype, (list, tuple)):
        if dtype_orig is not None and dtype_orig in dtype:
            # no dtype conversion required
            dtype = None
        else:
            # dtype conversion required. Let's select the first element of the
            # list of accepted types.
            dtype = dtype[0]

    if estimator is not None:
        if isinstance(estimator, string_types):
            estimator_name = estimator
        else:
            estimator_name = estimator.__class__.__name__
    else:
        estimator_name = "Estimator"
    context = " by %s" % estimator_name if estimator is not None else ""

    if sp.issparse(array):
        array = _ensure_sparse_format(array, accept_sparse, dtype, copy,
                                      force_all_finite)
    else:
        array = np.array(array, dtype=dtype, order=order, copy=copy)

        if ensure_2d:
            if array.ndim == 1:
                if ensure_min_samples >= 2:
                    raise ValueError("%s expects at least 2 samples provided "
                                     "in a 2 dimensional array-like input"
                                     % estimator_name)
                warnings.warn(
                    "Passing 1d arrays as data is deprecated in 0.17 and will "
                    "raise ValueError in 0.19. Reshape your data either using "
                    "X.reshape(-1, 1) if your data has a single feature or "
                    "X.reshape(1, -1) if it contains a single sample.",
                    DeprecationWarning)
            array = np.atleast_2d(array)
            # To ensure that array flags are maintained
            array = np.array(array, dtype=dtype, order=order, copy=copy)

        # make sure we actually converted to numeric:
        if dtype_numeric and array.dtype.kind == "O":
            array = array.astype(np.float64)
        if not allow_nd and array.ndim >= 3:
            raise ValueError("Found array with dim %d. %s expected <= 2."
                             % (array.ndim, estimator_name))
        if force_all_finite:
            _assert_all_finite(array)

    shape_repr = _shape_repr(array.shape)
    if ensure_min_samples > 0:
        n_samples = _num_samples(array)
        if n_samples < ensure_min_samples:
            raise ValueError("Found array with %d sample(s) (shape=%s) while a"
                             " minimum of %d is required%s."
                             % (n_samples, shape_repr, ensure_min_samples,
                                context))

    if ensure_min_features > 0 and array.ndim == 2:
        n_features = array.shape[1]
        if n_features < ensure_min_features:
            raise ValueError("Found array with %d feature(s) (shape=%s) while"
                             " a minimum of %d is required%s."
                             % (n_features, shape_repr, ensure_min_features,
                                context))

    if warn_on_dtype and dtype_orig is not None and array.dtype != dtype_orig:
        msg = ("Data with input dtype %s was converted to %s%s."
               % (dtype_orig, array.dtype, context))
        warnings.warn(msg, _DataConversionWarning)
    return array
Example #14
0
def check_array(array,
                accept_sparse=None,
                dtype="numeric",
                order=None,
                copy=False,
                force_all_finite=True,
                ensure_2d=True,
                allow_nd=False,
                ensure_min_samples=1,
                ensure_min_features=1,
                warn_on_dtype=False,
                estimator=None):
    """Input validation on an array, list, sparse matrix or similar.

    By default, the input is converted to an at least 2nd numpy array.
    If the dtype of the array is object, attempt converting to float,
    raising on failure.

    Parameters
    ----------
    array : object
        Input object to check / convert.

    accept_sparse : string, list of string or None (default=None)
        String[s] representing allowed sparse matrix formats, such as 'csc',
        'csr', etc.  None means that sparse matrix input will raise an error.
        If the input is sparse but not in the allowed format, it will be
        converted to the first listed format.

    dtype : string, type, list of types or None (default="numeric")
        Data type of result. If None, the dtype of the input is preserved.
        If "numeric", dtype is preserved unless array.dtype is object.
        If dtype is a list of types, conversion on the first type is only
        performed if the dtype of the input is not in the list.

    order : 'F', 'C' or None (default=None)
        Whether an array will be forced to be fortran or c-style.

    copy : boolean (default=False)
        Whether a forced copy will be triggered. If copy=False, a copy might
        be triggered by a conversion.

    force_all_finite : boolean (default=True)
        Whether to raise an error on np.inf and np.nan in X.

    ensure_2d : boolean (default=True)
        Whether to make X at least 2d.

    allow_nd : boolean (default=False)
        Whether to allow X.ndim > 2.

    ensure_min_samples : int (default=1)
        Make sure that the array has a minimum number of samples in its first
        axis (rows for a 2D array). Setting to 0 disables this check.

    ensure_min_features : int (default=1)
        Make sure that the 2D array has some minimum number of features
        (columns). The default value of 1 rejects empty datasets.
        This check is only enforced when the input data has effectively 2
        dimensions or is originally 1D and ``ensure_2d`` is True. Setting to 0
        disables this check.

    warn_on_dtype : boolean (default=False)
        Raise DataConversionWarning if the dtype of the input data structure
        does not match the requested dtype, causing a memory copy.

    estimator : str or estimator instance (default=None)
        If passed, include the name of the estimator in warning messages.

    Returns
    -------
    X_converted : object
        The converted and validated X.
    """
    if isinstance(accept_sparse, str):
        accept_sparse = [accept_sparse]

    # store whether originally we wanted numeric dtype
    dtype_numeric = dtype == "numeric"

    dtype_orig = getattr(array, "dtype", None)
    if not hasattr(dtype_orig, 'kind'):
        # not a data type (e.g. a column named dtype in a pandas DataFrame)
        dtype_orig = None

    if dtype_numeric:
        if dtype_orig is not None and dtype_orig.kind == "O":
            # if input is object, convert to float.
            dtype = np.float64
        else:
            dtype = None

    if isinstance(dtype, (list, tuple)):
        if dtype_orig is not None and dtype_orig in dtype:
            # no dtype conversion required
            dtype = None
        else:
            # dtype conversion required. Let's select the first element of the
            # list of accepted types.
            dtype = dtype[0]

    if estimator is not None:
        if isinstance(estimator, string_types):
            estimator_name = estimator
        else:
            estimator_name = estimator.__class__.__name__
    else:
        estimator_name = "Estimator"
    context = " by %s" % estimator_name if estimator is not None else ""

    if sp.issparse(array):
        array = _ensure_sparse_format(array, accept_sparse, dtype, copy,
                                      force_all_finite)
    else:
        array = np.array(array, dtype=dtype, order=order, copy=copy)

        if ensure_2d:
            if array.ndim == 1:
                if ensure_min_samples >= 2:
                    raise ValueError("%s expects at least 2 samples provided "
                                     "in a 2 dimensional array-like input" %
                                     estimator_name)
                warnings.warn(
                    "Passing 1d arrays as data is deprecated in 0.17 and will "
                    "raise ValueError in 0.19. Reshape your data either using "
                    "X.reshape(-1, 1) if your data has a single feature or "
                    "X.reshape(1, -1) if it contains a single sample.",
                    DeprecationWarning)
            array = np.atleast_2d(array)
            # To ensure that array flags are maintained
            array = np.array(array, dtype=dtype, order=order, copy=copy)

        # make sure we actually converted to numeric:
        if dtype_numeric and array.dtype.kind == "O":
            array = array.astype(np.float64)
        if not allow_nd and array.ndim >= 3:
            raise ValueError("Found array with dim %d. %s expected <= 2." %
                             (array.ndim, estimator_name))
        if force_all_finite:
            _assert_all_finite(array)

    shape_repr = _shape_repr(array.shape)
    if ensure_min_samples > 0:
        n_samples = _num_samples(array)
        if n_samples < ensure_min_samples:
            raise ValueError(
                "Found array with %d sample(s) (shape=%s) while a"
                " minimum of %d is required%s." %
                (n_samples, shape_repr, ensure_min_samples, context))

    if ensure_min_features > 0 and array.ndim == 2:
        n_features = array.shape[1]
        if n_features < ensure_min_features:
            raise ValueError(
                "Found array with %d feature(s) (shape=%s) while"
                " a minimum of %d is required%s." %
                (n_features, shape_repr, ensure_min_features, context))

    if warn_on_dtype and dtype_orig is not None and array.dtype != dtype_orig:
        msg = ("Data with input dtype %s was converted to %s%s." %
               (dtype_orig, array.dtype, context))
        warnings.warn(msg, _DataConversionWarning)
    return array