Exemple #1
0
def test_reductions_2D_int():
    """Check 2-D dask reductions against their numpy counterparts on int32 data."""
    x = np.arange(1, 122).reshape((11, 11)).astype('i4')
    a = da.from_array(x, chunks=(4, 4))

    # Reductions whose result dtype matches numpy exactly.
    for dask_fn, np_fn in [(da.sum, np.sum), (da.prod, np.prod),
                           (da.mean, np.mean)]:
        reduction_2d_test(dask_fn, a, np_fn, x)

    # These differ from numpy in their dtype algorithm, so skip dtype checks.
    for dask_fn, np_fn in [(da.var, np.var), (da.std, np.std),
                           (da.min, np.min), (da.max, np.max),
                           (da.any, np.any), (da.all, np.all)]:
        reduction_2d_test(dask_fn, a, np_fn, x, False)

    reduction_2d_test(da.nansum, a, np.nansum, x)
    with ignoring(AttributeError):  # np.nanprod may be absent on old numpy
        reduction_2d_test(da.nanprod, a, np.nanprod, x)
    # x has no NaNs, so plain np.mean is a valid reference for nanmean.
    reduction_2d_test(da.nanmean, a, np.mean, x)
    # dtype algorithm differs for the nan-variants as well.
    reduction_2d_test(da.nanvar, a, np.nanvar, x, False)
    reduction_2d_test(da.nanstd, a, np.nanstd, x, False)
    reduction_2d_test(da.nanmin, a, np.nanmin, x, False)
    reduction_2d_test(da.nanmax, a, np.nanmax, x, False)

    for axis in (0, 1):
        assert eq(da.argmax(a, axis=axis), np.argmax(x, axis=axis))
        assert eq(da.argmin(a, axis=axis), np.argmin(x, axis=axis))
        assert eq(da.nanargmax(a, axis=axis), np.nanargmax(x, axis=axis))
        assert eq(da.nanargmin(a, axis=axis), np.nanargmin(x, axis=axis))
Exemple #2
0
def test_reductions_2D_int():
    """Compare dask 2-D reductions with numpy on an int32 array."""
    x = np.arange(1, 122).reshape((11, 11)).astype('i4')
    a = da.from_array(x, chunks=(4, 4))

    # Reductions whose dtype matches numpy exactly.
    exact_dtype = [(da.sum, np.sum), (da.prod, np.prod), (da.mean, np.mean)]
    for pair in exact_dtype:
        reduction_2d_test(pair[0], a, pair[1], x)

    # Reductions whose dtype algorithm differs from numpy (skip dtype check).
    loose_dtype = [(da.var, np.var), (da.std, np.std), (da.min, np.min),
                   (da.max, np.max), (da.any, np.any), (da.all, np.all)]
    for dask_fn, np_fn in loose_dtype:
        reduction_2d_test(dask_fn, a, np_fn, x, False)

    reduction_2d_test(da.nansum, a, np.nansum, x)
    with ignoring(AttributeError):
        reduction_2d_test(da.nanprod, a, np.nanprod, x)
    reduction_2d_test(da.nanmean, a, np.mean, x)
    reduction_2d_test(da.nanvar, a, np.nanvar, x, False)  # Difference in dtype algo
    reduction_2d_test(da.nanstd, a, np.nanstd, x, False)  # Difference in dtype algo
    reduction_2d_test(da.nanmin, a, np.nanmin, x, False)
    reduction_2d_test(da.nanmax, a, np.nanmax, x, False)

    for ax in (0, 1):
        assert eq(da.argmax(a, axis=ax), np.argmax(x, axis=ax))
        assert eq(da.argmin(a, axis=ax), np.argmin(x, axis=ax))
        assert eq(da.nanargmax(a, axis=ax), np.nanargmax(x, axis=ax))
        assert eq(da.nanargmin(a, axis=ax), np.nanargmin(x, axis=ax))
Exemple #3
0
def test_reductions_1D(dtype):
    """Run the standard 1-D reduction battery for the given dtype."""
    x = np.arange(5).astype(dtype)
    a = da.from_array(x, chunks=(2,))

    for dask_fn, np_fn in [(da.sum, np.sum), (da.prod, np.prod),
                           (da.mean, np.mean), (da.var, np.var),
                           (da.std, np.std)]:
        reduction_1d_test(dask_fn, a, np_fn, x)
    for dask_fn, np_fn in [(da.min, np.min), (da.max, np.max),
                           (da.any, np.any), (da.all, np.all)]:
        reduction_1d_test(dask_fn, a, np_fn, x, False)

    reduction_1d_test(da.nansum, a, np.nansum, x)
    with ignoring(AttributeError):  # np.nanprod may be absent on old numpy
        reduction_1d_test(da.nanprod, a, np.nanprod, x)
    # No NaNs in x, so the plain mean/var/std are valid references.
    reduction_1d_test(da.nanmean, a, np.mean, x)
    reduction_1d_test(da.nanvar, a, np.var, x)
    reduction_1d_test(da.nanstd, a, np.std, x)
    reduction_1d_test(da.nanmin, a, np.nanmin, x, False)
    reduction_1d_test(da.nanmax, a, np.nanmax, x, False)

    assert eq(da.argmax(a, axis=0), np.argmax(x, axis=0))
    assert eq(da.argmin(a, axis=0), np.argmin(x, axis=0))
    assert eq(da.nanargmax(a, axis=0), np.nanargmax(x, axis=0))
    assert eq(da.nanargmin(a, axis=0), np.nanargmin(x, axis=0))

    # Same arg-reductions again with a bounded tree reduction.
    assert eq(da.argmax(a, axis=0, split_every=2), np.argmax(x, axis=0))
    assert eq(da.argmin(a, axis=0, split_every=2), np.argmin(x, axis=0))
    assert eq(da.nanargmax(a, axis=0, split_every=2), np.nanargmax(x, axis=0))
    assert eq(da.nanargmin(a, axis=0, split_every=2), np.nanargmin(x, axis=0))
Exemple #4
0
def test_reductions_2D_nans():
    """2-D reductions on an array whose chunks hold some, all, or no NaNs."""
    x = np.full((4, 4), np.nan)
    x[:2, :2] = np.array([[1, 2], [3, 4]])
    x[2, 2] = 5
    x[3, 3] = 6
    a = da.from_array(x, chunks=(2, 2))

    for dask_fn, np_fn in [(da.sum, np.sum), (da.prod, np.prod),
                           (da.mean, np.mean), (da.var, np.var),
                           (da.std, np.std), (da.min, np.min),
                           (da.max, np.max), (da.any, np.any),
                           (da.all, np.all)]:
        reduction_2d_test(dask_fn, a, np_fn, x, False, False)

    reduction_2d_test(da.nansum, a, np.nansum, x, False, False)
    reduction_2d_test(da.nanprod, a, np.nanprod, x, False, False)
    reduction_2d_test(da.nanmean, a, np.nanmean, x, False, False)
    with pytest.warns(None):  # division by 0 warning
        reduction_2d_test(da.nanvar, a, np.nanvar, x, False, False)
    with pytest.warns(None):  # division by 0 warning
        reduction_2d_test(da.nanstd, a, np.nanstd, x, False, False)
    with pytest.warns(None):  # all NaN axis warning
        reduction_2d_test(da.nanmin, a, np.nanmin, x, False, False)
    with pytest.warns(None):  # all NaN axis warning
        reduction_2d_test(da.nanmax, a, np.nanmax, x, False, False)

    assert_eq(da.argmax(a), np.argmax(x))
    assert_eq(da.argmin(a), np.argmin(x))
    with pytest.warns(None):  # all NaN axis warning
        assert_eq(da.nanargmax(a), np.nanargmax(x))
    with pytest.warns(None):  # all NaN axis warning
        assert_eq(da.nanargmin(a), np.nanargmin(x))
    for ax in (0, 1):
        assert_eq(da.argmax(a, axis=ax), np.argmax(x, axis=ax))
        assert_eq(da.argmin(a, axis=ax), np.argmin(x, axis=ax))
        with pytest.warns(None):  # all NaN axis warning
            assert_eq(da.nanargmax(a, axis=ax), np.nanargmax(x, axis=ax))
        with pytest.warns(None):  # all NaN axis warning
            assert_eq(da.nanargmin(a, axis=ax), np.nanargmin(x, axis=ax))
Exemple #5
0
def test_reductions_2D_nans():
    """Exercise 2-D reductions where chunks contain some/all/no NaNs."""
    x = np.full((4, 4), np.nan)
    x[:2, :2] = np.array([[1, 2], [3, 4]])
    x[2, 2] = 5
    x[3, 3] = 6
    a = da.from_array(x, chunks=(2, 2))

    plain = [(da.sum, np.sum), (da.prod, np.prod), (da.mean, np.mean),
             (da.var, np.var), (da.std, np.std), (da.min, np.min),
             (da.max, np.max), (da.any, np.any), (da.all, np.all)]
    for dask_fn, ref_fn in plain:
        reduction_2d_test(dask_fn, a, ref_fn, x, False, False)

    reduction_2d_test(da.nansum, a, np.nansum, x, False, False)
    # NOTE(review): `nanprod` is a bare module-level reference here (not
    # np.nanprod) -- presumably a local compatibility shim; confirm.
    reduction_2d_test(da.nanprod, a, nanprod, x, False, False)
    reduction_2d_test(da.nanmean, a, np.nanmean, x, False, False)
    with pytest.warns(None):  # division by 0 warning
        reduction_2d_test(da.nanvar, a, np.nanvar, x, False, False)
    with pytest.warns(None):  # division by 0 warning
        reduction_2d_test(da.nanstd, a, np.nanstd, x, False, False)
    with pytest.warns(None):  # all NaN axis warning
        reduction_2d_test(da.nanmin, a, np.nanmin, x, False, False)
    with pytest.warns(None):  # all NaN axis warning
        reduction_2d_test(da.nanmax, a, np.nanmax, x, False, False)

    assert_eq(da.argmax(a), np.argmax(x))
    assert_eq(da.argmin(a), np.argmin(x))
    with pytest.warns(None):  # all NaN axis warning
        assert_eq(da.nanargmax(a), np.nanargmax(x))
    with pytest.warns(None):  # all NaN axis warning
        assert_eq(da.nanargmin(a), np.nanargmin(x))
    for axis in (0, 1):
        assert_eq(da.argmax(a, axis=axis), np.argmax(x, axis=axis))
        assert_eq(da.argmin(a, axis=axis), np.argmin(x, axis=axis))
        with pytest.warns(None):  # all NaN axis warning
            assert_eq(da.nanargmax(a, axis=axis), np.nanargmax(x, axis=axis))
        with pytest.warns(None):  # all NaN axis warning
            assert_eq(da.nanargmin(a, axis=axis), np.nanargmin(x, axis=axis))
Exemple #6
0
def test_reductions_1D_int():
    """1-D reduction battery on int32 data."""
    x = np.arange(5).astype('i4')
    a = da.from_array(x, chunks=(2, ))

    exact = [(da.sum, np.sum), (da.prod, np.prod), (da.mean, np.mean),
             (da.var, np.var), (da.std, np.std)]
    for dask_fn, np_fn in exact:
        reduction_1d_test(dask_fn, a, np_fn, x)
    no_dtype_check = [(da.min, np.min), (da.max, np.max),
                      (da.any, np.any), (da.all, np.all)]
    for dask_fn, np_fn in no_dtype_check:
        reduction_1d_test(dask_fn, a, np_fn, x, False)

    reduction_1d_test(da.nansum, a, np.nansum, x)
    with ignoring(AttributeError):  # np.nanprod may be missing on old numpy
        reduction_1d_test(da.nanprod, a, np.nanprod, x)
    # x has no NaNs, so the plain numpy functions serve as references.
    reduction_1d_test(da.nanmean, a, np.mean, x)
    reduction_1d_test(da.nanvar, a, np.var, x)
    reduction_1d_test(da.nanstd, a, np.std, x)
    reduction_1d_test(da.nanmin, a, np.nanmin, x, False)
    reduction_1d_test(da.nanmax, a, np.nanmax, x, False)

    assert eq(da.argmax(a, axis=0), np.argmax(x, axis=0))
    assert eq(da.argmin(a, axis=0), np.argmin(x, axis=0))
    assert eq(da.nanargmax(a, axis=0), np.nanargmax(x, axis=0))
    assert eq(da.nanargmin(a, axis=0), np.nanargmin(x, axis=0))
Exemple #7
0
def grid_search_model(data, model_meta, folds=5):
    """
    Perform Grid Search Cross Validation on the input model

    :param data: a pandas dataframe where each row is an hour
    :param model_meta: A dict containing the name for the model ("name"), the sklearn estimator ("model"),
                    and the parameters for Grid Search Cross Validation ("params")
    :param folds: The number of splits for cross validation
    :return: a tuple containing the best R^2 score found, the parameters used to obtain that score,
                and the estimator retrained on the whole dataset
    """
    model = model_meta["model"]
    model_params = model_meta["params"]
    model_name = model_meta["name"]

    X = data.drop("cnt", axis=1)
    y = data["cnt"]

    # Time-ordered splits: never train on the future to predict the past.
    tscv = TimeSeriesSplit(n_splits=folds)

    grid_search = GridSearchCV(estimator=model, param_grid=model_params, scoring="r2", cv=tscv, refit=True)
    grid_search.fit(X, y)

    # cv_results_ fields are plain numpy arrays, so use the eager np.argmax
    # rather than da.argmax: the lazy dask scalar da.argmax returns is not a
    # valid numpy index without an explicit compute().
    best_idx = int(np.argmax(grid_search.cv_results_["mean_test_score"]))
    print("\tAverage result for best {}: {} +/- {:.5f}"
          .format(model_name,
                  grid_search.best_score_,
                  grid_search.cv_results_["std_test_score"][best_idx]))

    print("\tBest parameters for {0}: {1}".format(model_name, grid_search.best_params_))

    # Need metrics to choose model, best estimator will have already been retrained on whole data set
    return grid_search.best_score_, grid_search.best_params_, grid_search.best_estimator_
Exemple #8
0
def max_and_argmax(data):
    """Returns max and argmax along last two axes.

    Last two axes should correspond to the x and y dimensions.

    Parameters
    ----------
    data : dask array
        data with at least 3 dimensions

    Returns
    -------
    weights : dask array
        max of `data` along the last two axes
    argmax : dask array
        argmax of `data` along the last two axes
    """
    # Collapse the trailing x/y axes into one so single-axis reductions apply.
    flattened = data.reshape(data.shape[:-2] + (-1, ))
    # max and argmax are evaluated independently; once
    # da.take_along_axis() exists (https://github.com/dask/dask/issues/3663)
    # the max could be derived from the argmax instead.
    positions = da.argmax(flattened, axis=-1)
    weights = da.max(flattened, axis=-1)
    return weights, positions
Exemple #9
0
 def get_labels(self) -> zarr.array:
     """Return per-document topic labels, computing and caching them on first use.

     Each document's label is the index of its highest-probability topic
     (row-wise argmax of the doc-topic matrix).  The result is persisted
     under ``self.path / "labels.zarr.zip"`` and reused on later calls.
     """
     if "labels" not in self.data:
         # argmax over axis=1 picks the dominant topic for every document.
         self.data["labels"] = Raw.from_dask_array(
             self.path / "labels.zarr.zip",
             da.argmax(da.array(self.get_doc_topic_matrix()), axis=1),
         )
         # Persist the updated registry -- presumably writes state to disk; confirm.
         self.save()
     return self.data["labels"].get()
Exemple #10
0
 def predict(self, X):
     """Predict class labels for ``X`` using the trained booster.

     Multi-class scores (2-D output) are reduced with an argmax across
     classes; a single score column is thresholded at 0 instead.
     """
     client = default_client()
     class_probs = predict(client, self._Booster, X)
     if class_probs.ndim > 1:
         # One column per class: the label is the highest-scoring class.
         cidx = da.argmax(class_probs, axis=1)
     else:
         # Single margin/probability column: positive score -> label 1.
         cidx = (class_probs > 0).astype(np.int64)
     return cidx
Exemple #11
0
def test_reductions_2D(dtype):
    """2-D reduction battery, including keepdims and split_every variants."""
    x = np.arange(1, 122).reshape((11, 11)).astype(dtype)
    a = da.from_array(x, chunks=(4, 4))

    # A full keepdims reduction collapses to a single 1x1 block.
    b = a.sum(keepdims=True)
    assert b._keys() == [[(b.name, 0, 0)]]

    for dask_fn, np_fn in [(da.sum, np.sum), (da.prod, np.prod),
                           (da.mean, np.mean)]:
        reduction_2d_test(dask_fn, a, np_fn, x)
    # dtype algorithm differs from numpy for the remainder; skip dtype check.
    for dask_fn, np_fn in [(da.var, np.var), (da.std, np.std),
                           (da.min, np.min), (da.max, np.max),
                           (da.any, np.any), (da.all, np.all)]:
        reduction_2d_test(dask_fn, a, np_fn, x, False)

    reduction_2d_test(da.nansum, a, np.nansum, x)
    with ignoring(AttributeError):  # np.nanprod may be absent on old numpy
        reduction_2d_test(da.nanprod, a, np.nanprod, x)
    reduction_2d_test(da.nanmean, a, np.mean, x)
    reduction_2d_test(da.nanvar, a, np.nanvar, x, False)  # dtype algo differs
    reduction_2d_test(da.nanstd, a, np.nanstd, x, False)  # dtype algo differs
    reduction_2d_test(da.nanmin, a, np.nanmin, x, False)
    reduction_2d_test(da.nanmax, a, np.nanmax, x, False)

    for ax in (0, 1):
        assert eq(da.argmax(a, axis=ax), np.argmax(x, axis=ax))
        assert eq(da.argmin(a, axis=ax), np.argmin(x, axis=ax))
        assert eq(da.nanargmax(a, axis=ax), np.nanargmax(x, axis=ax))
        assert eq(da.nanargmin(a, axis=ax), np.nanargmin(x, axis=ax))

    # Repeat the arg-reductions with a bounded tree reduction.
    for ax in (0, 1):
        assert eq(da.argmax(a, axis=ax, split_every=2), np.argmax(x, axis=ax))
        assert eq(da.argmin(a, axis=ax, split_every=2), np.argmin(x, axis=ax))
        assert eq(da.nanargmax(a, axis=ax, split_every=2), np.nanargmax(x, axis=ax))
        assert eq(da.nanargmin(a, axis=ax, split_every=2), np.nanargmin(x, axis=ax))
Exemple #12
0
def test_reductions_2D_nans():
    """2-D reductions where chunks hold some, all, or no NaNs."""
    x = np.full((4, 4), np.nan)
    x[:2, :2] = np.array([[1, 2], [3, 4]])
    x[2, 2] = 5
    x[3, 3] = 6
    a = da.from_array(x, chunks=(2, 2))

    pairs = [(da.sum, np.sum), (da.prod, np.prod), (da.mean, np.mean),
             (da.var, np.var), (da.std, np.std), (da.min, np.min),
             (da.max, np.max), (da.any, np.any), (da.all, np.all)]
    for dask_fn, np_fn in pairs:
        reduction_2d_test(dask_fn, a, np_fn, x, False, False)

    reduction_2d_test(da.nansum, a, np.nansum, x, False, False)
    reduction_2d_test(da.nanprod, a, np.nanprod, x, False, False)

    # The all-NaN chunks make numpy emit RuntimeWarnings; silence them here.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", RuntimeWarning)
        for dask_fn, np_fn in [(da.nanmean, np.nanmean),
                               (da.nanvar, np.nanvar),
                               (da.nanstd, np.nanstd),
                               (da.nanmin, np.nanmin),
                               (da.nanmax, np.nanmax)]:
            reduction_2d_test(dask_fn, a, np_fn, x, False, False)

        assert_eq(da.argmax(a), np.argmax(x))
        assert_eq(da.argmin(a), np.argmin(x))
        assert_eq(da.nanargmax(a), np.nanargmax(x))
        assert_eq(da.nanargmin(a), np.nanargmin(x))

        for ax in (0, 1):
            assert_eq(da.argmax(a, axis=ax), np.argmax(x, axis=ax))
            assert_eq(da.argmin(a, axis=ax), np.argmin(x, axis=ax))
            assert_eq(da.nanargmax(a, axis=ax), np.nanargmax(x, axis=ax))
            assert_eq(da.nanargmin(a, axis=ax), np.nanargmin(x, axis=ax))
Exemple #13
0
def _get_first_unmasked_data(array, axis):
    """Get first unmasked value of an array along an axis."""
    # Map unmasked entries to 1.0 and masked entries to -1.0; argmax then
    # yields the position of the first unmasked (positive) element per slice.
    mask = da.ma.getmaskarray(array)
    numerical_mask = da.where(mask, -1.0, 1.0)
    indices_first_positive = da.argmax(numerical_mask, axis=axis)
    # Build index grids for every other axis, then insert the per-slice
    # "first unmasked" indices at `axis` to form a full fancy index.
    indices = da.meshgrid(
        *[da.arange(array.shape[i]) for i in range(array.ndim) if i != axis],
        indexing='ij')
    indices.insert(axis, indices_first_positive)
    # NOTE(review): np.array(array) materializes the whole array in memory --
    # presumably acceptable for the expected input sizes; confirm.
    first_unmasked_data = np.array(array)[tuple(indices)]
    return first_unmasked_data
Exemple #14
0
def test_reductions_2D_nans():
    """Reductions over a 2-D array whose chunks mix NaN contents."""
    x = np.full((4, 4), np.nan)
    x[:2, :2] = np.array([[1, 2], [3, 4]])
    x[2, 2] = 5
    x[3, 3] = 6
    a = da.from_array(x, chunks=(2, 2))

    for dask_fn, np_fn in [(da.sum, np.sum), (da.prod, np.prod),
                           (da.mean, np.mean), (da.var, np.var),
                           (da.std, np.std), (da.min, np.min),
                           (da.max, np.max), (da.any, np.any),
                           (da.all, np.all)]:
        reduction_2d_test(dask_fn, a, np_fn, x, False, False)

    reduction_2d_test(da.nansum, a, np.nansum, x, False, False)
    with ignoring(AttributeError):  # np.nanprod may be absent on old numpy
        reduction_2d_test(da.nanprod, a, np.nanprod, x, False, False)
    for dask_fn, np_fn in [(da.nanmean, np.nanmean), (da.nanvar, np.nanvar),
                           (da.nanstd, np.nanstd), (da.nanmin, np.nanmin),
                           (da.nanmax, np.nanmax)]:
        reduction_2d_test(dask_fn, a, np_fn, x, False, False)

    assert eq(da.argmax(a), np.argmax(x))
    assert eq(da.argmin(a), np.argmin(x))
    assert eq(da.nanargmax(a), np.nanargmax(x))
    assert eq(da.nanargmin(a), np.nanargmin(x))
    for ax in (0, 1):
        assert eq(da.argmax(a, axis=ax), np.argmax(x, axis=ax))
        assert eq(da.argmin(a, axis=ax), np.argmin(x, axis=ax))
        assert eq(da.nanargmax(a, axis=ax), np.nanargmax(x, axis=ax))
        assert eq(da.nanargmin(a, axis=ax), np.nanargmin(x, axis=ax))
Exemple #15
0
def max_and_argmax(data):
    """Return the dask max and argmax of data along the last two axes,
    which corresponds to the x and y dimensions
    (uncomputed)
    """
    # Merge the final two axes so a single-axis reduction covers both.
    collapsed_shape = data.shape[:-2] + (-1, )
    flat = data.reshape(collapsed_shape)
    # max and argmax are computed independently; da.take_along_axis
    # (https://github.com/dask/dask/issues/3663) would let one be derived
    # from the other.
    best_index = da.argmax(flat, axis=-1)
    best_value = da.max(flat, axis=-1)
    return best_value, best_index
Exemple #16
0
def test_reductions_2D_nans():
    """NaN-mixed 2-D array: dask reductions must agree with numpy."""
    x = np.full((4, 4), np.nan)
    x[:2, :2] = np.array([[1, 2], [3, 4]])
    x[2, 2] = 5
    x[3, 3] = 6
    a = da.from_array(x, chunks=(2, 2))

    basic = [(da.sum, np.sum), (da.prod, np.prod), (da.mean, np.mean),
             (da.var, np.var), (da.std, np.std), (da.min, np.min),
             (da.max, np.max), (da.any, np.any), (da.all, np.all)]
    for dfunc, ref in basic:
        reduction_2d_test(dfunc, a, ref, x, False, False)

    reduction_2d_test(da.nansum, a, np.nansum, x, False, False)
    with ignoring(AttributeError):  # np.nanprod may not exist on old numpy
        reduction_2d_test(da.nanprod, a, np.nanprod, x, False, False)
    nan_aware = [(da.nanmean, np.nanmean), (da.nanvar, np.nanvar),
                 (da.nanstd, np.nanstd), (da.nanmin, np.nanmin),
                 (da.nanmax, np.nanmax)]
    for dfunc, ref in nan_aware:
        reduction_2d_test(dfunc, a, ref, x, False, False)

    assert eq(da.argmax(a), np.argmax(x))
    assert eq(da.argmin(a), np.argmin(x))
    assert eq(da.nanargmax(a), np.nanargmax(x))
    assert eq(da.nanargmin(a), np.nanargmin(x))
    for axis in (0, 1):
        assert eq(da.argmax(a, axis=axis), np.argmax(x, axis=axis))
        assert eq(da.argmin(a, axis=axis), np.argmin(x, axis=axis))
        assert eq(da.nanargmax(a, axis=axis), np.nanargmax(x, axis=axis))
        assert eq(da.nanargmin(a, axis=axis), np.nanargmin(x, axis=axis))
Exemple #17
0
 def predict(self, X):
     """
     Perform classification on an array of test vectors X.
     Parameters
     ----------
     X : array-like, shape = [n_samples, n_features]
     Returns
     -------
     C : array, shape = [n_samples]
         Predicted target values for X
     """
     jll = self._joint_log_likelihood(X)
     # Pick the class with the highest joint log-likelihood per sample;
     # delayed() keeps the classes_ lookup lazy alongside the dask argmax.
     return delayed(self.classes_)[da.argmax(jll, axis=1)]
Exemple #18
0
def nanargmax(a, axis=None):
    """NaN-ignoring argmax that raises on all-NaN slices."""
    # Replace NaNs with -inf so they can never win the argmax.
    fill_value = dtypes.get_neg_infinity(a.dtype)
    if a.dtype.kind == "O":
        # Object arrays take the dedicated object-aware code path.
        return _nan_argminmax_object("argmax", fill_value, a, axis=axis)

    a, mask = _replace_nan(a, fill_value)
    if isinstance(a, dask_array_type):
        # Keep the computation lazy for dask-backed arrays.
        res = dask_array.argmax(a, axis=axis)
    else:
        res = np.argmax(a, axis=axis)

    if mask is not None:
        # Mirror numpy's nanargmax contract: an all-NaN slice is an error.
        mask = mask.all(axis=axis)
        if mask.any():
            raise ValueError("All-NaN slice encountered")
    return res
Exemple #19
0
    async def _predict_async(self, data, output_margin=False, base_margin=None):
        """Asynchronously predict class labels for ``data``.

        Builds a DaskDMatrix from ``data``, obtains per-class scores from
        the booster, then converts scores to labels: thresholding at 0.5
        for binary problems, argmax across classes otherwise.
        """
        test_dmatrix = await DaskDMatrix(
            client=self.client, data=data, base_margin=base_margin,
            missing=self.missing
        )
        pred_probs = await predict(client=self.client,
                                   model=self.get_booster(),
                                   data=test_dmatrix,
                                   output_margin=output_margin)

        if self.n_classes_ == 2:
            # Binary case: a single score column, threshold at 0.5.
            preds = (pred_probs > 0.5).astype(int)
        else:
            # Multi-class: the label is the highest-scoring class index.
            preds = da.argmax(pred_probs, axis=1)

        return preds
Exemple #20
0
def nanargmax(a, axis=None):
    """Return the index of the maximum, ignoring NaNs; error on all-NaN slices."""
    # A -inf fill guarantees NaN positions can never be selected as the max.
    fill_value = dtypes.get_neg_infinity(a.dtype)
    if a.dtype.kind == 'O':
        # Object dtype is handled by the slower object-array implementation.
        return _nan_argminmax_object('argmax', fill_value, a, axis=axis)

    a, mask = _replace_nan(a, fill_value)
    if isinstance(a, dask_array_type):
        # Stay lazy when the input is dask-backed.
        res = dask_array.argmax(a, axis=axis)
    else:
        res = np.argmax(a, axis=axis)

    if mask is not None:
        # Match numpy semantics: all-NaN slices raise rather than return junk.
        mask = mask.all(axis=axis)
        if mask.any():
            raise ValueError("All-NaN slice encountered")
    return res
Exemple #21
0
def test_linspace(endpoint):
    """da.linspace should match np.linspace for equivalent arguments."""
    assert_eq(da.linspace(6, 49, endpoint=endpoint, chunks=5),
              np.linspace(6, 49, endpoint=endpoint))

    assert_eq(da.linspace(1.4, 4.9, endpoint=endpoint, chunks=5, num=13),
              np.linspace(1.4, 4.9, endpoint=endpoint, num=13))

    assert_eq(da.linspace(6, 49, endpoint=endpoint, chunks=5, dtype=float),
              np.linspace(6, 49, endpoint=endpoint, dtype=float))

    # retstep=True returns (array, step); both pieces must agree with numpy.
    darr, dstep = da.linspace(6, 49, endpoint=endpoint, chunks=5, retstep=True)
    nparr, npstep = np.linspace(6, 49, endpoint=endpoint, retstep=True)
    assert np.allclose(dstep, npstep)
    assert_eq(darr, nparr)

    assert_eq(da.linspace(1.4, 4.9, endpoint=endpoint, chunks=5, num=13,
                          dtype=int),
              np.linspace(1.4, 4.9, num=13, endpoint=endpoint, dtype=int))

    # Identical inputs must produce identical (deterministic) task graphs.
    graph_a = da.linspace(1.4, 4.9, endpoint=endpoint, chunks=5, num=13).dask
    graph_b = da.linspace(1.4, 4.9, endpoint=endpoint, chunks=5, num=13).dask
    assert sorted(graph_a) == sorted(graph_b)
    graph_c = da.linspace(6, 49, endpoint=endpoint, chunks=5, dtype=float).dask
    graph_d = da.linspace(6, 49, endpoint=endpoint, chunks=5, dtype=float).dask
    assert sorted(graph_c) == sorted(graph_d)

    # linspace bounds may themselves be lazy dask scalars.
    x = da.array([0.2, 6.4, 3.0, 1.6])
    nparr = np.linspace(0, 2, 8, endpoint=endpoint)
    darr = da.linspace(da.argmin(x), da.argmax(x) + 1, 8, endpoint=endpoint)
    assert_eq(darr, nparr)
Exemple #22
0
def select_best_model(models, data):
    """
    Given several models and data to fit, return the best model based on Grid Search Cross Validation

    :param models: An array of dicts containing the name for the model ("name"), the sklearn estimator ("model"),
                    and the parameters for Grid Search Cross Validation ("params")
    :param data: a pandas dataframe where each row is an hour
    :return: an sklearn estimator that is the best model refit on the whole train data
    """
    results = [grid_search_model(data, model) for model in models]

    # The scores live in a plain Python list, so use the eager np.argmax:
    # da.argmax would return a lazy dask scalar, which is not a valid
    # Python list index without an explicit compute().
    best_index = int(np.argmax([r[0] for r in results]))
    best_model = results[best_index][2]

    print("\nBest model: {0} with params {1}".format(model_name(best_model), results[best_index][1]))

    return best_model
Exemple #23
0
def test_reductions():
    """Smoke-test the basic reduction namespace on a small float32 array."""
    x = np.arange(5).astype('f4')
    a = da.from_array(x, chunks=(2,))

    # Whole-array reductions.
    for dask_fn, np_fn in [(da.all, np.all), (da.any, np.any),
                           (da.max, np.max), (da.mean, np.mean),
                           (da.min, np.min), (da.nanmax, np.nanmax),
                           (da.nanmin, np.nanmin), (da.nansum, np.nansum),
                           (da.nanvar, np.nanvar), (da.nanstd, np.nanstd)]:
        assert eq(dask_fn(a), np_fn(x))
    # Arg-reductions along axis 0.
    for dask_fn, np_fn in [(da.argmax, np.argmax), (da.argmin, np.argmin),
                           (da.nanargmax, np.nanargmax),
                           (da.nanargmin, np.nanargmin)]:
        assert eq(dask_fn(a, axis=0), np_fn(x, axis=0))
Exemple #24
0
def test_reductions():
    """Check every basic reduction against numpy on float32 data."""
    x = np.arange(5).astype('f4')
    a = da.from_array(x, blockshape=(2, ))

    full_reductions = [(da.all, np.all), (da.any, np.any), (da.max, np.max),
                       (da.mean, np.mean), (da.min, np.min),
                       (da.nanmax, np.nanmax), (da.nanmin, np.nanmin),
                       (da.nansum, np.nansum), (da.nanvar, np.nanvar),
                       (da.nanstd, np.nanstd)]
    arg_reductions = [(da.argmax, np.argmax), (da.argmin, np.argmin),
                      (da.nanargmax, np.nanargmax),
                      (da.nanargmin, np.nanargmin)]
    for dask_fn, np_fn in full_reductions:
        assert eq(dask_fn(a), np_fn(x))
    for dask_fn, np_fn in arg_reductions:
        assert eq(dask_fn(a, axis=0), np_fn(x, axis=0))
def first_cue_time(data: Dict[str, da.Array],
                   message: int) -> Optional[da.Array]:
    """
    Find the timestamp of the first instance of a cue message in a Tristan data set.

    Args:
        data:     A LATRD data dictionary (a dictionary with data set names as keys
                  and Dask arrays as values).  Must contain one entry for cue id
                  messages and one for cue timestamps.  The two arrays are assumed
                  to have the same length.
        message:  The message code, as defined in the Tristan standard.

    Returns:
        The timestamp, measured in clock cycles from the global synchronisation signal.
        If the message doesn't exist in the data set, this returns None.
    """
    # argmax of the boolean match array yields the index of the first True,
    # but degenerates to 0 when there is no match at all.
    index = da.argmax(data[cue_id_key] == message)
    # A zero index is therefore ambiguous (first element vs. no match), so
    # check the first element explicitly before trusting it.
    if index or data[cue_id_key][0] == message:
        return data[cue_time_key][index]
Exemple #26
0
def subcount_forecast(data, feature):
    """
    Creates a new column that is the predicted value of the input feature

    Essentially an abstraction for 'prediction_forecasts'

    :param data: a pandas dataframe where each row is an hour
    :param feature: a String containing the feature that should be forecasted (one of: casual, registered)
    :return: a pandas dataframe containing the new column
    """
    var_name = feature + "_forecast"
    print("\tAdding {} variable...".format(var_name))
    # One-hot encode everything except the target count column.
    df = dd.get_dummies(data.copy().drop("cnt", axis=1))
    to_predict = dd.read_csv(PATH)[feature]
    df[feature] = to_predict

    train = get_train(df)

    model = RandomForestRegressor(random_state=SEED)
    model_params = {"n_estimators": list(range(10, 110, 10))}
    #tscv = TimeSeriesSplit(n_splits=5)

    grid_search = GridSearchCV(estimator=model,
                               param_grid=model_params,
                               scoring="r2",
                               cv=None,
                               refit=True)
    grid_search.fit(train.drop(feature, axis=1), train[feature])
    # cv_results_ entries are plain numpy arrays: index them with the eager
    # np.argmax rather than da.argmax, whose lazy dask scalar is not a valid
    # numpy index without an explicit compute().
    best_idx = int(np.argmax(grid_search.cv_results_["mean_test_score"]))
    print("\t\tPredictions for GridSearchCV on {}: {:.5f} +/- {:.5f}".format(
        feature, grid_search.best_score_,
        grid_search.cv_results_["std_test_score"][best_idx]))

    data[var_name] = grid_search.best_estimator_.predict(
        dd.get_dummies(data.drop("cnt", axis=1)))

    return data
Exemple #27
0
def _stage_2(
    YP: Array,
    X: Array,
    Y: Array,
    alphas: Optional[NDArray] = None,
    normalize: bool = True,
    _glow_adj_alpha: bool = False,
    _glow_adj_scaling: bool = False,
) -> Tuple[Array, Array]:
    """Stage 2 - WGR Meta Regression

    This stage will train separate ridge regression models for each outcome
    using the predictions from stage 1 for that same outcome as features. These
    predictions are then evaluated based on R2 score to determine an optimal
    "meta" estimator (see `_stage_1` for the "base" estimator description). Results
    then include only predictions and coefficients from this optimal model.

    For more details, see the level 1 regression model described in step 1
    of [Mbatchou et al. 2020](https://www.biorxiv.org/content/10.1101/2020.06.19.162354v2).

    Parameters
    ----------
    YP
        Stage-1 predictions with shape
        (n_variant_block, n_alpha_1, n_sample, n_outcome); must be chunked
        along samples identically to `X` and `Y`.
    X
        Covariate matrix of shape (n_sample, n_covar), single chunk along
        the covariate dimension.
    Y
        Outcome matrix of shape (n_sample, n_outcome), single chunk along
        the outcome dimension.
    alphas
        Optional explicit ridge penalties; when None they are derived from
        the number of stage-1 predictors via `get_alphas`.
    normalize
        When True, standardize stage-1 predictions across samples before
        refitting.
    _glow_adj_alpha, _glow_adj_scaling
        Compatibility switches reproducing Glow's behavior (see the linked
        GitHub issues inline).

    Returns
    -------
    (BRM, YRM)
        BRM: betas for the best alpha per outcome, shape
        (n_sample_block * n_indvar, n_outcome).
        YRM: out-of-fold predictions for the best alpha per outcome, shape
        (n_sample, n_outcome).
    """
    assert YP.ndim == 4
    assert X.ndim == 2
    assert Y.ndim == 2
    # Check that chunking across samples is the same for all arrays
    assert YP.numblocks[2] == X.numblocks[0] == Y.numblocks[0]
    assert YP.chunks[2] == X.chunks[0] == Y.chunks[0]
    # Assert single chunks for covariates and outcomes
    assert X.numblocks[1] == Y.numblocks[1] == 1
    # Extract shape statistics
    n_variant_block, n_alpha_1 = YP.shape[:2]
    n_sample_block = Y.numblocks[0]
    n_sample, n_outcome = Y.shape
    n_covar = X.shape[1]
    # Number of independent variables per sample block: covariates plus one
    # column per (variant block, stage-1 alpha) pair
    n_indvar = n_covar + n_variant_block * n_alpha_1
    sample_chunks = Y.chunks[0]

    if normalize:
        assert_block_shape(YP, n_variant_block, 1, n_sample_block, 1)
        assert_chunk_shape(YP, 1, n_alpha_1, sample_chunks[0], n_outcome)
        # See: https://github.com/projectglow/glow/issues/260
        if _glow_adj_scaling:
            # Standardize within each block (Glow-compatible) rather than
            # across the full sample dimension
            YP = da.map_blocks(
                lambda x: (x - x.mean(axis=2, keepdims=True))
                / x.std(axis=2, keepdims=True),
                YP,
            )
        else:
            YP = (YP - YP.mean(axis=2, keepdims=True)) / YP.std(axis=2, keepdims=True)
    # Transpose for refit on level 1 predictions
    YP = YP.transpose((3, 2, 0, 1))
    assert_array_shape(YP, n_outcome, n_sample, n_variant_block, n_alpha_1)

    if alphas is None:
        # See: https://github.com/projectglow/glow/issues/255
        if _glow_adj_alpha:
            alphas = get_alphas(n_variant_block * n_alpha_1 * n_outcome)
        else:
            alphas = get_alphas(n_variant_block * n_alpha_1)
    n_alpha_2 = alphas.size

    YR = []
    BR = []
    for i in range(n_outcome):
        # Slice and reshape to new 2D covariate matrix;
        # The order of raveling in trailing dimensions is important
        # and later reshapes will assume variants, alphas order
        XPB = YP[i].reshape((n_sample, n_variant_block * n_alpha_1))
        # Prepend covariates and chunk along first dim only
        XPB = da.concatenate((X, XPB), axis=1)
        XPB = XPB.rechunk(chunks=(None, -1))
        assert_array_shape(XPB, n_sample, n_indvar)
        assert XPB.numblocks == (n_sample_block, 1)
        # Extract outcome vector
        YB = Y[:, [i]]
        assert XPB.ndim == YB.ndim == 2
        # Fit and predict folds for each parameter
        BB, YPB = _ridge_regression_cv(XPB, YB, alphas, n_zero_reg=n_covar)[-2:]
        assert_array_shape(BB, n_alpha_2, n_sample_block * n_indvar, 1)
        assert_array_shape(YPB, n_alpha_2, n_sample, 1)
        BR.append(BB)
        YR.append(YPB)

    # Concatenate predictions along outcome dimension
    YR = da.concatenate(YR, axis=2)
    assert_block_shape(YR, 1, n_sample_block, n_outcome)
    assert_chunk_shape(YR, n_alpha_2, sample_chunks[0], 1)
    assert_array_shape(YR, n_alpha_2, n_sample, n_outcome)
    # Move samples to last dim so all others are batch
    # dims for R2 calculations
    YR = da.transpose(YR, (0, 2, 1))
    assert_array_shape(YR, n_alpha_2, n_outcome, n_sample)
    YR = YR.rechunk((-1, -1, None))
    assert_block_shape(YR, 1, 1, n_sample_block)
    assert YR.shape[1:] == Y.T.shape

    # Concatenate betas along outcome dimension
    BR = da.concatenate(BR, axis=2)
    assert_block_shape(BR, 1, n_sample_block, n_outcome)
    assert_chunk_shape(BR, n_alpha_2, n_indvar, 1)
    assert_array_shape(BR, n_alpha_2, n_sample_block * n_indvar, n_outcome)

    # Compute R2 scores within each sample block for each outcome + alpha
    R2 = da.stack(
        [
            r2_score(YR.blocks[..., i], Y.T.blocks[..., i])
            # Avoid warnings on R2 calculations for blocks with single rows
            if YR.chunks[-1][i] > 1 else da.full(YR.shape[:-1], np.nan)
            for i in range(n_sample_block)
        ]
    )
    assert_array_shape(R2, n_sample_block, n_alpha_2, n_outcome)
    # Coerce to finite or nan before nan-aware mean
    R2 = da.where(da.isfinite(R2), R2, np.nan)
    # Find highest mean alpha score for each outcome across blocks
    R2M = da.nanmean(R2, axis=0)
    assert_array_shape(R2M, n_alpha_2, n_outcome)
    # Identify index for the alpha value with the highest mean score
    R2I = da.argmax(R2M, axis=0)
    assert_array_shape(R2I, n_outcome)

    # Choose the predictions corresponding to the model with best score
    YRM = da.stack([YR[R2I[i], i, :] for i in range(n_outcome)], axis=-1)
    YRM = YRM.rechunk((None, -1))
    assert_block_shape(YRM, n_sample_block, 1)
    assert_chunk_shape(YRM, sample_chunks[0], n_outcome)
    assert_array_shape(YRM, n_sample, n_outcome)
    # Choose the betas corresponding to the model with the best score
    BRM = da.stack([BR[R2I[i], :, i] for i in range(n_outcome)], axis=-1)
    BRM = BRM.rechunk((None, -1))
    assert_block_shape(BRM, n_sample_block, 1)
    assert_chunk_shape(BRM, n_indvar, n_outcome)
    assert_array_shape(BRM, n_sample_block * n_indvar, n_outcome)
    return BRM, YRM