Exemple #1
0
def test_score_invalid_method():
    """Scoring with an unknown ``method`` name must raise ``ValueError``."""
    shape = OrderedDict([('y', 100), ('x', 100)])
    ds, labels = create_mock_classes(shape)

    model = classify.Classifier(RandomForestClassifier())
    model.fit(ds, labels)

    # The classifier is fitted, so only the bogus method name can
    # trigger the error.
    with assert_raises_regex(ValueError, 'not a valid scoring method'):
        model.score(ds, labels, method='not_a_method')
Exemple #2
0
def test_cluster():
    """Cluster the mock dataset into two groups and compare the class
    boundaries with those of the true labels."""
    shape = OrderedDict([('y', 100), ('x', 100)])
    ds, true_labels = create_mock_classes(shape)

    kmeans = classify.Classifier(MiniBatchKMeans(n_clusters=2))
    cluster_labels = kmeans.fit_predict(ds)

    # Compare boundaries of the true labels against boundaries of the
    # clustering result.
    expected_edges = find_boundaries(true_labels)
    actual_edges = find_boundaries(cluster_labels)
    assert_equal(expected_edges, actual_edges)
Exemple #3
0
def test_score():
    """``Classifier.score`` must equal the fraction of predictions that
    match the labels."""
    shape = OrderedDict([('y', 100), ('x', 100)])
    ds, labels = create_mock_classes(shape)

    model = classify.Classifier(RandomForestClassifier())
    fitted = model.fit(ds, labels)
    predicted = fitted.predict(ds)

    # Reference score computed by hand from the predictions.
    expected = (predicted == labels).mean()
    actual = model.score(ds, labels)
    assert expected == actual
Exemple #4
0
def test_predict_proba():
    """Check that ``predict_proba`` output is consistent with ``predict``.

    The classifier is fitted once; both the hard labels and the class
    probabilities are then taken from that same fitted model, so the
    comparison at the end is between outputs of one and the same fit.
    """
    dims = OrderedDict([('y', 50), ('x', 50), ('time', 10)])
    ds, labels = create_mock_classes(dims)
    c = classify.Classifier(LogisticRegression())

    # Fit only once: re-fitting on identical data before asking for the
    # probabilities is redundant work.
    fitted = c.fit(ds, labels)
    pred = fitted.predict(ds)
    proba = fitted.predict(ds, func='predict_proba')

    # Probabilities must lie within [0, 1].
    assert (proba >= 0).all()
    assert (proba <= 1).all()

    # The predicted label must match the most probable class
    # (the +1 offset maps the argmax index onto the label values).
    xr_assert_equal(pred, proba.argmax('label') + 1)
Exemple #5
0
def test_build_X(feature_dims):
    """Check the shape of the matrix produced by ``classify._build_X``
    and that it does not depend on the dimension order of the input."""
    dims = OrderedDict([('y', 50), ('x', 50), ('time', 10)])
    ds, _ = create_mock_classes(dims)
    X = classify._build_X(ds, feature_dims=feature_dims)

    # Rows: product of all non-feature dimension sizes.
    sample_sizes = [size for name, size in dims.items()
                    if name not in feature_dims]
    # Columns: one per data variable and per feature-dimension entry.
    feature_sizes = [size for name, size in dims.items()
                     if name in feature_dims]
    expected_rows = np.prod(sample_sizes)
    expected_cols = len(ds.data_vars) * np.prod(feature_sizes)
    assert X.shape == (expected_rows, expected_cols)

    # Test that the original dimension order doesn't matter
    ds_reversed = ds.transpose(*reversed(dims))
    X_reversed = classify._build_X(ds_reversed, feature_dims=feature_dims)
    assert_equal(X, X_reversed)
Exemple #6
0
def test_classifier(clf, array):
    """Train ``clf`` on roughly 10% of the labels and predict everything.

    Parameters
    ----------
    clf
        Estimator that is wrapped in ``classify.Classifier``.
    array
        If truthy, the test runs on the single variable ``ds['C11']``
        instead of the full mock dataset.
    """
    dims = OrderedDict([('y', 50), ('x', 50)])
    ds, labels_true = create_mock_classes(dims)
    if array:
        ds = ds['C11']

    # Select 10% for training. A locally seeded generator keeps the
    # training mask identical between runs, so a failure is reproducible,
    # and it leaves the global NumPy random state untouched.
    rng = np.random.RandomState(0)
    mask_train = rng.rand(dims['y'], dims['x']) < 0.1
    # ``where`` returns a new object, so no defensive copy of
    # ``labels_true`` is needed before masking.
    labels_train = labels_true.where(mask_train)

    c = classify.Classifier(clf)
    c.fit(ds, labels_train)
    labels_predicted = c.predict(ds)

    # Expect 100% accuracy for this trivial classification task.
    xr_assert_equal(labels_predicted, labels_true)
Exemple #7
0
def test_classifier_feature_dims(dims, feature_dims):
    """Exercise ``classify.Classifier`` with explicit ``feature_dims``:
    prediction values, label input type, and output dimensions."""
    ds, labels = create_mock_classes(dims)
    forest = RandomForestClassifier(n_estimators=20)
    model = classify.Classifier(forest, feature_dims=feature_dims)

    # Expect 100% accuracy for this trivial classification task.
    pred = model.fit(ds, labels).predict(ds)
    expected = classify._broadcast_labels(
        labels, ds, feature_dims=feature_dims)
    xr_assert_equal(pred, expected)

    # Check that the results are the same whether labels
    # are passed as xr.DataArray or np.ndarray
    raw_labels = labels.values
    pred_from_numpy = model.fit(ds, raw_labels).predict(ds)
    xr_assert_equal(pred, pred_from_numpy)

    # Check that prediction result has correct dimensions
    expected_dims = classify._get_data_dims(ds, feature_dims=feature_dims)
    assert_equal(utils.get_dims(pred), expected_dims)
Exemple #8
0
def test_broadcast(dims, feature_dims):
    """Check ``classify._broadcast_labels`` for numpy and DataArray
    labels: output shape, independence of label dimension order, and
    constant values along the broadcast dimensions."""
    ds, labels = create_mock_classes(dims)
    target_shape = classify._get_data_shape(ds, feature_dims=feature_dims)

    # Check broadcast for numpy array
    from_numpy = classify._broadcast_labels(labels.values, ds, feature_dims)
    assert from_numpy.shape == target_shape

    # Check broadcast for DataArray
    labels_xy = labels.transpose('x', 'y')
    labels_yx = labels.transpose('y', 'x')
    blabels = classify._broadcast_labels(labels_xy, ds, feature_dims)
    blabels_t = classify._broadcast_labels(labels_yx, ds, feature_dims)
    xr_assert_equal(blabels, blabels_t)
    assert blabels.shape == target_shape

    # Check values equal along broadcast dimensions, i.e. those
    # dimensions that are neither label nor feature dimensions.
    broadcast_dims = set(dims).difference(labels.dims, feature_dims)
    for dim in broadcast_dims:
        spread = blabels.std(dim)
        assert (spread == 0).all()
Exemple #9
0
def test_scaling():
    """Compare clustering with and without scaling and check the state
    of the fitted scaler."""
    shape = OrderedDict([('y', 100), ('x', 100)])
    ds, _ = create_mock_classes(shape)

    # Check that scaling doesn't affect the prediction
    # for a simple classification task
    unscaled = classify.Classifier(
        MiniBatchKMeans(n_clusters=2), scale=False).fit(ds)
    scaled = classify.Classifier(
        MiniBatchKMeans(n_clusters=2), scale=True).fit(ds)

    edges_unscaled = find_boundaries(unscaled.predict(ds))
    edges_scaled = find_boundaries(scaled.predict(ds))
    assert_equal(edges_unscaled, edges_scaled)

    # Check that scaler has been properly fitted
    scaler = scaled._scaler
    assert_allclose(scaler.mean_, ds.mean().to_array().values)
    assert_allclose(scaler.scale_, ds.std().to_array().values)

    # Scaled cluster center mean should be about zero
    center_mean = scaled.clf.cluster_centers_.mean(axis=0)
    assert np.all(np.abs(center_mean) < 1e-2)
Exemple #10
0
def test_class_mean():
    """Check that ``classify.class_mean`` yields values with zero spread
    within each class of the true labels."""
    shape = OrderedDict([('y', 100), ('x', 100)])
    ds, true_labels = create_mock_classes(shape)
    means = classify.class_mean(ds, true_labels)

    # NOTE(review): if `means` is an xarray Dataset, the truth value of
    # `... == 0` may only reflect whether it has data variables --
    # confirm that this assertion is not vacuous.
    for label in np.unique(true_labels):
        within_class = means.where(true_labels == label)
        assert within_class.std() == 0
Exemple #11
0
def test_predict_before_fit():
    """Predicting with an unfitted classifier must raise
    ``NotFittedError``."""
    shape = OrderedDict([('y', 100), ('x', 100)])
    ds, _ = create_mock_classes(shape)

    # Deliberately skip the call to `fit`.
    unfitted = classify.Classifier(RandomForestClassifier())
    with assert_raises_regex(NotFittedError, 'not fitted yet'):
        unfitted.predict(ds)
Exemple #12
0
def test_fit_predict():
    """``fit_predict`` must give the same result as ``fit`` followed by
    ``predict``."""
    shape = OrderedDict([('y', 50), ('x', 50), ('time', 10)])
    ds, labels = create_mock_classes(shape)
    model = classify.Classifier(RandomForestClassifier(n_estimators=20))

    # Two-step path first, then the combined call, on the same data.
    two_step = model.fit(ds, labels).predict(ds)
    one_step = model.fit_predict(ds, labels)
    xr_assert_equal(two_step, one_step)