Example #1
0
def test_robust_label_encoder_fill_label_value():
    """Check encoding and round-tripping when ``fill_label_value`` is set.

    Only ``1`` is declared in ``labels``. The test expects ``1`` to encode to
    ``0``, the undeclared value ``0`` to encode to ``1``, and
    ``inverse_transform`` to give back the original array. The same
    expectations are then checked through ``fit_transform``.
    """
    targets = np.array([1, 1, 0, 1, 1])
    expected_codes = [0, 0, 1, 0, 0]

    # Path 1: separate ``fit`` and ``transform`` calls.
    encoder = RobustLabelEncoder(labels=[1], fill_label_value=0)
    encoder.fit(targets)
    codes = encoder.transform(targets)
    assert_array_equal(codes, expected_codes)
    assert_array_equal(encoder.inverse_transform(codes), targets)

    # Path 2: a fresh encoder driven through ``fit_transform`` must agree.
    encoder = RobustLabelEncoder(labels=[1], fill_label_value=0)
    codes = encoder.fit_transform(targets)
    assert_array_equal(codes, expected_codes)
    assert_array_equal(encoder.inverse_transform(codes), targets)
Example #2
0
def test_robust_label_encoder_sorted_labels(labels):
    """Check that user-supplied ``labels`` become ``classes_`` unchanged.

    ``labels`` is supplied from outside this function (its parametrization is
    not visible here). The value ``"173"`` is expected to encode to index 3.
    """
    extra_value = "173"
    expected_codes = [2, 1, 3]

    # Fit on a subset of the labels, then transform separately.
    encoder = RobustLabelEncoder(labels=labels)
    encoder.fit([labels[1], labels[0]])

    assert_array_equal(list(encoder.classes_), labels)
    codes = encoder.transform([labels[2], labels[1], extra_value])
    assert_array_equal(codes, expected_codes)

    # A fresh encoder driven through ``fit_transform`` must agree.
    encoder = RobustLabelEncoder(labels=labels)
    codes = encoder.fit_transform([labels[2], labels[1], extra_value])

    assert_array_equal(list(encoder.classes_), labels)
    assert_array_equal(codes, expected_codes)
def test_robust_label_encoder_fill_label_value():
    """Check ``fill_label_value`` with and without ``include_unseen_class``.

    With ``include_unseen_class=True`` the test expects ``get_classes()`` to
    report ``[1, 0]``; without it, ``[1]``. In both configurations the encoded
    array is expected to be ``[0, 0, 1, 0, 0]`` and ``inverse_transform`` is
    expected to restore the input.
    """
    targets = np.array([1, 1, 0, 1, 1])
    expected_codes = [0, 0, 1, 0, 0]

    # Separate fit/transform, with the fill value exposed as a class.
    encoder = RobustLabelEncoder(labels=[1], fill_label_value=0, include_unseen_class=True)
    encoder.fit(targets)
    np.testing.assert_array_equal(encoder.get_classes(), [1, 0])
    codes = encoder.transform(targets)
    np.testing.assert_array_equal(codes, expected_codes)
    np.testing.assert_array_equal(encoder.inverse_transform(codes), targets)

    # ``fit_transform`` on an encoder that does not include the unseen class.
    encoder = RobustLabelEncoder(labels=[1], fill_label_value=0)
    codes = encoder.fit_transform(targets)
    np.testing.assert_array_equal(encoder.get_classes(), [1])
    np.testing.assert_array_equal(codes, expected_codes)
    np.testing.assert_array_equal(encoder.inverse_transform(codes), targets)
Example #4
0
def test_robust_label_encoder_unsorted_labels_warning(labels):
    """Check that fitting warns and that ``classes_`` ends up sorted.

    ``labels`` is supplied from outside this function; presumably it is an
    unsorted sequence (its parametrization is not visible here). Both ``fit``
    and ``fit_transform`` are expected to emit a ``UserWarning``.
    """
    extra_value = "173"
    expected_codes = [2, 1, 3]

    # ``fit`` must warn; afterwards the classes are the sorted labels.
    encoder = RobustLabelEncoder(labels=labels)
    with pytest.warns(UserWarning):
        encoder.fit([labels[2], labels[0]])

    assert_array_equal(list(encoder.classes_), sorted(labels))
    codes = encoder.transform([labels[1], labels[2], extra_value])
    assert_array_equal(codes, expected_codes)

    # ``fit_transform`` on a fresh encoder must warn and agree.
    encoder = RobustLabelEncoder(labels=labels)
    with pytest.warns(UserWarning):
        codes = encoder.fit_transform([labels[1], labels[2], extra_value])

    assert_array_equal(list(encoder.classes_), sorted(labels))
    assert_array_equal(codes, expected_codes)
def test_robust_label_encoder_unsorted_labels_warning(labels):
    """Check that ``fit`` warns and that ``classes_`` ends up sorted.

    ``labels`` is supplied from outside this function; presumably it is an
    unsorted sequence (its parametrization is not visible here).
    """
    encoder = RobustLabelEncoder(labels=labels)
    with pytest.warns(UserWarning):
        encoder.fit([labels[2], labels[0]])

    ordered = sorted(labels)
    assert_array_equal(list(encoder.classes_), ordered)

    # The value "173" is expected to encode to index 3.
    codes = encoder.transform([labels[1], labels[2], "173"])
    assert_array_equal(codes, [2, 1, 3])
Example #6
0
def build_label_transform():
    """Build the encoder applied to the target labels.

    Returns:
        A ``RobustLabelEncoder`` configured with the six class names below.
    """
    class_names = [
        'LAYING',
        'SITTING',
        'STANDING',
        'WALKING',
        'WALKING_DOWNSTAIRS',
        'WALKING_UPSTAIRS',
    ]
    return RobustLabelEncoder(labels=class_names)
def test_robust_label_encoder_inverse_transform_dtype():
    """Check ``inverse_transform`` on string-typed codes.

    The strings ``"1.0"`` and ``"2.0"`` are expected to decode to
    ``"banana"`` and ``"hot dog"``; the input ``["0.", "2.b"]`` is expected to
    raise ``ValueError``.
    """
    encoder = RobustLabelEncoder()
    encoder.fit(X[:, 0])

    decoded = encoder.inverse_transform(["1.0", "2.0"])
    np.testing.assert_array_equal(decoded, ["banana", "hot dog"])

    malformed_codes = ["0.", "2.b"]
    with pytest.raises(ValueError):
        encoder.inverse_transform(malformed_codes)
Example #8
0
def test_robust_label_encoder():
    """Exercise the default encoder: fit, transform, inverse and fit_transform.

    After fitting on the first column of ``X`` the classes are expected to be
    ``apple``, ``banana`` and ``hot dog``. The value ``"llama"`` is expected
    to encode to index 3, and code 10 to decode to ``"<unseen_label>"``.
    """
    encoder = RobustLabelEncoder()
    encoder.fit(X[:, 0])

    assert_array_equal(encoder.classes_, ["apple", "banana", "hot dog"])

    # (raw labels, expected codes) pairs for ``transform``, in call order.
    transform_cases = (
        ([], []),
        (["hot dog", "banana", "hot dog"], [2, 1, 2]),
        (["hot dog", "llama"], [2, 3]),
    )
    for raw_labels, expected_codes in transform_cases:
        assert_array_equal(encoder.transform(raw_labels), expected_codes)

    # (codes, expected labels) pairs for ``inverse_transform``, in call order.
    inverse_cases = (
        ([0, 2], ["apple", "hot dog"]),
        ([0, 10], ["apple", "<unseen_label>"]),
    )
    for codes, expected_labels in inverse_cases:
        assert_array_equal(encoder.inverse_transform(codes), expected_labels)

    refit_codes = encoder.fit_transform(X[:, 0])
    assert_array_equal(refit_codes, [2, 2, 0, 2, 2, 1])
def test_robust_label_encoder_unsorted_labels_warning(labels):
    """Check the warning, the sorted classes and the fill value placement.

    ``labels`` is supplied from outside this function; presumably it is an
    unsorted sequence (its parametrization is not visible here). Every
    ``fit``/``fit_transform`` call below is expected to emit a ``UserWarning``.
    """
    extra_value = "173"
    expected_codes = [2, 1, 3]

    # ``fit`` must warn; both class accessors then report the sorted labels.
    encoder = RobustLabelEncoder(labels=labels)
    with pytest.warns(UserWarning):
        encoder.fit([labels[2], labels[0]])

    np.testing.assert_array_equal(list(encoder.classes_), sorted(labels))
    np.testing.assert_array_equal(encoder.get_classes(), sorted(labels))
    codes = encoder.transform([labels[1], labels[2], extra_value])
    np.testing.assert_array_equal(codes, expected_codes)

    # ``fit_transform`` on a fresh encoder must warn and agree.
    encoder = RobustLabelEncoder(labels=labels)
    with pytest.warns(UserWarning):
        codes = encoder.fit_transform([labels[1], labels[2], extra_value])

    np.testing.assert_array_equal(list(encoder.classes_), sorted(labels))
    np.testing.assert_array_equal(codes, expected_codes)

    # With ``include_unseen_class=True`` the fill value is appended after the
    # sorted labels rather than being sorted in with them.
    encoder = RobustLabelEncoder(labels=labels, fill_label_value="-99", include_unseen_class=True)
    with pytest.warns(UserWarning):
        encoder.fit([labels[2], labels[0]])
    expected_classes = sorted(labels) + ["-99"]
    np.testing.assert_array_equal(encoder.get_classes(), expected_classes)
Example #10
0
def test_inverse_label_transformer():
    """Compare compiled ``inverse_transform`` output with a NumPy reference.

    For 1-D input the reference is a threshold at 0.5; for 2-D input it is the
    per-row argmax. Both cases reuse the same helper and encoder instances.
    """
    helper = SklearnTestHelper()
    encoder = RobustLabelEncoder()

    # Binary classification: 1-D scores, reference is ``score > 0.5``.
    scores = np.random.random_sample((10, )).astype(np.float32)
    expected = (scores > 0.5).astype(int)
    input_shape = (relay.Any(), )
    helper.compile(encoder, input_shape, "float32", "inverse_transform")
    actual = helper.run(scores)
    tvm.testing.assert_allclose(expected, actual, rtol=1e-5, atol=1e-5)

    # Multiclass classification: 2-D scores, reference is the row-wise argmax.
    scores = np.random.random_sample((10, 5)).astype(np.float32)
    expected = np.argmax(scores, axis=1)
    input_shape = (relay.Any(), 5)
    helper.compile(encoder, input_shape, "float32", "inverse_transform")
    actual = helper.run(scores)
    tvm.testing.assert_allclose(expected, actual, rtol=1e-5, atol=1e-5)
Example #11
0
def build_label_transform():
    """Build the encoder applied to the target labels.

    Returns:
        A ``RobustLabelEncoder`` with the single declared label ``'0'``,
        ``'1'`` as the fill value, and the unseen class included.
    """
    encoder_options = {
        "labels": ['0'],
        "fill_label_value": '1',
        "include_unseen_class": True,
    }
    return RobustLabelEncoder(**encoder_options)
Example #12
0
def build_label_transform():
    """Build the encoder applied to the target labels.

    Returns:
        A ``RobustLabelEncoder`` whose labels are the strings '1' to '5'.
    """
    class_names = ['1', '2', '3', '4', '5']
    return RobustLabelEncoder(labels=class_names)
Example #13
0
def build_label_transform():
    """Build the encoder applied to the target labels.

    Returns:
        A ``RobustLabelEncoder`` whose labels are the strings "1" to "5".
    """
    class_names = ["1", "2", "3", "4", "5"]
    label_encoder = RobustLabelEncoder(labels=class_names)
    return label_encoder
def test_robust_label_encoder_error_unknown():
    """Check that an unknown label raises when ``fill_unseen_labels=False``.

    Raises (expected by the test):
        ValueError: from ``transform`` when given ``"eggplant"``, which is not
            one of the fitted classes.
    """
    enc = RobustLabelEncoder(fill_unseen_labels=False)
    enc.fit(X[:, 0])
    assert_array_equal(enc.get_classes(), ["apple", "banana", "hot dog"])

    # Only ``transform`` sits inside ``pytest.raises``: a ValueError raised by
    # construction or fitting must fail the test, not satisfy it.
    with pytest.raises(ValueError):
        enc.transform(["eggplant"])
def test_robust_label_encoder_error_unknown():
    """Check that an unknown label raises when ``fill_unseen_labels=False``.

    Raises (expected by the test):
        ValueError: from ``transform`` when given ``"eggplant"``.
    """
    enc = RobustLabelEncoder(fill_unseen_labels=False)
    enc.fit(X[:, 0])

    # Only ``transform`` sits inside ``pytest.raises``: a ValueError raised by
    # construction or fitting must fail the test, not satisfy it.
    with pytest.raises(ValueError):
        enc.transform(["eggplant"])
from sagemaker_sklearn_extension.impute import RobustMissingIndicator
from sagemaker_sklearn_extension.preprocessing import LogExtremeValuesTransformer
from sagemaker_sklearn_extension.preprocessing import NALabelEncoder
from sagemaker_sklearn_extension.preprocessing import QuadraticFeatures
from sagemaker_sklearn_extension.preprocessing import QuantileExtremeValuesTransformer
from sagemaker_sklearn_extension.preprocessing import RemoveConstantColumnsTransformer
from sagemaker_sklearn_extension.preprocessing import RobustLabelEncoder
from sagemaker_sklearn_extension.preprocessing import RobustStandardScaler
from sagemaker_sklearn_extension.preprocessing import ThresholdOneHotEncoder


@pytest.mark.parametrize(
    "Estimator",
    [
        DateTimeVectorizer(),
        LogExtremeValuesTransformer(),
        MultiColumnTfidfVectorizer(),
        NALabelEncoder(),
        QuadraticFeatures(),
        QuantileExtremeValuesTransformer(),
        RobustImputer(),
        RemoveConstantColumnsTransformer(),
        RobustLabelEncoder(),
        RobustMissingIndicator(),
        RobustStandardScaler(),
        ThresholdOneHotEncoder(),
    ],
)
def test_all_estimators(Estimator):
    """Run ``check_estimator`` against each estimator instance listed above.

    Args:
        Estimator: A default-constructed estimator instance supplied by the
            parametrization.
    """
    # A test should not return a value: pytest flags non-None returns, and
    # whatever ``check_estimator`` returns is not needed here.
    check_estimator(Estimator)
def test_robust_label_encoder_sorted_labels(labels):
    """Check that user-supplied ``labels`` become ``classes_`` unchanged.

    ``labels`` is supplied from outside this function (its parametrization is
    not visible here).
    """
    enc = RobustLabelEncoder(labels=labels)
    # Fit on a subset of the labels only.
    enc.fit([labels[1], labels[0]])

    assert_array_equal(list(enc.classes_), labels)
    # The value "173" is expected to encode to index 3.
    assert_array_equal(enc.transform([labels[2], labels[1], "173"]), [2, 1, 3])