예제 #1
0
def test_bin_transform(n_features=10, n_samples=10000):
    """
    Exercise BinTransformer on normally distributed random data.

    Fits a transformer with ``max_bins=41`` and verifies that the
    transformed values lie in ``[0, 41)``, that every column reaches both
    the lowest (0) and the highest (40) bin, that repeated ``transform``
    calls agree with each other, and that the output converts to a
    ``uint8`` numpy array. Monotonicity is delegated to the
    ``assure_monotonic`` helper defined elsewhere.

    :param n_features: number of columns in the generated data
    :param n_samples: number of rows in the generated data
    """
    max_bins = 41
    samples = numpy.random.normal(size=[n_samples, n_features])

    binner = BinTransformer(max_bins=max_bins).fit(samples)
    binned = binner.transform(samples)

    # every bin index has to fall inside [0, max_bins)
    assert numpy.all(binned < max_bins)
    assert numpy.all(binned >= 0)

    # each column must use both the top and the bottom bin
    column_max = numpy.max(binned, axis=0)
    assert numpy.allclose(column_max, max_bins - 1)
    column_min = numpy.min(binned, axis=0)
    assert numpy.allclose(column_min, 0)

    assure_monotonic(samples, binned)

    # two independent transform calls must give identical output
    first_pass = binner.transform(samples)
    second_pass = binner.transform(samples)
    assert numpy.all(first_pass == second_pass)

    # the output is expected to be stored as unsigned 8-bit integers
    binned_dtype = numpy.array(binned).dtype
    print(binned_dtype)
    assert binned_dtype == 'uint8'
예제 #2
0
def test_bin_transformer_extend_to(n_features=10, n_bins=123):
    """
    Check the ``extend_to`` argument of ``BinTransformer.transform``.

    Transforms the same 20-row sample twice with freshly fitted
    transformers, once plainly and once with ``extend_to=19``, then
    verifies that the extended output has a length divisible by 19 and
    that its leading rows match the plain output.

    :param n_features: number of features passed to ``generate_sample``
    :param n_bins: value passed as ``max_bins`` to each transformer
    """
    block_size = 19
    features, _labels = generate_sample(n_samples=20, n_features=n_features)

    plain_binner = BinTransformer(max_bins=n_bins).fit(features)
    plain = plain_binner.transform(features)

    extending_binner = BinTransformer(max_bins=n_bins).fit(features)
    extended = extending_binner.transform(features, extend_to=block_size)

    # the extended output must consist of whole blocks
    assert len(extended) % block_size == 0, 'wrong shape!'

    # the extension must not alter the rows of the original data
    n_original = len(plain)
    assert numpy.allclose(
        extended[:n_original], plain), 'extending does not work as expected!'
예제 #3
0
def test_network_with_scaler(n_samples=200, n_features=15, distance=0.5):
    """
    Train ``nnet.SimpleNeuralNetwork`` with each supported scaler.

    For a ``BinTransformer(max_bins=16)`` and an ``IronTransformer``, fits
    a 300-epoch network on a generated sample and requires the ROC AUC of
    the second ``predict_proba`` column, measured on the training data
    itself, to exceed 0.8.

    :param n_samples: number of samples passed to ``generate_sample``
    :param n_features: number of features passed to ``generate_sample``
    :param distance: ``distance`` argument forwarded to ``generate_sample``
    """
    features, labels = generate_sample(
        n_samples=n_samples, n_features=n_features, distance=distance)

    candidate_scalers = [BinTransformer(max_bins=16), IronTransformer()]
    for scaler in candidate_scalers:
        network = nnet.SimpleNeuralNetwork(scaler=scaler, epochs=300)
        network.fit(features, labels)

        probabilities = network.predict_proba(features)
        auc = roc_auc_score(labels, probabilities[:, 1])
        assert auc > 0.8, 'quality is too low for model: {}'.format(network)
예제 #4
0
def test_bin_transformer_limits(n_features=10, n_bins=123):
    """
    Check the extreme bin indices produced by ``BinTransformer``.

    With 1999 samples every column of the ``fit_transform`` output must
    reach the highest bin (``n_bins - 1``); with only 20 samples every
    column must still start at bin 0.

    :param n_features: number of features passed to ``generate_sample``
    :param n_bins: value passed as ``max_bins`` to each transformer
    """
    # large sample: the top bin has to be occupied in every column
    large_sample, _labels = generate_sample(n_samples=1999, n_features=n_features)
    large_binned = BinTransformer(max_bins=n_bins).fit_transform(large_sample)
    upper_limits = large_binned.max(axis=0)
    assert numpy.allclose(upper_limits, n_bins - 1)

    # small sample: the lowest bin index must still be zero in every column
    small_sample, _labels = generate_sample(n_samples=20, n_features=n_features)
    small_binned = BinTransformer(max_bins=n_bins).fit_transform(small_sample)
    lower_limits = small_binned.min(axis=0)
    assert numpy.allclose(lower_limits, 0)
def test_bin_transform(n_features=10, n_samples=10000):
    """
    Verify the basic contract of BinTransformer on Gaussian noise.

    A transformer limited to 41 bins is fitted on random normal data and
    its output is checked for: values within ``[0, 41)``, per-column
    maximum of 40 and minimum of 0, agreement between two successive
    ``transform`` calls, and a ``uint8`` dtype after conversion to a numpy
    array. The ``assure_monotonic`` helper (defined elsewhere) is invoked
    on the input/output pair.

    :param n_features: number of columns in the generated data
    :param n_samples: number of rows in the generated data
    """
    noise = numpy.random.normal(size=[n_samples, n_features])
    bin_limit = 41

    fitted = BinTransformer(max_bins=bin_limit).fit(noise)
    bin_indices = fitted.transform(noise)

    # range checks over the whole output
    below_limit = bin_indices < bin_limit
    assert numpy.all(below_limit)
    non_negative = bin_indices >= 0
    assert numpy.all(non_negative)

    # per-column extremes: both end bins must be populated
    highest_bin = bin_limit - 1
    assert numpy.allclose(numpy.max(bin_indices, axis=0), highest_bin)
    assert numpy.allclose(numpy.min(bin_indices, axis=0), 0)

    assure_monotonic(noise, bin_indices)

    # transforming the same data twice must be reproducible
    repeat_a = fitted.transform(noise)
    repeat_b = fitted.transform(noise)
    assert numpy.all(repeat_a == repeat_b)

    # converted to a numpy array, the output must have dtype uint8
    as_array = numpy.array(bin_indices)
    observed_dtype = as_array.dtype
    print(observed_dtype)
    assert observed_dtype == 'uint8'
예제 #6
0
    def test_preprocessing(self):
        """Transforming a 2x3 array with a default BinTransformer keeps its shape."""
        raw = np.array([
            [1.1, 1.2, 1.3],
            [5.1, 6.4, 10.5],
        ])

        fitted = BinTransformer().fit(raw)
        binned = fitted.transform(raw)

        expected_shape = (2, 3)
        self.assertEqual(expected_shape, binned.shape)