Beispiel #1
0
def test_tabulardrift(tabulardrift_params):
    """Exercise ``TabularDrift`` on a copy of its own reference data.

    ``tabulardrift_params`` is unpacked into six values: the number of
    categorical columns, the requested form of ``categories_per_feature``
    (``None``, the type ``int`` or the type ``list``), the multivariate
    correction name, the ``update_x_ref`` dict, the ``preprocess_x_ref`` flag
    and a flag telling whether the test batch should contain category values
    that are shifted by one relative to the reference data.

    The sizes ``n``, ``n_features`` and ``n_categories`` are read from the
    enclosing module.
    """
    (n_cat, categories_per_feature, correction,
     update_x_ref, preprocess_x_ref, new_categories) = tabulardrift_params
    np.random.seed(0)

    # Reference data: standard-normal values, stored as float32.
    x_ref = np.random.randn(n * n_features)
    x_ref = x_ref.reshape(n, n_features).astype(np.float32)

    if not n_cat > 0:
        categories_per_feature = None
    else:
        # Overwrite randomly chosen columns with the repeating pattern
        # 0, 1, ..., n_categories - 1 so they act as categorical features.
        cat_cols = np.random.choice(n_features, size=n_cat, replace=False)
        one_cycle = np.array([np.arange(n_categories)] * n_cat).T
        x_ref[:, cat_cols] = np.tile(one_cycle, (n // n_categories, 1))

        # Expand the requested spec form into a per-column dict; any other
        # value is forwarded to the detector untouched.
        if categories_per_feature is None:
            categories_per_feature = dict.fromkeys(cat_cols)
        elif categories_per_feature == int:
            categories_per_feature = dict.fromkeys(cat_cols, n_categories)
        elif categories_per_feature == list:
            categories_per_feature = {
                col: list(np.arange(n_categories)) for col in cat_cols
            }

    cd = TabularDrift(
        x_ref=x_ref,
        p_val=.05,
        categories_per_feature=categories_per_feature,
        preprocess_x_ref=preprocess_x_ref,
        update_x_ref=update_x_ref,
        correction=correction,
    )

    # The test batch is the reference data itself, optionally with every
    # categorical value shifted by one.
    x = x_ref.copy()
    shift_categories = new_categories and n_cat > 0
    if shift_categories:
        x[:, cat_cols] += 1

    preds_batch = cd.predict(x, drift_type='batch', return_p_val=True)
    batch_data = preds_batch['data']
    expected_drift = 1 if shift_categories else 0
    assert batch_data['is_drift'] == expected_drift

    # After one predict call the detector must have counted both batches and
    # capped the stored reference set at the configured size.
    k = tuple(update_x_ref.keys())[0]
    n_seen = x.shape[0] + x_ref.shape[0]
    assert cd.n == n_seen
    assert cd.x_ref.shape[0] == min(update_x_ref[k], n_seen)
    assert batch_data['distance'].min() >= 0.
    if correction == 'bonferroni':
        assert batch_data['threshold'] == cd.p_val / cd.n_features

    # Feature-level predictions: one decision per feature, each equal to the
    # plain comparison of that feature's p-value with the detector's p_val.
    preds_feature = cd.predict(x, drift_type='feature', return_p_val=True)
    feature_data = preds_feature['data']
    assert feature_data['is_drift'].shape[0] == cd.n_features
    below_threshold = (feature_data['p_val'] < cd.p_val).astype(int)
    assert (feature_data['is_drift'] == below_threshold).all()
    assert feature_data['threshold'] == cd.p_val
Beispiel #2
0
def test_tabulardrift(tabulardrift_params):
    """Exercise ``TabularDrift`` on a copy of its own reference data.

    ``tabulardrift_params`` is unpacked into five values: the number of
    categorical columns, a ``(preprocess_fn, preprocess_kwargs)`` pair, the
    multivariate correction name, the ``update_X_ref`` dict and the
    ``preprocess_X_ref`` flag.

    Callable preprocessing functions are not supported by this test; any
    other value is replaced by ``None`` before the detector is built. The
    sizes ``n``, ``n_features`` and ``n_categories`` are read from the
    enclosing module.
    """
    (n_cat, preprocess, correction,
     update_X_ref, preprocess_X_ref) = tabulardrift_params
    n_infer = 2

    preprocess_fn, _ = preprocess
    if isinstance(preprocess_fn, Callable):
        raise NotImplementedError
    preprocess_fn = preprocess_kwargs = None

    # Column indices are drawn before the data so the order of calls into
    # the global NumPy RNG is unchanged.
    cat_cols = np.random.choice(n_features, size=n_cat, replace=False)
    X_ref = np.random.randn(n * n_features)
    X_ref = X_ref.reshape(n, n_features).astype('float32')
    if n_cat > 0:
        # Overwrite the chosen columns with a tiled arange pattern so they
        # act as categorical features.
        per_column = np.array([np.arange(n / n_categories)] * n_cat).T
        X_ref[:, cat_cols] = np.tile(per_column, (n_categories, 1))

    detector_kwargs = {
        'p_val': .05,
        'X_ref': X_ref,
        'preprocess_X_ref': preprocess_X_ref,
        'update_X_ref': update_X_ref,
        'preprocess_fn': preprocess_fn,
        'preprocess_kwargs': preprocess_kwargs,
        'correction': correction,
        'n_infer': n_infer,
    }
    cd = TabularDrift(**detector_kwargs)

    # The test batch is identical to the reference data, so no drift is
    # expected at batch level.
    X = X_ref.copy()
    preds_batch = cd.predict(X, drift_type='batch', return_p_val=True)
    batch_data = preds_batch['data']
    assert batch_data['is_drift'] == 0

    # After one predict call the detector must have counted both batches and
    # capped the stored reference set at the configured size.
    k = tuple(update_X_ref.keys())[0]
    n_seen = X.shape[0] + X_ref.shape[0]
    assert cd.n == n_seen
    assert cd.X_ref.shape[0] == min(update_X_ref[k], n_seen)
    assert batch_data['distance'].min() >= 0.
    if correction == 'bonferroni':
        assert batch_data['threshold'] == cd.p_val / cd.n_features

    # Feature-level predictions: one decision per feature, each equal to the
    # plain comparison of that feature's p-value with the detector's p_val.
    preds_feature = cd.predict(X, drift_type='feature', return_p_val=True)
    feature_data = preds_feature['data']
    assert feature_data['is_drift'].shape[0] == cd.n_features
    below_threshold = (feature_data['p_val'] < cd.p_val).astype(int)
    assert (feature_data['is_drift'] == below_threshold).all()
    assert feature_data['threshold'] == cd.p_val
Beispiel #3
0
# Demo script: fit a TabularDrift detector on the first rows of X and test a
# later window for drift. X and TabularDrift are defined outside this snippet.

# Number of rows in the reference window and in each test window.
n_ref = 25
n_test = 25

# Replace X by its `.values` attribute (presumably X is a pandas DataFrame
# and this yields the underlying array - confirm against where X is built).
X = X.values

# Three consecutive, non-overlapping row windows: reference, first test
# window and second test window.
X_ref = X[:n_ref]
X_t0 = X[n_ref:n_ref + n_test]
X_t1 = X[n_ref + n_test:n_ref + 2 * n_test]

print(X_ref.shape, X_t0.shape, X_t1.shape)

# Columns 0-3 are all declared categorical. NOTE(review): None presumably
# asks the detector to infer the categories from X_ref - confirm against the
# TabularDrift documentation.
categories_per_feature = {0: None, 1: None, 2: None, 3: None}

cd = TabularDrift(X_ref,
                  p_val=.05,
                  categories_per_feature=categories_per_feature)

# NOTE: the original snippet hinted that the detector can be persisted and
# restored with save_detector(cd, filepath) / load_detector(filepath); that
# round trip is not exercised here.

preds = cd.predict(X_t0)

# preds['data']['is_drift'] is used directly as an index into `labels`,
# so 0 prints 'No!' and 1 prints 'Yes!'.
labels = ['No!', 'Yes!']
print(f"Drift? {labels[preds['data']['is_drift']]}")

# Names for the four columns (presumably in column order - confirm).
feature_names = [
    'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm'
]
Beispiel #4
0
                   threshold=threshold,
                   threshold_net=threshold_net,
                   latent_dim=latent_dim),
    KSDrift(X_ref,
            p_val=p_val,
            preprocess_x_ref=False,
            preprocess_fn=preprocess_fn),
    MMDDrift(X_ref,
             p_val=p_val,
             preprocess_x_ref=False,
             preprocess_fn=preprocess_fn,
             configure_kernel_from_x_ref=True,
             n_permutations=n_permutations),
    ChiSquareDrift(X_ref_cat, p_val=p_val, preprocess_x_ref=True),
    TabularDrift(X_ref_mix,
                 p_val=p_val,
                 categories_per_feature={0: None},
                 preprocess_x_ref=True),
    ClassifierDrift(X_ref,
                    model=model,
                    p_val=p_val,
                    n_folds=n_folds_drift,
                    train_size=None)
]
# Number of entries in the `detector` list built above; presumably used to
# parametrize tests over every detector - confirm against its usage.
n_tests = len(detector)


@pytest.fixture
def select_detector(request):
    """Return the detector selected by the requesting test.

    ``request.param`` is used as an index into the module-level ``detector``
    list.
    """
    detector_idx = request.param
    return detector[detector_idx]