Ejemplo n.º 1
0
def init_cd_mmddrift(state_dict: Dict, model: Union[tf.keras.Model, tf.keras.Sequential] = None) -> MMDDrift:
    """
    Rebuild an MMDDrift detector from a saved state dictionary.

    When the stored preprocessing function is callable and a Keras model is
    supplied, the model is written into the stored preprocessing kwargs:
    under 'encoder_net' if the function is named 'uae', under 'model' if it
    is named 'hidden_output'. Note that this updates the kwargs dictionary
    held by `state_dict` in place.

    Parameters
    ----------
    state_dict
        Dictionary containing the parameter values. The keys read are
        'preprocess_fn', 'preprocess_kwargs', 'p_val', 'X_ref',
        'update_X_ref', 'chunk_size', 'n', 'infer_sigma' and
        'permutation_test'.
    model
        Optionally loaded model.

    Returns
    -------
    Initialized MMDDrift instance.
    """
    preprocess_fn = state_dict['preprocess_fn']
    preprocess_kwargs = state_dict['preprocess_kwargs']

    keras_types = (tf.keras.Model, tf.keras.Sequential)
    if isinstance(preprocess_fn, Callable) and isinstance(model, keras_types):
        # Name of the preprocessing function -> kwarg that receives the model.
        model_slot = {'uae': 'encoder_net', 'hidden_output': 'model'}.get(preprocess_fn.__name__)
        if model_slot is not None:
            preprocess_kwargs[model_slot] = model

    ctor_args = {key: state_dict[key] for key in ('p_val', 'X_ref', 'update_X_ref', 'chunk_size')}
    cd = MMDDrift(preprocess_fn=preprocess_fn, preprocess_kwargs=preprocess_kwargs, **ctor_args)

    # These values are restored as attributes after construction.
    for attr in ('n', 'infer_sigma', 'permutation_test'):
        setattr(cd, attr, state_dict[attr])
    return cd
Ejemplo n.º 2
0
def MMD_test(source_data,
             target_data,
             p_val=0.05,
             preprocess_kwargs=None,
             chunk_size=100,
             n_permutations=20):
    """
    Functional wrapper around alibi_detect MMDDrift class that uses a gaussian kernel
    (https://docs.seldon.io/projects/alibi-detect/en/stable/api/alibi_detect.cd.mmd.html)

    Inputs:
        source_data - numpy.ndarray of shape (number of source samples,embedding dimension),
            samples from the source distribution
        target_data - numpy.ndarray of shape (number of target samples,embedding dimension),
            samples from the target distribution
        p_val - p-value used for the significance of the permutation test.
        preprocess_kwargs - Kwargs for a preprocessing function, pass callables under "model" key.
            Defaults to None, in which case a fresh empty dict is used for each call.
        chunk_size - Chunk size if dask is used to parallelise the computation.
        n_permutations - Number of permutations used in the permutation test.
    Outputs:
        p - float, empirical p-value determined using the permutation test
    Raises:
        AssertionError if the second dimensions of source_data and target_data differ.
    """
    # A `{}` default would be one dict object shared by every call; build a
    # new one per call so nothing downstream can leak state between calls.
    if preprocess_kwargs is None:
        preprocess_kwargs = {}

    # Both inputs must be 2-D; only the second (feature) dimension is compared.
    _, source_dim = np.shape(source_data)
    _, target_dim = np.shape(target_data)
    assert source_dim == target_dim, "Source dimension must match target dimension"

    cd = MMDDrift(p_val=p_val,
                  X_ref=source_data,
                  preprocess_kwargs=preprocess_kwargs,
                  kernel=gaussian_kernel,
                  chunk_size=chunk_size,
                  n_permutations=n_permutations)
    result = cd.predict(target_data, return_p_val=True)
    return result['data']['p_val']
Ejemplo n.º 3
0
def test_mmd(mmd_params):
    """
    Check MMDDrift predictions with and without a preprocessing step.

    The detector is first queried with a copy of its own reference data and
    must report no drift; the reference-set bookkeeping (`cd.n`, size of
    `cd.X_ref`) is checked afterwards. It is then queried with shifted and
    scaled noise and must report drift with a non-negative distance.
    """
    (n_features, n_enc, preprocess, chunk_size, n_permutations,
     update_X_ref, preprocess_X_ref) = mmd_params

    np.random.seed(0)
    X_ref = np.random.randn(n * n_features).reshape(n, n_features).astype('float32')

    # Default to "no preprocessing"; only the two recognised configurations
    # below switch it on.
    requested_fn, requested_kwargs = preprocess
    preprocess_fn, preprocess_kwargs = None, None
    if isinstance(requested_fn, Callable):
        has_layer = 'layer' in requested_kwargs.keys()
        model_name = requested_kwargs['model'].__name__
        if has_layer and model_name == 'HiddenOutput':
            hidden = HiddenOutput(model=mymodel((n_features, )),
                                  layer=requested_kwargs['layer'])
            preprocess_fn, preprocess_kwargs = requested_fn, {'model': hidden}
        elif model_name == 'UAE' and n_features > 1 and isinstance(n_enc, int):
            tf.random.set_seed(0)
            encoder_net = tf.keras.Sequential([
                InputLayer(input_shape=(n_features, )),
                Dense(n_enc),
            ])
            preprocess_fn, preprocess_kwargs = requested_fn, {'model': UAE(encoder_net=encoder_net)}

    # The reference-preprocessing flag is forced off when no kwargs are in use.
    ref_flag = preprocess_X_ref if isinstance(preprocess_kwargs, dict) else False
    cd = MMDDrift(p_val=.05,
                  X_ref=X_ref,
                  preprocess_X_ref=ref_flag,
                  update_X_ref=update_X_ref,
                  preprocess_fn=preprocess_fn,
                  preprocess_kwargs=preprocess_kwargs,
                  chunk_size=chunk_size,
                  n_permutations=n_permutations)

    # Identical data: no drift expected.
    X = X_ref.copy()
    same = cd.predict(X, return_p_val=True)['data']
    assert same['is_drift'] == 0
    assert same['p_val'] >= cd.p_val

    update_key = list(update_X_ref.keys())[0]
    n_seen = X.shape[0] + X_ref.shape[0]
    assert cd.n == n_seen
    assert cd.X_ref.shape[0] == min(update_X_ref[update_key], n_seen)

    # Shifted and scaled data: drift expected.
    mu, sigma = 5, 5
    noise = np.random.randn(n * n_features).reshape(n, n_features).astype('float32')
    X_h1 = sigma * noise + mu
    shifted = cd.predict(X_h1, return_p_val=True)['data']
    assert shifted['is_drift'] == 1
    assert shifted['p_val'] < cd.p_val
    assert shifted['distance'] >= 0.
Ejemplo n.º 4
0
def init_cd_mmddrift(state_dict: Dict, **kwargs) -> MMDDrift:
    """
    Rebuild an MMDDrift detector from a saved state dictionary.

    The detector is constructed with `preprocess_X_ref=False`; the stored
    'preprocess_X_ref' value is then assigned as an attribute afterwards,
    together with the other restored attributes.

    Parameters
    ----------
    state_dict
        Dictionary containing the parameter values. The keys read are
        'p_val', 'X_ref', 'update_X_ref', 'chunk_size', 'n',
        'preprocess_X_ref', 'infer_sigma' and 'permutation_test'.
    kwargs
        Kwargs optionally containing preprocess_fn and preprocess_kwargs;
        forwarded unchanged to `init_preprocess`.

    Returns
    -------
    Initialized MMDDrift instance.
    """
    preprocess_fn, preprocess_kwargs = init_preprocess(**kwargs)

    ctor_args = {key: state_dict[key] for key in ('p_val', 'X_ref', 'update_X_ref', 'chunk_size')}
    cd = MMDDrift(preprocess_X_ref=False,
                  preprocess_fn=preprocess_fn,
                  preprocess_kwargs=preprocess_kwargs,
                  **ctor_args)

    # Restore saved attributes on the constructed detector.
    for attr in ('n', 'preprocess_X_ref', 'infer_sigma', 'permutation_test'):
        setattr(cd, attr, state_dict[attr])
    return cd
Ejemplo n.º 5
0
def test_mmddrift(mmddrift_params):
    """
    Check that MMDDrift wraps the detector class matching the backend.

    For 'pytorch' and 'tensorflow' (case-insensitive) the wrapped
    `_detector` must be an instance of the corresponding class. For any
    other backend the constructor is expected to raise NotImplementedError,
    which is recorded here as `cd` being None.
    """
    backend = mmddrift_params
    x_ref = np.random.randn(n, n_features)

    try:
        cd = MMDDrift(x_ref=x_ref, backend=backend)
    except NotImplementedError:
        cd = None

    backend_name = backend.lower()
    if backend_name == 'pytorch':
        expected_cls = MMDDriftTorch
    elif backend_name == 'tensorflow':
        expected_cls = MMDDriftTF
    else:
        assert cd is None
        return
    assert isinstance(cd._detector, expected_cls)
Ejemplo n.º 6
0
def init_cd_mmddrift(state_dict: Dict, model: Optional[Union[tf.keras.Model, tf.keras.Sequential]],
                     emb: Optional[TransformerEmbedding], tokenizer: Optional[Callable], **kwargs) \
        -> MMDDrift:
    """
    Rebuild an MMDDrift detector from a saved state dictionary.

    The preprocessing function and its kwargs come from `init_preprocess`.
    The detector is constructed with `preprocess_X_ref=False`; the stored
    'preprocess_X_ref' value is then assigned as an attribute afterwards,
    together with the other restored attributes.

    Parameters
    ----------
    state_dict
        Dictionary containing the parameter values. The keys read here are
        'p_val', 'X_ref', 'update_X_ref', 'chunk_size', 'input_shape', 'n',
        'preprocess_X_ref', 'infer_sigma' and 'permutation_test'.
    model
        Optional preprocessing model.
    emb
        Optional text embedding model.
    tokenizer
        Optional tokenizer for text drift.
    kwargs
        Kwargs optionally containing preprocess_fn and preprocess_kwargs.

    Returns
    -------
    Initialized MMDDrift instance.
    """
    preprocess_fn, preprocess_kwargs = init_preprocess(state_dict, model, emb, tokenizer, **kwargs)

    stored_keys = ('p_val', 'X_ref', 'update_X_ref', 'chunk_size', 'input_shape')
    ctor_args = {key: state_dict[key] for key in stored_keys}
    cd = MMDDrift(
        preprocess_X_ref=False,
        preprocess_fn=preprocess_fn,
        preprocess_kwargs=preprocess_kwargs,
        **ctor_args
    )

    # Restore saved attributes on the constructed detector.
    for attr in ('n', 'preprocess_X_ref', 'infer_sigma', 'permutation_test'):
        setattr(cd, attr, state_dict[attr])
    return cd
Ejemplo n.º 7
0
                  samples=samples,
                  **kwargs),
    OutlierProphet(threshold=.7, growth='logistic'),
    SpectralResidual(threshold=threshold, window_amp=10, window_local=10),
    OutlierSeq2Seq(input_dim,
                   seq_len,
                   threshold=threshold,
                   threshold_net=threshold_net,
                   latent_dim=latent_dim),
    KSDrift(p_val=p_val,
            X_ref=X_ref,
            preprocess_fn=uae,
            preprocess_kwargs={'encoder_net': encoder_net}),
    MMDDrift(p_val=p_val,
             X_ref=X_ref,
             preprocess_fn=uae,
             preprocess_kwargs={'encoder_net': encoder_net},
             n_permutations=10,
             chunk_size=10)
]
# Number of entries in `detector`; the parametrize decorator below uses it to
# generate one index per detector.
n_tests = len(detector)


@pytest.fixture
def select_detector(request):
    """Return the entry of `detector` at the index given by `request.param`."""
    index = request.param
    return detector[index]


@pytest.mark.parametrize('select_detector',
                         list(range(n_tests)),
                         indirect=True)
def test_save_load(select_detector):
Ejemplo n.º 8
0
                  samples=samples,
                  **kwargs),
    OutlierProphet(threshold=.7, growth='logistic'),
    SpectralResidual(threshold=threshold, window_amp=10, window_local=10),
    OutlierSeq2Seq(input_dim,
                   seq_len,
                   threshold=threshold,
                   threshold_net=threshold_net,
                   latent_dim=latent_dim),
    KSDrift(X_ref,
            p_val=p_val,
            preprocess_x_ref=False,
            preprocess_fn=preprocess_fn),
    MMDDrift(X_ref,
             p_val=p_val,
             preprocess_x_ref=False,
             preprocess_fn=preprocess_fn,
             configure_kernel_from_x_ref=True,
             n_permutations=n_permutations),
    ChiSquareDrift(X_ref_cat, p_val=p_val, preprocess_x_ref=True),
    TabularDrift(X_ref_mix,
                 p_val=p_val,
                 categories_per_feature={0: None},
                 preprocess_x_ref=True),
    ClassifierDrift(X_ref,
                    model=model,
                    p_val=p_val,
                    n_folds=n_folds_drift,
                    train_size=None)
]
# Number of entries in the `detector` list.
n_tests = len(detector)
Ejemplo n.º 9
0
                  samples=samples,
                  **kwargs),
    OutlierProphet(threshold=.7, growth='logistic'),
    SpectralResidual(threshold=threshold, window_amp=10, window_local=10),
    OutlierSeq2Seq(input_dim,
                   seq_len,
                   threshold=threshold,
                   threshold_net=threshold_net,
                   latent_dim=latent_dim),
    KSDrift(p_val=p_val,
            X_ref=X_ref,
            preprocess_X_ref=False,
            preprocess_kwargs=preprocess_kwargs),
    MMDDrift(p_val=p_val,
             X_ref=X_ref,
             preprocess_X_ref=False,
             preprocess_kwargs=preprocess_kwargs,
             n_permutations=10,
             chunk_size=10),
    ChiSquareDrift(p_val=p_val, X_ref=X_ref_cat, preprocess_X_ref=True),
    TabularDrift(p_val=p_val,
                 X_ref=X_ref_mix,
                 categories_per_feature={0: None},
                 preprocess_X_ref=True)
]
# Number of entries in the `detector` list.
n_tests = len(detector)


@pytest.fixture
def select_detector(request):
    """Return the entry of `detector` at the index given by `request.param`."""
    index = request.param
    return detector[index]
Ejemplo n.º 10
0
def _get_num_drifts(num_values, p_val):
    '''Build the drift detector used for numerical values.

    Returns an MMDDrift instance created from `num_values` with the given
    `p_val`.
    '''
    # MMDDrift requires numerical values.
    return MMDDrift(num_values, p_val=p_val)