Exemple #1
0
def init_od_iforest(state_dict: Dict) -> IForest:
    """
    Rebuild an isolation forest outlier detector from a saved state.

    Parameters
    ----------
    state_dict
        Dictionary holding the saved state. The ``'threshold'`` entry is
        passed to the ``IForest`` constructor and the ``'isolationforest'``
        entry is attached to the new instance.

    Returns
    -------
    IForest instance carrying the saved threshold and isolation forest.
    """
    saved_threshold = state_dict['threshold']
    detector = IForest(threshold=saved_threshold)
    # The stored forest is attached as-is; nothing is refitted here.
    detector.isolationforest = state_dict['isolationforest']
    return detector
Exemple #2
0
def train_detector(data, preprocessor, perc_outlier=5, *, n_estimators=100,
                   n_train_samples=30000, n_threshold_samples=1000, seed=0):
    """
    Train an isolation forest outlier detector and infer its threshold.

    The detector is fitted on a batch drawn with ``perc_outlier=0`` and its
    threshold is then inferred on a second batch drawn with ``perc_outlier``
    percent outliers. Both batches go through ``preprocessor.transform``
    before reaching the detector.

    Parameters
    ----------
    data
        Object exposing ``data`` and ``target`` attributes; both are
        forwarded to ``create_outlier_batch``.
    preprocessor
        Object whose ``transform`` method is applied to each batch.
    perc_outlier
        Percentage of outliers requested for the threshold batch. The
        threshold percentile passed to the detector is ``100 - perc_outlier``.
    n_estimators
        Forwarded to the ``IForest`` constructor.
    n_train_samples
        ``n_samples`` used for the outlier-free training batch.
    n_threshold_samples
        ``n_samples`` used for the threshold batch.
    seed
        Value passed to ``np.random.seed`` before each batch is drawn.
        ``None`` leaves the global NumPy random state untouched.

    Returns
    -------
    The fitted detector, after ``infer_threshold`` has been called on it.
    """

    def _draw_batch(n_samples, outlier_percentage):
        # Reseed before every draw so each batch depends only on `seed`
        # (presumably create_outlier_batch samples from NumPy's global RNG;
        # verify against its implementation).
        if seed is not None:
            np.random.seed(seed)
        batch = create_outlier_batch(data.data,
                                     data.target,
                                     n_samples=n_samples,
                                     perc_outlier=outlier_percentage)
        return batch.data.astype('float')

    print("Initialize outlier detector.")
    od = IForest(threshold=None, n_estimators=n_estimators)

    print("Training on normal data.")
    X_train = _draw_batch(n_train_samples, 0)
    od.fit(preprocessor.transform(X_train))

    print("Train on threshold data.")
    X_threshold = _draw_batch(n_threshold_samples, perc_outlier)
    od.infer_threshold(preprocessor.transform(X_threshold),
                       threshold_perc=100 - perc_outlier)

    return od
Exemple #3
0
def test_isolation_forest(iforest_params):
    """
    Check the IForest detector end to end on the iris data.

    ``iforest_params`` unpacks to ``(threshold, threshold_perc,
    return_instance_score)``. The test verifies the constructor state and
    metadata, that the inferred threshold leaves roughly ``threshold_perc``
    percent of the scores below it (within 5 points), and that ``predict``
    flags exactly the instances scoring above the threshold.
    """
    threshold, threshold_perc, return_instance_score = iforest_params
    features, _ = load_iris(return_X_y=True)

    # Constructor state and metadata.
    detector = IForest(threshold)
    assert detector.threshold == threshold
    expected_meta = {
        'name': 'IForest',
        'detector_type': 'offline',
        'data_type': 'tabular'
    }
    assert detector.meta == expected_meta

    # Fit, infer the threshold and score the same data.
    detector.fit(features)
    detector.infer_threshold(features, threshold_perc=threshold_perc)
    scores = detector.score(features)

    # Share of scores strictly below the inferred threshold, in percent.
    n_below = (scores < detector.threshold).astype(int).sum()
    perc_below = 100 * n_below / scores.shape[0]
    assert threshold_perc - 5 < perc_below < threshold_perc + 5

    # Predictions must agree with the raw scores.
    preds = detector.predict(features,
                             return_instance_score=return_instance_score)
    assert preds['meta'] == detector.meta
    n_above = (scores > detector.threshold).astype(int).sum()
    assert preds['data']['is_outlier'].sum() == n_above
    if not return_instance_score:
        assert preds['data']['instance_score'] is None
    Dense(n_gmm, activation=tf.nn.softmax)
])

# Small Sequential network: an input layer of shape (seq_len, latent_dim)
# followed by a 5-unit ReLU Dense layer. Where it is consumed is not visible
# in this excerpt.
threshold_net = tf.keras.Sequential([
    InputLayer(input_shape=(seq_len, latent_dim)),
    Dense(5, activation=tf.nn.relu)
])

# Functional-API model: a single 2-unit softmax Dense layer applied to an
# input of shape (input_dim,). It is passed as `model` to AdversarialAE in the
# detector list below.
inputs = tf.keras.Input(shape=(input_dim, ))
outputs = tf.keras.layers.Dense(2, activation=tf.nn.softmax)(inputs)
model = tf.keras.Model(inputs=inputs, outputs=outputs)

detector = [
    AdversarialAE(threshold=threshold, model=model, **kwargs),
    IForest(threshold=threshold),
    Mahalanobis(threshold=threshold),
    OutlierAEGMM(threshold=threshold,
                 gmm_density_net=gmm_density_net,
                 n_gmm=n_gmm,
                 **kwargs),
    OutlierVAE(threshold=threshold,
               latent_dim=latent_dim,
               samples=samples,
               **kwargs),
    OutlierAE(threshold=threshold, **kwargs),
    OutlierVAEGMM(threshold=threshold,
                  gmm_density_net=gmm_density_net,
                  n_gmm=n_gmm,
                  latent_dim=latent_dim,
                  samples=samples,