Example #1
def score_to_label(pred_scores, outliers_fraction=0.1):
    """Turn raw outlier outlier scores to binary labels (0 or 1).

    Parameters
    ----------
    pred_scores : list or numpy array of shape (n_samples,)
        Raw outlier scores. Outliers are assumed to have larger values.

    outliers_fraction : float in (0,1)
        The proportion of outliers in the data set.

    Returns
    -------
    outlier_labels : numpy array of shape (n_samples,)
        For each observation, indicates whether it should be considered
        an outlier according to the fitted model: 0 stands for inliers
        and 1 for outliers.
    """
    # check input values
    pred_scores = column_or_1d(pred_scores)
    check_parameter(outliers_fraction, 0, 1)

    threshold = percentile(pred_scores, 100 * (1 - outliers_fraction))
    pred_labels = (pred_scores > threshold).astype('int')
    return pred_labels
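
A quick usage sketch (hedged: column_or_1d, check_parameter, and percentile are assumed to come from sklearn.utils, pyod.utils.utility, and numpy, as in the source module):

import numpy as np

scores = np.array([0.1, 0.3, 0.2, 0.9, 0.15, 0.8, 0.25, 0.05, 0.12, 0.4])
labels = score_to_label(scores, outliers_fraction=0.2)
# With 10 samples and a 20% outlier fraction, the threshold is the 80th
# percentile (0.48 here), so only the two largest scores are flagged.
print(labels)  # [0 0 0 1 0 1 0 0 0 0]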
Example #2
    def __init__(self, base_estimators, meta_clf=None, n_folds=2,
                 keep_original=True,
                 use_proba=False, shuffle_data=False, random_state=None,
                 threshold=None, pre_fitted=None):

        super(Stacking, self).__init__(
            base_estimators=base_estimators, pre_fitted=pre_fitted)

        # validate input parameters
        if not isinstance(n_folds, int):
            raise ValueError('n_folds must be an integer variable')
        check_parameter(n_folds, low=2, include_left=True,
                        param_name='n_folds')
        self.n_folds = n_folds

        if meta_clf is not None:
            self.meta_clf = meta_clf
        else:
            self.meta_clf = LogisticRegression()

        # set flags
        self.keep_original = keep_original
        self.use_proba = use_proba
        self.shuffle_data = shuffle_data

        self.random_state = random_state

        if threshold is not None:
            warnings.warn(
                "Stacking does not support threshold setting option. "
                "Please set the threshold in classifiers directly.")

        if pre_fitted is not None:
            warnings.warn("Stacking does not support pre_fitted option.")
Example #3
    def __init__(self, hidden_neurons=None,
                 hidden_activation='relu', output_activation='sigmoid',
                 loss=mean_squared_error, optimizer='adam',
                 epochs=100, batch_size=32, dropout_rate=0.2,
                 l2_regularizer=0.1, validation_size=0.1, preprocessing=True,
                 verbose=1, random_state=None, contamination=0.1):
        super(AutoEncoder, self).__init__(contamination=contamination)
        self.hidden_neurons = hidden_neurons
        self.hidden_activation = hidden_activation
        self.output_activation = output_activation
        self.loss = loss
        self.optimizer = optimizer
        self.epochs = epochs
        self.batch_size = batch_size
        self.dropout_rate = dropout_rate
        self.l2_regularizer = l2_regularizer
        self.validation_size = validation_size
        self.preprocessing = preprocessing
        self.verbose = verbose
        self.random_state = random_state

        # default values
        if self.hidden_neurons is None:
            self.hidden_neurons = [64, 32, 32, 64]

        # Verify the network design is valid: layer sizes must be symmetric
        if self.hidden_neurons != self.hidden_neurons[::-1]:
            raise ValueError("Hidden units should be symmetric, got %s"
                             % self.hidden_neurons)

        self.hidden_neurons_ = self.hidden_neurons

        check_parameter(dropout_rate, 0, 1, param_name='dropout_rate',
                        include_left=True)
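
The symmetry check above in isolation (a minimal sketch; the surrounding class is assumed to be a pyod-style AutoEncoder whose decoder mirrors its encoder):

ok = [64, 32, 32, 64]
bad = [64, 32, 16]
assert ok == ok[::-1]    # palindromic layer sizes pass
assert bad != bad[::-1]  # this configuration raises ValueError above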
Example #4
    def __init__(self,
                 base_estimators,
                 method='average',
                 threshold=0.5,
                 weights=None,
                 pre_fitted=False):

        super(SimpleClassifierAggregator,
              self).__init__(base_estimators=base_estimators,
                             pre_fitted=pre_fitted)

        # validate input parameters
        if method not in [
                'average', 'maximization', 'majority_vote', 'median'
        ]:
            raise ValueError(
                "{method} is not a valid parameter.".format(method=method))

        self.method = method
        check_parameter(threshold,
                        0,
                        1,
                        include_left=False,
                        include_right=False,
                        param_name='threshold')
        self.threshold = threshold

        # set estimator weights
        self._set_weights(weights)
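
A hedged usage sketch, assuming a combo-style SimpleClassifierAggregator; only the four listed method strings are accepted, and threshold must lie strictly inside (0, 1):

from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB

clf = SimpleClassifierAggregator(
    base_estimators=[LogisticRegression(max_iter=200), GaussianNB()],
    method='majority_vote', threshold=0.5)
# method='mean' would raise ValueError; threshold=1.0 would fail the
# open-interval check_parameter call above.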
Example #5
def split_datasets(X, y, n_folds=3, shuffle_data=False, random_state=None):
    """Utility function to split the data for stacking. The data is split
    into n_folds with roughly equal rough size.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        The input samples.

    y : numpy array of shape (n_samples,)
        The ground truth of the input samples (labels).

    n_folds : int, optional (default=3)
        The number of splits of the training sample.

    shuffle_data : bool, optional (default=False)
        If True, shuffle the input data.

    random_state : RandomState, optional (default=None)
        A random number generator instance to define the state of the random
        permutations generator.

    Returns
    -------
    X : numpy array of shape (n_samples, n_features)
        The input samples. If shuffle_data, return the shuffled data.

    y : numpy array of shape (n_samples,)
        The ground truth of the input samples (labels). If shuffle_data,
        return the shuffled data.

    index_lists : list of list
        The list of indexes of each fold regarding the returned X and y.
        For instance, index_lists[0] contains the indexes of fold 0.

    """

    if not isinstance(n_folds, int):
        raise ValueError('n_folds must be an integer variable')
    check_parameter(n_folds, low=2, include_left=True, param_name='n_folds')

    random_state = check_random_state(random_state)

    if shuffle_data:
        X, y = shuffle(X, y, random_state=random_state)

    idx_length = len(y)
    idx_list = list(range(idx_length))

    avg_length = int(idx_length / n_folds)

    index_lists = []
    for i in range(n_folds - 1):
        index_lists.append(idx_list[i * avg_length:(i + 1) * avg_length])

    index_lists.append(idx_list[(n_folds - 1) * avg_length:])

    return X, y, index_lists
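
A worked sketch of the fold bookkeeping (assumes numpy; shuffle and check_random_state come from sklearn.utils in the original module):

import numpy as np

X = np.arange(20).reshape(10, 2)
y = np.array([0, 0, 1, 1, 0, 1, 0, 1, 0, 1])
X_out, y_out, folds = split_datasets(X, y, n_folds=3)
# 10 samples over 3 folds: two folds of int(10 / 3) = 3 indexes each,
# and a last fold that absorbs the remainder.
print([len(f) for f in folds])  # [3, 3, 4]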
Example #6
    def __init__(self,
                 encoder_neurons=None,
                 decoder_neurons=None,
                 latent_dim=2,
                 hidden_activation='relu',
                 output_activation='sigmoid',
                 loss=mse,
                 optimizer='adam',
                 epochs=100,
                 batch_size=32,
                 dropout_rate=0.2,
                 l2_regularizer=0.1,
                 validation_size=0.1,
                 preprocessing=True,
                 verbose=1,
                 random_state=None,
                 contamination=0.1,
                 gamma=1.0,
                 capacity=0.0):
        super(VAE_EDE, self).__init__(contamination=contamination)
        self.encoder_neurons = encoder_neurons
        self.decoder_neurons = decoder_neurons
        self.hidden_activation = hidden_activation
        self.output_activation = output_activation
        self.loss = loss
        self.optimizer = optimizer
        self.epochs = epochs
        self.batch_size = batch_size
        self.dropout_rate = dropout_rate
        self.l2_regularizer = l2_regularizer
        self.validation_size = validation_size
        self.preprocessing = preprocessing
        self.verbose = verbose
        self.random_state = random_state
        self.latent_dim = latent_dim
        self.gamma = gamma
        self.capacity = capacity

        # default values
        if self.encoder_neurons is None:
            self.encoder_neurons = [128, 64, 32]

        if self.decoder_neurons is None:
            self.decoder_neurons = [32, 64, 128]

        self.encoder_neurons_ = self.encoder_neurons
        self.decoder_neurons_ = self.decoder_neurons

        check_parameter(dropout_rate,
                        0,
                        1,
                        param_name='dropout_rate',
                        include_left=True)
Example #7
    def __init__(self, base_estimators, n_clusters, linkage_method='single',
                 weights=None, pre_fitted=False):

        super(EAC, self).__init__(
            base_estimators=base_estimators, pre_fitted=pre_fitted)

        check_parameter(n_clusters, low=2, param_name='n_clusters')
        self.n_clusters = n_clusters

        # set estimator weights
        self._set_weights(weights)

        self.linkage_method = linkage_method
Example #8
def argmaxn(value_list, n, order='desc'):
    """Return the index of top n elements in the list
    if order is set to 'desc', otherwise return the index of n smallest ones.

    Parameters
    ----------
    value_list : list, array, numpy array of shape (n_samples,)
        A list containing all values.

    n : int
        The number of elements to select.

    order : str, optional (default='desc')
        The order to sort {'desc', 'asc'}:

        - 'desc': descending
        - 'asc': ascending

    Returns
    -------
    index_list : numpy array of shape (n,)
        The indices of the selected n elements.
    """

    value_list = column_or_1d(value_list)
    length = len(value_list)

    # validate the choice of n
    check_parameter(n,
                    1,
                    length,
                    include_left=True,
                    include_right=True,
                    param_name='n')

    # for the n smallest elements, flip the partition point
    if order != 'desc':
        n = length - n

    value_sorted = np.partition(value_list, length - n)
    threshold = value_sorted[int(length - n)]

    if order == 'desc':
        return np.where(np.greater_equal(value_list, threshold))[0]
    else:  # return the index of n smallest elements
        return np.where(np.less(value_list, threshold))[0]
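
Worked usage sketch (assumes numpy and sklearn.utils.column_or_1d are available, as in the original module):

values = [0.1, 0.9, 0.4, 0.7, 0.2]
print(argmaxn(values, n=2))               # indices of the two largest: [1 3]
print(argmaxn(values, n=2, order='asc'))  # indices of the two smallest: [0 4]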
Example #9
    def __init__(self, base_estimators, local_region_size=30, threshold=None,
                 pre_fitted=None):

        super(DCS_LA, self).__init__(
            base_estimators=base_estimators, pre_fitted=pre_fitted)

        # validate input parameters
        if not isinstance(local_region_size, int):
            raise ValueError('local_region_size must be an integer variable')
        check_parameter(local_region_size, low=2, include_left=True,
                        param_name='local_region_size')
        self.local_region_size = local_region_size

        if threshold is not None:
            warnings.warn(
                "DCS does not support threshold setting option. "
                "Please set the threshold in classifiers directly.")

        if pre_fitted is not None:
            warnings.warn("DCS does not support pre_fitted option.")
Example #10
    def __init__(self,
                 base_estimators,
                 local_region_size=30,
                 n_selected_clfs=None,
                 use_weights=False,
                 threshold=None,
                 pre_fitted=None):

        super(DES_LA, self).__init__(base_estimators=base_estimators,
                                     pre_fitted=pre_fitted)

        # validate input parameters
        if not isinstance(local_region_size, int):
            raise ValueError('local_region_size must be an integer variable')
        check_parameter(local_region_size,
                        low=2,
                        include_left=True,
                        param_name='local_region_size')
        self.local_region_size = local_region_size

        if n_selected_clfs is None:
            self.n_selected_clfs = int(self.n_base_estimators_ * 0.5)
        else:
            if not isinstance(n_selected_clfs, int):
                raise ValueError('n_selected_clfs must be an integer variable')
            check_parameter(n_selected_clfs,
                            low=1,
                            high=self.n_base_estimators_,
                            include_left=True,
                            include_right=True,
                            param_name='n_selected_clfs')
            self.n_selected_clfs = n_selected_clfs

        self.use_weights = use_weights

        if threshold is not None:
            warnings.warn("DES does not support threshold setting option. "
                          "Please set the threshold in classifiers directly.")

        if pre_fitted is not None:
            warnings.warn("DES does not support pre_fitted option.")
Example #11
    def __init__(self,
                 hidden_neurons=None,
                 hidden_activation='leakyrelu',
                 output_activation='leakyrelu',
                 loss=None,
                 optimizer='adam',
                 lr=1e-3,
                 epochs=20,
                 batch_size=32,
                 dropout_rate=0.2,
                 l2_regularizer=0.1,
                 validation_size=0.1,
                 preprocessing=False,
                 verbose=1,
                 random_state=None,
                 contamination=0.1):
        super(AE, self).__init__(contamination=contamination)
        self.hidden_neurons = hidden_neurons
        self.hidden_activation = hidden_activation
        self.output_activation = output_activation
        self.loss = loss
        self.optimizer = optimizer
        self.epochs = epochs
        self.batch_size = batch_size
        self.dropout_rate = dropout_rate
        self.l2_regularizer = l2_regularizer
        self.validation_size = validation_size
        self.preprocessing = preprocessing
        self.verbose = verbose
        self.random_state = random_state
        self.lr = lr

        self.hidden_neurons_ = self.hidden_neurons

        check_parameter(dropout_rate,
                        0,
                        1,
                        param_name='dropout_rate',
                        include_left=True)
Example #12
    def __init__(self,
                 base_estimators,
                 n_clusters,
                 weights=None,
                 reference_idx=0,
                 pre_fitted=False):

        super(ClustererEnsemble,
              self).__init__(base_estimators=base_estimators,
                             pre_fitted=pre_fitted)

        check_parameter(n_clusters, low=2, param_name='n_clusters')
        self.n_clusters = n_clusters

        check_parameter(reference_idx,
                        low=0,
                        high=self.n_base_estimators_ - 1,
                        include_left=True,
                        include_right=True)
        self.reference_idx = reference_idx

        # set estimator weights
        self._set_weights(weights)
Example #13
    def test_check_parameter_range(self):
        # verify parameter type correction
        with assert_raises(TypeError):
            check_parameter('f', 0, 100)

        with assert_raises(TypeError):
            check_parameter(1, 'f', 100)

        with assert_raises(TypeError):
            check_parameter(1, 0, 'f')

        with assert_raises(TypeError):
            check_parameter(argmaxn(value_list=[1, 2, 3], n=1), 0, 100)

        # if low and high are both unset
        with assert_raises(ValueError):
            check_parameter(50)

        # if low <= high
        with assert_raises(ValueError):
            check_parameter(50, 100, 99)

        with assert_raises(ValueError):
            check_parameter(50, 100, 100)

        # check one side
        with assert_raises(ValueError):
            check_parameter(50, low=100)
        with assert_raises(ValueError):
            check_parameter(50, high=0)

        assert_equal(True, check_parameter(50, low=10))
        assert_equal(True, check_parameter(50, high=100))

        # if check fails
        with assert_raises(ValueError):
            check_parameter(-1, 0, 100)

        with assert_raises(ValueError):
            check_parameter(101, 0, 100)

        with assert_raises(ValueError):
            check_parameter(0.5, 0.2, 0.3)

        # if check passes
        assert_equal(True, check_parameter(50, 0, 100))

        assert_equal(True, check_parameter(0.5, 0.1, 0.8))

        # if includes left or right bounds
        with assert_raises(ValueError):
            check_parameter(100,
                            0,
                            100,
                            include_left=False,
                            include_right=False)
        assert_equal(
            True,
            check_parameter(0, 0, 100, include_left=True, include_right=False))
        assert_equal(
            True,
            check_parameter(0, 0, 100, include_left=True, include_right=True))
        assert_equal(
            True,
            check_parameter(100,
                            0,
                            100,
                            include_left=False,
                            include_right=True))
        assert_equal(
            True,
            check_parameter(100, 0, 100, include_left=True,
                            include_right=True))
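
The contract this test exercises, in brief (a sketch assuming the pyod-style check_parameter these examples use throughout): it returns True when the parameter lies in the requested interval, raises ValueError when it does not or when the bounds are inconsistent, and raises TypeError for non-numeric inputs:

from pyod.utils.utility import check_parameter

assert check_parameter(0.5, 0.1, 0.8)                 # open interval by default
assert check_parameter(0, 0, 100, include_left=True)  # closed left bound passes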
Example #14
def _aom_moa_helper(mode, scores, n_buckets, method, bootstrap_estimators,
                    random_state):
    """Internal helper function for Average of Maximum (AOM) and
    Maximum of Average (MOA). See :cite:`aggarwal2015theoretical` for details.

    First dividing estimators into subgroups, take the maximum/average score
    as the subgroup score. Finally, take the average/maximum of all subgroup 
    scores.

    Parameters
    ----------
    mode : str
        Define the operation mode, either "AOM" or "MOA".

    scores : numpy array of shape (n_samples, n_estimators)
        The score matrix outputted from various estimators.

    n_buckets : int, optional (default=5)
        The number of subgroups to build.

    method : str, optional (default='static')
        {'static', 'dynamic'}, if 'dynamic', build subgroups
        randomly with dynamic bucket size.

    bootstrap_estimators : bool, optional (default=False)
        Whether estimators are drawn with replacement.

    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the
        random number generator; If RandomState instance, random_state is
        the random number generator; If None, the random number generator
        is the RandomState instance used by `np.random`.

    Returns
    -------
    combined_scores : numpy array of shape (n_samples,)
        The combined scores.

    """

    if mode not in ('AOM', 'MOA'):
        raise NotImplementedError(
            '{mode} is not implemented'.format(mode=mode))

    scores = check_array(scores)
    # TODO: add one more parameter for max number of estimators
    # use random_state instead
    # for now it is fixed at n_estimators/2
    n_estimators = scores.shape[1]
    check_parameter(n_buckets,
                    2,
                    n_estimators,
                    include_left=True,
                    include_right=True,
                    param_name='n_buckets')

    scores_buckets = np.zeros([scores.shape[0], n_buckets])

    if method == 'static':

        n_estimators_per_bucket = int(n_estimators / n_buckets)
        if n_estimators % n_buckets != 0:
            raise ValueError('n_estimators / n_buckets has a remainder. Not '
                             'allowed in static mode.')

        if not bootstrap_estimators:
            # shuffle the estimator order
            shuffled_list = shuffle(list(range(0, n_estimators, 1)),
                                    random_state=random_state)

            head = 0
            for i in range(0, n_estimators, n_estimators_per_bucket):
                tail = i + n_estimators_per_bucket
                batch_ind = int(i / n_estimators_per_bucket)
                if mode == 'AOM':
                    scores_buckets[:, batch_ind] = np.max(
                        scores[:, shuffled_list[head:tail]], axis=1)
                else:
                    scores_buckets[:, batch_ind] = np.mean(
                        scores[:, shuffled_list[head:tail]], axis=1)

                # increment index
                head = head + n_estimators_per_bucket
                # noinspection PyUnusedLocal
        else:
            for i in range(n_buckets):
                ind = sample_without_replacement(n_estimators,
                                                 n_estimators_per_bucket,
                                                 random_state=random_state)
                if mode == 'AOM':
                    scores_buckets[:, i] = np.max(scores[:, ind], axis=1)
                else:
                    scores_buckets[:, i] = np.mean(scores[:, ind], axis=1)

    elif method == 'dynamic':  # random bucket size
        for i in range(n_buckets):
            # the number of estimators per bucket is drawn from [2, n_estimators / 2)
            max_estimator_per_bucket = RandomState(seed=random_state).randint(
                2, int(n_estimators / 2))
            ind = sample_without_replacement(n_estimators,
                                             max_estimator_per_bucket,
                                             random_state=random_state)
            if mode == 'AOM':
                scores_buckets[:, i] = np.max(scores[:, ind], axis=1)
            else:
                scores_buckets[:, i] = np.mean(scores[:, ind], axis=1)

    else:
        raise NotImplementedError(
            '{method} is not implemented'.format(method=method))

    if mode == 'AOM':
        return np.mean(scores_buckets, axis=1)
    else:
        return np.max(scores_buckets, axis=1)
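
A hedged usage sketch: AOM over a four-estimator score matrix split into two static buckets (in pyod the public entry points are the aom()/moa() wrappers, which delegate to this helper):

import numpy as np

scores = np.array([[0.1, 0.9, 0.3, 0.4],
                   [0.5, 0.2, 0.8, 0.6]])
combined = _aom_moa_helper('AOM', scores, n_buckets=2, method='static',
                           bootstrap_estimators=False, random_state=42)
print(combined.shape)  # (2,) -- one combined score per sample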
Example #15
    def _parameter_validation(self, contamination, n_jobs, rp_clf_list,
                              rp_ng_clf_list, approx_clf_list,
                              approx_ng_clf_list, approx_clf,
                              cost_forecast_loc_fit, cost_forecast_loc_pred):
        """Internal function to valid the initial parameters

        Returns
        -------
        self : object
            Post-check estimator.
        """

        if not (0. < contamination <= 0.5):
            raise ValueError("contamination must be in (0, 0.5], "
                             "got: %f" % contamination)

        self.contamination = contamination

        if approx_clf is not None:
            self.approx_clf = approx_clf
        else:
            self.approx_clf = RandomForestRegressor(n_estimators=50)

        if n_jobs is None:
            self.n_jobs = 1
        elif n_jobs == -1:
            self.n_jobs = effective_n_jobs()
        else:
            self.n_jobs = n_jobs

        # validate random projection list
        if rp_clf_list is None:
            # the algorithms that should be using random projection
            self.rp_clf_list = ['LOF', 'KNN', 'ABOD', 'COF']
        else:
            self.rp_clf_list = rp_clf_list

        if rp_ng_clf_list is None:
            # the algorithms that should not be using random projection
            self.rp_ng_clf_list = ['IForest', 'PCA', 'HBOS', 'MCD', 'LMDD']
        else:
            self.rp_ng_clf_list = rp_ng_clf_list

        # Validate target_dim_frac
        check_parameter(self.target_dim_frac,
                        low=0,
                        high=1,
                        include_left=False,
                        include_right=True,
                        param_name='target_dim_frac')

        # validate model approximation list
        if approx_clf_list is None:
            # the algorithms that should be using approximation
            self.approx_clf_list = ['LOF', 'KNN', 'CBLOF', 'OCSVM']
        else:
            self.approx_clf_list = approx_clf_list

        if approx_ng_clf_list is None:
            # the algorithms that should not be using approximation
            self.approx_ng_clf_list = [
                'PCA', 'HBOS', 'ABOD', 'MCD', 'LMDD', 'LSCP', 'IForest'
            ]
        else:
            self.approx_ng_clf_list = approx_ng_clf_list

        this_directory = os.path.abspath(os.path.dirname(__file__))

        # validate the trained model
        if cost_forecast_loc_fit is None:
            self.cost_forecast_loc_fit_ = os.path.join(this_directory,
                                                       'saved_models',
                                                       'bps_train.joblib')
        else:
            self.cost_forecast_loc_fit_ = cost_forecast_loc_fit

        if cost_forecast_loc_pred is None:
            self.cost_forecast_loc_pred_ = os.path.join(
                this_directory, 'saved_models', 'bps_prediction.joblib')
        else:
            self.cost_forecast_loc_pred_ = cost_forecast_loc_pred

        return self
Example #16
    def _parameter_validation(self, contamination, n_jobs, rp_clf_list,
                              rp_ng_clf_list, approx_clf_list,
                              approx_ng_clf_list, approx_clf,
                              cost_forecast_loc_fit,
                              cost_forecast_loc_pred):

        if not (0. < contamination <= 0.5):
            raise ValueError("contamination must be in (0, 0.5], "
                             "got: %f" % contamination)

        self.contamination = contamination

        if approx_clf is not None:
            self.approx_clf = approx_clf
        else:
            self.approx_clf = RandomForestRegressor(n_estimators=50)

        if n_jobs is None:
            self.n_jobs = 1
        else:
            self.n_jobs = n_jobs

        # validate random projection list
        if rp_clf_list is None:
            # the algorithms that should be using random projection
            self.rp_clf_list = ['LOF', 'KNN', 'ABOD']
        else:
            self.rp_clf_list = rp_clf_list

        if rp_ng_clf_list is None:
            # the algorithms that should not be using random projection
            self.rp_ng_clf_list = ['IForest', 'PCA', 'HBOS', 'MCD', 'LMDD']
        else:
            self.rp_ng_clf_list = rp_ng_clf_list

        # Validate max_features
        check_parameter(self.max_features, low=0, high=1, include_left=False,
                        include_right=True, param_name='max_features')

        # validate model approximation list
        if approx_clf_list is None:
            # the algorithms that should be using approximation
            self.approx_clf_list = ['LOF', 'KNN', 'CBLOF', 'OCSVM', 'IForest']
        else:
            self.approx_clf_list = approx_clf_list

        if approx_ng_clf_list is None:
            # the algorithms that should not be using approximation
            self.approx_ng_clf_list = ['PCA', 'HBOS', 'ABOD', 'MCD',
                                       'LMDD', 'LSCP']
        else:
            self.approx_ng_clf_list = approx_ng_clf_list

        this_directory = os.path.abspath(os.path.dirname(__file__))

        if cost_forecast_loc_fit is None:
            self.cost_forecast_loc_fit_ = os.path.join(
                this_directory, 'saved_models', 'bps_train.joblib')
        else:
            self.cost_forecast_loc_fit_ = cost_forecast_loc_fit

        if cost_forecast_loc_pred is None:
            self.cost_forecast_loc_pred_ = os.path.join(
                this_directory, 'saved_models', 'bps_prediction.joblib')
        else:
            self.cost_forecast_loc_pred_ = cost_forecast_loc_pred

        return self
Example #17
    def __init__(self,
                 epochs=100,
                 batch_size=32,
                 lr=1e-3,
                 loss='mse',
                 dropout_rate=0.2,
                 l2_regularizer=0.1,
                 validation_size=0.1,
                 verbose=1,
                 random_state=42,
                 contamination=0.1,
                 hid_dim=16,
                 lat_dim=8):
        """AutoEncoder

        Parameters
        ----------
        epochs: int (default is 100)
            The number of epochs used to train the model.

        batch_size: int (default is 32)
            The number of instances per training batch.

        lr: float (default is 1e-3)
            The learning rate.

        loss: str (default is "mse")
            The loss function used to train the model.

        dropout_rate: float (default is 0.2)
            In range [0, 1) (not implemented).

        l2_regularizer: float (default is 0.1)
            The hyperparameter used to balance the loss and the weights.

        validation_size: float (default is 0.1)
            In range (0, 1); the fraction of data used to evaluate the
            training result (not implemented).

        contamination: float (default is 0.1)
            In range (0, 1); a threshold used to decide the normal score
            (not used).

        hid_dim: int (default is 16)
            The number of neurons in the hidden layer.

        lat_dim: int (default is 8)
            The number of neurons in the latent layer.

        verbose: int (default is 1)
            Verbosity level controlling what information is printed;
            the higher the value, the more information is printed.

        random_state: int (default is 42)
            The seed used by the random number generator.

        """
        self.epochs = epochs
        self.batch_size = batch_size
        self.loss = loss
        self.dropout_rate = dropout_rate
        self.l2_regularizer = l2_regularizer
        self.validation_size = validation_size
        self.verbose = verbose
        self.random_state = random_state
        self.lr = lr
        self.contamination = contamination
        self.hid_dim = hid_dim
        self.lat_dim = lat_dim

        check_parameter(dropout_rate,
                        0,
                        1,
                        param_name='dropout_rate',
                        include_left=True)

        if self.loss == 'mse' or (not self.loss):
            self.criterion = nn.MSELoss()
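
A minimal sketch of the criterion chosen above (assumes PyTorch; nn.MSELoss averages the squared reconstruction error):

import torch
import torch.nn as nn

criterion = nn.MSELoss()
x = torch.tensor([0.0, 1.0])
x_hat = torch.tensor([0.5, 0.5])
print(criterion(x_hat, x))  # tensor(0.2500)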