from sklearn.mixture import GaussianMixture


def RunGmm(X, Y, num_clusters_to_try, random_state):
    results = {}
    for k in num_clusters_to_try:
        prefix = "gmm_" + str(k) + "_"
        # Pass random_state through so runs are reproducible.
        algo = GaussianMixture(n_components=k, verbose=0, max_iter=1000,
                               random_state=random_state)
        algo.fit(X)
        predicted = algo.predict(X)
        predicted_prob = algo.predict_proba(X)
        ari = Compute_ARI(Y, predicted)
        metrics = ComputeClusteringMetrics(Y, predicted, X)
        print(metrics)
        results[prefix + "metrics"] = metrics
        results[prefix + "ari"] = ari
        results[prefix + "algo"] = algo
        results[prefix + "predicted"] = predicted
        results[prefix + "prob"] = predicted_prob
        # The posterior probabilities double as a new feature representation
        # for downstream models, hence the second key.
        results[prefix + "new_data"] = predicted_prob
        results[prefix + "bic"] = algo.bic(X)
        print("done({2}) for {0} : {1}".format(str(k), str(ari), algo.converged_))

    return results
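
A minimal invocation sketch (Compute_ARI and ComputeClusteringMetrics are the caller's own helpers, assumed to be in scope; the data here is synthetic):

from sklearn.datasets import make_blobs

X_demo, Y_demo = make_blobs(n_samples=500, centers=4, random_state=0)
candidate_ks = [2, 3, 4, 5]
gmm_results = RunGmm(X_demo, Y_demo, candidate_ks, random_state=0)
# Pick the k with the lowest BIC from the returned dictionary.
best_k = min(candidate_ks, key=lambda k: gmm_results["gmm_{}_bic".format(k)])
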
Example 2
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.mixture import GaussianMixture
from sklearn.neural_network import BernoulliRBM

# NUM_DIM, SEED, X_train, y_train and X_score are defined earlier in the script.
# ====== lda ====== #
lda = LinearDiscriminantAnalysis(n_components=NUM_DIM)
lda.fit(X_train, y_train)
X_train_lda = lda.transform(X_train)
X_score_lda = lda.transform(X_score)
# ====== plda ====== #
# PLDA (probabilistic LDA) is provided by the surrounding project, not scikit-learn.
plda = PLDA(n_phi=NUM_DIM, random_state=SEED)
plda.fit(X_train, y_train)
X_train_plda = plda.predict_log_proba(X_train)
X_score_plda = plda.predict_log_proba(X_score)
# ====== gmm ====== #
gmm = GaussianMixture(n_components=NUM_DIM,
                      max_iter=100,
                      covariance_type='full',
                      random_state=SEED)
gmm.fit(X_train)
# _estimate_weighted_log_prob is a private sklearn method; it returns the
# (n_samples, n_components) matrix of weighted per-component log-likelihoods,
# used here as a feature representation.
X_train_gmm = gmm._estimate_weighted_log_prob(X_train)
X_score_gmm = gmm._estimate_weighted_log_prob(X_score)
# ====== rbm ====== #
rbm = BernoulliRBM(n_components=NUM_DIM,
                   batch_size=8,
                   learning_rate=0.0008,
                   n_iter=8,
                   verbose=2,
                   random_state=SEED)
rbm.fit(X_train)
X_train_rbm = rbm.transform(X_train)
X_score_rbm = rbm.transform(X_score)
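
One caveat for the RBM step above: scikit-learn's BernoulliRBM models binary visible units and expects inputs in [0, 1], so real-valued features are usually min-max scaled first. A minimal sketch (the scaler variable names are illustrative):

from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()                 # fit on training data only
X_train_01 = scaler.fit_transform(X_train)
X_score_01 = scaler.transform(X_score)  # may slightly exceed [0, 1]
rbm.fit(X_train_01)
X_train_rbm = rbm.transform(X_train_01)
X_score_rbm = rbm.transform(X_score_01)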

# ===========================================================================
# Deep Learning
# ===========================================================================
Example 3
# The snippet picks up mid-script: `tsne` is a t-SNE instance created earlier.
X_score_tsne = tsne.fit_transform(X_score)

# ===========================================================================
# Visualize
# ===========================================================================
def plot(train, score, title, applying_pca=False):
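    # The original snippet is truncated here; the body below is only a sketch,
    # assuming `train`/`score` are feature arrays and `applying_pca` requests
    # a 2-D PCA projection (fitted on `train`) before scatter-plotting.
    import matplotlib.pyplot as plt
    from sklearn.decomposition import PCA

    if applying_pca:
        pca = PCA(n_components=2)
        train = pca.fit_transform(train)
        score = pca.transform(score)
    plt.figure()
    plt.scatter(train[:, 0], train[:, 1], s=8, alpha=0.5, label='train')
    plt.scatter(score[:, 0], score[:, 1], s=8, alpha=0.5, label='score')
    plt.legend()
    plt.title(title)
    plt.show()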
Example 4
from scipy.special import logsumexp
from sklearn.mixture import GaussianMixture

# BaseDetector, _check_X and invert_order come from the surrounding
# (pyod-style) project.


class GMM(GaussianMixture, BaseDetector):

    def __init__(self, n_components=1, covariance_type='full', tol=1e-3,
                 reg_covar=1e-6, max_iter=100, n_init=1, init_params='kmeans',
                 weights_init=None, means_init=None, precisions_init=None,
                 random_state=42, warm_start=False,
                 verbose=0, verbose_interval=10, contamination=0.1):
        """GMM

        Parameters
        ----------
        n_components : int, defaults to 1.
            The number of mixture components.

        covariance_type : {'full' (default), 'tied', 'diag', 'spherical'}
            String describing the type of covariance parameters to use.

        tol : float, defaults to 1e-3.
            The convergence threshold. EM iterations will stop when the
            lower bound average gain is below this threshold.

        reg_covar : float, defaults to 1e-6.
            Non-negative regularization added to the diagonal of covariance.
            Ensures that the covariance matrices are all positive.

        max_iter : int, defaults to 100.
            The number of EM iterations to perform.

        n_init : int, defaults to 1.
            The number of initializations to perform. The best results are kept.

        init_params : {'kmeans', 'random'}, defaults to 'kmeans'.
            The method used to initialize the weights, the means and the precisions.

        weights_init : array-like, shape (n_components, ), optional
            The user-provided initial weights, defaults to None.

        means_init : array-like, shape (n_components, n_features), optional
            The user-provided initial means, defaults to None.

        precisions_init : array-like, optional
            The user-provided initial precisions (inverse of the covariance
            matrices), defaults to None.

        random_state : int, RandomState instance or None, optional (default=42)
            Controls the random seed given to the method chosen to
            initialize the parameters.

        warm_start : bool, defaults to False.
            If 'warm_start' is True, the solution of the last fitting is used
            as initialization for the next call of fit().

        verbose : int, defaults to 0.
            Enable verbose output. If 1 then it prints the current
            initialization and each iteration step. If greater than 1 then
            it prints also the log probability and the time needed
            for each step.

        verbose_interval : int, defaults to 10.
            Number of iterations done before the next print.

        contamination : float in (0., 1.), defaults to 0.1.
            The proportion of outliers in the data set; a threshold used to
            decide the normal score (not used by this detector).

        """
        self.n_components = n_components
        self.covariance_type = covariance_type
        self.tol = tol
        self.reg_covar = reg_covar
        self.max_iter = max_iter
        self.n_init = n_init
        self.init_params = init_params
        self.weights_init = weights_init
        self.means_init = means_init
        self.precisions_init = precisions_init
        self.random_state = random_state
        self.warm_start = warm_start
        self.verbose = verbose
        self.verbose_interval = verbose_interval
        self.contamination = contamination

    def fit(self, X, y=None):
        """Fit the model. y is optional for unsupervised methods.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : numpy array of shape (n_samples,), optional (default=None)
            The ground truth of the input samples (labels).
        """
        # validate inputs X and y (optional)
        X = _check_X(X)
        self._set_n_classes(y)

        self.model_ = GaussianMixture(n_components=self.n_components,
                                      covariance_type=self.covariance_type,
                                      tol=self.tol,
                                      reg_covar=self.reg_covar,
                                      max_iter=self.max_iter,
                                      n_init=self.n_init,
                                      init_params=self.init_params,
                                      weights_init=self.weights_init,
                                      means_init=self.means_init,
                                      precisions_init=self.precisions_init,
                                      random_state=self.random_state,
                                      warm_start=self.warm_start,
                                      verbose=self.verbose,
                                      verbose_interval=self.verbose_interval)
        self.model_.fit(X=X, y=y)

        return self

    def decision_function(self, X):
        """Predict raw anomaly scores of X using the fitted detector.

        The anomaly score of an input sample is computed based on the fitted
        detector. For consistency, outliers are assigned larger anomaly
        scores, so the raw log-likelihoods are passed through invert_order():
        after inversion, the higher the score, the more likely the sample
        is abnormal.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples. Sparse matrices are accepted only
            if they are supported by the base estimator.

        Returns
        -------
        anomaly_scores : numpy array of shape (n_samples,)
            The anomaly score of the input samples.
        """
        # check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_'])
        # logsumexp over the per-component weighted log-probabilities gives
        # the total log-likelihood of each sample; invert_order flips the
        # sign so that low-likelihood (outlying) samples receive high scores.
        return invert_order(logsumexp(self.model_._estimate_weighted_log_prob(X), axis=1))

    def predict_proba(self, X):
        """Not implemented; this detector does not expose outlier probabilities."""
        raise NotImplementedError
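
A short usage sketch for the detector above (assuming the project-local helpers it relies on, such as BaseDetector and invert_order, are importable):

import numpy as np

rng = np.random.RandomState(42)
X_normal = rng.normal(size=(500, 2))                    # inliers around the origin
X_test = np.vstack([rng.normal(size=(10, 2)),
                    rng.normal(loc=6.0, size=(5, 2))])  # 5 obvious outliers

detector = GMM(n_components=2, contamination=0.1)
detector.fit(X_normal)
scores = detector.decision_function(X_test)  # shape (15,); larger = more abnormal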