import numpy as np
from sklearn.cluster import MiniBatchKMeans
from sklearn.mixture import GaussianMixture


def train_gmm(num_components, feats):

    # Diagonal-covariance GMM; _initialize_parameters is overridden below so
    # that fit() seeds the EM run with MiniBatchKMeans instead of full KMeans.
    gmm = GaussianMixture(n_components=num_components,
                          covariance_type='diag',
                          init_params='kmeans',
                          verbose=True)

    def _initialize_parameters(X, random_state):
        """Initialize the model parameters.
        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
        random_state : RandomState
            A random number generator instance.
        """
        n_samples, _ = X.shape

        if gmm.init_params == 'kmeans':
            # One-hot responsibilities from MiniBatchKMeans cluster labels.
            resp = np.zeros((n_samples, gmm.n_components))
            label = MiniBatchKMeans(n_clusters=gmm.n_components,
                                    batch_size=256,
                                    verbose=True,
                                    n_init=1,
                                    random_state=random_state).fit(X).labels_
            resp[np.arange(n_samples), label] = 1
        elif gmm.init_params == 'random':
            resp = random_state.rand(n_samples, gmm.n_components)
            resp /= resp.sum(axis=1)[:, np.newaxis]
        else:
            raise ValueError("Unimplemented initialization method '%s'" %
                             gmm.init_params)

        gmm._initialize(X, resp)

    # Monkey-patch the instance so that fit() picks up the MiniBatchKMeans
    # initialization defined above.
    gmm._initialize_parameters = _initialize_parameters
    gmm.fit(feats)
    return gmm
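
A minimal usage sketch, assuming nothing beyond the function above; the
feature matrix and component count here are illustrative:

import numpy as np

feats = np.random.default_rng(0).standard_normal((10000, 64))
gmm = train_gmm(num_components=16, feats=feats)
print(gmm.converged_, gmm.weights_.shape)  # e.g. True (16,)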
Example 2
    # Method of an extreme-deconvolution mixture class (cf. the
    # XDGaussianMixture check below), shown without its enclosing class.
    # Assumes module-level imports: numpy as np, warnings, and
    # sklearn.mixture.GaussianMixture.
    def _initialize_parameters(self,
                               Y,
                               Yerr,
                               random_state,
                               projection=None,
                               log_weight=None,
                               Yclass=None):
        """Initialize the model parameters.

        Parameters
        ----------
        Y : array_like, shape (n_samples, n_y_features)
            Input data.

        Yerr : array_like, shape (n_samples, n_y_features[, n_y_features])
            (Co)variances on input data.

        random_state : RandomState
            A random number generator instance.

        projection : array_like, shape (n_samples, n_y_features, n_x_features), optional
            An optional projection matrix, especially useful when there are
            missing data.

        log_weight : array_like, shape (n_samples,)
            Optional log weights for the various points.

        Yclass : array_like, shape (n_samples, n_classes)
            Optional log probability for each point to belong to a given class.

        """
        if isinstance(self.init_params, XDGaussianMixture):
            # Warm start: copy the parameters of an already-fitted model.
            parameters = [p.copy()
                          for p in self.init_params._get_parameters()]
            self._set_parameters(parameters)
        else:
            init_params = self.init_params
            if projection is not None:
                # Keep only points whose projection is the identity, i.e.
                # fully observed points that the plain GaussianMixture
                # below can be fitted on directly.
                identity = np.zeros(shape=projection.shape[1:])
                j = range(min(identity.shape))
                identity[j, j] = 1
                mask = np.all(projection == identity, axis=(1, 2))
                n_x_features = projection.shape[2]
            else:
                mask = np.ones(self.n_samples_, dtype=bool)
                n_x_features = Y.shape[1]
            n_points = Y.shape[0]
            if log_weight is not None:
                # Rejection sampling: keep point i with probability
                # exp(log_weight[i]), drawing from the supplied
                # random_state rather than the global NumPy RNG.
                mask &= np.log(random_state.rand(
                    log_weight.shape[0])) < log_weight
            if init_params == 'gmm':
                # The temporary GaussianMixture below only accepts the
                # standard sklearn options, so 'gmm' falls back to 'kmeans'.
                init_params = 'kmeans'
            if Yclass is None:
                # Default: identical (unnormalized) log probability per class.
                Yclass = np.zeros((n_points, self.n_classes))
            # From here on both paths look the same: every point carries
            # per-class probabilities.
            self.weights_ = np.empty(self.n_components)
            self.classes_ = np.empty((self.n_components, self.n_classes))
            self.means_ = np.empty((self.n_components, n_x_features))
            self.covariances_ = np.empty(
                (self.n_components, n_x_features, n_x_features))
            with warnings.catch_warnings():
                # The rough 30-iteration fit is only an initialization,
                # so convergence warnings are silenced.
                warnings.simplefilter('ignore')
                tmp_gmm = GaussianMixture(self.n_components,
                                          max_iter=30,
                                          covariance_type='full',
                                          init_params=init_params,
                                          random_state=self.random_state)
                if init_params != 'random':
                    if np.sum(mask) < self.n_components:
                        raise ValueError(
                            'Number of valid points smaller than number of components.'
                        )
                    tmp_gmm.fit(Y[mask])
                    # Combine mixture responsibilities with per-point class
                    # probabilities, then normalize across classes for each
                    # component.
                    resp = tmp_gmm.predict_proba(Y[mask])[:, :, np.newaxis] * \
                        np.exp(Yclass)[mask, np.newaxis, :]
                    xclass = np.sum(resp, axis=0)
                    xclass /= np.sum(xclass, axis=1)[:, np.newaxis]
                else:
                    # Use the RandomState passed to this method; the raw
                    # tmp_gmm.random_state attribute may be an int or None.
                    tmp_gmm._initialize_parameters(Y[mask], random_state)
                    # Uniform log class probabilities when no fit is run.
                    xclass = np.zeros((self.n_components, self.n_classes)) - \
                        np.log(self.n_classes)
                self.means_ = tmp_gmm.means_
                self.classes_ = xclass
                self.weights_ = tmp_gmm.weights_
                self.covariances_ = tmp_gmm.covariances_
            # Renormalize the weights
            self.weights_ /= np.sum(self.weights_)
            self.n_features_ = n_x_features
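
The log-weight mask above is a rejection-sampling step: point i survives
with probability exp(log_weight[i]). A standalone sketch of just that idea
(the array values are illustrative):

import numpy as np

rng = np.random.RandomState(42)
log_weight = np.log(np.array([1.0, 0.5, 0.1, 0.9]))
# log(u) < log(w)  <=>  u < w, so point i is kept with probability w_i.
mask = np.log(rng.rand(log_weight.shape[0])) < log_weight
print(mask)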