Example #1
0
def test_gmm_2_clusters_rand_init():
    """Fit a two-cluster GMM to the module-level ``X`` and check the result.

    No ``init_mode`` is passed, so ``gmm`` uses its default initialisation
    (presumably random, going by the test name -- confirm against ``gmm``).
    The log-likelihood, priors, first posterior row and means are each
    compared against reference values with an absolute tolerance.
    """
    centres, _covars, weights, log_likelihood, responsibilities = gmm(
        X, n_clusters=2)

    # (actual, expected, absolute tolerance), checked in this order.
    expectations = (
        (log_likelihood, -4341.0, 0.1),
        (weights, [0.5, 0.5], 0.1),
        (responsibilities[0], [0.0, 1.0], 0.1),
        (centres, [[4, 4], [0, 0]], 0.2),
    )
    for actual, expected, tolerance in expectations:
        assert_allclose(actual, expected, atol=tolerance)
Example #2
0
def test_gmm_2_clusters_rand_init():
    """Fit a two-cluster GMM to the module-level ``X`` and check the result.

    No ``init_mode`` is passed, so ``gmm`` uses its default initialisation
    (presumably random, going by the test name -- confirm against ``gmm``).
    The log-likelihood, priors, first posterior row and means are each
    compared against reference values with an absolute tolerance.
    """
    centres, _covars, weights, log_likelihood, responsibilities = gmm(
        X, n_clusters=2)

    # (actual, expected, absolute tolerance), checked in this order.
    expectations = (
        (log_likelihood, -4341.0, 0.1),
        (weights, [0.5, 0.5], 0.1),
        (responsibilities[0], [0.0, 1.0], 0.1),
        (centres, [[4, 4], [0, 0]], 0.2),
    )
    for actual, expected, tolerance in expectations:
        assert_allclose(actual, expected, atol=tolerance)
Example #3
0
def generate_gmms(X, _clusters=64):
    """Fit a GMM to ``X`` and return the fitted quantities as a dict.

    Parameters
    ----------
    X : data passed straight through to ``gmm``.
    _clusters : int, optional
        Number of mixture components requested from ``gmm``. Default is 64.

    Returns
    -------
    dict
        Keys ``'priors'``, ``'means'``, ``'covars'``, ``'ll'`` and
        ``'posteriors'``, holding the corresponding values returned by
        ``gmm``.
    """
    # The fit is capped at 10000 iterations and runs silently.
    centres, spreads, weights, log_likelihood, responsibilities = gmm(
        X, n_clusters=_clusters, max_num_iterations=10000, verbose=False)

    return dict(
        priors=weights,
        means=centres,
        covars=spreads,
        ll=log_likelihood,
        posteriors=responsibilities,
    )
 def fit(self, X, y=None):
     """Fit a GMM on a random subsample of the rows of ``X``.

     ``X`` is flattened to 2-D using its third axis as the row width, then
     ``self.gmm_samples_number`` rows are drawn without replacement and
     handed to ``gmm``. The transposed means, transposed covariances and
     the priors are stored as the tuple ``self.gmm_``.

     Parameters
     ----------
     X : array with at least three axes (``X.shape[2]`` is read).
     y : ignored.

     Returns
     -------
     self

     Raises
     ------
     AttributeError
         If the flattened ``X`` has fewer rows than
         ``self.gmm_samples_number``.
     """
     flat = X.reshape(-1, X.shape[2])
     n_rows = len(flat)
     if n_rows < self.gmm_samples_number:
         message = ('Number of samples must be greater than '
                    'the number of GMM samples')
         raise AttributeError(message)

     # Draw distinct row indices so no row is used twice.
     picked = np.random.choice(
         n_rows, self.gmm_samples_number, replace=False)
     subsample = flat[picked, :]

     # Only the first three outputs of gmm are kept.
     centres, spreads, weights, _ll, _posteriors = gmm(
         subsample,
         n_clusters=self.gmm_clusters_number,
         init_mode=self.init_mode,
     )
     self.gmm_ = (centres.transpose(), spreads.transpose(), weights)
     return self
Example #5
0
def get_gmm(kps, descs, image_size):
    """Fit a 512-component GMM to PCA-reduced descriptors.

    The descriptors are projected to 64 dimensions with PCA, passed through
    ``embed_spatial_info`` together with ``kps`` and ``image_size``
    (presumably to append keypoint position information -- confirm against
    that helper), and then fitted with a randomly initialised GMM limited to
    100 iterations.

    Returns
    -------
    tuple
        ``(means, covariance, priors)``: the first two outputs of ``gmm``
        transposed, and the third as returned.
    """
    print("GMM started---")

    # NOTE: descriptors go into PCA unscaled; no standardisation is applied.
    reduced = PCA(n_components=64).fit_transform(descs)
    augmented = embed_spatial_info(kps, reduced, image_size)

    fitted = gmm(augmented,
                 n_clusters=512,
                 init_mode='rand',
                 max_num_iterations=100)

    return fitted[0].transpose(), fitted[1].transpose(), fitted[2]
Example #6
0
def test_gmm_2_clusters_custom_init_fail():
    """Check that ``init_mode='custom'`` with no further arguments is rejected.

    The call supplies no initialisation data alongside ``init_mode='custom'``,
    so ``gmm`` is expected to raise ``ValueError``. The test passes only when
    that exception is raised; a silent success is reported as a failure.
    """
    try:
        gmm(X, n_clusters=2, init_mode='custom')
    except ValueError:
        return  # Expected outcome.
    raise AssertionError(
        "gmm() did not raise ValueError for init_mode='custom'")
Example #7
0
    def fit(self,
            X,
            y,
            gmm_init='kmeans',
            svc_kernel='linear',
            svc_penalty='l2',
            C=1.0,
            seed=None):
        """
        Fit a GMM with `k` clusters using the sample images `X`, then train an SVC
        on the Fisher vector encodings of `X`, given the class labels `y`.

        Parameters
        ----------
        X : array, shape (N,H,W,C), or list of N arrays w/ shapes (H_i,W_i,C)
            A set of training images from which to generate a sample of CNN
            feature vectors to which a GMM will be fit. `C` should match the
            `input_shape` of the CNN (must be 3 for ImageNet pretrained models).
        y : array, shape (N,)
            Array of class labels of images in `X`.
        gmm_init : str, optional
            Method to use for GMM initialization. One of {'kmeans', 'rand'}. Default = 'kmeans'.
            Custom init is also possible through `cyvlfeat`, but is not implemented here.
        svc_kernel : str, optional
            Kernel type for the support vector classifier. Only 'linear' is
            implemented (sklearn.svm.LinearSVC / liblinear); any other value
            raises NotImplementedError. Default = 'linear'.
        svc_penalty : str, optional
            One of {`l1`, `l2`}. Default='l2' is recommended.
        C : float, optional
            Inverse of regularization strength. Default=1.0 seems to work best in VGG+DTD tests.
        seed : int, optional
            Random state handed to the LinearSVC. It is not passed to the GMM
            fit. Default=None.

        Returns
        -------
        train_score : float
            Mean accuracy of trained SVC on the training set (`X`, `y`)

        Raises
        ------
        NotImplementedError
            If `svc_kernel` is not 'linear'.
        ValueError
            If `X` is neither a numpy array nor a list.
        AssertionError
            If `X` is an array that is not 4-D, or a list whose first element
            is not a numpy array.
        """
        # Reject unsupported kernels before doing any expensive CNN/GMM work.
        if svc_kernel != 'linear':
            raise NotImplementedError(
                'Only `linear` svc_kernel implemented right now.')

        if isinstance(X, np.ndarray):
            assert X.ndim == 4, 'X must have shape (N,H,W,C) if an np.array'
            cnn_out = self.cnn.predict(X)
            # The feature dimension is the size of the last axis, i.e.
            # `shape[-1]`; indexing the array itself (`cnn_out[-1]`) is not a
            # valid reshape dimension.
            n_channels = cnn_out.shape[-1]
            # Keep one 2-D feature matrix per image so the batch encoder
            # below gets the same kind of input as in the list branch.
            # Assumes `_localfeatures` yields (n_local, D) per image -- TODO confirm.
            img_feats = [img.reshape(-1, n_channels) for img in cnn_out]
            feats = cnn_out.reshape(-1, n_channels)
        elif isinstance(X, list):
            assert isinstance(
                X[0], np.ndarray), 'X must contain numpy.ndarrays, if a list'
            img_feats = [self._localfeatures(x) for x in X]
            feats = np.vstack(img_feats)
        else:
            raise ValueError(
                'GMM input X has unknown form. Should be 4D array or list of 3D arrays.'
            )

        # Fit the GMM
        # TODO: figure out covariance_bound Buffer bug
        #       should be = to max(all_feats.var(axis=k_feat))*0.0001
        print('Fitting GMM with %d clusters...' % self.k)
        # The log-likelihood and posteriors returned by the fit are not needed.
        self.means, self.covars, self.priors, _, _ = gmm.gmm(
            feats,
            n_clusters=self.k,
            covariance_bound=None,
            init_mode=gmm_init)

        # Train the SVC
        self.svc = LinearSVC(penalty=svc_penalty,
                             C=C,
                             class_weight='balanced',
                             random_state=seed)

        fv_X = self.encode_batch(img_feats)

        self.svc.fit(fv_X, y)

        return self.svc.score(fv_X, y)
Example #8
0
def test_gmm_2_clusters_custom_init_fail():
    """Check that ``init_mode='custom'`` with no further arguments is rejected.

    The call supplies no initialisation data alongside ``init_mode='custom'``,
    so ``gmm`` is expected to raise ``ValueError``. The test passes only when
    that exception is raised; a silent success is reported as a failure.
    """
    try:
        gmm(X, n_clusters=2, init_mode='custom')
    except ValueError:
        return  # Expected outcome.
    raise AssertionError(
        "gmm() did not raise ValueError for init_mode='custom'")
Example #9
0
def test_gmm_2_clusters_custom_init_fail():
    """Check that ``gmm`` raises ``ValueError`` for a bare ``init_mode='custom'``.

    No other initialisation arguments are supplied with the custom mode.
    """
    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        gmm(X, n_clusters=2, init_mode='custom')