def test_gmm_2_clusters_rand_init():
    """Fit a two-cluster GMM to the module-level ``X`` and check reference values.

    ``gmm`` is called without an explicit ``init_mode``. The log-likelihood,
    the priors, the first row of posteriors and the means are each compared
    against fixed expected values within an absolute tolerance.
    """
    fitted = gmm(X, n_clusters=2)
    # The covariances are returned by gmm() but are not checked by this test.
    centers, _covars, weights, log_likelihood, resp = fitted

    assert_allclose(log_likelihood, -4341.0, atol=0.1)
    assert_allclose(weights, [0.5, 0.5], atol=0.1)
    assert_allclose(resp[0], [0.0, 1.0], atol=0.1)
    assert_allclose(centers, [[4, 4], [0, 0]], atol=0.2)
def generate_gmms(X, _clusters=64):
    """Fit a GMM to ``X`` and return the fitted quantities as a dict.

    Parameters
    ----------
    X
        Data forwarded unchanged to ``gmm``.
    _clusters : int, optional
        Number of clusters passed as ``n_clusters``. Default is 64.

    Returns
    -------
    dict
        Keys ``'priors'``, ``'means'``, ``'covars'``, ``'ll'`` and
        ``'posteriors'`` (in that insertion order), holding the
        corresponding values returned by ``gmm``.
    """
    fitted = gmm(
        X,
        n_clusters=_clusters,
        max_num_iterations=10000,
        verbose=False,
    )
    # gmm() yields (means, covars, priors, ll, posteriors) in this order.
    centers, spreads, weights, log_likelihood, resp = fitted
    return dict(
        priors=weights,
        means=centers,
        covars=spreads,
        ll=log_likelihood,
        posteriors=resp,
    )
def fit(self, X, y=None):
    """Fit a GMM on a random subset of the rows of ``X``.

    ``X`` is first flattened to 2-D with ``X.shape[2]`` columns (so it must
    have at least three dimensions). ``self.gmm_samples_number`` rows are
    then drawn without replacement and passed to ``gmm``. The transposed
    means and covariances, together with the priors, are stored as the
    tuple ``self.gmm_``.

    Parameters
    ----------
    X : numpy.ndarray
        Input data; reshaped to ``(-1, X.shape[2])``.
    y : ignored
        Not used by this method.

    Returns
    -------
    self

    Raises
    ------
    AttributeError
        If the flattened data has fewer rows than
        ``self.gmm_samples_number``.
    """
    rows = X.reshape(-1, X.shape[2])
    n_wanted = self.gmm_samples_number
    if n_wanted > len(rows):
        raise AttributeError(
            'Number of samples must be greater than the number of GMM samples')

    # Sampling without replacement keeps every selected row distinct.
    picked = np.random.choice(rows.shape[0], n_wanted, replace=False)

    # The log-likelihood and posteriors returned by gmm() are not kept.
    centers, spreads, weights, _ll, _posteriors = gmm(
        rows[picked, :],
        n_clusters=self.gmm_clusters_number,
        init_mode=self.init_mode,
    )

    self.gmm_ = (centers.transpose(), spreads.transpose(), weights)
    return self
def get_gmm(kps, descs, image_size):
    """Reduce descriptors with PCA, add spatial information and fit a GMM.

    Parameters
    ----------
    kps
        Keypoints, forwarded to ``embed_spatial_info``.
    descs
        Descriptors; projected to 64 PCA components before being passed to
        ``embed_spatial_info``.
    image_size
        Forwarded to ``embed_spatial_info``.

    Returns
    -------
    tuple
        ``(means, covariance, priors)`` where the means and covariances are
        the transposes of the first two values returned by ``gmm`` and the
        priors are its third value.
    """
    print("GMM started---")

    # Descriptors go straight into PCA; a StandardScaler step that used to
    # precede it is left disabled.
    reduced = PCA(n_components=64).fit_transform(descs)
    augmented = embed_spatial_info(kps, reduced, image_size)

    fitted = gmm(
        augmented,
        n_clusters=512,
        init_mode='rand',
        max_num_iterations=100,
    )
    return fitted[0].transpose(), fitted[1].transpose(), fitted[2]
def test_gmm_2_clusters_custom_init_fail():
    """Call ``gmm`` with ``init_mode='custom'`` and no further arguments.

    NOTE(review): the name suggests this call is expected to fail, but no
    exception expectation is visible in this block; presumably a decorator
    or the test runner supplies it - confirm.
    """
    # The return value is irrelevant here, so it is simply discarded.
    gmm(X, n_clusters=2, init_mode='custom')
def fit(self, X, y, gmm_init='kmeans', svc_kernel='linear', svc_penalty='l2', C=1.0, seed=None):
    """
    Fit a GMM with `k` clusters using the sample images `X`.
    Then train a SVC on the Fisher vector encodings of `X`, given
    the class labels `y`.

    Parameters
    ----------
    X : array, shape (N,H,W,C), or list of N arrays w/ shapes (H_i,W_i,C)
        A set of training images from which to generate a sample of CNN
        feature vectors to which a GMM will be fit. `C` should match the
        `input_shape` of the CNN (must be 3 for ImageNet pretrained models).
    y : array, shape (N,)
        Array of class labels of images in `X`.
    gmm_init : str, optional
        Method to use for GMM initialization. One of {'kmeans', 'rand'}.
        Default = 'kmeans'. Custom init is also possible through
        `cyvlfeat`, but is not implemented here.
    svc_kernel : str, optional
        Kernel type of the support vector classifier. Only 'linear'
        (sklearn.svm.LinearSVC, liblinear) is implemented right now; any
        other value raises NotImplementedError. LinearSVC has flexible
        penalty/loss options and scales well to large numbers of samples
        (> ~10,000).
    svc_penalty : str, optional
        One of {`l1`, `l2`}. Default='l2' is recommended.
    C : float, optional
        Inverse of regularization strength. Default=1.0 seems to work best
        in VGG+DTD tests.
    seed : int, optional
        Random state passed to the LinearSVC. It is not forwarded to the
        GMM fit. Default=None.

    Returns
    -------
    train_score : float
        Mean accuracy of trained SVC on the training set (`X`, `y`)

    Raises
    ------
    ValueError
        If `X` is neither a numpy array nor a list.
    NotImplementedError
        If `svc_kernel` is not 'linear'. This is raised after the GMM has
        been fitted.
    """
    if isinstance(X, np.ndarray):
        assert X.ndim == 4, 'X must have shape (N,H,W,C) if an np.array'
        cnn_out = self.cnn.predict(X)
        # Keep one 2-D (n_local, D) matrix per image so that encode_batch()
        # below receives a per-image list, as it does in the list branch.
        # The previous code reshaped with `feats[-1]` (an array slice, not a
        # dimension) and never bound `img_feats` on this path.
        # NOTE(review): assumes _localfeatures() also yields 2-D
        # (n_local, D) arrays per image - confirm.
        img_feats = [f.reshape(-1, f.shape[-1]) for f in cnn_out]
    elif isinstance(X, list):
        assert isinstance(
            X[0], np.ndarray), 'X must contain numpy.ndarrays, if a list'
        img_feats = [self._localfeatures(x) for x in X]
    else:
        raise ValueError(
            'GMM input X has unknown form. Should be 4D array or list of 3D arrays.'
        )

    # Stack the local descriptors of all images into one matrix for the GMM.
    feats = np.vstack(img_feats)

    # Fit the GMM
    # TODO: figure out covariance_bound Buffer bug
    # should be = to max(all_feats.var(axis=k_feat))*0.0001
    print('Fitting GMM with %d clusters...' % self.k)
    # The log-likelihood and posteriors are returned but not needed here.
    self.means, self.covars, self.priors, _ll, _posteriors = gmm.gmm(
        feats,
        n_clusters=self.k,
        covariance_bound=None,
        init_mode=gmm_init)

    # Train the SVC
    if svc_kernel == 'linear':
        self.svc = LinearSVC(penalty=svc_penalty,
                             C=C,
                             class_weight='balanced',
                             random_state=seed)
    else:
        raise NotImplementedError(
            'Only `linear` svc_kernel implemented right now.')

    fv_X = self.encode_batch(img_feats)
    self.svc.fit(fv_X, y)
    return self.svc.score(fv_X, y)
def test_gmm_2_clusters_custom_init_fail():
    """Check that ``gmm`` raises ``ValueError`` for ``init_mode='custom'``.

    The call passes only ``n_clusters`` and ``init_mode`` in addition to the
    module-level ``X``.
    """
    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        # The result is never produced on the expected path, so it is not bound.
        gmm(X, n_clusters=2, init_mode='custom')