def test(file, max_n_components, n_classes):
    """Fit a GaussianMixture for every k in 1..max_n_components on the given
    dataset file, report SSE and NMI per k, and plot both measures vs. k.
    """
    print('GaussianMixture for set: ' + file)
    dataset = utils.dataset_reader(file)
    X, y = utils.data_process(dataset)

    component_range = range(1, max_n_components + 1)
    sse_values = []
    nmi_values = []
    for k in component_range:
        gmm = GaussianMixture(n_components=k)
        gmm.fit(X)
        y_hat = gmm.predict(X)
        # Cluster quality: SSE against the fitted means, NMI against true labels.
        sse = utils.sum_of_squared_errors(X, y_hat, gmm.means_)
        nmi = utils.normalized_mutual_information(y, n_classes, y_hat, k)
        print('{0:2d} components, SSE: {1:.2f}, NMI: {2:.4f}'.format(
            k, sse, nmi))
        sse_values.append(sse)
        nmi_values.append(nmi)

    utils.plot_measure_vs_k('SSE', sse_values, component_range)
    utils.plot_measure_vs_k('NMI', nmi_values, component_range)
def testPredictClasses(self):
    """
    Assert that torch.FloatTensor is handled correctly.
    """
    features = torch.randn(4, 2)
    k = np.random.randint(1, 100)
    model = GaussianMixture(k, features.size(1))
    model.fit(features)
    labels = model.predict(features)

    # Class memberships must come back as a 1-D tensor with one entry per sample.
    self.assertEqual(torch.Tensor(features.size(0)).size(), labels.size())
def testPredictProbabilities(self):
    """
    Assert that torch.FloatTensor is handled correctly when returning class probabilities.
    """
    features = torch.randn(4, 2)
    k = np.random.randint(1, 100)
    model = GaussianMixture(k, features.size(1))
    model.fit(features)

    # With probs=True the prediction must have one row per sample and one
    # column per mixture component, i.e. shape (n, k).
    probabilities = model.predict(features, probs=True)
    self.assertEqual(
        torch.Tensor(features.size(0), k).size(), probabilities.size())
def main():
    """Demo: fit a 2-component Gaussian mixture to two shifted/scaled
    Gaussian clouds and plot the resulting assignments."""
    n, d = 300, 2

    # Start from n standard-normal points in d dimensions ..
    points = torch.Tensor(n, d).normal_()
    # .. then move each half to its own non-standard Gaussian
    # (shift first, then scale — so the scaling also stretches the shift).
    points[:n//2] -= 1
    points[:n//2] *= sqrt(3)
    points[n//2:] += 1
    points[n//2:] *= sqrt(2)

    # Instantiate and fit the mixture ..
    n_components = 2
    mixture = GaussianMixture(n_components, d)
    mixture.fit(points)

    # .. recover the component assignment of every point and visualise it
    assignments = mixture.predict(points)
    plot(points, assignments)
# init_dataset_size = int(train_dataset.shape[0] * 0.005) # init_dataset = train_dataset[np.random.choice(train_dataset.shape[0], init_dataset_size, replace=False)] init_dataset = train_dataset model = GaussianMixture(X=init_dataset, n_components=args.components, random_state=seed, init_params=args.init) init_metrics = { 'aic': model.aic(train_dataset), 'bic': model.bic(train_dataset), 'll': model.score(train_dataset) } model.fit(train_dataset, args.epochs, train_dataset_labels, args, output_dir) predicted_labels = model.predict_proba(train_dataset).tolist() predicted_labels = np.array(predicted_labels) print('\nSaving images...') metrics = model.history_['metrics'] for key in metrics: metrics[key].insert(0, init_metrics[key]) plot_metric(metrics['ll'], args.epochs, output_dir, 'Epochs', 'Log-Likelihood') plot_metric(metrics['aic'], args.epochs, output_dir, 'Epochs', 'AIC') plot_metric(metrics['bic'], args.epochs, output_dir, 'Epochs', 'BIC')