def test_regularisation(): # We train the GaussianMixture on degenerate data by defining two clusters # of a 0 covariance. rng = np.random.RandomState(0) n_samples, n_features = 10, 5 X = np.vstack((np.ones( (n_samples // 2, n_features)), np.zeros((n_samples // 2, n_features)))) for covar_type in COVARIANCE_TYPE: gmm = GaussianMixture(n_components=n_samples, reg_covar=0, covariance_type=covar_type, random_state=rng) with warnings.catch_warnings(): warnings.simplefilter("ignore", RuntimeWarning) assert_raise_message( ValueError, "Fitting the mixture model failed because " "some components have ill-defined empirical " "covariance (for instance caused by " "singleton or collapsed samples). Try to " "decrease the number of components, or " "increase reg_covar.", gmm.fit, X) gmm.set_params(reg_covar=1e-6).fit(X)
def test_gaussian_mixture_fit_predict_n_init(): # Check that fit_predict is equivalent to fit.predict, when n_init > 1 X = np.random.RandomState(0).randn(1000, 5) gm = GaussianMixture(n_components=5, n_init=5, random_state=0) y_pred1 = gm.fit_predict(X) y_pred2 = gm.predict(X) assert_array_equal(y_pred1, y_pred2)
def test_monotonic_likelihood(): # We check that each step of the EM without regularization improve # monotonically the training set likelihood rng = np.random.RandomState(0) rand_data = RandomData(rng, scale=7) n_components = rand_data.n_components for covar_type in COVARIANCE_TYPE: X = rand_data.X[covar_type] gmm = GaussianMixture(n_components=n_components, covariance_type=covar_type, reg_covar=0, warm_start=True, max_iter=1, random_state=rng, tol=1e-7) current_log_likelihood = -np.infty with warnings.catch_warnings(): warnings.simplefilter("ignore", ConvergenceWarning) # Do one training iteration at a time so we can make sure that the # training log likelihood increases after each iteration. for _ in range(600): prev_log_likelihood = current_log_likelihood try: current_log_likelihood = gmm.fit(X).score(X) except ConvergenceWarning: pass assert_greater_equal(current_log_likelihood, prev_log_likelihood) if gmm.converged_: break assert gmm.converged_
def test_score(): covar_type = 'full' rng = np.random.RandomState(0) rand_data = RandomData(rng, scale=7) n_components = rand_data.n_components X = rand_data.X[covar_type] # Check the error message if we don't call fit gmm1 = GaussianMixture(n_components=n_components, n_init=1, max_iter=1, reg_covar=0, random_state=rng, covariance_type=covar_type) assert_raise_message( NotFittedError, "This GaussianMixture instance is not fitted " "yet. Call 'fit' with appropriate arguments " "before using this method.", gmm1.score, X) # Check score value with warnings.catch_warnings(): warnings.simplefilter("ignore", ConvergenceWarning) gmm1.fit(X) gmm_score = gmm1.score(X) gmm_score_proba = gmm1.score_samples(X).mean() assert_almost_equal(gmm_score, gmm_score_proba) # Check if the score increase gmm2 = GaussianMixture(n_components=n_components, n_init=1, reg_covar=0, random_state=rng, covariance_type=covar_type).fit(X) assert_greater(gmm2.score(X), gmm1.score(X))
def create_mask(img, background_probability=0.75, use_triangle=False, use_otsu=False): test_mask = None if use_triangle: test_mask = sitk.GetArrayFromImage(sitk.TriangleThreshold(img, 0, 1)) elif use_otsu: test_mask = sitk.GetArrayFromImage(sitk.OtsuThreshold(img, 0, 1)) else: if type(img) is sitk.SimpleITK.Image: img = sitk.GetArrayFromImage(img) gmix = GaussianMixture(n_components=3, covariance_type='full', init_params='kmeans', verbose=0) gmix.fit(img.ravel().reshape(-1, 1)) covariances = gmix.covariances_ mean_background = gmix.means_.min() covariance_background = covariances[np.where( gmix.means_ == mean_background ) ][0][0] z_score = st.norm.ppf(background_probability) threshold = z_score * np.sqrt(covariance_background) + mean_background test_mask = (img > threshold) eroded_im = morphology.opening(test_mask, selem=morphology.ball(2)) connected_comp = skimage.measure.label(eroded_im) out = skimage.measure.regionprops(connected_comp) area_max = 0.0 idx_max = 0 for i in range(len(out)): if out[i].area > area_max: area_max = out[i].area idx_max = i+1 connected_comp[ connected_comp != idx_max ] = 0 mask = connected_comp mask_sitk = sitk.GetImageFromArray(mask) mask_sitk.CopyInformation(img) return mask_sitk
def test_gaussian_mixture_estimate_log_prob_resp(): # test whether responsibilities are normalized rng = np.random.RandomState(0) rand_data = RandomData(rng, scale=5) n_samples = rand_data.n_samples n_features = rand_data.n_features n_components = rand_data.n_components X = rng.rand(n_samples, n_features) for covar_type in COVARIANCE_TYPE: weights = rand_data.weights means = rand_data.means precisions = rand_data.precisions[covar_type] g = GaussianMixture(n_components=n_components, random_state=rng, weights_init=weights, means_init=means, precisions_init=precisions, covariance_type=covar_type) g.fit(X) resp = g.predict_proba(X) assert_array_almost_equal(resp.sum(axis=1), np.ones(n_samples)) assert_array_equal(g.weights_init, weights) assert_array_equal(g.means_init, means) assert_array_equal(g.precisions_init, precisions)
def test_monotonic_likelihood(): # We check that each step of the EM without regularization improve # monotonically the training set likelihood rng = np.random.RandomState(0) rand_data = RandomData(rng, scale=7) n_components = rand_data.n_components for covar_type in COVARIANCE_TYPE: X = rand_data.X[covar_type] gmm = GaussianMixture(n_components=n_components, covariance_type=covar_type, reg_covar=0, warm_start=True, max_iter=1, random_state=rng, tol=1e-7) current_log_likelihood = -np.infty with warnings.catch_warnings(): warnings.simplefilter("ignore", ConvergenceWarning) # Do one training iteration at a time so we can make sure that the # training log likelihood increases after each iteration. for _ in range(600): prev_log_likelihood = current_log_likelihood try: current_log_likelihood = gmm.fit(X).score(X) except ConvergenceWarning: pass assert_greater_equal(current_log_likelihood, prev_log_likelihood) if gmm.converged_: break assert_true(gmm.converged_)
def macro_clusters(self, n_clusters, max_iters=1000, **kwargs): data = np.concatenate([ m.sample(np.int32(np.ceil(m.n / 100))) for m in self.micro_clusters ]) gmm = GaussianMixture(n_components=n_clusters) gmm.fit(data) self.macro_centroids = gmm.means_ self.gmm_model = gmm
def test_gaussian_mixture_fit(): # recover the ground truth rng = np.random.RandomState(0) rand_data = RandomData(rng) n_features = rand_data.n_features n_components = rand_data.n_components for covar_type in COVARIANCE_TYPE: X = rand_data.X[covar_type] g = GaussianMixture(n_components=n_components, n_init=20, reg_covar=0, random_state=rng, covariance_type=covar_type) g.fit(X) # needs more data to pass the test with rtol=1e-7 assert_allclose(np.sort(g.weights_), np.sort(rand_data.weights), rtol=0.1, atol=1e-2) arg_idx1 = g.means_[:, 0].argsort() arg_idx2 = rand_data.means[:, 0].argsort() assert_allclose(g.means_[arg_idx1], rand_data.means[arg_idx2], rtol=0.1, atol=1e-2) if covar_type == 'full': prec_pred = g.precisions_ prec_test = rand_data.precisions['full'] elif covar_type == 'tied': prec_pred = np.array([g.precisions_] * n_components) prec_test = np.array([rand_data.precisions['tied']] * n_components) elif covar_type == 'spherical': prec_pred = np.array( [np.eye(n_features) * c for c in g.precisions_]) prec_test = np.array([ np.eye(n_features) * c for c in rand_data.precisions['spherical'] ]) elif covar_type == 'diag': prec_pred = np.array([np.diag(d) for d in g.precisions_]) prec_test = np.array( [np.diag(d) for d in rand_data.precisions['diag']]) arg_idx1 = np.trace(prec_pred, axis1=1, axis2=2).argsort() arg_idx2 = np.trace(prec_test, axis1=1, axis2=2).argsort() for k, h in zip(arg_idx1, arg_idx2): ecov = EmpiricalCovariance() ecov.covariance_ = prec_test[h] # the accuracy depends on the number of data and randomness, rng assert_allclose(ecov.error_norm(prec_pred[k]), 0, atol=0.1)
def test_gaussian_mixture_n_parameters(): # Test that the right number of parameters is estimated rng = np.random.RandomState(0) n_samples, n_features, n_components = 50, 5, 2 X = rng.randn(n_samples, n_features) n_params = {'spherical': 13, 'diag': 21, 'tied': 26, 'full': 41} for cv_type in COVARIANCE_TYPE: g = GaussianMixture( n_components=n_components, covariance_type=cv_type, random_state=rng).fit(X) assert_equal(g._n_parameters(), n_params[cv_type])
def main(): #Import data from file trainData, trainLabel = importTrainData() testData, testLabel = importTestData() #PCA procedure #For train data rawlowDivTrainData = [] pca = PCA(n_components=537) #pca = PCA(n_components=537, whiten="True") rawlowDivTrainData = pca.fit_transform(trainData) #For test data which sum up to 10000 lowDivTestData = pca.fit_transform(testData) #Classify manually, divide the data into 10 parts by labels numTrainData = [[], [], [], [], [], [], [], [], [], []] for i in range(60000): numTrainData[trainLabel[i]].append(rawlowDivTrainData[i]) #Train Gmm for every mode/Each mode represent a number. myGmms = [] #GMM procedure for num in range(10): print("GMM", num) gmmClassifier = GaussianMixture(n_components=25, init_params='kmeans', max_iter=1000) gmmClassifier.fit(numTrainData[num]) myGmms.append(gmmClassifier) #Validate # We need to calculate all the total probability under each GMM, and choose the biggest one. correctCount = 0 allProb = [] for num in range(10): allProb.append(np.array(myGmms[num].score_samples(lowDivTestData))) allProb = np.array(allProb) #If the result seem strange, print the predict label for i in range(10000): predictLabel = 0 max = np.max(allProb[:, i]) for num in range(10): if max == allProb[num, i]: predictLabel = num if predictLabel == testLabel[i]: correctCount += 1 #Output print("The correct rate is:", correctCount / 10000)
def test_gaussian_mixture_n_parameters(): # Test that the right number of parameters is estimated rng = np.random.RandomState(0) n_samples, n_features, n_components = 50, 5, 2 X = rng.randn(n_samples, n_features) n_params = {'spherical': 13, 'diag': 21, 'tied': 26, 'full': 41} for cv_type in COVARIANCE_TYPE: g = GaussianMixture(n_components=n_components, covariance_type=cv_type, random_state=rng).fit(X) assert_equal(g._n_parameters(), n_params[cv_type])
def test_init(): # We check that by increasing the n_init number we have a better solution for random_state in range(25): rand_data = RandomData(np.random.RandomState(random_state), scale=1) n_components = rand_data.n_components X = rand_data.X['full'] gmm1 = GaussianMixture(n_components=n_components, n_init=1, max_iter=1, random_state=random_state).fit(X) gmm2 = GaussianMixture(n_components=n_components, n_init=10, max_iter=1, random_state=random_state).fit(X) assert gmm2.lower_bound_ >= gmm1.lower_bound_
def test_multiple_init(): # Test that multiple inits does not much worse than a single one rng = np.random.RandomState(0) n_samples, n_features, n_components = 50, 5, 2 X = rng.randn(n_samples, n_features) for cv_type in COVARIANCE_TYPE: train1 = GaussianMixture(n_components=n_components, covariance_type=cv_type, random_state=rng).fit(X).score(X) train2 = GaussianMixture(n_components=n_components, covariance_type=cv_type, random_state=rng, n_init=5).fit(X).score(X) assert_greater_equal(train2, train1)
def test_score(): covar_type = 'full' rng = np.random.RandomState(0) rand_data = RandomData(rng, scale=7) n_components = rand_data.n_components X = rand_data.X[covar_type] # Check the error message if we don't call fit gmm1 = GaussianMixture(n_components=n_components, n_init=1, max_iter=1, reg_covar=0, random_state=rng, covariance_type=covar_type) assert_raise_message(NotFittedError, "This GaussianMixture instance is not fitted " "yet. Call 'fit' with appropriate arguments " "before using this method.", gmm1.score, X) # Check score value with warnings.catch_warnings(): warnings.simplefilter("ignore", ConvergenceWarning) gmm1.fit(X) gmm_score = gmm1.score(X) gmm_score_proba = gmm1.score_samples(X).mean() assert_almost_equal(gmm_score, gmm_score_proba) # Check if the score increase gmm2 = GaussianMixture(n_components=n_components, n_init=1, reg_covar=0, random_state=rng, covariance_type=covar_type).fit(X) assert_greater(gmm2.score(X), gmm1.score(X))
def test_bic_1d_1component(): # Test all of the covariance_types return the same BIC score for # 1-dimensional, 1 component fits. rng = np.random.RandomState(0) n_samples, n_dim, n_components = 100, 1, 1 X = rng.randn(n_samples, n_dim) bic_full = GaussianMixture(n_components=n_components, covariance_type='full', random_state=rng).fit(X).bic(X) for covariance_type in ['tied', 'diag', 'spherical']: bic = GaussianMixture(n_components=n_components, covariance_type=covariance_type, random_state=rng).fit(X).bic(X) assert_almost_equal(bic_full, bic)
def test_gaussian_mixture_verbose(): rng = np.random.RandomState(0) rand_data = RandomData(rng) n_components = rand_data.n_components for covar_type in COVARIANCE_TYPE: X = rand_data.X[covar_type] g = GaussianMixture(n_components=n_components, n_init=1, reg_covar=0, random_state=rng, covariance_type=covar_type, verbose=1) h = GaussianMixture(n_components=n_components, n_init=1, reg_covar=0, random_state=rng, covariance_type=covar_type, verbose=2) old_stdout = sys.stdout sys.stdout = StringIO() try: g.fit(X) h.fit(X) finally: sys.stdout = old_stdout
def cluster(clusterType, vectors, y): if (clusterType == "KMeans"): kclusterer = KMeansClusterer( NUM_CLUSTERS, distance=nltk.cluster.util.cosine_distance, repeats=25) assigned_clusters = kclusterer.cluster(vectors, assign_clusters=True) elif (clusterType == "GMM"): GMM = GaussianMixture(n_components=NUM_CLUSTERS) assigned_clusters = GMM.fit_predict(vectors) elif (clusterType == "SVM"): classifier = SVC(kernel='rbf', gamma='auto', random_state=0) #cross-validation assigned_clusters = cross_validation(classifier, vectors, y) elif (clusterType == "T2VH"): ret = hierarchical.ward_tree(vectors, n_clusters=NUM_CLUSTERS) children = ret[0] n_leaves = ret[2] assigned_clusters = hierarchical._hc_cut(NUM_CLUSTERS, children, n_leaves) elif (clusterType == "RandomForest"): classifier = RandomForestClassifier() #cross-validation assigned_clusters = cross_validation(classifier, vectors, y) # classifier.fit(vectors, y) # assigned_clusters=classifier.predict(vectors) elif (clusterType == "DecisionTree"): classifier = DecisionTreeClassifier() #cross-validation assigned_clusters = cross_validation(classifier, vectors, y) # classifier.fit(vectors, y) # assigned_clusters=classifier.predict(vectors) elif (clusterType == "LogisticRegression"): classifier = sklearn.linear_model.LogisticRegression() #cross-validation assigned_clusters = cross_validation(classifier, vectors, y) # classifier.fit(vectors, y) # assigned_clusters=classifier.predict(vectors) else: print(clusterType, " is not a predefined cluster type.") return return assigned_clusters
def test_sample(): rng = np.random.RandomState(0) rand_data = RandomData(rng, scale=7, n_components=3) n_features, n_components = rand_data.n_features, rand_data.n_components for covar_type in COVARIANCE_TYPE: X = rand_data.X[covar_type] gmm = GaussianMixture(n_components=n_components, covariance_type=covar_type, random_state=rng) # To sample we need that GaussianMixture is fitted assert_raise_message(NotFittedError, "This GaussianMixture instance " "is not fitted", gmm.sample, 0) gmm.fit(X) assert_raise_message(ValueError, "Invalid value for 'n_samples", gmm.sample, 0) # Just to make sure the class samples correctly n_samples = 20000 X_s, y_s = gmm.sample(n_samples) for k in range(n_components): if covar_type == 'full': assert_array_almost_equal(gmm.covariances_[k], np.cov(X_s[y_s == k].T), decimal=1) elif covar_type == 'tied': assert_array_almost_equal(gmm.covariances_, np.cov(X_s[y_s == k].T), decimal=1) elif covar_type == 'diag': assert_array_almost_equal(gmm.covariances_[k], np.diag(np.cov(X_s[y_s == k].T)), decimal=1) else: assert_array_almost_equal( gmm.covariances_[k], np.var(X_s[y_s == k] - gmm.means_[k]), decimal=1) means_s = np.array([np.mean(X_s[y_s == k], 0) for k in range(n_components)]) assert_array_almost_equal(gmm.means_, means_s, decimal=1) # Check shapes of sampled data, see # https://github.com/scikit-learn/scikit-learn/issues/7701 assert_equal(X_s.shape, (n_samples, n_features)) for sample_size in range(1, 100): X_s, _ = gmm.sample(sample_size) assert_equal(X_s.shape, (sample_size, n_features))
def spawn(self, sess, dataset, num_data=None): """ Run through the computational graph with a dataset to create an agent """ # 1. Train the latent classifier print('Step 1...') dataset_string = sess.run(dataset.repeat(1000).batch(self.batch_size).make_one_shot_iterator().string_handle()) try: while True: sess.run(self.latent_train_step, feed_dict={self.handle: dataset_string}) except tf.errors.OutOfRangeError: pass # 2. Train the GMM print('Step 2...') dataset_string = sess.run(dataset.batch(num_data).make_one_shot_iterator().string_handle()) _, z_mean, _, _ = sess.run(self.iterator.get_next(), feed_dict={self.handle: dataset_string}) gmm = GaussianMixture(n_components=self.num_pattern, covariance_type='full').fit(z_mean) means_ = gmm.means_.astype(np.float32) scales_ = self.scale_to_unconstrained.forward(np.linalg.cholesky(gmm.covariances_).astype(np.float32)) sess.run([self.means.assign(means_), self.scales_unconstrained.assign(scales_)], feed_dict={self.handle: dataset_string}) # 3. Compute S_labels_patterns print('Step 3...') S_label_pattern_ = sess.run(self.S_label_pattern) patterns = (means_, gmm.covariances_) return Agent(sess, patterns, S_label_pattern_)
def test_convergence_detected_with_warm_start(): # We check that convergence is detected when warm_start=True rng = np.random.RandomState(0) rand_data = RandomData(rng) n_components = rand_data.n_components X = rand_data.X['full'] for max_iter in (1, 2, 50): gmm = GaussianMixture(n_components=n_components, warm_start=True, max_iter=max_iter, random_state=rng) for _ in range(100): gmm.fit(X) if gmm.converged_: break assert gmm.converged_ assert max_iter >= gmm.n_iter_
def test_gaussian_mixture_fit(): # recover the ground truth rng = np.random.RandomState(0) rand_data = RandomData(rng) n_features = rand_data.n_features n_components = rand_data.n_components for covar_type in COVARIANCE_TYPE: X = rand_data.X[covar_type] g = GaussianMixture(n_components=n_components, n_init=20, reg_covar=0, random_state=rng, covariance_type=covar_type) g.fit(X) # needs more data to pass the test with rtol=1e-7 assert_allclose(np.sort(g.weights_), np.sort(rand_data.weights), rtol=0.1, atol=1e-2) arg_idx1 = g.means_[:, 0].argsort() arg_idx2 = rand_data.means[:, 0].argsort() assert_allclose(g.means_[arg_idx1], rand_data.means[arg_idx2], rtol=0.1, atol=1e-2) if covar_type == 'full': prec_pred = g.precisions_ prec_test = rand_data.precisions['full'] elif covar_type == 'tied': prec_pred = np.array([g.precisions_] * n_components) prec_test = np.array([rand_data.precisions['tied']] * n_components) elif covar_type == 'spherical': prec_pred = np.array([np.eye(n_features) * c for c in g.precisions_]) prec_test = np.array([np.eye(n_features) * c for c in rand_data.precisions['spherical']]) elif covar_type == 'diag': prec_pred = np.array([np.diag(d) for d in g.precisions_]) prec_test = np.array([np.diag(d) for d in rand_data.precisions['diag']]) arg_idx1 = np.trace(prec_pred, axis1=1, axis2=2).argsort() arg_idx2 = np.trace(prec_test, axis1=1, axis2=2).argsort() for k, h in zip(arg_idx1, arg_idx2): ecov = EmpiricalCovariance() ecov.covariance_ = prec_test[h] # the accuracy depends on the number of data and randomness, rng assert_allclose(ecov.error_norm(prec_pred[k]), 0, atol=0.1)
def test_score_samples(): covar_type = 'full' rng = np.random.RandomState(0) rand_data = RandomData(rng, scale=7) n_components = rand_data.n_components X = rand_data.X[covar_type] # Check the error message if we don't call fit gmm = GaussianMixture(n_components=n_components, n_init=1, reg_covar=0, random_state=rng, covariance_type=covar_type) assert_raise_message(NotFittedError, "This GaussianMixture instance is not fitted " "yet. Call 'fit' with appropriate arguments " "before using this method.", gmm.score_samples, X) gmm_score_samples = gmm.fit(X).score_samples(X) assert_equal(gmm_score_samples.shape[0], rand_data.n_samples)
def test_gaussian_mixture_fit_predict(): rng = np.random.RandomState(0) rand_data = RandomData(rng) for covar_type in COVARIANCE_TYPE: X = rand_data.X[covar_type] Y = rand_data.Y g = GaussianMixture(n_components=rand_data.n_components, random_state=rng, weights_init=rand_data.weights, means_init=rand_data.means, precisions_init=rand_data.precisions[covar_type], covariance_type=covar_type) # check if fit_predict(X) is equivalent to fit(X).predict(X) f = copy.deepcopy(g) Y_pred1 = f.fit(X).predict(X) Y_pred2 = g.fit_predict(X) assert_array_equal(Y_pred1, Y_pred2) assert_greater(adjusted_rand_score(Y, Y_pred2), .95)
def test_gaussian_mixture_fit_best_params(): rng = np.random.RandomState(0) rand_data = RandomData(rng) n_components = rand_data.n_components n_init = 10 for covar_type in COVARIANCE_TYPE: X = rand_data.X[covar_type] g = GaussianMixture(n_components=n_components, n_init=1, reg_covar=0, random_state=rng, covariance_type=covar_type) ll = [] for _ in range(n_init): g.fit(X) ll.append(g.score(X)) ll = np.array(ll) g_best = GaussianMixture(n_components=n_components, n_init=n_init, reg_covar=0, random_state=rng, covariance_type=covar_type) g_best.fit(X) assert_almost_equal(ll.min(), g_best.score(X))
def test_property(): rng = np.random.RandomState(0) rand_data = RandomData(rng, scale=7) n_components = rand_data.n_components for covar_type in COVARIANCE_TYPE: X = rand_data.X[covar_type] gmm = GaussianMixture(n_components=n_components, covariance_type=covar_type, random_state=rng, n_init=5) gmm.fit(X) if covar_type == 'full': for prec, covar in zip(gmm.precisions_, gmm.covariances_): assert_array_almost_equal(linalg.inv(prec), covar) elif covar_type == 'tied': assert_array_almost_equal(linalg.inv(gmm.precisions_), gmm.covariances_) else: assert_array_almost_equal(gmm.precisions_, 1. / gmm.covariances_)
def test_check_means(): rng = np.random.RandomState(0) rand_data = RandomData(rng) n_components, n_features = rand_data.n_components, rand_data.n_features X = rand_data.X['full'] g = GaussianMixture(n_components=n_components) # Check means bad shape means_bad_shape = rng.rand(n_components + 1, n_features) g.means_init = means_bad_shape assert_raise_message(ValueError, "The parameter 'means' should have the shape of ", g.fit, X) # Check good means matrix means = rand_data.means g.means_init = means g.fit(X) assert_array_equal(means, g.means_init)
class ScikitLL(LikelihoodEvaluator): """ Fastest Single Core Version so far! """ def __init__(self, Xpoints, numMixtures): super().__init__(Xpoints, numMixtures) self.evaluator = GaussianMixture(numMixtures, 'diag') self.Xpoints = Xpoints self.evaluator.fit(Xpoints) def __str__(self): return "SciKit's learn implementation Implementation" def loglikelihood(self, means, diagCovs, weights): self.evaluator.weights_ = weights self.evaluator.covariances_ = diagCovs self.evaluator.means_ = means self.evaluator.precisions_cholesky_ = _compute_precision_cholesky( diagCovs, "diag") return self.numPoints * np.sum(self.evaluator.score(self.Xpoints))
def _initialize_from_json(self, data): self.name = data["name"] self.spatial_coefs = np.array(data["spatial_coeffs"]) self.knots = np.array(data["knots"]) self.n_canonical_frames = data["n_canonical_frames"] self.time_function = np.array(list(range(self.n_canonical_frames))) self.motion_spline = MotionSpline(self.spatial_coefs, self.time_function, self.knots, None) self.gmm = GaussianMixture(n_components=1, covariance_type='full') #self.gmm.fit([0]) if "skeleton" in data: self.animated_joints = data["skeleton"]["animated_joints"]
def fuse(self, agent1, agent2): gmm = GaussianMixture(n_components=self.num_pattern).fit(np.concatenate((agent1.patterns[0], agent2.patterns[0]))) s1 = agent1.S_label_pattern s2 = agent2.S_label_pattern idx1 = gmm.predict(agent1.patterns[0]) idx2 = gmm.predict(agent2.patterns[0]) s = np.ones((self.num_pattern, 10)) for j in range(self.num_pattern): i1 = np.argwhere(idx1 == j)[:, 0] for i in i1: s[j, :] *= s1[i, :] i2 = np.argwhere(idx2 == j)[:, 0] for i in i2: s[j, :] *= s2[i, :] normalization_const = np.sum(s, axis=1, keepdims=True) # normalization_const = np.reshape(normalization_const, (self.num_pattern, 10)) normalization_const = np.tile(normalization_const, (1, 10)) s /= normalization_const return Agent(agent1.sess, (gmm.means_, gmm.covariances_), s)
def get_3d_grid_gmm(subdivisions: Tuple[int, int, int] = (5, 5, 5), variance: float = 0.04) -> GaussianMixture: """ Compute the weight, mean and covariance of a gmm placed on a 3D grid :param subdivisions: 2 element list of number of subdivisions of the 3D space in each axes to form the grid :param variance: scalar for spherical gmm.p :return gmm: gmm: instance of sklearn GaussianMixture (GMM) object Gauassian mixture model """ n_gaussians = np.prod(np.array(subdivisions)) step = [ 1.0 / (subdivisions[0]), 1.0 / (subdivisions[1]), 1.0 / (subdivisions[2]) ] means = np.mgrid[step[0] - 1:1.0 - step[0]:complex(0, subdivisions[0]), step[1] - 1:1.0 - step[1]:complex(0, subdivisions[1]), step[2] - 1:1.0 - step[2]:complex(0, subdivisions[2])] means = np.reshape(means, [3, -1]).T covariances = variance * np.ones_like(means) weights = (1.0 / n_gaussians) * np.ones(n_gaussians) gmm = GaussianMixture(n_components=n_gaussians, covariance_type='diag') gmm.weights_ = weights gmm.covariances_ = covariances gmm.means_ = means gmm.precisions_cholesky_ = _compute_precision_cholesky(covariances, 'diag') return gmm
def test_check_precisions(): rng = np.random.RandomState(0) rand_data = RandomData(rng) n_components, n_features = rand_data.n_components, rand_data.n_features # Define the bad precisions for each covariance_type precisions_bad_shape = { 'full': np.ones((n_components + 1, n_features, n_features)), 'tied': np.ones((n_features + 1, n_features + 1)), 'diag': np.ones((n_components + 1, n_features)), 'spherical': np.ones((n_components + 1)) } # Define not positive-definite precisions precisions_not_pos = np.ones((n_components, n_features, n_features)) precisions_not_pos[0] = np.eye(n_features) precisions_not_pos[0, 0, 0] = -1. precisions_not_positive = { 'full': precisions_not_pos, 'tied': precisions_not_pos[0], 'diag': -1. * np.ones((n_components, n_features)), 'spherical': -1. * np.ones(n_components) } not_positive_errors = { 'full': 'symmetric, positive-definite', 'tied': 'symmetric, positive-definite', 'diag': 'positive', 'spherical': 'positive' } for covar_type in COVARIANCE_TYPE: X = RandomData(rng).X[covar_type] g = GaussianMixture(n_components=n_components, covariance_type=covar_type, random_state=rng) # Check precisions with bad shapes g.precisions_init = precisions_bad_shape[covar_type] assert_raise_message( ValueError, "The parameter '%s precision' should have " "the shape of" % covar_type, g.fit, X) # Check not positive precisions g.precisions_init = precisions_not_positive[covar_type] assert_raise_message( ValueError, "'%s precision' should be %s" % (covar_type, not_positive_errors[covar_type]), g.fit, X) # Check the correct init of precisions_init g.precisions_init = rand_data.precisions[covar_type] g.fit(X) assert_array_equal(rand_data.precisions[covar_type], g.precisions_init)
def test_sample(): rng = np.random.RandomState(0) rand_data = RandomData(rng, scale=7) n_features, n_components = rand_data.n_features, rand_data.n_components for covar_type in COVARIANCE_TYPE: X = rand_data.X[covar_type] gmm = GaussianMixture(n_components=n_components, covariance_type=covar_type, random_state=rng) # To sample we need that GaussianMixture is fitted assert_raise_message(NotFittedError, "This GaussianMixture instance " "is not fitted", gmm.sample, 0) gmm.fit(X) assert_raise_message(ValueError, "Invalid value for 'n_samples", gmm.sample, 0) # Just to make sure the class samples correctly X_s, y_s = gmm.sample(20000) for k in range(n_features): if covar_type == 'full': assert_array_almost_equal(gmm.covariances_[k], np.cov(X_s[y_s == k].T), decimal=1) elif covar_type == 'tied': assert_array_almost_equal(gmm.covariances_, np.cov(X_s[y_s == k].T), decimal=1) elif covar_type == 'diag': assert_array_almost_equal(gmm.covariances_[k], np.diag(np.cov(X_s[y_s == k].T)), decimal=1) else: assert_array_almost_equal( gmm.covariances_[k], np.var(X_s[y_s == k] - gmm.means_[k]), decimal=1) means_s = np.array([np.mean(X_s[y_s == k], 0) for k in range(n_features)]) assert_array_almost_equal(gmm.means_, means_s, decimal=1)
def test_check_weights(): rng = np.random.RandomState(0) rand_data = RandomData(rng) n_components = rand_data.n_components X = rand_data.X['full'] g = GaussianMixture(n_components=n_components) # Check bad shape weights_bad_shape = rng.rand(n_components, 1) g.weights_init = weights_bad_shape assert_raise_message(ValueError, "The parameter 'weights' should have the shape of " "(%d,), " "but got %s" % (n_components, str(weights_bad_shape.shape)), g.fit, X) # Check bad range weights_bad_range = rng.rand(n_components) + 1 g.weights_init = weights_bad_range assert_raise_message(ValueError, "The parameter 'weights' should be in the range " "[0, 1], but got max value %.5f, min value %.5f" % (np.min(weights_bad_range), np.max(weights_bad_range)), g.fit, X) # Check bad normalization weights_bad_norm = rng.rand(n_components) weights_bad_norm = weights_bad_norm / (weights_bad_norm.sum() + 1) g.weights_init = weights_bad_norm assert_raise_message(ValueError, "The parameter 'weights' should be normalized, " "but got sum(weights) = %.5f" % np.sum(weights_bad_norm), g.fit, X) # Check good weights matrix weights = rand_data.weights g = GaussianMixture(weights_init=weights, n_components=n_components) g.fit(X) assert_array_equal(weights, g.weights_init)
def test_regularisation(): # We train the GaussianMixture on degenerate data by defining two clusters # of a 0 covariance. rng = np.random.RandomState(0) n_samples, n_features = 10, 5 X = np.vstack((np.ones((n_samples // 2, n_features)), np.zeros((n_samples // 2, n_features)))) for covar_type in COVARIANCE_TYPE: gmm = GaussianMixture(n_components=n_samples, reg_covar=0, covariance_type=covar_type, random_state=rng) with warnings.catch_warnings(): warnings.simplefilter("ignore", RuntimeWarning) assert_raise_message(ValueError, "The algorithm has diverged because of too " "few samples per components. " "Try to decrease the number of components, " "or increase reg_covar.", gmm.fit, X) gmm.set_params(reg_covar=1e-6).fit(X)
def test_check_covariances(): rng = np.random.RandomState(0) rand_data = RandomData(rng) n_components, n_features = rand_data.n_components, rand_data.n_features # Define the bad covariances for each covariance_type covariances_bad_shape = { 'full': rng.rand(n_components + 1, n_features, n_features), 'tied': rng.rand(n_features + 1, n_features + 1), 'diag': rng.rand(n_components + 1, n_features), 'spherical': rng.rand(n_components + 1)} # Define not positive-definite covariances covariances_not_pos = rng.rand(n_components, n_features, n_features) covariances_not_pos[0] = np.eye(n_features) covariances_not_pos[0, 0, 0] = -1. covariances_not_positive = { 'full': covariances_not_pos, 'tied': covariances_not_pos[0], 'diag': -1. * np.ones((n_components, n_features)), 'spherical': -1. * np.ones(n_components)} not_positive_errors = { 'full': 'symmetric, positive-definite', 'tied': 'symmetric, positive-definite', 'diag': 'positive', 'spherical': 'positive'} for cov_type in ['full', 'tied', 'diag', 'spherical']: X = rand_data.X[cov_type] g = GaussianMixture(n_components=n_components, covariance_type=cov_type) # Check covariance with bad shapes g.covariances_init = covariances_bad_shape[cov_type] assert_raise_message(ValueError, "The parameter '%s covariance' should have " "the shape of" % cov_type, g.fit, X) # Check not positive covariances g.covariances_init = covariances_not_positive[cov_type] assert_raise_message(ValueError, "'%s covariance' should be %s" % (cov_type, not_positive_errors[cov_type]), g.fit, X) # Check the correct init of covariances_init g.covariances_init = rand_data.covariances[cov_type] g.fit(X) assert_array_equal(rand_data.covariances[cov_type], g.covariances_init)
def test_gaussian_mixture_predict_predict_proba(): rng = np.random.RandomState(0) rand_data = RandomData(rng) for covar_type in COVARIANCE_TYPE: X = rand_data.X[covar_type] Y = rand_data.Y g = GaussianMixture(n_components=rand_data.n_components, random_state=rng, weights_init=rand_data.weights, means_init=rand_data.means, precisions_init=rand_data.precisions[covar_type], covariance_type=covar_type) # Check a warning message arrive if we don't do fit assert_raise_message(NotFittedError, "This GaussianMixture instance is not fitted " "yet. Call 'fit' with appropriate arguments " "before using this method.", g.predict, X) g.fit(X) Y_pred = g.predict(X) Y_pred_proba = g.predict_proba(X).argmax(axis=1) assert_array_equal(Y_pred, Y_pred_proba) assert_greater(adjusted_rand_score(Y, Y_pred), .95)
def test_regularisation(): # We train the GaussianMixture on degenerate data by defining two clusters # of a 0 covariance. rng = np.random.RandomState(0) n_samples, n_features = 10, 5 X = np.vstack((np.ones((n_samples // 2, n_features)), np.zeros((n_samples // 2, n_features)))) for covar_type in COVARIANCE_TYPE: gmm = GaussianMixture(n_components=n_samples, reg_covar=0, covariance_type=covar_type, random_state=rng) with warnings.catch_warnings(): warnings.simplefilter("ignore", RuntimeWarning) assert_raise_message(ValueError, "Fitting the mixture model failed because " "some components have ill-defined empirical " "covariance (for instance caused by " "singleton or collapsed samples). Try to " "decrease the number of components, or " "increase reg_covar.", gmm.fit, X) gmm.set_params(reg_covar=1e-6).fit(X)
def test_check_precisions(): rng = np.random.RandomState(0) rand_data = RandomData(rng) n_components, n_features = rand_data.n_components, rand_data.n_features # Define the bad precisions for each covariance_type precisions_bad_shape = { 'full': np.ones((n_components + 1, n_features, n_features)), 'tied': np.ones((n_features + 1, n_features + 1)), 'diag': np.ones((n_components + 1, n_features)), 'spherical': np.ones((n_components + 1))} # Define not positive-definite precisions precisions_not_pos = np.ones((n_components, n_features, n_features)) precisions_not_pos[0] = np.eye(n_features) precisions_not_pos[0, 0, 0] = -1. precisions_not_positive = { 'full': precisions_not_pos, 'tied': precisions_not_pos[0], 'diag': np.full((n_components, n_features), -1.), 'spherical': np.full(n_components, -1.)} not_positive_errors = { 'full': 'symmetric, positive-definite', 'tied': 'symmetric, positive-definite', 'diag': 'positive', 'spherical': 'positive'} for covar_type in COVARIANCE_TYPE: X = RandomData(rng).X[covar_type] g = GaussianMixture(n_components=n_components, covariance_type=covar_type, random_state=rng) # Check precisions with bad shapes g.precisions_init = precisions_bad_shape[covar_type] assert_raise_message(ValueError, "The parameter '%s precision' should have " "the shape of" % covar_type, g.fit, X) # Check not positive precisions g.precisions_init = precisions_not_positive[covar_type] assert_raise_message(ValueError, "'%s precision' should be %s" % (covar_type, not_positive_errors[covar_type]), g.fit, X) # Check the correct init of precisions_init g.precisions_init = rand_data.precisions[covar_type] g.fit(X) assert_array_equal(rand_data.precisions[covar_type], g.precisions_init)
def test_gaussian_mixture_aic_bic(): # Test the aic and bic criteria rng = np.random.RandomState(0) n_samples, n_features, n_components = 50, 3, 2 X = rng.randn(n_samples, n_features) # standard gaussian entropy sgh = 0.5 * (fast_logdet(np.cov(X.T, bias=1)) + n_features * (1 + np.log(2 * np.pi))) for cv_type in COVARIANCE_TYPE: g = GaussianMixture( n_components=n_components, covariance_type=cv_type, random_state=rng, max_iter=200) g.fit(X) aic = 2 * n_samples * sgh + 2 * g._n_parameters() bic = (2 * n_samples * sgh + np.log(n_samples) * g._n_parameters()) bound = n_features / np.sqrt(n_samples) assert (g.aic(X) - aic) / n_samples < bound assert (g.bic(X) - bic) / n_samples < bound
def test_warm_start(): random_state = 0 rng = np.random.RandomState(random_state) n_samples, n_features, n_components = 500, 2, 2 X = rng.rand(n_samples, n_features) # Assert the warm_start give the same result for the same number of iter g = GaussianMixture(n_components=n_components, n_init=1, max_iter=2, reg_covar=0, random_state=random_state, warm_start=False) h = GaussianMixture(n_components=n_components, n_init=1, max_iter=1, reg_covar=0, random_state=random_state, warm_start=True) with warnings.catch_warnings(): warnings.simplefilter("ignore", ConvergenceWarning) g.fit(X) score1 = h.fit(X).score(X) score2 = h.fit(X).score(X) assert_almost_equal(g.weights_, h.weights_) assert_almost_equal(g.means_, h.means_) assert_almost_equal(g.precisions_, h.precisions_) assert_greater(score2, score1) # Assert that by using warm_start we can converge to a good solution g = GaussianMixture(n_components=n_components, n_init=1, max_iter=5, reg_covar=0, random_state=random_state, warm_start=False, tol=1e-6) h = GaussianMixture(n_components=n_components, n_init=1, max_iter=5, reg_covar=0, random_state=random_state, warm_start=True, tol=1e-6) with warnings.catch_warnings(): warnings.simplefilter("ignore", ConvergenceWarning) g.fit(X) h.fit(X).fit(X) assert_true(not g.converged_) assert_true(h.converged_)
def test_warm_start(seed): random_state = seed rng = np.random.RandomState(random_state) n_samples, n_features, n_components = 500, 2, 2 X = rng.rand(n_samples, n_features) # Assert the warm_start give the same result for the same number of iter g = GaussianMixture(n_components=n_components, n_init=1, max_iter=2, reg_covar=0, random_state=random_state, warm_start=False) h = GaussianMixture(n_components=n_components, n_init=1, max_iter=1, reg_covar=0, random_state=random_state, warm_start=True) g.fit(X) score1 = h.fit(X).score(X) score2 = h.fit(X).score(X) assert_almost_equal(g.weights_, h.weights_) assert_almost_equal(g.means_, h.means_) assert_almost_equal(g.precisions_, h.precisions_) assert score2 > score1 # Assert that by using warm_start we can converge to a good solution g = GaussianMixture(n_components=n_components, n_init=1, max_iter=5, reg_covar=0, random_state=random_state, warm_start=False, tol=1e-6) h = GaussianMixture(n_components=n_components, n_init=1, max_iter=5, reg_covar=0, random_state=random_state, warm_start=True, tol=1e-6) g.fit(X) assert not g.converged_ h.fit(X) # depending on the data there is large variability in the number of # refit necessary to converge due to the complete randomness of the # data for _ in range(1000): h.fit(X) if h.converged_: break assert h.converged_