def test_gaussian_mixture_fit_predict_n_init():
    # With several initialisations (n_init > 1), the labels returned by
    # fit_predict must match what predict gives on the same fitted model.
    rng = np.random.RandomState(0)
    samples = rng.randn(1000, 5)

    mixture = GaussianMixture(n_components=5, n_init=5, random_state=0)
    labels_from_fit_predict = mixture.fit_predict(samples)
    labels_from_predict = mixture.predict(samples)

    assert_array_equal(labels_from_fit_predict, labels_from_predict)
def cluster(clusterType, vectors, y, num_clusters=None):
    """Label every vector using the algorithm selected by ``clusterType``.

    Supported values of ``clusterType``:

    * ``"KMeans"`` -- ``KMeansClusterer`` with cosine distance and 25 repeats.
    * ``"GMM"`` -- ``GaussianMixture.fit_predict``.
    * ``"T2VH"`` -- a Ward tree built with ``hierarchical.ward_tree`` and cut
      with ``hierarchical._hc_cut``.
    * ``"SVM"``, ``"RandomForest"``, ``"DecisionTree"``,
      ``"LogisticRegression"`` -- the matching classifier is built and handed
      to the ``cross_validation`` helper together with ``vectors`` and ``y``.

    Args:
        clusterType: Name of the algorithm to run (see the list above).
        vectors: Feature vectors to cluster or classify.
        y: Target labels; only forwarded to ``cross_validation`` for the
            classifier-based types and ignored by the clustering types.
        num_clusters: Number of clusters/components for the clustering types.
            ``None`` (the default) uses the module-level ``NUM_CLUSTERS``.

    Returns:
        Whatever the selected algorithm returns as its assignments, or
        ``None`` (after printing a message) when ``clusterType`` is not one
        of the supported names.
    """
    # Fall back to the module-wide setting so that existing three-argument
    # calls keep behaving exactly as before.
    n_clusters = NUM_CLUSTERS if num_clusters is None else num_clusters

    # --- Unsupervised algorithms: return their assignments directly. ---
    if clusterType == "KMeans":
        kclusterer = KMeansClusterer(
            n_clusters,
            distance=nltk.cluster.util.cosine_distance,
            repeats=25)
        return kclusterer.cluster(vectors, assign_clusters=True)

    if clusterType == "GMM":
        mixture = GaussianMixture(n_components=n_clusters)
        return mixture.fit_predict(vectors)

    if clusterType == "T2VH":
        tree = hierarchical.ward_tree(vectors, n_clusters=n_clusters)
        # Positions 0 and 2 of the ward_tree result are used as the children
        # array and the number of leaves, respectively.
        children = tree[0]
        n_leaves = tree[2]
        return hierarchical._hc_cut(n_clusters, children, n_leaves)

    # --- Supervised algorithms: build the estimator lazily, then share a
    # single cross-validation call. ---
    if clusterType == "SVM":
        classifier = SVC(kernel='rbf', gamma='auto', random_state=0)
    elif clusterType == "RandomForest":
        classifier = RandomForestClassifier()
    elif clusterType == "DecisionTree":
        classifier = DecisionTreeClassifier()
    elif clusterType == "LogisticRegression":
        classifier = sklearn.linear_model.LogisticRegression()
    else:
        print(clusterType, " is not a predefined cluster type.")
        return None

    return cross_validation(classifier, vectors, y)
def test_gaussian_mixture_fit_predict():
    # For every covariance type, fit_predict(X) must give the same labels as
    # fit(X).predict(X), and those labels must closely match the true ones.
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)

    for covar_type in COVARIANCE_TYPE:
        features = rand_data.X[covar_type]
        true_labels = rand_data.Y

        # Start the estimator from the parameters stored on rand_data.
        mixture_params = {
            "n_components": rand_data.n_components,
            "random_state": rng,
            "weights_init": rand_data.weights,
            "means_init": rand_data.means,
            "precisions_init": rand_data.precisions[covar_type],
            "covariance_type": covar_type,
        }
        one_step_model = GaussianMixture(**mixture_params)

        # Copy before fitting anything so both estimators start from the
        # same configuration and the same random state.
        two_step_model = copy.deepcopy(one_step_model)

        labels_two_step = two_step_model.fit(features).predict(features)
        labels_one_step = one_step_model.fit_predict(features)

        assert_array_equal(labels_two_step, labels_one_step)
        assert_greater(adjusted_rand_score(true_labels, labels_one_step), .95)