def test_fit_iris(self):
    """Check centroid shape and reproducibility of KMeans on the iris data.

    Verifies that:
    * the fitted centroids have one row per cluster and one column per
      feature,
    * two estimators built with the same ``random_state`` produce the same
      centroids,
    * calling ``fit`` a second time on the same estimator reproduces the
      centroids of the first call,
    * an estimator created without ``n_gpus`` matches the ``n_gpus=1`` one.
    """
    X = load_iris().data
    clusters = 4

    model = KMeans(n_gpus=1, n_clusters=clusters, random_state=123).fit(X)
    # Rows are clusters, columns are features. The previous
    # (features, clusters) tuple only passed because iris has 4 features
    # and clusters == 4.
    assert model.cluster_centers_.shape == (clusters, X.shape[1])

    model_rerun = KMeans(
        n_gpus=1, n_clusters=clusters, random_state=123
    ).fit(X)
    # Same random_state should yield same results
    assert np.allclose(model.cluster_centers_, model_rerun.cluster_centers_)

    # Snapshot the centroids before refitting: if fit() hands back the same
    # estimator object, comparing it against itself could never fail.
    centers_before_refit = np.copy(model_rerun.cluster_centers_)
    model_rerun2 = model_rerun.fit(X)
    # Multiple invocations of fit with the same random_state
    # also should produce the same result
    assert np.allclose(centers_before_refit, model_rerun2.cluster_centers_)

    model_all = KMeans(n_clusters=clusters, random_state=123).fit(X)
    # Multi GPU should yield same result as single GPU
    assert np.allclose(model.cluster_centers_, model_all.cluster_centers_)
def test_fit_iris(self):
    """Fit KMeans on iris and assert centroid shape and determinism.

    Covers three reproducibility claims: a fresh estimator with the same
    ``random_state``, a second ``fit`` call on an already fitted estimator,
    and an estimator constructed without the ``n_gpus`` argument.
    """
    X = load_iris().data
    clusters = 4
    n_features = X.shape[1]

    model = KMeans(n_gpus=1, n_clusters=clusters, random_state=123).fit(X)
    # Expected layout is (n_clusters, n_features). The transposed tuple
    # used before was only correct by coincidence (4 clusters, 4 features).
    assert model.cluster_centers_.shape == (clusters, n_features)

    model_rerun = KMeans(
        n_gpus=1, n_clusters=clusters, random_state=123
    ).fit(X)
    # Same random_state should yield same results
    assert np.allclose(model.cluster_centers_, model_rerun.cluster_centers_)

    # Keep an independent copy of the first fit's centroids so the refit
    # check does not degenerate into comparing one object with itself.
    first_fit_centers = np.copy(model_rerun.cluster_centers_)
    model_rerun2 = model_rerun.fit(X)
    # Multiple invocations of fit with the same random_state
    # also should produce the same result
    assert np.allclose(first_fit_centers, model_rerun2.cluster_centers_)

    model_all = KMeans(n_clusters=clusters, random_state=123).fit(X)
    # Multi GPU should yield same result as single GPU
    assert np.allclose(model.cluster_centers_, model_all.cluster_centers_)
def test_fit_iris_backupsklearn(self):
    """Compare KMeans seeded with explicit initial centroids against sklearn.

    A first model is fitted normally; its centroids are then passed as
    ``init`` (with ``n_init=1``) both to this project's ``KMeans`` and to
    ``sklearn.cluster.KMeans``. All three sets of centroids are expected
    to agree within ``np.allclose`` tolerance.
    """
    X = load_iris().data
    clusters = 4

    print("Running model")
    model = KMeans(n_gpus=1, n_clusters=clusters, random_state=123).fit(X)
    # Centroids are (n_clusters, n_features) -- the layout sklearn requires
    # for an array-valued ``init``, which is how they are used below. The
    # former (features, clusters) tuple only passed because both are 4.
    assert model.cluster_centers_.shape == (clusters, X.shape[1])
    print("passed 1")

    print("Running model_rerun")
    model_rerun = KMeans(
        n_gpus=1,
        n_clusters=clusters,
        random_state=123,
        init=model.cluster_centers_,
        n_init=1,
    ).fit(X)
    # Flush so the centroids are visible even if a later step aborts.
    print(model_rerun.cluster_centers_, flush=True)

    # Choosing initial clusters for sklearn should yield similar result
    assert np.allclose(model.cluster_centers_, model_rerun.cluster_centers_)

    # sklearn directly or our indirect should be same (and is)
    from sklearn.cluster import KMeans as KMeans_test

    print("Running model_rerun2")
    model_rerun2 = KMeans_test(
        n_clusters=clusters,
        random_state=123,
        init=model.cluster_centers_,
        n_init=1,
    ).fit(X)
    print(model_rerun2.cluster_centers_, flush=True)

    assert np.allclose(
        model_rerun.cluster_centers_, model_rerun2.cluster_centers_
    )
    print("passed 2")
def test_transform_iris(self):
    """The arg-min of each ``transform`` row must equal the ``predict`` label."""
    features = load_iris().data
    estimator = KMeans(n_gpus=1, n_clusters=4, random_state=1234567)
    fitted = estimator.fit(features)

    # For every sample, pick the column index holding the smallest value.
    nearest = [np.argmin(row) for row in fitted.transform(features)]

    assert all(nearest == fitted.predict(features))
def test_fit_iris_precision(self):
    """Labels must not depend on whether the input is float64 or float32.

    The same estimator instance is fitted twice, first on the data as
    loaded and then on a ``float32`` copy, and the predicted labels from
    both runs are compared element-wise.
    """
    data_double = load_iris().data
    data_single = data_double.astype(np.float32)

    estimator = KMeans(n_gpus=1, n_clusters=4, random_state=12345)

    fitted_double = estimator.fit(data_double)
    labels_double = fitted_double.predict(data_double)

    fitted_single = estimator.fit(data_single)
    labels_single = fitted_single.predict(data_single)

    assert all(labels_double == labels_single)
def test_fit_vs_sk_iris(self):
    """``predict`` and ``sklearn_predict`` must assign identical labels."""
    features = load_iris().data
    estimator = KMeans(n_gpus=1, n_clusters=4, random_state=1234)
    fitted = estimator.fit(features)

    native_labels = fitted.predict(features)
    sklearn_labels = fitted.sklearn_predict(features)

    assert all(native_labels == sklearn_labels)
def test_transform_iris(self):
    """Check that ``transform`` and ``predict`` agree on the iris samples.

    Each row returned by ``transform`` is reduced to the index of its
    smallest entry; those indices must match the labels from ``predict``.
    """
    samples = load_iris().data
    km = KMeans(n_gpus=1, n_clusters=4, random_state=1234567).fit(samples)

    transformed = km.transform(samples)
    argmin_labels = []
    for row in transformed:
        argmin_labels.append(np.argmin(row))

    predicted = km.predict(samples)
    assert all(argmin_labels == predicted)
def func():
    """Compare two logistic models on a two-class, mean-centred iris subset.

    Fits ``h2o4gpu.LogisticRegression(penalty="l1")`` and an
    ``h2o4gpu.ElasticNetH2O`` configured with ``family='logistic'`` on the
    same training rows, then asserts that the rounded predictions of the
    first model equal the held-out labels and that both models return
    identical predictions.
    """
    # data prep
    iris = datasets.load_iris()
    X = iris.data
    y = iris.target

    # removing the third class, making it a binary problem
    X = X[y != 2]
    y = y[y != 2]

    # Centre every feature column on zero (boolean indexing above already
    # produced a copy, so the loaded dataset itself is not modified).
    X -= np.mean(X, 0)

    # splitting into train and valid frame
    holdout_rows = np.r_[40:50, 90:100]
    train_rows = np.r_[:40, 50:90]
    X_test = X[holdout_rows]
    y_test = y[holdout_rows]
    X = X[train_rows]
    y = y[train_rows]

    logreg = h2o4gpu.LogisticRegression(penalty="l1")
    lr = h2o4gpu.ElasticNetH2O(
        n_threads=None,
        n_alphas=1,
        n_lambdas=1,
        n_folds=1,
        lambda_max=1.0,
        lambda_min_ratio=1.0,
        lambda_stop_early=False,
        store_full_path=0,
        alphas=None,
        lambdas=None,
        family='logistic',
        alpha_max=1.0,
        alpha_min=1.0,
    )

    model = logreg.fit(X, y)
    mm = lr.fit(X, y)

    y_pred = model.predict(X_test)
    print(y_pred)
    y_p = mm.predict(X_test)
    print(y_p)
    print(y_pred, np.round(y_pred))

    # TO-DO: change the assertion once the logic to convert probabilities
    # to classes is implemented
    assert (y_test == np.round(y_pred)).all()
    # NOTE(review): exact equality between the two models' outputs is kept
    # as in the original; confirm whether a tolerance is more appropriate.
    assert (y_pred == y_p).all()
def test_predict_iris(self):
    """``labels_`` of the fitted model must equal ``predict`` on the same data."""
    features = load_iris().data
    estimator = KMeans(n_gpus=1, n_clusters=4, random_state=123456)
    fitted = estimator.fit(features)

    stored_labels = fitted.labels_
    predicted_labels = fitted.predict(features)

    assert all(stored_labels == predicted_labels)
def test_predict_iris(self, order):
    """``labels_`` must equal ``predict`` for the given array memory order.

    ``order`` is forwarded unchanged to ``np.asanyarray`` when preparing
    the iris feature matrix (presumably supplied by a test
    parametrization that is not visible here).
    """
    raw_features = load_iris().data
    features = np.asanyarray(raw_features, order=order)

    fitted = KMeans(n_gpus=1, n_clusters=4, random_state=123456).fit(features)

    stored_labels = fitted.labels_
    predicted_labels = fitted.predict(features)

    assert all(stored_labels == predicted_labels)