def test_logistic_random_data(self):
    """Check LogisticRegression against scikit-learn on two-class data.

    The data comes from ``get_classification_data(n_classes=2)``. Both
    models are fit on the same training split, and the test passes when
    ``test_accuracy_score`` returns a truthy value for the scikit-learn
    predictions, the LogisticRegression predictions, the true test labels
    and the 0.95 threshold.
    """
    X_train, X_test, y_train, y_test = get_classification_data(n_classes=2)

    logistic = LogisticRegression(sparkSession)
    logistic.fit(X_train, y_train)
    mllearn_predicted = logistic.predict(X_test)

    sklearn_logistic = linear_model.LogisticRegression()
    sklearn_logistic.fit(X_train, y_train)
    sklearn_predicted = sklearn_logistic.predict(X_test)

    # assertTrue replaces the deprecated failUnless alias, which was
    # removed from unittest in Python 3.12.
    self.assertTrue(
        test_accuracy_score(sklearn_predicted, mllearn_predicted,
                            y_test, 0.95))
예제 #2
0
 def test_logistic(self):
     """Check LogisticRegression against scikit-learn on the digits dataset.

     The first 90% of the samples are used for training and the remainder
     for prediction. The test passes when ``accuracy_score`` between the
     scikit-learn predictions and the LogisticRegression predictions
     exceeds 0.95, i.e. the two models largely agree with each other.
     """
     digits = datasets.load_digits()
     X_digits = digits.data
     y_digits = digits.target

     # Compute the 90/10 split index once instead of repeating the expression.
     split = int(.9 * len(X_digits))
     X_train, X_test = X_digits[:split], X_digits[split:]
     y_train = y_digits[:split]

     logistic = LogisticRegression(sparkSession)
     logistic.fit(X_train, y_train)
     mllearn_predicted = logistic.predict(X_test)

     sklearn_logistic = linear_model.LogisticRegression()
     sklearn_logistic.fit(X_train, y_train)

     # We are comparable to a similar algorithm in scikit learn.
     # assertGreater replaces the deprecated failUnless alias (removed from
     # unittest in Python 3.12) and reports the actual score on failure.
     agreement = accuracy_score(sklearn_logistic.predict(X_test), mllearn_predicted)
     self.assertGreater(agreement, 0.95)
예제 #3
0
 def test_logistic(self):
     """Compare LogisticRegression with scikit-learn on the digits dataset.

     Trains both models on the leading 90% of the samples and predicts on
     the trailing 10%. Passes when ``accuracy_score`` computed between the
     two models' predictions is greater than 0.95.
     """
     digits = datasets.load_digits()
     X_digits = digits.data
     y_digits = digits.target

     # Single cut point shared by the feature and label slices.
     cut = int(.9 * len(X_digits))
     X_train = X_digits[:cut]
     y_train = y_digits[:cut]
     X_test = X_digits[cut:]

     logistic = LogisticRegression(sparkSession)
     logistic.fit(X_train, y_train)
     mllearn_predicted = logistic.predict(X_test)

     sklearn_logistic = linear_model.LogisticRegression()
     sklearn_logistic.fit(X_train, y_train)
     sklearn_predicted = sklearn_logistic.predict(X_test)

     # We are comparable to a similar algorithm in scikit learn.
     # failUnless was removed from unittest in Python 3.12; assertGreater
     # keeps the same pass/fail condition and shows the score on failure.
     self.assertGreater(accuracy_score(sklearn_predicted, mllearn_predicted), 0.95)
예제 #4
0
 def test_logistic_sk2(self):
     """Check LogisticRegression with ``transferUsingDF=True`` against scikit-learn.

     Uses the digits dataset with the first 90% of the samples for training
     and the rest for prediction. Passes when ``accuracy_score`` between the
     scikit-learn predictions and the LogisticRegression predictions
     exceeds 0.95.
     """
     digits = datasets.load_digits()
     X_digits = digits.data
     y_digits = digits.target

     # Compute the 90/10 split index once instead of repeating the expression.
     split = int(.9 * len(X_digits))
     X_train, X_test = X_digits[:split], X_digits[split:]
     y_train = y_digits[:split]

     # Convert to DataFrame for i/o: current way to transfer data
     logistic = LogisticRegression(sparkSession, transferUsingDF=True)
     logistic.fit(X_train, y_train)
     mllearn_predicted = logistic.predict(X_test)

     sklearn_logistic = linear_model.LogisticRegression()
     sklearn_logistic.fit(X_train, y_train)

     # We are comparable to a similar algorithm in scikit learn.
     # assertGreater replaces the deprecated failUnless alias (removed from
     # unittest in Python 3.12) and reports the actual score on failure.
     agreement = accuracy_score(sklearn_logistic.predict(X_test), mllearn_predicted)
     self.assertGreater(agreement, 0.95)
 def test_logistic_sk2(self):
     """Compare DataFrame-transfer LogisticRegression with scikit-learn on digits.

     Trains both models on the leading 90% of the samples and predicts on
     the trailing 10%. Passes when ``accuracy_score`` computed between the
     two models' predictions is greater than 0.95.
     """
     # NOTE(review): a method with this same name and body is defined
     # immediately before this one; if both live in the same class, this
     # definition overrides the earlier one -- confirm and drop one copy.
     digits = datasets.load_digits()
     X_digits = digits.data
     y_digits = digits.target

     # Single cut point shared by the feature and label slices.
     cut = int(.9 * len(X_digits))
     X_train = X_digits[:cut]
     y_train = y_digits[:cut]
     X_test = X_digits[cut:]

     # Convert to DataFrame for i/o: current way to transfer data
     logistic = LogisticRegression(sparkSession, transferUsingDF=True)
     logistic.fit(X_train, y_train)
     mllearn_predicted = logistic.predict(X_test)

     sklearn_logistic = linear_model.LogisticRegression()
     sklearn_logistic.fit(X_train, y_train)
     sklearn_predicted = sklearn_logistic.predict(X_test)

     # We are comparable to a similar algorithm in scikit learn.
     # failUnless was removed from unittest in Python 3.12; assertGreater
     # keeps the same pass/fail condition and shows the score on failure.
     self.assertGreater(accuracy_score(sklearn_predicted, mllearn_predicted), 0.95)