def test_logistic_random_data(self): X_train, X_test, y_train, y_test = get_classification_data(n_classes=2) logistic = LogisticRegression(sparkSession) logistic.fit(X_train, y_train) mllearn_predicted = logistic.predict(X_test) sklearn_logistic = linear_model.LogisticRegression() sklearn_logistic.fit(X_train, y_train) self.failUnless( test_accuracy_score(sklearn_logistic.predict(X_test), mllearn_predicted, y_test, 0.95))
def test_logistic(self): digits = datasets.load_digits() X_digits = digits.data y_digits = digits.target n_samples = len(X_digits) X_train = X_digits[:int(.9 * n_samples)] y_train = y_digits[:int(.9 * n_samples)] X_test = X_digits[int(.9 * n_samples):] y_test = y_digits[int(.9 * n_samples):] logistic = LogisticRegression(sparkSession) logistic.fit(X_train, y_train) mllearn_predicted = logistic.predict(X_test) sklearn_logistic = linear_model.LogisticRegression() sklearn_logistic.fit(X_train, y_train) self.failUnless(accuracy_score(sklearn_logistic.predict(X_test), mllearn_predicted) > 0.95) # We are comparable to a similar algorithm in scikit learn
def test_logistic(self): digits = datasets.load_digits() X_digits = digits.data y_digits = digits.target n_samples = len(X_digits) X_train = X_digits[:int(.9 * n_samples)] y_train = y_digits[:int(.9 * n_samples)] X_test = X_digits[int(.9 * n_samples):] y_test = y_digits[int(.9 * n_samples):] logistic = LogisticRegression(sparkSession) logistic.fit(X_train, y_train) mllearn_predicted = logistic.predict(X_test) sklearn_logistic = linear_model.LogisticRegression() sklearn_logistic.fit(X_train, y_train) self.failUnless(accuracy_score(sklearn_logistic.predict(X_test), mllearn_predicted) > 0.95) # We are comparable to a similar algorithm in scikit learn
def test_logistic_sk2(self): digits = datasets.load_digits() X_digits = digits.data y_digits = digits.target n_samples = len(X_digits) X_train = X_digits[:int(.9 * n_samples)] y_train = y_digits[:int(.9 * n_samples)] X_test = X_digits[int(.9 * n_samples):] y_test = y_digits[int(.9 * n_samples):] # Convert to DataFrame for i/o: current way to transfer data logistic = LogisticRegression(sparkSession, transferUsingDF=True) logistic.fit(X_train, y_train) mllearn_predicted = logistic.predict(X_test) sklearn_logistic = linear_model.LogisticRegression() sklearn_logistic.fit(X_train, y_train) self.failUnless(accuracy_score(sklearn_logistic.predict(X_test), mllearn_predicted) > 0.95) # We are comparable to a similar algorithm in scikit learn
def test_logistic_sk2(self): digits = datasets.load_digits() X_digits = digits.data y_digits = digits.target n_samples = len(X_digits) X_train = X_digits[:int(.9 * n_samples)] y_train = y_digits[:int(.9 * n_samples)] X_test = X_digits[int(.9 * n_samples):] y_test = y_digits[int(.9 * n_samples):] # Convert to DataFrame for i/o: current way to transfer data logistic = LogisticRegression(sparkSession, transferUsingDF=True) logistic.fit(X_train, y_train) mllearn_predicted = logistic.predict(X_test) sklearn_logistic = linear_model.LogisticRegression() sklearn_logistic.fit(X_train, y_train) self.failUnless(accuracy_score(sklearn_logistic.predict(X_test), mllearn_predicted) > 0.95) # We are comparable to a similar algorithm in scikit learn