def test_ovr():
    """Check that DistOneVsRestClassifier recovers three clearly distinct classes.

    The classifier is built without a Spark context argument. Three distinct
    feature rows are repeated 100 times with labels 0, 1 and 2, and the model
    must predict those labels back for the first three rows.
    """
    base_rows = np.array([[0, 0, 1, 1], [1, 1, 0, 0], [-1, -1, -1, -1]])
    base_labels = np.array([0, 1, 2])

    # Repeat the three-row pattern 100 times (rows keep their original order).
    features = np.tile(base_rows, (100, 1))
    labels = np.tile(base_labels, 100)

    classifier = DistOneVsRestClassifier(LogisticRegression(solver="liblinear"))
    classifier.fit(features, labels)

    predicted = classifier.predict(features[:3])
    expected = np.array([0, 1, 2])
    assert np.allclose(predicted, expected)
def test_multiclass(spark_session):
    """Fit DistOneVsRestClassifier with a Spark context and check prediction shape.

    Args:
        spark_session: fixture object exposing a ``sparkContext`` attribute,
            which is handed to the classifier.
    """
    spark_context = spark_session.sparkContext

    # Sample data from load_digits(), split 80/20 with a fixed seed.
    digits = load_digits()
    features, labels = digits["data"], digits["target"]
    train_features, test_features, train_labels, test_labels = train_test_split(
        features, labels, test_size=0.2, random_state=10
    )

    # One-vs-rest wrapper around a liblinear logistic regression.
    classifier = DistOneVsRestClassifier(
        LogisticRegression(solver="liblinear"), spark_context
    )
    classifier.fit(train_features, train_labels)

    # One prediction is expected per held-out sample.
    predictions = classifier.predict(test_features)
    assert predictions.shape == test_labels.shape
# Example-script fragment: one-vs-rest section, then the start of one-vs-one.
#
# Visible steps, in order:
#   1. Load data with load_digits() and split it with train_test_split
#      (random_state=10).
#   2. Build DistOneVsRestClassifier around LogisticRegression, passing `sc`.
#   3. Fit on the training split, then compute label predictions and
#      class probabilities for the test split.
#   4. Print F1, precision and recall, then print the model after a
#      pickle dumps/loads round trip.
#   5. Build DistOneVsOneClassifier with the same base estimator and `sc`.
#
# NOTE(review): `solver`, `test_size`, `scoring_average` and `sc` are not
#   defined in the visible span — presumably assigned earlier in the script.
# NOTE(review): the "Weighted F1" label assumes scoring_average == "weighted";
#   confirm against its definition.
# NOTE(review): `probs` is not used anywhere in the visible portion.
# NOTE(review): the "(binary target)" comment looks inconsistent with the
#   one-vs-rest / one-vs-one models used here — confirm and correct.
# load sample data (binary target) data = load_digits() X = data["data"] y = data["target"] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=10) ### distributed one vs rest model = DistOneVsRestClassifier(LogisticRegression(solver=solver), sc) # distributed fitting with spark model.fit(X_train, y_train) # predictions on the driver preds = model.predict(X_test) probs = model.predict_proba(X_test) # results print("-- One Vs Rest --") print("Weighted F1: {0}".format( f1_score(y_test, preds, average=scoring_average))) print("Precision: {0}".format( precision_score(y_test, preds, average=scoring_average))) print("Recall: {0}".format(recall_score(y_test, preds, average=scoring_average))) print(pickle.loads(pickle.dumps(model))) ### distributed one vs one model = DistOneVsOneClassifier(LogisticRegression(solver=solver), sc) # distributed fitting with spark