def test_save_load_classifier(self):
    """Check that saving and reloading weights reproduces the classifier.

    A classifier with k=4 is trained on the breast cancer data, its weights
    are written to a file inside ``self.workspace``, and a second classifier
    loads them back. Every weight array and the test-set accuracy must be
    identical before and after the round trip.
    """
    features, labels = datasets.load_breast_cancer(return_X_y=True)
    train_X, test_X, train_y, test_y = train_test_split(
        features, labels, test_size=0.2)
    n_features = features.shape[1]

    trained = pyfms.Classifier(n_features, k=4)
    trained.fit(train_X, train_y, nb_epoch=1000)
    original_weights = trained.get_weights()
    original_accuracy = accuracy_score(test_y, trained.predict(test_X))

    weights_path = os.path.join(self.workspace, 'classifier.fm')
    trained.save_weights(weights_path)

    # Note: the second classifier is constructed with the default k, not k=4.
    restored = pyfms.Classifier(n_features)
    restored.load_weights(weights_path)
    restored_weights = restored.get_weights()
    restored_accuracy = accuracy_score(test_y, restored.predict(test_X))

    for saved, loaded in zip(original_weights, restored_weights):
        np.testing.assert_array_equal(saved, loaded)
    self.assertEqual(original_accuracy, restored_accuracy)
def test_weighted_classifier(self):
    """Train with per-sample weights and compare accuracy to a fixed value.

    The NumPy global seed is set to 0 before splitting the data. Each
    training instance is weighted by the reciprocal of its class count, so
    both classes carry the same total weight during fitting.
    """
    np.random.seed(0)
    features, labels = datasets.load_breast_cancer(return_X_y=True)
    train_X, test_X, train_y, test_y = train_test_split(
        features, labels, test_size=0.2)

    # Re-weight instances so that each class gets equal total weighting.
    classes, counts = np.unique(train_y, return_counts=True)
    count_by_class = dict(zip(classes, counts))
    sample_weight = np.array(
        [1.0 / count_by_class[label] for label in train_y])

    model = pyfms.Classifier(features.shape[1])
    model.fit(train_X, train_y, sample_weight=sample_weight, nb_epoch=10000)

    predictions = model.predict(test_X)
    self.assertAlmostEqual(
        accuracy_score(test_y, predictions), 0.9649122807017544)
def test_sparse_classifier(self):
    """Train a classifier on CSR input and compare accuracy to a fixed value.

    The Boston target is binarized at 30 and only the first five feature
    columns are used. The classifier is created with ``X_format="csr"`` and
    fitted on scipy CSR matrices.
    """
    # NOTE(review): load_boston is not available in recent scikit-learn
    # releases; confirm which version this test is expected to run against.
    np.random.seed(0)
    features, target = datasets.load_boston(return_X_y=True)
    labels = target > 30  # Binarize target

    # Columns 1 and 3 (0-indexed) are sparse.
    # Keep only the first 5 columns for a higher sparsity ratio.
    sparse_features = sparse.csr_matrix(features[:, :5])
    train_X, test_X, train_y, test_y = train_test_split(
        sparse_features, labels, test_size=0.2)

    # Sparsify data
    train_X = sparse.csr_matrix(train_X)
    test_X = sparse.csr_matrix(test_X)

    model = pyfms.Classifier(sparse_features.shape[1], k=2, X_format="csr")
    model.fit(train_X, train_y, nb_epoch=20000)

    predictions = model.predict(test_X)
    self.assertAlmostEqual(
        accuracy_score(test_y, predictions), 0.8725490196078431)
# Fit the factorization machine regressor with an L2 regularizer, then report
# its test-set MSE next to a linear regression trained on the same split.
reg = pyfms.regularizers.L2(0, 0, .01)
fm_regressor.fit(X_train, y_train, nb_epoch=50000, regularizer=reg)
print(' Factorization Machine MSE: {}'.format(
    mean_squared_error(y_test, fm_regressor.predict(X_test))))

linear_regression = LinearRegression()
linear_regression.fit(X_train, y_train)
print(' Linear Regression MSE: {}'.format(
    mean_squared_error(y_test, linear_regression.predict(X_test))))

print('\n*** Binary Classification Example (with verbose output) ***')
X, y = datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
fm_classifier = pyfms.Classifier(X.shape[1])
fm_classifier.fit(X_train, y_train, verbosity=2000, nb_epoch=10000)
print(' Factorization Machine Error: {}'.format(
    error_score(y_test, fm_classifier.predict(X_test))))

# Train the baseline on the training split only. Fitting on the full (X, y)
# would let the model see the test rows and make its reported test error
# incomparable with the factorization machine's.
logistic_regression = LogisticRegression()
logistic_regression.fit(X_train, y_train)
print(' Logistic Regression Error: {}'.format(
    error_score(y_test, logistic_regression.predict(X_test))))

print('\n*** Saving Model Example ***')
# Save the factorization machine classifier that was trained earlier
f = "weights.fm"
fm_classifier.save_weights(f)
# Columns 1 and 3 (0-indexed) are sparse.
# Keep only the first 5 columns for a higher sparsity ratio, stored as CSR.
X = sparse.csr_matrix(X[:, :5])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Re-weight instances so that each class gets equal total weighting:
# every sample is weighted by the reciprocal of its class count.
classes, counts = np.unique(y_train, return_counts=True)
class_count_lookup = dict(zip(classes, counts))
sample_weight = np.array(
    [1.0 / class_count_lookup[label] for label in y_train])

# Sparsify data
X_train = sparse.csr_matrix(X_train)
X_test = sparse.csr_matrix(X_test)

# Factorization machine trained directly on the CSR matrices.
classifier_dims = X.shape[1]
fm_classifier = pyfms.Classifier(classifier_dims, k=2, X_format="csr")
fm_classifier.fit(
    X_train, y_train, sample_weight=sample_weight, nb_epoch=20000)
fm_error = error_score(y_test, fm_classifier.predict(X_test))
print('Factorization Machine Error: {}'.format(fm_error))

# Logistic regression baseline using the same split and sample weights.
logistic_regression = LogisticRegression(solver='lbfgs')
logistic_regression.fit(X_train, y_train, sample_weight=sample_weight)
lr_error = error_score(y_test, logistic_regression.predict(X_test))
print('Logistic Regression Error: {}'.format(lr_error))

print()
print('*******************************************')
print('* Regression Example')