def test_rbf_tsvm_hepatitis(self):
    """
    Check the non-linear (RBF kernel) TSVM estimator.

    The model is fitted on ``X``/``y`` and scored on the very same samples,
    so this is a training-accuracy check: more than 95% of the labels must
    be reproduced. ``X`` and ``y`` are defined outside this method
    (the hepatitis dataset, going by the test name).
    """
    model = TSVM('RBF', 1, 0.5, 0.5, 0.1)
    model.fit(X, y)

    predicted = model.predict(X)
    train_accuracy = np.mean(y == predicted)

    assert_greater(train_accuracy, 0.95)
def test_rectangular_tsvm_hepatitis(self):
    """
    Check the RBF TSVM estimator with a rectangular kernel.

    The second constructor argument is 0.75 here instead of 1
    (presumably the rectangular-kernel fraction -- confirm against the
    TSVM signature). The model is fitted and scored on the same ``X``/``y``
    and must reproduce more than 95% of the labels.
    """
    model = TSVM('RBF', 0.75, 0.5, 2, 0.1)
    model.fit(X, y)

    predicted = model.predict(X)
    train_accuracy = np.mean(y == predicted)

    assert_greater(train_accuracy, 0.95)
def test_linear_tsvm_hepatitis(self):
    """
    Check the linear TSVM estimator.

    The model is fitted and scored on the same ``X``/``y`` (the hepatitis
    dataset, going by the test name) and must reproduce more than 78% of
    the labels.
    """
    model = TSVM('linear', 1, 0.5, 0.5)
    model.fit(X, y)

    predicted = model.predict(X)
    train_accuracy = np.mean(y == predicted)

    assert_greater(train_accuracy, 0.78)
def test_rbf_tsvm_set_get_params(self):
    """
    Check that ``set_params`` and ``get_params`` round-trip for an RBF TSVM.

    ``C1``, ``C2`` and ``gamma`` are overridden through ``set_params``;
    ``rect_kernel`` is left untouched and is expected to be reported as 1.
    """
    estimator = TSVM('RBF')
    estimator.set_params(C1=4, C2=2, gamma=0.625)

    expected_params = {
        'C2': 2,
        'C1': 4,
        'rect_kernel': 1,
        'gamma': 0.625,
        'kernel': 'RBF',
    }

    self.assertEqual(
        estimator.get_params(),
        expected_params,
        'set_params and get_params output don\'t match',
    )
def test_linear_tsvm_set_get_params(self):
    """
    Check that ``set_params`` and ``get_params`` round-trip for a linear TSVM.

    Only ``C1`` and ``C2`` are overridden through ``set_params``; ``gamma``
    and ``rect_kernel`` are left untouched and are both expected to be
    reported as 1.
    """
    estimator = TSVM('linear')
    estimator.set_params(C1=0.5, C2=1)

    expected_params = {
        'gamma': 1,
        'C1': 0.5,
        'rect_kernel': 1,
        'C2': 1,
        'kernel': 'linear',
    }

    self.assertEqual(
        estimator.get_params(),
        expected_params,
        'set_params and get_params output don\'t match',
    )
def get_selected_clf(self):
    """
    Build the classifier that matches the user's current selection.

    The binary estimator is chosen by ``self.clf_type`` (``'tsvm'`` or
    ``'lstsvm'``) and constructed with ``self.kernel_type`` and
    ``self.rect_kernel``. When ``self.class_type`` is ``'multiclass'`` the
    estimator is wrapped according to ``self.mc_scheme`` (``'ova'`` or
    ``'ovo'``).

    Returns
    -------
    clf_obj : object
        An estimator object. ``None`` when ``clf_type`` is not recognised
        and no multi-class wrapper applies; the unwrapped binary estimator
        when ``mc_scheme`` is not recognised.
    """
    selected = self.clf_type
    if selected == 'tsvm':
        base_estimator = TSVM(self.kernel_type, self.rect_kernel)
    elif selected == 'lstsvm':
        base_estimator = LSTSVM(self.kernel_type, self.rect_kernel)
    else:
        base_estimator = None

    # Binary problems use the base estimator directly.
    if self.class_type != 'multiclass':
        return base_estimator

    scheme = self.mc_scheme
    if scheme == 'ova':
        return OneVsAllClassifier(base_estimator)
    if scheme == 'ovo':
        return OneVsOneClassifier(base_estimator)

    # Unknown multi-class scheme: fall back to the unwrapped estimator.
    return base_estimator
def test_save_model(self):
    """
    Save a TSVM-based model to disk for use by the tests.

    Nothing is done when a file already exists at
    ``self.save_model_path``; otherwise a default TSVM is wrapped in a
    5-fold cross-validation ``Validator`` and handed to ``save_model``
    with ``C1 = C2 = 1``.
    """
    # Keep an existing model file instead of overwriting it.
    if isfile(self.save_model_path):
        return

    validator = Validator(self.X, self.y, ('CV', 5), TSVM())
    hyper_params = {'C1': 1, 'C2': 1}
    save_model(validator, hyper_params, self.save_model_path)
def __init__(self, *args, **kwargs):
    """
    Prepare the estimators and datasets shared by the tests.

    All arguments are forwarded unchanged to the parent initializer. A
    default TSVM (``bin_clf``) and a one-vs-all wrapper around it
    (``mc_clf``) are created, then a binary and a multi-class dataset are
    read from CSV files under ``./dataset``.
    """
    super().__init__(*args, **kwargs)

    binary_estimator = TSVM()
    self.bin_clf = binary_estimator
    self.mc_clf = OneVsAllClassifier(binary_estimator)

    # Binary dataset
    binary_reader = DataReader('./dataset/2d-synthetic.csv', ',', True)
    binary_reader.load_data(False, False)
    bin_samples, bin_labels, _ = binary_reader.get_data()
    self.bin_X = bin_samples
    self.bin_y = bin_labels

    # Multi-class dataset
    multiclass_reader = DataReader('./dataset/mc-data.csv', ',', True)
    multiclass_reader.load_data(False, False)
    mc_samples, mc_labels, _ = multiclass_reader.get_data()
    self.mc_X = mc_samples
    self.mc_y = mc_labels
# Directory that holds the NDC dataset files
data_path = '/home/mir/mir-projects/NDC'

# Read the training file of the dataset
train_file = join(data_path, 'NDC-train-1l.csv')
dataset = DataReader(train_file, ',', False)
dataset.load_data(False, False)
X, y, _ = dataset.get_data()
print("Loaded the dataset...")

# Hold out 30% of the samples for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
print(X_train.shape)
print("Split train/test sets...")

# A TSVM-based estimator with its default settings
tsvm_model = TSVM()

# Wall-clock time spent in training
fit_started = time.time()
tsvm_model.fit(X_train, y_train)
print("Train time: %.5f" % (time.time() - fit_started))

# Wall-clock time spent in prediction
predict_started = time.time()
pred = tsvm_model.predict(X_test)
print("Test time: %.5f" % (time.time() - predict_started))

# Accuracy on the held-out samples, printed as a percentage
acc = accuracy_score(y_test, pred)
print("Acc: %.2f" % (acc * 100))
# Step 1: Load your dataset
data_path = '../../dataset/australian.csv'
sep_char = ','  # Separator character of the CSV file
header = True  # Whether the dataset has header names
shuffle_data = True
normalize_data = False

dataset = DataReader(data_path, sep_char, header)
dataset.load_data(shuffle_data, normalize_data)
X, y, file_name = dataset.get_data()

# Step 2: Choose a TSVM-based estimator
kernel = 'linear'
tsvm_clf = TSVM(kernel=kernel)

# Step 3: Evaluate the estimator using a train/test split
eval_method = 't_t_split'  # Train/test split
test_set_size = 30  # 30% of samples
split_spec = (eval_method, test_set_size)

val = Validator(X, y, split_spec, tsvm_clf)
eval_func = val.choose_validator()

# Hyper-parameters of the classifier
h_params = dict(C1=2 ** -3, C2=2 ** -5)

acc, std, full_report = eval_func(h_params)

print("Accuracy: %.2f" % acc)
print(full_report)
# LIBTwinSVM: A Library for Twin Support Vector Machines
# Developers: Mir, A. and Mahdi Rahbar
# License: GNU General Public License v3.0

"""
A benchmark script for demonstrating the effectiveness of LIBTwinSVM in terms
of prediction accuracy.
"""

from libtsvm.preprocess import DataReader
from libtsvm.estimators import TSVM
from libtsvm.model_selection import Validator, grid_search

# Load the dataset (comma-separated file)
data_path = '../dataset/australian.csv'
dataset = DataReader(data_path, ',', True)
dataset.load_data(True, False)
X, y, _ = dataset.get_data()

# Linear TSVM evaluated with 5-fold cross-validation
tsvm_clf = TSVM(kernel='linear')
cv_spec = ('CV', 5)
val = Validator(X, y, cv_spec, tsvm_clf)
eval_method = val.choose_validator()

# Search space handed to grid_search; gamma is None for the linear kernel
params = {
    'C1': (-5, 5),
    'C2': (-5, 5),
    'gamma': None,
}

search_result = grid_search(eval_method, params)
best_acc, best_acc_std, opt_params, _ = search_result

summary = "Best accuracy: %.2f+-%.2f | Optimal parameters: %s" % (
    best_acc, best_acc_std, str(opt_params))
print(summary)