def mnist():
    """Train and evaluate an iFBTSVM on the MNIST CSV dataset.

    Returns a ``(accuracy, train_time)`` tuple, the time rounded to 3 decimals.
    """
    params = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=10,
        C2=10,
        C3=10,
        C4=10,
        max_iter=50,
        phi=0,
        kernel=RBFSampler(gamma=0.01, n_components=1000),
        forget_score=10,
    )

    # Load the pre-split train/test CSV files.
    train_data = pd.read_csv(f'{DATA_DIR}/MNIST_train_data.csv')
    train_label = pd.read_csv(f'{DATA_DIR}/MNIST_train_label.csv')
    test_data = pd.read_csv(f'{DATA_DIR}/MNIST_test_data.csv')
    test_label = pd.read_csv(f'{DATA_DIR}/MNIST_test_label.csv')

    ifbtsvm = iFBTSVM(parameters=params, n_jobs=4)

    # Training (labels flattened to a 1-D vector for fit())
    t_start = time.monotonic()
    ifbtsvm.fit(X=train_data.values, y=train_label.values.reshape(train_label.values.shape[0]))
    elapsed = time.monotonic() - t_start

    # Prediction
    accuracy = ifbtsvm.score(X=test_data.values, y=test_label.values)
    print(
        f'MNIST: Accuracy: {np.around(accuracy * 100.0, 3)}% Train time: {np.around(elapsed, 3)}s'
    )
    return accuracy, np.around(elapsed, 3)
def test_generate_sub_samples(dataset_3_classes):
    """The DAG sub-set generator yields three class pairings, then raises StopIteration."""
    model = iFBTSVM(parameters=Hyperparameters())
    sub_data_sets = model._generate_sub_sets(X=dataset_3_classes.X, y=dataset_3_classes.y)

    # Expected (data, labels, data, labels) tuples for each one-vs-one pairing.
    expected = [
        [np.array([0.9, 1.0, 1.1]), np.array(['1', '1', '1']),
         np.array([10.9, 11.0, 11.1]), np.array(['2', '2', '2'])],
        [np.array([0.9, 1.0, 1.1]), np.array(['1', '1', '1']),
         np.array([110.9, 111.0, 111.1]), np.array(['3', '3', '3'])],
        [np.array([10.9, 11.0, 11.1]), np.array(['2', '2', '2']),
         np.array([110.9, 111.0, 111.1]), np.array(['3', '3', '3'])],
    ]
    for truth in expected:
        dag = next(sub_data_sets)
        for idx in range(len(truth)):
            assert np.array_equal(dag[idx], truth[idx])

    # The generator must be exhausted after the three pairings.
    with pytest.raises(StopIteration):
        _ = next(sub_data_sets)
def sea():
    """Train and evaluate an iFBTSVM on the SEA 10K data stream benchmark.

    Returns a ``(accuracy, train_time)`` tuple, the time rounded to 3 decimals.
    """
    params = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=10,
        C2=1,
        C3=10,
        C4=1,
        max_iter=50,
        phi=0,
        kernel=None,  # alternative tried: RBFSampler(gamma=0.6, n_components=1400)
        forget_score=10,
    )

    # Single CSV: first 10k rows train, the rest test; columns 0-2 features, 3 label.
    _data = pd.read_csv(f'{DATA_DIR}/SEA/10K/SEA10K.csv')
    train_data, train_label = _data.values[:10000, 0:3], _data.values[:10000, 3:]
    test_data, test_label = _data.values[10000:, 0:3], _data.values[10000:, 3:]

    ifbtsvm = iFBTSVM(parameters=params, n_jobs=4)

    # Training (labels flattened to 1-D)
    t_start = time.monotonic()
    ifbtsvm.fit(X=train_data, y=train_label.reshape(train_label.shape[0]))
    elapsed = time.monotonic() - t_start

    # Prediction
    accuracy = ifbtsvm.score(X=test_data, y=test_label)
    print(
        f'SEA 10K: Accuracy: {np.around(accuracy * 100.0, 3)}% Train time: {np.around(elapsed, 3)}s'
    )
    return accuracy, np.around(elapsed, 3)
def gisette():
    """Incremental-learning benchmark for iFBTSVM on the Gisette dataset.

    Fits on the first 500 rows, then feeds the remainder through
    ``update()`` in batches of 500, printing accuracy and timing after
    each phase.

    NOTE(review): another ``gisette()`` is defined later in this file; if
    both definitions live in the same module, the later one shadows this
    one — confirm which is intended and rename one of them.
    """
    params = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=500,
        phi=0,
        kernel=None,
        forget_score=10,
    )

    # FIX: `delim_whitespace=True` is deprecated since pandas 2.1; the
    # documented equivalent is `sep=r'\s+'`.
    train_data = pd.read_csv(f'{DATA_DIR}/gisette_train.data', sep=r'\s+')
    train_label = pd.read_csv(f'{DATA_DIR}/gisette_train.labels', sep=r'\s+')
    test_data = pd.read_csv(f'{DATA_DIR}/gisette_valid.data', sep=r'\s+')
    test_label = pd.read_csv(f'{DATA_DIR}/gisette_valid.labels', sep=r'\s+')

    ifbtsvm = iFBTSVM(parameters=params, n_jobs=1)

    # Phase 1: initial fit on the first `num_points` rows
    num_points = 500
    before = time.monotonic()
    ifbtsvm.fit(X=train_data[:num_points].values,
                y=train_label[:num_points].values.reshape(train_label[:num_points].values.shape[0]))
    after = time.monotonic()
    elapsed = (after - before)

    accuracy_1 = ifbtsvm.score(X=test_data.values, y=test_label.values)
    print(
        f'Gisette\t'
        f'Training (DataPoints|Accuracy|Time): '
        f'{num_points}|{np.around(accuracy_1 * 100.0, 3)}%|{np.around(elapsed, 3)}s\t'
    )

    # Phase 2: incremental update with the remaining rows
    batch_size = 500
    before = time.monotonic()
    ifbtsvm.update(X=train_data[num_points:].values,
                   y=train_label[num_points:].values.reshape(train_label[num_points:].values.shape[0]),
                   batch_size=batch_size)
    after = time.monotonic()
    u_elapsed = after - before

    # Final evaluation after all updates
    accuracy_2 = ifbtsvm.score(X=test_data.values, y=test_label.values)
    print(
        f'Gisette\t'
        f'Update (BatchSize|Accuracy|Time): '
        f'{batch_size}|{np.around(accuracy_2 * 100.0, 3)}%|{np.around(u_elapsed, 3)}s'
    )
def test_compute_score_none():
    """With no previous score, _compute_score seeds every candidate's counter at 1.

    NOTE(review): a test with this same name appears later in the file; if both
    live in one module, pytest only collects the last definition — confirm intent.
    """
    svm = iFBTSVM(parameters=Hyperparameters(phi=0.5))

    candidates = np.arange(1, 6)
    result = svm._compute_score(None, candidates)

    assert_allclose(result, np.asarray([[1, 2, 3, 4, 5], [1, 1, 1, 1, 1]]))
def border():
    """Incremental-learning benchmark for iFBTSVM on the Border dataset.

    Fits on the first 60 rows, then streams the remainder through update()
    in batches of 100, printing accuracy and timing for both phases.
    """
    params = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.01,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=500,
        phi=0.00001,
        kernel=RBFSampler(gamma=0.01, n_components=10),
        forget_score=10,
    )

    train_data = pd.read_csv(f'{DATA_DIR}/Border_train_data.csv')
    train_label = pd.read_csv(f'{DATA_DIR}/Border_train_label.csv')
    test_data = pd.read_csv(f'{DATA_DIR}/Border_test_data.csv')
    test_label = pd.read_csv(f'{DATA_DIR}/Border_test_label.csv')

    ifbtsvm = iFBTSVM(parameters=params, n_jobs=4)

    # Phase 1: fit on an initial slice of the training data
    num_points = 60
    initial_X = train_data[:num_points].values
    initial_y = train_label[:num_points].values.reshape(train_label[:num_points].values.shape[0])
    t_start = time.monotonic()
    ifbtsvm.fit(X=initial_X, y=initial_y)
    elapsed = time.monotonic() - t_start

    accuracy_1 = ifbtsvm.score(X=test_data.values, y=test_label.values)
    print(
        f'Border\t'
        f'Training (DataPoints|Accuracy|Time): '
        f'{num_points}|{np.around(accuracy_1 * 100.0, 3)}%|{np.around(elapsed, 3)}s\t'
    )

    # Phase 2: incremental update with the remaining rows
    batch_size = 100
    remaining_X = train_data[num_points:].values
    remaining_y = train_label[num_points:].values.reshape(train_label[num_points:].values.shape[0])
    t_start = time.monotonic()
    ifbtsvm.update(X=remaining_X, y=remaining_y, batch_size=batch_size)
    u_elapsed = time.monotonic() - t_start

    # Final evaluation after all updates
    accuracy_2 = ifbtsvm.score(X=test_data.values, y=test_label.values)
    print(
        f'Border\t'
        f'Update (BatchSize|Accuracy|Time): '
        f'{batch_size}|{np.around(accuracy_2 * 100.0, 3)}%|{np.around(u_elapsed, 3)}s'
    )
def test_compute_score_none():
    """When no prior score exists, _compute_score pairs each candidate with a count of 1.

    NOTE(review): this duplicates the name of an earlier test in this file;
    if both live in one module, pytest only collects the last definition —
    confirm intent and rename one of them.
    """
    params = Hyperparameters(phi=0.5)
    svm = iFBTSVM(parameters=params)
    score = None
    # Idiom: build the 1..5 candidate vector directly instead of filling a
    # zero array element-by-element in a Python loop.
    c = np.arange(1, 6, dtype=float)
    _score = svm._compute_score(score, c)
    assert np.array_equal(_score, np.asarray([[1, 2, 3, 4, 5], [1, 1, 1, 1, 1]]))
def _load_usps_file(path, n_rows, n_features):
    """Parse one svmlight-style USPS file ('label idx:val ...') into dense arrays.

    Feature indices in the file are 1-based and are shifted to 0-based columns.
    Returns ``(data, labels)`` as float ndarrays of shape ``(n_rows, n_features)``
    and ``(n_rows,)``.
    """
    data = np.zeros((n_rows, n_features))
    labels = np.zeros((n_rows,))
    with open(path, 'r') as f_in:
        for i, line in enumerate(f_in):
            split = line.split(' ')
            labels[i] = split[0]
            for s in split[1:]:
                if s == '\n':  # trailing newline token produced by split(' ')
                    continue
                feats = s.split(':')
                data[i, int(feats[0]) - 1] = float(feats[1])
    return data, labels


def usps():
    """Train and evaluate an iFBTSVM on the USPS digits dataset.

    Returns a ``(accuracy, train_time)`` tuple, the time rounded to 3 decimals.
    """
    params = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=50,
        phi=0,
        kernel=RBFSampler(gamma=0.01, n_components=1000),
        forget_score=10,
    )

    # Decomposition: the identical parsing loop was duplicated for the train
    # and test files; it now lives in _load_usps_file().
    train_data, train_label = _load_usps_file(f'{DATA_DIR}/usps', 7291, 256)
    test_data, test_label = _load_usps_file(f'{DATA_DIR}/usps.t', 2007, 256)

    ifbtsvm = iFBTSVM(parameters=params, n_jobs=4)

    # Training
    before = time.monotonic()
    ifbtsvm.fit(X=train_data, y=train_label)
    after = time.monotonic()
    elapsed = (after - before)

    # Prediction
    accuracy = ifbtsvm.score(X=test_data, y=test_label)
    print(
        f'USPS: Accuracy: {np.around(accuracy * 100.0, 3)}% Train time: {np.around(elapsed, 3)}s'
    )
    return accuracy, np.around(elapsed, 3)
def rbf():
    """Incremental-learning benchmark for iFBTSVM on the RBF 10K stream.

    Fits on the first 5000 rows, then streams the rest through update() in
    batches sized at 5% of the dataset, printing accuracy and timing for
    both phases.
    """
    params = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=250,
        phi=0,
        kernel=RBFSampler(gamma=0.45, n_components=300),
        forget_score=10,
    )

    # Single CSV: columns 0-9 are features, column 10 is the label.
    _data = pd.read_csv(f'{DATA_DIR}/RBF/10K/RBF10K.csv')
    train_data, train_label = _data.values[:10000, 0:10], _data.values[:10000, 10:]
    test_data, test_label = _data.values[10000:, 0:10], _data.values[10000:, 10:]

    ifbtsvm = iFBTSVM(parameters=params, n_jobs=4)

    # Phase 1: initial fit
    num_points = 5000
    t_start = time.monotonic()
    ifbtsvm.fit(X=train_data[:num_points],
                y=train_label[:num_points].reshape(train_label[:num_points].shape[0]))
    elapsed = time.monotonic() - t_start

    accuracy_1 = ifbtsvm.score(X=test_data, y=test_label)

    # Phase 2: incremental updates
    batch_size = int(len(train_data) / 100 * 5 + 0.5)  # 5% of original dataset
    t_start = time.monotonic()
    ifbtsvm.update(X=train_data[num_points:],
                   y=train_label[num_points:].reshape(train_label[num_points:].shape[0]),
                   batch_size=batch_size)
    u_elapsed = time.monotonic() - t_start

    # Final evaluation
    accuracy_2 = ifbtsvm.score(X=test_data, y=test_label)
    print(f'RBF\t'
          f'Training (DataPoints|Accuracy|Time): '
          f'{num_points}|{np.around(accuracy_1 * 100.0, 3)}%|{np.around(elapsed, 3)}s\t'
          f'Update (BatchSize|Accuracy|Time): '
          f'{batch_size}|{np.around(accuracy_2 * 100.0, 3)}%|{np.around(u_elapsed, 3)}s')
def letter():
    """Train and evaluate an iFBTSVM on the Letter Recognition dataset.

    Returns a ``(accuracy, train_time)`` tuple, the time rounded to 3 decimals.
    """
    params = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=50,
        phi=0,
        kernel=RBFSampler(gamma=0.03, n_components=500),
        forget_score=10,
    )

    # First column is the letter label; the remaining columns are features.
    _data = pd.read_csv(f'{DATA_DIR}/letter-recognition.data')
    train_data = _data.values[:16000, 1:]
    train_label = _data.values[:16000, 0]
    test_data = _data.values[16000:, 1:]
    test_label = _data.values[16000:, 0]

    # Map letters 'A'..'Z' onto integer classes 0..25 ('A' is ASCII 65).
    for i, lbl in enumerate(train_label):
        train_label[i] = ord(lbl) - 65  # '65' -> 'A'
    for i, lbl in enumerate(test_label):
        test_label[i] = ord(lbl) - 65  # '65' -> 'A'

    # BUGFIX: `np.int` was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin `int` is the documented replacement.
    test_label = test_label.reshape(test_label.shape[0], 1).astype(int)

    ifbtsvm = iFBTSVM(parameters=params, n_jobs=4)

    # Training
    before = time.monotonic()
    ifbtsvm.fit(X=train_data, y=train_label)
    after = time.monotonic()
    elapsed = (after - before)

    # Prediction (test_label is already (n, 1); the reshape is a no-op kept
    # to preserve the original call exactly)
    accuracy = ifbtsvm.score(X=test_data, y=test_label.reshape(test_label.shape[0], 1))
    print(
        f'Letter: Accuracy: {np.around(accuracy * 100.0, 3)}% Train time: {np.around(elapsed, 3)}s'
    )
    return accuracy, np.around(elapsed, 3)
def test_compute_score():
    """_compute_score increments counters of known candidates and seeds new ones at 1."""
    svm = iFBTSVM(parameters=Hyperparameters(phi=0.5))

    # Same candidate set as the existing score: every counter is incremented.
    existing = np.asarray([[1, 2, 3, 4, 5], [1, 1, 1, 1, 1]])
    updated = svm._compute_score(existing, np.arange(1, 6))
    assert_allclose(updated, np.asarray([[1, 2, 3, 4, 5], [2, 2, 2, 2, 2]]))

    # Superset of candidates: old ones are incremented, new ones start at 1.
    existing = np.asarray([[1, 2, 3, 4, 5], [1, 1, 1, 1, 1]])
    updated = svm._compute_score(existing, np.arange(1, 11))
    assert_allclose(
        updated,
        np.asarray([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [2, 2, 2, 2, 2, 1, 1, 1, 1, 1]]))
def test_predictions():
    """End-to-end fit/score on Iris should reach ~97.33% training accuracy."""
    iris = load_iris()

    params = Hyperparameters(
        epsilon=0.0000001,
        fuzzy=0.01,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=500,
        phi=0.00001,
        kernel=None,
    )

    # Initialisation iFBTSVM
    classifier = iFBTSVM(parameters=params, n_jobs=1)

    # Training
    classifier.fit(X=iris.data, y=iris.target)

    # Prediction: scored on the training set itself.
    assert pytest.approx(classifier.score(X=iris.data, y=iris.target), rel=1e-3) == 0.973333
def gisette():
    """Batch-training benchmark for iFBTSVM on the Gisette dataset.

    Returns a ``(accuracy, train_time)`` tuple, the time rounded to 3 decimals.

    NOTE(review): an earlier incremental ``gisette()`` also exists in this
    file; if both definitions share one module, this later one shadows it —
    confirm intent and rename one of them.
    """
    params = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=50,
        phi=0,
        kernel=None,  # alternative tried: RBFSampler(gamma=0.4, n_components=150)
        forget_score=10,
    )

    # FIX: `delim_whitespace=True` is deprecated since pandas 2.1; the
    # documented equivalent is `sep=r'\s+'`.
    train_data = pd.read_csv(f'{DATA_DIR}/gisette_train.data', sep=r'\s+')
    train_label = pd.read_csv(f'{DATA_DIR}/gisette_train.labels', sep=r'\s+')
    test_data = pd.read_csv(f'{DATA_DIR}/gisette_valid.data', sep=r'\s+')
    test_label = pd.read_csv(f'{DATA_DIR}/gisette_valid.labels', sep=r'\s+')

    ifbtsvm = iFBTSVM(parameters=params, n_jobs=4)

    # Training
    before = time.monotonic()
    ifbtsvm.fit(X=train_data.values, y=train_label.values.reshape(train_label.values.shape[0]))
    after = time.monotonic()
    elapsed = (after - before)

    # Prediction
    accuracy = ifbtsvm.score(X=test_data.values, y=test_label.values)
    print(
        f'Gisette: Accuracy: {np.around(accuracy * 100.0, 3)}% Train time: {np.around(elapsed, 3)}s'
    )
    return accuracy, np.around(elapsed, 3)
def isolet():
    """Train and evaluate an iFBTSVM on the ISOLET spoken-letter dataset.

    Returns a ``(accuracy, train_time)`` tuple, the time rounded to 3 decimals.
    """
    params = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=10,
        C2=10,
        C3=10,
        C4=10,
        max_iter=50,
        phi=0,
        kernel=RBFSampler(gamma=0.001, n_components=1000),
        forget_score=10,
    )

    # Columns 0..616 are features, column 617 is the label.
    _data = pd.read_csv(f'{DATA_DIR}/isolet1+2+3+4.data')
    train_data, train_label = _data.values[:, :617], _data.values[:, 617]
    _data = pd.read_csv(f'{DATA_DIR}/isolet5.data')
    test_data, test_label = _data.values[:, :617], _data.values[:, 617]

    ifbtsvm = iFBTSVM(parameters=params, n_jobs=4)

    # Training (labels flattened to 1-D)
    t_start = time.monotonic()
    ifbtsvm.fit(X=train_data, y=train_label.reshape(train_label.shape[0]))
    elapsed = time.monotonic() - t_start

    # Prediction
    accuracy = ifbtsvm.score(X=test_data, y=test_label)
    print(
        f'ISOLET: Accuracy: {np.around(accuracy * 100.0, 3)}% Train time: {np.around(elapsed, 3)}s'
    )
    return accuracy, np.around(elapsed, 3)
from libifbtsvm import iFBTSVM
from libifbtsvm.models.ifbtsvm import Hyperparameters


def _demo_iris():
    """Train an iFBTSVM on the Iris dataset and print accuracy and fit time."""
    dataset = load_iris()

    params = Hyperparameters(
        epsilon=0.0000001,
        fuzzy=0.01,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=500,
        phi=0.00001,
        kernel=None,
    )

    # Initialisation iFBTSVM
    ifbtsvm = iFBTSVM(parameters=params, n_jobs=1)

    # Training
    before = time.monotonic()
    ifbtsvm.fit(X=dataset.data, y=dataset.target)
    after = time.monotonic()
    elapsed = (after - before)

    # Prediction
    accuracy = ifbtsvm.score(X=dataset.data, y=dataset.target)
    print(f'Accuracy iFBTSVM: {accuracy * 100.0}% Train time: {elapsed}s')


if __name__ == '__main__':
    _demo_iris()