def mnist():
    """Train an iFBTSVM on MNIST and report test accuracy plus wall-clock training time.

    Returns:
        tuple: (accuracy, training time in seconds rounded to 3 decimals)
    """
    hyper = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=10,
        C2=10,
        C3=10,
        C4=10,
        max_iter=50,
        phi=0,
        kernel=RBFSampler(gamma=0.01, n_components=1000),
        forget_score=10,
    )

    train_data = pd.read_csv(f'{DATA_DIR}/MNIST_train_data.csv')
    train_label = pd.read_csv(f'{DATA_DIR}/MNIST_train_label.csv')
    test_data = pd.read_csv(f'{DATA_DIR}/MNIST_test_data.csv')
    test_label = pd.read_csv(f'{DATA_DIR}/MNIST_test_label.csv')

    model = iFBTSVM(parameters=hyper, n_jobs=4)

    # Time the full training pass; labels are flattened to a 1-D vector
    start = time.monotonic()
    model.fit(X=train_data.values,
              y=train_label.values.reshape(train_label.values.shape[0]))
    elapsed = time.monotonic() - start

    # Evaluate on the held-out split
    accuracy = model.score(X=test_data.values, y=test_label.values)
    print(
        f'MNIST: Accuracy: {np.around(accuracy * 100.0, 3)}% Train time: {np.around(elapsed, 3)}s'
    )
    return accuracy, np.around(elapsed, 3)
def test_fuzzy_membership_iris():
    """Regression test: fuzzy memberships on Iris classes 0/1 match recorded reference values."""
    params = Hyperparameters(fuzzy=0.5, epsilon=0.5)
    dataset = load_iris()
    # Positive ensemble = class 0 samples, negative ensemble = class 1 samples
    x_p = dataset.data[np.where(dataset.target == 0)]
    x_n = dataset.data[np.where(dataset.target == 1)]
    membership = fuzzy_membership(params=params, class_p=x_p, class_n=x_n)
    # Recorded reference memberships for the negative class (one value per sample)
    _truth = np.asarray(
        [[0.80251715], [0.97998912], [0.812318], [0.9807224], [0.98189354],
         [0.99937236], [0.96043452], [0.56903383], [0.97212066], [0.96048151],
         [0.60125974], [0.99984087], [0.97872578], [0.99542787], [0.97290636],
         [0.95614206], [0.99496593], [0.99843223], [0.9804573], [0.9889402],
         [0.95800151], [0.99971397], [0.96464217], [0.99565949], [0.99577327],
         [0.97731756], [0.90653641], [0.84949026], [0.999488], [0.94963879],
         [0.97055683], [0.95245829], [0.9980525], [0.94679333], [0.98472415],
         [0.9759757], [0.92815776], [0.98848116], [0.99733116], [0.99105587],
         [0.99493091], [0.9970333], [0.99903366], [0.61157738], [0.9992254],
         [0.99909612], [1.], [0.99982867], [0.5], [0.99991744]])
    assert_allclose(membership.sn, _truth)
    # Recorded reference memberships for the positive class
    _truth = np.asarray(
        [[0.99992168], [0.9917287], [0.99376558], [0.98430655], [0.99974305],
         [0.95670035], [0.99387979], [1.], [0.91239646], [0.99587275],
         [0.98883625], [0.99914972], [0.98704236], [0.85665848], [0.78204237],
         [0.56573912], [0.96225372], [0.99991496], [0.90482216], [0.99526856],
         [0.99049658], [0.99760092], [0.96536234], [0.99558788], [0.98841007],
         [0.99139502], [0.99961253], [0.99956062], [0.99959803], [0.99427123],
         [0.99396476], [0.99323856], [0.94598095], [0.85250365], [0.99691726],
         [0.99689705], [0.98412401], [0.99910674], [0.93096412], [0.99996815],
         [0.99976035], [0.5], [0.95871688], [0.99539269], [0.9728796],
         [0.98887459], [0.99416071], [0.98977514], [0.99446211], [0.99990073]])
    assert_allclose(membership.sp, _truth)
def sea():
    """Train an iFBTSVM on the SEA 10K stream snapshot and report accuracy and train time.

    Returns:
        tuple: (accuracy, training time in seconds rounded to 3 decimals)
    """
    hyper = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=10,
        C2=1,
        C3=10,
        C4=1,
        max_iter=50,
        phi=0,
        kernel=None,  # RBFSampler(gamma=0.6, n_components=1400),
        forget_score=10,
    )

    # First 10K rows train, the remainder test; columns 0-2 are features, 3 the label
    raw = pd.read_csv(f'{DATA_DIR}/SEA/10K/SEA10K.csv').values
    train_data, train_label = raw[:10000, 0:3], raw[:10000, 3:]
    test_data, test_label = raw[10000:, 0:3], raw[10000:, 3:]

    model = iFBTSVM(parameters=hyper, n_jobs=4)

    # Time the full training pass
    start = time.monotonic()
    model.fit(X=train_data, y=train_label.reshape(train_label.shape[0]))
    elapsed = time.monotonic() - start

    accuracy = model.score(X=test_data, y=test_label)
    print(
        f'SEA 10K: Accuracy: {np.around(accuracy * 100.0, 3)}% Train time: {np.around(elapsed, 3)}s'
    )
    return accuracy, np.around(elapsed, 3)
def gisette():
    """Incremental iFBTSVM benchmark on Gisette: small initial fit, then batched updates.

    Prints accuracy/time after the initial fit and again after the incremental update.

    NOTE(review): a second `gisette` defined later in this module shadows this one —
    consider renaming (e.g. gisette_incremental). NOTE(review): `delim_whitespace`
    is deprecated in pandas 2.x; `sep=r'\s+'` is the documented replacement — verify
    the pinned pandas version before switching.
    """
    hyper = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=500,
        phi=0,
        kernel=None,
        forget_score=10,
    )

    train_data = pd.read_csv(f'{DATA_DIR}/gisette_train.data', delim_whitespace=True)
    train_label = pd.read_csv(f'{DATA_DIR}/gisette_train.labels', delim_whitespace=True)
    test_data = pd.read_csv(f'{DATA_DIR}/gisette_valid.data', delim_whitespace=True)
    test_label = pd.read_csv(f'{DATA_DIR}/gisette_valid.labels', delim_whitespace=True)

    model = iFBTSVM(parameters=hyper, n_jobs=1)

    # Initial fit on a small prefix of the training set
    num_points = 500
    start = time.monotonic()
    model.fit(X=train_data[:num_points].values,
              y=train_label[:num_points].values.reshape(
                  train_label[:num_points].values.shape[0]))
    elapsed = time.monotonic() - start

    accuracy_1 = model.score(X=test_data.values, y=test_label.values)
    print(
        f'Gisette\t'
        f'Training (DataPoints|Accuracy|Time): '
        f'{num_points}|{np.around(accuracy_1 * 100.0, 3)}%|{np.around(elapsed, 3)}s\t'
    )

    # Incremental update with the remaining samples, fed in fixed-size batches
    batch_size = 500
    start = time.monotonic()
    model.update(X=train_data[num_points:].values,
                 y=train_label[num_points:].values.reshape(
                     train_label[num_points:].values.shape[0]),
                 batch_size=batch_size)
    u_elapsed = time.monotonic() - start

    accuracy_2 = model.score(X=test_data.values, y=test_label.values)
    print(
        f'Gisette\t'
        f'Update (BatchSize|Accuracy|Time): '
        f'{batch_size}|{np.around(accuracy_2 * 100.0, 3)}%|{np.around(u_elapsed, 3)}s'
    )
def test_compute_score_none():
    """With no prior score, _compute_score pairs each candidate with an initial count of 1.

    NOTE(review): a later duplicate definition of test_compute_score_none shadows
    this one under pytest collection — consider renaming.
    """
    svm = iFBTSVM(parameters=Hyperparameters(phi=0.5))
    candidates = np.arange(1, 6)
    result = svm._compute_score(None, candidates)
    # Row 0: the candidate values; row 1: all-ones occurrence counts
    expected = np.asarray([[1, 2, 3, 4, 5], [1, 1, 1, 1, 1]])
    assert_allclose(result, expected)
def border():
    """Incremental iFBTSVM benchmark on the Border dataset: tiny initial fit, then batched updates.

    Prints accuracy/time after the initial fit and again after the incremental update.
    """
    hyper = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.01,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=500,
        phi=0.00001,
        kernel=RBFSampler(gamma=0.01, n_components=10),
        forget_score=10,
    )

    train_data = pd.read_csv(f'{DATA_DIR}/Border_train_data.csv')
    train_label = pd.read_csv(f'{DATA_DIR}/Border_train_label.csv')
    test_data = pd.read_csv(f'{DATA_DIR}/Border_test_data.csv')
    test_label = pd.read_csv(f'{DATA_DIR}/Border_test_label.csv')

    model = iFBTSVM(parameters=hyper, n_jobs=4)

    # Initial fit on a very small prefix of the training set
    num_points = 60
    start = time.monotonic()
    model.fit(X=train_data[:num_points].values,
              y=train_label[:num_points].values.reshape(
                  train_label[:num_points].values.shape[0]))
    elapsed = time.monotonic() - start

    accuracy_1 = model.score(X=test_data.values, y=test_label.values)
    print(
        f'Border\t'
        f'Training (DataPoints|Accuracy|Time): '
        f'{num_points}|{np.around(accuracy_1 * 100.0, 3)}%|{np.around(elapsed, 3)}s\t'
    )

    # Incremental update with the remaining samples, fed in fixed-size batches
    batch_size = 100
    start = time.monotonic()
    model.update(X=train_data[num_points:].values,
                 y=train_label[num_points:].values.reshape(
                     train_label[num_points:].values.shape[0]),
                 batch_size=batch_size)
    u_elapsed = time.monotonic() - start

    accuracy_2 = model.score(X=test_data.values, y=test_label.values)
    print(
        f'Border\t'
        f'Update (BatchSize|Accuracy|Time): '
        f'{batch_size}|{np.around(accuracy_2 * 100.0, 3)}%|{np.around(u_elapsed, 3)}s'
    )
def test_compute_score_none():
    """With no prior score, _compute_score pairs each candidate with an initial count of 1.

    NOTE(review): this redefines test_compute_score_none declared earlier in the
    module, so pytest only collects this last definition — consider renaming one.
    """
    params = Hyperparameters(phi=0.5)
    svm = iFBTSVM(parameters=params)
    score = None
    # Build [1., 2., 3., 4., 5.] directly instead of filling a zeros array
    # element by element; dtype=float matches the original np.zeros((5,)) buffer.
    c = np.arange(1, 6, dtype=float)
    _score = svm._compute_score(score, c)
    # Row 0: the candidate values; row 1: all-ones occurrence counts
    assert np.array_equal(_score, np.asarray([[1, 2, 3, 4, 5], [1, 1, 1, 1, 1]]))
def usps():
    """Train an iFBTSVM on the USPS digits dataset (sparse index:value text format).

    Returns:
        tuple: (accuracy, training time in seconds rounded to 3 decimals)
    """
    hyper = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=50,
        phi=0,
        kernel=RBFSampler(gamma=0.01, n_components=1000),
        forget_score=10,
    )

    def _load(path, n_rows):
        # Parse "<label> <idx>:<val> ..." lines into a dense (n_rows, 256) matrix.
        features = np.zeros((n_rows, 256))
        labels = np.zeros((n_rows, ))
        with open(path, 'r') as f_in:
            for row, line in enumerate(f_in):
                tokens = line.split(' ')
                labels[row] = tokens[0]
                for token in tokens[1:]:
                    if token == '\n':
                        continue
                    idx, val = token.split(':')
                    # File indices are 1-based; the matrix is 0-based.
                    features[row, int(idx) - 1] = float(val)
        return features, labels

    train_data, train_label = _load(f'{DATA_DIR}/usps', 7291)
    test_data, test_label = _load(f'{DATA_DIR}/usps.t', 2007)

    model = iFBTSVM(parameters=hyper, n_jobs=4)

    # Time the full training pass
    start = time.monotonic()
    model.fit(X=train_data, y=train_label)
    elapsed = time.monotonic() - start

    accuracy = model.score(X=test_data, y=test_label)
    print(
        f'USPS: Accuracy: {np.around(accuracy * 100.0, 3)}% Train time: {np.around(elapsed, 3)}s'
    )
    return accuracy, np.around(elapsed, 3)
def test_fuzzy_membership_no_noise(valid_ensemble_a, valid_ensemble_b):
    """Well-separated ensembles get symmetric memberships and no noise flags on either side."""
    _fuzzy = fuzzy_membership(params=Hyperparameters(fuzzy=0.5, epsilon=0.5),
                              class_p=valid_ensemble_a,
                              class_n=valid_ensemble_b)
    expected = np.asarray([[1.0], [0.5], [0.5]])
    assert np.isclose(_fuzzy.sp, expected).all()
    assert np.isclose(_fuzzy.sn, expected).all()
    # No sample in either class should be flagged as noise
    assert not np.nonzero(_fuzzy.noise_p)[0].size > 0
    assert not np.nonzero(_fuzzy.noise_n)[0].size > 0
def rbf():
    """Incremental iFBTSVM benchmark on the RBF 10K stream: half-set fit, then 5% batches.

    Prints a single summary line with fit and update accuracy/time.
    """
    hyper = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=250,
        phi=0,
        kernel=RBFSampler(gamma=0.45, n_components=300),
        forget_score=10,
    )

    # First 10K rows train, remainder test; columns 0-9 features, 10 the label
    raw = pd.read_csv(f'{DATA_DIR}/RBF/10K/RBF10K.csv').values
    train_data, train_label = raw[:10000, 0:10], raw[:10000, 10:]
    test_data, test_label = raw[10000:, 0:10], raw[10000:, 10:]

    model = iFBTSVM(parameters=hyper, n_jobs=4)

    # Initial fit on the first half of the training set
    num_points = 5000
    start = time.monotonic()
    model.fit(X=train_data[:num_points],
              y=train_label[:num_points].reshape(train_label[:num_points].shape[0]))
    elapsed = time.monotonic() - start
    accuracy_1 = model.score(X=test_data, y=test_label)

    # Incremental update with the second half, in batches of 5% of the dataset
    batch_size = int(len(train_data) / 100 * 5 + 0.5)  # 5% of original dataset
    start = time.monotonic()
    model.update(X=train_data[num_points:],
                 y=train_label[num_points:].reshape(train_label[num_points:].shape[0]),
                 batch_size=batch_size)
    u_elapsed = time.monotonic() - start

    accuracy_2 = model.score(X=test_data, y=test_label)
    print(f'RBF\t'
          f'Training (DataPoints|Accuracy|Time): '
          f'{num_points}|{np.around(accuracy_1 * 100.0, 3)}%|{np.around(elapsed, 3)}s\t'
          f'Update (BatchSize|Accuracy|Time): '
          f'{batch_size}|{np.around(accuracy_2 * 100.0, 3)}%|{np.around(u_elapsed, 3)}s')
def test_fuzzy_membership_noise(valid_ensemble_a, valid_ensemble_b):
    """A negative-class point moved near the positive center is flagged as noise."""
    # Drag the first point of ensemble "b" toward the center of "a"
    valid_ensemble_b[0][0] = 0.8
    valid_ensemble_b[0][1] = 0.8
    _fuzzy = fuzzy_membership(params=Hyperparameters(fuzzy=0.5, epsilon=0.5),
                              class_p=valid_ensemble_a,
                              class_n=valid_ensemble_b)
    expected_p = np.asarray([[1.0], [0.5], [0.5]])
    expected_n = np.asarray([[0.5], [1.0], [1.0]])
    assert np.isclose(_fuzzy.sp, expected_p).all()
    assert np.isclose(_fuzzy.sn, expected_n).all()
    # Only the displaced negative sample (index 0) is marked as noise
    assert not np.nonzero(_fuzzy.noise_p)[0].size > 0
    assert np.nonzero(_fuzzy.noise_n)[0] == [0]
def letter():
    """Train an iFBTSVM on the UCI Letter Recognition dataset and report accuracy/time.

    Returns:
        tuple: (accuracy, training time in seconds rounded to 3 decimals)
    """
    params = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=50,
        phi=0,
        kernel=RBFSampler(gamma=0.03, n_components=500),
        forget_score=10,
    )

    # First column is the letter label, the rest are the 16 features
    _data = pd.read_csv(f'{DATA_DIR}/letter-recognition.data')
    train_data = _data.values[:16000, 1:]
    train_label = _data.values[:16000, 0]
    test_data = _data.values[16000:, 1:]
    test_label = _data.values[16000:, 0]

    # Map letters 'A'..'Z' to integers 0..25
    for i, lbl in enumerate(train_label):
        train_label[i] = ord(lbl) - 65  # '65' -> 'A'
    for i, lbl in enumerate(test_label):
        test_label[i] = ord(lbl) - 65  # '65' -> 'A'

    # BUGFIX: np.int was deprecated in NumPy 1.20 and removed in 1.24; the
    # builtin int is the documented drop-in replacement for astype().
    test_label = test_label.reshape(test_label.shape[0], 1).astype(int)

    ifbtsvm = iFBTSVM(parameters=params, n_jobs=4)

    # Time the full training pass
    before = time.monotonic()
    ifbtsvm.fit(X=train_data, y=train_label)
    after = time.monotonic()
    elapsed = (after - before)

    # Prediction (test_label is already (n, 1); the reshape is a no-op kept for parity)
    accuracy = ifbtsvm.score(X=test_data, y=test_label.reshape(test_label.shape[0], 1))
    print(
        f'Letter: Accuracy: {np.around(accuracy * 100.0, 3)}% Train time: {np.around(elapsed, 3)}s'
    )
    return accuracy, np.around(elapsed, 3)
def test_compute_score():
    """_compute_score increments counts for known candidates and appends unseen ones."""
    svm = iFBTSVM(parameters=Hyperparameters(phi=0.5))

    # Same candidates again: every count is bumped from 1 to 2
    prior = np.asarray([[1, 2, 3, 4, 5], [1, 1, 1, 1, 1]])
    updated = svm._compute_score(prior, np.arange(1, 6))
    assert_allclose(updated, np.asarray([[1, 2, 3, 4, 5], [2, 2, 2, 2, 2]]))

    # A superset of candidates: existing counts bumped, new ones start at 1
    prior = np.asarray([[1, 2, 3, 4, 5], [1, 1, 1, 1, 1]])
    updated = svm._compute_score(prior, np.arange(1, 11))
    assert_allclose(
        updated,
        np.asarray([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                    [2, 2, 2, 2, 2, 1, 1, 1, 1, 1]]))
def gisette():
    """Train an iFBTSVM on the full Gisette training split and report accuracy/time.

    Returns:
        tuple: (accuracy, training time in seconds rounded to 3 decimals)

    NOTE(review): this shadows an earlier `gisette` definition in this module —
    consider renaming one of them. NOTE(review): `delim_whitespace` is deprecated
    in pandas 2.x; `sep=r'\s+'` is the documented replacement — verify the pinned
    pandas version before switching.
    """
    hyper = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=50,
        phi=0,
        kernel=None,  # RBFSampler(gamma=0.4, n_components=150),
        forget_score=10,
    )

    train_data = pd.read_csv(f'{DATA_DIR}/gisette_train.data', delim_whitespace=True)
    train_label = pd.read_csv(f'{DATA_DIR}/gisette_train.labels', delim_whitespace=True)
    test_data = pd.read_csv(f'{DATA_DIR}/gisette_valid.data', delim_whitespace=True)
    test_label = pd.read_csv(f'{DATA_DIR}/gisette_valid.labels', delim_whitespace=True)

    model = iFBTSVM(parameters=hyper, n_jobs=4)

    # Time the full training pass; labels flattened to a 1-D vector
    start = time.monotonic()
    model.fit(X=train_data.values,
              y=train_label.values.reshape(train_label.values.shape[0]))
    elapsed = time.monotonic() - start

    accuracy = model.score(X=test_data.values, y=test_label.values)
    print(
        f'Gisette: Accuracy: {np.around(accuracy * 100.0, 3)}% Train time: {np.around(elapsed, 3)}s'
    )
    return accuracy, np.around(elapsed, 3)
def isolet():
    """Train an iFBTSVM on ISOLET (folds 1-4 train, fold 5 test) and report accuracy/time.

    Returns:
        tuple: (accuracy, training time in seconds rounded to 3 decimals)
    """
    hyper = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=10,
        C2=10,
        C3=10,
        C4=10,
        max_iter=50,
        phi=0,
        kernel=RBFSampler(gamma=0.001, n_components=1000),
        forget_score=10,
    )

    # Columns 0-616 are features, column 617 the spoken-letter label
    raw = pd.read_csv(f'{DATA_DIR}/isolet1+2+3+4.data').values
    train_data, train_label = raw[:, :617], raw[:, 617]
    raw = pd.read_csv(f'{DATA_DIR}/isolet5.data').values
    test_data, test_label = raw[:, :617], raw[:, 617]

    model = iFBTSVM(parameters=hyper, n_jobs=4)

    # Time the full training pass
    start = time.monotonic()
    model.fit(X=train_data, y=train_label.reshape(train_label.shape[0]))
    elapsed = time.monotonic() - start

    accuracy = model.score(X=test_data, y=test_label)
    print(
        f'ISOLET: Accuracy: {np.around(accuracy * 100.0, 3)}% Train time: {np.around(elapsed, 3)}s'
    )
    return accuracy, np.around(elapsed, 3)
def test_fuzzy_membership_epsilon_error(valid_ensemble_a, valid_ensemble_b, u, epsilon):
    """Invalid fuzzy/epsilon hyperparameter combinations must raise ValueError."""
    bad_params = Hyperparameters(fuzzy=u, epsilon=epsilon)
    with pytest.raises(ValueError):
        fuzzy_membership(bad_params, valid_ensemble_a, valid_ensemble_b)