Example #1
def mnist():
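    # MNIST benchmark: fit on the training CSVs, then report test accuracy and training time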
    params = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=10,
        C2=10,
        C3=10,
        C4=10,
        max_iter=50,
        phi=0,
        kernel=RBFSampler(gamma=0.01, n_components=1000),
        forget_score=10,
    )

    train_data = pd.read_csv(f'{DATA_DIR}/MNIST_train_data.csv')
    train_label = pd.read_csv(f'{DATA_DIR}/MNIST_train_label.csv')
    test_data = pd.read_csv(f'{DATA_DIR}/MNIST_test_data.csv')
    test_label = pd.read_csv(f'{DATA_DIR}/MNIST_test_label.csv')

    ifbtsvm = iFBTSVM(parameters=params, n_jobs=4)

    # Training
    before = time.monotonic()
    ifbtsvm.fit(X=train_data.values,
                y=train_label.values.reshape(train_label.values.shape[0]))
    after = time.monotonic()
    elapsed = (after - before)

    # Prediction
    accuracy = ifbtsvm.score(X=test_data.values, y=test_label.values)
    print(
        f'MNIST: Accuracy: {np.around(accuracy * 100.0, 3)}% Train time: {np.around(elapsed, 3)}s'
    )
    return accuracy, np.around(elapsed, 3)
Example #2
def test_generate_sub_samples(dataset_3_classes):
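    # _generate_sub_sets should yield one (X_pos, y_pos, X_neg, y_neg) tuple per class pair: (1,2), (1,3), (2,3)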

    parameters = Hyperparameters()

    model = iFBTSVM(parameters=parameters)

    sub_data_sets = model._generate_sub_sets(X=dataset_3_classes.X, y=dataset_3_classes.y)

    dag_1 = next(sub_data_sets)
    truth_1 = [np.array([0.9, 1.0, 1.1]), np.array(['1', '1', '1']),
               np.array([10.9, 11.0, 11.1]), np.array(['2', '2', '2'])]

    for i in range(len(truth_1)):
        assert np.array_equal(dag_1[i], truth_1[i])

    dag_2 = next(sub_data_sets)
    truth_2 = [np.array([0.9, 1.0, 1.1]), np.array(['1', '1', '1']),
               np.array([110.9, 111.0, 111.1]), np.array(['3', '3', '3'])]

    for i in range(len(truth_2)):
        assert np.array_equal(dag_2[i], truth_2[i])

    dag_3 = next(sub_data_sets)
    truth_3 = [np.array([10.9, 11.0, 11.1]), np.array(['2', '2', '2']),
               np.array([110.9, 111.0, 111.1]), np.array(['3', '3', '3'])]

    for i in range(len(truth_3)):
        assert np.array_equal(dag_3[i], truth_3[i])

    with pytest.raises(StopIteration):
        _ = next(sub_data_sets)
Example #3
def sea():
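    # SEA 10K benchmark: first 10,000 rows for training, the remaining rows for testing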
    params = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=10,
        C2=1,
        C3=10,
        C4=1,
        max_iter=50,
        phi=0,
        kernel=None,  # RBFSampler(gamma=0.6, n_components=1400),
        forget_score=10,
    )

    _data = pd.read_csv(f'{DATA_DIR}/SEA/10K/SEA10K.csv')
    train_data = _data.values[:10000, 0:3]
    train_label = _data.values[:10000, 3:]
    test_data = _data.values[10000:, 0:3]
    test_label = _data.values[10000:, 3:]

    ifbtsvm = iFBTSVM(parameters=params, n_jobs=4)

    # Training
    before = time.monotonic()
    ifbtsvm.fit(X=train_data, y=train_label.reshape(train_label.shape[0]))
    after = time.monotonic()
    elapsed = (after - before)

    # Prediction
    accuracy = ifbtsvm.score(X=test_data, y=test_label)
    print(
        f'SEA 10K: Accuracy: {np.around(accuracy * 100.0, 3)}% Train time: {np.around(elapsed, 3)}s'
    )
    return accuracy, np.around(elapsed, 3)
Example #4
def gisette():
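    # Gisette benchmark: initial fit on the first 500 samples, then incremental updates in batches of 500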

    params = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=500,
        phi=0,
        kernel=None,
        forget_score=10,
    )

    train_data = pd.read_csv(f'{DATA_DIR}/gisette_train.data',
                             delim_whitespace=True)
    train_label = pd.read_csv(f'{DATA_DIR}/gisette_train.labels',
                              delim_whitespace=True)
    test_data = pd.read_csv(f'{DATA_DIR}/gisette_valid.data',
                            delim_whitespace=True)
    test_label = pd.read_csv(f'{DATA_DIR}/gisette_valid.labels',
                             delim_whitespace=True)

    ifbtsvm = iFBTSVM(parameters=params, n_jobs=1)

    # Training
    num_points = 500
    before = time.monotonic()
    ifbtsvm.fit(X=train_data[:num_points].values,
                y=train_label[:num_points].values.reshape(
                    train_label[:num_points].values.shape[0]))
    after = time.monotonic()
    elapsed = (after - before)
    accuracy_1 = ifbtsvm.score(X=test_data.values, y=test_label.values)

    print(
        f'Gisette\t'
        f'Training (DataPoints|Accuracy|Time): '
        f'{num_points}|{np.around(accuracy_1 * 100.0, 3)}%|{np.around(elapsed, 3)}s\t'
    )

    # Update
    batch_size = 500
    before = time.monotonic()
    ifbtsvm.update(X=train_data[num_points:].values,
                   y=train_label[num_points:].values.reshape(
                       train_label[num_points:].values.shape[0]),
                   batch_size=batch_size)
    after = time.monotonic()
    u_elapsed = after - before

    # Prediction
    accuracy_2 = ifbtsvm.score(X=test_data.values, y=test_label.values)
    print(
        f'Gisette\t'
        f'Update (BatchSize|Accuracy|Time): '
        f'{batch_size}|{np.around(accuracy_2 * 100.0, 3)}%|{np.around(u_elapsed, 3)}s'
    )
Example #5
def test_compute_score_none():
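    # When no previous score exists, _compute_score should return the candidates paired with an initial score of 1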
    params = Hyperparameters(phi=0.5)
    svm = iFBTSVM(parameters=params)

    score = None
    c = np.arange(1, 6)

    _score = svm._compute_score(score, c)

    assert_allclose(_score, np.asarray([[1, 2, 3, 4, 5], [1, 1, 1, 1, 1]]))
Example #6
def border():
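    # Border benchmark: initial fit on the first 60 samples, then incremental updates in batches of 100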

    params = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.01,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=500,
        phi=0.00001,
        kernel=RBFSampler(gamma=0.01, n_components=10),
        forget_score=10,
    )

    train_data = pd.read_csv(f'{DATA_DIR}/Border_train_data.csv')
    train_label = pd.read_csv(f'{DATA_DIR}/Border_train_label.csv')
    test_data = pd.read_csv(f'{DATA_DIR}/Border_test_data.csv')
    test_label = pd.read_csv(f'{DATA_DIR}/Border_test_label.csv')

    ifbtsvm = iFBTSVM(parameters=params, n_jobs=4)

    # Training
    num_points = 60
    before = time.monotonic()
    ifbtsvm.fit(X=train_data[:num_points].values,
                y=train_label[:num_points].values.reshape(
                    train_label[:num_points].values.shape[0]))
    after = time.monotonic()
    elapsed = (after - before)
    accuracy_1 = ifbtsvm.score(X=test_data.values, y=test_label.values)

    print(
        f'Border\t'
        f'Training (DataPoints|Accuracy|Time): '
        f'{num_points}|{np.around(accuracy_1 * 100.0, 3)}%|{np.around(elapsed, 3)}s\t'
    )

    # Update
    batch_size = 100
    before = time.monotonic()
    ifbtsvm.update(X=train_data[num_points:].values,
                   y=train_label[num_points:].values.reshape(
                       train_label[num_points:].values.shape[0]),
                   batch_size=batch_size)
    after = time.monotonic()
    u_elapsed = after - before

    # Prediction
    accuracy_2 = ifbtsvm.score(X=test_data.values, y=test_label.values)
    print(
        f'Border\t'
        f'Update (BatchSize|Accuracy|Time): '
        f'{batch_size}|{np.around(accuracy_2 * 100.0, 3)}%|{np.around(u_elapsed, 3)}s'
    )
Example #7
def test_compute_score_none():
    params = Hyperparameters(phi=0.5)
    svm = iFBTSVM(parameters=params)

    score = None
    c = np.zeros((5,))
    for i in range(5):
        c[i] = i + 1

    _score = svm._compute_score(score, c)

    assert np.array_equal(_score, np.asarray([[1, 2, 3, 4, 5], [1, 1, 1, 1, 1]]))
Example #8
def usps():
    params = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=50,
        phi=0,
        kernel=RBFSampler(gamma=0.01, n_components=1000),
        forget_score=10,
    )

    train_data = np.zeros((7291, 256))
    train_label = np.zeros((7291, ))
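    # Parse the LIBSVM-style sparse format '<label> <index>:<value> ...' (feature indices are 1-based)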
    with open(f'{DATA_DIR}/usps', 'r') as f_in:
        for i, line in enumerate(f_in):
            split = line.split(' ')
            train_label[i] = split[0]
            for s in split[1:]:
                if s == '\n':
                    continue
                feats = s.split(':')
                train_data[i, int(feats[0]) - 1] = float(feats[1])

    test_data = np.zeros((2007, 256))
    test_label = np.zeros((2007, ))
    with open(f'{DATA_DIR}/usps.t', 'r') as f_in:
        for i, line in enumerate(f_in):
            split = line.split(' ')
            test_label[i] = split[0]
            for s in split[1:]:
                if s == '\n':
                    continue
                feats = s.split(':')
                test_data[i, int(feats[0]) - 1] = float(feats[1])

    ifbtsvm = iFBTSVM(parameters=params, n_jobs=4)

    # Training
    before = time.monotonic()
    ifbtsvm.fit(X=train_data, y=train_label)
    after = time.monotonic()
    elapsed = (after - before)

    # Prediction
    accuracy = ifbtsvm.score(X=test_data, y=test_label)
    print(
        f'USPS: Accuracy: {np.around(accuracy * 100.0, 3)}% Train time: {np.around(elapsed, 3)}s'
    )
    return accuracy, np.around(elapsed, 3)
Example #9
def rbf():
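    # RBF 10K benchmark: fit on the first 5,000 rows, then update with the rest in batches of roughly 5% of the dataset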
    params = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=250,
        phi=0,
        kernel=RBFSampler(gamma=0.45, n_components=300),
        forget_score=10,
    )

    _data = pd.read_csv(f'{DATA_DIR}/RBF/10K/RBF10K.csv')
    train_data = _data.values[:10000, 0:10]
    train_label = _data.values[:10000, 10:]
    test_data = _data.values[10000:, 0:10]
    test_label = _data.values[10000:, 10:]

    ifbtsvm = iFBTSVM(parameters=params, n_jobs=4)

    # Training
    num_points = 5000
    before = time.monotonic()
    ifbtsvm.fit(X=train_data[:num_points],
                y=train_label[:num_points].reshape(train_label[:num_points].shape[0]))
    after = time.monotonic()
    elapsed = (after - before)
    accuracy_1 = ifbtsvm.score(X=test_data, y=test_label)

    # Update
    batch_size = int(len(train_data) / 100 * 5 + 0.5)  # 5% of original dataset
    before = time.monotonic()
    ifbtsvm.update(X=train_data[num_points:],
                   y=train_label[num_points:].reshape(train_label[num_points:].shape[0]),
                   batch_size=batch_size)
    after = time.monotonic()
    u_elapsed = after - before

    # Prediction
    accuracy_2 = ifbtsvm.score(X=test_data, y=test_label)
    print(f'RBF\t'
          f'Training (DataPoints|Accuracy|Time): '
          f'{num_points}|{np.around(accuracy_1 * 100.0, 3)}%|{np.around(elapsed, 3)}s\t'
          f'Update (BatchSize|Accuracy|Time): '
          f'{batch_size}|{np.around(accuracy_2 * 100.0, 3)}%|{np.around(u_elapsed, 3)}s')
Example #10
def letter():
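    # Letter Recognition benchmark: first 16,000 rows for training, the rest for testing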
    params = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=50,
        phi=0,
        kernel=RBFSampler(gamma=0.03, n_components=500),
        forget_score=10,
    )

    _data = pd.read_csv(f'{DATA_DIR}/letter-recognition.data')
    train_data = _data.values[:16000, 1:]
    train_label = _data.values[:16000, 0]
    test_data = _data.values[16000:, 1:]
    test_label = _data.values[16000:, 0]

    # Convert letter labels 'A'..'Z' to integer classes 0..25
    for i, lbl in enumerate(train_label):
        train_label[i] = ord(lbl) - 65  # ord('A') == 65

    for i, lbl in enumerate(test_label):
        test_label[i] = ord(lbl) - 65  # ord('A') == 65

    test_label = test_label.reshape(test_label.shape[0], 1).astype(int)

    ifbtsvm = iFBTSVM(parameters=params, n_jobs=4)

    # Training
    before = time.monotonic()
    ifbtsvm.fit(X=train_data, y=train_label)
    after = time.monotonic()
    elapsed = (after - before)

    # Prediction
    accuracy = ifbtsvm.score(X=test_data,
                             y=test_label.reshape(test_label.shape[0], 1))
    print(
        f'Letter: Accuracy: {np.around(accuracy * 100.0, 3)}% Train time: {np.around(elapsed, 3)}s'
    )
    return accuracy, np.around(elapsed, 3)
Example #11
def test_compute_score():
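    # Existing candidates have their score incremented; candidates not seen before are appended with a score of 1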
    params = Hyperparameters(phi=0.5)
    svm = iFBTSVM(parameters=params)

    score = np.asarray([[1, 2, 3, 4, 5], [1, 1, 1, 1, 1]])
    c = np.arange(1, 6)

    _score = svm._compute_score(score, c)

    assert_allclose(_score, np.asarray([[1, 2, 3, 4, 5], [2, 2, 2, 2, 2]]))

    score = np.asarray([[1, 2, 3, 4, 5], [1, 1, 1, 1, 1]])
    c = np.arange(1, 11)

    _score = svm._compute_score(score, c)

    assert_allclose(
        _score,
        np.asarray([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                    [2, 2, 2, 2, 2, 1, 1, 1, 1, 1]]))
Example #12
def test_predictions():
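    # End-to-end check: a linear iFBTSVM trained on Iris should score ~97.33% on the training data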
    dataset = load_iris()
    params = Hyperparameters(
        epsilon=0.0000001,
        fuzzy=0.01,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=500,
        phi=0.00001,
        kernel=None,
    )

    # Initialisation iFBTSVM
    ifbtsvm = iFBTSVM(parameters=params, n_jobs=1)

    # Training
    ifbtsvm.fit(X=dataset.data, y=dataset.target)

    # Prediction
    assert pytest.approx(ifbtsvm.score(X=dataset.data, y=dataset.target), rel=1e-3) == 0.973333
Example #13
def gisette():
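    # Gisette benchmark (batch training only): fit on the full training split and report test accuracy and training time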

    params = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=50,
        phi=0,
        kernel=None,  # RBFSampler(gamma=0.4, n_components=150),
        forget_score=10,
    )

    train_data = pd.read_csv(f'{DATA_DIR}/gisette_train.data',
                             delim_whitespace=True)
    train_label = pd.read_csv(f'{DATA_DIR}/gisette_train.labels',
                              delim_whitespace=True)
    test_data = pd.read_csv(f'{DATA_DIR}/gisette_valid.data',
                            delim_whitespace=True)
    test_label = pd.read_csv(f'{DATA_DIR}/gisette_valid.labels',
                             delim_whitespace=True)

    ifbtsvm = iFBTSVM(parameters=params, n_jobs=4)

    # Training
    before = time.monotonic()
    ifbtsvm.fit(X=train_data.values,
                y=train_label.values.reshape(train_label.values.shape[0]))
    after = time.monotonic()
    elapsed = (after - before)

    # Prediction
    accuracy = ifbtsvm.score(X=test_data.values, y=test_label.values)
    print(
        f'Gisette: Accuracy: {np.around(accuracy * 100.0, 3)}% Train time: {np.around(elapsed, 3)}s'
    )
    return accuracy, np.around(elapsed, 3)
Example #14
def isolet():
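    # ISOLET benchmark: isolet1+2+3+4 for training, isolet5 for testing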
    params = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=10,
        C2=10,
        C3=10,
        C4=10,
        max_iter=50,
        phi=0,
        kernel=RBFSampler(gamma=0.001, n_components=1000),
        forget_score=10,
    )

    _data = pd.read_csv(f'{DATA_DIR}/isolet1+2+3+4.data')
    train_data = _data.values[:, :617]
    train_label = _data.values[:, 617]

    _data = pd.read_csv(f'{DATA_DIR}/isolet5.data')
    test_data = _data.values[:, :617]
    test_label = _data.values[:, 617]

    ifbtsvm = iFBTSVM(parameters=params, n_jobs=4)

    # Training
    before = time.monotonic()
    ifbtsvm.fit(X=train_data, y=train_label.reshape(train_label.shape[0]))
    after = time.monotonic()
    elapsed = (after - before)

    # Prediction
    accuracy = ifbtsvm.score(X=test_data, y=test_label)
    print(
        f'ISOLET: Accuracy: {np.around(accuracy * 100.0, 3)}% Train time: {np.around(elapsed, 3)}s'
    )
    return accuracy, np.around(elapsed, 3)
Example #15
import time

from sklearn.datasets import load_iris

from libifbtsvm import iFBTSVM
from libifbtsvm.models.ifbtsvm import Hyperparameters

if __name__ == '__main__':

    dataset = load_iris()
    params = Hyperparameters(
        epsilon=0.0000001,
        fuzzy=0.01,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=500,
        phi=0.00001,
        kernel=None,
    )

    # Initialisation iFBTSVM
    ifbtsvm = iFBTSVM(parameters=params, n_jobs=1)

    # Training
    before = time.monotonic()
    ifbtsvm.fit(X=dataset.data, y=dataset.target)
    after = time.monotonic()
    elapsed = (after - before)

    # Prediction
    accuracy = ifbtsvm.score(X=dataset.data, y=dataset.target)
    print(f'Accuracy iFBTSVM: {accuracy * 100.0}% Train time: {elapsed}s')