Example #1
0
def mnist():
    """Train and score an iFBTSVM model on the MNIST CSV files in ``DATA_DIR``.

    The model is fitted on the whole training set with an ``RBFSampler``
    passed as the kernel, then scored on the test set.  A one-line summary
    is printed.

    Returns:
        tuple: ``(accuracy, train_time)`` where ``accuracy`` is the value
        returned by ``score`` and ``train_time`` is the fit duration in
        seconds rounded to 3 decimals.
    """
    hyper = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=10,
        C2=10,
        C3=10,
        C4=10,
        max_iter=50,
        phi=0,
        kernel=RBFSampler(gamma=0.01, n_components=1000),
        forget_score=10,
    )

    x_train = pd.read_csv(f'{DATA_DIR}/MNIST_train_data.csv')
    y_train = pd.read_csv(f'{DATA_DIR}/MNIST_train_label.csv')
    x_test = pd.read_csv(f'{DATA_DIR}/MNIST_test_data.csv')
    y_test = pd.read_csv(f'{DATA_DIR}/MNIST_test_label.csv')

    model = iFBTSVM(parameters=hyper, n_jobs=4)

    # Only the fit call (including the label flattening) is timed.
    start = time.monotonic()
    flat_labels = y_train.values.reshape(len(y_train))
    model.fit(X=x_train.values, y=flat_labels)
    elapsed = time.monotonic() - start

    accuracy = model.score(X=x_test.values, y=y_test.values)
    print(
        f'MNIST: Accuracy: {np.around(accuracy * 100.0, 3)}% Train time: {np.around(elapsed, 3)}s'
    )
    return accuracy, np.around(elapsed, 3)
def test_fuzzy_membership_iris():
    """Check ``fuzzy_membership`` against stored values for two iris classes.

    Iris class 0 is used as the positive class and class 1 as the negative
    class; the resulting ``sn`` and ``sp`` columns are compared with the
    reference values below.
    """
    hyper = Hyperparameters(fuzzy=0.5, epsilon=0.5)
    iris = load_iris()

    positives = iris.data[iris.target == 0]
    negatives = iris.data[iris.target == 1]

    result = fuzzy_membership(params=hyper, class_p=positives, class_n=negatives)

    # Reference memberships are kept flat and reshaped into (n, 1) columns.
    expected_sn = np.asarray([
        0.80251715, 0.97998912, 0.812318, 0.9807224, 0.98189354,
        0.99937236, 0.96043452, 0.56903383, 0.97212066, 0.96048151,
        0.60125974, 0.99984087, 0.97872578, 0.99542787, 0.97290636,
        0.95614206, 0.99496593, 0.99843223, 0.9804573, 0.9889402,
        0.95800151, 0.99971397, 0.96464217, 0.99565949, 0.99577327,
        0.97731756, 0.90653641, 0.84949026, 0.999488, 0.94963879,
        0.97055683, 0.95245829, 0.9980525, 0.94679333, 0.98472415,
        0.9759757, 0.92815776, 0.98848116, 0.99733116, 0.99105587,
        0.99493091, 0.9970333, 0.99903366, 0.61157738, 0.9992254,
        0.99909612, 1., 0.99982867, 0.5, 0.99991744,
    ]).reshape(-1, 1)
    assert_allclose(result.sn, expected_sn)

    expected_sp = np.asarray([
        0.99992168, 0.9917287, 0.99376558, 0.98430655, 0.99974305,
        0.95670035, 0.99387979, 1., 0.91239646, 0.99587275,
        0.98883625, 0.99914972, 0.98704236, 0.85665848, 0.78204237,
        0.56573912, 0.96225372, 0.99991496, 0.90482216, 0.99526856,
        0.99049658, 0.99760092, 0.96536234, 0.99558788, 0.98841007,
        0.99139502, 0.99961253, 0.99956062, 0.99959803, 0.99427123,
        0.99396476, 0.99323856, 0.94598095, 0.85250365, 0.99691726,
        0.99689705, 0.98412401, 0.99910674, 0.93096412, 0.99996815,
        0.99976035, 0.5, 0.95871688, 0.99539269, 0.9728796,
        0.98887459, 0.99416071, 0.98977514, 0.99446211, 0.99990073,
    ]).reshape(-1, 1)
    assert_allclose(result.sp, expected_sp)
Example #3
0
def sea():
    """Train and score an iFBTSVM model on the SEA 10K CSV file.

    Columns 0-2 are used as features and the remaining columns as labels.
    The first 10000 rows are the training split and every later row is the
    test split.  A one-line summary is printed.

    Returns:
        tuple: ``(accuracy, train_time)`` with the fit duration in seconds
        rounded to 3 decimals.
    """
    hyper = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=10,
        C2=1,
        C3=10,
        C4=1,
        max_iter=50,
        phi=0,
        # No kernel object; alternative: RBFSampler(gamma=0.6, n_components=1400)
        kernel=None,
        forget_score=10,
    )

    table = pd.read_csv(f'{DATA_DIR}/SEA/10K/SEA10K.csv').values
    features, labels = table[:, 0:3], table[:, 3:]

    # NOTE(review): the test split is whatever follows row 10000 -- confirm
    # the file actually holds more than 10000 rows.
    x_train, y_train = features[:10000], labels[:10000]
    x_test, y_test = features[10000:], labels[10000:]

    model = iFBTSVM(parameters=hyper, n_jobs=4)

    # Only the fit call (with flattened labels) is timed.
    start = time.monotonic()
    model.fit(X=x_train, y=y_train.reshape(len(y_train)))
    elapsed = time.monotonic() - start

    accuracy = model.score(X=x_test, y=y_test)
    print(
        f'SEA 10K: Accuracy: {np.around(accuracy * 100.0, 3)}% Train time: {np.around(elapsed, 3)}s'
    )
    return accuracy, np.around(elapsed, 3)
Example #4
0
def gisette():
    """Benchmark incremental iFBTSVM learning on the Gisette files.

    The model is fitted on the first ``num_points`` training rows and scored
    on the validation set; the remaining training rows are then passed to
    ``update`` in batches of ``batch_size`` and the model is scored again.
    One summary line is printed per phase; nothing is returned.
    """
    params = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=500,
        phi=0,
        kernel=None,
        forget_score=10,
    )

    # ``sep=r'\s+'`` is the documented equivalent of the deprecated
    # ``delim_whitespace=True`` and works on every pandas version.
    # NOTE(review): no ``header=`` is given, so pandas treats the first line
    # of each file as column names -- confirm that is intended.
    whitespace = r'\s+'
    train_data = pd.read_csv(f'{DATA_DIR}/gisette_train.data', sep=whitespace)
    train_label = pd.read_csv(f'{DATA_DIR}/gisette_train.labels',
                              sep=whitespace)
    test_data = pd.read_csv(f'{DATA_DIR}/gisette_valid.data', sep=whitespace)
    test_label = pd.read_csv(f'{DATA_DIR}/gisette_valid.labels',
                             sep=whitespace)

    ifbtsvm = iFBTSVM(parameters=params, n_jobs=1)

    # Training: initial fit on the leading rows, labels flattened to 1-D.
    num_points = 500
    before = time.monotonic()
    head_labels = train_label[:num_points].values
    ifbtsvm.fit(X=train_data[:num_points].values,
                y=head_labels.reshape(head_labels.shape[0]))
    elapsed = time.monotonic() - before
    accuracy_1 = ifbtsvm.score(X=test_data.values, y=test_label.values)

    print(
        f'Gisette\t'
        f'Training (DataPoints|Accuracy|Time): '
        f'{num_points}|{np.around(accuracy_1 * 100.0, 3)}%|{np.around(elapsed, 3)}s\t'
    )

    # Update: feed every remaining training row in batches.
    batch_size = 500
    before = time.monotonic()
    tail_labels = train_label[num_points:].values
    ifbtsvm.update(X=train_data[num_points:].values,
                   y=tail_labels.reshape(tail_labels.shape[0]),
                   batch_size=batch_size)
    u_elapsed = time.monotonic() - before

    # Prediction after the incremental update.
    accuracy_2 = ifbtsvm.score(X=test_data.values, y=test_label.values)
    print(
        f'Gisette\t'
        f'Update (BatchSize|Accuracy|Time): '
        f'{batch_size}|{np.around(accuracy_2 * 100.0, 3)}%|{np.around(u_elapsed, 3)}s'
    )
Example #5
0
def test_compute_score_none():
    """``_compute_score`` with no prior score pairs each value with a 1."""
    model = iFBTSVM(parameters=Hyperparameters(phi=0.5))
    values = np.arange(1, 6)

    # Passing None as the previous score exercises the "first call" path.
    result = model._compute_score(None, values)

    expected = np.asarray([[1, 2, 3, 4, 5], [1, 1, 1, 1, 1]])
    assert_allclose(result, expected)
Example #6
0
def border():
    """Benchmark incremental iFBTSVM learning on the Border CSV files.

    The model is fitted on the first ``num_points`` training rows and scored
    on the test set; the remaining training rows are then passed to
    ``update`` in batches of ``batch_size`` and the model is scored again.
    One summary line is printed per phase; nothing is returned.
    """
    hyper = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.01,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=500,
        phi=0.00001,
        kernel=RBFSampler(gamma=0.01, n_components=10),
        forget_score=10,
    )

    x_train = pd.read_csv(f'{DATA_DIR}/Border_train_data.csv')
    y_train = pd.read_csv(f'{DATA_DIR}/Border_train_label.csv')
    x_test = pd.read_csv(f'{DATA_DIR}/Border_test_data.csv')
    y_test = pd.read_csv(f'{DATA_DIR}/Border_test_label.csv')

    model = iFBTSVM(parameters=hyper, n_jobs=4)

    # Phase 1: initial fit on the leading rows, labels flattened to 1-D.
    num_points = 60
    start = time.monotonic()
    head_labels = y_train[:num_points].values
    model.fit(X=x_train[:num_points].values,
              y=head_labels.reshape(len(head_labels)))
    elapsed = time.monotonic() - start
    accuracy_1 = model.score(X=x_test.values, y=y_test.values)

    print(
        f'Border\t'
        f'Training (DataPoints|Accuracy|Time): '
        f'{num_points}|{np.around(accuracy_1 * 100.0, 3)}%|{np.around(elapsed, 3)}s\t'
    )

    # Phase 2: incremental update with every remaining training row.
    batch_size = 100
    start = time.monotonic()
    tail_labels = y_train[num_points:].values
    model.update(X=x_train[num_points:].values,
                 y=tail_labels.reshape(len(tail_labels)),
                 batch_size=batch_size)
    u_elapsed = time.monotonic() - start

    accuracy_2 = model.score(X=x_test.values, y=y_test.values)
    print(
        f'Border\t'
        f'Update (BatchSize|Accuracy|Time): '
        f'{batch_size}|{np.around(accuracy_2 * 100.0, 3)}%|{np.around(u_elapsed, 3)}s'
    )
Example #7
0
def test_compute_score_none():
    """``_compute_score`` with no prior score pairs each value with a 1."""
    model = iFBTSVM(parameters=Hyperparameters(phi=0.5))

    # Float64 vector [1., 2., 3., 4., 5.].
    values = np.arange(1, 6, dtype=np.float64)

    result = model._compute_score(None, values)

    expected = np.asarray([[1, 2, 3, 4, 5], [1, 1, 1, 1, 1]])
    assert np.array_equal(result, expected)
Example #8
0
def _load_usps_split(path, n_samples, n_features=256):
    """Parse one USPS file written as ``label index:value index:value ...``.

    Args:
        path: Location of the text file to read.
        n_samples: Number of rows to allocate; the file must not contain
            more non-blank lines than this.
        n_features: Number of feature columns to allocate.

    Returns:
        tuple: ``(data, labels)`` where ``data`` is an
        ``(n_samples, n_features)`` float array (features absent from a line
        stay 0) and ``labels`` is an ``(n_samples,)`` float array.
    """
    data = np.zeros((n_samples, n_features))
    labels = np.zeros((n_samples, ))
    with open(path, 'r') as f_in:
        # Split on any whitespace and drop blank lines, so trailing spaces,
        # tabs or an empty last line cannot produce bogus tokens.
        records = filter(None, map(str.split, f_in))
        for row, fields in enumerate(records):
            labels[row] = float(fields[0])
            for field in fields[1:]:
                index, value = field.split(':')[:2]
                # Feature indices in the file are 1-based.
                data[row, int(index) - 1] = float(value)
    return data, labels


def usps():
    """Train and score an iFBTSVM model on the USPS files in ``DATA_DIR``.

    Reads 7291 training rows from ``usps`` and 2007 test rows from
    ``usps.t`` (256 features each), fits the model, scores it on the test
    split and prints a one-line summary.

    Returns:
        tuple: ``(accuracy, train_time)`` with the fit duration in seconds
        rounded to 3 decimals.
    """
    params = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=50,
        phi=0,
        kernel=RBFSampler(gamma=0.01, n_components=1000),
        forget_score=10,
    )

    train_data, train_label = _load_usps_split(f'{DATA_DIR}/usps', 7291)
    test_data, test_label = _load_usps_split(f'{DATA_DIR}/usps.t', 2007)

    ifbtsvm = iFBTSVM(parameters=params, n_jobs=4)

    # Training (only the fit call is timed)
    before = time.monotonic()
    ifbtsvm.fit(X=train_data, y=train_label)
    elapsed = time.monotonic() - before

    # Prediction
    accuracy = ifbtsvm.score(X=test_data, y=test_label)
    print(
        f'USPS: Accuracy: {np.around(accuracy * 100.0, 3)}% Train time: {np.around(elapsed, 3)}s'
    )
    return accuracy, np.around(elapsed, 3)
def test_fuzzy_membership_no_noise(valid_ensemble_a, valid_ensemble_b):
    """Both classes get the same memberships and no point is marked as noise.

    ``valid_ensemble_a`` and ``valid_ensemble_b`` are supplied by the test
    harness (presumably pytest fixtures defined elsewhere).
    """
    hyper = Hyperparameters(fuzzy=0.5, epsilon=0.5)

    result = fuzzy_membership(params=hyper,
                              class_p=valid_ensemble_a,
                              class_n=valid_ensemble_b)

    expected = np.asarray([[1.0], [0.5], [0.5]])

    assert np.all(np.isclose(result.sp, expected))
    assert np.all(np.isclose(result.sn, expected))

    # No non-zero entry may appear in either noise array.
    assert np.nonzero(result.noise_p)[0].size <= 0
    assert np.nonzero(result.noise_n)[0].size <= 0
Example #10
0
def rbf():
    """Benchmark incremental iFBTSVM learning on the RBF 10K CSV file.

    Columns 0-9 are features and the remaining columns are labels; the first
    10000 rows are the training split and later rows the test split.  The
    model is fitted on the first ``num_points`` training rows, scored, then
    updated with the remaining training rows in batches and scored again.
    A single summary line is printed; nothing is returned.
    """
    hyper = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=250,
        phi=0,
        kernel=RBFSampler(gamma=0.45, n_components=300),
        forget_score=10,
    )

    table = pd.read_csv(f'{DATA_DIR}/RBF/10K/RBF10K.csv').values
    features, labels = table[:, 0:10], table[:, 10:]

    # NOTE(review): the test split is whatever follows row 10000 -- confirm
    # the file actually holds more than 10000 rows.
    x_train, y_train = features[:10000], labels[:10000]
    x_test, y_test = features[10000:], labels[10000:]

    model = iFBTSVM(parameters=hyper, n_jobs=4)

    # Phase 1: initial fit on the leading rows, labels flattened to 1-D.
    num_points = 5000
    start = time.monotonic()
    head_labels = y_train[:num_points]
    model.fit(X=x_train[:num_points],
              y=head_labels.reshape(len(head_labels)))
    elapsed = time.monotonic() - start
    accuracy_1 = model.score(X=x_test, y=y_test)

    # Phase 2: incremental update with the remaining training rows.
    batch_size = int(len(x_train) / 100 * 5 + 0.5)  # 5% of original dataset
    start = time.monotonic()
    tail_labels = y_train[num_points:]
    model.update(X=x_train[num_points:],
                 y=tail_labels.reshape(len(tail_labels)),
                 batch_size=batch_size)
    u_elapsed = time.monotonic() - start

    accuracy_2 = model.score(X=x_test, y=y_test)
    print(f'RBF\t'
          f'Training (DataPoints|Accuracy|Time): '
          f'{num_points}|{np.around(accuracy_1 * 100.0, 3)}%|{np.around(elapsed, 3)}s\t'
          f'Update (BatchSize|Accuracy|Time): '
          f'{batch_size}|{np.around(accuracy_2 * 100.0, 3)}%|{np.around(u_elapsed, 3)}s')
def test_fuzzy_membership_noise(valid_ensemble_a, valid_ensemble_b):
    """A negative point moved towards class "a" is flagged as noise.

    The first point of ``valid_ensemble_b`` is overwritten with (0.8, 0.8)
    before computing memberships; only index 0 of ``noise_n`` should then be
    non-zero, and ``noise_p`` should stay empty.
    """
    params = Hyperparameters(fuzzy=0.5, epsilon=0.5)

    # Update ensemble "b" to have a point closer to center of "a"
    valid_ensemble_b[0][0] = 0.8
    valid_ensemble_b[0][1] = 0.8

    _fuzzy = fuzzy_membership(params=params,
                              class_p=valid_ensemble_a,
                              class_n=valid_ensemble_b)

    _truth_p = np.asarray([[1.0], [0.5], [0.5]])
    _truth_n = np.asarray([[0.5], [1.0], [1.0]])

    assert np.isclose(_fuzzy.sp, _truth_p).all()
    assert np.isclose(_fuzzy.sn, _truth_n).all()

    assert np.nonzero(_fuzzy.noise_p)[0].size == 0
    # Compare the whole index array at once: asserting on ``array == [0]``
    # depends on the truth value of an array, which raises ValueError as soon
    # as more than one index is returned instead of failing the assertion.
    assert np.array_equal(np.nonzero(_fuzzy.noise_n)[0], [0])
Example #12
0
def letter():
    """Train and score an iFBTSVM model on the letter-recognition file.

    Column 0 holds the letter label and the remaining columns the features.
    The first 16000 rows are the training split and later rows the test
    split.  Letters are mapped to integers with ``'A' -> 0``.  A one-line
    summary is printed.

    Returns:
        tuple: ``(accuracy, train_time)`` with the fit duration in seconds
        rounded to 3 decimals.
    """
    params = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=50,
        phi=0,
        kernel=RBFSampler(gamma=0.03, n_components=500),
        forget_score=10,
    )

    # NOTE(review): no ``header=`` is given, so pandas treats the first line
    # of the file as column names -- confirm that is intended.
    _data = pd.read_csv(f'{DATA_DIR}/letter-recognition.data')
    values = _data.values
    train_data = values[:16000, 1:]
    train_label = values[:16000, 0]
    test_data = values[16000:, 1:]
    test_label = values[16000:, 0]

    # Replace each letter in place by its offset from 'A'.
    base = ord('A')
    for i, lbl in enumerate(train_label):
        train_label[i] = ord(lbl) - base

    for i, lbl in enumerate(test_label):
        test_label[i] = ord(lbl) - base

    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the exact
    # type it used to alias.
    test_label = test_label.reshape(test_label.shape[0], 1).astype(int)

    ifbtsvm = iFBTSVM(parameters=params, n_jobs=4)

    # Training (only the fit call is timed)
    before = time.monotonic()
    ifbtsvm.fit(X=train_data, y=train_label)
    elapsed = time.monotonic() - before

    # Prediction; ``test_label`` already has shape (n, 1).
    accuracy = ifbtsvm.score(X=test_data, y=test_label)
    print(
        f'Letter: Accuracy: {np.around(accuracy * 100.0, 3)}% Train time: {np.around(elapsed, 3)}s'
    )
    return accuracy, np.around(elapsed, 3)
Example #13
0
def test_compute_score():
    """Check ``_compute_score`` when a previous score array is supplied.

    Per the expected arrays: values already in the first row get their
    second-row entry raised from 1 to 2, and values not yet present are
    appended with an entry of 1.
    """
    model = iFBTSVM(parameters=Hyperparameters(phi=0.5))
    previous = [[1, 2, 3, 4, 5], [1, 1, 1, 1, 1]]

    # Case 1: every incoming value is already known.
    result = model._compute_score(np.asarray(previous), np.arange(1, 6))
    expected = np.asarray([[1, 2, 3, 4, 5], [2, 2, 2, 2, 2]])
    assert_allclose(result, expected)

    # Case 2: a fresh score array plus five values that were not seen before.
    result = model._compute_score(np.asarray(previous), np.arange(1, 11))
    expected = np.asarray([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                           [2, 2, 2, 2, 2, 1, 1, 1, 1, 1]])
    assert_allclose(result, expected)
Example #14
0
def gisette():
    """Train and score an iFBTSVM model on the Gisette files in ``DATA_DIR``.

    The model is fitted on the whole training set and scored on the
    validation set.  A one-line summary is printed.

    Returns:
        tuple: ``(accuracy, train_time)`` with the fit duration in seconds
        rounded to 3 decimals.
    """
    params = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=8,
        C2=2,
        C3=8,
        C4=2,
        max_iter=50,
        phi=0,
        # No kernel object; alternative: RBFSampler(gamma=0.4, n_components=150)
        kernel=None,
        forget_score=10,
    )

    # ``sep=r'\s+'`` is the documented equivalent of the deprecated
    # ``delim_whitespace=True`` and works on every pandas version.
    # NOTE(review): no ``header=`` is given, so pandas treats the first line
    # of each file as column names -- confirm that is intended.
    whitespace = r'\s+'
    train_data = pd.read_csv(f'{DATA_DIR}/gisette_train.data', sep=whitespace)
    train_label = pd.read_csv(f'{DATA_DIR}/gisette_train.labels',
                              sep=whitespace)
    test_data = pd.read_csv(f'{DATA_DIR}/gisette_valid.data', sep=whitespace)
    test_label = pd.read_csv(f'{DATA_DIR}/gisette_valid.labels',
                             sep=whitespace)

    ifbtsvm = iFBTSVM(parameters=params, n_jobs=4)

    # Training (only the fit call, with flattened labels, is timed)
    before = time.monotonic()
    ifbtsvm.fit(X=train_data.values,
                y=train_label.values.reshape(train_label.values.shape[0]))
    elapsed = time.monotonic() - before

    # Prediction
    accuracy = ifbtsvm.score(X=test_data.values, y=test_label.values)
    print(
        f'Gisette: Accuracy: {np.around(accuracy * 100.0, 3)}% Train time: {np.around(elapsed, 3)}s'
    )
    return accuracy, np.around(elapsed, 3)
Example #15
0
def isolet():
    """Train and score an iFBTSVM model on the ISOLET files in ``DATA_DIR``.

    ``isolet1+2+3+4.data`` is the training set and ``isolet5.data`` the test
    set; in both, columns 0-616 are features and column 617 is the label.
    A one-line summary is printed.

    Returns:
        tuple: ``(accuracy, train_time)`` with the fit duration in seconds
        rounded to 3 decimals.
    """
    hyper = Hyperparameters(
        epsilon=1e-10,
        fuzzy=0.1,
        C1=10,
        C2=10,
        C3=10,
        C4=10,
        max_iter=50,
        phi=0,
        kernel=RBFSampler(gamma=0.001, n_components=1000),
        forget_score=10,
    )

    train_table = pd.read_csv(f'{DATA_DIR}/isolet1+2+3+4.data').values
    x_train, y_train = train_table[:, :617], train_table[:, 617]

    test_table = pd.read_csv(f'{DATA_DIR}/isolet5.data').values
    x_test, y_test = test_table[:, :617], test_table[:, 617]

    model = iFBTSVM(parameters=hyper, n_jobs=4)

    # Only the fit call is timed.
    start = time.monotonic()
    model.fit(X=x_train, y=y_train.reshape(len(y_train)))
    elapsed = time.monotonic() - start

    accuracy = model.score(X=x_test, y=y_test)
    print(
        f'ISOLET: Accuracy: {np.around(accuracy * 100.0, 3)}% Train time: {np.around(elapsed, 3)}s'
    )
    return accuracy, np.around(elapsed, 3)
def test_fuzzy_membership_epsilon_error(valid_ensemble_a, valid_ensemble_b, u,
                                        epsilon):
    """``fuzzy_membership`` must raise ``ValueError`` for the given pair.

    ``u`` and ``epsilon`` are supplied by the test harness (presumably a
    parametrization that is not visible here).
    """
    hyper = Hyperparameters(fuzzy=u, epsilon=epsilon)

    # The return value is irrelevant; only the raised exception matters.
    with pytest.raises(ValueError):
        fuzzy_membership(hyper, valid_ensemble_a, valid_ensemble_b)