Example #1
def do_comparison(X_train, y_train, X_test, y_test, dataset):
    # evaluate both SVMs on a given dataset; generate comparison plots
    Cs = 10.**np.arange(-4, 1)
    multisvm = MultiSVM()
    svm = OneSlackSSVM(MultiClassClf(), tol=0.01)

    accs_pystruct, times_pystruct = eval_on_data(X_train,
                                                 y_train,
                                                 X_test,
                                                 y_test,
                                                 svm,
                                                 Cs=Cs)
    accs_svmstruct, times_svmstruct = eval_on_data(X_train,
                                                   y_train,
                                                   X_test,
                                                   y_test,
                                                   multisvm,
                                                   Cs=Cs)

    plot_curves(times_svmstruct,
                times_pystruct,
                Cs=Cs,
                title="learning time (s) %s" % dataset,
                filename="times_%s.pdf" % dataset)
    plot_curves(accs_svmstruct,
                accs_pystruct,
                Cs=Cs,
                title="accuracy %s" % dataset,
                filename="accs_%s.pdf" % dataset)
Example #2
def test_class_weights_rescale_C():
    # check that our Crammer-Singer implementation with class weights and
    # rescale_C=True matches LinearSVC's crammer_singer class_weight handling
    raise SkipTest("class weight test needs update")
    from sklearn.svm import LinearSVC
    X, Y = make_blobs(n_samples=210,
                      centers=3,
                      random_state=1,
                      cluster_std=3,
                      shuffle=False)
    X = np.hstack([X, np.ones((X.shape[0], 1))])
    X, Y = X[:170], Y[:170]

    weights = len(Y) / (np.bincount(Y) * len(np.unique(Y)))
    pbl_class_weight = MultiClassClf(n_features=3,
                                     n_classes=3,
                                     class_weight=weights,
                                     rescale_C=True)
    svm_class_weight = OneSlackSSVM(pbl_class_weight, C=10, tol=1e-5)
    svm_class_weight.fit(X, Y)

    try:
        linearsvm = LinearSVC(multi_class='crammer_singer',
                              fit_intercept=False,
                              class_weight='balanced',
                              C=10)
        linearsvm.fit(X, Y)

        assert_array_almost_equal(svm_class_weight.w, linearsvm.coef_.ravel(),
                                  3)
    except TypeError:
        # Travis CI has an old sklearn version that doesn't support
        # class_weight in LinearSVC
        pass
Example #3
def crammer_singer_classifier(X_train_bias, y_train, num_classes, n_jobs=2, C=1):
    model = MultiClassClf(n_features=X_train_bias.shape[1], n_classes=num_classes)
    # n-slack cutting-plane SSVM
    n_slack_svm = NSlackSSVM(model, n_jobs=n_jobs, verbose=0,
                             check_constraints=False, C=C,
                             batch_size=100, tol=1e-2)

    n_slack_svm.fit(X_train_bias, y_train)
    return n_slack_svm
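
Typical usage follows the bias-column convention used throughout these examples; a small sketch (variable names illustrative):

X_train_bias = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
X_test_bias = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
clf = crammer_singer_classifier(X_train_bias, y_train,
                                num_classes=len(np.unique(y_train)))
y_pred = clf.predict(X_test_bias)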
Example #4
def test_simple_1d_dataset_cutting_plane():
    # 30 1d datapoints between 0 and 1
    X = np.random.uniform(size=(30, 1))
    Y = (X.ravel() > 0.5).astype(int)
    # we have to add a constant 1 feature by hand :-/
    X = np.hstack([X, np.ones((X.shape[0], 1))])
    pbl = MultiClassClf(n_features=2)
    svm = NSlackSSVM(pbl, check_constraints=True, C=10000)
    svm.fit(X, Y)
    assert_array_equal(Y, np.hstack(svm.predict(X)))
Example #5
def test_equal_class_weights():
    # test that equal class weight is the same as no class weight
    X, Y = make_blobs(n_samples=80, centers=3, random_state=42)
    X = np.hstack([X, np.ones((X.shape[0], 1))])
    X_train, X_test, Y_train, Y_test = X[:40], X[40:], Y[:40], Y[40:]

    pbl = MultiClassClf(n_features=3, n_classes=3)
    svm = OneSlackSSVM(pbl, C=10)

    svm.fit(X_train, Y_train)
    predict_no_class_weight = svm.predict(X_test)

    pbl_class_weight = MultiClassClf(n_features=3,
                                     n_classes=3,
                                     class_weight=np.ones(3))
    svm_class_weight = OneSlackSSVM(pbl_class_weight, C=10)
    svm_class_weight.fit(X_train, Y_train)
    predict_class_weight = svm_class_weight.predict(X_test)

    assert_array_equal(predict_no_class_weight, predict_class_weight)
    assert_array_almost_equal(svm.w, svm_class_weight.w)
Example #6
def test_blobs_2d_subgradient():
    # make three gaussian blobs
    X, Y = make_blobs(n_samples=80, centers=3, random_state=42)
    # we have to add a constant 1 feature by hand :-/
    X = np.hstack([X, np.ones((X.shape[0], 1))])
    X_train, X_test, Y_train, Y_test = X[:40], X[40:], Y[:40], Y[40:]

    pbl = MultiClassClf(n_features=3, n_classes=3)
    svm = SubgradientSSVM(pbl, C=1000)

    svm.fit(X_train, Y_train)
    assert_array_equal(Y_test, np.hstack(svm.predict(X_test)))
Example #7
def test_blobs_2d_cutting_plane():
    # make three gaussian blobs
    X, Y = make_blobs(n_samples=80, centers=3, random_state=42)
    # we have to add a constant 1 feature by hand :-/
    X = np.hstack([X, np.ones((X.shape[0], 1))])

    X_train, X_test, Y_train, Y_test = X[:40], X[40:], Y[:40], Y[40:]

    pbl = MultiClassClf(n_features=3, n_classes=3)
    svm = NSlackSSVM(pbl, check_constraints=True, C=1000, batch_size=1)

    svm.fit(X_train, Y_train)
    assert_array_equal(Y_test, np.hstack(svm.predict(X_test)))
Example #8
def test_class_weights():
    X, Y = make_blobs(n_samples=210,
                      centers=3,
                      random_state=1,
                      cluster_std=3,
                      shuffle=False)
    X = np.hstack([X, np.ones((X.shape[0], 1))])
    X, Y = X[:170], Y[:170]

    pbl = MultiClassClf(n_features=3, n_classes=3)
    svm = OneSlackSSVM(pbl, C=10)

    svm.fit(X, Y)

    weights = 1. / np.bincount(Y)
    weights *= len(weights) / np.sum(weights)
    pbl_class_weight = MultiClassClf(n_features=3,
                                     n_classes=3,
                                     class_weight=weights)
    svm_class_weight = OneSlackSSVM(pbl_class_weight, C=10)
    svm_class_weight.fit(X, Y)

    assert_greater(f1_score(Y, svm_class_weight.predict(X), average='macro'),
                   f1_score(Y, svm.predict(X), average='macro'))
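
The hand-built weights above are, up to a constant factor, sklearn's 'balanced' heuristic. A quick standalone check of that claim, assuming sklearn is available:

from sklearn.utils.class_weight import compute_class_weight

weights = 1. / np.bincount(Y)
weights *= len(weights) / np.sum(weights)
balanced = compute_class_weight('balanced', classes=np.unique(Y), y=Y)
# both are inversely proportional to the class counts, so they agree
# after normalization
assert np.allclose(weights / weights.sum(), balanced / balanced.sum())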
Example #9
# one Loss definition must be active for fill_diagonal below; the toeplitz
# variant is uncommented here (assumes: from scipy.linalg import toeplitz)
Loss = toeplitz(np.arange(n_classes))
# Loss = np.ones((n_classes, n_classes))
np.fill_diagonal(Loss, 0.0)

# cond_loss = np.dot(Loss, np.expand_dims(prob, 1))
# opt = np.argmin(cond_loss)

# print(cond_loss)
# print("OPTIMAL IS %f", opt)

# we add a constant 1 feature for the bias
X_train_bias = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
X_test_bias = np.hstack([X_test, np.ones((X_test.shape[0], 1))])

model = MultiClassClf(n_features=X_train_bias.shape[1],
                      n_classes=n_classes,
                      Loss=Loss)
gmodel = GeneralizedMultiClassClf(n_features=X_train_bias.shape[1],
                                  n_classes=n_classes,
                                  Loss=Loss)

method = 'generalized'
# method = 'vanilla'

Cs = [1.]
# Cs = [6.5, 7., 7.5]

for C in Cs:
    fw_bc_svm = FrankWolfeSSVM(model,
                               C=C,
                               max_iter=300,
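
The FrankWolfeSSVM call above is cut off in the original listing. A hypothetical completion of the loop body, assuming the intent is to fit whichever model the method flag selects and report held-out accuracy (the variable name clf is a stand-in):

    clf = FrankWolfeSSVM(gmodel if method == 'generalized' else model,
                         C=C, max_iter=300)
    clf.fit(X_train_bias, y_train)
    print("C=%g test accuracy: %f" % (C, clf.score(X_test_bias, y_test)))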
Example #10
def test_crammer_singer_model_class_weight():
    X, Y = make_blobs(n_samples=80, centers=3, random_state=42)
    # we have to add a constant 1 feature by hand :-/
    X = np.hstack([X, np.ones((X.shape[0], 1))])

    pbl = MultiClassClf(n_features=3, n_classes=3, class_weight=[1, 2, 1])

    rng = np.random.RandomState(0)
    w = rng.uniform(size=pbl.size_joint_feature)
    # test inference energy
    x = X[0]
    y, energy = pbl.inference(x, w, return_energy=True)
    assert_almost_equal(energy, np.dot(w, pbl.joint_feature(x, y)))

    # test inference_result:
    energies = [np.dot(w, pbl.joint_feature(x, y_hat)) for y_hat in range(3)]
    assert_equal(np.argmax(energies), y)

    # test loss_augmented inference energy
    y, energy = pbl.loss_augmented_inference(x, Y[0], w, return_energy=True)
    assert_almost_equal(energy,
                        np.dot(w, pbl.joint_feature(x, y)) + pbl.loss(Y[0], y))

    # test batch versions
    Y_batch = pbl.batch_inference(X, w)
    Y_ = [pbl.inference(x, w) for x in X]
    assert_array_equal(Y_batch, Y_)

    Y_batch = pbl.batch_loss_augmented_inference(X, Y, w)
    Y_ = [pbl.loss_augmented_inference(x, y, w) for x, y in zip(X, Y)]
    assert_array_equal(Y_batch, Y_)

    loss_batch = pbl.batch_loss(Y, Y_)
    loss = [pbl.loss(y, y_) for y, y_ in zip(Y, Y_)]
    assert_array_equal(loss_batch, loss)
Example #11
def test_crammer_singer_model_class_weight():
    X, Y = make_blobs(n_samples=80, centers=3, random_state=42)
    # we have to add a constant 1 feature by hand :-/
    X = np.hstack([X, np.ones((X.shape[0], 1))])

    pbl = MultiClassClf(n_features=3, n_classes=3, class_weight=[1, 2, 1])

    rng = np.random.RandomState(0)
    w = rng.uniform(size=pbl.size_psi)
    # test inference energy
    x = X[0]
    y, energy = pbl.inference(x, w, return_energy=True)
    assert_almost_equal(energy, np.dot(w, pbl.psi(x, y)))

    # test inference_result:
    energies = [np.dot(w, pbl.psi(x, y_hat)) for y_hat in range(3)]
    assert_equal(np.argmax(energies), y)

    # test loss_augmented inference energy
    y, energy = pbl.loss_augmented_inference(x, Y[0], w, return_energy=True)
    assert_almost_equal(energy, np.dot(w, pbl.psi(x, y)) + pbl.loss(Y[0], y))

    # test batch versions
    Y_batch = pbl.batch_inference(X, w)
    Y_ = [pbl.inference(x, w) for x in X]
    assert_array_equal(Y_batch, Y_)

    Y_batch = pbl.batch_loss_augmented_inference(X, Y, w)
    Y_ = [pbl.loss_augmented_inference(x, y, w) for x, y in zip(X, Y)]
    assert_array_equal(Y_batch, Y_)

    loss_batch = pbl.batch_loss(Y, Y_)
    loss = [pbl.loss(y, y_) for y, y_ in zip(Y, Y_)]
    assert_array_equal(loss_batch, loss)
Example #12
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from pystruct.models import MultiClassClf
from pystruct.learners import FrankWolfeSSVM

digits = load_digits()
X_train, y_train = digits.data, digits.target
X_train = X_train / 16.
y_train = y_train.astype(int)
model = MultiClassClf()

bcfw = FrankWolfeSSVM(model=model,
                      C=.1,
                      max_iter=1000,
                      tol=0.1,
                      verbose=3,
                      check_dual_every=10)
batch_fw = FrankWolfeSSVM(model=model,
                          C=.1,
                          max_iter=1000,
                          batch_mode=True,
                          tol=0.1,
                          verbose=3,
                          check_dual_every=10)

bcfw.fit(X_train, y_train)
batch_fw.fit(X_train, y_train)
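
# NOTE: d_gapBCFW is not defined in this listing. Assuming it holds the
# duality gaps that FrankWolfeSSVM records every check_dual_every
# iterations, it could be reconstructed from the stored objective curves:
d_gapBCFW = (np.array(bcfw.primal_objective_curve_)
             - np.array(bcfw.objective_curve_))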

itr = [i * 10 for i in range(len(d_gapBCFW))]
plt.plot(itr, d_gapBCFW, 'go-', label='BCFW duality gap', linewidth=2)
plt.title('BCFW duality gap every 10 iterations')
plt.show()
Example #13
    # Add the heart rate feature
    x = np.append(x, window_with_timestamp_and_label[-2])

    X = np.append(X, np.reshape(x, (1, -1)), axis=0)

    y = np.append(y, label)

print("Finished feature extraction over {} windows".format(len(X)))
print("Unique labels found: {}".format(set(y)))
sys.stdout.flush()

y = y.astype(int)
model = MultiClassClf(n_features=X.shape[1], n_classes=2)


print("\n")
print("---------------------- Grid Search -------------------------")
# grid search
param_grid = [
    {
        'C': [0.075, 0.05, 0.1],
        'batch_size': [100, 1000, 150],
        'tol': [1e-2, -10],
        'inactive_window': [50, 100, 10],
        'inactive_threshold': [1e-5, 1e-4, 1e-6],
    }
]
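
The listing ends before the grid is consumed. The parameter names match NSlackSSVM's constructor, so a plausible continuation, sketched with sklearn's GridSearchCV (hypothetical, not part of the original):

from sklearn.model_selection import GridSearchCV
from pystruct.learners import NSlackSSVM

# pystruct learners follow the sklearn estimator interface, so they can
# be tuned with GridSearchCV directly
search = GridSearchCV(NSlackSSVM(model), param_grid, cv=3)
search.fit(X, y)
print("best parameters:", search.best_params_)
print("best CV accuracy:", search.best_score_)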