Example #1
0
def test_mdla_dict_update():
    """Kernels must keep changing across successive (partial_)fit epochs."""
    n_kernels = 10
    # n_samples, n_features, n_dims = 100, 5, 3
    n_samples, n_features, n_dims = 80, 5, 3
    X = [rng_global.randn(n_features, n_dims) for _ in range(n_samples)]

    def _check_second_epoch_differs(dict_obj, refit):
        # Snapshot the kernels, run one more epoch, then require a non-zero
        # summed difference for every kernel pair.
        epoch_one = list(dict_obj.kernels_)
        dict_obj = refit(X)
        epoch_two = list(dict_obj.kernels_)
        for before, after in zip(epoch_one, epoch_two):
            assert (before - after).sum() != 0.0

    # Batch learner, re-fitted.
    learner = MultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, max_iter=10, n_jobs=-1
    ).fit(X)
    _check_second_epoch_differs(learner, learner.fit)

    # Mini-batch learner, re-fitted.
    learner = MiniBatchMultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, n_iter=10, n_jobs=-1
    ).fit(X)
    _check_second_epoch_differs(learner, learner.fit)

    # Mini-batch learner, incremental partial_fit.
    learner = MiniBatchMultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, n_iter=10, n_jobs=-1
    ).partial_fit(X)
    _check_second_epoch_differs(learner, learner.partial_fit)
Example #2
0
def test_multivariate_input_shape():
    """Fitting must preserve kernel shapes and reject mismatched n_dims."""
    n_samples, n_features, n_dims = 10, 5, 3
    X = [rng_global.randn(n_features, n_dims) for _ in range(n_samples)]
    n_kernels = 7
    n_dims_w = 6
    # Same number of features but a wrong number of dims: must be rejected.
    Xw = [rng_global.randn(n_features, n_dims_w) for _ in range(n_samples)]

    def _check_kernel_shapes(fitted):
        for kernel in fitted.kernels_:
            assert kernel.shape == (n_features, n_dims)

    _check_kernel_shapes(MultivariateDictLearning(n_kernels=n_kernels).fit(X))
    assert_raises(
        ValueError, MultivariateDictLearning(n_kernels=n_kernels).fit, Xw
    )

    _check_kernel_shapes(
        MiniBatchMultivariateDictLearning(n_kernels=n_kernels).fit(X)
    )
    assert_raises(
        ValueError, MiniBatchMultivariateDictLearning(n_kernels=n_kernels).fit, Xw
    )

    _check_kernel_shapes(
        MiniBatchMultivariateDictLearning(n_kernels=n_kernels).partial_fit(X)
    )
    assert_raises(
        ValueError,
        MiniBatchMultivariateDictLearning(n_kernels=n_kernels).partial_fit,
        Xw,
    )
Example #3
0
def test_mdla_shuffle():
    """A no-shuffle mini-batch fit must still yield a sparse (<=1 atom) code."""
    # BUGFIX: generate the data locally instead of relying on an undefined
    # module-level X, matching the sibling definition of this test.
    n_samples, n_features, n_dims = 10, 5, 3
    X = [rng_global.randn(n_features, n_dims) for i in range(n_samples)]
    n_kernels = 8
    dico = MiniBatchMultivariateDictLearning(n_kernels=n_kernels,
                    random_state=0, n_iter=3, n_nonzero_coefs=1,
                    verbose=5, shuffle=False)
    code = dico.fit(X).transform(X[0])
    assert_true(len(code[0]) <= 1)
Example #4
0
def test_mdla_shuffle():
    """Disabling shuffling must not break the sparsity of the code."""
    n_samples, n_features, n_dims = 10, 5, 3
    X = [rng_global.randn(n_features, n_dims) for _ in range(n_samples)]
    n_kernels = 8
    learner = MiniBatchMultivariateDictLearning(
        n_kernels=n_kernels,
        random_state=0,
        n_iter=3,
        n_nonzero_coefs=1,
        verbose=5,
        shuffle=False,
    )
    sparse_code = learner.fit(X).transform(X[0])
    # n_nonzero_coefs=1 caps the number of selected atoms per sample.
    assert_true(len(sparse_code[0]) <= 1)
Example #5
0
def test_dict_init():
    """With learning_rate=0 and a user dict_init, kernels must stay put."""
    n_samples, n_features, n_dims = 10, 5, 3
    X = [rng_global.randn(n_features, n_dims) for _ in range(n_samples)]
    n_kernels = 8
    # Build an initial dictionary with unit Frobenius norm per kernel.
    d = [rng_global.randn(n_features, n_dims) for _ in range(n_kernels)]
    for kernel in d:
        kernel /= np.linalg.norm(kernel, "fro")

    # Zero learning rate: the batch learner must return dict_init unchanged.
    dico = MultivariateDictLearning(
        n_kernels=n_kernels,
        random_state=0,
        max_iter=1,
        n_nonzero_coefs=1,
        learning_rate=0.0,
        dict_init=d,
        verbose=5,
    ).fit(X)
    dico = dico.fit(X)
    for learned, initial in zip(dico.kernels_, d):
        assert_array_almost_equal(learned, initial)
    # code = dico.fit(X).transform(X[0])
    # assert (len(code[0]) > 1)

    # Same invariance for the mini-batch learner.
    dico = MiniBatchMultivariateDictLearning(
        n_kernels=n_kernels,
        random_state=0,
        n_iter=1,
        n_nonzero_coefs=1,
        dict_init=d,
        verbose=1,
        learning_rate=0.0,
    ).fit(X)
    dico = dico.fit(X)
    for learned, initial in zip(dico.kernels_, d):
        assert_array_almost_equal(learned, initial)
Example #6
0
def test_callback():
    """Both learners must accept a callback and still emit sparse codes."""
    n_samples, n_features, n_dims = 10, 5, 3
    X = [rng_global.randn(n_features, n_dims) for _ in range(n_samples)]
    n_kernels = 8

    def my_callback(loc):
        # Touch the dictionary object to exercise the callback code path.
        _ = loc["dict_obj"]

    learners = (
        MultivariateDictLearning(
            n_kernels=n_kernels,
            random_state=0,
            max_iter=2,
            n_nonzero_coefs=1,
            callback=my_callback,
        ),
        MiniBatchMultivariateDictLearning(
            n_kernels=n_kernels,
            random_state=0,
            n_iter=2,
            n_nonzero_coefs=1,
            callback=my_callback,
        ),
    )
    for learner in learners:
        code = learner.fit(X).transform(X[0])
        assert len(code[0]) <= 1
Example #7
0
def test_n_kernels():
    """Omitting n_kernels must default the dictionary to 2 * n_features atoms."""
    n_samples, n_features, n_dims = 10, 5, 3
    X = [rng_global.randn(n_features, n_dims) for _ in range(n_samples)]
    expected_kernels = 2 * n_features

    fitted = MultivariateDictLearning(
        random_state=0, max_iter=2, n_nonzero_coefs=1, verbose=5
    ).fit(X)
    assert len(fitted.kernels_) == expected_kernels

    fitted = MiniBatchMultivariateDictLearning(
        random_state=0, n_iter=2, n_nonzero_coefs=1, verbose=5
    ).fit(X)
    assert len(fitted.kernels_) == expected_kernels

    fitted = MiniBatchMultivariateDictLearning(
        random_state=0, n_iter=2, n_nonzero_coefs=1, verbose=5
    ).partial_fit(X)
    assert len(fitted.kernels_) == expected_kernels
Example #8
0
def test_mdla_nonzero_coef_errors():
    """Out-of-range n_nonzero_coefs values must raise ValueError at fit time."""
    n_samples, n_features, n_dims = 10, 5, 3
    X = [rng_global.randn(n_features, n_dims) for _ in range(n_samples)]
    n_kernels = 8

    # Zero coefficients is meaningless.
    too_few = MultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, max_iter=2, n_nonzero_coefs=0
    )
    assert_raises(ValueError, too_few.fit, X)

    # More coefficients than kernels is impossible.
    too_many = MiniBatchMultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, n_iter=2, n_nonzero_coefs=n_kernels + 1
    )
    assert_raises(ValueError, too_many.fit, X)
Example #9
0
def test_mdla_normalization():
    """Every learned kernel must come out with unit Frobenius norm."""
    n_samples, n_features, n_dims = 10, 5, 3
    X = [rng_global.randn(n_features, n_dims) for _ in range(n_samples)]
    n_kernels = 8
    learners = (
        MultivariateDictLearning(
            n_kernels=n_kernels, random_state=0, max_iter=2, verbose=1
        ),
        MiniBatchMultivariateDictLearning(
            n_kernels=n_kernels, random_state=0, n_iter=2, verbose=1
        ),
    )
    for learner in learners:
        for kernel in learner.fit(X).kernels_:
            assert_almost_equal(np.linalg.norm(kernel, "fro"), 1.0)
Example #10
0
def test_X_array():
    """A 3-d ndarray input (instead of a list of 2-d arrays) must be accepted."""
    n_samples, n_features, n_dims = 10, 5, 3
    n_kernels = 8
    X = rng_global.randn(n_samples, n_features, n_dims)

    learner = MultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, max_iter=3, n_nonzero_coefs=3, verbose=5
    )
    code = learner.fit(X).transform(X[0])
    # n_nonzero_coefs=3 caps the number of atoms selected per sample.
    assert len(code[0]) <= 3

    learner = MiniBatchMultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, n_iter=3, n_nonzero_coefs=3, verbose=5
    )
    code = learner.fit(X).transform(X[0])
    assert len(code[0]) <= 3
Example #11
0
def test_mdla_shapes():
    """Each learned kernel must match the (n_features, n_dims) input shape."""
    n_samples, n_features, n_dims = 10, 5, 3
    X = [rng_global.randn(n_features, n_dims) for _ in range(n_samples)]
    n_kernels = 8

    fitted = MultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, max_iter=10, verbose=5
    ).fit(X)
    for kernel in fitted.kernels_:
        assert kernel.shape == (n_features, n_dims)

    fitted = MiniBatchMultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, verbose=5, n_iter=10
    ).fit(X)
    for kernel in fitted.kernels_:
        assert kernel.shape == (n_features, n_dims)
# ---------------------------------------------------------------------------
# Script-level setup for an EEG multivariate dictionary-learning experiment.
# NOTE(review): X, rng_global, plot_objective_func and plot_atom_usage are
# defined elsewhere in this file/project — not visible in this chunk.
n_samples = len(X)
n_dims = X[0].shape[0]  # 22 electrodes
n_features = X[0].shape[1]  # 375, 3s of decimated signal at 125Hz
kernel_init_len = 80  # kernel size is 50
n_kernels = 60
n_nonzero_coefs = 2
learning_rate = 5.0
n_iter = 40  # 100
n_jobs, batch_size = -1, None  # n_cpu, 5*n_cpu
figname = "-60ker-K3-klen80-lr5.0-emm-all"

# Learn the dictionary online (mini-batch) on the whole dataset.
d = MiniBatchMultivariateDictLearning(n_kernels=n_kernels,
                                      batch_size=batch_size,
                                      n_iter=n_iter,
                                      n_nonzero_coefs=n_nonzero_coefs,
                                      n_jobs=n_jobs,
                                      learning_rate=learning_rate,
                                      kernel_init_len=kernel_init_len,
                                      verbose=1,
                                      random_state=rng_global)
d = d.fit(X)

# Plot the objective-function trace recorded during fitting.
plot_objective_func(d.error_, n_iter, figname)

# Atom-usage plotting is parallelized separately from fitting.
n_jobs = 4
plot_atom_usage(X, d.kernels_, n_nonzero_coefs, n_jobs, figname)
with open('EEG-savedico' + figname + '.pkl', 'wb') as f:
    o = {
        'kernels': d.kernels_,
        'error': d.error_,
    # Per-metric result cubes, indexed as (snr_level, experiment, iteration).
    # NOTE(review): wc, wfs, hc, wcpa, wbc, wg and wfb are read below but
    # allocated above this chunk — confirm against the full file.
    hfs = zeros((n_snr, n_experiments, n_iter))
    hcpa = zeros((n_snr, n_experiments, n_iter))
    hbc = zeros((n_snr, n_experiments, n_iter))
    hg = zeros((n_snr, n_experiments, n_iter))
    hfb = zeros((n_snr, n_experiments, n_iter))
    dr99 = zeros((n_snr, n_experiments, n_iter))
    dr97 = zeros((n_snr, n_experiments, n_iter))

    # Repeat the dictionary-recovery experiment for every SNR level.
    for i, s in enumerate(snr):
        for e in range(n_experiments):
            # Fresh synthetic generating dict / signals / codes at SNR `s`.
            g, X, code = _generate_testbed(kernel_init_len,
                n_nonzero_coefs, n_kernels, n_samples, n_features,
                n_dims, s)
            d = MiniBatchMultivariateDictLearning(n_kernels=n_kernels, 
                batch_size=batch_size, n_iter=n_iter,
                n_nonzero_coefs=n_nonzero_coefs, callback=callback_recovery,
                n_jobs=n_jobs, learning_rate=learning_rate,
                kernel_init_len=kernel_init_len, verbose=1,
                random_state=rng_global)
            # Attributes consumed/filled by callback_recovery each iteration:
            # the callback appends one metric value per list per iteration.
            d.generating_dict = list(g)
            d.wc, d.wfs, d.hc, d.hfs = list(), list(), list(), list()
            d.wcpa, d.wbc, d.wg, d.wfb = list(), list(), list(), list()
            d.hcpa, d.hbc, d.hg, d.hfb = list(), list(), list(), list()
            d.dr99, d.dr97 = list(), list()
            print ('\nExperiment', e+1, 'on', n_experiments)
            d = d.fit(X)
            # Collect the per-iteration metric traces into the result cubes.
            wc[i, e, :] = array(d.wc); wfs[i, e, :] = array(d.wfs)
            hc[i, e, :] = array(d.hc); hfs[i, e, :] = array(d.hfs)
            wcpa[i, e, :] = array(d.wcpa); wbc[i, e, :] = array(d.wbc)
            wg[i, e, :] = array(d.wg); wfb[i, e, :] = array(d.wfb)
            hcpa[i, e, :] = array(d.hcpa); hbc[i, e, :] = array(d.hbc)
            hg[i, e, :] = array(d.hg); hfb[i, e, :] = array(d.hfb)
# Benchmark configuration: dictionary size, epochs and gradient step size.
n_kernels, max_iter, learning_rate = 50, 10, 1.5
n_jobs, batch_size = -1, None

# Wall-clock time per iteration and x-positions of plot separators.
iter_time, plot_separator, it_separator = list(), list(), 0

# Synthetic problem shared by all benchmark configurations below.
generating_dict, X, code = _generate_testbed(kernel_init_len, n_nonzero_coefs,
                                             n_kernels, n_samples, n_features,
                                             n_dims)

# Online without mini-batch
print ('Processing ', max_iter, 'iterations in online mode, '
       'without multiprocessing:', end='')
# Whole dataset as a single batch, one process (no mini-batching).
batch_size, n_jobs =n_samples, 1
learned_dict = MiniBatchMultivariateDictLearning(n_kernels=n_kernels, 
                                batch_size=batch_size, n_iter=max_iter,
                                n_nonzero_coefs=n_nonzero_coefs,
                                n_jobs=n_jobs, learning_rate=learning_rate,
                                kernel_init_len=kernel_init_len, verbose=1,
                                dict_init=None, random_state=rng_global)
ts = time()
learned_dict = learned_dict.fit(X)
# Record the mean wall-clock time per iteration for this configuration.
iter_time.append((time()-ts) / max_iter)
it_separator += 1
plot_separator.append(it_separator)

# Online with mini-batch
# Candidate mini-batch sizes: 1x, 3x, 5x, 7x and 9x the CPU count.
minibatch_range = [cpu_count()]
minibatch_range.extend([cpu_count()*i for i in range(3, 10, 2)])
n_jobs = -1
for mb in minibatch_range:
    print ('\nProcessing ', max_iter, 'iterations in online mode, with ',
           'minibatch size', mb, 'and', cpu_count(), 'processes:', end='')
Example #15
0
# Parallel jobs and mini-batch size for the dictionary-recovery experiment.
n_jobs, batch_size = -1, 10
# Per-iteration metric traces filled by the loop below.
detect_rate, wasserstein, objective_error = list(), list(), list()

# Synthetic generating dictionary, signals and sparse codes.
generating_dict, X, code = _generate_testbed(kernel_init_len, n_nonzero_coefs,
                                             n_kernels, n_samples, n_features,
                                             n_dims)

# # Create a dictionary
# dict_init = [rand(kernel_init_len, n_dims) for i in range(n_kernels)]
# for i in range(len(dict_init)):
#     dict_init[i] /= norm(dict_init[i], 'fro')
# None lets the estimator draw its own random initial dictionary.
dict_init = None
    
learned_dict = MiniBatchMultivariateDictLearning(n_kernels=n_kernels, 
                                batch_size=batch_size, n_iter=n_iter,
                                n_nonzero_coefs=n_nonzero_coefs,
                                n_jobs=n_jobs, learning_rate=learning_rate,
                                kernel_init_len=kernel_init_len, verbose=1,
                                dict_init=dict_init, random_state=rng_global)

# Update learned dictionary at each iteration and compute a distance
# with the generating dictionary
for i in range(max_iter):
    learned_dict = learned_dict.partial_fit(X)
    # Compute the detection rate
    detect_rate.append(detection_rate(learned_dict.kernels_,
                                        generating_dict, 0.99))
    # Compute the Wasserstein distance
    wasserstein.append(emd(learned_dict.kernels_, generating_dict,
                        'chordal', scale=True))
    # Get the objective error
    objective_error.append(learned_dict.error_.sum())
    # Per-metric result cubes, indexed as (snr_level, experiment, iteration).
    wc = zeros((n_snr, n_experiments, n_iter))
    wfs = zeros((n_snr, n_experiments, n_iter))
    hc = zeros((n_snr, n_experiments, n_iter))
    hfs = zeros((n_snr, n_experiments, n_iter))
    bd = zeros((n_snr, n_experiments, n_iter))
    dr99 = zeros((n_snr, n_experiments, n_iter))
    dr97 = zeros((n_snr, n_experiments, n_iter))

    # Repeat the dictionary-recovery experiment for every SNR level.
    for i, s in enumerate(snr):
        for e in range(n_experiments):
            # Fresh synthetic generating dict / signals / codes at SNR `s`.
            g, X, code = _generate_testbed(kernel_init_len,
                n_nonzero_coefs, n_kernels, n_samples, n_features,
                n_dims, s)
            d = MiniBatchMultivariateDictLearning(n_kernels=n_kernels,
                batch_size=batch_size, n_iter=n_iter,
                n_nonzero_coefs=n_nonzero_coefs, callback=callback_recovery,
                n_jobs=n_jobs, learning_rate=learning_rate,
                kernel_init_len=kernel_init_len, verbose=1,
                random_state=rng_global)
            # Attributes consumed/filled by callback_recovery: one metric
            # value is appended to each list per iteration.
            d.generating_dict = list(g)
            d.wc, d.wfs, d.hc, d.hfs = list(), list(), list(), list()
            d.bd, d.dr99, d.dr97 = list(), list(), list()
            print('\nExperiment', e + 1, 'on', n_experiments)
            d = d.fit(X)
            # Collect the per-iteration metric traces into the result cubes.
            wc[i, e, :] = array(d.wc)
            wfs[i, e, :] = array(d.wfs)
            hc[i, e, :] = array(d.hc)
            hfs[i, e, :] = array(d.hfs)
            dr99[i, e, :] = array(d.dr99)
            dr97[i, e, :] = array(d.dr97)
            bd[i, e, :] = array(d.bd)
    # BUGFIX: pickle writes bytes, so the file must be opened in binary mode
    # ("wb"); text mode ("w") raises TypeError under Python 3.
    with open("expe_reco.pck", "wb") as f:
        o = {'wc': wc, 'wfs': wfs, 'hc': hc, 'hfs': hfs, 'bd': bd,
             'dr99': dr99, 'dr97': dr97}
        pickle.dump(o, f)
    plot_recov(wc, wfs, hc, hfs, bd, dr99, dr97, n_iter, "univariate_recov")
# Online without mini-batch
print(
    "Processing ",
    max_iter,
    "iterations in online mode, "
    "without multiprocessing:",
    end="",
)
# Whole dataset as a single batch, one process (no mini-batching).
batch_size, n_jobs = n_samples, 1
learned_dict = MiniBatchMultivariateDictLearning(
    n_kernels=n_kernels,
    batch_size=batch_size,
    n_iter=max_iter,
    n_nonzero_coefs=n_nonzero_coefs,
    n_jobs=n_jobs,
    learning_rate=learning_rate,
    kernel_init_len=kernel_init_len,
    verbose=1,
    dict_init=None,
    random_state=rng_global,
)
ts = time()
learned_dict = learned_dict.fit(X)
# Record the mean wall-clock time per iteration for this configuration.
iter_time.append((time() - ts) / max_iter)
it_separator += 1
plot_separator.append(it_separator)

# Online with mini-batch
# Candidate mini-batch sizes: 1x, 3x, 5x, 7x and 9x the CPU count.
minibatch_range = [cpu_count()]
minibatch_range.extend([cpu_count() * i for i in range(3, 10, 2)])
n_jobs = -1
Example #18
0
# Parallel jobs and mini-batch size for the dictionary-recovery experiment.
n_jobs, batch_size = -1, 10
# NOTE(review): this list is named `detection_rate` while the function called
# in the loop below is `detectionRate`; a sibling version of this script uses
# a `detect_rate` list with a `detection_rate` function. Confirm which names
# exist in the full file — the list would shadow a same-named function.
detection_rate, wasserstein, objective_error = list(), list(), list()

# Synthetic generating dictionary, signals and sparse codes.
generating_dict, X, code = _generate_testbed(kernel_init_len, n_nonzero_coefs,
                                             n_kernels, n_samples, n_features,
                                             n_dims)

# # Create a dictionary
# dict_init = [rand(kernel_init_len, n_dims) for i in range(n_kernels)]
# for i in range(len(dict_init)):
#     dict_init[i] /= norm(dict_init[i], 'fro')
# None lets the estimator draw its own random initial dictionary.
dict_init = None
    
learned_dict = MiniBatchMultivariateDictLearning(n_kernels=n_kernels, 
                                batch_size=batch_size, n_iter=n_iter,
                                n_nonzero_coefs=n_nonzero_coefs,
                                n_jobs=n_jobs, learning_rate=learning_rate,
                                kernel_init_len=kernel_init_len, verbose=1,
                                dict_init=dict_init, random_state=rng_global)

# Update learned dictionary at each iteration and compute a distance
# with the generating dictionary
for i in range(max_iter):
    learned_dict = learned_dict.partial_fit(X)
    # Compute the detection rate
    detection_rate.append(detectionRate(learned_dict.kernels_,
                                        generating_dict, 0.99))
    # Compute the Wasserstein distance
    wasserstein.append(emd(learned_dict.kernels_, generating_dict,
                        'chordal', scale=True))
    # Get the objective error
    objective_error.append(learned_dict.error_.sum())