def test_mdla_dict_update():
    """A second epoch of fitting must move every kernel of the dictionary."""
    n_kernels = 10
    # n_samples, n_features, n_dims = 100, 5, 3
    n_samples, n_features, n_dims = 80, 5, 3
    X = [rng_global.randn(n_features, n_dims) for i in range(n_samples)]

    # Batch learner: refit and verify the kernels changed.
    dico = MultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, max_iter=10, n_jobs=-1
    ).fit(X)
    first_epoch = list(dico.kernels_)
    dico = dico.fit(X)
    second_epoch = list(dico.kernels_)
    for before, after in zip(first_epoch, second_epoch):
        assert (before - after).sum() != 0.0

    # Mini-batch learner, refit through fit().
    dico = MiniBatchMultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, n_iter=10, n_jobs=-1
    ).fit(X)
    first_epoch = list(dico.kernels_)
    dico = dico.fit(X)
    second_epoch = list(dico.kernels_)
    for before, after in zip(first_epoch, second_epoch):
        assert (before - after).sum() != 0.0

    # Mini-batch learner, refit through partial_fit().
    dico = MiniBatchMultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, n_iter=10, n_jobs=-1
    ).partial_fit(X)
    first_epoch = list(dico.kernels_)
    dico = dico.partial_fit(X)
    second_epoch = list(dico.kernels_)
    for before, after in zip(first_epoch, second_epoch):
        assert (before - after).sum() != 0.0
def test_multivariate_input_shape():
    """Kernels adopt the training-sample shape; mismatched widths raise."""
    n_samples, n_features, n_dims = 10, 5, 3
    X = [rng_global.randn(n_features, n_dims) for i in range(n_samples)]
    n_kernels = 7
    # Samples with a different trailing dimension must be rejected.
    n_dims_w = 6
    Xw = [rng_global.randn(n_features, n_dims_w) for i in range(n_samples)]

    dico = MultivariateDictLearning(n_kernels=n_kernels).fit(X)
    for i in range(n_kernels):
        assert dico.kernels_[i].shape == (n_features, n_dims)
    dico = MultivariateDictLearning(n_kernels=n_kernels)
    assert_raises(ValueError, dico.fit, Xw)

    dico = MiniBatchMultivariateDictLearning(n_kernels=n_kernels).fit(X)
    for i in range(n_kernels):
        assert dico.kernels_[i].shape == (n_features, n_dims)
    dico = MiniBatchMultivariateDictLearning(n_kernels=n_kernels)
    assert_raises(ValueError, dico.fit, Xw)

    dico = MiniBatchMultivariateDictLearning(n_kernels=n_kernels).partial_fit(X)
    for i in range(n_kernels):
        assert dico.kernels_[i].shape == (n_features, n_dims)
    dico = MiniBatchMultivariateDictLearning(n_kernels=n_kernels)
    assert_raises(ValueError, dico.partial_fit, Xw)
def test_mdla_shuffle():
    """shuffle=False must still produce a valid (sparse) decomposition.

    BUG FIX: this test referenced ``X`` without defining it, raising a
    NameError at run time.  The data set is now generated locally, exactly
    as the other tests in this module do.  ``assert_true`` (deprecated and
    removed from modern sklearn testing utilities) is replaced by a bare
    ``assert``, consistent with the rest of the module.
    """
    n_samples, n_features, n_dims = 10, 5, 3
    X = [rng_global.randn(n_features, n_dims) for i in range(n_samples)]
    n_kernels = 8
    dico = MiniBatchMultivariateDictLearning(
        n_kernels=n_kernels,
        random_state=0,
        n_iter=3,
        n_nonzero_coefs=1,
        verbose=5,
        shuffle=False,
    )
    code = dico.fit(X).transform(X[0])
    # n_nonzero_coefs=1: at most one atom may be selected for the sample.
    assert len(code[0]) <= 1
def test_mdla_shuffle():
    """shuffle=False must still produce a valid (sparse) decomposition."""
    n_samples, n_features, n_dims = 10, 5, 3
    X = [rng_global.randn(n_features, n_dims) for i in range(n_samples)]
    n_kernels = 8
    dico = MiniBatchMultivariateDictLearning(
        n_kernels=n_kernels,
        random_state=0,
        n_iter=3,
        n_nonzero_coefs=1,
        verbose=5,
        shuffle=False,
    )
    code = dico.fit(X).transform(X[0])
    # FIX: assert_true is deprecated/removed in modern sklearn testing
    # utilities; use a bare assert like the other tests in this module.
    # n_nonzero_coefs=1, so at most one atom may be selected per sample.
    assert len(code[0]) <= 1
def test_dict_init():
    """With learning_rate=0 and a user-supplied init, kernels stay at init."""
    n_samples, n_features, n_dims = 10, 5, 3
    X = [rng_global.randn(n_features, n_dims) for i in range(n_samples)]
    n_kernels = 8

    # Build a Frobenius-normalized initial dictionary.
    d = [rng_global.randn(n_features, n_dims) for i in range(n_kernels)]
    d = [kernel / np.linalg.norm(kernel, "fro") for kernel in d]

    # Batch learner: a zero learning rate freezes the dictionary.
    dico = MultivariateDictLearning(
        n_kernels=n_kernels,
        random_state=0,
        max_iter=1,
        n_nonzero_coefs=1,
        learning_rate=0.0,
        dict_init=d,
        verbose=5,
    ).fit(X)
    dico = dico.fit(X)
    for learned, initial in zip(dico.kernels_, d):
        assert_array_almost_equal(learned, initial)

    # Mini-batch learner: same invariant.
    dico = MiniBatchMultivariateDictLearning(
        n_kernels=n_kernels,
        random_state=0,
        n_iter=1,
        n_nonzero_coefs=1,
        dict_init=d,
        verbose=1,
        learning_rate=0.0,
    ).fit(X)
    dico = dico.fit(X)
    for learned, initial in zip(dico.kernels_, d):
        assert_array_almost_equal(learned, initial)
def test_callback():
    """A user callback receiving the learner's locals must not break fitting."""
    n_samples, n_features, n_dims = 10, 5, 3
    X = [rng_global.randn(n_features, n_dims) for i in range(n_samples)]
    n_kernels = 8

    def my_callback(loc):
        # The callback is handed the caller's locals(); just touch one entry.
        _ = loc["dict_obj"]

    # Batch learner with callback attached.
    dico = MultivariateDictLearning(
        n_kernels=n_kernels,
        random_state=0,
        max_iter=2,
        n_nonzero_coefs=1,
        callback=my_callback,
    )
    code = dico.fit(X).transform(X[0])
    assert len(code[0]) <= 1

    # Mini-batch learner with callback attached.
    dico = MiniBatchMultivariateDictLearning(
        n_kernels=n_kernels,
        random_state=0,
        n_iter=2,
        n_nonzero_coefs=1,
        callback=my_callback,
    )
    code = dico.fit(X).transform(X[0])
    assert len(code[0]) <= 1
def test_n_kernels():
    """When n_kernels is omitted, the learners default to 2 * n_features."""
    n_samples, n_features, n_dims = 10, 5, 3
    X = [rng_global.randn(n_features, n_dims) for i in range(n_samples)]

    dico = MultivariateDictLearning(
        random_state=0, max_iter=2, n_nonzero_coefs=1, verbose=5
    ).fit(X)
    assert len(dico.kernels_) == 2 * n_features

    dico = MiniBatchMultivariateDictLearning(
        random_state=0, n_iter=2, n_nonzero_coefs=1, verbose=5
    ).fit(X)
    assert len(dico.kernels_) == 2 * n_features

    dico = MiniBatchMultivariateDictLearning(
        random_state=0, n_iter=2, n_nonzero_coefs=1, verbose=5
    ).partial_fit(X)
    assert len(dico.kernels_) == 2 * n_features
def test_mdla_nonzero_coef_errors():
    """Out-of-range n_nonzero_coefs values must raise ValueError at fit."""
    n_samples, n_features, n_dims = 10, 5, 3
    X = [rng_global.randn(n_features, n_dims) for i in range(n_samples)]
    n_kernels = 8

    # Zero coefficients requested: invalid.
    dico = MultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, max_iter=2, n_nonzero_coefs=0
    )
    assert_raises(ValueError, dico.fit, X)

    # More coefficients than kernels: invalid.
    dico = MiniBatchMultivariateDictLearning(
        n_kernels=n_kernels,
        random_state=0,
        n_iter=2,
        n_nonzero_coefs=n_kernels + 1,
    )
    assert_raises(ValueError, dico.fit, X)
def test_mdla_normalization():
    """Every learned kernel must have unit Frobenius norm after fitting."""
    n_samples, n_features, n_dims = 10, 5, 3
    X = [rng_global.randn(n_features, n_dims) for i in range(n_samples)]
    n_kernels = 8

    dico = MultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, max_iter=2, verbose=1
    ).fit(X)
    for kernel in dico.kernels_:
        assert_almost_equal(np.linalg.norm(kernel, "fro"), 1.0)

    dico = MiniBatchMultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, n_iter=2, verbose=1
    ).fit(X)
    for kernel in dico.kernels_:
        assert_almost_equal(np.linalg.norm(kernel, "fro"), 1.0)
def test_X_array():
    """A 3-D ndarray input (instead of a list of 2-D arrays) is accepted."""
    n_samples, n_features, n_dims = 10, 5, 3
    n_kernels = 8
    # Single ndarray of shape (n_samples, n_features, n_dims).
    X = rng_global.randn(n_samples, n_features, n_dims)

    dico = MultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, max_iter=3,
        n_nonzero_coefs=3, verbose=5,
    )
    code = dico.fit(X).transform(X[0])
    assert len(code[0]) <= 3

    dico = MiniBatchMultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, n_iter=3,
        n_nonzero_coefs=3, verbose=5,
    )
    code = dico.fit(X).transform(X[0])
    assert len(code[0]) <= 3
def test_mdla_shapes():
    """Every learned kernel has shape (n_features, n_dims)."""
    n_samples, n_features, n_dims = 10, 5, 3
    X = [rng_global.randn(n_features, n_dims) for i in range(n_samples)]
    n_kernels = 8

    dico = MultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, max_iter=10, verbose=5
    ).fit(X)
    for i in range(n_kernels):
        assert dico.kernels_[i].shape == (n_features, n_dims)

    dico = MiniBatchMultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, verbose=5, n_iter=10
    ).fit(X)
    for i in range(n_kernels):
        assert dico.kernels_[i].shape == (n_features, n_dims)
# Learn a multivariate dictionary on EEG trials and save it.
# NOTE(review): this chunk is truncated — the dict literal at the end
# continues beyond the visible source; its tail is preserved as-is.
n_samples = len(X)
n_dims = X[0].shape[0]  # 22 electrodes
n_features = X[0].shape[1]  # 375, 3s of decimated signal at 125Hz
kernel_init_len = 80  # kernel size is 50
n_kernels = 60
n_nonzero_coefs = 2
learning_rate = 5.0
n_iter = 40  # 100
n_jobs, batch_size = -1, None  # n_cpu, 5*n_cpu
# Suffix used for every figure/output file produced by this run.
figname = "-60ker-K3-klen80-lr5.0-emm-all"
d = MiniBatchMultivariateDictLearning(n_kernels=n_kernels,
        batch_size=batch_size, n_iter=n_iter,
        n_nonzero_coefs=n_nonzero_coefs, n_jobs=n_jobs,
        learning_rate=learning_rate, kernel_init_len=kernel_init_len,
        verbose=1, random_state=rng_global)
d = d.fit(X)
# Plot the objective function decrease over the iterations.
plot_objective_func(d.error_, n_iter, figname)
n_jobs = 4
plot_atom_usage(X, d.kernels_, n_nonzero_coefs, n_jobs, figname)
# Persist the learned dictionary (binary mode: pickle output).
with open('EEG-savedico' + figname + '.pkl', 'wb') as f:
    o = {
        'kernels': d.kernels_,
        'error': d.error_,
# Dictionary-recovery experiment over several SNR levels: for each noise
# level, run n_experiments independent fits and record per-iteration
# recovery metrics gathered by callback_recovery.
# NOTE(review): wc, wfs, hc, wcpa, wbc, wg and wfb are assigned below but
# allocated outside this chunk — presumably zeros((n_snr, n_experiments,
# n_iter)) like the arrays here; verify against the full script.
hfs = zeros((n_snr, n_experiments, n_iter))
hcpa = zeros((n_snr, n_experiments, n_iter))
hbc = zeros((n_snr, n_experiments, n_iter))
hg = zeros((n_snr, n_experiments, n_iter))
hfb = zeros((n_snr, n_experiments, n_iter))
dr99 = zeros((n_snr, n_experiments, n_iter))  # detection rate at 0.99
dr97 = zeros((n_snr, n_experiments, n_iter))  # detection rate at 0.97
for i, s in enumerate(snr):
    for e in range(n_experiments):
        # Fresh synthetic dictionary/data/code for this noise level.
        g, X, code = _generate_testbed(kernel_init_len, n_nonzero_coefs,
                                       n_kernels, n_samples, n_features,
                                       n_dims, s)
        d = MiniBatchMultivariateDictLearning(n_kernels=n_kernels,
                batch_size=batch_size, n_iter=n_iter,
                n_nonzero_coefs=n_nonzero_coefs,
                callback=callback_recovery, n_jobs=n_jobs,
                learning_rate=learning_rate,
                kernel_init_len=kernel_init_len, verbose=1,
                random_state=rng_global)
        # Attributes read/filled by callback_recovery at each iteration.
        d.generating_dict = list(g)
        d.wc, d.wfs, d.hc, d.hfs = list(), list(), list(), list()
        d.wcpa, d.wbc, d.wg, d.wfb = list(), list(), list(), list()
        d.hcpa, d.hbc, d.hg, d.hfb = list(), list(), list(), list()
        d.dr99, d.dr97 = list(), list()
        print('\nExperiment', e + 1, 'on', n_experiments)
        d = d.fit(X)
        # Copy the per-iteration metric traces into the result arrays.
        wc[i, e, :] = array(d.wc); wfs[i, e, :] = array(d.wfs)
        hc[i, e, :] = array(d.hc); hfs[i, e, :] = array(d.hfs)
        wcpa[i, e, :] = array(d.wcpa); wbc[i, e, :] = array(d.wbc)
        wg[i, e, :] = array(d.wg); wfb[i, e, :] = array(d.wfb)
        hcpa[i, e, :] = array(d.hcpa); hbc[i, e, :] = array(d.hbc)
        hg[i, e, :] = array(d.hg); hfb[i, e, :] = array(d.hfb)
# Benchmark: time dictionary learning in pure online mode (one process,
# batch = whole data set) and then with growing mini-batch sizes.
# NOTE(review): the chunk ends inside the mini-batch for-loop; the loop
# body continues beyond the visible source.
n_kernels, max_iter, learning_rate = 50, 10, 1.5
n_jobs, batch_size = -1, None
iter_time, plot_separator, it_separator = list(), list(), 0
generating_dict, X, code = _generate_testbed(kernel_init_len,
                                             n_nonzero_coefs, n_kernels,
                                             n_samples, n_features, n_dims)

# Online without mini-batch
print('Processing ', max_iter, 'iterations in online mode, '
      'without multiprocessing:', end='')
# Online mode: one batch containing all samples, single process.
batch_size, n_jobs = n_samples, 1
learned_dict = MiniBatchMultivariateDictLearning(n_kernels=n_kernels,
        batch_size=batch_size, n_iter=max_iter,
        n_nonzero_coefs=n_nonzero_coefs, n_jobs=n_jobs,
        learning_rate=learning_rate, kernel_init_len=kernel_init_len,
        verbose=1, dict_init=None, random_state=rng_global)
ts = time()
learned_dict = learned_dict.fit(X)
# Record average wall-clock time per iteration.
iter_time.append((time() - ts) / max_iter)
it_separator += 1
plot_separator.append(it_separator)

# Online with mini-batch
# Mini-batch sizes: one per CPU, then 3x, 5x, 7x, 9x the CPU count.
minibatch_range = [cpu_count()]
minibatch_range.extend([cpu_count() * i for i in range(3, 10, 2)])
n_jobs = -1
for mb in minibatch_range:
    print('\nProcessing ', max_iter, 'iterations in online mode, with ',
          'minibatch size', mb, 'and', cpu_count(), 'processes:', end='')
# Track dictionary recovery while learning incrementally with partial_fit.
# NOTE(review): max_iter is used below but not defined in this chunk
# (only n_iter is) — presumably set earlier in the script; verify.
n_jobs, batch_size = -1, 10
detect_rate, wasserstein, objective_error = list(), list(), list()
generating_dict, X, code = _generate_testbed(kernel_init_len,
                                             n_nonzero_coefs, n_kernels,
                                             n_samples, n_features, n_dims)

# # Create a dictionary
# dict_init = [rand(kernel_init_len, n_dims) for i in range(n_kernels)]
# for i in range(len(dict_init)):
#     dict_init[i] /= norm(dict_init[i], 'fro')
dict_init = None  # let the learner initialize its own dictionary
learned_dict = MiniBatchMultivariateDictLearning(n_kernels=n_kernels,
        batch_size=batch_size, n_iter=n_iter,
        n_nonzero_coefs=n_nonzero_coefs, n_jobs=n_jobs,
        learning_rate=learning_rate, kernel_init_len=kernel_init_len,
        verbose=1, dict_init=dict_init, random_state=rng_global)

# Update learned dictionary at each iteration and compute a distance
# with the generating dictionary
for i in range(max_iter):
    learned_dict = learned_dict.partial_fit(X)
    # Compute the detection rate (threshold 0.99)
    detect_rate.append(detection_rate(learned_dict.kernels_,
                                      generating_dict, 0.99))
    # Compute the Wasserstein distance
    wasserstein.append(emd(learned_dict.kernels_, generating_dict,
                           'chordal', scale=True))
    # Get the objective error
    objective_error.append(learned_dict.error_.sum())
# Univariate dictionary-recovery experiment over several SNR levels:
# for each noise level run n_experiments independent fits and record
# per-iteration recovery metrics gathered by callback_recovery.
wc = zeros((n_snr, n_experiments, n_iter))
wfs = zeros((n_snr, n_experiments, n_iter))
hc = zeros((n_snr, n_experiments, n_iter))
hfs = zeros((n_snr, n_experiments, n_iter))
bd = zeros((n_snr, n_experiments, n_iter))
dr99 = zeros((n_snr, n_experiments, n_iter))  # detection rate at 0.99
dr97 = zeros((n_snr, n_experiments, n_iter))  # detection rate at 0.97
for i, s in enumerate(snr):
    for e in range(n_experiments):
        # Fresh synthetic dictionary/data/code for this noise level.
        g, X, code = _generate_testbed(kernel_init_len, n_nonzero_coefs,
                                       n_kernels, n_samples, n_features,
                                       n_dims, s)
        d = MiniBatchMultivariateDictLearning(n_kernels=n_kernels,
                batch_size=batch_size, n_iter=n_iter,
                n_nonzero_coefs=n_nonzero_coefs,
                callback=callback_recovery, n_jobs=n_jobs,
                learning_rate=learning_rate,
                kernel_init_len=kernel_init_len, verbose=1,
                random_state=rng_global)
        # Attributes read/filled by callback_recovery at each iteration.
        d.generating_dict = list(g)
        d.wc, d.wfs, d.hc, d.hfs = list(), list(), list(), list()
        d.bd, d.dr99, d.dr97 = list(), list(), list()
        print('\nExperiment', e + 1, 'on', n_experiments)
        d = d.fit(X)
        # Copy the per-iteration metric traces into the result arrays.
        wc[i, e, :] = array(d.wc)
        wfs[i, e, :] = array(d.wfs)
        hc[i, e, :] = array(d.hc)
        hfs[i, e, :] = array(d.hfs)
        dr99[i, e, :] = array(d.dr99)
        dr97[i, e, :] = array(d.dr97)
        bd[i, e, :] = array(d.bd)
# BUG FIX: pickle.dump requires a binary-mode file object in Python 3;
# the file was previously opened with mode "w" (text), which raises a
# TypeError at dump time.  The EEG script in this project already uses
# 'wb' for its pickle output.
with open("expe_reco.pck", "wb") as f:
    o = {'wc': wc, 'wfs': wfs, 'hc': hc, 'hfs': hfs, 'bd': bd,
         'dr99': dr99, 'dr97': dr97}
    pickle.dump(o, f)
plot_recov(wc, wfs, hc, hfs, bd, dr99, dr97, n_iter, "univariate_recov")
# Online without mini-batch print( "Processing ", max_iter, "iterations in online mode, " "without multiprocessing:", end="", ) batch_size, n_jobs = n_samples, 1 learned_dict = MiniBatchMultivariateDictLearning( n_kernels=n_kernels, batch_size=batch_size, n_iter=max_iter, n_nonzero_coefs=n_nonzero_coefs, n_jobs=n_jobs, learning_rate=learning_rate, kernel_init_len=kernel_init_len, verbose=1, dict_init=None, random_state=rng_global, ) ts = time() learned_dict = learned_dict.fit(X) iter_time.append((time() - ts) / max_iter) it_separator += 1 plot_separator.append(it_separator) # Online with mini-batch minibatch_range = [cpu_count()] minibatch_range.extend([cpu_count() * i for i in range(3, 10, 2)]) n_jobs = -1
# Track dictionary recovery while learning incrementally with partial_fit.
# NOTE(review): this chunk calls detectionRate (camelCase) while a sibling
# version calls detection_rate — confirm which helper the script imports.
# NOTE(review): max_iter is used below but not defined in this chunk
# (only n_iter is) — presumably set earlier in the script; verify.
n_jobs, batch_size = -1, 10
detection_rate, wasserstein, objective_error = list(), list(), list()
generating_dict, X, code = _generate_testbed(kernel_init_len,
                                             n_nonzero_coefs, n_kernels,
                                             n_samples, n_features, n_dims)

# # Create a dictionary
# dict_init = [rand(kernel_init_len, n_dims) for i in range(n_kernels)]
# for i in range(len(dict_init)):
#     dict_init[i] /= norm(dict_init[i], 'fro')
dict_init = None  # let the learner initialize its own dictionary
learned_dict = MiniBatchMultivariateDictLearning(n_kernels=n_kernels,
        batch_size=batch_size, n_iter=n_iter,
        n_nonzero_coefs=n_nonzero_coefs, n_jobs=n_jobs,
        learning_rate=learning_rate, kernel_init_len=kernel_init_len,
        verbose=1, dict_init=dict_init, random_state=rng_global)

# Update learned dictionary at each iteration and compute a distance
# with the generating dictionary
for i in range(max_iter):
    learned_dict = learned_dict.partial_fit(X)
    # Compute the detection rate (threshold 0.99)
    detection_rate.append(detectionRate(learned_dict.kernels_,
                                        generating_dict, 0.99))
    # Compute the Wasserstein distance
    wasserstein.append(emd(learned_dict.kernels_, generating_dict,
                           'chordal', scale=True))
    # Get the objective error
    objective_error.append(learned_dict.error_.sum())