Example #1

# NOTE: this excerpt assumes `import os`, `import numpy as np`, and
# `import tensorflow as tf`, plus project helpers (encoder, init_variables,
# load_params, save_params, plot_images, construct_eval_func) in scope.
def main(data_name, method, dimZ, dimH, n_channel, batch_size, K_mc,
         checkpoint, lbd):
    # set up dataset specific stuff
    from config import config
    labels, n_iter, dimX, shape_high, ll = config(data_name, n_channel)
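    # (inferred from usage below: labels = per-task class splits, n_iter =
    #  iterations per task, dimX = flattened input dim, shape_high = image
    #  shape for plotting, ll = likelihood type)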

    if data_name == 'mnist':
        from mnist import load_mnist

    if data_name == 'notmnist':
        from notmnist import load_notmnist
        data_path = './notMNIST_small'  # assumed; `data_path` is otherwise undefined in this excerpt (Example #2 uses this path)

    # import functionalities
    if method == 'onlinevi':
        from bayesian_generator import generator_head, generator_shared, \
            generator, construct_gen
        from onlinevi import construct_optimizer, init_shared_prior, \
            update_shared_prior, update_q_sigma
    if method in ['ewc', 'noreg', 'laplace', 'si']:
        from generator import generator_head, generator_shared, generator, construct_gen
        if method in ['ewc', 'noreg']:
            from vae_ewc import construct_optimizer, lowerbound
        if method == 'ewc': from vae_ewc import update_ewc_loss, compute_fisher
        if method == 'laplace':
            from vae_laplace import construct_optimizer, lowerbound
            from vae_laplace import update_laplace_loss, compute_fisher, init_fisher_accum
        if method == 'si':
            from vae_si import construct_optimizer, lowerbound, update_si_reg

    # then define model
    n_layers_shared = 2
    lr = 1e-4  # learning rate; assumed value, `lr` is otherwise undefined in this excerpt
    batch_size_ph = tf.placeholder(tf.int32, shape=(), name='batch_size')
    dec_shared = generator_shared(dimX, dimH, n_layers_shared, 'sigmoid',
                                  'gen')
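    # the lower decoder layers are shared across all tasks; each task later
    # adds its own head network on top (see generator_head in the task loop)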

    # initialise sessions
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    string = method
    if method in ['ewc', 'laplace', 'si']:
        string = string + '_lbd%.1f' % lbd
    if method == 'onlinevi' and K_mc > 1:
        string = string + '_K%d' % K_mc
    path_name = data_name + '_%s/' % string
    if not os.path.isdir('save/'):
        os.mkdir('save/')
    if not os.path.isdir('save/' + path_name):
        os.mkdir('save/' + path_name)
        print 'create path save/' + path_name
    filename = 'save/' + path_name + 'checkpoint'
    if checkpoint < 0:
        print 'training from scratch'
        old_var_list = init_variables(sess)
    else:
        # NOTE: resuming assumes `old_var_list` is restored elsewhere; only
        # the fresh-start path above defines it in this excerpt
        load_params(sess, filename, checkpoint)
    checkpoint += 1

    # visualise the samples
    N_gen = 10**2
    path = 'figs/' + path_name
    if not os.path.isdir('figs/'):
        os.mkdir('figs/')
    if not os.path.isdir(path):
        os.mkdir(path)
        print 'create path ' + path
    X_ph = tf.placeholder(tf.float32, shape=(batch_size, dimX), name='x_ph')

    # now start fitting
    N_task = len(labels)
    gen_ops = []
    X_valid_list = []
    X_test_list = []
    eval_func_list = []
    result_list = []
    if method == 'onlinevi':
        shared_prior_params = init_shared_prior()
    if method in ['ewc', 'noreg']:
        ewc_loss = 0.0
    if method == 'laplace':
        F_accum = init_fisher_accum()
        laplace_loss = 0.0
    if method == 'si':
        old_params_shared = None
        si_reg = None
    n_layers_head = 2
    n_layers_enc = n_layers_shared + n_layers_head - 1
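    # the per-task encoder is one layer shallower than the decoder's combined
    # shared + head stack (3 vs. 4 layers with the settings above)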
    for task in xrange(1, N_task + 1):
        # first load data
        if data_name == 'mnist':
            X_train, X_test, _, _ = load_mnist(digits=labels[task - 1],
                                               conv=False)
        if data_name == 'notmnist':
            X_train, X_test, _, _ = load_notmnist(data_path,
                                                  digits=labels[task - 1],
                                                  conv=False)
        N_train = int(X_train.shape[0] * 0.9)
        X_valid_list.append(X_train[N_train:])
        X_train = X_train[:N_train]
        X_test_list.append(X_test)

        # define the head net and the generator ops
        dec = generator(
            generator_head(dimZ, dimH, n_layers_head, 'gen_%d' % task),
            dec_shared)
        enc = encoder(dimX, dimH, dimZ, n_layers_enc, 'enc_%d' % task)
        gen_ops.append(construct_gen(dec, dimZ, sampling=False)(N_gen))
        print 'construct eval function...'
        eval_func_list.append(construct_eval_func(X_ph, enc, dec, ll, \
                                                  batch_size_ph, K=100, sample_W=False))

        # then construct loss func and fit func
        print 'construct fit function...'
        if method == 'onlinevi':
            fit = construct_optimizer(X_ph, enc, dec, ll, X_train.shape[0], batch_size_ph, \
                                      shared_prior_params, task, K_mc)
        if method in ['ewc', 'noreg']:
            bound = lowerbound(X_ph, enc, dec, ll)
            fit = construct_optimizer(X_ph, batch_size_ph, bound,
                                      X_train.shape[0], ewc_loss)
            if method == 'ewc':
                fisher, var_list = compute_fisher(X_ph, batch_size_ph, bound,
                                                  X_train.shape[0])

        if method == 'laplace':
            bound = lowerbound(X_ph, enc, dec, ll)
            fit = construct_optimizer(X_ph, batch_size_ph, bound,
                                      X_train.shape[0], laplace_loss)
            fisher, var_list = compute_fisher(X_ph, batch_size_ph, bound,
                                              X_train.shape[0])

        if method == 'si':
            bound = lowerbound(X_ph, enc, dec, ll)
            fit, shared_var_list = construct_optimizer(X_ph, batch_size_ph,
                                                       bound, X_train.shape[0],
                                                       si_reg,
                                                       old_params_shared, lbd)
            if old_params_shared is None:
                old_params_shared = sess.run(shared_var_list)

        # initialise all the uninitialised stuff
        old_var_list = init_variables(sess, old_var_list)

        # start training for each task
        if method == 'si':
            new_params_shared, w_params_shared = fit(sess, X_train, n_iter, lr)
        else:
            fit(sess, X_train, n_iter, lr)

        # plot samples
        x_gen_list = sess.run(gen_ops, feed_dict={batch_size_ph: N_gen})
        for i in xrange(len(x_gen_list)):
            plot_images(x_gen_list[i], shape_high, path, \
                        data_name + '_gen_task%d_%d' % (task, i + 1))

        # keep one generated sample per task head; slots for future tasks stay
        # blank (zeros) so the final 'gen_all' grid tracks progress over tasks
        x_list = [x_gen_list[i][:1] for i in xrange(len(x_gen_list))]
        x_list = np.concatenate(x_list, 0)
        tmp = np.zeros([10, dimX])
        tmp[:task] = x_list
        if task == 1:
            x_gen_all = tmp
        else:
            x_gen_all = np.concatenate([x_gen_all, tmp], 0)

        # print per-task ll on all tasks so far (evaluated on the validation split here)
        tmp_list = []
        for i in xrange(len(eval_func_list)):
            print 'task %d' % (i + 1),
            test_ll = eval_func_list[i](sess, X_valid_list[i])
            tmp_list.append(test_ll)
        result_list.append(tmp_list)

        # save param values
        save_params(sess, filename, checkpoint)
        checkpoint += 1

        # update regularisers/priors
        if method == 'ewc':
            # update EWC loss
            print 'update ewc loss...'
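            # the Fisher information is estimated on a random minibatch of the
            # task just finished, then folded into the quadratic EWC penalty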
            X_batch = X_train[np.random.permutation(range(
                X_train.shape[0]))[:batch_size]]
            ewc_loss = update_ewc_loss(sess, ewc_loss, var_list, fisher, lbd,
                                       X_batch)
        if method == 'laplace':
            # update Laplace loss
            print 'update laplace loss...'
            X_batch = X_train[np.random.permutation(range(
                X_train.shape[0]))[:batch_size]]
            laplace_loss, F_accum = update_laplace_loss(
                sess, F_accum, var_list, fisher, lbd, X_batch)
        if method == 'onlinevi':
            # update prior
            print 'update prior...'
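            # online VI: the posterior over the shared weights from this task
            # becomes the prior for the next one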
            shared_prior_params = update_shared_prior(sess,
                                                      shared_prior_params)
            # reset the variance of q
            update_q_sigma(sess)

        if method == 'si':
            # update regularisers/priors
            print 'update SI big omega matrices...'
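            # SI's 'omega' importance weights are built from the path each
            # shared parameter took while training on this task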
            si_reg, _ = update_si_reg(sess, si_reg, new_params_shared,
                                      old_params_shared, w_params_shared)
            old_params_shared = new_params_shared

    plot_images(x_gen_all, shape_high, path, data_name + '_gen_all')

    for i in xrange(len(result_list)):
        print result_list[i]

    # save results
    import pickle
    if not os.path.isdir('results/'):
        os.mkdir('results/')
    fname = 'results/' + data_name + '_%s.pkl' % string
    with open(fname, 'wb') as f:
        pickle.dump(result_list, f)
    print 'test-ll results saved in', fname
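
A hypothetical invocation of the function above (every hyperparameter value is
illustrative, not taken from the source):

if __name__ == '__main__':
    main(data_name='mnist', method='ewc', dimZ=50, dimH=500, n_channel=1,
         batch_size=50, K_mc=1, checkpoint=-1, lbd=1.0)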
Example #2

# NOTE: assumes `import numpy as np`, `import tensorflow as tf`,
# `import keras`, `from keras.models import Sequential`, and
# `from keras.layers import Dense, Dropout`.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
keras.backend.set_session(sess)

# the data, shuffled and split between train and test sets
data_name = 'notmnist'
if data_name == 'mnist':
    from mnist import load_mnist
    x_train, x_test, y_train, y_test = load_mnist()
if data_name == 'notmnist':
    from notmnist import load_notmnist
    data_path = './notMNIST_small'
    x_train, x_test, y_train, y_test = [], [], [], []
    for i in range(10):
        a, b, c, d = load_notmnist(data_path, [i], ratio=0.995)
        x_train.append(a); x_test.append(b)
        y_train.append(c); y_test.append(d)
    x_train = np.concatenate(x_train, 0)
    x_test = np.concatenate(x_test, 0)
    y_train = np.concatenate(y_train, 0)
    y_test = np.concatenate(y_test, 0)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

model = Sequential()
dimH = 1000
n_layer = 3
model.add(Dense(dimH, activation='relu', input_shape=(784,)))
model.add(Dropout(0.2))
for _ in range(n_layer-1):
    model.add(Dense(dimH, activation='relu'))
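
The snippet above is cut off after the hidden layers. A minimal, hypothetical
completion (softmax output, compile, fit); the optimizer, epoch count, and the
assumption that y_train/y_test are already one-hot are illustrative:

model.add(Dense(10, activation='softmax'))  # 10 classes for (not)MNIST
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=128, epochs=20,
          validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('test accuracy:', score[1])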
Example #3

# NOTE: same assumed imports and helpers as Example #1.
def main(data_name, method, dimZ, dimH, n_channel, batch_size, K_mc, checkpoint, lbd):
    # set up dataset specific stuff
    from config import config
    labels, n_iter, dimX, shape_high, ll = config(data_name, n_channel)
    if data_name == 'mnist':
        from mnist import load_mnist
    if data_name == 'notmnist':
        from notmnist import load_notmnist
        data_path = './notMNIST_small'  # assumed location, as in Example #2

    # import functionalities
    if method == 'onlinevi':
        from bayesian_generator import generator_head, generator_shared, \
            generator, construct_gen
    if method in ['ewc', 'noreg', 'si', 'laplace']:
        from generator import generator_head, generator_shared, generator, construct_gen

    # then define model
    n_layers_shared = 2
    batch_size_ph = tf.placeholder(tf.int32, shape=(), name='batch_size')
    dec_shared = generator_shared(dimX, dimH, n_layers_shared, 'sigmoid', 'gen')

    # initialise sessions
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    string = method
    if method in ['ewc', 'laplace', 'si']:
        string = string + '_lbd%.1f' % lbd
    if method == 'onlinevi' and K_mc > 1:
        string = string + '_K%d' % K_mc
    path_name = data_name + '_%s/' % string
    assert os.path.isdir('save/'+path_name)
    filename = 'save/' + path_name + 'checkpoint'

    # visualise the samples
    N_gen = 10**2
    X_ph = tf.placeholder(tf.float32, shape=(batch_size, dimX), name='x_ph')

    # now start fitting
    N_task = len(labels)
    gen_ops = []
    X_valid_list = []
    X_test_list = []
    eval_func_list = []
    result_list = []
    
    n_layers_head = 2
    n_layers_enc = n_layers_shared + n_layers_head - 1
    for task in xrange(1, N_task+1):
        # first load data
        if data_name == 'mnist':
            X_train, X_test, _, _ = load_mnist(digits=labels[task - 1], conv=False)
        if data_name == 'notmnist':
            X_train, X_test, _, _ = load_notmnist(data_path, digits=labels[task - 1], conv=False)
        N_train = int(X_train.shape[0] * 0.9)
        X_valid_list.append(X_train[N_train:])
        X_train = X_train[:N_train]
        X_test_list.append(X_test)
        
        # define the head net and the generator ops
        dec = generator(generator_head(dimZ, dimH, n_layers_head, 'gen_%d' % task), dec_shared)
        enc = encoder(dimX, dimH, dimZ, n_layers_enc, 'enc_%d' % task)
        gen_ops.append(construct_gen(dec, dimZ, sampling=False)(N_gen))
        eval_func_list.append(construct_eval_func(X_ph, enc, dec, ll, batch_size_ph,
                                                  K=5000, sample_W=False))
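        # K=5000 importance samples (vs. K=100 in Example #1) for a tighter
        # estimate of the test log-likelihood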
        
        # then load the trained model
        load_params(sess, filename, checkpoint=task - 1, init_all=False)
        
        # plot samples
        x_gen_list = sess.run(gen_ops, feed_dict={batch_size_ph: N_gen})
        x_list = []
        for i in xrange(len(x_gen_list)):
            ind = np.random.randint(len(x_gen_list[i]))
            x_list.append(x_gen_list[i][ind:ind+1])
        x_list = np.concatenate(x_list, 0)
        tmp = np.zeros([10, dimX])
        tmp[:task] = x_list
        if task == 1:
            x_gen_all = tmp
        else:           
            x_gen_all = np.concatenate([x_gen_all, tmp], 0)
        
        # print test-ll on all tasks
        tmp_list = []
        for i in xrange(len(eval_func_list)):
            print 'task %d' % (i+1),
            test_ll = eval_func_list[i](sess, X_test_list[i])
            tmp_list.append(test_ll)
        result_list.append(tmp_list)
    
    #x_gen_all = 1.0 - x_gen_all
    if not os.path.isdir('figs/'):
        os.mkdir('figs/')
    if not os.path.isdir('figs/visualisation/'):
        os.mkdir('figs/visualisation/')
        print 'create path figs/visualisation/'
    plot_images(x_gen_all, shape_high, 'figs/visualisation/', data_name + '_gen_all_' + method)
    
    for i in xrange(len(result_list)):
        print result_list[i]
        
    # save results
    import pickle
    if not os.path.isdir('results/'):
        os.mkdir('results/')
    fname = 'results/' + data_name + '_%s.pkl' % string
    with open(fname, 'wb') as f:
        pickle.dump(result_list, f)
    print 'test-ll results saved in', fname
Example #4

# NOTE: assumed imports as in Example #1, plus `keras` and the project's
# classifier helpers (load_model, construct_eval_func).
def main(data_name, method, dimZ, dimH, n_channel, batch_size, K_mc,
         checkpoint, lbd):
    # set up dataset specific stuff
    from config import config
    labels, n_iter, dimX, shape_high, ll = config(data_name, n_channel)

    # import functionalities
    if method == 'onlinevi':
        from bayesian_generator import generator_head, generator_shared, \
            generator, construct_gen
    if method in ['ewc', 'noreg', 'si', 'si2', 'laplace']:
        from generator import generator_head, generator_shared, generator, construct_gen

    # then define model
    n_layers_shared = 2
    batch_size_ph = tf.placeholder(tf.int32, shape=(), name='batch_size')
    dec_shared = generator_shared(dimX, dimH, n_layers_shared, 'sigmoid',
                                  'gen')

    # initialise sessions
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    keras.backend.set_session(sess)
    string = method
    if method in ['ewc', 'laplace']:
        string = string + '_lbd%d' % lbd
    if method in ['si', 'si2']:
        string = string + '_lbd%.1f' % lbd
    if method == 'onlinevi' and K_mc > 1:
        string = string + '_K%d' % K_mc
    path_name = data_name + '_%s_no_share_enc/' % string
    assert os.path.isdir('save/' + path_name)
    filename = 'save/' + path_name + 'checkpoint'
    # load the classifier
    cla = load_model(data_name)
    # print test error
    X_ph = tf.placeholder(tf.float32, shape=(batch_size, 28**2))
    y_ph = tf.placeholder(tf.float32, shape=(batch_size, 10))
    y_pred = cla(X_ph)
    correct_pred = tf.equal(tf.argmax(y_ph, 1), tf.argmax(y_pred, 1))
    acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    y_pred_ = tf.clip_by_value(y_pred, 1e-9, 1.0)
    kl = tf.reduce_mean(-tf.reduce_sum(y_ph * tf.log(y_pred_), 1))
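    # 'kl' is the mean test cross-entropy between the one-hot labels and the
    # clipped classifier predictions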

    for task in range(1):  # note: only sanity-checks the first class/digit (task 0)
        if data_name == 'mnist':
            from mnist import load_mnist
            _, X_test, _, Y_test = load_mnist([task])
        if data_name == 'notmnist':
            from notmnist import load_notmnist
            data_path = './notMNIST_small'  # assumed, as in Example #2
            _, X_test, _, Y_test = load_notmnist(data_path, [task], conv=False)
        test_acc = 0.0
        test_kl = 0.0
        N_test = X_test.shape[0]
        for i in range(N_test // batch_size):  # floor division: drops the last partial batch
            indl = i * batch_size
            indr = min((i + 1) * batch_size, N_test)
            tmp1, tmp2 = sess.run(
                (acc, kl),
                feed_dict={
                    X_ph: X_test[indl:indr],
                    y_ph: Y_test[indl:indr],
                    keras.backend.learning_phase(): 0
                })
            test_acc += tmp1 / (N_test // batch_size)
            test_kl += tmp2 / (N_test // batch_size)
        print 'classification accuracy on test data', test_acc
        print 'kl on test data', test_kl

    # now start fitting
    N_task = len(labels)
    eval_func_list = []
    result_list = []

    n_layers_head = 2
    n_layers_enc = n_layers_shared + n_layers_head - 1
    for task in range(1, N_task + 1):

        # define the head net and the generator ops
        dec = generator(
            generator_head(dimZ, dimH, n_layers_head, 'gen_%d' % task),
            dec_shared)
        eval_func_list.append(construct_eval_func(dec, cla, batch_size_ph,
                                                  dimZ, task - 1, sample_W=True))
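        # the eval function presumably draws samples from this task's
        # generator head and scores them with the pre-trained classifier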

        # then load the trained model
        load_params(sess, filename, checkpoint=task - 1, init_all=False)

        # print test-ll on all tasks
        tmp_list = []
        for i in range(len(eval_func_list)):
            print('task %d' % (i + 1))
            kl = eval_func_list[i](sess)
            tmp_list.append(kl)
        result_list.append(tmp_list)

    for i in range(len(result_list)):
        print(result_list[i])

    # save results
    import pickle
    if not os.path.isdir('results/'):
        os.mkdir('results/')
    fname = 'results/' + data_name + '_%s_gen_class.pkl' % string
    with open(fname, 'wb') as f:
        pickle.dump(result_list, f)
    print 'test-ll results saved in', fname