Example #1
def train(args):
    print(args)
    numpy.random.seed(int(args['--seed']))

    dataset = load_data.load_mnist_for_semi_sup(n_l=int(args['--num_labeled_samples']),
                                                n_v=int(args['--num_validation_samples']))

    x_train, t_train, ul_x_train = dataset[0]  # labeled inputs/targets plus unlabeled inputs
    x_test, t_test = dataset[2]  # dataset[1] is presumably the validation split, unused here

    layer_sizes = [int(layer_size) for layer_size in args['--layer_sizes'].split('-')]
    model = FNN_MNIST(layer_sizes=layer_sizes)

    # t_func: presumably an alias for theano.tensor (written T in the later examples)
    x = t_func.matrix()
    ul_x = t_func.matrix()
    t = t_func.ivector()

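    # get_cost_type_semi is a project helper; presumably it combines the
    # supervised loss on (x, t) with an unlabeled regularizer on ul_x
    # (e.g. VAT), selected via args.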
    cost_semi = get_cost_type_semi(model, x, t, ul_x, args)
    nll = costs.cross_entropy_loss(x=x, t=t, forward_func=model.forward_test)
    error = costs.error(x=x, t=t, forward_func=model.forward_test)

    optimizer = optimizers.ADAM(cost=cost_semi, params=model.params, alpha=float(args['--initial_learning_rate']))

    index = t_func.iscalar()
    ul_index = t_func.iscalar()
    batch_size = int(args['--batch_size'])
    ul_batch_size = int(args['--ul_batch_size'])

    f_train = theano.function(inputs=[index, ul_index], outputs=cost_semi, updates=optimizer.updates,
                              givens={
                                  x: x_train[batch_size * index:batch_size * (index + 1)],
                                  t: t_train[batch_size * index:batch_size * (index + 1)],
                                  ul_x: ul_x_train[ul_batch_size * ul_index:ul_batch_size * (ul_index + 1)]},
                              on_unused_input='ignore')
    f_nll_train = theano.function(inputs=[index], outputs=nll,
                                  givens={
                                      x: x_train[batch_size * index:batch_size * (index + 1)],
                                      t: t_train[batch_size * index:batch_size * (index + 1)]})
    f_nll_test = theano.function(inputs=[index], outputs=nll,
                                 givens={
                                     x: x_test[batch_size * index:batch_size * (index + 1)],
                                     t: t_test[batch_size * index:batch_size * (index + 1)]})

    f_error_train = theano.function(inputs=[index], outputs=error,
                                    givens={
                                        x: x_train[batch_size * index:batch_size * (index + 1)],
                                        t: t_train[batch_size * index:batch_size * (index + 1)]})
    f_error_test = theano.function(inputs=[index], outputs=error,
                                   givens={
                                       x: x_test[batch_size * index:batch_size * (index + 1)],
                                       t: t_test[batch_size * index:batch_size * (index + 1)]})

    f_lr_decay = theano.function(inputs=[], outputs=optimizer.alpha,
                                 updates={optimizer.alpha: theano.shared(
                                     numpy.array(args['--learning_rate_decay']).astype(
                                         theano.config.floatX)) * optimizer.alpha})

    # Shuffle training set
    randix = RandomStreams(seed=numpy.random.randint(1234)).permutation(n=x_train.shape[0])
    update_permutation = OrderedDict()
    update_permutation[x_train] = x_train[randix]
    update_permutation[t_train] = t_train[randix]
    f_permute_train_set = theano.function(inputs=[], outputs=x_train, updates=update_permutation)

    # Shuffle unlabeled training set
    ul_randix = RandomStreams(seed=numpy.random.randint(1234)).permutation(n=ul_x_train.shape[0])
    update_ul_permutation = OrderedDict()
    update_ul_permutation[ul_x_train] = ul_x_train[ul_randix]
    f_permute_ul_train_set = theano.function(inputs=[], outputs=ul_x_train, updates=update_ul_permutation)

    statuses = {'nll_train': [], 'error_train': [], 'nll_test': [], 'error_test': []}

    n_train = x_train.get_value().shape[0]
    n_test = x_test.get_value().shape[0]
    n_ul_train = ul_x_train.get_value().shape[0]

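    # l_i and ul_i are separate minibatch cursors: the labeled and unlabeled
    # sets differ in size, so each index wraps around on its own schedule.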
    l_i = 0
    ul_i = 0
    for epoch in range(int(args['--num_epochs'])):
        # cPickle.dump((statuses, args), open('./trained_model/' + 'tmp-' + args['--save_filename'], 'wb'),
        #              cPickle.HIGHEST_PROTOCOL)
        f_permute_train_set()
        f_permute_ul_train_set()
        for it in range(int(args['--num_batch_it'])):
            f_train(l_i, ul_i)
            l_i = 0 if l_i >= n_train // batch_size - 1 else l_i + 1
            ul_i = 0 if ul_i >= n_ul_train // ul_batch_size - 1 else ul_i + 1

        sum_nll_train = numpy.sum(numpy.array([f_nll_train(i) for i in range(n_train // batch_size)])) * batch_size
        sum_error_train = numpy.sum(numpy.array([f_error_train(i) for i in range(n_train // batch_size)]))
        sum_nll_test = numpy.sum(numpy.array([f_nll_test(i) for i in range(n_test // batch_size)])) * batch_size
        sum_error_test = numpy.sum(numpy.array([f_error_test(i) for i in range(n_test // batch_size)]))
        statuses['nll_train'].append(sum_nll_train / n_train)
        statuses['error_train'].append(sum_error_train)
        statuses['nll_test'].append(sum_nll_test / n_test)
        statuses['error_test'].append(sum_error_test)
        wlog("[Epoch] %d" % epoch)
        acc = 1 - 1.0*statuses['error_test'][-1]/n_test
        wlog("nll_test : %f error_test : %d accuracy:%f" % (statuses['nll_test'][-1], statuses['error_test'][-1], acc))
        # writer.add_scalar("Test/Loss", statuses['nll_test'][-1], epoch * int(args['--num_batch_it']))
        # writer.add_scalar("Test/Acc", acc, epoch * int(args['--num_batch_it']))
        f_lr_decay()
    # Fine-tune batch normalization statistics on the unlabeled set before the final evaluation
    f_fine_tune = theano.function(inputs=[ul_index], outputs=model.forward_for_finetuning_batch_stat(x),
                                  givens={x: ul_x_train[ul_batch_size * ul_index:ul_batch_size * (ul_index + 1)]})
    for i in range(n_ul_train // ul_batch_size):
        f_fine_tune(i)

    sum_nll_test = numpy.sum(numpy.array([f_nll_test(i) for i in range(n_test // batch_size)])) * batch_size
    sum_error_test = numpy.sum(numpy.array([f_error_test(i) for i in range(n_test // batch_size)]))
    statuses['nll_test'].append(sum_nll_test / n_test)
    statuses['error_test'].append(sum_error_test)
    acc = 1 - 1.0*statuses['error_test'][-1]/n_test
    wlog("final nll_test: %f error_test: %d accuracy:%f" % (statuses['nll_test'][-1], statuses['error_test'][-1], acc))
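
A note on usage: the '--'-prefixed keys suggest args is a docopt-style dictionary. Below is a minimal, hypothetical sketch of a call; it covers only the keys the body above actually reads (get_cost_type_semi may consume additional keys such as a cost type or epsilon), and the values are placeholders rather than recommended settings.

# Hypothetical args dict; values are strings because the body casts with int()/float().
args = {
    '--seed': '1',
    '--num_labeled_samples': '100',
    '--num_validation_samples': '1000',
    '--layer_sizes': '784-1200-1200-10',
    '--initial_learning_rate': '0.002',
    '--learning_rate_decay': '0.9',
    '--batch_size': '100',
    '--ul_batch_size': '250',
    '--num_epochs': '100',
    '--num_batch_it': '500',
}
train(args)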
Example #2
def train(args):
    print(args)

    numpy.random.seed(int(args['--seed']))

    if args['--validation']:
        dataset = load_data.load_mnist_for_validation(
            n_v=int(args['--num_validation_samples']))
    else:
        dataset = load_data.load_mnist_full()
    x_train, t_train = dataset[0]
    x_test, t_test = dataset[1]

    layer_sizes = [
        int(layer_size) for layer_size in args['--layer_sizes'].split('-')
    ]
    model = FNN_MNIST(layer_sizes=layer_sizes)

    x = T.matrix()
    t = T.ivector()

    if args['--cost_type'] == 'MLE':
        cost = costs.cross_entropy_loss(x=x,
                                        t=t,
                                        forward_func=model.forward_train)
    elif args['--cost_type'] == 'L2':
        cost = costs.cross_entropy_loss(x=x, t=t, forward_func=model.forward_train) \
               + costs.weight_decay(params=model.params, coeff=float(args['--lamb']))
    elif args['--cost_type'] == 'AT':
        cost = costs.adversarial_training(
            x,
            t,
            model.forward_train,
            'CE',
            epsilon=float(args['--epsilon']),
            lamb=float(args['--lamb']),
            norm_constraint=args['--norm_constraint'],
            forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    elif args['--cost_type'] == 'VAT':
        cost = costs.virtual_adversarial_training(
            x,
            t,
            model.forward_train,
            'CE',
            epsilon=float(args['--epsilon']),
            norm_constraint=args['--norm_constraint'],
            num_power_iter=int(args['--num_power_iter']),
            forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    elif args['--cost_type'] == 'VAT_finite_diff':
        cost = costs.virtual_adversarial_training_finite_diff(
            x,
            t,
            model.forward_train,
            'CE',
            epsilon=float(args['--epsilon']),
            norm_constraint=args['--norm_constraint'],
            num_power_iter=int(args['--num_power_iter']),
            forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    else:
        # Without this branch an unrecognized cost type would leave `cost` undefined.
        raise ValueError("unknown --cost_type: %s" % args['--cost_type'])
    nll = costs.cross_entropy_loss(x=x, t=t, forward_func=model.forward_test)
    error = costs.error(x=x, t=t, forward_func=model.forward_test)

    optimizer = optimizers.ADAM(cost=cost,
                                params=model.params,
                                alpha=float(args['--initial_learning_rate']))

    index = T.iscalar()
    batch_size = int(args['--batch_size'])
    f_train = theano.function(
        inputs=[index],
        outputs=cost,
        updates=optimizer.updates,
        givens={
            x: x_train[batch_size * index:batch_size * (index + 1)],
            t: t_train[batch_size * index:batch_size * (index + 1)]
        })
    f_nll_train = theano.function(
        inputs=[index],
        outputs=nll,
        givens={
            x: x_train[batch_size * index:batch_size * (index + 1)],
            t: t_train[batch_size * index:batch_size * (index + 1)]
        })
    f_nll_test = theano.function(
        inputs=[index],
        outputs=nll,
        givens={
            x: x_test[batch_size * index:batch_size * (index + 1)],
            t: t_test[batch_size * index:batch_size * (index + 1)]
        })

    f_error_train = theano.function(
        inputs=[index],
        outputs=error,
        givens={
            x: x_train[batch_size * index:batch_size * (index + 1)],
            t: t_train[batch_size * index:batch_size * (index + 1)]
        })
    f_error_test = theano.function(
        inputs=[index],
        outputs=error,
        givens={
            x: x_test[batch_size * index:batch_size * (index + 1)],
            t: t_test[batch_size * index:batch_size * (index + 1)]
        })

    f_lr_decay = theano.function(
        inputs=[],
        outputs=optimizer.alpha,
        updates={
            optimizer.alpha:
            theano.shared(
                numpy.array(args['--learning_rate_decay']).astype(
                    theano.config.floatX)) * optimizer.alpha
        })
    randix = RandomStreams(seed=numpy.random.randint(1234)).permutation(
        n=x_train.shape[0])
    update_permutation = OrderedDict()
    update_permutation[x_train] = x_train[randix]
    update_permutation[t_train] = t_train[randix]
    f_permute_train_set = theano.function(inputs=[],
                                          outputs=x_train,
                                          updates=update_permutation)

    statuses = {}
    statuses['nll_train'] = []
    statuses['error_train'] = []
    statuses['nll_test'] = []
    statuses['error_test'] = []

    n_train = x_train.get_value().shape[0]
    n_test = x_test.get_value().shape[0]

    sum_nll_train = numpy.sum(
        numpy.array([f_nll_train(i)
                     for i in range(n_train // batch_size)])) * batch_size
    sum_error_train = numpy.sum(
        numpy.array([f_error_train(i) for i in range(n_train // batch_size)]))
    sum_nll_test = numpy.sum(
        numpy.array([f_nll_test(i)
                     for i in range(n_test // batch_size)])) * batch_size
    sum_error_test = numpy.sum(
        numpy.array([f_error_test(i) for i in range(n_test // batch_size)]))
    statuses['nll_train'].append(sum_nll_train / n_train)
    statuses['error_train'].append(sum_error_train)
    statuses['nll_test'].append(sum_nll_test / n_test)
    statuses['error_test'].append(sum_error_test)
    print("[Epoch]", -1)
    print("nll_train :", statuses['nll_train'][-1], "error_train :", statuses['error_train'][-1],
          "nll_test :", statuses['nll_test'][-1], "error_test :", statuses['error_test'][-1])

    print("training...")

    make_sure_path_exists("./trained_model")

    for epoch in range(int(args['--num_epochs'])):
        with open('./trained_model/' + 'tmp-' + args['--save_filename'], 'wb') as f:
            pickle.dump((statuses, args), f, pickle.HIGHEST_PROTOCOL)

        f_permute_train_set()

        ### update parameters ###
        for i in range(n_train // batch_size):
            f_train(i)
        #########################

        sum_nll_train = numpy.sum(
            numpy.array([f_nll_train(i)
                         for i in range(n_train // batch_size)])) * batch_size
        sum_error_train = numpy.sum(
            numpy.array(
                [f_error_train(i) for i in range(n_train // batch_size)]))
        sum_nll_test = numpy.sum(
            numpy.array([f_nll_test(i)
                         for i in range(n_test // batch_size)])) * batch_size
        sum_error_test = numpy.sum(
            numpy.array([f_error_test(i)
                         for i in range(n_test // batch_size)]))
        statuses['nll_train'].append(sum_nll_train / n_train)
        statuses['error_train'].append(sum_error_train)
        statuses['nll_test'].append(sum_nll_test / n_test)
        statuses['error_test'].append(sum_error_test)
        print("[Epoch]", epoch)
        print("nll_train :", statuses['nll_train'][-1], "error_train :", statuses['error_train'][-1],
              "nll_test :", statuses['nll_test'][-1], "error_test :", statuses['error_test'][-1])

        f_lr_decay()

    ### finetune batch stat ###
    f_finetune = theano.function(
        inputs=[index],
        outputs=model.forward_for_finetuning_batch_stat(x),
        givens={x: x_train[batch_size * index:batch_size * (index + 1)]})
    for i in range(n_train // batch_size):
        f_finetune(i)

    sum_nll_train = numpy.sum(
        numpy.array([f_nll_train(i)
                     for i in range(n_train // batch_size)])) * batch_size
    sum_error_train = numpy.sum(
        numpy.array([f_error_train(i) for i in range(n_train // batch_size)]))
    sum_nll_test = numpy.sum(
        numpy.array([f_nll_test(i)
                     for i in range(n_test // batch_size)])) * batch_size
    sum_error_test = numpy.sum(
        numpy.array([f_error_test(i) for i in range(n_test // batch_size)]))
    statuses['nll_train'].append(sum_nll_train / n_train)
    statuses['error_train'].append(sum_error_train)
    statuses['nll_test'].append(sum_nll_test / n_test)
    statuses['error_test'].append(sum_error_test)
    print("[after finetuning]")
    print("nll_train :", statuses['nll_train'][-1], "error_train :", statuses['error_train'][-1],
          "nll_test :", statuses['nll_test'][-1], "error_test :", statuses['error_test'][-1])
    ###########################

    make_sure_path_exists("./trained_model")
    with open('./trained_model/' + args['--save_filename'], 'wb') as f:
        pickle.dump((model, statuses, args), f, pickle.HIGHEST_PROTOCOL)
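
As in Example #1, args is presumably a docopt-style dictionary. A hypothetical sketch that would exercise the 'VAT' branch; the key names follow the lookups in the body above, and the values are placeholders rather than the paper's settings ('--lamb' is only read by the 'L2' and 'AT' branches):

args = {
    '--seed': '1',
    '--validation': False,
    '--num_validation_samples': '1000',
    '--layer_sizes': '784-1200-600-300-150-10',
    '--cost_type': 'VAT',
    '--epsilon': '2.0',
    '--norm_constraint': 'L2',
    '--num_power_iter': '1',
    '--initial_learning_rate': '0.002',
    '--learning_rate_decay': '0.9',
    '--batch_size': '100',
    '--num_epochs': '100',
    '--save_filename': 'vat_mnist.pkl',
}
train(args)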
Example #3
def train(latent_dim=2,  # dimension of latent variable z
          z_prior='gaussian',  # 'gaussian' or 'uniform'
          lamb=10.,  # ratio between reconstruction and adversarial cost
          recon_obj_type='CE',  # reconstruction objective: 'CE' (cross entropy) or 'QE' (quadratic error)
          initial_learning_rate=0.002,
          learning_rate_decay=1.0,
          num_epochs=50,
          batch_size=100,
          save_filename='trained_model',
          seed=1):


    numpy.random.seed(seed=seed)

    dataset = load_data.load_mnist_full()

    x_train,_ = dataset[0]
    x_test,_ = dataset[1]

    model = AdversarialAutoencoderMNIST(latent_dim=latent_dim,z_prior=z_prior)

    x = T.matrix()

    loss_for_training,_,adv_loss_for_training = costs.adversarial_autoenc_loss(x=x,
                                          enc_f=model.encode_train,
                                          dec_f=model.decode_train,
                                          disc_f=model.D_train,
                                          p_z_sampler=model.sample_from_prior,
                                          obj_type=recon_obj_type,
                                          lamb=numpy.asarray(lamb,dtype=theano.config.floatX))

    _,recon_loss,adv_loss = costs.adversarial_autoenc_loss(x=x,
                                          enc_f=model.encode_test,
                                          dec_f=model.decode_test,
                                          disc_f=model.D_test,
                                          p_z_sampler=model.sample_from_prior,
                                          obj_type=recon_obj_type,
                                          lamb=numpy.asarray(lamb,dtype=theano.config.floatX))

    optimizer_recon = optimizers.ADAM(cost=loss_for_training,
                                      params=model.model_params,
                                      alpha=numpy.asarray(initial_learning_rate, dtype=theano.config.floatX))
    optimizer_adv = optimizers.ADAM(cost=adv_loss_for_training,
                                    params=model.D_params,
                                    alpha=numpy.asarray(initial_learning_rate, dtype=theano.config.floatX))

    index = T.iscalar()

    f_training_model = theano.function(inputs=[index], outputs=loss_for_training, updates=optimizer_recon.updates,
                              givens={
                                  x:x_train[batch_size*index:batch_size*(index+1)]})
    f_training_discriminator = theano.function(inputs=[index], outputs=adv_loss_for_training, updates=optimizer_adv.updates,
                              givens={
                                  x:x_train[batch_size*index:batch_size*(index+1)]})


    f_recon_train = theano.function(inputs=[index], outputs=recon_loss,
                              givens={
                                  x:x_train[batch_size*index:batch_size*(index+1)]})
    f_adv_train = theano.function(inputs=[index], outputs=adv_loss,
                              givens={
                                  x:x_train[batch_size*index:batch_size*(index+1)]})
    f_recon_test = theano.function(inputs=[index], outputs=recon_loss,
                              givens={
                                  x:x_test[batch_size*index:batch_size*(index+1)]})
    f_adv_test = theano.function(inputs=[index], outputs=adv_loss,
                              givens={
                                  x:x_test[batch_size*index:batch_size*(index+1)]})

    f_lr_decay_recon = theano.function(inputs=[],outputs=optimizer_recon.alpha,
                                 updates={optimizer_recon.alpha:theano.shared(numpy.array(learning_rate_decay).astype(theano.config.floatX))*optimizer_recon.alpha})
    f_lr_decay_adv = theano.function(inputs=[],outputs=optimizer_adv.alpha,
                                 updates={optimizer_adv.alpha:theano.shared(numpy.array(learning_rate_decay).astype(theano.config.floatX))*optimizer_adv.alpha})

    randix = RandomStreams(seed=numpy.random.randint(1234)).permutation(n=x_train.shape[0])
    f_permute_train_set = theano.function(inputs=[],outputs=x_train,updates={x_train:x_train[randix]})

    statuses = {}
    statuses['recon_train'] = []
    statuses['adv_train'] = []
    statuses['recon_test'] = []
    statuses['adv_test'] = []

    n_train = x_train.get_value().shape[0]
    n_test = x_test.get_value().shape[0]

    sum_recon_train = numpy.sum(numpy.array([f_recon_train(i) for i in range(n_train // batch_size)])) * batch_size
    sum_adv_train = numpy.sum(numpy.array([f_adv_train(i) for i in range(n_train // batch_size)])) * batch_size
    sum_recon_test = numpy.sum(numpy.array([f_recon_test(i) for i in range(n_test // batch_size)])) * batch_size
    sum_adv_test = numpy.sum(numpy.array([f_adv_test(i) for i in range(n_test // batch_size)])) * batch_size
    statuses['recon_train'].append(sum_recon_train/n_train)
    statuses['adv_train'].append(sum_adv_train/n_train)
    statuses['recon_test'].append(sum_recon_test/n_test)
    statuses['adv_test'].append(sum_adv_test/n_test)
    print("[Epoch]", -1)
    print("recon_train :", statuses['recon_train'][-1], "adv_train :", statuses['adv_train'][-1],
          "recon_test :", statuses['recon_test'][-1], "adv_test :", statuses['adv_test'][-1])

    z = model.encode_test(input=x)
    f_enc = theano.function(inputs=[],outputs=z,givens={x:dataset[1][0]})
    def plot_latent_variable(epoch):
        output = f_enc()
        plt.figure(figsize=(8,8))
        color=cm.rainbow(numpy.linspace(0,1,10))
        for l,c in zip(range(10),color):
            ix = numpy.where(dataset[1][1].get_value()==l)[0]
            plt.scatter(output[ix,0],output[ix,1],c=c,label=l,s=8,linewidth=0)
        plt.xlim([-5.0,5.0])
        plt.ylim([-5.0,5.0])
        plt.legend(fontsize=15)
        plt.savefig('z_epoch' + str(epoch) + '.pdf')

    print("training...")
    make_sure_path_exists("./trained_model")

    for epoch in range(num_epochs):
        with open('./trained_model/' + 'tmp-' + save_filename, 'wb') as f:
            pickle.dump((model, statuses), f, pickle.HIGHEST_PROTOCOL)
        f_permute_train_set()
        ### update parameters ###
        for i in range(n_train // batch_size):
            ### Optimize model and discriminator alternately ###
            f_training_discriminator(i)
            f_training_model(i)
        #########################

        if latent_dim == 2:
            plot_latent_variable(epoch=epoch)

        sum_recon_train = numpy.sum(numpy.array([f_recon_train(i) for i in range(n_train // batch_size)])) * batch_size
        sum_adv_train = numpy.sum(numpy.array([f_adv_train(i) for i in range(n_train // batch_size)])) * batch_size
        sum_recon_test = numpy.sum(numpy.array([f_recon_test(i) for i in range(n_test // batch_size)])) * batch_size
        sum_adv_test = numpy.sum(numpy.array([f_adv_test(i) for i in range(n_test // batch_size)])) * batch_size
        statuses['recon_train'].append(sum_recon_train/n_train)
        statuses['adv_train'].append(sum_adv_train/n_train)
        statuses['recon_test'].append(sum_recon_test/n_test)
        statuses['adv_test'].append(sum_adv_test/n_test)
        print("[Epoch]", epoch)
        print("recon_train :", statuses['recon_train'][-1], "adv_train :", statuses['adv_train'][-1],
              "recon_test :", statuses['recon_test'][-1], "adv_test :", statuses['adv_test'][-1])

        f_lr_decay_recon()
        f_lr_decay_adv()

    make_sure_path_exists("./trained_model")
    with open('./trained_model/' + save_filename, 'wb') as f:
        pickle.dump((model, statuses), f, pickle.HIGHEST_PROTOCOL)
    return model, statuses
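
Unlike the first two examples, this train takes plain keyword arguments, so a run is a direct call. A minimal sketch; with latent_dim=2 the loop also saves a z_epoch<N>.pdf scatter plot of the latent space after each epoch:

# Trains the adversarial autoencoder and returns the model plus the loss history.
model, statuses = train(latent_dim=2,
                        z_prior='gaussian',
                        num_epochs=50,
                        save_filename='aae_mnist.pkl')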