Example #1
def train(args):
    print args

    numpy.random.seed(int(args['--seed']))

    if (args['--validation']):
        dataset = load_data.load_mnist_for_validation(
            n_v=int(args['--num_validation_samples']))
    else:
        dataset = load_data.load_mnist_full()
    x_train, t_train = dataset[0]
    x_test, t_test = dataset[1]

    layer_sizes = [
        int(layer_size) for layer_size in args['--layer_sizes'].split('-')
    ]
    model = FNN_MNIST(layer_sizes=layer_sizes)

    x = T.matrix()
    t = T.ivector()

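    # Select the training objective according to --cost_type: plain cross-entropy (MLE),
    # cross-entropy plus L2 weight decay, adversarial training (AT), or virtual
    # adversarial training (VAT / VAT_finite_diff).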
    if (args['--cost_type'] == 'MLE'):
        cost = costs.cross_entropy_loss(x=x,
                                        t=t,
                                        forward_func=model.forward_train)
    elif (args['--cost_type'] == 'L2'):
        cost = costs.cross_entropy_loss(x=x, t=t, forward_func=model.forward_train) \
               + costs.weight_decay(params=model.params, coeff=float(args['--lamb']))
    elif (args['--cost_type'] == 'AT'):
        cost = costs.adversarial_training(
            x,
            t,
            model.forward_train,
            'CE',
            epsilon=float(args['--epsilon']),
            lamb=float(args['--lamb']),
            norm_constraint=args['--norm_constraint'],
            forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    elif (args['--cost_type'] == 'VAT'):
        cost = costs.virtual_adversarial_training(
            x,
            t,
            model.forward_train,
            'CE',
            epsilon=float(args['--epsilon']),
            norm_constraint=args['--norm_constraint'],
            num_power_iter=int(args['--num_power_iter']),
            forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    elif (args['--cost_type'] == 'VAT_finite_diff'):
        cost = costs.virtual_adversarial_training_finite_diff(
            x,
            t,
            model.forward_train,
            'CE',
            epsilon=float(args['--epsilon']),
            norm_constraint=args['--norm_constraint'],
            num_power_iter=int(args['--num_power_iter']),
            forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    else:
        raise ValueError('cost_type: ' + args['--cost_type'] + ' is not defined')
    nll = costs.cross_entropy_loss(x=x, t=t, forward_func=model.forward_test)
    error = costs.error(x=x, t=t, forward_func=model.forward_test)

    optimizer = optimizers.ADAM(cost=cost,
                                params=model.params,
                                alpha=float(args['--initial_learning_rate']))

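    # Compile Theano functions; `givens` slices one mini-batch out of the shared
    # datasets, so each call processes the batch selected by `index`.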
    index = T.iscalar()
    batch_size = int(args['--batch_size'])
    f_train = theano.function(
        inputs=[index],
        outputs=cost,
        updates=optimizer.updates,
        givens={
            x: x_train[batch_size * index:batch_size * (index + 1)],
            t: t_train[batch_size * index:batch_size * (index + 1)]
        })
    f_nll_train = theano.function(
        inputs=[index],
        outputs=nll,
        givens={
            x: x_train[batch_size * index:batch_size * (index + 1)],
            t: t_train[batch_size * index:batch_size * (index + 1)]
        })
    f_nll_test = theano.function(
        inputs=[index],
        outputs=nll,
        givens={
            x: x_test[batch_size * index:batch_size * (index + 1)],
            t: t_test[batch_size * index:batch_size * (index + 1)]
        })

    f_error_train = theano.function(
        inputs=[index],
        outputs=error,
        givens={
            x: x_train[batch_size * index:batch_size * (index + 1)],
            t: t_train[batch_size * index:batch_size * (index + 1)]
        })
    f_error_test = theano.function(
        inputs=[index],
        outputs=error,
        givens={
            x: x_test[batch_size * index:batch_size * (index + 1)],
            t: t_test[batch_size * index:batch_size * (index + 1)]
        })

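    # f_lr_decay multiplies ADAM's step size by --learning_rate_decay once per epoch;
    # f_permute_train_set shuffles the shared training set (and its labels) in place.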
    f_lr_decay = theano.function(
        inputs=[],
        outputs=optimizer.alpha,
        updates={
            optimizer.alpha:
            theano.shared(
                numpy.array(args['--learning_rate_decay']).astype(
                    theano.config.floatX)) * optimizer.alpha
        })
    randix = RandomStreams(seed=numpy.random.randint(1234)).permutation(
        n=x_train.shape[0])
    update_permutation = OrderedDict()
    update_permutation[x_train] = x_train[randix]
    update_permutation[t_train] = t_train[randix]
    f_permute_train_set = theano.function(inputs=[],
                                          outputs=x_train,
                                          updates=update_permutation)

    statuses = {}
    statuses['nll_train'] = []
    statuses['error_train'] = []
    statuses['nll_test'] = []
    statuses['error_test'] = []

    n_train = x_train.get_value().shape[0]
    n_test = x_test.get_value().shape[0]

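    # Evaluate NLL and classification error before any parameter update (reported as epoch -1).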
    sum_nll_train = numpy.sum(
        numpy.array([f_nll_train(i)
                     for i in xrange(n_train / batch_size)])) * batch_size
    sum_error_train = numpy.sum(
        numpy.array([f_error_train(i) for i in xrange(n_train / batch_size)]))
    sum_nll_test = numpy.sum(
        numpy.array([f_nll_test(i)
                     for i in xrange(n_test / batch_size)])) * batch_size
    sum_error_test = numpy.sum(
        numpy.array([f_error_test(i) for i in xrange(n_test / batch_size)]))
    statuses['nll_train'].append(sum_nll_train / n_train)
    statuses['error_train'].append(sum_error_train)
    statuses['nll_test'].append(sum_nll_test / n_test)
    statuses['error_test'].append(sum_error_test)
    print "[Epoch]", str(-1)
    print  "nll_train : ", statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
        "nll_test : ", statuses['nll_test'][-1], "error_test : ", statuses['error_test'][-1]

    print "training..."

    make_sure_path_exists("./trained_model")

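    # Each epoch: checkpoint the running statuses, shuffle the training set, run one
    # pass of mini-batch updates, re-evaluate, and decay the learning rate.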
    for epoch in xrange(int(args['--num_epochs'])):
        cPickle.dump(
            (statuses, args),
            open('./trained_model/' + 'tmp-' + args['--save_filename'],
                 'wb'), cPickle.HIGHEST_PROTOCOL)

        f_permute_train_set()

        ### update parameters ###
        [f_train(i) for i in xrange(n_train / batch_size)]
        #########################

        sum_nll_train = numpy.sum(
            numpy.array([f_nll_train(i)
                         for i in xrange(n_train / batch_size)])) * batch_size
        sum_error_train = numpy.sum(
            numpy.array(
                [f_error_train(i) for i in xrange(n_train / batch_size)]))
        sum_nll_test = numpy.sum(
            numpy.array([f_nll_test(i)
                         for i in xrange(n_test / batch_size)])) * batch_size
        sum_error_test = numpy.sum(
            numpy.array([f_error_test(i)
                         for i in xrange(n_test / batch_size)]))
        statuses['nll_train'].append(sum_nll_train / n_train)
        statuses['error_train'].append(sum_error_train)
        statuses['nll_test'].append(sum_nll_test / n_test)
        statuses['error_test'].append(sum_error_test)
        print "[Epoch]", str(epoch)
        print  "nll_train : ", statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
            "nll_test : ", statuses['nll_test'][-1], "error_test : ", statuses['error_test'][-1]

        f_lr_decay()

    ### finetune batch stat ###
    f_finetune = theano.function(
        inputs=[index],
        outputs=model.forward_for_finetuning_batch_stat(x),
        givens={x: x_train[batch_size * index:batch_size * (index + 1)]})
    [f_finetune(i) for i in xrange(n_train / batch_size)]

    sum_nll_train = numpy.sum(
        numpy.array([f_nll_train(i)
                     for i in xrange(n_train / batch_size)])) * batch_size
    sum_error_train = numpy.sum(
        numpy.array([f_error_train(i) for i in xrange(n_train / batch_size)]))
    sum_nll_test = numpy.sum(
        numpy.array([f_nll_test(i)
                     for i in xrange(n_test / batch_size)])) * batch_size
    sum_error_test = numpy.sum(
        numpy.array([f_error_test(i) for i in xrange(n_test / batch_size)]))
    statuses['nll_train'].append(sum_nll_train / n_train)
    statuses['error_train'].append(sum_error_train)
    statuses['nll_test'].append(sum_nll_test / n_test)
    statuses['error_test'].append(sum_error_test)
    print "[after finetuning]"
    print  "nll_train : ", statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
        "nll_test : ", statuses['nll_test'][-1], "error_test : ", statuses['error_test'][-1]
    ###########################

    make_sure_path_exists("./trained_model")
    cPickle.dump((model, statuses, args),
                 open('./trained_model/' + args['--save_filename'], 'wb'),
                 cPickle.HIGHEST_PROTOCOL)
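The '--'-prefixed keys suggest that `args` comes from a docopt-style command-line parser. A minimal sketch of calling `train` directly, using the key names read by the code above; the values are illustrative only and may differ from the original script's defaults:

args = {
    '--seed': '1',
    '--validation': False,
    '--num_validation_samples': '1000',
    '--layer_sizes': '784-1200-1200-10',
    '--cost_type': 'VAT',
    '--epsilon': '2.0',
    '--lamb': '1.0',
    '--norm_constraint': 'L2',
    '--num_power_iter': '1',
    '--initial_learning_rate': '0.002',
    '--learning_rate_decay': '0.9',
    '--batch_size': '100',
    '--num_epochs': '100',
    '--save_filename': 'mnist_vat.pkl',
}
train(args)
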
Example #2
def train(args):

    print args

    numpy.random.seed(int(args['--seed']))

    if(args['--validation']):
        dataset = load_data.load_mnist_for_validation(n_v=int(args['--num_validation_samples']))
    else:
        dataset = load_data.load_mnist_full()
    x_train, t_train = dataset[0]
    x_test, t_test = dataset[1]


    layer_sizes = [int(layer_size) for layer_size in args['--layer_sizes'].split('-')] 
    model = FNN_MNIST(layer_sizes=layer_sizes)

    x = T.matrix()
    t = T.ivector()


    if(args['--cost_type']=='MLE'):
        cost = costs.cross_entropy_loss(x=x,t=t,forward_func=model.forward_train)
    elif(args['--cost_type']=='L2'):
        cost = costs.cross_entropy_loss(x=x,t=t,forward_func=model.forward_train) \
               + costs.weight_decay(params=model.params,coeff=float(args['--lamb']))
    elif(args['--cost_type']=='AT'):
        cost = costs.adversarial_training(x,t,model.forward_train,
                                              'CE',
                                              epsilon=float(args['--epsilon']),
                                              lamb=float(args['--lamb']),
                                              norm_constraint = args['--norm_constraint'],
                                              forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    elif(args['--cost_type']=='VAT'):
        cost = costs.virtual_adversarial_training(x,t,model.forward_train,
                                              'CE',
                                              epsilon=float(args['--epsilon']),
                                              norm_constraint = args['--norm_constraint'],
                                              num_power_iter = int(args['--num_power_iter']),
                                              forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    elif(args['--cost_type']=='VAT_finite_diff'):
        cost = costs.virtual_adversarial_training_finite_diff(x,t,model.forward_train,
                                              'CE',
                                              epsilon=float(args['--epsilon']),
                                              norm_constraint = args['--norm_constraint'],
                                              num_power_iter = int(args['--num_power_iter']),
                                              forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    else:
        raise ValueError('cost_type: ' + args['--cost_type'] + ' is not defined')
    nll = costs.cross_entropy_loss(x=x,t=t,forward_func=model.forward_test)
    error = costs.error(x=x,t=t,forward_func=model.forward_test)

    optimizer = optimizers.ADAM(cost=cost,params=model.params,alpha=float(args['--initial_learning_rate']))


    index = T.iscalar()
    batch_size = int(args['--batch_size'])
    f_train = theano.function(inputs=[index], outputs=cost, updates=optimizer.updates,
                              givens={
                                  x:x_train[batch_size*index:batch_size*(index+1)],
                                  t:t_train[batch_size*index:batch_size*(index+1)]})
    f_nll_train = theano.function(inputs=[index], outputs=nll,
                              givens={
                                  x:x_train[batch_size*index:batch_size*(index+1)],
                                  t:t_train[batch_size*index:batch_size*(index+1)]})
    f_nll_test = theano.function(inputs=[index], outputs=nll,
                              givens={
                                  x:x_test[batch_size*index:batch_size*(index+1)],
                                  t:t_test[batch_size*index:batch_size*(index+1)]})

    f_error_train = theano.function(inputs=[index], outputs=error,
                              givens={
                                  x:x_train[batch_size*index:batch_size*(index+1)],
                                  t:t_train[batch_size*index:batch_size*(index+1)]})
    f_error_test = theano.function(inputs=[index], outputs=error,
                              givens={
                                  x:x_test[batch_size*index:batch_size*(index+1)],
                                  t:t_test[batch_size*index:batch_size*(index+1)]})

    f_lr_decay = theano.function(inputs=[],outputs=optimizer.alpha,
                                 updates={optimizer.alpha:theano.shared(numpy.array(args['--learning_rate_decay']).astype(theano.config.floatX))*optimizer.alpha})
    randix = RandomStreams(seed=numpy.random.randint(1234)).permutation(n=x_train.shape[0])
    update_permutation = OrderedDict()
    update_permutation[x_train] = x_train[randix]
    update_permutation[t_train] = t_train[randix]
    f_permute_train_set = theano.function(inputs=[],outputs=x_train,updates=update_permutation)

    statuses = {}
    statuses['nll_train'] = []
    statuses['error_train'] = []
    statuses['nll_test'] = []
    statuses['error_test'] = []

    n_train = x_train.get_value().shape[0]
    n_test = x_test.get_value().shape[0]

    sum_nll_train = numpy.sum(numpy.array([f_nll_train(i) for i in xrange(n_train/batch_size)]))*batch_size
    sum_error_train = numpy.sum(numpy.array([f_error_train(i) for i in xrange(n_train/batch_size)]))
    sum_nll_test = numpy.sum(numpy.array([f_nll_test(i) for i in xrange(n_test/batch_size)]))*batch_size
    sum_error_test = numpy.sum(numpy.array([f_error_test(i) for i in xrange(n_test/batch_size)]))
    statuses['nll_train'].append(sum_nll_train/n_train)
    statuses['error_train'].append(sum_error_train)
    statuses['nll_test'].append(sum_nll_test/n_test)
    statuses['error_test'].append(sum_error_test)
    print "[Epoch]",str(-1)
    print  "nll_train : " , statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
            "nll_test : " , statuses['nll_test'][-1],  "error_test : ", statuses['error_test'][-1]

    print "training..."

    make_sure_path_exists("./trained_model")

    for epoch in xrange(int(args['--num_epochs'])):
        cPickle.dump((statuses,args),open('./trained_model/'+'tmp-' + args['--save_filename'],'wb'),cPickle.HIGHEST_PROTOCOL)
        
        f_permute_train_set()

        ### update parameters ###
        [f_train(i) for i in xrange(n_train/batch_size)]
        #########################

        sum_nll_train = numpy.sum(numpy.array([f_nll_train(i) for i in xrange(n_train/batch_size)]))*batch_size
        sum_error_train = numpy.sum(numpy.array([f_error_train(i) for i in xrange(n_train/batch_size)]))
        sum_nll_test = numpy.sum(numpy.array([f_nll_test(i) for i in xrange(n_test/batch_size)]))*batch_size
        sum_error_test = numpy.sum(numpy.array([f_error_test(i) for i in xrange(n_test/batch_size)]))
        statuses['nll_train'].append(sum_nll_train/n_train)
        statuses['error_train'].append(sum_error_train)
        statuses['nll_test'].append(sum_nll_test/n_test)
        statuses['error_test'].append(sum_error_test)
        print "[Epoch]",str(epoch)
        print  "nll_train : " , statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
                "nll_test : " , statuses['nll_test'][-1],  "error_test : ", statuses['error_test'][-1]

        f_lr_decay()

    ### finetune batch stat ###
    f_finetune = theano.function(inputs=[index],outputs=model.forward_for_finetuning_batch_stat(x),
                                 givens={x:x_train[batch_size*index:batch_size*(index+1)]})
    [f_finetune(i) for i in xrange(n_train/batch_size)]

    sum_nll_train = numpy.sum(numpy.array([f_nll_train(i) for i in xrange(n_train/batch_size)]))*batch_size
    sum_error_train = numpy.sum(numpy.array([f_error_train(i) for i in xrange(n_train/batch_size)]))
    sum_nll_test = numpy.sum(numpy.array([f_nll_test(i) for i in xrange(n_test/batch_size)]))*batch_size
    sum_error_test = numpy.sum(numpy.array([f_error_test(i) for i in xrange(n_test/batch_size)]))
    statuses['nll_train'].append(sum_nll_train/n_train)
    statuses['error_train'].append(sum_error_train)
    statuses['nll_test'].append(sum_nll_test/n_test)
    statuses['error_test'].append(sum_error_test)
    print "[after finetuning]"
    print  "nll_train : " , statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
        "nll_test : " , statuses['nll_test'][-1],  "error_test : ", statuses['error_test'][-1]
    ###########################

    make_sure_path_exists("./trained_model")
    cPickle.dump((model,statuses,args),open('./trained_model/'+args['--save_filename'],'wb'),cPickle.HIGHEST_PROTOCOL)
Example #3
def train_mlp(
    n_l,  # Number of labeled samples.
    layer_sizes,  # Layer sizes of the neural network. For example, layer_sizes = [784,1200,1200,10] means 784 input nodes, two hidden layers of 1200 nodes each, and 10 output nodes.
    activations,  # Specification of activation functions.
    initial_model_learning_rate,  # Initial learning rate of ADAM.
    learning_rate_decay,  # Learning rate decay of ADAM.
    n_epochs,  # Number of training epochs.
    n_it_batches,  # Number of mini-batch stochastic gradient updates per epoch.
    m_batch_size=100,  # Mini-batch size.
    m_ul_batch_size=250,  # Mini-batch size for the LDS calculation (semi-supervised learning only).
    cost_type="vat",  # Cost type: 'mle' is no regularization, 'at' is adversarial training, 'vat' is virtual adversarial training (ours).
    lamb=1.0,  # Balance parameter.
    epsilon=0.05,  # Norm constraint parameter.
    num_power_iter=1,  # Number of iterations of the power method.
    norm_constraint="L2",  # Norm constraint: 'max' is the L-infinity norm and 'L2' is the L2 norm.
    random_seed=1,  # Random seed.
    semi_supervised=False,  # Whether to run the semi-supervised learning experiment.
    n_v=10000,  # Number of validation samples.
    full_train=False,  # Train on all training samples (for evaluation on test samples).
    monitoring_cost_during_training=False,  # Monitor the cost transitions during training.
):

    sys.setrecursionlimit(10000)

    # set random stream
    rng = numpy.random.RandomState(random_seed)

    # load mnist dataset
    if full_train and (not semi_supervised):
        dataset = load_mnist_for_test()
        train_set_x, train_set_y = dataset[0]
    else:
        dataset = load_mnist_for_validation(n_l=n_l, n_v=n_v, rng=rng)
        train_set_x, train_set_y, ul_train_set_x = dataset[0]
    valid_set_x, valid_set_y = dataset[1]
    n_train_batches = numpy.ceil((train_set_x.get_value(borrow=True).shape[0]) / numpy.float(m_batch_size))
    n_valid_batches = numpy.ceil((valid_set_x.get_value(borrow=True).shape[0]) / numpy.float(m_batch_size))

    print "... building the model"
    # define a classifier
    x = T.matrix("x")
    y = T.ivector("y")
    if semi_supervised:
        n_ul_train_batches = numpy.ceil((ul_train_set_x.get_value(borrow=True).shape[0]) / numpy.float(m_ul_batch_size))
        ul_x = T.matrix("ul_x")
        classifier = mlp_ss.MLP_SS(
            rng=rng,
            input=x,
            ul_input=ul_x,
            layer_sizes=layer_sizes,
            activations=activations,
            epsilon=epsilon,
            lamb=lamb,
            m_batch_size=m_batch_size,
            m_ul_batch_size=m_ul_batch_size,
            num_power_iter=num_power_iter,
            norm_constraint=norm_constraint,
        )
    else:
        classifier = mlp.MLP(
            rng=rng,
            input=x,
            layer_sizes=layer_sizes,
            activations=activations,
            epsilon=epsilon,
            lamb=lamb,
            m_batch_size=m_batch_size,
            num_power_iter=num_power_iter,
            norm_constraint=norm_constraint,
        )

    # define a training_cost
    if cost_type == "mle":
        cost = classifier.cost(y)
    elif cost_type == "vat":
        cost = classifier.cost_vat(y)
    elif cost_type == "at":
        cost = classifier.cost_at(y)
    else:
        raise ValueError("cost_type:" + cost_type + " is not defined")

    # define a schedule of learning rate
    model_learning_rate = theano.shared(numpy.asarray(initial_model_learning_rate, dtype=theano.config.floatX))
    decay_model_learning_rate = theano.function(
        inputs=[], outputs=model_learning_rate, updates={model_learning_rate: model_learning_rate * learning_rate_decay}
    )
    updates = OrderedDict()
    updates = ADAM(classifier, cost, model_learning_rate, updates)
    updates.update(classifier.m_v_updates)

    # define permutation of train set
    def update_train_ind(x, y, ind):
        upd = OrderedDict()
        upd[x] = x[ind]
        if y is not None:
            upd[y] = y[ind]
        return upd, upd[x][0]

    ind = T.ivector()
    upd_tr_ind, n_x_0 = update_train_ind(train_set_x, train_set_y, ind)
    permute_train_set = theano.function(inputs=[ind], outputs=n_x_0, updates=upd_tr_ind)

    # compile optimization function
    index = T.lscalar()
    if semi_supervised:
        upd_ul_tr_ind, n_ul_x_0 = update_train_ind(ul_train_set_x, None, ind)
        permute_ul_train_set = theano.function(inputs=[ind], outputs=n_ul_x_0, updates=upd_ul_tr_ind)
        ul_index = T.lscalar()
        optimize = theano.function(
            inputs=[index, ul_index],
            outputs=cost,
            updates=updates,
            givens={
                x: train_set_x[m_batch_size * index : m_batch_size * (index + 1)],
                y: train_set_y[m_batch_size * index : m_batch_size * (index + 1)],
                ul_x: ul_train_set_x[m_ul_batch_size * ul_index : m_ul_batch_size * (ul_index + 1)],
            },
            on_unused_input="warn",
        )
    else:
        optimize = theano.function(
            inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                x: train_set_x[m_batch_size * index : m_batch_size * (index + 1)],
                y: train_set_y[m_batch_size * index : m_batch_size * (index + 1)],
            },
            on_unused_input="warn",
        )

    # compile functions for monitoring error and cost
    training_error = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: train_set_x[m_batch_size * index : m_batch_size * (index + 1)],
            y: train_set_y[m_batch_size * index : m_batch_size * (index + 1)],
        },
    )
    validation_error = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[m_batch_size * index : m_batch_size * (index + 1)],
            y: valid_set_y[m_batch_size * index : m_batch_size * (index + 1)],
        },
    )
    train_nll = theano.function(
        inputs=[index],
        outputs=classifier.neg_log_likelihood(y),
        givens={
            x: train_set_x[m_batch_size * index : m_batch_size * (index + 1)],
            y: train_set_y[m_batch_size * index : m_batch_size * (index + 1)],
        },
    )
    valid_nll = theano.function(
        inputs=[index],
        outputs=classifier.neg_log_likelihood(y),
        givens={
            x: valid_set_x[m_batch_size * index : m_batch_size * (index + 1)],
            y: valid_set_y[m_batch_size * index : m_batch_size * (index + 1)],
        },
    )
    num_power_iter_for_evalueation_LDS = 10  # num power iter for evaluation LDS
    train_LDS = theano.function(
        inputs=[index],
        outputs=classifier.LDS(num_power_iter=num_power_iter_for_evalueation_LDS),
        givens={x: train_set_x[m_batch_size * index : m_batch_size * (index + 1)]},
    )
    valid_LDS = theano.function(
        inputs=[index],
        outputs=classifier.LDS(num_power_iter=num_power_iter_for_evalueation_LDS),
        givens={x: valid_set_x[m_batch_size * index : m_batch_size * (index + 1)]},
    )

    print "... training"
    epoch_counter = 0
    l_index = 0
    ul_index = 0

    train_errors = list()
    valid_errors = list()
    train_nlls = list()
    valid_nlls = list()
    train_LDSs = list()
    valid_LDSs = list()
    train_LDSs_std = list()
    valid_LDSs_std = list()

    def monitor_error():
        training_errors = [training_error(i) for i in xrange(numpy.int(numpy.ceil(n_train_batches)))]
        validation_errors = [validation_error(i) for i in xrange(numpy.int(numpy.ceil(n_valid_batches)))]
        this_training_errors = numpy.sum(training_errors)
        this_validation_errors = numpy.sum(validation_errors)
        train_errors.append(this_training_errors)
        valid_errors.append(this_validation_errors)
        print "epoch:{}, train error {}, valid error {}, learning_rate={}".format(
            epoch_counter, this_training_errors, this_validation_errors, model_learning_rate.get_value(borrow=True)
        )

    def monitor_cost():
        training_LDSs = [train_LDS(i) for i in xrange(numpy.int(numpy.ceil(n_train_batches)))]
        validation_LDSs = [valid_LDS(i) for i in xrange(numpy.int(numpy.ceil(n_valid_batches)))]
        train_LDSs.append(numpy.mean(training_LDSs))
        valid_LDSs.append(numpy.mean(validation_LDSs))
        train_LDSs_std.append(numpy.std(training_LDSs))
        valid_LDSs_std.append(numpy.std(validation_LDSs))
        print "epoch:" + str(epoch_counter) + " train_KL:" + str(train_LDSs[-1]) + " std:" + str(
            train_LDSs_std[-1]
        ) + " valid_KL:" + str(valid_LDSs[-1]) + " std:" + str(valid_LDSs_std[-1])
        train_losses = [numpy.mean(train_nll(i)) for i in xrange(numpy.int(numpy.ceil(n_train_batches)))]
        train_nlls.append(numpy.mean(train_losses))
        valid_losses = [numpy.mean(valid_nll(i)) for i in xrange(numpy.int(numpy.ceil(n_valid_batches)))]
        valid_nlls.append(numpy.mean(valid_losses))
        print "epoch:" + str(epoch_counter) + " train neg ll:" + str(train_nlls[-1]) + " valid neg ll:" + str(
            valid_nlls[-1]
        )

    # error and cost before training
    monitor_error()
    monitor_cost()

    while epoch_counter < n_epochs:
        epoch_counter = epoch_counter + 1

        # parameters update
        for it in xrange(n_it_batches):
            if semi_supervised:
                optimize(l_index, ul_index)
                ul_index = (ul_index + 1) if ((ul_index + 1) < numpy.int(n_ul_train_batches)) else 0
            else:
                optimize(l_index)
            l_index = (l_index + 1) if ((l_index + 1) < numpy.int(n_train_batches)) else 0

        #  permute train set
        rand_ind = numpy.asarray(rng.permutation(train_set_x.get_value().shape[0]), dtype="int32")
        permute_train_set(rand_ind)
        if semi_supervised:
            ul_rand_ind = numpy.asarray(rng.permutation(ul_train_set_x.get_value().shape[0]), dtype="int32")
            permute_ul_train_set(ul_rand_ind)

        decay_model_learning_rate()

        # error and cost in middle of training
        monitor_error()
        if monitoring_cost_during_training or epoch_counter == n_epochs:
            monitor_cost()

    classifier.train_errors = train_errors
    classifier.valid_errors = valid_errors
    classifier.train_KLs = train_LDSs
    classifier.valid_KLs = valid_LDSs
    classifier.train_KLs_std = train_LDSs_std
    classifier.valid_KLs_std = valid_LDSs_std
    classifier.train_nlls = train_nlls
    classifier.valid_nlls = valid_nlls

    return classifier
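A sketch of a supervised call to `train_mlp`. The expected format of `activations` depends on the `mlp`/`mlp_ss` modules, which are not shown here, so the list of Theano callables below is an assumption; all numeric values are illustrative only:

import theano.tensor as T

classifier = train_mlp(
    n_l=100,                               # labeled samples (unused when full_train=True)
    layer_sizes=[784, 1200, 1200, 10],
    activations=[T.nnet.sigmoid, T.nnet.sigmoid],  # assumption: one callable per hidden layer
    initial_model_learning_rate=0.002,
    learning_rate_decay=0.9,
    n_epochs=100,
    n_it_batches=600,
    cost_type='vat',
    epsilon=0.05,
    full_train=True,                       # train on all samples, evaluate on the test set
)
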
Example #4
def train_mlp(
        n_l, # Number of labeled samples.
        layer_sizes, # Layer sizes of the neural network. For example, layer_sizes = [784,1200,1200,10] means 784 input nodes, two hidden layers of 1200 nodes each, and 10 output nodes.
        activations, # Specification of activation functions.
        initial_model_learning_rate, # Initial learning rate of ADAM.
        learning_rate_decay, # Learning rate decay of ADAM.
        n_epochs, # Number of training epochs.
        n_it_batches, # Number of mini-batch stochastic gradient updates per epoch.
        m_batch_size=100, # Mini-batch size.
        m_ul_batch_size=250, # Mini-batch size for the LDS calculation (semi-supervised learning only).
        cost_type='vat', # Cost type: 'mle' is no regularization, 'at' is adversarial training, 'vat' is virtual adversarial training (ours).
        lamb=1.0, # Balance parameter.
        epsilon=0.05, # Norm constraint parameter.
        num_power_iter=1, # Number of iterations of the power method.
        norm_constraint='L2', # Norm constraint: 'max' is the L-infinity norm and 'L2' is the L2 norm.
        random_seed=1, # Random seed.
        semi_supervised=False, # Whether to run the semi-supervised learning experiment.
        n_v=10000, # Number of validation samples.
        full_train=False, # Train on all training samples (and evaluate on test samples).
        monitoring_cost_during_training=False # Monitor the cost transitions during training.
):



    sys.setrecursionlimit(10000)

    # set random stream
    rng = numpy.random.RandomState(random_seed)

    # load mnist dataset
    if (full_train and (not semi_supervised)):
        dataset = load_mnist_for_test()
        train_set_x, train_set_y = dataset[0]
    else:
        dataset = load_mnist_for_validation(n_l=n_l, n_v=n_v, rng=rng)
        train_set_x, train_set_y, ul_train_set_x = dataset[0]
    valid_set_x, valid_set_y = dataset[1]
    n_train_batches = numpy.ceil((train_set_x.get_value(borrow=True).shape[0]) / numpy.float(m_batch_size))
    n_valid_batches = numpy.ceil((valid_set_x.get_value(borrow=True).shape[0]) / numpy.float(m_batch_size))

    print '... building the model'
    # define a classifier
    x = T.matrix('x')
    y = T.ivector('y')
    if (semi_supervised):
        n_ul_train_batches = numpy.ceil((ul_train_set_x.get_value(borrow=True).shape[0]) / numpy.float(m_ul_batch_size))
        ul_x = T.matrix('ul_x')
        classifier = mlp_ss.MLP_SS(rng=rng, input=x, ul_input=ul_x, layer_sizes=layer_sizes, activations=activations,
                                   epsilon=epsilon, lamb=lamb,
                                   m_batch_size=m_batch_size, m_ul_batch_size=m_ul_batch_size,
                                   num_power_iter=num_power_iter, norm_constraint=norm_constraint)
    else:
        classifier = mlp.MLP(rng=rng, input=x, layer_sizes=layer_sizes, activations=activations, epsilon=epsilon,
                             lamb=lamb,
                             m_batch_size=m_batch_size, num_power_iter=num_power_iter, norm_constraint=norm_constraint)

    # define a training_cost
    if (cost_type == 'mle'):
        cost = classifier.cost(y)
    elif (cost_type == 'vat'):
        cost = classifier.cost_vat(y)
    elif (cost_type == 'at'):
        cost = classifier.cost_at(y)
    else:
        raise ValueError('cost_type:' + cost_type + ' is not defined')

    # define a schedule of learning rate
    model_learning_rate = theano.shared(numpy.asarray(initial_model_learning_rate, dtype=theano.config.floatX))
    decay_model_learning_rate = theano.function(inputs=[],
                                                outputs=model_learning_rate,
                                                updates={
                                                    model_learning_rate: model_learning_rate * learning_rate_decay})
    updates = OrderedDict()
    updates = ADAM(classifier, cost, model_learning_rate, updates)
    updates.update(classifier.m_v_updates_during_training)

    # define permutation of train set
    def update_train_ind(x, y, ind):
        upd = OrderedDict()
        upd[x] = x[ind]
        if y is not None:
            upd[y] = y[ind]
        return upd, upd[x][0]
    ind = T.ivector()
    upd_tr_ind, n_x_0 = update_train_ind(train_set_x, train_set_y, ind)
    permute_train_set = theano.function(inputs=[ind], outputs=n_x_0, updates=upd_tr_ind)

    # compile optimization function
    index = T.lscalar()
    if (semi_supervised):
        upd_ul_tr_ind, n_ul_x_0 = update_train_ind(ul_train_set_x, None, ind)
        permute_ul_train_set = theano.function(inputs=[ind], outputs=n_ul_x_0, updates=upd_ul_tr_ind)
        ul_index = T.lscalar()
        optimize = theano.function(inputs=[index, ul_index], outputs=cost,
                                   updates=updates,
                                   givens={
                                       x: train_set_x[m_batch_size * index:m_batch_size * (index + 1)],
                                       y: train_set_y[m_batch_size * index:m_batch_size * (index + 1)],
                                       ul_x: ul_train_set_x[m_ul_batch_size * ul_index:m_ul_batch_size * (ul_index + 1)]},
                                   on_unused_input='warn'
                                   )
    else:
        optimize = theano.function(inputs=[index], outputs=cost,
                                   updates=updates,
                                   givens={
                                       x: train_set_x[m_batch_size * index:m_batch_size * (index + 1)],
                                       y: train_set_y[m_batch_size * index:m_batch_size * (index + 1)]},
                                   on_unused_input='warn'
                                   )

    # compile functions for monitoring error and cost
    training_error = theano.function(inputs=[index],
                                     outputs=classifier.errors(y),
                                     givens={
                                         x: train_set_x[m_batch_size * index:m_batch_size * (index + 1)],
                                         y: train_set_y[m_batch_size * index:m_batch_size * (index + 1)]}
                                     )
    validation_error = theano.function(inputs=[index],
                                       outputs=classifier.errors(y),
                                       givens={
                                           x: valid_set_x[m_batch_size * index:m_batch_size * (index + 1)],
                                           y: valid_set_y[m_batch_size * index:m_batch_size * (index + 1)]}
                                       )
    train_nll = theano.function(inputs=[index],
                                outputs=classifier.neg_log_likelihood(y),
                                givens={
                                    x: train_set_x[m_batch_size * index:m_batch_size * (index + 1)],
                                    y: train_set_y[m_batch_size * index:m_batch_size * (index + 1)]}
                                )
    valid_nll = theano.function(inputs=[index],
                                outputs=classifier.neg_log_likelihood(y),
                                givens={
                                    x: valid_set_x[m_batch_size * index:m_batch_size * (index + 1)],
                                    y: valid_set_y[m_batch_size * index:m_batch_size * (index + 1)]}
                                )
    num_power_iter_for_evalueation_LDS = 10   # num power iter for evaluation LDS
    train_LDS = theano.function(inputs=[index],
                                outputs=classifier.LDS(num_power_iter=num_power_iter_for_evalueation_LDS),
                                givens={
                                    x: train_set_x[m_batch_size * index:m_batch_size * (index + 1)]}
                                )
    valid_LDS = theano.function(inputs=[index],
                                outputs=classifier.LDS(num_power_iter=num_power_iter_for_evalueation_LDS),
                                givens={
                                    x: valid_set_x[m_batch_size * index:m_batch_size * (index + 1)]}
                                )

    print '... training'
    epoch_counter = 0
    l_index = 0
    ul_index = 0

    train_errors = list()
    valid_errors = list()
    train_nlls = list()
    valid_nlls = list()
    train_LDSs = list()
    valid_LDSs = list()
    train_LDSs_std = list()
    valid_LDSs_std = list()

    def monitor_error():
        training_errors = [training_error(i) for i in xrange(numpy.int(numpy.ceil(n_train_batches)))]
        validation_errors = [validation_error(i) for i in xrange(numpy.int(numpy.ceil(n_valid_batches)))]
        this_training_errors = numpy.sum(training_errors)
        this_validation_errors = numpy.sum(validation_errors)
        train_errors.append(this_training_errors)
        valid_errors.append(this_validation_errors)
        print 'epoch:{}, train error {}, valid error {}, learning_rate={}'.format(
            epoch_counter, this_training_errors, this_validation_errors,
            model_learning_rate.get_value(borrow=True))

    def monitor_cost():
        training_LDSs = [train_LDS(i) for i in xrange(numpy.int(numpy.ceil(n_train_batches)))]
        validation_LDSs = [valid_LDS(i) for i in xrange(numpy.int(numpy.ceil(n_valid_batches)))]
        train_LDSs.append(numpy.mean(training_LDSs))
        valid_LDSs.append(numpy.mean(validation_LDSs))
        train_LDSs_std.append(numpy.std(training_LDSs))
        valid_LDSs_std.append(numpy.std(validation_LDSs))
        print 'epoch:' + str(epoch_counter) + ' train_LDS:' + str(train_LDSs[-1]) + ' std:' + str(
            train_LDSs_std[-1]) + ' valid_LDS:' + str(
            valid_LDSs[-1]) + ' std:' + str(valid_LDSs_std[-1])
        train_losses = [numpy.mean(train_nll(i)) for i in xrange(numpy.int(numpy.ceil(n_train_batches)))]
        train_nlls.append(numpy.mean(train_losses))
        valid_losses = [numpy.mean(valid_nll(i)) for i in xrange(numpy.int(numpy.ceil(n_valid_batches)))]
        valid_nlls.append(numpy.mean(valid_losses))
        print 'epoch:' + str(epoch_counter) + ' train neg ll:' + str(
            train_nlls[-1]) + ' valid neg ll:' + str(valid_nlls[-1])

    while epoch_counter < n_epochs:
        # monitoring error and cost in middle of training
        monitor_error()
        if monitoring_cost_during_training or epoch_counter == 0:
            monitor_cost()

        epoch_counter = epoch_counter + 1

        # parameters update
        for it in xrange(n_it_batches):
            if (semi_supervised):
                optimize(l_index, ul_index)
                ul_index = (ul_index + 1) if ((ul_index + 1) < numpy.int(n_ul_train_batches)) else 0
            else:
                optimize(l_index)
            l_index = (l_index + 1) if ((l_index + 1) < numpy.int(n_train_batches)) else 0

        # permute train set
        rand_ind = numpy.asarray(rng.permutation(train_set_x.get_value().shape[0]), dtype='int32')
        permute_train_set(rand_ind)
        if (semi_supervised):
            ul_rand_ind = numpy.asarray(rng.permutation(ul_train_set_x.get_value().shape[0]), dtype='int32')
            permute_ul_train_set(ul_rand_ind)

        decay_model_learning_rate()

    print "finished training!"

    # finetune batch mean and var for batch normalization
    print "finetuning batch mean and var for batch normalization..."
    if(semi_supervised):
        finetune_batch_mean_and_var = theano.function(inputs=[index],
                                                      outputs=classifier.finetuning_N,
                                                      updates=classifier.m_v_updates_for_finetuning,
                                                      givens={
                                                          ul_x: ul_train_set_x[m_ul_batch_size * index:m_ul_batch_size * (index + 1)],
                                                      })
        [finetune_batch_mean_and_var(i) for i in xrange(numpy.int(numpy.ceil(n_ul_train_batches)))]
    else:
        finetune_batch_mean_and_var = theano.function(inputs=[index],
                                                      outputs=classifier.finetuning_N,
                                                      updates=classifier.m_v_updates_for_finetuning,
                                                      givens={
                                                          x: train_set_x[m_batch_size * index:m_batch_size * (index + 1)],
                                                      })
        [finetune_batch_mean_and_var(i) for i in xrange(numpy.int(numpy.ceil(n_train_batches)))]
    print "final errors and costs:"
    monitor_error()
    monitor_cost()

    classifier.train_errors = train_errors
    classifier.valid_errors = valid_errors
    classifier.train_LDSs = train_LDSs
    classifier.valid_LDSs = valid_LDSs
    classifier.train_LDSs_std = train_LDSs_std
    classifier.valid_LDSs_std = valid_LDSs_std
    classifier.train_nlls = train_nlls
    classifier.valid_nlls = valid_nlls

    return classifier
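Both `train_mlp` variants attach their monitoring history to the returned classifier (see the attribute assignments just above), so the learning curves can be read back after training. Continuing from the hypothetical call sketched after Example #3:

print classifier.valid_errors       # validation error recorded each epoch
print classifier.valid_nlls[-1]     # final mean validation negative log-likelihood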