Example #1
def get_cost_type(model, x, t, args):
    forward_func = None
    if args['dataset'] == 'mnist':
        forward_func = model.forward_no_update_batch_stat
    if args['--cost_type'] == 'MLE' or args['--cost_type'] == 'dropout':
        cost = costs.cross_entropy_loss(x=x, t=t, forward_func=model.forward_train)
    elif args['--cost_type'] == 'L2':
        cost = costs.cross_entropy_loss(x=x, t=t, forward_func=model.forward_train) \
               + costs.weight_decay(params=model.params, coeff=float(args['--lamb']))
    elif args['--cost_type'] == 'AT':
        cost = costs.adversarial_training(x, t, model.forward_train,
                                          'CE',
                                          epsilon=float(args['--epsilon']),
                                          lamb=float(args['--lamb']),
                                          norm_constraint=args['--norm_constraint'],
                                          forward_func_for_generating_adversarial_examples=forward_func)
    elif args['--cost_type'] == 'VAT':
        cost = costs.virtual_adversarial_training(x, t, model.forward_train,
                                                  'CE',
                                                  epsilon=float(args['--epsilon']),
                                                  norm_constraint=args['--norm_constraint'],
                                                  num_power_iter=int(args['--num_power_iter']),
                                                  forward_func_for_generating_adversarial_examples=forward_func)
    elif args['--cost_type'] == 'VAT_n':
        cost = costs.virtual_adversarial_training_noise(x, t, model.forward_train,
                                                        'CE',
                                                        epsilon=float(args['--epsilon']),
                                                        eps_n=float(args['--eps_w']),
                                                        neg_lamb=float(args['--neg_lamb']),
                                                        norm_constraint=args['--norm_constraint'],
                                                        num_power_iter=int(args['--num_power_iter']),
                                                        forward_func_for_generating_adversarial_examples=forward_func)
    elif args['--cost_type'] == 'VAT_f':
        cost = costs.virtual_adversarial_training_finite_diff(x, t, model.forward_train,
                                                              'CE',
                                                              epsilon=float(args['--epsilon']),
                                                              norm_constraint=args['--norm_constraint'],
                                                              num_power_iter=int(args['--num_power_iter']),
                                                              forward_func_for_generating_adversarial_examples=forward_func)
    else:
        print("method does not exist")
        sys.exit(-1)
    return cost
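
The elif chain above is a plain string dispatch on args['--cost_type']. The same pattern can be written with a dispatch table, which keeps the unknown-type error in one place; the sketch below is self-contained, and the make_* builders are hypothetical stand-ins for the costs.* constructors used above:

# A minimal, self-contained sketch of table-based dispatch on the cost type.
# The make_* builders are hypothetical stand-ins for the costs.* calls above.
def make_mle(args):
    return 'cross_entropy'

def make_l2(args):
    return 'cross_entropy + %g * weight_decay' % float(args['--lamb'])

COST_BUILDERS = {
    'MLE': make_mle,
    'dropout': make_mle,  # dropout shares the plain cross-entropy objective
    'L2': make_l2,
}

def get_cost(args):
    try:
        builder = COST_BUILDERS[args['--cost_type']]
    except KeyError:
        raise ValueError('unknown cost type: %r' % args['--cost_type'])
    return builder(args)

print(get_cost({'--cost_type': 'L2', '--lamb': '0.01'}))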
Example #2
def get_cost_type_semi(model, x, t, ul_x, args, fix_d=None):
    if args['--cost_type'] == 'MLE':
        cost = costs.cross_entropy_loss(x=x, t=t, forward_func=model.forward_train)
    elif args['--cost_type'] == 'L2':
        cost = costs.cross_entropy_loss(x=x, t=t, forward_func=model.forward_train) \
               + costs.weight_decay(params=model.params, coeff=float(args['--lamb']))
    elif args['--cost_type'] == 'AT':
        cost = costs.adversarial_training(x, t, model.forward_train,
                                          'CE',
                                          epsilon=float(args['--epsilon']),
                                          lamb=float(args['--lamb']),
                                          norm_constraint=args['--norm_constraint'],
                                          forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    elif args['--cost_type'] == 'VATsup':
        cost = costs.virtual_adversarial_training_finite_diff(x, t, model.forward_train,
                                                              'CE',
                                                              epsilon=float(args['--epsilon']),
                                                              norm_constraint=args['--norm_constraint'],
                                                              num_power_iter=int(args['--num_power_iter']),
                                                              x_for_generating_adversarial_examples=x,
                                                              forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    elif args['--cost_type'] == 'VAT':
        cost = costs.virtual_adversarial_training(x, t, model.forward_train,
                                                  'CE',
                                                  epsilon=float(args['--epsilon']),
                                                  norm_constraint=args['--norm_constraint'],
                                                  num_power_iter=int(args['--num_power_iter']),
                                                  x_for_generating_adversarial_examples=ul_x,
                                                  forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    elif args['--cost_type'] == 'VAT_f':
        cost = costs.virtual_adversarial_training_finite_diff(x, t, model.forward_train,
                                                              'CE',
                                                              xi=float(args['--xi']),
                                                              epsilon=float(args['--epsilon']),
                                                              norm_constraint=args['--norm_constraint'],
                                                              num_power_iter=int(args['--num_power_iter']),
                                                              x_for_generating_adversarial_examples=ul_x,
                                                              forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    else:
        print("method does not exist")
        sys.exit(-1)
    return cost
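
Both helpers consume a docopt-style dictionary whose keys keep their leading '--' and whose values arrive as strings, which is why every numeric option is wrapped in float(...) or int(...) at the call site. A hedged illustration of such a dictionary (the values are invented for the example, not defaults from the original repository):

# Illustrative docopt-style arguments; the values are made up, not
# defaults taken from the original repository.
args = {
    '--cost_type': 'VAT',
    '--epsilon': '2.0',
    '--norm_constraint': 'L2',
    '--num_power_iter': '1',
    '--lamb': '1.0',
}

# Numeric options are cast where they are used, mirroring
# float(args['--epsilon']) and int(args['--num_power_iter']) above.
print('epsilon=%.1f, power iterations=%d'
      % (float(args['--epsilon']), int(args['--num_power_iter'])))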
Example #3
def train(args):
    print args

    numpy.random.seed(int(args['--seed']))

    if (args['--validation']):
        dataset = load_data.load_mnist_for_validation(
            n_v=int(args['--num_validation_samples']))
    else:
        dataset = load_data.load_mnist_full()
    x_train, t_train = dataset[0]
    x_test, t_test = dataset[1]

    layer_sizes = [
        int(layer_size) for layer_size in args['--layer_sizes'].split('-')
    ]
    model = FNN_MNIST(layer_sizes=layer_sizes)

    x = T.matrix()
    t = T.ivector()

    if (args['--cost_type'] == 'MLE'):
        cost = costs.cross_entropy_loss(x=x,
                                        t=t,
                                        forward_func=model.forward_train)
    elif (args['--cost_type'] == 'L2'):
        cost = costs.cross_entropy_loss(x=x, t=t, forward_func=model.forward_train) \
               + costs.weight_decay(params=model.params, coeff=float(args['--lamb']))
    elif (args['--cost_type'] == 'AT'):
        cost = costs.adversarial_training(
            x,
            t,
            model.forward_train,
            'CE',
            epsilon=float(args['--epsilon']),
            lamb=float(args['--lamb']),
            norm_constraint=args['--norm_constraint'],
            forward_func_for_generating_adversarial_examples=(
                model.forward_no_update_batch_stat))
    elif (args['--cost_type'] == 'VAT'):
        cost = costs.virtual_adversarial_training(
            x,
            t,
            model.forward_train,
            'CE',
            epsilon=float(args['--epsilon']),
            norm_constraint=args['--norm_constraint'],
            num_power_iter=int(args['--num_power_iter']),
            forward_func_for_generating_adversarial_examples=(
                model.forward_no_update_batch_stat))
    elif (args['--cost_type'] == 'VAT_finite_diff'):
        cost = costs.virtual_adversarial_training_finite_diff(
            x,
            t,
            model.forward_train,
            'CE',
            epsilon=float(args['--epsilon']),
            norm_constraint=args['--norm_constraint'],
            num_power_iter=int(args['--num_power_iter']),
            forward_func_for_generating_adversarial_examples=(
                model.forward_no_update_batch_stat))
    else:
        print("method does not exist")
        sys.exit(-1)
    nll = costs.cross_entropy_loss(x=x, t=t, forward_func=model.forward_test)
    error = costs.error(x=x, t=t, forward_func=model.forward_test)

    optimizer = optimizers.ADAM(cost=cost,
                                params=model.params,
                                alpha=float(args['--initial_learning_rate']))

    index = T.iscalar()
    batch_size = int(args['--batch_size'])
    f_train = theano.function(
        inputs=[index],
        outputs=cost,
        updates=optimizer.updates,
        givens={
            x: x_train[batch_size * index:batch_size * (index + 1)],
            t: t_train[batch_size * index:batch_size * (index + 1)]
        })
    f_nll_train = theano.function(
        inputs=[index],
        outputs=nll,
        givens={
            x: x_train[batch_size * index:batch_size * (index + 1)],
            t: t_train[batch_size * index:batch_size * (index + 1)]
        })
    f_nll_test = theano.function(
        inputs=[index],
        outputs=nll,
        givens={
            x: x_test[batch_size * index:batch_size * (index + 1)],
            t: t_test[batch_size * index:batch_size * (index + 1)]
        })

    f_error_train = theano.function(
        inputs=[index],
        outputs=error,
        givens={
            x: x_train[batch_size * index:batch_size * (index + 1)],
            t: t_train[batch_size * index:batch_size * (index + 1)]
        })
    f_error_test = theano.function(
        inputs=[index],
        outputs=error,
        givens={
            x: x_test[batch_size * index:batch_size * (index + 1)],
            t: t_test[batch_size * index:batch_size * (index + 1)]
        })

    f_lr_decay = theano.function(
        inputs=[],
        outputs=optimizer.alpha,
        updates={
            optimizer.alpha:
            theano.shared(
                numpy.array(args['--learning_rate_decay']).astype(
                    theano.config.floatX)) * optimizer.alpha
        })
    randix = RandomStreams(seed=numpy.random.randint(1234)).permutation(
        n=x_train.shape[0])
    update_permutation = OrderedDict()
    update_permutation[x_train] = x_train[randix]
    update_permutation[t_train] = t_train[randix]
    f_permute_train_set = theano.function(inputs=[],
                                          outputs=x_train,
                                          updates=update_permutation)

    statuses = {}
    statuses['nll_train'] = []
    statuses['error_train'] = []
    statuses['nll_test'] = []
    statuses['error_test'] = []

    n_train = x_train.get_value().shape[0]
    n_test = x_test.get_value().shape[0]

    sum_nll_train = numpy.sum(
        numpy.array([f_nll_train(i)
                     for i in xrange(n_train / batch_size)])) * batch_size
    sum_error_train = numpy.sum(
        numpy.array([f_error_train(i) for i in xrange(n_train / batch_size)]))
    sum_nll_test = numpy.sum(
        numpy.array([f_nll_test(i)
                     for i in xrange(n_test / batch_size)])) * batch_size
    sum_error_test = numpy.sum(
        numpy.array([f_error_test(i) for i in xrange(n_test / batch_size)]))
    statuses['nll_train'].append(sum_nll_train / n_train)
    statuses['error_train'].append(sum_error_train)
    statuses['nll_test'].append(sum_nll_test / n_test)
    statuses['error_test'].append(sum_error_test)
    print "[Epoch]", str(-1)
    print "nll_train : ", statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
        "nll_test : ", statuses['nll_test'][-1], "error_test : ", statuses['error_test'][-1]

    print "training..."

    make_sure_path_exists("./trained_model")

    for epoch in xrange(int(args['--num_epochs'])):
        cPickle.dump(
            (statuses, args),
            open('./trained_model/' + 'tmp-' + args['--save_filename'],
                 'wb'), cPickle.HIGHEST_PROTOCOL)

        f_permute_train_set()

        ### update parameters ###
        [f_train(i) for i in xrange(n_train / batch_size)]
        #########################

        sum_nll_train = numpy.sum(
            numpy.array([f_nll_train(i)
                         for i in xrange(n_train / batch_size)])) * batch_size
        sum_error_train = numpy.sum(
            numpy.array(
                [f_error_train(i) for i in xrange(n_train / batch_size)]))
        sum_nll_test = numpy.sum(
            numpy.array([f_nll_test(i)
                         for i in xrange(n_test / batch_size)])) * batch_size
        sum_error_test = numpy.sum(
            numpy.array([f_error_test(i)
                         for i in xrange(n_test / batch_size)]))
        statuses['nll_train'].append(sum_nll_train / n_train)
        statuses['error_train'].append(sum_error_train)
        statuses['nll_test'].append(sum_nll_test / n_test)
        statuses['error_test'].append(sum_error_test)
        print "[Epoch]", str(epoch)
        print "nll_train : ", statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
            "nll_test : ", statuses['nll_test'][-1], "error_test : ", statuses['error_test'][-1]

        f_lr_decay()

    ### finetune batch stat ###
    f_finetune = theano.function(
        inputs=[index],
        outputs=model.forward_for_finetuning_batch_stat(x),
        givens={x: x_train[batch_size * index:batch_size * (index + 1)]})
    [f_finetune(i) for i in xrange(n_train / batch_size)]

    sum_nll_train = numpy.sum(
        numpy.array([f_nll_train(i)
                     for i in xrange(n_train / batch_size)])) * batch_size
    sum_error_train = numpy.sum(
        numpy.array([f_error_train(i) for i in xrange(n_train / batch_size)]))
    sum_nll_test = numpy.sum(
        numpy.array([f_nll_test(i)
                     for i in xrange(n_test / batch_size)])) * batch_size
    sum_error_test = numpy.sum(
        numpy.array([f_error_test(i) for i in xrange(n_test / batch_size)]))
    statuses['nll_train'].append(sum_nll_train / n_train)
    statuses['error_train'].append(sum_error_train)
    statuses['nll_test'].append(sum_nll_test / n_test)
    statuses['error_test'].append(sum_error_test)
    print "[after finetuning]"
    print "nll_train : ", statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
        "nll_test : ", statuses['nll_test'][-1], "error_test : ", statuses['error_test'][-1]
    ###########################

    make_sure_path_exists("./trained_model")
    cPickle.dump((model, statuses, args),
                 open('./trained_model/' + args['--save_filename'], 'wb'),
                 cPickle.HIGHEST_PROTOCOL)
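
The bookkeeping in Example #3 recovers a per-example average by summing per-batch means, rescaling by batch_size, and dividing by n_train (or n_test). That arithmetic is exact only because batch_size divides the dataset size evenly; a small self-contained numpy check of the identity:

import numpy

# Toy per-example losses: 10 examples, batch_size 5 divides evenly.
losses = numpy.arange(10, dtype=float)
batch_size = 5
n = len(losses)

# Per-batch means, playing the role of f_nll_train(i) above.
batch_means = [losses[i * batch_size:(i + 1) * batch_size].mean()
               for i in range(n // batch_size)]

# Sum of batch means * batch_size / n reproduces the global mean
# exactly when batch_size divides n; leftover examples would be dropped.
reconstructed = sum(batch_means) * batch_size / n
assert abs(reconstructed - losses.mean()) < 1e-12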
Example #4
def train(args):

    print args

    numpy.random.seed(int(args['--seed']))

    dataset = load_data.load_mnist_for_semi_sup(n_l=int(args['--num_labeled_samples']),
                                                n_v=int(args['--num_validation_samples']))

    x_train, t_train, ul_x_train = dataset[0]
    x_test, t_test = dataset[1]


    layer_sizes = [int(layer_size) for layer_size in args['--layer_sizes'].split('-')] 
    model = FNN_MNIST(layer_sizes=layer_sizes)

    x = T.matrix()
    ul_x = T.matrix()
    t = T.ivector()

    if(args['--cost_type']=='MLE'):
        cost = costs.cross_entropy_loss(x=x,t=t,forward_func=model.forward_train)
    elif(args['--cost_type']=='L2'):
        cost = costs.cross_entropy_loss(x=x,t=t,forward_func=model.forward_train) \
               + costs.weight_decay(params=model.params,coeff=float(args['--lamb']))
    elif(args['--cost_type']=='AT'):
        cost = costs.adversarial_training(x,t,model.forward_train,
                                              'CE',
                                              epsilon=float(args['--epsilon']),
                                              lamb=float(args['--lamb']),
                                              norm_constraint = args['--norm_constraint'],
                                              forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    elif(args['--cost_type']=='VAT'):
        cost = costs.virtual_adversarial_training(x,t,model.forward_train,
                                              'CE',
                                              epsilon=float(args['--epsilon']),
                                              norm_constraint = args['--norm_constraint'],
                                              num_power_iter = int(args['--num_power_iter']),
                                              x_for_generating_adversarial_examples = ul_x,
                                              forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    elif(args['--cost_type']=='VAT_finite_diff'):
        cost = costs.virtual_adversarial_training_finite_diff(x,t,model.forward_train,
                                              'CE',
                                              epsilon=float(args['--epsilon']),
                                              norm_constraint = args['--norm_constraint'],
                                              num_power_iter = int(args['--num_power_iter']),
                                              x_for_generating_adversarial_examples = ul_x,
                                              forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    else:
        print("method does not exist")
        sys.exit(-1)
    nll = costs.cross_entropy_loss(x=x,t=t,forward_func=model.forward_test)
    error = costs.error(x=x,t=t,forward_func=model.forward_test)

    optimizer = optimizers.ADAM(cost=cost,params=model.params,alpha=float(args['--initial_learning_rate']))


    index = T.iscalar()
    ul_index = T.iscalar()
    batch_size = int(args['--batch_size'])
    ul_batch_size = int(args['--ul_batch_size'])

    f_train = theano.function(inputs=[index,ul_index], outputs=cost, updates=optimizer.updates,
                              givens={
                                  x:x_train[batch_size*index:batch_size*(index+1)],
                                  t:t_train[batch_size*index:batch_size*(index+1)],
                                  ul_x:ul_x_train[ul_batch_size*ul_index:ul_batch_size*(ul_index+1)]},
                              on_unused_input='warn')
    f_nll_train = theano.function(inputs=[index], outputs=nll,
                              givens={
                                  x:x_train[batch_size*index:batch_size*(index+1)],
                                  t:t_train[batch_size*index:batch_size*(index+1)]})
    f_nll_test = theano.function(inputs=[index], outputs=nll,
                              givens={
                                  x:x_test[batch_size*index:batch_size*(index+1)],
                                  t:t_test[batch_size*index:batch_size*(index+1)]})

    f_error_train = theano.function(inputs=[index], outputs=error,
                              givens={
                                  x:x_train[batch_size*index:batch_size*(index+1)],
                                  t:t_train[batch_size*index:batch_size*(index+1)]})
    f_error_test = theano.function(inputs=[index], outputs=error,
                              givens={
                                  x:x_test[batch_size*index:batch_size*(index+1)],
                                  t:t_test[batch_size*index:batch_size*(index+1)]})

    f_lr_decay = theano.function(inputs=[],outputs=optimizer.alpha,
                                 updates={optimizer.alpha:theano.shared(numpy.array(args['--learning_rate_decay']).astype(theano.config.floatX))*optimizer.alpha})

    # Shuffle training set
    randix = RandomStreams(seed=numpy.random.randint(1234)).permutation(n=x_train.shape[0])
    update_permutation = OrderedDict()
    update_permutation[x_train] = x_train[randix]
    update_permutation[t_train] = t_train[randix]
    f_permute_train_set = theano.function(inputs=[],outputs=x_train,updates=update_permutation)

    # Shuffle unlabeled training set
    ul_randix = RandomStreams(seed=numpy.random.randint(1234)).permutation(n=ul_x_train.shape[0])
    update_ul_permutation = OrderedDict()
    update_ul_permutation[ul_x_train] = ul_x_train[ul_randix]
    f_permute_ul_train_set = theano.function(inputs=[],outputs=ul_x_train,updates=update_ul_permutation)

    statuses = {}
    statuses['nll_train'] = []
    statuses['error_train'] = []
    statuses['nll_test'] = []
    statuses['error_test'] = []


    n_train = x_train.get_value().shape[0]
    n_test = x_test.get_value().shape[0]
    n_ul_train = ul_x_train.get_value().shape[0]

    sum_nll_train = numpy.sum(numpy.array([f_nll_train(i) for i in xrange(n_train/batch_size)]))*batch_size
    sum_error_train = numpy.sum(numpy.array([f_error_train(i) for i in xrange(n_train/batch_size)]))
    sum_nll_test = numpy.sum(numpy.array([f_nll_test(i) for i in xrange(n_test/batch_size)]))*batch_size
    sum_error_test = numpy.sum(numpy.array([f_error_test(i) for i in xrange(n_test/batch_size)]))
    statuses['nll_train'].append(sum_nll_train/n_train)
    statuses['error_train'].append(sum_error_train)
    statuses['nll_test'].append(sum_nll_test/n_test)
    statuses['error_test'].append(sum_error_test)
    print "[Epoch]", str(-1)
    print "nll_train : ", statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
        "nll_test : ", statuses['nll_test'][-1], "error_test : ", statuses['error_test'][-1]




    print "training..."

    make_sure_path_exists("./trained_model")

    l_i = 0
    ul_i = 0
    for epoch in xrange(int(args['--num_epochs'])):
        cPickle.dump((statuses,args),open('./trained_model/'+'tmp-' + args['--save_filename'],'wb'),cPickle.HIGHEST_PROTOCOL)
        
        f_permute_train_set()
        f_permute_ul_train_set()

        for it in xrange(int(args['--num_batch_it'])):
            f_train(l_i, ul_i)
            # advance the labeled and unlabeled batch pointers independently,
            # wrapping each one when it reaches the end of its dataset
            l_i = 0 if l_i >= n_train / batch_size - 1 else l_i + 1
            ul_i = 0 if ul_i >= n_ul_train / ul_batch_size - 1 else ul_i + 1


        sum_nll_train = numpy.sum(numpy.array([f_nll_train(i) for i in xrange(n_train/batch_size)]))*batch_size
        sum_error_train = numpy.sum(numpy.array([f_error_train(i) for i in xrange(n_train/batch_size)]))
        sum_nll_test = numpy.sum(numpy.array([f_nll_test(i) for i in xrange(n_test/batch_size)]))*batch_size
        sum_error_test = numpy.sum(numpy.array([f_error_test(i) for i in xrange(n_test/batch_size)]))
        statuses['nll_train'].append(sum_nll_train/n_train)
        statuses['error_train'].append(sum_error_train)
        statuses['nll_test'].append(sum_nll_test/n_test)
        statuses['error_test'].append(sum_error_test)
        print "[Epoch]", str(epoch)
        print "nll_train : ", statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
            "nll_test : ", statuses['nll_test'][-1], "error_test : ", statuses['error_test'][-1]


        f_lr_decay()

    ### finetune batch stat ###
    f_finetune = theano.function(inputs=[ul_index],outputs=model.forward_for_finetuning_batch_stat(x),
                                 givens={x:ul_x_train[ul_batch_size*ul_index:ul_batch_size*(ul_index+1)]})
    [f_finetune(i) for i in xrange(n_ul_train/ul_batch_size)]

    sum_nll_train = numpy.sum(numpy.array([f_nll_train(i) for i in xrange(n_train/batch_size)]))*batch_size
    sum_error_train = numpy.sum(numpy.array([f_error_train(i) for i in xrange(n_train/batch_size)]))
    sum_nll_test = numpy.sum(numpy.array([f_nll_test(i) for i in xrange(n_test/batch_size)]))*batch_size
    sum_error_test = numpy.sum(numpy.array([f_error_test(i) for i in xrange(n_test/batch_size)]))
    statuses['nll_train'].append(sum_nll_train/n_train)
    statuses['error_train'].append(sum_error_train)
    statuses['nll_test'].append(sum_nll_test/n_test)
    statuses['error_test'].append(sum_error_test)
    print "[after finetuning]"
    print "nll_train : ", statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
        "nll_test : ", statuses['nll_test'][-1], "error_test : ", statuses['error_test'][-1]

    ###########################

    make_sure_path_exists("./trained_model")
    cPickle.dump((model,statuses,args),open('./trained_model/'+args['--save_filename'],'wb'),cPickle.HIGHEST_PROTOCOL)
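
Unlike Example #3, the semi-supervised loop in Example #4 steps a labeled batch index and an unlabeled batch index independently, wrapping each when it runs off the end; since the counters are declared outside the epoch loop, the pairings keep rotating across epochs. A self-contained sketch of that cycling with invented sizes:

# Illustrative sizes: 3 labeled batches, 5 unlabeled batches.
n_labeled_batches = 3
n_unlabeled_batches = 5

l_i, ul_i = 0, 0
for it in range(8):
    print('iteration %d: labeled batch %d, unlabeled batch %d'
          % (it, l_i, ul_i))
    # Wrap each counter independently, as the training loop above does.
    l_i = 0 if l_i >= n_labeled_batches - 1 else l_i + 1
    ul_i = 0 if ul_i >= n_unlabeled_batches - 1 else ul_i + 1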
Example #5
def train(args):

    print args

    numpy.random.seed(1)

    dataset = cPickle.load(open('dataset/' + args['--dataset_filename'], 'rb'))
    x_train = theano.shared(numpy.asarray(dataset[0][0][0],dtype=theano.config.floatX))
    t_train =  theano.shared(numpy.asarray(dataset[0][0][1],dtype='int32'))
    x_test =  theano.shared(numpy.asarray(dataset[0][1][0],dtype=theano.config.floatX))
    t_test =  theano.shared(numpy.asarray(dataset[0][1][1],dtype='int32'))

    if(args['--cost_type']=='dropout'):
        model = FNN_syn_dropout(drate=float(args['--dropout_rate']))
    else:
        model = FNN_syn()
    x = T.matrix()
    t = T.ivector()

    if(args['--cost_type']=='MLE' or args['--cost_type']=='dropout'):
        cost = costs.cross_entropy_loss(x=x,t=t,forward_func=model.forward_train)
    elif(args['--cost_type']=='L2'):
        cost = costs.cross_entropy_loss(x=x,t=t,forward_func=model.forward_train) \
               + costs.weight_decay(params=model.params,coeff=float(args['--lamb']))
    elif(args['--cost_type']=='AT'):
        cost = costs.adversarial_training(x,t,model.forward_train,
                                              'CE',
                                              epsilon=float(args['--epsilon']),
                                              lamb=float(args['--lamb']),
                                              norm_constraint = args['--norm_constraint'])
    elif(args['--cost_type']=='VAT'):
        cost = costs.virtual_adversarial_training(x,t,model.forward_train,
                                              'CE',
                                              epsilon=float(args['--epsilon']),
                                              norm_constraint = args['--norm_constraint'],
                                              num_power_iter = int(args['--num_power_iter']))
    elif(args['--cost_type']=='VAT_finite_diff'):
        cost = costs.virtual_adversarial_training_finite_diff(x,t,model.forward_train,
                                              'CE',
                                              epsilon=float(args['--epsilon']),
                                              norm_constraint = args['--norm_constraint'],
                                              num_power_iter = int(args['--num_power_iter']))
    else:
        print("method does not exist")
        sys.exit(-1)
    nll = costs.cross_entropy_loss(x=x,t=t,forward_func=model.forward_test)
    error = costs.error(x=x,t=t,forward_func=model.forward_test)

    optimizer = optimizers.MomentumSGD(cost=cost,params=model.params,lr=float(args['--initial_learning_rate']),
                                       momentum_ratio=float(args['--momentum_ratio']))

    f_train = theano.function(inputs=[], outputs=cost, updates=optimizer.updates,
                              givens={
                                  x:x_train,
                                  t:t_train})
    f_nll_train = theano.function(inputs=[], outputs=nll,
                              givens={
                                  x:x_train,
                                  t:t_train})
    f_nll_test = theano.function(inputs=[], outputs=nll,
                              givens={
                                  x:x_test,
                                  t:t_test})

    f_error_train = theano.function(inputs=[], outputs=error,
                              givens={
                                  x:x_train,
                                  t:t_train})
    f_error_test = theano.function(inputs=[], outputs=error,
                              givens={
                                  x:x_test,
                                  t:t_test})
    if(args['--monitoring_LDS']):
        LDS = costs.average_LDS_finite_diff(x,
                        model.forward_test,
                        main_obj_type='CE',
                        epsilon=float(args['--epsilon']),
                        norm_constraint = args['--norm_constraint'],
                        num_power_iter = int(args['--num_power_iter_for_monitoring_LDS']))
        f_LDS_train = theano.function(inputs=[], outputs=LDS,
                              givens={
                                  x:x_train})
        f_LDS_test = theano.function(inputs=[], outputs=LDS,
                              givens={
                                  x:x_test})
    f_lr_decay = theano.function(inputs=[],outputs=optimizer.lr,
                                 updates={optimizer.lr:theano.shared(numpy.array(args['--learning_rate_decay']).astype(theano.config.floatX))*optimizer.lr})


    statuses = {}
    statuses['nll_train'] = []
    statuses['error_train'] = []
    statuses['nll_test'] = []
    statuses['error_test'] = []
    if(args['--monitoring_LDS']):
        statuses['LDS_train'] = []
        statuses['LDS_test'] = []

    statuses['nll_train'].append(f_nll_train())
    statuses['error_train'].append(f_error_train())
    statuses['nll_test'].append(f_nll_test())
    statuses['error_test'].append(f_error_test())
    print "[Epoch]", str(0)
    print "nll_train : ", statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
        "nll_test : ", statuses['nll_test'][-1], "error_test : ", statuses['error_test'][-1]
    if(args['--monitoring_LDS']):
        statuses['LDS_train'].append(f_LDS_train())
        statuses['LDS_test'].append(f_LDS_test())
        print "LDS_train : ", statuses['LDS_train'][-1], "LDS_test : ", statuses['LDS_test'][-1]

    print "training..."

    for epoch in xrange(int(args['--num_epochs'])):
        train_cost = f_train()
        if((epoch+1)%20==0):
            statuses['nll_train'].append(f_nll_train())
            statuses['error_train'].append(f_error_train())
            statuses['nll_test'].append(f_nll_test())
            statuses['error_test'].append(f_error_test())
            print "[Epoch]", str(epoch)
            print "nll_train : ", statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
                "nll_test : ", statuses['nll_test'][-1], "error_test : ", statuses['error_test'][-1]
            if(args['--monitoring_LDS']):
                statuses['LDS_train'].append(f_LDS_train())
                statuses['LDS_test'].append(f_LDS_test())
                print "LDS_train : ", statuses['LDS_train'][-1], "LDS_test : ", statuses['LDS_test'][-1]

        f_lr_decay()
    make_sure_path_exists("./trained_model")
    cPickle.dump((model,statuses,args),open('./trained_model/'+args['--save_filename'],'wb'),cPickle.HIGHEST_PROTOCOL)
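
f_lr_decay multiplies the learning rate by a fixed factor once per epoch, which yields the exponential schedule lr_t = lr_0 * decay^t. A plain-numpy illustration of the schedule; the numbers are invented, not repository defaults:

import numpy

initial_lr = 0.1   # invented value, not a repository default
decay = 0.95       # invented learning_rate_decay

lr = initial_lr
for epoch in range(5):
    # ... one full-batch f_train() update would happen here ...
    lr *= decay    # the update f_lr_decay() applies to optimizer.lr
    print('epoch %d: lr = %.6f' % (epoch, lr))

# Closed form of the same schedule after 5 epochs.
assert numpy.isclose(lr, initial_lr * decay ** 5)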
Example #6
def train(args):

    print args

    numpy.random.seed(1)

    dataset = cPickle.load(open('dataset/' + args['--dataset_filename'], 'rb'))
    x_train = theano.shared(
        numpy.asarray(dataset[0][0][0], dtype=theano.config.floatX))
    t_train = theano.shared(numpy.asarray(dataset[0][0][1], dtype='int32'))
    x_test = theano.shared(
        numpy.asarray(dataset[0][1][0], dtype=theano.config.floatX))
    t_test = theano.shared(numpy.asarray(dataset[0][1][1], dtype='int32'))

    if (args['--cost_type'] == 'dropout'):
        model = FNN_syn_dropout(drate=float(args['--dropout_rate']))
    else:
        model = FNN_syn()
    x = T.matrix()
    t = T.ivector()

    if (args['--cost_type'] == 'MLE' or args['--cost_type'] == 'dropout'):
        cost = costs.cross_entropy_loss(x=x,
                                        t=t,
                                        forward_func=model.forward_train)
    elif (args['--cost_type'] == 'L2'):
        cost = costs.cross_entropy_loss(x=x,t=t,forward_func=model.forward_train) \
               + costs.weight_decay(params=model.params,coeff=float(args['--lamb']))
    elif (args['--cost_type'] == 'AT'):
        cost = costs.adversarial_training(
            x,
            t,
            model.forward_train,
            'CE',
            epsilon=float(args['--epsilon']),
            lamb=float(args['--lamb']),
            norm_constraint=args['--norm_constraint'])
    elif (args['--cost_type'] == 'VAT'):
        cost = costs.virtual_adversarial_training(
            x,
            t,
            model.forward_train,
            'CE',
            epsilon=float(args['--epsilon']),
            norm_constraint=args['--norm_constraint'],
            num_power_iter=int(args['--num_power_iter']))
    elif (args['--cost_type'] == 'VAT_finite_diff'):
        cost = costs.virtual_adversarial_training_finite_diff(
            x,
            t,
            model.forward_train,
            'CE',
            epsilon=float(args['--epsilon']),
            norm_constraint=args['--norm_constraint'],
            num_power_iter=int(args['--num_power_iter']))
    else:
        print("method does not exist")
        sys.exit(-1)
    nll = costs.cross_entropy_loss(x=x, t=t, forward_func=model.forward_test)
    error = costs.error(x=x, t=t, forward_func=model.forward_test)

    optimizer = optimizers.MomentumSGD(
        cost=cost,
        params=model.params,
        lr=float(args['--initial_learning_rate']),
        momentum_ratio=float(args['--momentum_ratio']))

    f_train = theano.function(inputs=[],
                              outputs=cost,
                              updates=optimizer.updates,
                              givens={
                                  x: x_train,
                                  t: t_train
                              })
    f_nll_train = theano.function(inputs=[],
                                  outputs=nll,
                                  givens={
                                      x: x_train,
                                      t: t_train
                                  })
    f_nll_test = theano.function(inputs=[],
                                 outputs=nll,
                                 givens={
                                     x: x_test,
                                     t: t_test
                                 })

    f_error_train = theano.function(inputs=[],
                                    outputs=error,
                                    givens={
                                        x: x_train,
                                        t: t_train
                                    })
    f_error_test = theano.function(inputs=[],
                                   outputs=error,
                                   givens={
                                       x: x_test,
                                       t: t_test
                                   })
    if (args['--monitoring_LDS']):
        LDS = costs.average_LDS_finite_diff(
            x,
            model.forward_test,
            main_obj_type='CE',
            epsilon=float(args['--epsilon']),
            norm_constraint=args['--norm_constraint'],
            num_power_iter=int(args['--num_power_iter_for_monitoring_LDS']))
        f_LDS_train = theano.function(inputs=[],
                                      outputs=LDS,
                                      givens={x: x_train})
        f_LDS_test = theano.function(inputs=[],
                                     outputs=LDS,
                                     givens={x: x_test})
    f_lr_decay = theano.function(
        inputs=[],
        outputs=optimizer.lr,
        updates={
            optimizer.lr:
            theano.shared(
                numpy.array(args['--learning_rate_decay']).astype(
                    theano.config.floatX)) * optimizer.lr
        })

    statuses = {}
    statuses['nll_train'] = []
    statuses['error_train'] = []
    statuses['nll_test'] = []
    statuses['error_test'] = []
    if (args['--monitoring_LDS']):
        statuses['LDS_train'] = []
        statuses['LDS_test'] = []

    statuses['nll_train'].append(f_nll_train())
    statuses['error_train'].append(f_error_train())
    statuses['nll_test'].append(f_nll_test())
    statuses['error_test'].append(f_error_test())
    print "[Epoch]", str(0)
    print "nll_train : ", statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
        "nll_test : ", statuses['nll_test'][-1], "error_test : ", statuses['error_test'][-1]
    if (args['--monitoring_LDS']):
        statuses['LDS_train'].append(f_LDS_train())
        statuses['LDS_test'].append(f_LDS_test())
        print "LDS_train : ", statuses['LDS_train'][-1], "LDS_test : ", statuses['LDS_test'][-1]

    print "training..."

    for epoch in xrange(int(args['--num_epochs'])):
        train_cost = f_train()
        if ((epoch + 1) % 20 == 0):
            statuses['nll_train'].append(f_nll_train())
            statuses['error_train'].append(f_error_train())
            statuses['nll_test'].append(f_nll_test())
            statuses['error_test'].append(f_error_test())
            print "[Epoch]", str(epoch)
            print "nll_train : ", statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
                "nll_test : ", statuses['nll_test'][-1], "error_test : ", statuses['error_test'][-1]
            if (args['--monitoring_LDS']):
                statuses['LDS_train'].append(f_LDS_train())
                statuses['LDS_test'].append(f_LDS_test())
                print "LDS_train : ", statuses['LDS_train'][-1], "LDS_test : ", statuses['LDS_test'][-1]

        f_lr_decay()
    make_sure_path_exists("./trained_model")
    cPickle.dump((model, statuses, args),
                 open('./trained_model/' + args['--save_filename'], 'wb'),
                 cPickle.HIGHEST_PROTOCOL)
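
The num_power_iter option in the VAT costs controls how many power-iteration steps are used to approximate the dominant eigenvector of the local curvature of the output divergence; the VAT papers report that a single iteration often suffices. A generic numpy power-iteration sketch on an explicit symmetric matrix (VAT itself never materializes this matrix and works through finite differences instead):

import numpy

rng = numpy.random.RandomState(0)

# A random symmetric matrix standing in for the curvature matrix that
# VAT only ever probes indirectly.
A = rng.randn(8, 8)
A = A + A.T

d = rng.randn(8)
for _ in range(10):                # plays the role of num_power_iter
    d = A.dot(d)
    d /= numpy.linalg.norm(d)      # renormalize, cf. the norm constraint

# d now approximates the eigenvector of the largest-magnitude eigenvalue;
# the Rayleigh quotient estimates that eigenvalue.
print('dominant eigenvalue estimate: %.4f' % d.dot(A.dot(d)))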