Example #1
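The four snippets below are variants of the same Theano training routine for virtual adversarial training (VAT) on a sentiment-classification task. They depend on project-local modules (costs, optimizers, the FNN_sentiment model, make_sure_path_exists) that are not shown on this page, and they expect the datasets to arrive as Theano shared variables (the code calls .get_value() on them and binds them through theano.function givens). A minimal, hypothetical setup sketch — placeholder shapes and argument values, with the project imports assumed rather than shown:

import numpy
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
import cPickle

# Project-local modules assumed by the examples (contents not shown here):
# import costs, optimizers
# from fnn_sentiment import FNN_sentiment   # hypothetical module path

# Datasets as shared variables; shapes and contents are placeholders.
rng = numpy.random.RandomState(1)
x_train = theano.shared(rng.randn(100, 256).astype(theano.config.floatX))
t_train = theano.shared(numpy.zeros(100, dtype='int32'))
x_test = theano.shared(rng.randn(50, 256).astype(theano.config.floatX))
t_test = theano.shared(numpy.zeros(50, dtype='int32'))
ul_x_train = theano.shared(rng.randn(400, 256).astype(theano.config.floatX))

# docopt-style option dict; these are the keys the examples read,
# with placeholder values.
args = {
    '--seed': '1',
    '--layer_sizes': '256-100-2',    # input dim, hidden dim, num classes
    '--cost_type': 'VAT',            # MLE | L2 | AT | VAT | VAT_finite_diff
    '--lamb': '1.0',
    '--epsilon': '0.1',
    '--norm_constraint': 'L2',       # placeholder; valid values depend on costs.py
    '--num_power_iter': '1',
    '--initial_learning_rate': '0.001',
    '--learning_rate_decay': '0.95',
    '--ul_batch_size': '100',
    '--num_epochs': '10',
    '--num_batch_it': '4',
    '--save_filename': 'trained_model.pkl',
}

Example #1 cycles through minibatches of the unlabeled set (reshuffling after each pass) while feeding the full labeled set at every update.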
def train(args,x_train,t_train,x_test,t_test,ul_x_train=None):

    print args

    numpy.random.seed(int(args['--seed']))


    layer_sizes = [int(layer_size) for layer_size in args['--layer_sizes'].split('-')]
    model = FNN_sentiment(layer_sizes=layer_sizes)

    x = T.matrix()
    ul_x = T.matrix()
    t = T.ivector()

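    # Select the training objective from --cost_type: plain cross-entropy (MLE),
    # cross-entropy plus L2 weight decay, adversarial training (AT), or virtual
    # adversarial training (VAT, or its finite-difference variant).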
    if(args['--cost_type']=='MLE'):
        cost = costs.cross_entropy_loss(x=x,t=t,forward_func=model.forward_train)
    elif(args['--cost_type']=='L2'):
        cost = costs.cross_entropy_loss(x=x,t=t,forward_func=model.forward_train) \
               + costs.weight_decay(params=model.params,coeff=float(args['--lamb']))
    elif(args['--cost_type']=='AT'):
        cost = costs.adversarial_training(x,t,model.forward_train,
                                              'CE',
                                              epsilon=float(args['--epsilon']),
                                              norm_constraint = args['--norm_constraint'],
                                              forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    elif(args['--cost_type']=='VAT'):
        cost = costs.virtual_adversarial_training(x,t,model.forward_train,
                                              'CE',
                                              epsilon=float(args['--epsilon']),
                                              norm_constraint = args['--norm_constraint'],
                                              num_power_iter = int(args['--num_power_iter']),
                                              x_for_generating_adversarial_examples = ul_x,
                                              forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    elif(args['--cost_type']=='VAT_finite_diff'):
        cost = costs.virtual_adversarial_training_finite_diff(x,t,model.forward_train,
                                              'CE',
                                              epsilon=float(args['--epsilon']),
                                              norm_constraint = args['--norm_constraint'],
                                              num_power_iter = int(args['--num_power_iter']),
                                              x_for_generating_adversarial_examples = ul_x,
                                              unchain_y = False,
                                              forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    nll = costs.cross_entropy_loss(x=x,t=t,forward_func=model.forward_test)
    error = costs.error(x=x,t=t,forward_func=model.forward_test)

    optimizer = optimizers.ADAM(cost=cost,params=model.params,alpha=float(args['--initial_learning_rate']))



    ul_index = T.iscalar()
    ul_batch_size = int(args['--ul_batch_size'])

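    # One call to f_train performs a single ADAM update, using the full labeled
    # set plus the ul_index-th minibatch of the unlabeled set.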
    f_train = theano.function(inputs=[ul_index], outputs=cost, updates=optimizer.updates,
                              givens={
                                  x:x_train,
                                  t:t_train,
                                  ul_x:ul_x_train[ul_batch_size*ul_index:ul_batch_size*(ul_index+1)]},
                              on_unused_input='warn')
    f_nll_train = theano.function(inputs=[], outputs=nll,
                              givens={
                                  x:x_train,
                                  t:t_train})
    f_nll_test = theano.function(inputs=[], outputs=nll,
                              givens={
                                  x:x_test,
                                  t:t_test})

    f_error_train = theano.function(inputs=[], outputs=error,
                              givens={
                                  x:x_train,
                                  t:t_train})
    f_error_test = theano.function(inputs=[], outputs=error,
                              givens={
                                  x:x_test,
                                  t:t_test})
    f_lr_decay = theano.function(inputs=[],outputs=optimizer.alpha,
                                 updates={optimizer.alpha:theano.shared(numpy.array(args['--learning_rate_decay']).astype(theano.config.floatX))*optimizer.alpha})

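    # Reshuffles the unlabeled set in place (a shared-variable update driven by
    # a Theano random permutation); invoked after each full pass below.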
    ul_randix = RandomStreams(seed=numpy.random.randint(1234)).permutation(n=ul_x_train.shape[0])
    f_permute_ul_train_set = theano.function(inputs=[],outputs=ul_x_train,updates={ul_x_train:ul_x_train[ul_randix]})

    statuses = {}
    statuses['nll_train'] = []
    statuses['error_train'] = []
    statuses['nll_test'] = []
    statuses['error_test'] = []

    n_train = numpy.asarray(x_train.get_value().shape[0],theano.config.floatX)
    n_test = numpy.asarray(x_test.get_value().shape[0],theano.config.floatX)
    n_ul_train = ul_x_train.get_value().shape[0]

    print "n_train:" + str(n_train)
    print "n_test:" + str(n_test)
    print "n_ul_train:" + str(n_ul_train)

    statuses['nll_train'].append(f_nll_train())
    statuses['error_train'].append(f_error_train()/n_train)
    statuses['nll_test'].append(f_nll_test())
    statuses['error_test'].append(f_error_test()/n_test)

    print "[Epoch]",str(-1)
    print  "nll_train : " , statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
            "nll_test : " , statuses['nll_test'][-1],  "error_test : ", statuses['error_test'][-1]

    print "training..."

    make_sure_path_exists("./trained_model")

    ul_i = 0
    for epoch in xrange(int(args['--num_epochs'])):
        #cPickle.dump((statuses,args),open('./trained_model/'+'tmp-' + args['--save_filename'],'wb'),cPickle.HIGHEST_PROTOCOL)

        for it in xrange(int(args['--num_batch_it'])):
            print "ul_batch_index:" + str(ul_i) + "\r",
            f_train(ul_i)

            if ul_i >= n_ul_train / ul_batch_size - 1:
                f_permute_ul_train_set()
                ul_i = 0
            else:
                ul_i = ul_i + 1


        statuses['nll_train'].append(f_nll_train())
        statuses['error_train'].append(f_error_train()/n_train)
        statuses['nll_test'].append(f_nll_test())
        statuses['error_test'].append(f_error_test()/n_test)
        print "[Epoch]",str(epoch)
        print  "nll_train : " , statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
                "nll_test : " , statuses['nll_test'][-1],  "error_test : ", statuses['error_test'][-1]


        f_lr_decay()
    """
    ### finetune batch stat ###
    #f_finetune = theano.function(inputs=[ul_index],outputs=model.forward_for_finetuning_batch_stat(x),
    #                             givens={x:ul_x_train[ul_batch_size*ul_index:ul_batch_size*(ul_index+1)]})
    #[f_finetune(i) for i in xrange(n_ul_train/ul_batch_size)]

    statuses['nll_train'].append(f_nll_train())
    statuses['error_train'].append(f_error_train()/n_train)
    statuses['nll_test'].append(f_nll_test())
    statuses['error_test'].append(f_error_test()/n_test)
    print "[after finetuning]"
    print  "nll_train : " , statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
        "nll_test : " , statuses['nll_test'][-1],  "error_test : ", statuses['error_test'][-1]

    ###########################
    """
    #make_sure_path_exists("./trained_model")
    #cPickle.dump((model,statuses,args),open('./trained_model/'+args['--save_filename'],'wb'),cPickle.HIGHEST_PROTOCOL)
    return f_error_train()/n_train, f_error_test()/n_test
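With the setup sketched at the top of this page, a call would look like this (all names are placeholders):

err_train, err_test = train(args, x_train, t_train, x_test, t_test,
                            ul_x_train=ul_x_train)
print "train error:", err_train, "test error:", err_test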
Example #2
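Unlike Example #1, this variant trains full-batch: f_train takes no index and is given the entire labeled and unlabeled sets, so each "epoch" performs exactly one ADAM update. It also passes the regularization weight lamb to the AT/VAT costs and checkpoints (statuses, args) with cPickle at the start of every epoch.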
def train(args,x_train,t_train,x_test,t_test,ul_x_train=None):

    print args

    numpy.random.seed(int(args['--seed']))

    layer_sizes = [int(layer_size) for layer_size in args['--layer_sizes'].split('-')] 
    model = FNN_sentiment(layer_sizes=layer_sizes)

    x = T.matrix()
    ul_x = T.matrix()
    t = T.ivector()


    if(args['--cost_type']=='MLE'):
        cost = costs.cross_entropy_loss(x=x,t=t,forward_func=model.forward_train)
    elif(args['--cost_type']=='L2'):
        cost = costs.cross_entropy_loss(x=x,t=t,forward_func=model.forward_train) \
               + costs.weight_decay(params=model.params,coeff=float(args['--lamb']))
    elif(args['--cost_type']=='AT'):
        cost = costs.adversarial_training(x,t,model.forward_train,
                                              'CE',
                                              epsilon=float(args['--epsilon']),
                                              lamb=float(args['--lamb']),
                                              norm_constraint = args['--norm_constraint'],
                                              forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    elif(args['--cost_type']=='VAT'):
        cost = costs.virtual_adversarial_training(x,t,model.forward_train,
                                              'CE',
                                              epsilon=float(args['--epsilon']),
                                              lamb=float(args['--lamb']),
                                              norm_constraint = args['--norm_constraint'],
                                              num_power_iter = int(args['--num_power_iter']),
                                              x_for_generating_adversarial_examples=ul_x,
                                              forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    elif(args['--cost_type']=='VAT_finite_diff'):
        cost = costs.virtual_adversarial_training_finite_diff(x,t,model.forward_train,
                                              'CE',
                                              epsilon=float(args['--epsilon']),
                                              lamb=float(args['--lamb']),
                                              norm_constraint = args['--norm_constraint'],
                                              num_power_iter = int(args['--num_power_iter']),
                                              x_for_generating_adversarial_examples=ul_x,
                                              unchain_y = False,
                                              forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    nll = costs.cross_entropy_loss(x=x,t=t,forward_func=model.forward_test)
    error = costs.error(x=x,t=t,forward_func=model.forward_test)

    optimizer = optimizers.ADAM(cost=cost,params=model.params,alpha=float(args['--initial_learning_rate']))


    f_train = theano.function(inputs=[], outputs=cost, updates=optimizer.updates,
                              givens={
                                  x:x_train,
                                  t:t_train,
                                  ul_x:ul_x_train},on_unused_input='warn')
    f_nll_train = theano.function(inputs=[], outputs=nll,
                              givens={
                                  x:x_train,
                                  t:t_train})
    f_nll_test = theano.function(inputs=[], outputs=nll,
                              givens={
                                  x:x_test,
                                  t:t_test})

    f_error_train = theano.function(inputs=[], outputs=error,
                              givens={
                                  x:x_train,
                                  t:t_train})
    f_error_test = theano.function(inputs=[], outputs=error,
                              givens={
                                  x:x_test,
                                  t:t_test})

    f_lr_decay = theano.function(inputs=[],outputs=optimizer.alpha,
                                 updates={optimizer.alpha:theano.shared(numpy.array(args['--learning_rate_decay']).astype(theano.config.floatX))*optimizer.alpha})


    statuses = {}
    statuses['nll_train'] = []
    statuses['error_train'] = []
    statuses['nll_test'] = []
    statuses['error_test'] = []

    n_train = numpy.asarray(x_train.get_value().shape[0],theano.config.floatX)
    n_test = numpy.asarray(x_test.get_value().shape[0],theano.config.floatX)


    statuses['nll_train'].append(f_nll_train())
    statuses['error_train'].append(f_error_train()/n_train)
    statuses['nll_test'].append(f_nll_test())
    statuses['error_test'].append(f_error_test()/n_test)

    print "[Epoch]",str(-1)
    print  "nll_train : " , statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
            "nll_test : " , statuses['nll_test'][-1],  "error_test : ", statuses['error_test'][-1]

    print "training..."

    make_sure_path_exists("./trained_model")

    for epoch in xrange(int(args['--num_epochs'])):
        cPickle.dump((statuses,args),open('./trained_model/'+'tmp-' + args['--save_filename'],'wb'),cPickle.HIGHEST_PROTOCOL)

        ### update parameters ###
        f_train() 
        #########################


        statuses['nll_train'].append(f_nll_train())
        statuses['error_train'].append(f_error_train()/n_train)
        statuses['nll_test'].append(f_nll_test())
        statuses['error_test'].append(f_error_test()/n_test)
        print "[Epoch]",str(epoch)
        print  "nll_train : " , statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
                "nll_test : " , statuses['nll_test'][-1],  "error_test : ", statuses['error_test'][-1]

        f_lr_decay()

    return f_error_train()/n_train, f_error_test()/n_test
Example #3
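Functionally the same full-batch variant as Example #2, apparently run through an automatic code formatter; the logic is unchanged.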
def train(args, x_train, t_train, x_test, t_test, ul_x_train=None):

    print args

    numpy.random.seed(int(args['--seed']))

    layer_sizes = [
        int(layer_size) for layer_size in args['--layer_sizes'].split('-')
    ]
    model = FNN_sentiment(layer_sizes=layer_sizes)

    x = T.matrix()
    ul_x = T.matrix()
    t = T.ivector()

    if (args['--cost_type'] == 'MLE'):
        cost = costs.cross_entropy_loss(x=x,
                                        t=t,
                                        forward_func=model.forward_train)
    elif (args['--cost_type'] == 'L2'):
        cost = costs.cross_entropy_loss(x=x,t=t,forward_func=model.forward_train) \
               + costs.weight_decay(params=model.params,coeff=float(args['--lamb']))
    elif (args['--cost_type'] == 'AT'):
        cost = costs.adversarial_training(
            x,
            t,
            model.forward_train,
            'CE',
            epsilon=float(args['--epsilon']),
            lamb=float(args['--lamb']),
            norm_constraint=args['--norm_constraint'],
            forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    elif (args['--cost_type'] == 'VAT'):
        cost = costs.virtual_adversarial_training(
            x,
            t,
            model.forward_train,
            'CE',
            epsilon=float(args['--epsilon']),
            lamb=float(args['--lamb']),
            norm_constraint=args['--norm_constraint'],
            num_power_iter=int(args['--num_power_iter']),
            x_for_generating_adversarial_examples=ul_x,
            forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    elif (args['--cost_type'] == 'VAT_finite_diff'):
        cost = costs.virtual_adversarial_training_finite_diff(
            x,
            t,
            model.forward_train,
            'CE',
            epsilon=float(args['--epsilon']),
            lamb=float(args['--lamb']),
            norm_constraint=args['--norm_constraint'],
            num_power_iter=int(args['--num_power_iter']),
            x_for_generating_adversarial_examples=ul_x,
            unchain_y=False,
            forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    nll = costs.cross_entropy_loss(x=x, t=t, forward_func=model.forward_test)
    error = costs.error(x=x, t=t, forward_func=model.forward_test)

    optimizer = optimizers.ADAM(cost=cost,
                                params=model.params,
                                alpha=float(args['--initial_learning_rate']))

    f_train = theano.function(inputs=[],
                              outputs=cost,
                              updates=optimizer.updates,
                              givens={
                                  x: x_train,
                                  t: t_train,
                                  ul_x: ul_x_train
                              },
                              on_unused_input='warn')
    f_nll_train = theano.function(inputs=[],
                                  outputs=nll,
                                  givens={
                                      x: x_train,
                                      t: t_train
                                  })
    f_nll_test = theano.function(inputs=[],
                                 outputs=nll,
                                 givens={
                                     x: x_test,
                                     t: t_test
                                 })

    f_error_train = theano.function(inputs=[],
                                    outputs=error,
                                    givens={
                                        x: x_train,
                                        t: t_train
                                    })
    f_error_test = theano.function(inputs=[],
                                   outputs=error,
                                   givens={
                                       x: x_test,
                                       t: t_test
                                   })

    f_lr_decay = theano.function(
        inputs=[],
        outputs=optimizer.alpha,
        updates={
            optimizer.alpha: theano.shared(
                numpy.array(args['--learning_rate_decay']).astype(theano.config.floatX)
            ) * optimizer.alpha
        })

    statuses = {}
    statuses['nll_train'] = []
    statuses['error_train'] = []
    statuses['nll_test'] = []
    statuses['error_test'] = []

    n_train = numpy.asarray(x_train.get_value().shape[0], theano.config.floatX)
    n_test = numpy.asarray(x_test.get_value().shape[0], theano.config.floatX)

    statuses['nll_train'].append(f_nll_train())
    statuses['error_train'].append(f_error_train() / n_train)
    statuses['nll_test'].append(f_nll_test())
    statuses['error_test'].append(f_error_test() / n_test)

    print "[Epoch]", str(-1)
    print  "nll_train : " , statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
            "nll_test : " , statuses['nll_test'][-1],  "error_test : ", statuses['error_test'][-1]

    print "training..."

    make_sure_path_exists("./trained_model")

    for epoch in xrange(int(args['--num_epochs'])):
        cPickle.dump(
            (statuses, args),
            open('./trained_model/' + 'tmp-' + args['--save_filename'],
                 'wb'), cPickle.HIGHEST_PROTOCOL)

        ### update parameters ###
        f_train()
        #########################

        statuses['nll_train'].append(f_nll_train())
        statuses['error_train'].append(f_error_train() / n_train)
        statuses['nll_test'].append(f_nll_test())
        statuses['error_test'].append(f_error_test() / n_test)
        print "[Epoch]", str(epoch)
        print  "nll_train : " , statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
                "nll_test : " , statuses['nll_test'][-1],  "error_test : ", statuses['error_test'][-1]

        f_lr_decay()

    return f_error_train() / n_train, f_error_test() / n_test
Example #4
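The same unlabeled-minibatch variant as Example #1 (including the omission of the lamb argument to the AT/VAT costs), likewise autoformatted.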
def train(args, x_train, t_train, x_test, t_test, ul_x_train=None):

    print args

    numpy.random.seed(int(args['--seed']))

    layer_sizes = [
        int(layer_size) for layer_size in args['--layer_sizes'].split('-')
    ]
    model = FNN_sentiment(layer_sizes=layer_sizes)

    x = T.matrix()
    ul_x = T.matrix()
    t = T.ivector()

    if (args['--cost_type'] == 'MLE'):
        cost = costs.cross_entropy_loss(x=x,
                                        t=t,
                                        forward_func=model.forward_train)
    elif (args['--cost_type'] == 'L2'):
        cost = costs.cross_entropy_loss(x=x,t=t,forward_func=model.forward_train) \
               + costs.weight_decay(params=model.params,coeff=float(args['--lamb']))
    elif (args['--cost_type'] == 'AT'):
        cost = costs.adversarial_training(
            x,
            t,
            model.forward_train,
            'CE',
            epsilon=float(args['--epsilon']),
            norm_constraint=args['--norm_constraint'],
            forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    elif (args['--cost_type'] == 'VAT'):
        cost = costs.virtual_adversarial_training(
            x,
            t,
            model.forward_train,
            'CE',
            epsilon=float(args['--epsilon']),
            norm_constraint=args['--norm_constraint'],
            num_power_iter=int(args['--num_power_iter']),
            x_for_generating_adversarial_examples=ul_x,
            forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    elif (args['--cost_type'] == 'VAT_finite_diff'):
        cost = costs.virtual_adversarial_training_finite_diff(
            x,
            t,
            model.forward_train,
            'CE',
            epsilon=float(args['--epsilon']),
            norm_constraint=args['--norm_constraint'],
            num_power_iter=int(args['--num_power_iter']),
            x_for_generating_adversarial_examples=ul_x,
            unchain_y=False,
            forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    nll = costs.cross_entropy_loss(x=x, t=t, forward_func=model.forward_test)
    error = costs.error(x=x, t=t, forward_func=model.forward_test)

    optimizer = optimizers.ADAM(cost=cost,
                                params=model.params,
                                alpha=float(args['--initial_learning_rate']))

    ul_index = T.iscalar()
    ul_batch_size = int(args['--ul_batch_size'])

    f_train = theano.function(
        inputs=[ul_index],
        outputs=cost,
        updates=optimizer.updates,
        givens={
            x: x_train,
            t: t_train,
            ul_x: ul_x_train[ul_batch_size * ul_index:ul_batch_size * (ul_index + 1)]
        },
        on_unused_input='warn')
    f_nll_train = theano.function(inputs=[],
                                  outputs=nll,
                                  givens={
                                      x: x_train,
                                      t: t_train
                                  })
    f_nll_test = theano.function(inputs=[],
                                 outputs=nll,
                                 givens={
                                     x: x_test,
                                     t: t_test
                                 })

    f_error_train = theano.function(inputs=[],
                                    outputs=error,
                                    givens={
                                        x: x_train,
                                        t: t_train
                                    })
    f_error_test = theano.function(inputs=[],
                                   outputs=error,
                                   givens={
                                       x: x_test,
                                       t: t_test
                                   })
    f_lr_decay = theano.function(
        inputs=[],
        outputs=optimizer.alpha,
        updates={
            optimizer.alpha: theano.shared(
                numpy.array(args['--learning_rate_decay']).astype(theano.config.floatX)
            ) * optimizer.alpha
        })

    ul_randix = RandomStreams(seed=numpy.random.randint(1234)).permutation(
        n=ul_x_train.shape[0])
    f_permute_ul_train_set = theano.function(
        inputs=[],
        outputs=ul_x_train,
        updates={ul_x_train: ul_x_train[ul_randix]})

    statuses = {}
    statuses['nll_train'] = []
    statuses['error_train'] = []
    statuses['nll_test'] = []
    statuses['error_test'] = []

    n_train = numpy.asarray(x_train.get_value().shape[0], theano.config.floatX)
    n_test = numpy.asarray(x_test.get_value().shape[0], theano.config.floatX)
    n_ul_train = ul_x_train.get_value().shape[0]

    print "n_train:" + str(n_train)
    print "n_test:" + str(n_test)
    print "n_ul_train:" + str(n_ul_train)

    statuses['nll_train'].append(f_nll_train())
    statuses['error_train'].append(f_error_train() / n_train)
    statuses['nll_test'].append(f_nll_test())
    statuses['error_test'].append(f_error_test() / n_test)

    print "[Epoch]", str(-1)
    print  "nll_train : " , statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
            "nll_test : " , statuses['nll_test'][-1],  "error_test : ", statuses['error_test'][-1]

    print "training..."

    make_sure_path_exists("./trained_model")

    ul_i = 0
    for epoch in xrange(int(args['--num_epochs'])):
        #cPickle.dump((statuses,args),open('./trained_model/'+'tmp-' + args['--save_filename'],'wb'),cPickle.HIGHEST_PROTOCOL)

        for it in xrange(int(args['--num_batch_it'])):
            print "ul_batch_index:" + str(ul_i) + "\r",
            f_train(ul_i)

            if ul_i >= n_ul_train / ul_batch_size - 1:
                f_permute_ul_train_set()
                ul_i = 0
            else:
                ul_i = ul_i + 1

        statuses['nll_train'].append(f_nll_train())
        statuses['error_train'].append(f_error_train() / n_train)
        statuses['nll_test'].append(f_nll_test())
        statuses['error_test'].append(f_error_test() / n_test)
        print "[Epoch]", str(epoch)
        print  "nll_train : " , statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
                "nll_test : " , statuses['nll_test'][-1],  "error_test : ", statuses['error_test'][-1]

        f_lr_decay()
    """
    ### finetune batch stat ###
    #f_finetune = theano.function(inputs=[ul_index],outputs=model.forward_for_finetuning_batch_stat(x),
    #                             givens={x:ul_x_train[ul_batch_size*ul_index:ul_batch_size*(ul_index+1)]})
    #[f_finetune(i) for i in xrange(n_ul_train/ul_batch_size)]

    statuses['nll_train'].append(f_nll_train())
    statuses['error_train'].append(f_error_train()/n_train)
    statuses['nll_test'].append(f_nll_test())
    statuses['error_test'].append(f_error_test()/n_test)
    print "[after finetuning]"
    print  "nll_train : " , statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
        "nll_test : " , statuses['nll_test'][-1],  "error_test : ", statuses['error_test'][-1]

    ###########################
    """
    #make_sure_path_exists("./trained_model")
    #cPickle.dump((model,statuses,args),open('./trained_model/'+args['--save_filename'],'wb'),cPickle.HIGHEST_PROTOCOL)
    return f_error_train() / n_train, f_error_test() / n_test