def train(args, x_train, t_train, x_test, t_test, ul_x_train=None):
    print args
    numpy.random.seed(int(args['--seed']))

    layer_sizes = [int(layer_size) for layer_size in args['--layer_sizes'].split('-')]
    model = FNN_sentiment(layer_sizes=layer_sizes)

    x = T.matrix()
    ul_x = T.matrix()
    t = T.ivector()

    if args['--cost_type'] == 'MLE':
        cost = costs.cross_entropy_loss(x=x, t=t, forward_func=model.forward_train)
    elif args['--cost_type'] == 'L2':
        cost = costs.cross_entropy_loss(x=x, t=t, forward_func=model.forward_train) \
               + costs.weight_decay(params=model.params, coeff=float(args['--lamb']))
    elif args['--cost_type'] == 'AT':
        cost = costs.adversarial_training(x, t, model.forward_train,
                                          'CE',
                                          epsilon=float(args['--epsilon']),
                                          norm_constraint=args['--norm_constraint'],
                                          forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    elif args['--cost_type'] == 'VAT':
        cost = costs.virtual_adversarial_training(x, t, model.forward_train,
                                                  'CE',
                                                  epsilon=float(args['--epsilon']),
                                                  norm_constraint=args['--norm_constraint'],
                                                  num_power_iter=int(args['--num_power_iter']),
                                                  x_for_generating_adversarial_examples=ul_x,
                                                  forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    elif args['--cost_type'] == 'VAT_finite_diff':
        cost = costs.virtual_adversarial_training_finite_diff(x, t, model.forward_train,
                                                              'CE',
                                                              epsilon=float(args['--epsilon']),
                                                              norm_constraint=args['--norm_constraint'],
                                                              num_power_iter=int(args['--num_power_iter']),
                                                              x_for_generating_adversarial_examples=ul_x,
                                                              unchain_y=False,
                                                              forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)

    nll = costs.cross_entropy_loss(x=x, t=t, forward_func=model.forward_test)
    error = costs.error(x=x, t=t, forward_func=model.forward_test)

    optimizer = optimizers.ADAM(cost=cost, params=model.params,
                                alpha=float(args['--initial_learning_rate']))

    # Labeled data are used as one full batch; unlabeled data are sliced into
    # minibatches of size --ul_batch_size, selected by ul_index.
    ul_index = T.iscalar()
    ul_batch_size = int(args['--ul_batch_size'])
    f_train = theano.function(inputs=[ul_index], outputs=cost, updates=optimizer.updates,
                              givens={
                                  x: x_train,
                                  t: t_train,
                                  ul_x: ul_x_train[ul_batch_size * ul_index:ul_batch_size * (ul_index + 1)]},
                              on_unused_input='warn')
    f_nll_train = theano.function(inputs=[], outputs=nll,
                                  givens={x: x_train, t: t_train})
    f_nll_test = theano.function(inputs=[], outputs=nll,
                                 givens={x: x_test, t: t_test})
    f_error_train = theano.function(inputs=[], outputs=error,
                                    givens={x: x_train, t: t_train})
    f_error_test = theano.function(inputs=[], outputs=error,
                                   givens={x: x_test, t: t_test})
    # Multiply ADAM's step size alpha by --learning_rate_decay each time this is called.
    f_lr_decay = theano.function(inputs=[], outputs=optimizer.alpha,
                                 updates={optimizer.alpha:
                                          theano.shared(numpy.array(args['--learning_rate_decay']).astype(theano.config.floatX)) * optimizer.alpha})

    # Shuffle the unlabeled training set (a Theano shared variable) in place.
    ul_randix = RandomStreams(seed=numpy.random.randint(1234)).permutation(n=ul_x_train.shape[0])
    f_permute_ul_train_set = theano.function(inputs=[], outputs=ul_x_train,
                                             updates={ul_x_train: ul_x_train[ul_randix]})

    statuses = {}
    statuses['nll_train'] = []
    statuses['error_train'] = []
    statuses['nll_test'] = []
    statuses['error_test'] = []

    n_train = numpy.asarray(x_train.get_value().shape[0], theano.config.floatX)
    n_test = numpy.asarray(x_test.get_value().shape[0], theano.config.floatX)
    n_ul_train = ul_x_train.get_value().shape[0]
    print "n_train:" + str(n_train)
    print "n_test:" + str(n_test)
    print "n_ul_train:" + str(n_ul_train)

    statuses['nll_train'].append(f_nll_train())
    statuses['error_train'].append(f_error_train() / n_train)
    statuses['nll_test'].append(f_nll_test())
    statuses['error_test'].append(f_error_test() / n_test)
    print "[Epoch]", str(-1)
    print "nll_train : ", statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
        "nll_test : ", statuses['nll_test'][-1], "error_test : ", statuses['error_test'][-1]

    print "training..."

    make_sure_path_exists("./trained_model")
    ul_i = 0
    for epoch in xrange(int(args['--num_epochs'])):
        # cPickle.dump((statuses, args),
        #              open('./trained_model/' + 'tmp-' + args['--save_filename'], 'wb'),
        #              cPickle.HIGHEST_PROTOCOL)
        for it in xrange(int(args['--num_batch_it'])):
            print "ul_batch_index:" + str(ul_i) + "\r",
            f_train(ul_i)
            # Reshuffle the unlabeled set once every full pass over it.
            if ul_i >= n_ul_train / ul_batch_size - 1:
                f_permute_ul_train_set()
                ul_i = 0
            else:
                ul_i = ul_i + 1

        statuses['nll_train'].append(f_nll_train())
        statuses['error_train'].append(f_error_train() / n_train)
        statuses['nll_test'].append(f_nll_test())
        statuses['error_test'].append(f_error_test() / n_test)
        print "[Epoch]", str(epoch)
        print "nll_train : ", statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
            "nll_test : ", statuses['nll_test'][-1], "error_test : ", statuses['error_test'][-1]

        f_lr_decay()

    """
    ### finetune batch stat ###
    #f_finetune = theano.function(inputs=[ul_index],outputs=model.forward_for_finetuning_batch_stat(x),
    #                             givens={x:ul_x_train[ul_batch_size*ul_index:ul_batch_size*(ul_index+1)]})
    #[f_finetune(i) for i in xrange(n_ul_train/ul_batch_size)]

    statuses['nll_train'].append(f_nll_train())
    statuses['error_train'].append(f_error_train()/n_train)
    statuses['nll_test'].append(f_nll_test())
    statuses['error_test'].append(f_error_test()/n_test)
    print "[after finetuning]"
    print "nll_train : " , statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
        "nll_test : " , statuses['nll_test'][-1], "error_test : ", statuses['error_test'][-1]
    ###########################
    """

    #make_sure_path_exists("./trained_model")
    #cPickle.dump((model,statuses,args),open('./trained_model/'+args['--save_filename'],'wb'),cPickle.HIGHEST_PROTOCOL)

    return f_error_train() / n_train, f_error_test() / n_test
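
# A minimal usage sketch for train(), kept as an assumption rather than part of
# the original script: it assumes the datasets arrive as numpy arrays and wraps
# them in Theano shared variables, since train() calls .get_value() and updates
# ul_x_train in place.  The args dict mirrors the docopt-style keys read above;
# every value (and the helper name _example_run) is an illustrative placeholder.
def _example_run(x_train_np, t_train_np, x_test_np, t_test_np, ul_x_train_np):
    import numpy
    import theano

    def as_shared(a, dtype):
        return theano.shared(numpy.asarray(a, dtype=dtype))

    args = {'--seed': '1',
            '--layer_sizes': '100-200-2',    # first entry must equal the input feature dimension
            '--cost_type': 'VAT_finite_diff',
            '--lamb': '1.0',                 # only read when --cost_type is 'L2'
            '--epsilon': '0.5',
            '--norm_constraint': 'L2',
            '--num_power_iter': '1',
            '--initial_learning_rate': '0.002',
            '--learning_rate_decay': '0.9',
            '--num_epochs': '100',
            '--num_batch_it': '10',
            '--ul_batch_size': '100',
            '--save_filename': 'example_model.pkl'}
    return train(args,
                 as_shared(x_train_np, theano.config.floatX),
                 as_shared(t_train_np, 'int32'),
                 as_shared(x_test_np, theano.config.floatX),
                 as_shared(t_test_np, 'int32'),
                 ul_x_train=as_shared(ul_x_train_np, theano.config.floatX))
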
def train(args, x_train, t_train, x_test, t_test, ul_x_train=None):
    print args
    numpy.random.seed(int(args['--seed']))

    layer_sizes = [int(layer_size) for layer_size in args['--layer_sizes'].split('-')]
    model = FNN_sentiment(layer_sizes=layer_sizes)

    x = T.matrix()
    ul_x = T.matrix()
    t = T.ivector()

    if args['--cost_type'] == 'MLE':
        cost = costs.cross_entropy_loss(x=x, t=t, forward_func=model.forward_train)
    elif args['--cost_type'] == 'L2':
        cost = costs.cross_entropy_loss(x=x, t=t, forward_func=model.forward_train) \
               + costs.weight_decay(params=model.params, coeff=float(args['--lamb']))
    elif args['--cost_type'] == 'AT':
        cost = costs.adversarial_training(x, t, model.forward_train,
                                          'CE',
                                          epsilon=float(args['--epsilon']),
                                          lamb=float(args['--lamb']),
                                          norm_constraint=args['--norm_constraint'],
                                          forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    elif args['--cost_type'] == 'VAT':
        cost = costs.virtual_adversarial_training(x, t, model.forward_train,
                                                  'CE',
                                                  epsilon=float(args['--epsilon']),
                                                  lamb=float(args['--lamb']),
                                                  norm_constraint=args['--norm_constraint'],
                                                  num_power_iter=int(args['--num_power_iter']),
                                                  x_for_generating_adversarial_examples=ul_x,
                                                  forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    elif args['--cost_type'] == 'VAT_finite_diff':
        cost = costs.virtual_adversarial_training_finite_diff(x, t, model.forward_train,
                                                              'CE',
                                                              epsilon=float(args['--epsilon']),
                                                              lamb=float(args['--lamb']),
                                                              norm_constraint=args['--norm_constraint'],
                                                              num_power_iter=int(args['--num_power_iter']),
                                                              x_for_generating_adversarial_examples=ul_x,
                                                              unchain_y=False,
                                                              forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)

    nll = costs.cross_entropy_loss(x=x, t=t, forward_func=model.forward_test)
    error = costs.error(x=x, t=t, forward_func=model.forward_test)

    optimizer = optimizers.ADAM(cost=cost, params=model.params,
                                alpha=float(args['--initial_learning_rate']))

    # Full-batch training: every update sees the whole labeled and unlabeled sets.
    f_train = theano.function(inputs=[], outputs=cost, updates=optimizer.updates,
                              givens={x: x_train, t: t_train, ul_x: ul_x_train},
                              on_unused_input='warn')
    f_nll_train = theano.function(inputs=[], outputs=nll,
                                  givens={x: x_train, t: t_train})
    f_nll_test = theano.function(inputs=[], outputs=nll,
                                 givens={x: x_test, t: t_test})
    f_error_train = theano.function(inputs=[], outputs=error,
                                    givens={x: x_train, t: t_train})
    f_error_test = theano.function(inputs=[], outputs=error,
                                   givens={x: x_test, t: t_test})
    # Multiply ADAM's step size alpha by --learning_rate_decay each time this is called.
    f_lr_decay = theano.function(inputs=[], outputs=optimizer.alpha,
                                 updates={optimizer.alpha:
                                          theano.shared(numpy.array(args['--learning_rate_decay']).astype(theano.config.floatX)) * optimizer.alpha})

    statuses = {}
    statuses['nll_train'] = []
    statuses['error_train'] = []
    statuses['nll_test'] = []
    statuses['error_test'] = []

    n_train = numpy.asarray(x_train.get_value().shape[0], theano.config.floatX)
    n_test = numpy.asarray(x_test.get_value().shape[0], theano.config.floatX)

    statuses['nll_train'].append(f_nll_train())
    statuses['error_train'].append(f_error_train() / n_train)
    statuses['nll_test'].append(f_nll_test())
    statuses['error_test'].append(f_error_test() / n_test)
    print "[Epoch]", str(-1)
    print "nll_train : ", statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
        "nll_test : ", statuses['nll_test'][-1], "error_test : ", statuses['error_test'][-1]

    print "training..."

    make_sure_path_exists("./trained_model")
    for epoch in xrange(int(args['--num_epochs'])):
        # Checkpoint the running statistics at the start of every epoch.
        cPickle.dump((statuses, args),
                     open('./trained_model/' + 'tmp-' + args['--save_filename'], 'wb'),
                     cPickle.HIGHEST_PROTOCOL)

        ### update parameters ###
        f_train()
        #########################

        statuses['nll_train'].append(f_nll_train())
        statuses['error_train'].append(f_error_train() / n_train)
        statuses['nll_test'].append(f_nll_test())
        statuses['error_test'].append(f_error_test() / n_test)
        print "[Epoch]", str(epoch)
        print "nll_train : ", statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
            "nll_test : ", statuses['nll_test'][-1], "error_test : ", statuses['error_test'][-1]

        f_lr_decay()

    return f_error_train() / n_train, f_error_test() / n_test
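
# For reference: in both train() variants, f_lr_decay() multiplies ADAM's alpha
# by the constant --learning_rate_decay once per epoch, i.e. an exponential
# schedule alpha_t = alpha_0 * decay**t.  A small standalone sketch of that
# schedule (the helper name and default values are illustrative, not from the
# original code):
def _example_lr_schedule(initial_learning_rate=0.002, learning_rate_decay=0.9, num_epochs=5):
    alpha = initial_learning_rate
    schedule = []
    for epoch in range(num_epochs):
        schedule.append(alpha)               # step size used during this epoch
        alpha = learning_rate_decay * alpha  # what f_lr_decay() does at epoch end
    return schedule                          # e.g. [0.002, 0.0018, 0.00162, ...]
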