def main():
    train_dataset = dataset.Dataset("data/names/*.txt")
    input_size = train_dataset.n_letters
    hidden_size = 256
    output_size = train_dataset.n_lang
    rnn = model.RNN(input_size, hidden_size, output_size)

    # Restore trained weights; abort if the checkpoint is missing or unreadable.
    filename = "rnn_state.pt"
    try:
        state = torch.load(filename)
        rnn.load_state_dict(state["state_dict"])
        # optimizer.load_state_dict(state["optimizer_dict"])
    except (IOError, RuntimeError, KeyError):
        print("Could not load model file")
        return

    while True:
        name = input("Enter a name: ")  # raw_input() under Python 2
        rnn.zero_grad()
        hidden = rnn.initHidden()
        name_tensor = Variable(dataset.lineToTensor(name))
        n_letters = name_tensor.size()[0]
        # Feed the name one character at a time, threading the hidden state.
        for i in range(n_letters):
            output, hidden = rnn(name_tensor[i], hidden)
        # Report the k most likely languages.
        k = 3
        top_v, top_i = output.data.topk(k)
        for j in range(k):
            lang_index = top_i[0][j]
            print(train_dataset.lang_index_map[lang_index])
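# For reference, a minimal sketch of the model.RNN these name-classification
# snippets appear to assume (the single-layer recurrent cell from the PyTorch
# character-level RNN tutorial). The class name and constructor signature match
# the calls above; the internals here are an assumption, not this repo's code.
import torch
import torch.nn as nn

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        # The concatenated input+hidden vector produces both the next hidden
        # state and the output logits.
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        combined = torch.cat((input, hidden), 1)
        hidden = self.i2h(combined)
        output = self.softmax(self.i2o(combined))
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, self.hidden_size)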
def evaluate_rnn_model(ckpt_weights_file, x_data, y_data, batch_size,
                       max_seq_length, vocab_size, n_classes, embed_dim,
                       emb_trainable, model_name, rnn_unit_type, loss_type,
                       hidden_dim, hidden_activation, out_activation,
                       bidirectional, learning_rate, verbose):
    rnn_model = model.RNN(max_seq_length=max_seq_length,
                          vocab_size=vocab_size,
                          n_classes=n_classes,
                          embed_dim=embed_dim,
                          emb_trainable=emb_trainable,
                          model_name=model_name,
                          rnn_unit_type=rnn_unit_type,
                          loss_type=loss_type,
                          hidden_dim=hidden_dim,
                          hidden_activation=hidden_activation,
                          out_activation=out_activation,
                          bidirectional=bidirectional)
    utils.load_model(ckpt_weights_file, rnn_model, learning_rate)
    print("Model from checkpoint %s was loaded." % ckpt_weights_file)
    metrics_names, scores = rnn_model.evaluate(x_data, y_data,
                                               batch_size=batch_size,
                                               verbose=verbose)
    loss = scores[0]
    return loss
def main():
    train_dataset = dataset.Dataset("data/names/*.txt")
    train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True,
                                               num_workers=1)
    input_size = train_dataset.n_letters
    hidden_size = 256
    output_size = train_dataset.n_lang
    rnn = model.RNN(input_size, hidden_size, output_size)
    train(rnn, train_loader)
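# The train() helper is not shown above. A minimal sketch of what it likely
# does, assuming the tutorial-style classifier sketched earlier (NLLLoss over
# a log-softmax output, fed one character at a time). The tensor shapes and
# the learning rate are illustrative assumptions.
import torch
import torch.nn as nn

def train(rnn, train_loader, lr=0.005):
    criterion = nn.NLLLoss()
    optimizer = torch.optim.SGD(rnn.parameters(), lr=lr)
    for name_tensor, lang_index in train_loader:
        optimizer.zero_grad()
        hidden = rnn.initHidden()
        # Assumed shape after DataLoader batching: (1, seq_len, 1, n_letters).
        name_tensor = name_tensor.squeeze(0)
        for i in range(name_tensor.size(0)):
            output, hidden = rnn(name_tensor[i], hidden)
        loss = criterion(output, lang_index)
        loss.backward()
        optimizer.step()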
def load_model():
    x = tf.placeholder(tf.float32, [None, num_input, num_input])
    y = tf.placeholder(tf.float32, [None, num_classes])
    weights = tf.Variable(tf.random_normal([num_hidden, num_classes]))
    biases = tf.Variable(tf.random_normal([num_classes]))
    logits = model.RNN(x, weights, biases, num_input, num_hidden)
    prediction = tf.nn.softmax(logits)
    loss_op = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss_op)
    correct_predictions = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32))
    return x, y, train_op, accuracy
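# A hedged usage sketch for the TF1 graph returned above: run the train op and
# the accuracy node inside a session. training_steps and next_batch() are
# hypothetical names; the actual data pipeline is not shown in this snippet.
x, y, train_op, accuracy = load_model()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(training_steps):
        batch_x, batch_y = next_batch()  # hypothetical batch helper
        sess.run(train_op, feed_dict={x: batch_x, y: batch_y})
        if step % 100 == 0:
            acc = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y})
            print("step %d, batch accuracy %.3f" % (step, acc))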
import pickle
from itertools import product

def main():
    # Load GloVe embeddings.
    glove = GloveReader()
    # Load the train and dev splits of SST.
    train_data = SST(*load_data('./trees/train.txt', glove))
    dev_data = SST(*load_data('./trees/dev.txt', glove))

    # Grid search over dropout, hidden size, depth, and embedding choice.
    configuration = {
        'dropout': [0, 0.2],
        'hidden_size': [256, 512],
        'n_layers': [1, 3],
        'embeddings': [glove.embeddings, None]
    }
    model_params = list(product(*configuration.values()))
    param_names = list(configuration)

    for params in model_params:
        info = (f'{params[0]}_{params[1]}_{params[2]}_'
                f'{"glove" if params[3] is not None else "default"}')
        model_name = f'model_{info}'
        data_name = f'data_{info}'
        m = model.RNN(300, params[2], params[1], 5,
                      pretrained_embeddings=params[3], dropout=params[0])
        data = train(m, train_data, dev_data, model_name=model_name)
        with open(data_name, 'wb') as f:
            pickle.dump(data, f)
        del m
        del data
def __model_init(self, n_pos_tags):
    """
    Initializes a neural network model (FNN or RNN).

    @param n_pos_tags: Number of POS tags; equal to the size of the
        output layer of the neural network.
    """
    # Load the model architecture based on settings; the default is an FNN
    # (feed-forward neural network).
    if self.architecture == 'RNN':
        nn_model = model.RNN(self.vocab_size, self.n_timesteps,
                             self.embedding_size, self.h_size, n_pos_tags)
    else:
        nn_model = model.FNN(self.vocab_size, self.n_past_words,
                             self.embedding_size, self.h_size, n_pos_tags)
    global_step = tf.Variable(initial_value=0, name="global_step",
                              trainable=False)
    train_op = nn_model.optimizer.minimize(nn_model.loss,
                                           global_step=global_step)
    return nn_model, train_op, global_step
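# A hedged usage sketch for the ops returned by __model_init in TF1: each
# training step runs train_op and reads back the step counter and the loss.
# The feed-dict keys (nn_model.input_x / nn_model.input_y) are assumptions
# about the model class, not shown in the snippet above.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, step, loss = sess.run(
        [train_op, global_step, nn_model.loss],
        feed_dict={nn_model.input_x: batch_x, nn_model.input_y: batch_y})
    print("step %d, loss %.4f" % (step, loss))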
def predict_rnn_model(ckpt_weights_file, x_data, batch_size, max_seq_length,
                      vocab_size, n_classes, embed_dim, emb_trainable,
                      model_name, rnn_unit_type, loss_type, hidden_dim,
                      hidden_activation, out_activation, bidirectional,
                      learning_rate, verbose):
    rnn_model = model.RNN(max_seq_length=max_seq_length,
                          vocab_size=vocab_size,
                          n_classes=n_classes,
                          embed_dim=embed_dim,
                          emb_trainable=emb_trainable,
                          model_name=model_name,
                          rnn_unit_type=rnn_unit_type,
                          loss_type=loss_type,
                          hidden_dim=hidden_dim,
                          hidden_activation=hidden_activation,
                          out_activation=out_activation,
                          bidirectional=bidirectional)
    utils.load_model(ckpt_weights_file, rnn_model, learning_rate)
    predictions = rnn_model.predict(x_data, batch_size=batch_size,
                                    verbose=verbose)
    return predictions
def main(): print("Fetching data") train_data, valid_data = fetch_data2(train_path, start=4), fetch_data2(dev_path, start=4) # print(train_data[1362]) # print(train_data[1337]) train_data = convert_to_vector_representation2(train_data) valid_data = convert_to_vector_representation2(valid_data) print("Vectorized data") lstm_attn = model.bilstm_attn(batch_size=batch_size, output_size=output_size, hidden_size=hidden_size, embed_dim=embed_dim, bidirectional=bidirectional, dropout=dropout) rnn = model.RNN(input_dim=embed_dim, h=hidden_size, num_layer=num_layer) # ffnn = model.FFNN(input_dim=embed_dim, h=hidden_size) # optimizer = torch.optim.Adam(lstm_attn.parameters(), lr=lr, weight_decay=weight_decay) optimizer = torch.optim.Adam(rnn.parameters(), lr=lr, weight_decay=weight_decay) train_acc = valid_acc = 1 epoch = 0 while epoch < epochs: epoch += 1 optimizer.zero_grad() loss = None correct = 0 total = 0 start_time = time.time() print("Training started for epoch {}".format(epoch)) random.shuffle( train_data) # Good practice to shuffle order of training data N = len(train_data) for minibatch_index in tqdm(range(N // batch_size)): optimizer.zero_grad() loss = None for example_index in range(batch_size): input_vector, gold_label = train_data[minibatch_index * batch_size + example_index] input_vector = torch.from_numpy( np.asarray([np.asarray(word) for word in input_vector])).unsqueeze(1) # predicted_vector = lstm_attn(input_vector) predicted_vector = rnn(input_vector) predicted_label = torch.argmax(predicted_vector) #if predicted_label != gold_label and epoch == 13: # print(minibatch_index * batch_size + example_index) correct += int(predicted_label == gold_label) total += 1 example_loss = rnn.compute_Loss(predicted_vector.view(1, -1), torch.tensor([gold_label])) if loss is None: loss = example_loss else: loss += example_loss loss = loss / batch_size loss.backward() optimizer.step() print("Training completed for epoch {}".format(epoch)) print("Training accuracy for epoch {}: {}".format( epoch, correct / total)) print("Training time for this epoch: {}".format(time.time() - start_time)) correct = 0 total = 0 start_time = time.time() print("Validation started for epoch {}".format(epoch)) random.shuffle( valid_data) # Good practice to shuffle order of validation data N = len(valid_data) for minibatch_index in tqdm(range(N // batch_size)): for example_index in range(batch_size): input_vector, gold_label = valid_data[minibatch_index * batch_size + example_index] input_vector = torch.from_numpy( np.asarray([np.asarray(word) for word in input_vector])).unsqueeze(1) # predicted_vector = lstm_attn(input_vector) predicted_vector = rnn(input_vector) predicted_label = torch.argmax(predicted_vector) correct += int(predicted_label == gold_label) total += 1 print("Validation completed for epoch {}".format(epoch)) print("Validation accuracy for epoch {}: {}".format( epoch, correct / total)) print("Validation time for this epoch: {}".format(time.time() - start_time)) torch.save(rnn.state_dict(), model_path)
y_ = tf.placeholder(tf.int32, [None, n_steps])
early_stop = tf.placeholder(tf.int32)

# LSTM layer
# 2 x n_hidden = state_size = (hidden state & cell state)
istate = tf.placeholder(tf.float32, [None, 2 * n_hidden])
weights = {
    'hidden': model.weight_variable([n_input, n_hidden]),
    'out': model.weight_variable([n_hidden, n_classes])
}
biases = {
    'hidden': model.bias_variable([n_hidden]),
    'out': model.bias_variable([n_classes])
}
y = model.RNN(x, istate, weights, biases, n_hidden, n_steps, n_input,
              early_stop)
batch_size = 1
logits = tf.reshape(tf.concat(y, 1), [-1, n_classes])

NUM_THREADS = 1
config = tf.ConfigProto(intra_op_parallelism_threads=NUM_THREADS,
                        inter_op_parallelism_threads=NUM_THREADS,
                        log_device_placement=False)
sess = tf.Session(config=config)
init = tf.global_variables_initializer()
sess.run(init)

saver = tf.train.Saver()  # save all variables
checkpoint_dir = model_dir
ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
if ckpt and ckpt.model_checkpoint_path:
    # Restore the most recent checkpoint.
    saver.restore(sess, ckpt.model_checkpoint_path)
c_to_i = pickle.load(open(args.c_to_i, 'rb'))
i_to_c = pickle.load(open(args.i_to_c, 'rb'))
n_char = len(c_to_i)

dataloaders = []
for fn in args.filenames:
    with open(fn) as f:
        lines = f.readlines()
    lines = [s.strip().split()[1] for s in lines]
    test_dataset = MolDataset(lines, c_to_i)
    test_dataloader = DataLoader(test_dataset, args.batch_size, shuffle=True,
                                 num_workers=args.num_workers,
                                 collate_fn=my_collate)
    dataloaders.append(test_dataloader)

# Note: this rebinds the name `model` from the imported module to the network.
model = model.RNN(args.n_feature, args.n_feature, n_char, args.n_layer, i_to_c)
model = utils.initialize_model(model, device, args.save_files)
print("number of parameters :",
      sum(p.numel() for p in model.parameters() if p.requires_grad))

model.eval()
for fn, dataloader in zip(args.filenames, dataloaders):
    log_likelihoods = []
    for i_batch, sample in enumerate(dataloader):
        x = sample['X'].to(device).long()
        l = sample['L'].long().data.cpu().numpy()
        output, p_char = model(x)
        p_char = torch.log(p_char + 1e-10)  # epsilon avoids log(0)
        p_char = p_char.data.cpu().numpy()
        x = x.data.cpu().numpy()
batch_size = 1
n_steps = 1
n_input = unique_chars
n_output = 2
num_layers = 1
n_hidden = 128

# Define weights
weights = {
    'out': tf.Variable(tf.random_normal([n_hidden, n_output]), name="weights")
}
biases = {'out': tf.Variable(tf.random_normal([n_output]), name="biases")}
state_placeholder = tf.placeholder(tf.float32,
                                   shape=(num_layers, 2, 1, n_hidden))
outputs, states = model.RNN(input, weights, biases, num_layers, n_input,
                            n_hidden, batch_size, n_steps, state_placeholder)
# squeeze_dims is a deprecated alias of axis in tf.squeeze.
predictions = tf.nn.softmax(tf.squeeze(outputs, axis=[0]))
saver = tf.train.Saver([weights['out'], biases['out']])

def lcs(S, T):
    """Longest common substring via dynamic programming over a counter table."""
    m = len(S)
    n = len(T)
    counter = [[0] * (n + 1) for x in range(m + 1)]
    longest = 0
    lcs_set = set()
    for i in range(m):
        for j in range(n):
            if S[i] == T[j]:
                c = counter[i][j] + 1
def train(input_list, unique_word_list):
    vocab_size = len(unique_word_list)
    word_embedding = load_embedding(vocab_size, embedding_size)
    rnn = model.RNN(embedding_size, hidden_size, vocab_size, word_embedding)
    device = (torch.device("cuda") if torch.cuda.is_available()
              else torch.device("cpu"))
    rnn.to(device)
    hidden = rnn.initHidden()

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)

    for epoch in range(1, 1 + num_epochs):
        print("epoch is ", epoch)
        # Next-word language modelling: the target at position i is the word
        # at position i + 1.
        for i in range(len(input_list) - 1):
            input_ind = torch.tensor(input_list[i])
            input = word_embedding(input_ind)
            optimizer.zero_grad()
            output, hidden_var = rnn(input, hidden)
            target = torch.tensor(input_list[i + 1])
            target = torch.reshape(target, (1,))
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
        print("end of epoch")

    # Generate by greedily following the argmax prediction, threading the
    # hidden state between steps.
    rnn.eval()
    i = 0
    poem = []
    index = torch.tensor(initial_index)
    while i < word_number:
        outcome, hidden_var = rnn(word_embedding(index), hidden_var)
        prediction = outcome[0].max(0)[1]
        prediction_character = unique_word_list[prediction.numpy()]
        # if prediction_character not in ('END', 'UNK', 'START'):
        #     poem.append(prediction_character)
        #     i = i + 1
        #     index = prediction
        # else:
        #     index = torch.randint(0, vocab_size, (1,))
        #     hidden_var = hidden_var + torch.rand(hidden_var.size())
        poem.append(prediction_character)
        i = i + 1
        index = prediction

    # for i in [0, POEM_TYPE, 2 * POEM_TYPE, 3 * POEM_TYPE]:
    #     if i == 0 or i == 2 * POEM_TYPE:
    #         print(''.join(poem[i: i + POEM_TYPE]) + ',')
    #     else:
    #         print(''.join(poem[i: i + POEM_TYPE]) + '。')
    return poem
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 26 14:52:40 2019

@author: ashima.garg
"""
# Classifying names with a character-level RNN
import data
import model
import config

if __name__ == "__main__":
    data_obj = data.DATA()
    data_obj.read()
    print("Train Data Loaded")
    # Build model
    rnn = model.RNN(data_obj.n_letters, config.N_HIDDEN, data_obj.n_categories)
    modeloperator = model.Operator(rnn, config.LEARNING_RATE)
    # Train model
    modeloperator.train(data_obj)
    print("Model Trained")
    # Test model
    print("all categories: ", len(data_obj.categories))
    modeloperator.predict(data_obj, 'Dovesky')
    modeloperator.predict(data_obj, 'Satoshi')
    modeloperator.predict(data_obj, 'Jackson')
import model
import utils
import random
import unidecode
import string
import torch
import torch.nn as nn
from settings import (INPUT_PATH, OUTPUT_PATH, INPUT_SIZE, HIDDEN_SIZE,
                      OUTPUT_SIZE, EPOCHS, NUM_LAYERS, LEARNING_RATE,
                      CHUNK_LENGTH, PREDICT_LENGTH, TEMPERATURE, PRINT_EVERY)

input_string = unidecode.unidecode(open(INPUT_PATH, 'r').read())

network = model.RNN(INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE, NUM_LAYERS)
optimizer = torch.optim.Adam(network.parameters(), lr=LEARNING_RATE)
criterion = nn.CrossEntropyLoss()

for epoch in range(EPOCHS):
    input_tensor, target_tensor = utils.random_training_set(input_string,
                                                            CHUNK_LENGTH)
    loss = model.train(network, input_tensor, target_tensor, CHUNK_LENGTH,
                       optimizer, criterion)
    if epoch % PRINT_EVERY == 0:
        print(loss)
        print(model.generate(network, random.choice(string.ascii_uppercase),
                             PREDICT_LENGTH, TEMPERATURE), '\n')

with open(OUTPUT_PATH, 'a') as file:
    for i in range(10):
        file.write(model.generate(network,
                                  random.choice(string.ascii_uppercase),
                                  200, TEMPERATURE))
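# utils.random_training_set is not shown. A plausible minimal sketch, following
# the common char-RNN recipe: take a random chunk of the corpus, use characters
# [0, n-1] as input and the shifted characters [1, n] as target. The vocabulary
# (string.printable) and index mapping are assumptions; the original helper may
# index differently.
import random
import string
import torch

all_characters = string.printable

def char_tensor(text):
    # Map each character to its index in the vocabulary.
    return torch.tensor([all_characters.index(c) for c in text],
                        dtype=torch.long)

def random_training_set(input_string, chunk_length):
    start = random.randint(0, len(input_string) - chunk_length - 1)
    chunk = input_string[start:start + chunk_length + 1]
    input_tensor = char_tensor(chunk[:-1])
    target_tensor = char_tensor(chunk[1:])
    return input_tensor, target_tensor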
c_to_i = pickle.load(open(args.c_to_i, 'rb'))
i_to_c = pickle.load(open(args.i_to_c, 'rb'))
n_char = len(c_to_i)
dataloaders = []

with open('data/vs_chemist.txt') as f:
    lines = f.readlines()
lines = [l.strip().split() for l in lines]
s_to_human_score = {l[1]: float(l[3]) for l in lines}

# Note: this rebinds the name `model` from the imported module to the network.
if args.model == 'Trans':
    model = model.TransformerModel(args, n_char, i_to_c)
else:
    model = model.RNN(args, n_char, i_to_c)
model = utils.initialize_model(model, device, args.save_files)
print("number of parameters :",
      sum(p.numel() for p in model.parameters() if p.requires_grad))

softmax = nn.Softmax(dim=-1)
model.eval()
log_likelihoods = []
humanscores = []
sascores = []
with torch.no_grad():
    for s in s_to_human_score.keys():
        humanscores.append(s_to_human_score[s])
        s = Chem.MolFromSmiles(s)
def run(batch_size, permuted, modeltype='surprise_gru', n_hidden=64,
        zoneout=0.25, layer_norm=True, optimizer='adam', learnrate=1e-3,
        aux_weight=0.1, cuda=True, resume=False):
    assert isinstance(batch_size, int)
    assert isinstance(permuted, bool)
    assert modeltype in MODELS_IMPLEMENTED
    assert isinstance(n_hidden, int)
    assert isinstance(zoneout, (int, float))
    assert isinstance(layer_norm, bool)
    assert isinstance(optimizer, str)
    assert isinstance(learnrate, (int, float))
    assert isinstance(cuda, bool)
    assert isinstance(resume, bool)

    # Name the experiment s.t. parameters are easily readable
    exp_name = ('%s_perm%r_h%i_z%2f_norm%r_%s'
                % (modeltype, permuted, n_hidden, zoneout, layer_norm,
                   optimizer))
    exp_path = os.path.join('/home/jason/experiments/recurrent_pytorch/',
                            exp_name)
    if not os.path.isdir(exp_path):
        os.makedirs(exp_path)

    if not resume:
        # Store experiment params in params.json
        params = {'batch_size': batch_size, 'permuted': permuted,
                  'modeltype': modeltype, 'n_hidden': n_hidden,
                  'zoneout': zoneout, 'layer_norm': layer_norm,
                  'optimizer': optimizer, 'learnrate': learnrate,
                  'aux_weight': aux_weight, 'cuda': cuda}
        with open(os.path.join(exp_path, 'params.json'), 'w') as f:
            json.dump(params, f)
        # Model
        if modeltype.lower() == 'rnn':
            net = model.RNN(1, n_hidden, 10, layer_norm)
        elif modeltype.lower() == 'gru':
            net = model.GRU(1, n_hidden, 10, layer_norm)
        elif modeltype.lower() == 'surprise_gru':
            net = model.SurpriseGRU(1, n_hidden, 10, layer_norm)
        else:
            raise ValueError
    else:
        # If resuming, the params, stats and checkpoint files must all exist.
        if not (os.path.isfile(os.path.join(exp_path, 'params.json'))
                and os.path.isfile(os.path.join(exp_path, 'stats.json'))
                and os.path.isfile(os.path.join(exp_path, 'checkpoint'))):
            raise Exception(
                'Missing params, stats or checkpoint file (resume)')
        net = torch.load(os.path.join(exp_path, 'checkpoint'))

    # Data loaders
    train_loader, val_loader = data.mnist(batch_size, sequential=True,
                                          permuted=permuted)
    # Train
    train.fit_recurrent(train_loader, val_loader, net, exp_path, zoneout,
                        optimizer, aux_weight=aux_weight, cuda=cuda,
                        resume=resume)
    # Post-training visualization
    post_training(exp_path, val_loader)
import os
import torch
import model
import numpy as np
from data_loader import fetch_test
from main import convert_to_vector_test

test_path = './test.csv'
out_path = './output.csv'
model_path = 'model/rnn_best.pth'
embed_dim = 300
hidden_size = 64
num_layer = 6

test_data = fetch_test(test_path)
test_data = convert_to_vector_test(test_data)
# test_id = get_id(test_path)

rnn = model.RNN(input_dim=embed_dim, h=hidden_size, num_layer=num_layer)
checkpoint = torch.load(model_path)
rnn.load_state_dict(checkpoint)
rnn.eval()  # inference mode

with open(out_path, "w+") as outfile:
    for temp_input in test_data:
        temp_input = torch.from_numpy(
            np.asarray([np.asarray(word) for word in temp_input])
        ).unsqueeze(1)
        predicted = rnn(temp_input)
        # Output labels are 1-indexed.
        predicted_label = int(torch.argmax(predicted)) + 1
        outfile.write(str(predicted_label))
        outfile.write('\n')
import preprocess as pre
import model
import utils
import torch

category_lines, all_categories, n_letters, all_letters = pre.get_data()
rnn = model.RNN(n_letters, 128, n_letters, len(all_categories))
rnn.load_state_dict(torch.load("advacnced_start_optim.pth"))

# name = utils.sample("Spanish", all_categories, n_letters, all_letters, rnn,
#                     start="AB", max_length=20)
# print(name)
utils.samples('Russian', all_categories, n_letters, all_letters, rnn, 'RUS',
              start_token=True)
utils.samples('German', all_categories, n_letters, all_letters, rnn, 'GER',
              start_token=True)
utils.samples('Spanish', all_categories, n_letters,
                      args.last_hid_size, args.num_layers,
                      tie_weights=args.tied, dropout=args.dropout,
                      weight_drop=args.w_drop, drop_h=args.drop_h,
                      drop_i=args.drop_i, drop_e=args.drop_e)
elif args.model == 'RNN':
    # RNN supports emb_size != last_hid_size, at the cost of an extra
    # dense layer.
    model = model.RNN(args.rnn_cell, vocab_size, args.emb_size, args.hid_size,
                      args.last_hid_size, args.num_layers,
                      tie_weights=args.tied, dropout=args.dropout,
                      weight_drop=0, drop_h=0, drop_i=0, drop_e=0)
elif args.model == 'StandardRNN':
    model = gluonnlp.model.StandardRNN(args.rnn_cell, vocab_size,
                                       args.emb_size, args.hid_size,
                                       args.num_layers, dropout=args.dropout,
                                       tie_weights=args.tied)

loss = gluon.loss.SoftmaxCrossEntropyLoss()
if __name__ == "__main__": train_dataset = fetch_20newsgroups(subset='train', data_home='./') test_dataset = fetch_20newsgroups(subset='test', data_home='./') train_set = get_data(train_dataset) test_set = get_data(test_dataset) vocab = Vocabulary(min_freq=10).from_dataset(train_set, field_name='words') vocab.index_dataset(train_set, field_name='words', new_field_name='input') vocab.index_dataset(test_set, field_name='words', new_field_name='input') vocab_size = len(vocab) train_set.set_input('input') train_set.set_target('target') test_set.set_input('input') test_set.set_target('target') model_name = "RNN" if model_name == "CNN": net = model.CNN(vocab_size, embed_dim, num_classes) else: net = model.RNN(vocab_size, embed_dim, num_classes, hidden_dim, num_layers, bidirect) print(bidirect, num_layers) optimizer = Adam(lr=lr, weight_decay=1e-4) loss = CrossEntropyLoss(pred="output", target="target") acc = AccuracyMetric(pred="output", target="target") main(net, optimizer, train_set, test_set, loss, acc)
args.save_dir = os.path.join(
    args.save_dir, datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))

print("\nParameters:")
for attr, value in sorted(args.__dict__.items()):
    print("\t{}={}".format(attr.upper(), value))

# model
m_model = None
if args.snapshot is None:
    if args.which_model == 'lstm':
        m_model = model.LSTM(args, m_embedding)
    elif args.which_model == 'gru':
        m_model = model.GRU(args, m_embedding)
    elif args.which_model == 'rnn':
        m_model = model.RNN(args, m_embedding)
else:
    print('\nLoading model from [%s]...' % args.snapshot)
    try:
        m_model = torch.load(args.snapshot)
    except (IOError, RuntimeError):
        print("Sorry, this snapshot doesn't exist.")
        exit()

if args.cuda:
    m_model = m_model.cuda()

# train or predict
assert m_model is not None
if args.predict is not None:
    label = train.predict(args.predict, m_model, text_field, label_field)
        start_step = int(tokens[-2]) + 1
    else:
        start_epoch = int(tokens[-2]) + 1
        start_step = 1
    # num = re.split('_|\.', filename)[-2]
    # start_epoch = int(num)+1
    print("Model restored from file " + filename)
else:
    print("No file found for model " + args.model
          + ". A new one will be created.")
    args.restart = False

# Instantiate a new neural network.
if not args.restart:
    if args.model == 'RNN':
        rnn = model.RNN(data.n_letters, args.n_hidden, data.n_categories,
                        cuda=args.cuda)
    elif args.model == 'LSTM':
        rnn = model.LSTM(input_size=data.n_letters, hidden_size=args.n_hidden,
                         output_size=data.n_categories, cuda=args.cuda)
    elif args.model == 'GRU':
        rnn = model.GRU(input_size=data.n_letters, hidden_size=args.n_hidden,
                        output_size=data.n_categories, cuda=args.cuda)
    assert rnn

# optimizer = torch.optim.SGD(rnn.parameters(), lr=args.lr)
optimizer = torch.optim.Adam(rnn.parameters(), lr=args.lr)
criterion = nn.NLLLoss()
if args.cuda:
    rnn.cuda()
    criterion.cuda()