def create_model(self, device):
    """Construct the RNN language model on `device`.

    Also installs an `nn.NLLLoss` criterion on `self.criterion` as a side
    effect, mirroring how the rest of this class expects to train/evaluate.

    Returns the freshly built model, already moved to `device`.
    """
    vocab_size = len(self.corpus.dictionary)
    self.criterion = nn.NLLLoss()
    net = model.RNNModel(
        self.model_type,
        vocab_size,
        self.emsize,
        self.nhid,
        self.nlayers,
        self.dropout,
        self.tied,
    )
    return net.to(device)
def objective(hparams):
    """Train a language model for one hyperparameter vector and return the
    final validation loss (the quantity being minimized by the search).

    hparams layout: [nlayers, emsize, nhid, rnn_type].

    Side effects: rebinds the module-level globals `model` and `lr`, which
    the outer train()/evaluate() helpers read.
    """
    nlayers = int(hparams[0])
    emsize = int(hparams[1])
    nhid = int(hparams[2])
    rnn_type = str(hparams[3])
    ntokens = len(corpus.dictionary)
    global model
    # NOTE(review): this rebinds the global `model` from the imported module
    # to an instance, so a *second* call to objective() would fail on
    # `model.RNNModel` — confirm the search driver re-imports or only calls
    # this once per process.
    model = model.RNNModel(rnn_type, ntokens, emsize, nhid, nlayers,
                           args.dropout, args.tied).to(device)
    #model = nn.DataParallel(model)
    global lr
    lr = args.lr
    best_val_loss = None
    for epoch in range(1, args.epochs + 1):
        # BUG FIX: epoch_start_time was never assigned, so the report line
        # below raised NameError on the first epoch; stamp the start here.
        epoch_start_time = time.time()
        train()
        val_loss = evaluate(val_data)
        print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
              'valid ppl {:8.2f}'.format(epoch,
                                         (time.time() - epoch_start_time),
                                         val_loss, math.exp(val_loss)))
        if not best_val_loss or val_loss < best_val_loss:
            best_val_loss = val_loss
        else:
            # Anneal the learning rate if no improvement has been seen in the
            # validation dataset.
            lr /= 4.0
    return val_loss
def test_torch_lm():
    """Micro-benchmark: time 100 forward passes of the PTB RNN language model
    on CPU and print the average latency per pass in milliseconds.

    Relies on module-level `model`, `args`, and `ntokens`. The commented-out
    lines are the GPU variant of the same benchmark (left for easy toggling).
    """
    # device = torch.device('cuda:4')
    net = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                         args.nlayers, args.dropout, args.tied)
    # net = loaded_model(pretrained=False)
    # Fake token ids: 20 time steps x 32 batch (640 values total).
    x = np.arange(80 * 5 * 32 / 20).reshape(20, 32).astype('int64')
    # Zero-initialized hidden/cell states, shape (20, 32, 200).
    # NOTE(review): `x` stays a NumPy array here (the LongTensor conversion
    # below is commented out) — confirm RNNModel.forward accepts that.
    init_hidden = np.zeros((20, 32, 200), dtype='float32')
    init_hidden = torch.tensor(init_hidden)
    init_cell = np.zeros((20, 32, 200), dtype='float32')
    pre_cell = torch.tensor(init_cell)
    # x = torch.LongTensor(x)
    # net.to(device)
    # x = x.to(device)
    # init_hidden = init_hidden.to(device)
    # init_cell = pre_cell.to(device)
    with torch.no_grad():
        # warmup
        out = net(x, init_hidden, pre_cell)
        t1 = time.time()
        for i in range(100):
            out = net(x, init_hidden, pre_cell)
        t2 = time.time()
        # (t2 - t1) * 1000 ms / 100 iterations == (t2 - t1) * 10: average
        # milliseconds per forward pass.
        print('torch cost: {} ms.'.format((t2 - t1) * 10))
def __init__(self):
    """Batchify the corpus splits and build (or reload) the RNN language model.

    Relies on module-level `corpus` and `args`. Sets up:
    - self.train_data / self.val_data / self.test_data (batchified tensors)
    - self.model (fresh RNNModel, or a torch.load'ed checkpoint if args.load)
    - self.criterion (CrossEntropyLoss)
    """
    # TODO can we just change this to 1?
    self.eval_batch_size = 10
    self.train_data = self.batchify(corpus.train, args.batch_size)
    self.val_data = self.batchify(corpus.valid, self.eval_batch_size)
    self.test_data = self.batchify(corpus.test, self.eval_batch_size)
    # self.test_data = self.batchify(corpus.test, 1)

    ###############################################################################
    # Build the model
    ###############################################################################
    ntokens = len(corpus.dictionary)
    if args.load:
        # FIX: use a context manager so the checkpoint file is closed even if
        # torch.load raises (the original open()/close() pair leaked on error).
        with open(args.save, 'rb') as f:
            self.model = torch.load(f)
    else:
        self.model = model.RNNModel(args.model, ntokens, args.emsize,
                                    args.nhid, args.nlayers, args.dropout,
                                    args.tied)
    if args.cuda:
        self.model.cuda()
    self.criterion = nn.CrossEntropyLoss()
def model_func(wrapped_import, inputs):
    """Build the configured language model and run one forward pass on
    `inputs` under torch.no_grad(), returning the output tensor.

    `wrapped_import`, when truthy, is a callable used to import `torch.nn`
    and the local `model` module (used by the verification harness);
    otherwise the plain imports are used. Relies on module-level `args`,
    `ntokens`, and `torch`.
    """
    if wrapped_import:
        nn = wrapped_import("torch.nn")
        model = wrapped_import("model")
    else:
        from torch import nn
        import model

    is_transformer = args.model == 'Transformer'
    if is_transformer:
        net = model.TransformerModel(ntokens, args.emsize, args.nhead,
                                     args.nhid, args.nlayers, args.dropout)
        hidden = None  # the Transformer is stateless between calls
    else:
        net = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                             args.nlayers, args.dropout, args.tied)
        hidden = net.init_hidden(args.batch_size)

    # For verification we need no random elements (e.g. dropout).
    net.eval()

    with torch.no_grad():
        if is_transformer:
            output = net(inputs).view(-1, ntokens)
        else:
            output, hidden = net(inputs, hidden)
    return output
def train_rnn_model(x_train, y_train, x_val, y_val, params):
    """Build an RNNModel from the `params` config dict, train it on
    (x_train, y_train) with (x_val, y_val) as validation data, and return
    the summarized training history.
    """
    # Collect the model constructor arguments up front so the two calls below
    # stay readable.
    model_kwargs = dict(
        max_seq_length=x_train.shape[1],
        input_size=params["input_size"],
        output_size=params["output_size"],
        embed_dim=params["embed_dim"],
        emb_trainable=params["emb_trainable"],
        model_name=params["model_name"],
        hidden_activation=params["hidden_activation"],
        out_activation=params["out_activation"],
        hidden_dim=params["hidden_dims"][0],  # first entry drives the RNN size
        kernel_initializer=params["kernel_initializer"],
        kernel_regularizer=params["kernel_regularizer"],
        recurrent_regularizer=params["recurrent_regularizer"],
        input_dropout=params["input_dropout"],
        recurrent_dropout=params["recurrent_dropout"],
        rnn_unit_type=params["rnn_unit_type"],
        bidirectional=params["bidirectional"],
        attention=params["attention"],
        embs_matrix=params["embs_matrix"],
    )
    rnn_model = model.RNNModel(**model_kwargs)

    fit_kwargs = dict(
        out_dir=params["out_dir"],
        validation_data=(x_val, y_val),
        save_checkpoint=params["save_checkpoint"],
        n_epochs=params["n_epochs"],
        batch_size=params["batch_size"],
        verbose=params["verbose"],
        early_stopping=params["early_stopping"],
        learning_rate=params["learning_rate"],
        loss=params["loss"],
        ckpt_name_prefix=utils.get_file_name(params),
    )
    history = train_model(rnn_model, x_train, y_train, **fit_kwargs)
    return utils.extract_results_from_history(history)
def _get_model(is_train):
    """Instantiate the model selected by FLAGS.model.

    Raises AttributeError for unknown model names, matching the existing
    callers' expectations.
    """
    factories = {
        'rnn': model.RNNModel,
        'sprnn': model.SPRNNModel,
    }
    factory = factories.get(FLAGS.model)
    if factory is None:
        raise AttributeError("Model unimplemented: " + FLAGS.model)
    return factory(is_train=is_train)
def main():
    """Entry point: prepare data, build the selected model, then train or test.

    When --process_data is set, only preprocesses the corpus and exits.
    """
    loader = DataLoader(args)
    if args.process_data:
        loader.process_data()
        return
    torch.cuda.set_device(args.gpu_device)
    loader.load()

    # Dispatch table: CLI model name -> model class.
    model_classes = {
        "rnn": model.RNNModel,
        "cnn": model.CNNModel,
        "baseline": model.Baseline,
    }
    model_cls = model_classes.get(args.model)
    if model_cls is None:
        print("invalid model type")
        exit(1)
    net = model_cls(args, loader.vocab_size, 8, loader.id_2_vec).cuda()

    if args.test_only:
        test(net, loader, args)
    else:
        train(net, loader, args)
def main_func(datasets, context_len, epochs):
    """Train and evaluate a language model for every (dataset, bptt) pair.

    For each combination: batchify the splits, build a Transformer or RNN
    model, train with Adam, checkpoint the best-validation model to
    args.save, reload it, and write the test loss to the global file
    handle `fr`.
    """
    for dataset in datasets:
        for bptt in context_len:
            train_d, valid_d, test_d, data = data_generator(dataset)
            train_data = batchify(train_d, bptt)
            val_data = batchify(valid_d, bptt)
            test_data = batchify(test_d, bptt)
            ntokens = len(set(data))
            best_val_loss = None
            # NOTE(review): `lr` is only annealed below; the Adam optimizer is
            # constructed with args.lr and never re-reads `lr` — confirm the
            # annealing is intended to have an effect.
            lr = args.lr
            if args.model == 'Transformer':
                model = model1.TransformerModel(ntokens, args.emsize,
                                                args.nhead, args.nhid,
                                                args.nlayers,
                                                args.dropout).to(device)
            else:
                model = model1.RNNModel(args.model, ntokens, args.emsize,
                                        args.nhid, args.nlayers, args.dropout,
                                        args.tied).to(device)
            criterion = nn.CrossEntropyLoss()
            optimizer = torch.optim.Adam(model.parameters(), args.lr)
            for epoch in range(1, epochs + 1):
                epoch_start_time = time.time()
                train(train_data, bptt, ntokens, model, criterion, optimizer,
                      epoch)
                val_loss = evaluate(val_data, bptt, ntokens, model, criterion)
                print('-' * 89)
                print(
                    '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                    'valid ppl {:8.2f}'.format(
                        epoch, (time.time() - epoch_start_time), val_loss,
                        math.exp(val_loss)))
                print('-' * 89)
                # Save the model if the validation loss is the best we've seen so far.
                if not best_val_loss or val_loss < best_val_loss:
                    with open(args.save, 'wb') as f:
                        torch.save(model, f)
                    best_val_loss = val_loss
                else:
                    # BUG FIX: the original divided by 0.4, which *increased*
                    # the learning rate 2.5x on a bad epoch; divide by 4.0 to
                    # actually anneal it (matches the standard PTB recipe).
                    lr /= 4.0
            # Load the best saved model.
            with open(args.save, 'rb') as f:
                model = torch.load(f)
                if args.model in ['RNN_TANH', 'RNN_RELU', 'LSTM', 'GRU']:
                    # After torch.load the RNN params are not a contiguous
                    # chunk of memory; flatten for fast (cuDNN) execution.
                    model.rnn.flatten_parameters()
            # Run on test data.
            test_loss = evaluate(test_data, bptt, ntokens, model, criterion)
            print('=' * 89)
            print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.
                  format(test_loss, math.exp(test_loss)))
            print('=' * 89)
            fr.writelines("test loss for len %d and dataset %s is %f\n" %
                          (bptt, dataset, test_loss))
    #fr.close()
    return
def evaluate_rnn(params):
    """Evaluate a trained RNN model from a checkpoint.

    Restores weights from params["eval_weights_ckpt"], pickles a random
    sample of evaluation rows (and their attention weights, if attention is
    enabled), optionally plots the attention, and finally saves predictions
    for the full evaluation set into params["eval_res_folder"].

    Dropout rates are forced to 0.0 because this is inference, not training.
    """
    x_data, y_data = utils.load_data(params["eval_x_data"],
                                     params["eval_y_data"])
    rnn_model = model.RNNModel(
        max_seq_length=x_data.shape[1],
        input_size=params["input_size"],
        output_size=params["output_size"],
        embed_dim=params["embed_dim"],
        emb_trainable=params["emb_trainable"],
        model_name=params["model_name"],
        hidden_activation=params["hidden_activation"],
        out_activation=params["out_activation"],
        hidden_dim=params["hidden_dims"][0],
        kernel_initializer=params["kernel_initializer"],
        kernel_regularizer=params["kernel_regularizer"],
        recurrent_regularizer=params["recurrent_regularizer"],
        input_dropout=0.0,  # inference: disable dropout
        recurrent_dropout=0.0,
        rnn_unit_type=params["rnn_unit_type"],
        bidirectional=params["bidirectional"],
        attention=params["attention"],
        embs_matrix=params["embs_matrix"])
    utils.load_model(params["eval_weights_ckpt"], rnn_model,
                     params["learning_rate"])
    print("Model from checkpoint %s was loaded." % params["eval_weights_ckpt"])
    # if y_data is not None:
    #     metrics_names, scores = rnn_model.evaluate(x_data, y_data, batch_size=params["batch_size"], verbose=params["verbose"])
    #     loss = scores[0]
    #     print("Evaluation loss: %.3f"%loss)
    sample_idxs = np.random.randint(x_data.shape[0],
                                    size=params["attn_sample_size"])
    x_data_sample = x_data[sample_idxs, :]
    # BUG FIX: pickle files were opened without ever being closed; use context
    # managers so the handles are released even if dump/load raises.
    with open(os.path.join(params["eval_res_folder"], "sample_idxs.pickle"),
              "wb") as f:
        cPickle.dump(sample_idxs, f)
    if params["attention"]:
        attention_weights = get_attention_weights(rnn_model, x_data_sample)
        print("Attention weights shape: ", attention_weights.shape)
        with open(os.path.join(params["eval_res_folder"],
                               "attn_weights.pickle"), "wb") as f:
            cPickle.dump(attention_weights, f)
        # BUG FIX: the vocab pickle was opened in text mode; pickled data is
        # binary, so open with "rb" (required on Python 3).
        with open(params["vocab_file"], "rb") as f:
            vocab = cPickle.load(f)
        if params["plot_attn"]:
            inverse_vocab = {value: key for key, value in vocab.items()}
            utils.plot_attention_weights(x_data_sample, attention_weights,
                                         inverse_vocab,
                                         params["eval_res_folder"],
                                         ids=sample_idxs)
    predictions = rnn_model.predict(x_data, batch_size=params["batch_size"],
                                    verbose=params["verbose"])
    utils.save_predictions(predictions, params["eval_res_folder"],
                           rnn_model.model_name + "_predictions.txt")
def save_torch_ptb_lm():
    """Trace the PTB RNN language model with example inputs via TorchScript
    and save the traced module to `<torch_model_dir>/RNNModel.pth`.

    Relies on module-level `args`, `ntokens`, and `torch_model_dir`.
    """
    seq_len, batch, state_dim = 20, 32, 200

    # Example inputs for tracing: fake token ids plus zero hidden/cell states.
    token_ids = torch.LongTensor(
        np.arange(80 * 5 * 32 / 20).reshape(seq_len, batch).astype('int64'))
    zero_state = np.zeros((seq_len, batch, state_dim), dtype='float32')
    hidden0 = torch.tensor(zero_state)
    cell0 = torch.tensor(np.zeros((seq_len, batch, state_dim),
                                  dtype='float32'))

    net = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                         args.nlayers, args.dropout, args.tied)
    traced = torch.jit.trace(net, [token_ids, hidden0, cell0])
    traced.save(torch_model_dir + "RNNModel.pth")
def train_lstm():
    """Train an RNN classifier on the data in args.data_dir, checkpointing the
    best-validation-accuracy model to args.save, then dump per-example
    prediction probabilities for the validation set and return the final
    evaluation result.

    Relies on module-level `args` plus the train/evaluate/read_data helpers.
    """
    ###########################################################################
    # Build the model
    ###########################################################################
    train_data, val_data = read_data(args.data_dir)
    # Input feature size is taken from the data itself (last dim of the first
    # training tensor).
    model_instance = model.RNNModel(args.model, args.nclasses,
                                    train_data[0].size(-1), args.nhid,
                                    args.nlayers, args.dropout).cuda()
    if args.multilabel:
        criterion = nn.BCEWithLogitsLoss()
    else:
        criterion = nn.CrossEntropyLoss()
    # Loop over epochs.
    lr = args.lr
    best_val_acc = None
    # At any point you can hit Ctrl + C to break out of training early.
    try:
        for epoch in range(1, args.epochs + 1):
            epoch_start_time = time.time()
            train(lr, model_instance, criterion, train_data)
            val_acc, final_res, _, _, _ = evaluate(model_instance, val_data)
            print('-' * 89)
            print('| end of epoch {:3d} | time: {:5.2f}s | '
                  'valid acc {:5.4f} | '.format(
                      epoch, (time.time() - epoch_start_time), val_acc))
            print('-' * 89)
            # Save the model if the validation loss is the best we've seen so
            # far.
            if not best_val_acc or val_acc > best_val_acc:
                with open(args.save, 'wb') as f:
                    torch.save(model_instance, f)
                best_val_acc = val_acc
            else:
                # Anneal the learning rate if no improvement has been
                # seen in the validation dataset.
                lr /= 4.0
    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')
    # Run final test, and store the predictions.
    # NOTE(review): if training is interrupted before the first epoch
    # completes, `final_res` is unbound and the return below raises
    # NameError — confirm whether that path matters.
    _, _, idx, _, preds = evaluate(model_instance, val_data)
    preds_to_store = []
    for idx_i, pred_i in zip(idx.tolist(), preds.tolist()):
        preds_to_store.append((idx_i, pred_i))
    # Protocol 2 keeps the pickle readable from Python 2 tooling.
    with open(osp.join(osp.dirname(args.save), 'results_probs_lstm.pkl'),
              'wb') as f:
        pkl.dump(preds_to_store, f, protocol=2)
    return final_res
def initialize(self):
    """Build the sentiment RNN, load its checkpoint, locate the sentiment
    neuron, and prime the model with a newline + space so generation can
    start from a sampled character.

    Sets self.model_test, self.sd, self.neuron, self.polarity, self.mask,
    self.hidden, and self.input.
    """
    cuda = torch.cuda.is_available()
    self.model_test = model.RNNModel(self.model_nn, self.data_size,
                                     self.emsize, self.nhid, self.nlayers,
                                     self.dropout, self.tied)
    if cuda:
        self.model_test.cuda()
    with open(self.load_model, 'rb') as f:
        self.sd = torch.load(f)
    try:
        self.model_test.load_state_dict(self.sd)
        print('Model loaded state dict')
    except Exception:
        # Checkpoints saved with weight norm applied need the same transform
        # on the fresh model before their state dict fits.
        apply_weight_norm(self.model_test.rnn)
        # BUG FIX: this fallback referenced an undefined name `sd`, which
        # raised NameError instead of loading; use self.sd. Also narrowed the
        # bare `except:` so Ctrl-C isn't swallowed here.
        self.model_test.load_state_dict(self.sd)
        remove_weight_norm(self.model_test)
    # Get the neuron and polarity
    self.neuron, self.polarity = get_neuron_and_polarity(
        self.sd, self.neuron)
    # Only track the neuron when visualizing or overwriting its activation.
    self.neuron = self.neuron if self.visualize or self.overwrite is not None else None
    self.mask = self.overwrite is not None
    # model_test train ?
    self.model_test.eval()
    # Computing: prime with '\n' then ' ' and sample the first character.
    self.hidden = self.model_test.rnn.init_hidden(1)
    self.input = Variable(torch.LongTensor([int(ord('\n'))]))
    if cuda:
        self.input = self.input.cuda()
    self.input = self.input.view(1, 1).contiguous()
    model_step(self.model_test, self.input, self.neuron, self.mask,
               self.overwrite, self.polarity)
    self.input.data.fill_(int(ord(' ')))
    out = model_step(self.model_test, self.input, self.neuron, self.mask,
                     self.overwrite, self.polarity)
    if self.neuron is not None:
        out = out[0]
    self.input.data.fill_(sample(out, self.temperature))
def build_model(corpus, model_name, emsize, nhid, nlayers, dropout, dropouth,
                dropouti, dropoute, wdrop, lr, tied, resume, cuda):
    """Build (or resume) an AWD-LSTM-style RNN model plus its
    SplitCrossEntropyLoss criterion.

    Returns (model, criterion, None) — the third slot mirrors the optimizer
    position used elsewhere but is not produced here.
    """
    criterion = None
    ntokens = len(corpus.dictionary)
    model = model_module.RNNModel(model_name, ntokens, emsize, nhid, nlayers,
                                  dropout, dropouth, dropouti, dropoute,
                                  wdrop, tied)
    ###
    if resume:
        print('Resuming model ...')
        # A resumed checkpoint replaces the freshly built model (and brings
        # its own criterion/optimizer); the CLI dropout/lr values are then
        # re-applied on top of it.
        model, criterion, optimizer = model_load(resume)
        optimizer.param_groups[0]['lr'] = lr
        model.dropouti, model.dropouth, model.dropout, model.dropoute = dropouti, dropouth, dropout, dropoute
        if wdrop:
            from weight_drop import WeightDrop
            for rnn in model.rnns:
                if type(rnn) == WeightDrop:
                    rnn.dropout = wdrop
                elif rnn.zoneout > 0:
                    rnn.zoneout = wdrop
    ###
    if not criterion:
        # Adaptive-softmax-style vocabulary splits, sized by corpus.
        splits = []
        if ntokens > 500000:
            # One Billion
            # This produces fairly even matrix mults for the buckets:
            # 0: 11723136, 1: 10854630, 2: 11270961, 3: 11219422
            splits = [4200, 35000, 180000]
        elif ntokens > 75000:
            # WikiText-103
            splits = [2800, 20000, 76000]
        print('Using', splits)
        criterion = SplitCrossEntropyLoss(emsize, splits=splits, verbose=False)
    ###
    if cuda:
        model = model.cuda()
        criterion = criterion.cuda()
    ###
    params = list(model.parameters()) + list(criterion.parameters())
    # NOTE(review): this count multiplies only the first two dims, so any
    # parameter tensor with rank > 2 is under-counted — confirm all params
    # here are at most 2-D.
    total_params = sum(x.size()[0] * x.size()[1] if len(x.size()) > 1 else x.size()[0] for x in params if x.size())
    print('Args:', args)
    print('Model total parameters:', total_params)
    return model, criterion, None
def main(device):
    """Train a 2-layer LSTM language model on `<args.data>/train.txt`,
    validating and sampling once per epoch and checkpointing to 'model.pt'.

    Relies on module-level `args`, `BPTT`, `vocab`, and the
    train/evaluate/generate helpers.
    """
    lines, vocab_size = readData(args.data + '/train.txt', args.vsize)
    print("Vocabulary size : " + str(vocab_size))
    with open('vocab.pickle', 'wb') as handle:
        # NOTE(review): `vocab` is not defined in this function — presumably a
        # module-level mapping populated by readData; confirm.
        pickle.dump(vocab, handle, protocol=pickle.HIGHEST_PROTOCOL)
    rnn = model.RNNModel('LSTM', vocab_size, args.nhid, args.nhid, 2,
                         0.2).to(device)
    criterion = nn.CrossEntropyLoss()
    num_epochs = 40
    start_time = timeit.default_timer()
    # Wrap every line with sentence boundary markers, then flatten to one
    # token stream.
    lines = ['<start> ' + line + ' <end>' for line in lines]
    text = ' '.join(lines).split()
    # BUG FIX: '/' produces a float on Python 3 and range(num_batches) then
    # raises TypeError; use floor division. (Also dropped the unused
    # `num_tokens` local.)
    num_batches = len(text) // (BPTT * args.bsz)
    print("# of batches %d" % num_batches)
    for e in range(num_epochs):
        for i in range(num_batches):
            hidden = rnn.initHidden(args.bsz, device)
            input_batch, target_batch = get_batch_continuous(text, i, device)
            train(rnn, hidden, criterion, args.lr, input_batch, target_batch)
        elapsed = timeit.default_timer() - start_time
        print('##################')
        print('Epoch %d :' % e)
        print('Time elapsed : %s' % (get_readable_time(int(elapsed))))
        if args.cuda:
            loss, perp = evaluate(args.data + '/validation.txt', rnn, vocab,
                                  cuda=True)
        else:
            loss, perp = evaluate(args.data + '/validation.txt', rnn, vocab)
        print('Validation loss : %.1f' % loss)
        print('Validation perplexity : %.1f' % perp)
        rnn.eval()
        samples = generate(rnn, vocab, args.cuda)
        print('Samples : ')
        for sample in samples:
            print(sample)
        with open('model.pt', 'wb') as f:
            torch.save(rnn, f)
def main():
    """Train an RNN language model end-to-end: load the corpus (sentence- or
    character-level), train with per-epoch validation and LR annealing,
    checkpoint the best model to args.save, then run the test split.
    """
    if args.mode == 'sentence':
        corpus = data.Corpus(args.data)
    else:
        corpus = data.CorpusCharacter(args.data)
    eval_batch_size = 10
    train_data = utils.batchify(corpus.train, args.batch_size, device)
    val_data = utils.batchify(corpus.valid, eval_batch_size, device)
    test_data = utils.batchify(corpus.test, eval_batch_size, device)
    ntokens = len(corpus.dictionary)
    _model = model.RNNModel(args.model, ntokens, args.embsize, args.nhidden,
                            args.nlayers, args.dropout).to(device)
    _criterion = nn.NLLLoss()
    _lr = args.lr
    best_val_loss = None
    # Ctrl + C breaks out of the epoch loop early but still runs the test.
    try:
        for epoch in range(1, args.epochs + 1):
            epoch_start_time = time.time()
            train(_model, _criterion, train_data, ntokens, _lr, epoch)
            val_loss = evaluate(_model, _criterion, val_data, eval_batch_size)
            print('-' * 89)
            print(
                '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                'valid ppl {:8.2f}'.format(epoch,
                                           (time.time() - epoch_start_time),
                                           val_loss, math.exp(val_loss)))
            print('-' * 89)
            # Checkpoint on improvement; otherwise anneal the learning rate.
            if not best_val_loss or val_loss < best_val_loss:
                with open(args.save, 'wb') as f:
                    torch.save(_model, f)
                best_val_loss = val_loss
            else:
                _lr /= 4.0
    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')
    test(_criterion, test_data, eval_batch_size)
train_label.reshape(len(train_label))).long() if args.cuda: train_data, train_label = train_data.cuda(), train_label.cuda( ) #和nn.Module不同,调用tensor.cuda()只是返回这个tensor对象在GPU内存上的拷贝,而不会对自身进行改变。因此必须对tensor进行重新赋值,即tensor=tensor.cuda(). val_data, val_label = val_data.cuda(), val_label.cuda() eval_batch_size = len(val_label) #一次测试全部测试集数据 print(len(val_label)) '''############################################################################### # Build the model 实例化模型 ###############################################################################''' model = model.RNNModel(rnn_type=args.model, ninp=args.ninp, nhid=args.nhid, nlayers=args.nlayers, dropout=args.dropout, dropouth=args.dropouth, wdrop=args.wdrop, tie_weights=args.tied, class_num=args.class_num) criterion = nn.CrossEntropyLoss() image = mate_data() if args.resume: print('Resuming model ...') model_load(args.resume) optimizer.param_groups[0]['lr'] = args.lr model.dropouti, model.dropouth, model.dropout, args.dropoute = args.dropouti, args.dropouth, args.dropout, args.dropoute if args.wdrop: from weight_drop import WeightDrop for rnn in model.rnns:
# Batchify the corpus splits; validation/test use a fixed small batch size.
eval_batch_size = 10
train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

###############################################################################
# Build the model
###############################################################################
ntokens = len(corpus.dictionary)
# NOTE(review): the assignment below rebinds the name `model` from the
# imported module to the instance — any later model.RNNModel(...) call in
# this file would fail.
if args.model == 'Transformer':
    model = model.TransformerModel(ntokens, args.emsize, args.nhead,
                                   args.nhid, args.nlayers,
                                   args.dropout).to(device)
else:
    model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                           args.nlayers, args.dropout, args.tied).to(device)
criterion = nn.NLLLoss()

###############################################################################
# Training code
###############################################################################


def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history.

    Works on a single Tensor or an arbitrarily nested tuple of Tensors
    (e.g. LSTM (h, c) pairs), recursing through tuples.
    """
    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)
# Load the corpus and batchify each split; test uses batch size 1 for exact
# per-token evaluation.
corpus = data.Corpus(args.data)
eval_batch_size = 10
test_batch_size = 1
train_data = batchify(corpus.train, args.batch_size, args)
val_data = batchify(corpus.valid, eval_batch_size, args)
test_data = batchify(corpus.test, test_batch_size, args)

###############################################################################
# Build the model
###############################################################################
ntokens = len(corpus.dictionary)
# AWD-LSTM-style model with per-layer/embedding dropouts and weight drop.
# NOTE(review): this rebinds the name `model` from the module to the
# instance.
model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                       args.nlayers, args.dropout, args.dropouth,
                       args.dropouti, args.dropoute, args.wdrop, args.tied)
if args.cuda:
    model.cuda()
# NOTE(review): this count multiplies only the first two dims, so any
# parameter tensor with rank > 2 is under-counted.
total_params = sum(x.size()[0] * x.size()[1] if len(x.size()) > 1 else x.size()[0] for x in model.parameters())
print('Args:', args)
print('Model total parameters:', total_params)

criterion = nn.CrossEntropyLoss()

###############################################################################
# Training code
###############################################################################
# Batchify the corpus splits; evaluation uses a fixed small batch size.
eval_batch_size = 10
train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

###############################################################################
# Build the model
###############################################################################
ntokens = len(corpus.dictionary)
# NOTE(review): this rebinds the name `model` from the module to the instance.
model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                       args.nlayers, dropout=args.dropout,
                       tie_weights=args.tied)

if torch.cuda.device_count() > 1:
    # BUG FIX: LOGGER.info was called print-style ("msg", a, "b"), which makes
    # logging attempt "Let's use" % (count, "GPUs!") and emit a formatting
    # error instead of the message; use lazy %-style arguments.
    LOGGER.info("Let's use %d GPUs!", torch.cuda.device_count())
    # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
    model = nn.DataParallel(model)

if args.cuda:
    model.cuda()

criterion = nn.CrossEntropyLoss()

###############################################################################
# Batchify the splits; test uses batch size 1 for exact evaluation.
# NOTE(review): eval_batch_size is read but not defined in this span —
# presumably assigned earlier in the file; confirm.
test_batch_size = 1
train_data = batchify(corpus.train, args.batch_size, args)
val_data = batchify(corpus.valid, eval_batch_size, args)
test_data = batchify(corpus.test, test_batch_size, args)

###############################################################################
# Build the model
###############################################################################
ntokens = len(corpus.dictionary)
if args.continue_train:
    # Resume from a previously saved full-model checkpoint.
    model = torch.load(os.path.join(args.save, 'model.pt'))
else:
    # Mixture-of-softmaxes RNN with the full set of dropout/expert options.
    model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                           args.nhidlast, args.nlayers, args.dropout,
                           args.dropouth, args.dropouti, args.dropoute,
                           args.wdrop, args.tied, args.dropoutl,
                           args.n_experts, args.num4embed, args.num4first,
                           args.num4second)

# Wrap for multi-GPU unless a single GPU was requested; DataParallel scatters
# along dim=1 (the batch dimension of seq-first tensors).
if args.cuda:
    if args.single_gpu:
        parallel_model = model.cuda()
    else:
        parallel_model = nn.DataParallel(model, dim=1).cuda()
else:
    parallel_model = model

total_params = sum(x.data.nelement() for x in model.parameters())
logging('Args: {}'.format(args))
logging('Model total parameters: {}'.format(total_params))
eval_batch_size = 10 test_batch_size = 1 train_data = batchify(corpus.train, args.batch_size, args) val_data = batchify(corpus.valid, eval_batch_size, args) test_data = batchify(corpus.test, test_batch_size, args) ############################################################################### # Build the model ############################################################################### ntokens = len(corpus.dictionary) if args.continue_train: model = torch.load(os.path.join(args.save, 'model.pt')) else: model = model.RNNModel(ntokens, args.emsize, args.nhid, args.nhidlast, args.nlayers, 0., 0., 0., 0., 0., 0., args.n_experts, args.emblocks, args.emdensity, sparse_mode=args.sparse_mode, sparse_fract=args.sparse_fract) if args.cuda: if not args.multi_gpu: parallel_model = model.cuda() else: parallel_model = nn.DataParallel(model, dim=1).cuda() else: parallel_model = model logging('Args: {}'.format(args)) params_total, params_encoder, params_rnns = 0, 0, 0 for n, p in model.named_parameters(): #print('param {}: {}'.format(n, p.nelement()))
num_operations=args.num_operations, intermediate_nodes=args.num_intermediate_nodes, args=args, genos_init=args.uniform_genos_init) # swarm = Swarm(args, population_size=args.population_size) # initial genotype genotype = swarm.global_best.genotype() # initializing the model if args.use_pretrained: logger.info('PRETRAINED MODEL LOADED!') model = torch.load(os.path.join(args.pretrained_dir, 'model.pt')) else: model = model_module.RNNModel(ntokens, args, genotype=genotype) size = 0 for p in model.parameters(): size += p.nelement() logger.info('param size: {}'.format(size)) logger.info('initial genotype:') logger.info(model.genotype()) if args.cuda: if args.single_gpu: parallel_model = model.cuda() else: parallel_model = nn.DataParallel(model, dim=1).cuda() else: parallel_model = model
eval_batch_size = args.batch_size train_data = batchify(corpus.train, args.batch_size) print(train_data.size()) val_data = batchify(corpus.valid, eval_batch_size) test_data = batchify(corpus.test, eval_batch_size) T = (train_data.size(0) // args.bptt) * args.epochs ############################################################################### # Build the model ############################################################################### ntokens = len(corpus.dictionary) model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.tied, args.bidirectional) # Load checkpoint if args.checkpoint != '': if args.cuda: model = torch.load(args.checkpoint) else: # Load GPU model on CPU model = torch.load(args.checkpoint, map_location=lambda storage, loc: storage) if args.finetune: assert args.pretrained, "you must specify a pre-trained model" with open(args.pretrained, 'rb') as f:
################################################3 criterion = None ntokens = len(corpus.dictionary) # 10000 # pre_emb,_= tools.load_fasttext_embd(args.emb_path, corpus, words_to_load=100000, reload=False) if args.wvec: model = model.RNNModel( args.model, ntokens, args.emsize, args.nhid, args.chunk_size, args.nlayers, args.dropout, args.dropouth, args.dropouti, args.dropoute, args.wdrop, args.tied, pre_emb=pre_emb, ) else: model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.chunk_size, args.nlayers, args.dropout, args.dropouth, args.dropouti, args.dropoute, args.wdrop, args.tied) ### if args.resume: tools.print_log(args.save, 'Resuming model ...')
# Build or reload the model only when actually training (not in test or
# interactive mode).
if not args.test and not args.interact:
    if args.load_checkpoint:
        # Load the best saved model.
        print(' Continuing training from previous checkpoint')
        with open(args.model_file, 'rb') as f:
            if args.cuda:
                model = torch.load(f).to(device)
            else:
                model = torch.load(f, map_location='cpu')
    else:
        ntokens = len(corpus.dictionary)
        model = model.RNNModel(
            args.model,
            ntokens,
            args.emsize,
            args.nhid,
            args.nlayers,
            embedding_file=args.embedding_file,
            dropout=args.dropout,
            tie_weights=args.tied,
            freeze_embedding=args.freeze_embedding).to(device)
    if args.cuda and (not args.single) and (torch.cuda.device_count() > 1):
        # If applicable, use multi-gpu for training
        # Scatters minibatches (in dim=1) across available GPUs
        model = nn.DataParallel(model, dim=1)
    if isinstance(model, torch.nn.DataParallel):
        # if multi-gpu, access real model for training
        # NOTE(review): this immediately unwraps the DataParallel created
        # above, so the wrapper appears to have no lasting effect here —
        # confirm the intent.
        model = model.module
    # after load the rnn params are not a continuous chunk of memory
    # this makes them a continuous chunk, and will speed up forward pass
    model.rnn.flatten_parameters()
eval_batch_size = 10 train_data = batchify(corpus.train, args.batch_size) val_data = batchify(corpus.valid, eval_batch_size) test_data = batchify(corpus.test, eval_batch_size) # # load glove embeddings to tensor # glove_dict = load_glove_to_dict(args.glove_path, args.emsize) # glove_tensor = glove_dict_to_tensor(corpus.dictionary.word2idx, glove_dict) # Build the model ntokens = len(corpus.dictionary) # model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.glove, glove_tensor, args.dropout, args.tied) #改动3 # model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.glove, args.dropout, args.tied) model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.tied, args.glove) #glove_tensor if args.cuda: model.cuda() criterion = nn.CrossEntropyLoss() # Loop over epochs. lr = args.lr best_val_loss = None # At any point you can hit Ctrl + C to break out of training early. try: for epoch in range(1, args.epochs + 1): epoch_start_time = time.time() train()
if args.cuda: data = data.cuda() return data eval_batch_size = 10 train_data = batchify(corpus.train, args.batch_size) val_data = batchify(corpus.valid, eval_batch_size) test_data = batchify(corpus.test, eval_batch_size) ############################################################################### # Build the model ############################################################################### ntokens = len(corpus.dictionary) model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.init) if args.cuda: model.cuda() criterion = nn.CrossEntropyLoss() ############################################################################### # Training code ############################################################################### def clip_gradient(model, clip): """Computes a gradient clipping coefficient based on gradient norm.""" totalnorm = 0 for p in model.parameters(): modulenorm = p.grad.data.norm()
print("load test data - complete")
# Adaptive-softmax cutoffs: CLI buckets plus the full vocabulary as the tail.
cutoffs = args.cutoffs + [ntokens]
# with doing('Constructing model'):
#     if not args.lm1b:
#         criterion = AdaptiveLoss(cutoffs)
#     else:
#         criterion = SplitCrossEntropyLoss(args.emsize, args.cutoffs, verbose=False)
#     criterion.cuda()
logging.info("Constructing model")
criterion = AdaptiveLoss(cutoffs).cuda()
# Build fresh unless --old points at a saved full-model checkpoint.
if args.old is None:
    logging.info("building model")
    model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                           args.nlayers, cutoffs, args.proj, args.dropout,
                           args.tied, args.lm1b)
else:
    with open(args.old, 'rb') as model_file:
        model = torch.load(model_file)

if args.cuda:
    model.cuda()

optimizer = optim.Adagrad(model.parameters(), args.lr, weight_decay=1e-6)
eval_batch_size = 1

###############################################################################
# Training code
###############################################################################

# Loop over epochs.
device = torch.device("cuda" if args.cuda else "cpu")
# Batchify the splits directly onto the chosen device.
# NOTE(review): eval_batch_size and embeddings are read but not defined in
# this span — presumably assigned earlier in the file; confirm.
train_data = batchify(corpus.train, args.batch_size, device)
val_data = batchify(corpus.valid, eval_batch_size, device)
test_data = batchify(corpus.test, eval_batch_size, device)

###############################################################################
# Build the model
###############################################################################
ntokens = len(corpus.dictionary)
if args.load is None:
    model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                           args.nlayers, args.dropout, args.tied,
                           corpus=corpus, embeddings=embeddings)
else:
    with open(args.load, 'rb') as f:
        model = torch.load(f)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
# Only optimize parameters that still require gradients (frozen embeddings
# are filtered out).
optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                              model.parameters()), lr=args.lr)

###############################################################################