# Build the model
###############################################################################

# Number of distinct tokens in the corpus dictionary.
ntokens = len(corpus.dictionary)

# Choose the architecture from the command line: 'Transformer' and 'LSTM' are
# matched explicitly, every other value falls through to the feed-forward
# model.  Each branch rebinds the name `model` to the network it constructs.
if args.model == 'Transformer':
    model = model.TransformerModel(ntokens, args.emsize, args.nhead,
                                   args.nhid, args.nlayers, args.dropout)
elif args.model == 'LSTM':
    model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                           args.nlayers, args.dropout, args.tied)
else:
    model = model.FNNModel(ntokens, args.emsize, args.nhid, args.tied)

# Move whichever network was built onto the target device.
model = model.to(device)

# Negative log-likelihood loss.
criterion = nn.NLLLoss()

# Choose the optimizer from the command line; both use a fixed learning rate
# of 0.01 and announce themselves on stdout.
if args.optimizer == 'SGD':
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    print("Running SGD optimizer")
elif args.optimizer == 'ASGD':
    optimizer = torch.optim.ASGD(
        model.parameters(),
        lr=0.01,
        lambd=1e-4,
        alpha=0.75,
        t0=1e6,
        weight_decay=0,
    )
    print("Running ASGD optimizer")
data = data.narrow(0, 0, nbatch * batch_size) # Evenly divide the data across the bsz batches. data = data.view(batch_size, -1).contiguous() return data.to(device) train_data = batchify(corpus.train, args.batch_size) val_data = batchify(corpus.valid, args.eval_batch_size) test_data = batchify(corpus.test, args.eval_batch_size) ############################################################################### # Build the model ############################################################################### ntokens = len(corpus.dictionary) model = model.FNNModel(args.seq_size, ntokens, args.emsize, args.nhid, args.tied).to(device) criterion = nn.NLLLoss() optimizer = torch.optim.Adam(model.parameters(), args.lr) ############################################################################### # Training code ############################################################################### # get_batch subdivides the source data into chunks of length args.seq_size. # If source is equal to the example output of the batchify function, with # a seq_size-limit of 2, we'd get the following two Variables for i = 0: # ┌ a g m s ┐ ┌ b h n t ┐ # └ b h n t ┘ └ c i o u ┘ # Note that despite the name of the function, the subdivison of data is not # done along the batch dimension (i.e. dimension 1), since that was handled
eval_batch_size = 10 train_data = batchify(corpus.train, args.batch_size) val_data = batchify(corpus.valid, eval_batch_size) test_data = batchify(corpus.test, eval_batch_size) ############################################################################### # Build the model ############################################################################### ntokens = len(corpus.dictionary) if args.model == 'Transformer': model = model.TransformerModel(ntokens, args.emsize, args.nhead, args.nhid, args.nlayers, args.dropout).to(device) elif args.model == 'FNN': model = model.FNNModel(args.n, ntokens, args.emsize, args.dropout, args.tied).to(device) else: model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.tied).to(device) criterion = nn.NLLLoss() ############################################################################### # Training code ############################################################################### def repackage_hidden(h): """Wraps hidden states in new Tensors, to detach them from their history.""" if isinstance(h, torch.Tensor):
eval_batch_size = 10
train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
# Passed as the last FNNModel argument, in the slot where RNNModel receives
# args.tied -- presumably a weight-sharing switch; confirm against FNNModel.
flag_share = True

# 'Transformer' and 'FNNModel' are matched explicitly; any other value is
# forwarded to RNNModel as the recurrent cell type.
if args.model == 'Transformer':
    model = model.TransformerModel(ntokens, args.emsize, args.nhead,
                                   args.nhid, args.nlayers, args.dropout)
elif args.model == 'FNNModel':
    model = model.FNNModel(ntokens, args.emsize, args.nhid, args.nlayers,
                           args.dropout, flag_share)
else:
    model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                           args.nlayers, args.dropout, args.tied)

# Move whichever network was built onto the target device.
model = model.to(device)

criterion = nn.CrossEntropyLoss()
#criterion = nn.NLLLoss()

###############################################################################
# Training code
###############################################################################

def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""
test_data = batchify(corpus.test, eval_batch_size)

# Debug output: the shape of each split, one per line, then the vocabulary size.
print(train_data.shape, val_data.shape, test_data.shape, sep="\n")
print(len(corpus.dictionary))

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)

# 'Transformer' and 'FNN' are matched explicitly; any other value is forwarded
# to RNNModel as the recurrent cell type.
if args.model == 'Transformer':
    model = model.TransformerModel(ntokens, args.emsize, args.nhead,
                                   args.nhid, args.nlayers, args.dropout)
elif args.model == 'FNN':
    model = model.FNNModel(args.emsize, args.nhid, ntokens)
else:
    model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                           args.nlayers, args.dropout, args.tied)

# Move whichever network was built onto the target device.
model = model.to(device)

criterion = nn.CrossEntropyLoss()

""" [JH] Add Optimizer """
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

###############################################################################
# Training code
###############################################################################

def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""
eval_ngram_size = args.ngram_size train_data = batchify(corpus.train, args.ngram_size) val_data = batchify(corpus.valid, eval_ngram_size) test_data = batchify(corpus.test, eval_ngram_size) ############################################################################### # Build the model ############################################################################### ntokens = len(corpus.dictionary) if args.model == 'Transformer': model = model.TransformerModel(ntokens, args.emsize, args.nhead, args.nhid, args.nlayers, args.dropout).to(device) elif args.model == "FNN": model = model.FNNModel(ntokens, args.emsize, args.nhid, args.ngram_size, args.dropout, args.tied).to(device) else: model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.tied).to(device) criterion = nn.NLLLoss() ############################################################################### # Training code ############################################################################### def repackage_hidden(h): """Wraps hidden states in new Tensors, to detach them from their history.""" if isinstance(h, torch.Tensor):
return data.to(device) eval_batch_size = 10 train_data = batchify(corpus.train, args.batch_size) val_data = batchify(corpus.valid, eval_batch_size) test_data = batchify(corpus.test, eval_batch_size) ############################################################################### # Build the model ############################################################################### ntokens = len(corpus.dictionary) model = model.FNNModel(ntokens, args.ngrams, args.emsize, args.nhid, args.dropout, tie_weights=args.tied).to(device) model = nn.DataParallel(model).to(device) model = model.module criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(model.parameters(), lr=args.lr) ############################################################################### # Training code ############################################################################### # def get_batch(source, i): # seq_len = min(args.bptt, len(source) - 1 - i) # data = source[i:i+seq_len] # target = source[i+1:i+1+seq_len].view(-1) # return data, target
#eval_batch_size = args.batch_size
# NOTE(review): eval_batch_size is not assigned anywhere in this span (the
# line above is commented out) -- confirm it is defined earlier in the file.
train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)

# 'Transformer' selects the transformer; every other value builds the
# feed-forward model (the recurrent alternative is kept below for reference).
if args.model == 'Transformer':
    model = model.TransformerModel(ntokens, args.emsize, args.nhead, args.nhid, args.nlayers, args.dropout).to(device)
else:
    #model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.tied).to(device)
    # The tie-weights flag is read from `args`, like every other option here.
    model = model.FNNModel(ntokens, args.emsize, args.bptt, args.nhid_tan, args.dropout, args.tied).to(device)

criterion = nn.CrossEntropyLoss()

###############################################################################
# Training code
###############################################################################

def repackage_hidden(h):
    """Detach hidden state(s) from the autograd history.

    Args:
        h: a ``torch.Tensor``, or an iterable (arbitrarily nested) whose
            leaves are tensors.

    Returns:
        ``h.detach()`` when ``h`` is a tensor; otherwise a tuple built by
        applying this function recursively to every element of ``h``.
    """
    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)
#data = data.view(bsz, -1) return data.to(device) eval_batch_size = 10 train_data = batchify(corpus.train, args.batch_size) val_data = batchify(corpus.valid, eval_batch_size) test_data = batchify(corpus.test, eval_batch_size) ############################################################################### # Build the model ############################################################################### ntokens = len(corpus.dictionary) model = model.FNNModel(args.model, ntokens, args.embedding_size, args.window_size, args.hidden_size, args.bptt, args.dropout, args.tied).to(device) criterion = nn.NLLLoss() optimizer = optim.Adam(model.parameters(), args.lr) ############################################################################### # Training code ############################################################################### def repackage_hidden(h): """Wraps hidden states in new Tensors, to detach them from their history.""" if isinstance(h, torch.Tensor): return h.detach()
# Evenly divide the data across the bsz batches. data = data.view(bsz, -1).t().contiguous() return data.to(device) eval_batch_size = 10 train_data = batchify(corpus.train, args.batch_size) val_data = batchify(corpus.valid, eval_batch_size) test_data = batchify(corpus.test, eval_batch_size) ############################################################################### # Build the model ############################################################################### ntokens = len(corpus.dictionary) model = model.FNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.tied).to(device) criterion = nn.NLLLoss() ############################################################################### # Training code ############################################################################### def repackage_hidden(h): """Wraps hidden states in new Tensors, to detach them from their history.""" if isinstance(h, torch.Tensor): return h.detach() else: return tuple(repackage_hidden(v) for v in h)
print( "WARNING: You have a CUDA device, so you should probably run with --cuda" ) device = torch.device("cuda" if args.cuda else "cpu") # =============== Load data =============== corpus = data.Corpus(args.data) n_tokens = len(corpus.dictionary) eval_batch_size = 10 train_data = batchify(corpus.train, args.batch_size) val_data = batchify(corpus.valid, eval_batch_size) test_data = batchify(corpus.test, eval_batch_size) # print(n_tokens, train_data.size(), val_data.size(), test_data.size()) # =============== Build the model =============== model = model.FNNModel(n_tokens, args.embed, args.hidden, args.tied).to(device) criterion = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=args.lr) # print(model, criterion, optimizer) # =============== Train the model =============== try: best_val_loss = None for epoch in range(1, args.epochs + 1): epoch_start_time = time.time() train() val_loss = evaluate(val_data) print('-' * 89) print( '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | valid ppl {:8.2f}' .format(epoch, (time.time() - epoch_start_time), val_loss,
# NOTE(review): eval_batch_size is assigned but not used in this span; the
# validation and test splits below are batched with args.batch_size -- confirm
# whether that is intended.
eval_batch_size = 10
train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, args.batch_size)
test_data = batchify(corpus.test, args.batch_size)
print(train_data)

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)

if args.model == "FNN":
    model = model.FNNModel(args.emsize,
                           ntokens, (args.nhid, ),
                           ngram=args.n,
                           dropout=args.dropout,
                           tie_weights=args.tied).to(device)
else:
    # Without this branch `model` would still name whatever provided FNNModel
    # rather than a network, and the script would only fail later with an
    # unrelated error.  Fail here with a clear message instead.
    raise ValueError(
        "unsupported model type {!r}: only 'FNN' is handled here".format(
            args.model))

criterion = nn.NLLLoss()

###############################################################################
# Training code
###############################################################################

def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()