Example #1
###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)  # number of tokens in the corpus vocabulary
# select the model architecture
if args.model == 'Transformer':
    model = model.TransformerModel(
        ntokens, args.emsize, args.nhead, args.nhid, args.nlayers,
        args.dropout).to(device)  # initialize a Transformer model
elif args.model == 'LSTM':
    model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                           args.nlayers, args.dropout,
                           args.tied).to(device)  # initialize an LSTM model
else:
    model = model.FNNModel(ntokens, args.emsize, args.nhid, args.tied).to(
        device)  # initialize an FNN model by default

criterion = nn.NLLLoss()  # negative log-likelihood loss

# select the optimizer
if args.optimizer == 'SGD':
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    print("Running SGD optimizer")
elif args.optimizer == 'ASGD':
    optimizer = torch.optim.ASGD(model.parameters(),
                                 lr=0.01,
                                 lambd=0.0001,
                                 alpha=0.75,
                                 t0=1000000.0,
                                 weight_decay=0)
    print("Running ASGD optimizer")
Example #2
    data = data.narrow(0, 0, nbatch * batch_size)
    # Evenly divide the data across the batch_size batches.
    data = data.view(batch_size, -1).contiguous()
    return data.to(device)


train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, args.eval_batch_size)
test_data = batchify(corpus.test, args.eval_batch_size)
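
The head of batchify is trimmed from this snippet, which is why nbatch appears undefined above; a minimal reconstruction consistent with the visible tail (the nbatch computation is an assumption taken from the standard PyTorch word-language-model script):

def batchify(data, batch_size):
    # Work out how many whole batches the data divides into (assumed step).
    nbatch = data.size(0) // batch_size
    # Trim off any extra elements that would not cleanly fit.
    data = data.narrow(0, 0, nbatch * batch_size)
    # Evenly divide the data across the batch_size batches.
    data = data.view(batch_size, -1).contiguous()
    return data.to(device)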

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
model = model.FNNModel(args.seq_size, ntokens, args.emsize, args.nhid,
                       args.tied).to(device)

criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(model.parameters(), args.lr)

###############################################################################
# Training code
###############################################################################

# get_batch subdivides the source data into chunks of length args.seq_size.
# If source is equal to the example output of the batchify function, with
# a seq_size-limit of 2, we'd get the following two Variables for i = 0:
# ┌ a g m s ┐ ┌ b h n t ┐
# └ b h n t ┘ └ c i o u ┘
# Note that despite the name of the function, the subdivision of data is not
# done along the batch dimension (i.e. dimension 1), since that was handled
# by the batchify function.
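
The get_batch definition this comment describes is cut off here; a sketch matching the description, mirroring the version commented out in Example #7 below with args.bptt replaced by args.seq_size:

def get_batch(source, i):
    seq_len = min(args.seq_size, len(source) - 1 - i)
    data = source[i:i + seq_len]                     # inputs
    target = source[i + 1:i + 1 + seq_len].view(-1)  # targets, shifted by one token
    return data, target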
Example #3
eval_batch_size = 10
train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
if args.model == 'Transformer':
    model = model.TransformerModel(ntokens, args.emsize, args.nhead, args.nhid,
                                   args.nlayers, args.dropout).to(device)
elif args.model == 'FNN':
    model = model.FNNModel(args.n, ntokens, args.emsize, args.dropout,
                           args.tied).to(device)
else:
    model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                           args.nlayers, args.dropout, args.tied).to(device)

criterion = nn.NLLLoss()

###############################################################################
# Training code
###############################################################################


def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""

    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)
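
repackage_hidden is typically called once per batch so that backpropagation stops at the batch boundary instead of unrolling through the whole dataset; a brief, hypothetical usage sketch (init_hidden and get_batch are assumed from the standard RNN example):

hidden = model.init_hidden(args.batch_size)  # assumed RNNModel API
for i in range(0, train_data.size(0) - 1, args.bptt):
    data, targets = get_batch(train_data, i)
    hidden = repackage_hidden(hidden)  # detach from previous batches
    output, hidden = model(data, hidden)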
Example #4
eval_batch_size = 10
train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
flag_share = True
if args.model == 'Transformer':
    model = model.TransformerModel(ntokens, args.emsize, args.nhead, args.nhid,
                                   args.nlayers, args.dropout).to(device)
elif args.model == 'FNNModel':
    model = model.FNNModel(ntokens, args.emsize, args.nhid, args.nlayers,
                           args.dropout, flag_share).to(device)
else:
    model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                           args.nlayers, args.dropout, args.tied).to(device)

criterion = nn.CrossEntropyLoss()
#criterion = nn.NLLLoss()
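# Unlike the NLLLoss setups in the other examples, nn.CrossEntropyLoss
# applies log_softmax internally, so the model here should return raw
# logits rather than log-probabilities.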

###############################################################################
# Training code
###############################################################################


def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""
Example #5
test_data = batchify(corpus.test, eval_batch_size)

print(train_data.shape)
print(val_data.shape)
print(test_data.shape)
print(len(corpus.dictionary))
###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
if args.model == 'Transformer':
    model = model.TransformerModel(ntokens, args.emsize, args.nhead, args.nhid,
                                   args.nlayers, args.dropout).to(device)
elif args.model == 'FNN':
    model = model.FNNModel(args.emsize, args.nhid, ntokens).to(device)
else:
    model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                           args.nlayers, args.dropout, args.tied).to(device)

criterion = nn.CrossEntropyLoss()
""" [JH] Add Optimizer """
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

###############################################################################
# Training code
###############################################################################


def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""
Example #6
eval_ngram_size = args.ngram_size
train_data = batchify(corpus.train, args.ngram_size)
val_data = batchify(corpus.valid, eval_ngram_size)
test_data = batchify(corpus.test, eval_ngram_size)
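# Note: args.ngram_size does double duty in this example, as both the batch
# width passed to batchify above and the n-gram context size of the FNN below.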

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
if args.model == 'Transformer':
    model = model.TransformerModel(ntokens, args.emsize, args.nhead, args.nhid,
                                   args.nlayers, args.dropout).to(device)
elif args.model == "FNN":
    model = model.FNNModel(ntokens, args.emsize, args.nhid, args.ngram_size,
                           args.dropout, args.tied).to(device)
else:
    model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                           args.nlayers, args.dropout, args.tied).to(device)

criterion = nn.NLLLoss()

###############################################################################
# Training code
###############################################################################


def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""

    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)
Example #7
    return data.to(device)


eval_batch_size = 10
train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
model = model.FNNModel(ntokens,
                       args.ngrams,
                       args.emsize,
                       args.nhid,
                       args.dropout,
                       tie_weights=args.tied).to(device)
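# NOTE: the next two lines wrap the model in DataParallel and then
# immediately rebind `model` to the underlying module, so the wrapper
# is effectively discarded before training.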
model = nn.DataParallel(model).to(device)
model = model.module
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
###############################################################################
# Training code
###############################################################################

# def get_batch(source, i):
#     seq_len = min(args.bptt, len(source) - 1 - i)
#     data = source[i:i+seq_len]
#     target = source[i+1:i+1+seq_len].view(-1)
#     return data, target
Example #8
eval_batch_size = args.batch_size
train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
if args.model == 'Transformer':
    model = model.TransformerModel(ntokens, args.emsize, args.nhead, args.nhid,
                                   args.nlayers, args.dropout).to(device)
else:
    #model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.tied).to(device)
    model = model.FNNModel(ntokens, args.emsize, args.bptt, args.nhid_tan,
                           args.dropout, args.tied).to(device)

criterion = nn.CrossEntropyLoss()

###############################################################################
# Training code
###############################################################################


def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""

    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)
Example #9
    #data = data.view(bsz, -1)
    return data.to(device)


eval_batch_size = 10
train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
model = model.FNNModel(args.model, ntokens, args.embedding_size,
                       args.window_size, args.hidden_size, args.bptt,
                       args.dropout, args.tied).to(device)

criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), args.lr)

###############################################################################
# Training code
###############################################################################


def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""

    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)
Example #10
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    return data.to(device)


eval_batch_size = 10
train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
model = model.FNNModel(args.model, ntokens, args.emsize, args.nhid,
                       args.nlayers, args.dropout, args.tied).to(device)

criterion = nn.NLLLoss()

###############################################################################
# Training code
###############################################################################


def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""

    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)
Example #11
            print(
                "WARNING: You have a CUDA device, so you should probably run with --cuda"
            )
    device = torch.device("cuda" if args.cuda else "cpu")

    # =============== Load data ===============
    corpus = data.Corpus(args.data)
    n_tokens = len(corpus.dictionary)
    eval_batch_size = 10
    train_data = batchify(corpus.train, args.batch_size)
    val_data = batchify(corpus.valid, eval_batch_size)
    test_data = batchify(corpus.test, eval_batch_size)
    # print(n_tokens, train_data.size(), val_data.size(), test_data.size())

    # =============== Build the model ===============
    model = model.FNNModel(n_tokens, args.embed, args.hidden,
                           args.tied).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    # print(model, criterion, optimizer)

    # =============== Train the model ===============
    try:
        best_val_loss = None
        for epoch in range(1, args.epochs + 1):
            epoch_start_time = time.time()
            train()
            val_loss = evaluate(val_data)
            print('-' * 89)
            print(
                '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | valid ppl {:8.2f}'
                .format(epoch, (time.time() - epoch_start_time), val_loss,
                        math.exp(val_loss)))
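
The snippet cuts off inside the logging call; in the standard PyTorch script the epoch loop continues by checkpointing whenever validation loss improves, roughly as follows (args.save is an assumption):

            print('-' * 89)
            if not best_val_loss or val_loss < best_val_loss:
                with open(args.save, 'wb') as f:
                    torch.save(model, f)
                best_val_loss = val_loss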
Example #12

eval_batch_size = 10
train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, args.batch_size)
test_data = batchify(corpus.test, args.batch_size)
print(train_data)
###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
if args.model == "FNN":
    model = model.FNNModel(args.emsize,
                           ntokens, (args.nhid, ),
                           ngram=args.n,
                           dropout=args.dropout,
                           tie_weights=args.tied).to(device)

criterion = nn.NLLLoss()

###############################################################################
# Training code
###############################################################################


def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""

    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)