Code Example #1
 def __init__(self, optimizer, lr, model_type, vocsize, emsize, buffer_len,
              nhead, nhid, nlayers, dropout, learn_iterations, warmup,
              after_warmup):
     criterion = nn.CrossEntropyLoss()
     super(TransformerLearner, self).__init__(criterion, vocsize,
                                              learn_iterations)
     self.model = model.TransformerModel(vocsize, emsize, nhead, nhid,
                                         nlayers, dropout)
     self.dmodel = emsize
     if lr == 42:
         # 42 is a sentinel value: fall back to the d_model**-0.5 base rate.
         self.lr = self.dmodel**-0.5
     else:
         self.lr = lr
     self.step = 1
     self.warmup = warmup
     self.after_warmup = after_warmup
     self.buffer_len = buffer_len
     self.buffer = None
     kwargs = {}
     if optimizer == 'Adam':
         kwargs['betas'] = (0.9, 0.98)
         kwargs['eps'] = 1e-9
     lr = self.compute_lr()
     self.optimizer = getattr(optim, optimizer)(self.model.parameters(),
                                                lr=lr)
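
Code Example #1 above calls a compute_lr method that is not shown in the excerpt. A minimal sketch of what it might look like, assuming the standard inverse-square-root (Noam) warmup schedule from "Attention Is All You Need"; the update_lr helper and the omission of after_warmup handling are assumptions, not taken from the source.

    def compute_lr(self):
        # Sketch only: inverse-square-root schedule with linear warmup,
        # scaled by self.lr (which defaults to d_model**-0.5 above).
        return self.lr * min(self.step ** -0.5, self.step * self.warmup ** -1.5)

    def update_lr(self):
        # Hypothetical helper: advance the step counter and push the scheduled
        # rate into the optimizer; after_warmup handling is omitted here.
        self.step += 1
        for group in self.optimizer.param_groups:
            group['lr'] = self.compute_lr()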
Code Example #2
    def model_func(wrapped_import, inputs):
        ###############################################################################
        # Build the model
        ###############################################################################
        if wrapped_import:
            nn = wrapped_import("torch.nn")
            model = wrapped_import("model")
        else:
            from torch import nn
            import model

        if args.model == 'Transformer':
            net = model.TransformerModel(ntokens, args.emsize, args.nhead,
                                         args.nhid, args.nlayers, args.dropout)
        else:
            net = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                                 args.nlayers, args.dropout, args.tied)

        net.eval()  # for verification, need no random elements (e.g. dropout)
        # criterion = nn.NLLLoss()

        if args.model != 'Transformer':
            hidden = net.init_hidden(args.batch_size)
        else:
            hidden = None
        with torch.no_grad():
            if args.model == 'Transformer':
                output = net(inputs)
                output = output.view(-1, ntokens)
            else:
                output, hidden = net(inputs, hidden)

            return output
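
A minimal sketch of driving this verification function with plain PyTorch: passing wrapped_import=None takes the "from torch import nn" branch, while the wrapped path would be supplied by whatever verification framework provides wrapped_import. The vocabulary size and shapes below are placeholders, and args and ntokens are assumed to be in scope as in the excerpt.

import torch

ntokens = 10000                       # placeholder vocabulary size
seq_len, batch_size = 35, 20          # placeholder shapes; args.batch_size must match
inputs = torch.randint(0, ntokens, (seq_len, batch_size), dtype=torch.long)

# Direct PyTorch path: wrapped_import=None is falsy, so the real imports are used.
reference_output = model_func(None, inputs)

# With a wrapping framework the same call would be
#   wrapped_output = model_func(wrapped_import, inputs)
# and the two outputs could then be compared, e.g. with torch.allclose.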
Code Example #3
def main_func(datasets, context_len, epochs):
    for dataset in datasets:
        for bptt in context_len:
            train_d, valid_d, test_d, data = data_generator(dataset)
            train_data = batchify(train_d, bptt)
            val_data = batchify(valid_d, bptt)
            test_data = batchify(test_d, bptt)

            ntokens = len(set(data))
            best_val_loss = None
            lr = args.lr
            if args.model == 'Transformer':
                model = model1.TransformerModel(ntokens, args.emsize,
                                                args.nhead, args.nhid,
                                                args.nlayers,
                                                args.dropout).to(device)
            else:
                model = model1.RNNModel(args.model, ntokens, args.emsize,
                                        args.nhid, args.nlayers, args.dropout,
                                        args.tied).to(device)
            criterion = nn.CrossEntropyLoss()
            optimizer = torch.optim.Adam(model.parameters(), args.lr)

            for epoch in range(1, epochs + 1):
                epoch_start_time = time.time()
                train(train_data, bptt, ntokens, model, criterion, optimizer,
                      epoch)
                val_loss = evaluate(val_data, bptt, ntokens, model, criterion)
                print('-' * 89)
                print(
                    '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                    'valid ppl {:8.2f}'.format(
                        epoch, (time.time() - epoch_start_time), val_loss,
                        math.exp(val_loss)))
                print('-' * 89)
                # Save the model if the validation loss is the best we've seen so far.
                if not best_val_loss or val_loss < best_val_loss:
                    with open(args.save, 'wb') as f:
                        torch.save(model, f)
                    best_val_loss = val_loss
                else:
                    # Anneal the learning rate when validation loss stops improving
                    # (note: lr is not fed back into the Adam optimizer above).
                    lr /= 4.0

            # Load the best saved model.
            with open(args.save, 'rb') as f:
                model = torch.load(f)
                if args.model in ['RNN_TANH', 'RNN_RELU', 'LSTM', 'GRU']:
                    model.rnn.flatten_parameters()
            # Run on test data.
            test_loss = evaluate(test_data, bptt, ntokens, model, criterion)
            print('=' * 89)
            print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.
                  format(test_loss, math.exp(test_loss)))
            print('=' * 89)
            fr.write("test loss for len %d and dataset %s is %f\n" %
                     (bptt, dataset, test_loss))
            #fr.close()
    return
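
A hedged usage sketch of main_func; the dataset name and context lengths are placeholders, and data_generator, batchify, train, and evaluate are assumed to be defined elsewhere in the project.

# Hypothetical call: sweep two context lengths over a single dataset.
main_func(datasets=['ptb'], context_len=[35, 70], epochs=10)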
Code Example #4
    data = data.view(bsz, -1).t().contiguous()
    return data.to(device)


eval_batch_size = 10
train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
if args.model == 'Transformer':
    model = model.TransformerModel(ntokens, args.emsize, args.nhead, args.nhid,
                                   args.nlayers, args.dropout).to(device)
else:
    model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                           args.nlayers, args.dropout, args.tied).to(device)

criterion = nn.NLLLoss()

###############################################################################
# Training code
###############################################################################


def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""

    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)
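
This variant pairs the model with nn.NLLLoss(), whereas several of the examples above use nn.CrossEntropyLoss(). The two agree when the model's output is already log-probabilities (as in recent versions of the upstream word_language_model example, whose models end in log_softmax); a quick standalone check of that equivalence on raw logits:

import torch
import torch.nn as nn
import torch.nn.functional as F

logits = torch.randn(8, 5)            # (batch, ntokens) raw scores
targets = torch.randint(0, 5, (8,))

ce = nn.CrossEntropyLoss()(logits, targets)
nll = nn.NLLLoss()(F.log_softmax(logits, dim=-1), targets)
assert torch.allclose(ce, nll)        # identical up to floating-point error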
Code Example #5
def main():
    parser = argparse.ArgumentParser(description="Compute sentence scores of "
                                     "nbest lists with a PyTorch trained "
                                     "neural language model.")
    parser.add_argument("--nbest-list",
                        type=str,
                        required=True,
                        help="N-best hypotheses for rescoring")
    parser.add_argument(
        "--outfile",
        type=str,
        required=True,
        help="Output file with language model scores associated "
        "with each hypothesis",
    )
    parser.add_argument("--vocabulary",
                        type=str,
                        required=True,
                        help="Vocabulary used for training")
    parser.add_argument(
        "--model-path",
        type=str,
        required=True,
        help="Path to a pretrained neural model.",
    )
    parser.add_argument(
        "--model",
        type=str,
        default="LSTM",
        help="Network type. can be RNN, LSTM or Transformer.",
    )
    parser.add_argument("--emsize",
                        type=int,
                        default=200,
                        help="size of word embeddings")
    parser.add_argument("--nhid",
                        type=int,
                        default=200,
                        help="number of hidden units per layer")
    parser.add_argument("--nlayers",
                        type=int,
                        default=2,
                        help="number of layers")
    parser.add_argument(
        "--nhead",
        type=int,
        default=2,
        help="the number of heads in the encoder/decoder of the "
        "transformer model",
    )
    args = parser.parse_args()
    assert os.path.exists(args.nbest_list), "Nbest list path does not exist."
    assert os.path.exists(args.vocabulary), "Vocabulary path does not exist."
    assert os.path.exists(args.model_path), "Model path does not exist."

    print("Load vocabulary")
    vocab = read_vocab(args.vocabulary)
    ntokens = len(vocab)
    print("Load model and criterion")
    import model

    if args.model == "Transformer":
        model = model.TransformerModel(
            ntokens,
            args.emsize,
            args.nhead,
            args.nhid,
            args.nlayers,
            activation="gelu",
            tie_weights=True,
        )
    else:
        model = model.RNNModel(args.model,
                               ntokens,
                               args.emsize,
                               args.nhid,
                               args.nlayers,
                               tie_weights=True)
    with open(args.model_path, "rb") as f:
        model.load_state_dict(
            torch.load(f, map_location=lambda storage, loc: storage))
        if args.model in ["RNN_TANH", "RNN_RELU", "LSTM", "GRU"]:
            model.rnn.flatten_parameters()
    criterion = nn.CrossEntropyLoss()
    print("Load nbest list")
    nbest = load_nbest(args.nbest_list)
    print("Compute sentence scores with a ", args.model, " model")
    nbest_and_scores = compute_scores(nbest,
                                      model,
                                      criterion,
                                      ntokens,
                                      vocab,
                                      model_type=args.model)
    print("Write sentence scores out")
    write_scores(nbest_and_scores, args.outfile)
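
For context, a stripped-down, purely illustrative sketch of how a single hypothesis could be scored with the Transformer branch: sum the per-token cross-entropies over the sentence. The real compute_scores (tokenization, OOV and sentence-boundary handling, batching) is not shown here and may differ; the sketch also assumes the model returns raw logits compatible with CrossEntropyLoss.

import torch

def score_hypothesis(model, criterion, token_ids, ntokens):
    # Illustrative only: token_ids is a list of vocabulary indices for one
    # hypothesis, including any sentence-boundary symbols.
    model.eval()
    ids = torch.tensor(token_ids, dtype=torch.long).unsqueeze(1)  # (seq, 1)
    with torch.no_grad():
        output = model(ids[:-1]).view(-1, ntokens)   # predict each next token
        loss = criterion(output, ids[1:].view(-1))   # mean cross-entropy
    return -loss.item() * (len(token_ids) - 1)       # total log-likelihood-style score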
Code Example #6
def setup_model_and_optim(args, train_data, tokenizer):
    ntokens = args.data_size
    if args.model.lower() == 'transformer':
        embed_tokens = m.Embedding(
            ntokens,
            args.decoder_embed_dim,
            padding_idx=tokenizer.command_name_map['pad'].Id)
        model = m.TransformerModel(m.DecoderPreprocessor(args, embed_tokens),
                                   m.TransformerDecoder(args, embed_tokens))
    else:
        model = m.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                           args.nlayers, args.dropout, args.tied)
        global rnn_model
        rnn_model = model
    LR_Warmer = None
    print('* number of parameters: %d' %
          sum([p.nelement() for p in model.parameters()]))
    if args.cuda:
        model.cuda()

    optim = None
    if args.load is not None and args.load != '':
        sd = torch.load(args.load, map_location='cpu')
        if args.load_optim:
            # optim_sd is used below when args.load_optim is set, so load it here.
            optim_sd = torch.load(
                os.path.join(os.path.dirname(args.load), 'optim.pt'),
                map_location='cpu')
            rng = torch.load(os.path.join(os.path.dirname(args.load),
                                          'rng.pt'))
            torch.cuda.set_rng_state(rng[0])
            torch.set_rng_state(rng[1])
        try:
            model.load_state_dict(sd)
        except Exception:  # retry with weight norm applied, matching how the checkpoint was saved
            if hasattr(model, 'rnn'):
                apply_weight_norm(model.rnn, hook_child=False)
            else:
                apply_weight_norm(model, hook_child=False)
            model.load_state_dict(sd)
            remove_weight_norm(model)

    if not args.no_weight_norm:
        if hasattr(model, 'rnn'):
            apply_weight_norm(model.rnn, hook_child=False)
        else:
            apply_weight_norm(model, hook_child=False)

    if optim is None:
        # Force Adam when a slanted-triangular schedule is requested
        # (optim_choice was previously computed but never used).
        optim_choice = 'Adam' if args.stlr_cut_frac else args.optim
        optimizer_cls = getattr(torch.optim, optim_choice)
        if args.fp16:
            model = FP16_Module(model)
            optim = optimizer_cls(model.parameters(), lr=args.lr)
            optim = FP16_Optimizer(optim,
                                   static_loss_scale=args.loss_scale,
                                   dynamic_loss_scale=args.dynamic_loss_scale)
        else:
            optim = optimizer_cls(model.parameters(), lr=args.lr)

    if args.load_optim:
        optim.load_state_dict(optim_sd)

    # add linear learning rate scheduler
    if train_data is not None:
        if args.constant_decay:
            num_iters = args.constant_decay
        else:
            num_iters = args.train_iters * args.epochs

        init_step = -1
        if args.load_optim:
            #TODO: this no longer makes sense given the new data loaders
            init_step = optim_sd['iter'] - optim_sd['skipped_iter']
            train_data.batch_sampler.start_iter = (optim_sd['iter'] %
                                                   len(train_data)) + 1

        warmup_iter = args.warmup * num_iters

        if args.stlr_cut_frac is not None:
            LR = SlantedTriangularLR(optim,
                                     cut_frac=args.stlr_cut_frac,
                                     num_iters=num_iters)
        else:
            LR = AnnealingLR(optim,
                             start_lr=args.lr,
                             warmup_iter=warmup_iter,
                             num_iters=num_iters,
                             decay_style=args.decay_style)

        if args.warmup != 0:
            LR_Warmer = WarmupLR(optim, warmup_iter, last_iter=init_step)

    # wrap model for distributed training
    if args.world_size > 1:
        model = DDP(model)

    criterion = nn.CrossEntropyLoss(reduction='none')  # per-element losses; reduce=False is deprecated
    return model, optim, LR, LR_Warmer, criterion
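
A minimal sketch of how the returned objects might be wired together in a training step. Because the criterion is constructed with per-element losses, the caller reduces them before backpropagation; forward_pass is a hypothetical helper, and the per-iteration scheduler stepping is an assumption (the FP16 path, which typically routes backward through the FP16 optimizer wrapper, is omitted).

model, optim, LR, LR_Warmer, criterion = setup_model_and_optim(args, train_data, tokenizer)

for batch in train_data:                          # hypothetical iteration
    optim.zero_grad()
    output, targets = forward_pass(model, batch)  # hypothetical helper
    loss = criterion(output, targets).mean()      # criterion is unreduced
    loss.backward()
    optim.step()
    LR.step()                                     # per-iteration schedule
    if LR_Warmer is not None:
        LR_Warmer.step()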
Code Example #7
File: runner.py Project: humorbeing/python_github
def main(args):
    # print(os.getcwd())
    # ss('s')
    if args.wandb:  # None disables wandb; otherwise it is the W&B project name
        import wandb
        wandb.init(project=args.wandb, reinit=True)
        wandb.config.update(args)

    torch.manual_seed(args.seed)
    # if torch.cuda.is_available():
    #     if not args.cuda:
    #         print("WARNING: You have a CUDA device, so you should probably run with --cuda")

    device = torch.device("cuda" if args.is_cuda else "cpu")

    ###############################################################################
    # Load data
    ###############################################################################

    corpus = data.Corpus(args.data_root)

    # Starting from sequential data, batchify arranges the dataset into columns.
    # For instance, with the alphabet as the sequence and batch size 4, we'd get
    # ┌ a g m s ┐
    # │ b h n t │
    # │ c i o u │
    # │ d j p v │
    # │ e k q w │
    # └ f l r x ┘.
    # These columns are treated as independent by the model, which means that the
    # dependence of e.g. 'g' on 'f' cannot be learned, but allows more efficient
    # batch processing.

    def batchify(data, bsz):
        # Work out how cleanly we can divide the dataset into bsz parts.
        nbatch = data.size(0) // bsz
        # Trim off any extra elements that wouldn't cleanly fit (remainders).
        data = data.narrow(0, 0, nbatch * bsz)
        # Evenly divide the data across the bsz batches.
        data = data.view(bsz, -1).t().contiguous()
        return data.to(device)

    eval_batch_size = 10
    train_data = batchify(corpus.train, args.batch_size)
    val_data = batchify(corpus.valid, eval_batch_size)
    test_data = batchify(corpus.test, eval_batch_size)


    #
    ###############################################################################
    # Build the model
    ###############################################################################

    ntokens = len(corpus.dictionary)
    if args.model == 'Transformer':
        model = Model.TransformerModel(ntokens, args.emsize, args.nhead, args.nhid, args.nlayers, args.dropout).to(device)
    else:
        model = Model.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.tied).to(device)

    criterion = nn.CrossEntropyLoss()
    if args.wandb:
        wandb.watch(model)
    #
    ###############################################################################
    # Training code
    ###############################################################################
    optimizer = optim.Adam(model.parameters(), lr=args.lr_adam)
    lmbda = lambda epoch: 0.95**epoch
    # scheduler = optim.lr_scheduler.MultiplicativeLR(optimizer, lr_lambda=lmbda)
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=[lmbda])
    # scheduler.step()
    # scheduler.step()
    # scheduler.step()
    # args.is_manual_update = True
    lr = args.lr
    def get_lr():
        if args.is_manual_update:
            output = 'M{:02.5f}'.format(lr)
        else:
            for p in optimizer.param_groups:
                output = 'A{:02.5f}'.format(p['lr'])
        return output
    print(get_lr())

    # for p in optimizer.param_groups:
    #     print(p['lr'])
    #     # break
    # ss('-in main')
    def repackage_hidden(h):
        """Wraps hidden states in new Tensors, to detach them from their history."""

        if isinstance(h, torch.Tensor):
            return h.detach()
        else:
            return tuple(repackage_hidden(v) for v in h)


    # get_batch subdivides the source data into chunks of length args.bptt.
    # If source is equal to the example output of the batchify function, with
    # a bptt-limit of 2, we'd get the following two Variables for i = 0:
    # ┌ a g m s ┐ ┌ b h n t ┐
    # └ b h n t ┘ └ c i o u ┘
    # Note that despite the name of the function, the subdivision of data is not
    # done along the batch dimension (i.e. dimension 1), since that was handled
    # by the batchify function. The chunks are along dimension 0, corresponding
    # to the seq_len dimension in the LSTM.

    def get_batch(source, i):
        seq_len = min(args.bptt, len(source) - 1 - i)
        data = source[i:i+seq_len]
        target = source[i+1:i+1+seq_len].view(-1)
        return data, target


    def evaluate(data_source):
        # Turn on evaluation mode which disables dropout.
        model.eval()
        total_loss = 0.
        ntokens = len(corpus.dictionary)
        if args.model != 'Transformer':
            hidden = model.init_hidden(eval_batch_size)
        with torch.no_grad():
            for i in range(0, data_source.size(0) - 1, args.bptt):
                data, targets = get_batch(data_source, i)
                if args.model == 'Transformer':
                    output = model(data)
                else:
                    output, hidden = model(data, hidden)
                    hidden = repackage_hidden(hidden)
                output_flat = output.view(-1, ntokens)
                total_loss += len(data) * criterion(output_flat, targets).item()
        return total_loss / (len(data_source) - 1)


    def train():
        # Turn on training mode which enables dropout.
        model.train()
        total_loss = 0.
        log_loss = 0.
        start_time = time.time()
        ntokens = len(corpus.dictionary)
        if args.model != 'Transformer':
            hidden = model.init_hidden(args.batch_size)
        for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
            data, targets = get_batch(train_data, i)
            # Starting each batch, we detach the hidden state from how it was previously produced.
            # If we didn't, the model would try backpropagating all the way to start of the dataset.
            model.zero_grad()
            if args.model == 'Transformer':
                output = model(data)
            else:
                hidden = repackage_hidden(hidden)
                output, hidden = model(data, hidden)
            loss = criterion(output.view(-1, ntokens), targets)
            loss.backward()

            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)

            if args.is_manual_update:
                for p in model.parameters():
                    p.data.add_(p.grad.data, alpha=-lr)  # manual SGD step
            else:
                optimizer.step()

            total_loss += loss.item()
            log_loss += len(data) * loss.item()
            if batch % args.log_interval == 0 and batch > 0:
                cur_loss = total_loss / args.log_interval
                elapsed = time.time() - start_time
                print('| epoch {:3d} | {:5d}/{:5d} batches | lr {} | ms/batch {:5.2f} | '
                        'loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, batch, len(train_data) // args.bptt, get_lr(),
                    elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
                total_loss = 0
                start_time = time.time()

            if args.is_quickrun:
                break
        return log_loss / (train_data.size(0)-1)
            # break


    # def export_onnx(path, batch_size, seq_len):
    #     print('The model is also exported in ONNX format at {}'.
    #           format(os.path.realpath(args.onnx_export)))
    #     model.eval()
    #     dummy_input = torch.LongTensor(seq_len * batch_size).zero_().view(-1, batch_size).to(device)
    #     hidden = model.init_hidden(batch_size)
    #     torch.onnx.export(model, (dummy_input, hidden), path)


    # Loop over epochs.

    best_val_loss = None
    early_stop_count = 0
    early_stop_when = 10
    # At any point you can hit Ctrl + C to break out of training early.
    try:
        for epoch in range(1, args.epoch+1):
            epoch_start_time = time.time()
            log_loss = train()
            # ss('-in main')
            val_loss = evaluate(val_data)
            print('-' * 89)
            print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                    'valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time),
                                               val_loss, math.exp(val_loss)))
            print('-' * 89)
            if args.wandb:
                wandb.log({
                    'train loss': log_loss,
                    'valid loss': val_loss
                })
            # Save the model if the validation loss is the best we've seen so far.
            if not best_val_loss or val_loss < best_val_loss:
                # with open(args.save, 'wb') as f:
                #     torch.save(model, f)
                best_val_loss = val_loss
                early_stop_count = 0
            else:
                if args.is_manual_update:
                    # Anneal the learning rate if no improvement has been seen in the validation dataset.
                    lr /= 4.0
                else:
                    scheduler.step()
                early_stop_count += 1
            if args.early_stop is not None:
                print('early stop monitor [{}/{}]'.format(early_stop_count, args.early_stop))
                if early_stop_count > args.early_stop:
                    print('trigger early stop')
                    break
    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')

    # Load the best saved model.
    # with open(args.save, 'rb') as f:
    #     model = torch.load(f)
    #     # after load the rnn params are not a continuous chunk of memory
    #     # this makes them a continuous chunk, and will speed up forward pass
    #     # Currently, only rnn model supports flatten_parameters function.
    #     if args.model in ['RNN_TANH', 'RNN_RELU', 'LSTM', 'GRU']:
    #         model.rnn.flatten_parameters()

    # Run on test data.
    test_loss = evaluate(test_data)
    print('=' * 89)
    print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(
        test_loss, math.exp(test_loss)))
    print('=' * 89)
    if args.wandb:
        wandb.log({'test loss':test_loss})
        wandb.join()
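
When is_manual_update is off, the schedule above multiplies the Adam learning rate by 0.95 each time scheduler.step() is called (i.e. each epoch without validation improvement). A small standalone sketch of that LambdaLR behaviour:

import torch
from torch import optim

params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = optim.Adam(params, lr=0.001)
scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda e: 0.95 ** e)

for _ in range(3):
    optimizer.step()
    scheduler.step()
    print(scheduler.get_last_lr())  # ~[0.00095], [0.0009025], [0.000857375]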
Code Example #8
    return data.to(device)


eval_batch_size = 10
train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

###############################################################################
# Build the model
###############################################################################
ntokens = len(corpus.dictionary)
if args.model == 'Transformer':
    model = model.TransformerModel(ntoken=ntokens,
                                   ninp=args.emsize,
                                   nhead=args.nhead,
                                   nhid=args.nhid,
                                   nlayers=args.nlayers,
                                   dropout=args.dropout).to(device)
else:
    model = model.RNNModel(args.model,
                           ntoken=ntokens,
                           ninp=args.emsize,
                           nhid=args.nhid,
                           nlayers=args.nlayers,
                           dropout=args.dropout).to(device)

criterion = nn.NLLLoss()


###############################################################################
# Training code
Code Example #9
    # Load data
    corpus = data.Corpus(args.input_path)
    eval_batch_size = 10
    train_data = batchify(corpus.train, batch_size)
    val_data = batchify(corpus.valid, eval_batch_size)
    test_data = batchify(corpus.test, eval_batch_size)

    print(f'Train data shape: {train_data.shape}')
    print(f'Val data shape: {val_data.shape}')
    print(f'Test data shape: {test_data.shape}')

    # build model
    ntokens = len(corpus.dictionary)
    if model_type == 'Transformer':
        model = model.TransformerModel(ntokens, emsize, nhead, nhid, nlayers,
                                       dropout).to(device)
    else:
        model = model.RNNModel(model_type, ntokens, emsize, nhid, nlayers,
                               dropout, args.tied).to(device)

    print(f'model: {model}')

    criterion = nn.NLLLoss()

    # Training code
    best_val_loss = None
    for epoch in range(1, epochs + 1):
        epoch_start_time = time.time()
        train(model_type, model, corpus, train_data, batch_size, args.bptt,
              clip, args.log_interval, args.dry_run, epoch)
        val_loss = evaluate(model_type, model, corpus, val_data, args.bptt)
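
The excerpt is cut off right after the validation pass; in Examples #3 and #7 above the epoch loop continues with logging and best-model checkpointing. A minimal sketch of that continuation, assuming an args.save checkpoint path as in Example #3:

        # Hypothetical continuation of the epoch loop (cf. Examples #3 and #7).
        print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
              'valid ppl {:8.2f}'.format(epoch, time.time() - epoch_start_time,
                                         val_loss, math.exp(val_loss)))
        if best_val_loss is None or val_loss < best_val_loss:
            with open(args.save, 'wb') as f:   # args.save is an assumption here
                torch.save(model, f)
            best_val_loss = val_loss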
Code Example #10
File: main.py Project: syssel/examples
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    return data.to(device)

eval_batch_size = 10
train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
if args.model == 'Transformer':
    model = model_class.TransformerModel(ntokens, args.emsize, args.nhead, args.nhid, args.nlayers, args.dropout).to(device)
else:
    model = model_class.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.tied).to(device)

criterion = nn.NLLLoss()

###############################################################################
# Training code
###############################################################################

def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""

    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)
Code Example #11
File: main.py Project: kushaltirumala/nero
eval_batch_size = 10
train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
if args.model == 'Transformer':
    model = model.TransformerModel(ntokens,
                                   args.emsize,
                                   args.nhead,
                                   args.nhid,
                                   args.nlayers,
                                   args.dropout,
                                   args.norm,
                                   0.0,
                                   activation='relu').to(device)
else:
    model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                           args.nlayers, args.dropout, args.tied).to(device)

criterion = nn.NLLLoss()

if args.optim == 'sgd':
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                momentum=args.momentum)
elif args.optim == 'adam':
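
The excerpt is truncated in the middle of the optimizer selection. A hedged guess at how the adam branch might continue (the real project may pass different hyperparameters or support further optimizers, e.g. its own):

    # Hypothetical continuation of the truncated branch.
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)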
Code Example #12
    os.environ['CUDA_VISIBLE_DEVICES'] = cmd[:-1]
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

c_to_i = pickle.load(open(args.c_to_i, 'rb'))
i_to_c = pickle.load(open(args.i_to_c, 'rb'))
n_char = len(c_to_i)

dataloaders = []

with open('data/vs_chemist.txt') as f:
    lines = f.readlines()
    lines = [l.strip().split() for l in lines]
    s_to_human_score = {l[1]: float(l[3]) for l in lines}

if args.model == 'Trans':
    model = model.TransformerModel(args, n_char, i_to_c)
else:
    model = model.RNN(args, n_char, i_to_c)

model = utils.initialize_model(model, device, args.save_files)

print("number of parameters :",
      sum(p.numel() for p in model.parameters() if p.requires_grad))
softmax = nn.Softmax(dim=-1)
model.eval()
log_likelihoods = []
humanscores = []
sascores = []

with torch.no_grad():
    for s in s_to_human_score.keys():
Code Example #13
    test_primer = dataset[test_indices[0]][0].type(
        torch.LongTensor).view(1, -1)
    train_dataloader = torch.utils.data.DataLoader(dataset,
                                                   batch_size=16,
                                                   num_workers=8,
                                                   sampler=train_sampler)
    test_dataloader = torch.utils.data.DataLoader(dataset,
                                                  batch_size=16,
                                                  num_workers=8,
                                                  sampler=test_sampler)
    print("> Done.")
    print(f"> Loaded {dataset.length} MIDI sequences.")

    transformer = model.TransformerModel(336,
                                         128,
                                         8,
                                         256,
                                         8,
                                         dropout=0.0,
                                         device=device).to(device)
    print("> Model Summary:")
    print(transformer, '\n')

    if len(sys.argv) == 2:
        print("> Loading existing model from file\n")
        transformer = torch.load(sys.argv[1])
    # generate(transformer, "load-test")

    train(transformer, train_dataloader, test_dataloader)
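
One caveat in the reload above: torch.load(sys.argv[1]) uses the checkpoint's original device mapping, so a model saved on GPU fails to load on a CPU-only machine. A small, hedged adjustment using the standard map_location argument (not taken from this project):

    if len(sys.argv) == 2:
        print("> Loading existing model from file\n")
        # map_location keeps CPU-only machines working; .to(device) then moves
        # the weights onto whatever device is actually in use.
        transformer = torch.load(sys.argv[1], map_location=device).to(device)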
Code Example #14
def main():
    parser = argparse.ArgumentParser(
        description="Compute word scores of"
        "hypotheses for each utterance in parallel"
        "with a PyTorch-trained neural language model.")
    parser.add_argument('--infile',
                        type=str,
                        required=True,
                        help="Word hypotheses generated from a lattice.")
    parser.add_argument('--outfile',
                        type=str,
                        required=True,
                        help="Output file with neural language model scores"
                        "for input word hypotheses.")
    parser.add_argument(
        '--vocabulary',
        type=str,
        required=True,
        help="Vocabulary used for neural language model training.")
    parser.add_argument('--model-path',
                        type=str,
                        required=True,
                        help="Path to a pretrained neural language model.")
    parser.add_argument('--model',
                        type=str,
                        default='LSTM',
                        help='Network type. Can be RNN, LSTM or Transformer.')
    parser.add_argument('--emsize',
                        type=int,
                        default=200,
                        help='Size of word embeddings.')
    parser.add_argument('--nhid',
                        type=int,
                        default=200,
                        help='Number of hidden units per layer.')
    parser.add_argument('--nlayers',
                        type=int,
                        default=2,
                        help='Number of layers.')
    parser.add_argument('--nhead',
                        type=int,
                        default=2,
                        help='Number of heads in a Transformer model.')
    parser.add_argument('--oov',
                        type=str,
                        default='<unk>',
                        help='Out of vocabulary word.')
    parser.add_argument('--sent-boundary',
                        type=str,
                        default='<s>',
                        help='Sentence boundary symbol.')
    args = parser.parse_args()
    assert os.path.exists(
        args.infile), "Path for input word sequences does not exist."
    assert os.path.exists(args.vocabulary), "Vocabulary path does not exist."
    assert os.path.exists(args.model_path), "Model path does not exist."

    print("Load vocabulary.")
    vocab = read_vocab(args.vocabulary)
    ntokens = len(vocab)
    print("Load model and criterion.")
    import model
    if args.model == 'Transformer':
        model = model.TransformerModel(ntokens,
                                       args.emsize,
                                       args.nhead,
                                       args.nhid,
                                       args.nlayers,
                                       activation="gelu",
                                       tie_weights=True)
    else:
        model = model.RNNModel(args.model,
                               ntokens,
                               args.emsize,
                               args.nhid,
                               args.nlayers,
                               tie_weights=True)
    with open(args.model_path, 'rb') as f:
        model.load_state_dict(
            torch.load(f, map_location=lambda storage, loc: storage))
        if args.model in ['RNN_TANH', 'RNN_RELU', 'LSTM', 'GRU']:
            model.rnn.flatten_parameters()
    criterion = nn.CrossEntropyLoss(reduction='none')
    print("Load input word hypotheses.")
    sents = load_sents(args.infile)
    print("Compute word scores with a ", args.model, " model.")
    sents_and_scores = compute_scores(args,
                                      sents,
                                      model,
                                      criterion,
                                      ntokens,
                                      vocab,
                                      model_type=args.model)
    print("Write out word scores.")
    write_scores(sents_and_scores, args.outfile)
Code Example #15
File: transformer_main.py Project: JackieTien97/rnn
    print("torch.cuda.is_available(): ", torch.cuda.is_available())
    # load data
    train_iter, val_iter, VOCAB_SIZE = data.get_data("../data/ptb/",
                                                     batch_size, bptt_len,
                                                     device)
    print("VOCAB_SIZE: ", VOCAB_SIZE)

    # WRITE CODE HERE within the two '#' bars
    ########################################
    # Build LMModel best_model (build your language best_model here)
    emsize = 256
    nhid = 256
    nlayers = 2
    nhead = 2
    dropout = 0.2
    MyModel = model.TransformerModel(VOCAB_SIZE, emsize, nhead, nhid, nlayers,
                                     dropout)
    print(MyModel)
    MyModel.to(device)
    ########################################

    criterion = nn.CrossEntropyLoss()
    learning_rate = 0.001
    step_size = 10
    optimizer = torch.optim.Adam(MyModel.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=step_size,
                                                gamma=0.3)
    GRAD_CLIP = 0.5

    ########################################
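
The excerpt stops after setting up the criterion, optimizer, StepLR scheduler, and GRAD_CLIP. A minimal sketch of the training loop that would typically use them, patterned on the other examples above; the batch layout of train_iter and the epoch count are assumptions.

    # Hypothetical training loop; train_iter is assumed to yield (data, targets).
    for epoch in range(1, 11):
        MyModel.train()
        for data, targets in train_iter:
            optimizer.zero_grad()
            output = MyModel(data)
            loss = criterion(output.view(-1, VOCAB_SIZE), targets.view(-1))
            loss.backward()
            torch.nn.utils.clip_grad_norm_(MyModel.parameters(), GRAD_CLIP)
            optimizer.step()
        scheduler.step()  # StepLR: multiply lr by gamma every step_size epochs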