Code example #1
def test(args):
    test_data_src = read_corpus(args.test_src, source='src')
    test_data_tgt = read_corpus(args.test_tgt, source='tgt')
    test_data = list(zip(test_data_src, test_data_tgt))

    if args.load_model:
        print('load model from [%s]' % args.load_model)
        params = torch.load(args.load_model,
                            map_location=lambda storage, loc: storage)
        vocab = params['vocab']
        saved_args = params['args']
        state_dict = params['state_dict']

        model = NMT(saved_args, vocab)
        model.load_state_dict(state_dict)
    else:
        vocab = torch.load(args.vocab)
        model = NMT(args, vocab)

    model.eval()

    if args.cuda:
        # model = nn.DataParallel(model).cuda()
        model = model.cuda()

    hypotheses = decode(model, test_data)
    top_hypotheses = [hyps[0] for hyps in hypotheses]

    bleu_score = get_bleu([tgt for src, tgt in test_data], top_hypotheses)
    word_acc = get_acc([tgt for src, tgt in test_data], top_hypotheses,
                       'word_acc')
    sent_acc = get_acc([tgt for src, tgt in test_data], top_hypotheses,
                       'sent_acc')
    print('Corpus Level BLEU: %f, word level acc: %f, sentence level acc: %f' %
          (bleu_score, word_acc, sent_acc),
          file=sys.stderr)

    if args.save_to_file:
        print('save decoding results to %s' % args.save_to_file)
        with open(args.save_to_file, 'w') as f:
            for hyps in hypotheses:
                f.write(' '.join(hyps[0][1:-1]) + '\n')

        if args.save_nbest:
            nbest_file = args.save_to_file + '.nbest'
            print('save nbest decoding results to %s' % nbest_file)
            with open(nbest_file, 'w') as f:
                for src_sent, tgt_sent, hyps in zip(test_data_src,
                                                    test_data_tgt, hypotheses):
                    print('Source: %s' % ' '.join(src_sent), file=f)
                    print('Target: %s' % ' '.join(tgt_sent), file=f)
                    print('Hypotheses:', file=f)
                    for i, hyp in enumerate(hyps, 1):
                        print('[%d] %s' % (i, ' '.join(hyp)), file=f)
                    print('*' * 30, file=f)
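
The get_bleu and get_acc helpers called above are not shown in this snippet. For reference, a minimal get_bleu could be built on NLTK's corpus BLEU; the argument order (references first, hypotheses second) mirrors the call above, while the marker-stripping details are assumptions based on how hyps[0][1:-1] is written to file.

from nltk.translate.bleu_score import corpus_bleu

def get_bleu(references, hypotheses):
    # Strip the <s>/</s> markers that read_corpus adds to target sentences;
    # the same slicing is assumed for hypotheses, mirroring hyps[0][1:-1] above.
    refs = [[ref[1:-1]] for ref in references]   # one reference list per hypothesis
    hyps = [hyp[1:-1] for hyp in hypotheses]
    return corpus_bleu(refs, hyps)
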
Code example #2
    @staticmethod
    def load(model_path: str):
        """ Load the model from a file.
        @param model_path (str): path to model
        @returns model (DPPNMT): model wrapping the restored NMT instance
        """
        params = torch.load(model_path,
                            map_location=lambda storage, loc: storage)
        args = params['args']
        nmt_model = NMT(vocab=params['vocab'], **args)
        nmt_model.load_state_dict(params['state_dict'])
        model = DPPNMT(nmt_model=nmt_model)

        return model
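
For reference, a sketch of the checkpoint layout this load method expects: the key names ('args', 'vocab', 'state_dict') come from the loader itself, while the contents of 'args' (keyword arguments accepted by the NMT constructor) and the file path are illustrative assumptions.

import torch

# Given an already constructed NMT instance `nmt_model`:
checkpoint = {
    'args': dict(embed_size=256, hidden_size=256, dropout_rate=0.2),  # NMT(**args) kwargs (assumed)
    'vocab': nmt_model.vocab,
    'state_dict': nmt_model.state_dict(),
}
torch.save(checkpoint, 'model.bin')   # placeholder path; load('model.bin') above restores it
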
Code example #3
def sample(args):
    train_data_src = read_corpus(args.train_src, source='src')
    train_data_tgt = read_corpus(args.train_tgt, source='tgt')
    train_data = zip(train_data_src, train_data_tgt)

    if args.load_model:
        print('load model from [%s]' % args.load_model)
        params = torch.load(args.load_model,
                            map_location=lambda storage, loc: storage)
        vocab = params['vocab']
        opt = params['args']
        state_dict = params['state_dict']

        model = NMT(opt, vocab)
        model.load_state_dict(state_dict)
    else:
        vocab = torch.load(args.vocab)
        model = NMT(args, vocab)

    model.eval()

    if args.cuda:
        # model = nn.DataParallel(model).cuda()
        model = model.cuda()

    print('begin sampling')

    check_every = 10
    train_iter = cum_samples = 0
    train_time = time.time()
    for src_sents, tgt_sents in data_iter(train_data,
                                          batch_size=args.batch_size):
        train_iter += 1
        samples = model.sample(src_sents,
                               sample_size=args.sample_size,
                               to_word=True)
        cum_samples += sum(len(sample) for sample in samples)

        if train_iter % check_every == 0:
            elapsed = time.time() - train_time
            print('sampling speed: %d/s' % (cum_samples / elapsed))
            cum_samples = 0
            train_time = time.time()

        for i, tgt_sent in enumerate(tgt_sents):
            print('*' * 80)
            print('target: ' + ' '.join(tgt_sent))
            tgt_samples = samples[i]
            print('samples:')
            for sid, sample in enumerate(tgt_samples, 1):
                print('[%d] %s' % (sid, ' '.join(sample[1:-1])))
            print('*' * 80)
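
The data_iter helper assumed above is not shown. A minimal stand-in that yields (src_sents, tgt_sents) batches from the zipped corpus could look like the sketch below; the real project helper may additionally shuffle and sort by length.

def data_iter(data, batch_size):
    # `data` is an iterable of (src_sent, tgt_sent) pairs, e.g. the zip(...) above.
    data = list(data)
    for i in range(0, len(data), batch_size):
        batch = data[i:i + batch_size]
        src_sents = [src for src, _ in batch]
        tgt_sents = [tgt for _, tgt in batch]
        yield src_sents, tgt_sents
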
Code example #4
def interactive(args):
    assert args.load_model, 'You have to specify a pre-trained model'
    print('load model from [%s]' % args.load_model)
    params = torch.load(args.load_model,
                        map_location=lambda storage, loc: storage)
    vocab = params['vocab']
    saved_args = params['args']
    state_dict = params['state_dict']

    model = NMT(saved_args, vocab)
    model.load_state_dict(state_dict)

    model.eval()

    if args.cuda:
        # model = nn.DataParallel(model).cuda()
        model = model.cuda()

    while True:
        src_sent = input('Source Sentence:')
        src_sent = src_sent.strip().split(' ')
        hyps = model.translate(src_sent)
        for i, hyp in enumerate(hyps, 1):
            print('Hypothesis #%d: %s' % (i, ' '.join(hyp)))
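
The attribute-style args object used in these examples (args.load_model, args.cuda, ...) suggests an argparse front end. A hypothetical wiring for the interactive mode, with flag names inferred from the attributes above:

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--load-model', dest='load_model', required=True,
                        help='path to a saved checkpoint')
    parser.add_argument('--cuda', action='store_true',
                        help='run the model on GPU')
    interactive(parser.parse_args())
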
Code example #5
def train(args: Dict):
    """ Train the NMT Model.
    @param args (Dict): args from cmd line
    """
    train_data_src = read_corpus(args['--train-src'], source='src')
    train_data_tgt = read_corpus(args['--train-tgt'], source='tgt')
    # print("train data")
    # print(train_data_src)
    # print(len(train_data_tgt))
    dev_data_src = read_corpus(args['--dev-src'], source='src')
    dev_data_tgt = read_corpus(args['--dev-tgt'], source='tgt')
    #print("dev data")
    #print(dev_data_src)
    #rint(len(dev_data_tgt))
    train_data = list(zip(train_data_src, train_data_tgt))
    dev_data = list(zip(dev_data_src, dev_data_tgt))
    #print(train_data)
    #print(dev_data)
    train_batch_size = int(args['--batch-size'])

    clip_grad = float(args['--clip-grad'])
    valid_niter = int(args['--valid-niter'])
    log_every = int(args['--log-every'])
    model_save_path = args['--save-to']

    vocab = Vocab.load(args['--vocab'])
    # print("vocab")
    # print(vocab.src)
    # print(vocab.tgt)
    model = NMT(embed_size=int(args['--embed-size']),
                hidden_size=int(args['--hidden-size']),
                dropout_rate=float(args['--dropout']),
                vocab=vocab, no_char_decoder=args['--no-char-decoder'])
    model.train()

    uniform_init = float(args['--uniform-init'])
    if np.abs(uniform_init) > 0.:
        print('uniformly initialize parameters [-%f, +%f]' % (uniform_init, uniform_init), file=sys.stderr)
        for p in model.parameters():
            p.data.uniform_(-uniform_init, uniform_init)

    vocab_mask = torch.ones(len(vocab.tgt))
    vocab_mask[vocab.tgt['<pad>']] = 0

    device = torch.device("cuda:0" if args['--cuda'] else "cpu")
    print('use device: %s' % device, file=sys.stderr)

    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=float(args['--lr']))

    num_trial = 0
    train_iter = patience = cum_loss = report_loss = cum_tgt_words = report_tgt_words = 0
    cum_examples = report_examples = epoch = valid_num = 0
    hist_valid_scores = []
    train_time = begin_time = time.time()
    print('begin Maximum Likelihood training')

    while True:
        epoch += 1

        for src_sents, tgt_sents in batch_iter(train_data, batch_size=train_batch_size, shuffle=True):
            train_iter += 1
            #print(src_sents)
            #print(src_sents.shape)
            #print(tgt_sents)
            #print(tgt_sents.shape)
            optimizer.zero_grad()

            batch_size = len(src_sents)

            example_losses = -model(src_sents, tgt_sents) # (batch_size,)
            batch_loss = example_losses.sum()
            loss = batch_loss / batch_size

            loss.backward()

            # clip gradient
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), clip_grad)

            optimizer.step()

            batch_losses_val = batch_loss.item()
            report_loss += batch_losses_val
            cum_loss += batch_losses_val

            tgt_words_num_to_predict = sum(len(s[1:]) for s in tgt_sents)  # omitting leading `<s>`
            report_tgt_words += tgt_words_num_to_predict
            cum_tgt_words += tgt_words_num_to_predict
            report_examples += batch_size
            cum_examples += batch_size

            if train_iter % log_every == 0:
                print('epoch %d, iter %d, avg. loss %.2f, avg. ppl %.2f ' \
                      'cum. examples %d, speed %.2f words/sec, time elapsed %.2f sec' % (epoch, train_iter,
                                                                                         report_loss / report_examples,
                                                                                         math.exp(report_loss / report_tgt_words),
                                                                                         cum_examples,
                                                                                         report_tgt_words / (time.time() - train_time),
                                                                                         time.time() - begin_time), file=sys.stderr)

                train_time = time.time()
                report_loss = report_tgt_words = report_examples = 0.

            # perform validation
            if train_iter % valid_niter == 0:
                print('epoch %d, iter %d, cum. loss %.2f, cum. ppl %.2f cum. examples %d' % (epoch, train_iter,
                                                                                         cum_loss / cum_examples,
                                                                                         np.exp(cum_loss / cum_tgt_words),
                                                                                         cum_examples), file=sys.stderr)

                cum_loss = cum_examples = cum_tgt_words = 0.
                valid_num += 1

                print('begin validation ...', file=sys.stderr)

                # compute dev. ppl and bleu
                dev_ppl = evaluate_ppl(model, dev_data, batch_size=128)   # dev batch size can be a bit larger
                valid_metric = -dev_ppl

                print('validation: iter %d, dev. ppl %f' % (train_iter, dev_ppl), file=sys.stderr)

                is_better = len(hist_valid_scores) == 0 or valid_metric > max(hist_valid_scores)
                hist_valid_scores.append(valid_metric)

                if is_better:
                    patience = 0
                    print('save currently the best model to [%s]' % model_save_path, file=sys.stderr)
                    model.save(model_save_path)

                    # also save the optimizers' state
                    torch.save(optimizer.state_dict(), model_save_path + '.optim')
                elif patience < int(args['--patience']):
                    patience += 1
                    print('hit patience %d' % patience, file=sys.stderr)

                    if patience == int(args['--patience']):
                        num_trial += 1
                        print('hit #%d trial' % num_trial, file=sys.stderr)
                        if num_trial == int(args['--max-num-trial']):
                            print('early stop!', file=sys.stderr)
                            exit(0)

                        # decay lr, and restore from previously best checkpoint
                        lr = optimizer.param_groups[0]['lr'] * float(args['--lr-decay'])
                        print('load previously best model and decay learning rate to %f' % lr, file=sys.stderr)

                        # load model
                        params = torch.load(model_save_path, map_location=lambda storage, loc: storage)
                        model.load_state_dict(params['state_dict'])
                        model = model.to(device)

                        print('restore parameters of the optimizers', file=sys.stderr)
                        optimizer.load_state_dict(torch.load(model_save_path + '.optim'))

                        # set new lr
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lr

                        # reset patience
                        patience = 0

            if epoch == int(args['--max-epoch']):
                print('reached maximum number of epochs!', file=sys.stderr)
                exit(0)
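
evaluate_ppl is called during validation but not listed here. A minimal sketch consistent with how it is used above (it returns a per-word perplexity over dev_data and puts the model back into training mode afterwards); the body is an assumption rather than the project's reference implementation, and it reuses the batch_iter helper from the training loop.

import numpy as np
import torch

def evaluate_ppl(model, dev_data, batch_size=32):
    # Sum negative log-likelihoods over the dev set, then exponentiate the per-word average.
    was_training = model.training
    model.eval()
    cum_loss = cum_tgt_words = 0.
    with torch.no_grad():
        for src_sents, tgt_sents in batch_iter(dev_data, batch_size):
            cum_loss += -model(src_sents, tgt_sents).sum().item()
            cum_tgt_words += sum(len(s[1:]) for s in tgt_sents)  # omit leading <s>
    if was_training:
        model.train()
    return np.exp(cum_loss / cum_tgt_words)
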
Code example #6
def compute_lm_prob(args):
    """
    given source-target sentence pairs, compute ppl and log-likelihood
    """
    test_data_src = read_corpus(args.test_src, source='src')
    test_data_tgt = read_corpus(args.test_tgt, source='tgt')
    test_data = zip(test_data_src, test_data_tgt)

    if args.load_model:
        print('load model from [%s]' % args.load_model)
        params = torch.load(args.load_model,
                            map_location=lambda storage, loc: storage)
        vocab = params['vocab']
        saved_args = params['args']
        state_dict = params['state_dict']

        model = NMT(saved_args, vocab)
        model.load_state_dict(state_dict)
    else:
        vocab = torch.load(args.vocab)
        model = NMT(args, vocab)

    model.eval()

    if args.cuda:
        # model = nn.DataParallel(model).cuda()
        model = model.cuda()

    f = open(args.save_to_file, 'w')
    for src_sent, tgt_sent in test_data:
        src_sents = [src_sent]
        tgt_sents = [tgt_sent]

        batch_size = len(src_sents)
        src_sents_len = [len(s) for s in src_sents]
        pred_tgt_word_nums = [len(s[1:])
                              for s in tgt_sents]  # omitting leading `<s>`

        # (sent_len, batch_size)
        src_sents_var = to_input_variable(src_sents,
                                          model.vocab.src,
                                          cuda=args.cuda,
                                          is_test=True)
        tgt_sents_var = to_input_variable(tgt_sents,
                                          model.vocab.tgt,
                                          cuda=args.cuda,
                                          is_test=True)

        # (tgt_sent_len, batch_size, tgt_vocab_size)
        scores = model(src_sents_var, src_sents_len, tgt_sents_var[:-1])
        # (tgt_sent_len * batch_size, tgt_vocab_size)
        log_scores = F.log_softmax(scores.view(-1, scores.size(2)), dim=-1)
        # remove leading <s> in tgt sent, which is not used as the target
        # (tgt_sent_len * batch_size)
        flattened_tgt_sents = tgt_sents_var[1:].view(-1)
        # (tgt_sent_len * batch_size)
        tgt_log_scores = torch.gather(
            log_scores, 1, flattened_tgt_sents.unsqueeze(1)).squeeze(1)
        # 0-index is the <pad> symbol
        tgt_log_scores = tgt_log_scores * (
            1. - torch.eq(flattened_tgt_sents, 0).float())
        # (tgt_sent_len, batch_size)
        tgt_log_scores = tgt_log_scores.view(-1, batch_size)  # .permute(1, 0)
        # (batch_size)
        tgt_sent_scores = tgt_log_scores.sum(dim=0).squeeze()
        tgt_sent_word_scores = [
            tgt_sent_scores[i].item() / pred_tgt_word_nums[i]
            for i in range(batch_size)
        ]

        for src_sent, tgt_sent, score in zip(src_sents, tgt_sents,
                                             tgt_sent_word_scores):
            f.write('%s ||| %s ||| %f\n' %
                    (' '.join(src_sent), ' '.join(tgt_sent), score))

    f.close()
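
The file written above contains one "source ||| target ||| per-word log-prob" line per pair. A rough, length-unweighted perplexity estimate can be recovered from it as in the sketch below; the file name is a placeholder for args.save_to_file, and an exact corpus perplexity would weight each sentence by its number of predicted words.

import math

scores = []
with open('lm_scores.txt') as f:              # placeholder for args.save_to_file
    for line in f:
        src, tgt, score = line.rsplit(' ||| ', 2)
        scores.append(float(score))

# Negate the mean per-word log-probability and exponentiate.
ppl = math.exp(-sum(scores) / len(scores))
print('approx. perplexity: %.2f' % ppl)
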
Code example #7
def train(args):
    data_path = args['--data_path']
    model_path = args['--model_path']
    use_word2vec = args['--use_word2vec']
    word2vec_fpath = args['--word2vec_fpath']
    loss_info_interval = int(args['--loss_info_interval'])
    dev_info_interval = int(args['--dev_info_interval'])
    max_epoch = int(args['--max_epoch'])
    num_workers = int(args['--num_workers'])
    
    corpus_limit = None if args['--corpus_limit']=='None' else int(args['--corpus_limit'])
    vocab_size = int(args['--vocab_size'])
    freq_cutoff = int(args['--freq_cutoff'])
    batch_size = int(args['--batch_size'])
    hidden_size = int(args['--hidden_size'])
    atten_size = int(args['--atten_size'])
    dropout_rate = float(args['--dropout_rate'])
    embed_size = int(args['--embed_size'])
    lr = float(args['--lr'])
    lr_decay = float(args['--lr_decay'])
    grad_clip = float(args['--grad_clip'])
    patience = int(args['--patience'])
    trial = int(args['--trial'])
    
    # create vocabulary
    print("create source and target vocabulary...")
    es_corpus = load_corpus(data_path+'/train.es', 'es', corpus_limit)
    vocab_src = VocabEntry.build(es_corpus, vocab_size, freq_cutoff)
    en_corpus = load_corpus(data_path+'/train.en', 'en', corpus_limit)
    vocab_tgt = VocabEntry.build(en_corpus, vocab_size, freq_cutoff)
    vocabulary = Vocab(vocab_src, vocab_tgt)
    vocabulary.save()
#     vocabulary = Vocab.load()
    
    # create dataloader
    print("create dataloader...")
    train_dl = generate_dl(data_path, 'train', vocabulary, batch_size, num_workers, corpus_limit)
    dev_dl = generate_dl(data_path, 'dev', vocabulary, batch_size, num_workers, corpus_limit)
    
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    
    # load pretrained word2vec
    print("load pretrained word2vec...")
    if use_word2vec:
        word2vec_src = load_word2vec(word2vec_fpath+'/cc.es.300.vec', vocabulary.src, device)
        word2vec_tgt = load_word2vec(word2vec_fpath+'/cc.en.300.vec', vocabulary.tgt, device)
        word2vec = (word2vec_src, word2vec_tgt)
        print("size: src -- {} tgt -- {}".format(word2vec_src.size(), word2vec_tgt.size()))
    else:
        word2vec = None
        
    # create model and optimizer
    model = NMT(hidden_size, atten_size, vocabulary, dropout_rate, word2vec, embed_size)
    model.init()
    
#     model = NMT.load(model_path, vocabulary)
    
    model.to(device)
    if torch.cuda.device_count() > 1:
        print("- - - - - - - - > %d GPUs are being used now!!!" % torch.cuda.device_count())
        model_parallel = nn.DataParallel(model)
    else:
        # keep a plain reference so model_parallel is also defined on a single GPU / CPU
        model_parallel = model
    opt = torch.optim.Adam(model.parameters(), lr, betas=(0.9, 0.99))
    
#     opt.load_state_dict(torch.load(model_path+'/model.bin.optim', map_location=lambda storage, loc: storage))
    
    print("now we start to train this model ...... batch_size %d"%batch_size)
    start_time = train_time = time.time()
    epoch = iter_cum = example_cum = loss_cum = words_cum = sents_cum = 0
    n_patience = n_trial = 0
    hist_best_score = float('inf')
    while True:
        epoch += 1
        if epoch > max_epoch:
            print("reached maximum number of epochs ......")
            exit(0)
        for source, src_len, target, tgt_len in train_dl:
            model_parallel.train()
            # print("input size to model {}, {}, {}, {}".format(source.size(), src_len.size(), target.size(), tgt_len.size()))
            
            loss = model_parallel(source.to(device), src_len.to(device), target.to(device), tgt_len.to(device))
            # print("output loss size: {}".format(loss.size()))
            loss = loss.sum()
            loss_avg = loss / len(tgt_len)
            loss_avg.backward()
            
            # gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
            
            opt.step()
            opt.zero_grad()
            iter_cum += 1
            example_cum += len(tgt_len)
            loss_cum += loss.item()
            words_cum += (tgt_len-1).sum().item()
            sents_cum += len(tgt_len)
            
            if iter_cum % loss_info_interval == 0:
                pre_time = time.time()
                print("epoch: %d, iter: %d, cum. example: %d, avg. loss: %.2f, avg. ppl: %.2f, speed: %.2fwords/sec, time_eclapsed: %d sec"%
                      (epoch, iter_cum, example_cum, loss_cum/sents_cum, math.exp(loss_cum/words_cum), words_cum/(pre_time-train_time), pre_time-start_time))
                train_time = time.time()
                loss_cum = words_cum = sents_cum = 0
        
            if iter_cum % dev_info_interval == 0:
                print("validation begin ......")
                model_parallel.eval()
                with torch.no_grad():
                    loss_dev = words_dev = sents_dev = 0
                    for source, src_len, target, tgt_len in dev_dl:
                        loss = model_parallel(source.to(device), src_len.to(device), target.to(device), tgt_len.to(device))
                        loss = loss.sum()
                        loss_dev += loss.item()
                        words_dev += (tgt_len-1).sum().item()
                        sents_dev += len(tgt_len)
                    print("avg. loss: %.2f,  avg. ppl: %.2f"%(loss_dev/sents_dev, math.exp(loss_dev/words_dev)))
                
                # compare performance with history
                is_better = hist_best_score > (loss_dev/sents_dev)
                if is_better:
                    print("model improved, saved to %s ......"%model_path)
                    n_patience = 0
                    hist_best_score = loss_dev/sents_dev
                    model.save(model_path)
                    torch.save(opt.state_dict(), model_path+'/model.bin.optim')
                else:
                    n_patience += 1
                    print("hit # %d patience" % n_patience)
                    print("decay learning rate ......")
                    lr = opt.param_groups[0]['lr'] * lr_decay
                    if n_patience > patience:
                        n_trial += 1
                        print("hit # %d trial" % n_trial)
                        if n_trial > trial: 
                            print("early stop!")
                            exit(0)
                        n_patience = 0
                        print("load previous best model")
                        params = torch.load(model_path+'/model.bin', map_location=lambda storage, loc: storage)
                        model.load_state_dict(params['state_dict'])
                        model.to(device)
                        if torch.cuda.device_count() > 1:
                            model_parallel = nn.DataParallel(model)
                        opt.load_state_dict(torch.load(model_path+'/model.bin.optim', map_location=lambda storage, loc: storage))
                    for param_group in opt.param_groups:
                        param_group['lr'] = lr
Code example #8
def train(train_data,
          dev_data,
          vocab,
          embed_size=256,
          hidden_size=256,
          dropout_rate=0.2,
          uniform_init=0.1,
          device='cpu',
          lr=0.001,
          batch_size=32,
          clip_grad=5.0,
          log_every=10,
          valid_niter=2000,
          save_path='model.bin',
          patience=5,
          lr_decay=0.5,
          max_trials=5,
          max_epochs=30):
    """ Train the NMT model
        Params:
            train_data (list of (src_sent, tgt_sent)): list of tuples containing source and target 
                sentences for training
            dev_data (list of (src_sent, tgt_sent)): list of tuples containing source and target 
                sentences for dev
            vocab (Vocab): Vocab object for source and target
            embed_size (int): Embedding dimensionality. Default = 256
            hidden_size (int): Dimensionality for hidden states. Default = 256
            dropout_rate (float): Dropout probability. Default: 0.2
            uniform_init (float): If > 0: uniformly initialize all parameters
            device (str): device to perform the calc on. Default = 'cpu'
            lr (float): learning rate. Default = 0.001
            batch_size (int): batch size. Default = 32
            clip_grad (float): used in gradient clipping. Default = 5.0
            log_every (int): number of iterations to print stats. Default = 10
            valid_niter (int): number of iterations to perform validation. Default = 2000
            save_path (str): path to save the best model. Default: 'model.bin' in current dir
            patience (int): number of iterations to decay learning rate. Default = 5
            lr_decay (float): learning rate decay. Default = 0.5
            max_trials (int): terminate training after how many trials. Default = 5
            max_epochs (int): max number of epochs. Default = 30
        Return:
    """
    # Create NMT model and put it in train mode
    model = NMT(embed_size, hidden_size, vocab, dropout_rate)
    model.train()

    # Uniformely initialize model parameters if required
    if np.abs(uniform_init) > 0.:
        print(f'uniformly init parameters [-{uniform_init}, +{uniform_init}]',
              file=sys.stderr)
        for p in model.parameters():
            p.data.uniform_(-uniform_init, uniform_init)

    # Create target vocab mask with 0 for 'padding' index and 1 otherwise
    vocab_mask = torch.ones(len(vocab.tgt))
    vocab_mask[vocab.tgt['<pad>']] = 0

    # Set model device
    device = torch.device(device)
    model = model.to(device)
    print(f'Using device: {device}', file=sys.stderr)

    # Choose optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Initializations
    num_trial = 0
    train_iter = 0
    current_patience = 0
    cum_loss = 0
    report_loss = 0
    cum_tgt_words = 0
    report_tgt_words = 0
    cum_examples = 0
    report_examples = 0
    epoch = 0
    valid_num = 0
    hist_valid_scores = []
    train_time = time.time()
    begin_time = time.time()

    print('begin Maximum Likelihood training')

    while True:
        epoch += 1
        # Iterate over the batches in the training data
        for src_sents, tgt_sents in batch_iter(train_data,
                                               batch_size,
                                               shuffle=True):
            train_iter += 1
            optimizer.zero_grad()
            current_batch_size = len(src_sents)

            # Calculate loss and backpropagate
            example_losses = -model(src_sents,
                                    tgt_sents)  # (current_batch_size,)
            batch_loss = example_losses.sum()
            loss = batch_loss / current_batch_size  # average loss
            loss.backward()

            # clip gradient and update parameters
            _ = torch.nn.utils.clip_grad_norm_(model.parameters(), clip_grad)
            optimizer.step()

            batch_losses_val = batch_loss.item()
            report_loss += batch_losses_val
            cum_loss += batch_losses_val

            tgt_words_num_to_predict = sum(
                len(s[1:]) for s in tgt_sents)  # omitting leading `<s>`
            report_tgt_words += tgt_words_num_to_predict
            cum_tgt_words += tgt_words_num_to_predict
            report_examples += current_batch_size
            cum_examples += current_batch_size

            if train_iter % log_every == 0:
                print(f'epoch {epoch}, iter {train_iter}, ' \
                      f'avg. loss {report_loss / report_examples:.2f}, '\
                      f'avg. ppl {math.exp(report_loss / report_tgt_words):.2f}, ' \
                      f'cum. examples {cum_examples}, ' \
                      f'speed {report_tgt_words / (time.time() - train_time):.2f} words/sec, ' \
                      f'time elapsed {(time.time() - begin_time):.2f} sec', file=sys.stderr)
                train_time = time.time()
                report_loss = report_tgt_words = report_examples = 0.

            # perform validation
            if train_iter % valid_niter == 0:
                print(f'epoch {epoch}, iter {train_iter}, cum. loss {cum_loss / cum_examples:.2f}, '\
                      f'cum. ppl {np.exp(cum_loss / cum_tgt_words):.2f} cum. examples {cum_examples}',
                      file=sys.stderr)
                cum_loss = cum_examples = cum_tgt_words = 0.
                valid_num += 1
                print('begin validation ...', file=sys.stderr)

                # compute dev. ppl and bleu
                dev_ppl = evaluate_ppl(
                    model, dev_data,
                    batch_size=128)  # dev batch size can be a bit larger
                valid_metric = -dev_ppl

                print(f'validation: iter {train_iter}, dev. ppl {dev_ppl}',
                      file=sys.stderr)

                is_better = len(hist_valid_scores
                                ) == 0 or valid_metric > max(hist_valid_scores)
                hist_valid_scores.append(valid_metric)

                if is_better:
                    # save model and optimizer state
                    print(f'save the best model to [{save_path}]',
                          file=sys.stderr)
                    model.save(save_path)
                    torch.save(optimizer.state_dict(), save_path + '.optim')
                    current_patience = 0

                elif current_patience < patience:
                    current_patience += 1
                    print(f'hit patience {current_patience}', file=sys.stderr)

                    if current_patience == patience:
                        num_trial += 1
                        print(f'hit #{num_trial} trial', file=sys.stderr)
                        if num_trial == max_trials:
                            print('early stop!', file=sys.stderr)
                            exit(0)

                        # decay lr, and restore from previously best checkpoint
                        lr = optimizer.param_groups[0]['lr'] * lr_decay
                        print(
                            f'load previously best model and decay learning rate to {lr}',
                            file=sys.stderr)

                        # load model
                        params = torch.load(
                            save_path,
                            map_location=lambda storage, loc: storage)
                        model.load_state_dict(params['state_dict'])
                        model = model.to(device)

                        print('restore parameters of the optimizers',
                              file=sys.stderr)
                        optimizer.load_state_dict(
                            torch.load(save_path + '.optim'))

                        # set new lr
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lr

                        # reset patience
                        current_patience = 0

                if epoch == max_epochs:
                    print('reached maximum number of epochs!', file=sys.stderr)
                    exit(0)
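
A hypothetical invocation of this keyword-argument variant of train(); the corpus and vocabulary loading mirrors the other examples in this listing, and the file paths are placeholders.

import torch

vocab = Vocab.load('vocab.json')                                # placeholder path
train_data = list(zip(read_corpus('train.es', source='src'),
                      read_corpus('train.en', source='tgt')))
dev_data = list(zip(read_corpus('dev.es', source='src'),
                    read_corpus('dev.en', source='tgt')))

train(train_data, dev_data, vocab,
      device='cuda:0' if torch.cuda.is_available() else 'cpu',
      batch_size=32,
      save_path='work_dir/model.bin')                           # placeholder path
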
Code example #9
def train(args):
    """ Train the NMT Model.
    """
    train_data_src = read_corpus(args.train_src, source='src')
    train_data_tgt = read_corpus(args.train_tgt, source='tgt')
    dev_data_src = read_corpus(args.dev_src, source='src')
    dev_data_tgt = read_corpus(args.dev_tgt, source='tgt')

    train_data = list(zip(train_data_src, train_data_tgt))
    dev_data = list(zip(dev_data_src, dev_data_tgt))

    vocab = Vocab.load(args.vocab_file)
    model = NMT(embed_size=args.embed_size,
                hidden_size=args.hidden_size,
                dropout_rate=args.dropout,
                vocab=vocab)
    model.train()

    if np.abs(args.uniform_init) > 0.:
        print('uniformly initialize parameters [-%f, +%f]' % (args.uniform_init, args.uniform_init))
        for p in model.parameters():
            p.data.uniform_(-args.uniform_init, args.uniform_init)

    vocab_mask = torch.ones(len(vocab.tgt))
    vocab_mask[vocab.tgt['<pad>']] = 0

    device = torch.device("cuda:0" if args.cuda else "cpu")
    print('use device: %s' % device)

    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    num_trial = 0
    train_iter = patience = cum_loss = report_loss = cum_tgt_words = report_tgt_words = 0
    cum_examples = report_examples = epoch = valid_num = 0
    hist_valid_scores = []
    train_time = begin_time = time.time()
    print('begin Maximum Likelihood training')

    while True:
        epoch += 1
        batch_num = math.ceil(len(train_data) / args.batch_size)
        current_iter = 0
        for src_sents, tgt_sents in batch_iter(train_data, batch_size=args.batch_size, shuffle=True):
            current_iter += 1
            train_iter += 1

            optimizer.zero_grad()
            batch_size = len(src_sents)
            example_losses = -model(src_sents, tgt_sents)
            batch_loss = example_losses.sum()
            loss = batch_loss / batch_size
            loss.backward()

            # clip gradient
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip_grad)

            optimizer.step()

            batch_losses_val = batch_loss.item()
            report_loss += batch_losses_val
            cum_loss += batch_losses_val

            # omitting leading `<s>`
            tgt_words_num_to_predict = sum(len(s[1:]) for s in tgt_sents)
            report_tgt_words += tgt_words_num_to_predict
            cum_tgt_words += tgt_words_num_to_predict
            report_examples += batch_size
            cum_examples += batch_size

            if train_iter % args.log_every == 0:
                print('epoch %d (%d / %d), iter %d, avg. loss %.2f, avg. ppl %.2f '
                      'cum. examples %d, speed %.2f words/sec, time elapsed %.2f sec' %
                      (epoch, current_iter, batch_num, train_iter,
                       report_loss / report_examples,
                       math.exp(report_loss / report_tgt_words),
                       cum_examples,
                       report_tgt_words / (time.time() - train_time),
                       time.time() - begin_time))

                train_time = time.time()
                report_loss = report_tgt_words = report_examples = 0.

            # perform validation
            if train_iter % args.valid_niter == 0:
                print('epoch %d, iter %d, cum. loss %.2f, cum. ppl %.2f cum. examples %d' % (epoch, train_iter,
                      cum_loss / cum_examples,
                      np.exp(cum_loss / cum_tgt_words),
                      cum_examples))

                cum_loss = cum_examples = cum_tgt_words = 0.
                valid_num += 1

                print('begin validation ...')

                # compute dev. ppl and bleu
                dev_ppl = evaluate_ppl(model, dev_data, batch_size=128)   # dev batch size can be a bit larger
                valid_metric = -dev_ppl

                print('validation: iter %d, dev. ppl %f' % (train_iter, dev_ppl))

                is_better = len(hist_valid_scores) == 0 or valid_metric > max(hist_valid_scores)
                hist_valid_scores.append(valid_metric)

                if is_better:
                    patience = 0
                    print('epoch %d, iter %d: save currently the best model to [%s]' %
                          (epoch, train_iter, args.model_path))
                    model.save(args.model_path)
                    torch.save(optimizer.state_dict(), args.model_path + '.optim')
                elif patience < args.patience:
                    patience += 1
                    print('hit patience %d' % patience)

                    if patience == args.patience:
                        num_trial += 1
                        print('hit #%d trial' % num_trial)
                        if num_trial == args.max_num_trial:
                            print('early stop!')
                            exit(0)

                        # decay lr, and restore from previously best checkpoint
                        lr = optimizer.param_groups[0]['lr'] * args.lr_decay
                        print('load previously best model and decay learning rate to %f' % lr)

                        # load model
                        params = torch.load(args.model_path, map_location=lambda storage, loc: storage)
                        model.load_state_dict(params['state_dict'])
                        model = model.to(device)

                        print('restore parameters of the optimizers')
                        optimizer.load_state_dict(torch.load(args.model_path + '.optim'))

                        # set new lr
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lr

                        # reset patience
                        patience = 0

            if epoch == args.max_epoch:
                print('reached maximum number of epochs!')
                return
Code example #10
File: train.py  Project: WellsCui/speech-to-text
def train(model_config,
          data_config,
          output_path,
          device,
          epoch_size,
          max_epoch,
          batch_size,
          repeats,
          decade_rate,
          clip_grad,
          log_every,
          valid_every,
          learning_rate=0.0005):
    print('use device: %s' % device, file=sys.stderr)
    vocab = Vocab.load(data_config["vacab_file"])
    model = NMT(vocab=vocab, **model_config)
    model = model.to(torch.device(device))
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    data_config.pop("vacab_file", None)
    data_loader = DataLoader(**data_config)
    batch_queue, loss_queue = data_loader.load_train_data(
        epoch_size, max_epoch, batch_size, repeats, decade_rate)
    dev_data = data_loader.load_dev_data()

    hist_valid_scores = []
    train_losses = []
    train_iter = cum_loss = report_loss = cum_tgt_words = report_tgt_words = 0
    cum_examples = report_examples = epoch = valid_num = 0

    if os.path.isfile(output_path + '/speech-to-text.model'):
        print('loading saved model...')
        params = torch.load(output_path + '/speech-to-text.model',
                            map_location=lambda storage, loc: storage)
        model.load_state_dict(params['state_dict'])
        print('restoring parameters of the optimizers', file=sys.stderr)
        optimizer.load_state_dict(
            torch.load(output_path + '/speech-to-text.optim'))
        dev_ppl = evaluate_ppl(
            model, dev_data,
            batch_size=128)  # dev batch size can be a bit larger
        valid_metric = -dev_ppl
        hist_valid_scores.append(valid_metric)
        print("saved model ppl: ", dev_ppl)

    model.train()

    train_time = begin_time = time.time()
    epoch, voices, tgt_sents = batch_queue.get(True)
    while voices is not None and tgt_sents is not None:
        train_iter += 1
        optimizer.zero_grad()
        # print("received voices:", len(voices))
        # print("tgt_sents[0]:", len(tgt_sents[0]), tgt_sents[0])
        # print("tgt_sents[1]:", len(tgt_sents[1]), tgt_sents[1])
        batch_size = len(voices)
        sample_losses = -model(voices, tgt_sents)
        batch_loss = sample_losses.sum()
        loss = batch_loss / batch_size
        loss.backward()

        # clip gradient
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   clip_grad)
        optimizer.step()

        batch_losses_val = batch_loss.item()
        report_loss += batch_losses_val
        cum_loss += batch_losses_val

        tgt_words_num_to_predict = sum(
            len(s[1:]) for s in tgt_sents)  # omitting leading `<s>`
        report_tgt_words += tgt_words_num_to_predict
        cum_tgt_words += tgt_words_num_to_predict
        report_examples += batch_size
        cum_examples += batch_size
        loss_queue.put(report_loss / report_examples)
        train_losses.append({
            'epoch': epoch,
            'iter': train_iter,
            'loss': report_loss / report_examples,
            'ppl': math.exp(report_loss / report_tgt_words),
            'cum': cum_examples,
            'speed': report_tgt_words / (time.time() - train_time)
        })

        if train_iter % log_every == 0:
            print(
                'epoch %d, iter %d, avg. loss %.2f, avg. ppl %.2f '
                'cum. examples %d, speed %.2f words/sec, time elapsed %.2f sec'
                % (epoch, train_iter, report_loss / report_examples,
                   math.exp(report_loss / report_tgt_words), cum_examples,
                   report_tgt_words /
                   (time.time() - train_time), time.time() - begin_time),
                file=sys.stderr)

            train_time = time.time()
            report_loss = report_tgt_words = report_examples = 0.
        # perform validation
        if train_iter % valid_every == 0:
            print(
                'epoch %d, iter %d, cum. loss %.2f, cum. ppl %.2f cum. examples %d'
                % (epoch, train_iter, cum_loss / cum_examples,
                   np.exp(cum_loss / cum_tgt_words), cum_examples),
                file=sys.stderr)

            cum_loss = cum_examples = cum_tgt_words = 0.
            valid_num += 1

            print('begin validation ...', file=sys.stderr)

            # compute dev. ppl and bleu
            dev_ppl = evaluate_ppl(
                model, dev_data,
                batch_size=128)  # dev batch size can be a bit larger
            valid_metric = -dev_ppl

            print('validation: iter %d, dev. ppl %f' % (train_iter, dev_ppl),
                  file=sys.stderr)

            is_better = len(hist_valid_scores
                            ) == 0 or valid_metric > max(hist_valid_scores)
            hist_valid_scores.append(valid_metric)

            if is_better:
                patience = 0
                print('save currently the best model to [%s]' % output_path,
                      file=sys.stderr)
                model.save(output_path + '/speech-to-text.model')
                torch.save(optimizer.state_dict(),
                           output_path + '/speech-to-text.optim')

        epoch, voices, tgt_sents = batch_queue.get(True)
Code example #11
def train(args: Dict):
    """ Train the NMT Model.
    @param args (Dict): args from cmd line
    """
    long_logfile = "long_logfiles/" + str(time.time()) + "long.txt"
    train_logfile = "train_logfiles/" + str(time.time()) + "train.txt"
    dev_logfile = "dev_logfiles/" + str(time.time()) + "dev.txt"
    f_long = open(long_logfile, "w")
    f_train = open(train_logfile, "w")
    # TODO: add hyperparameters
    args_tuples = [(arg, args[arg]) for arg in args]
    f_train.write("#args_tuples: %s\n" % args_tuples)
    for (arg, val) in args_tuples:
        f_train.write("#%s: %s\n" % (arg, val))
    f_train.write("#epoch, train iter, train score\n")
    f_dev = open(dev_logfile, "w")
    f_dev.write("#epoch, train iter, dev score, dev accuracy\n")

    binary = int(args["--num-classes"]) == 2

    train_data = load_train_data(perct=float(args["--train-perct"]),
                                 binary=binary)
    dev_data = load_dev_data(dev_perct=float(args["--dev-perct"]),
                             binary=binary)

    train_batch_size = int(args["--batch-size"])
    clip_grad = float(args["--clip-grad"])
    valid_niter = int(args["--valid-niter"])
    log_every = int(args["--log-every"])
    model_save_path = args["--save-to"]

    embed_size = int(args["--embed-size"])

    # TODO: load train data_augmenter based on args
    data_augmenter = str(args["--data-aug"]).lower()
    print_and_write("Using data augmentation method: %s" % data_augmenter,
                    f_long)
    if data_augmenter == "gaussian":
        data_augmenter = GaussianNoiseDataAugmenter(
            float(args["--data-aug-amount"]), int(args["--data-aug-nx"]))
    elif data_augmenter == "identity":
        data_augmenter = NoisyIdentityDataAugmenter(
            float(args["--data-aug-amount"]), int(args["--data-aug-nx"]))
    elif data_augmenter == "swapdim":
        data_augmenter = EmbedDimensionSwapDataAugmenter(
            int(args["--data-aug-amount"]), int(args["--data-aug-nx"]))
    else:
        data_augmenter = BaseDataAugmenter()

    # perform augmentation
    train_data_aug = data_augmenter.augment(train_data)
    print_and_write(
        "train size: %d, after aug %d" %
        (len(train_data[0]), len(train_data_aug)),
        f_long,
    )

    model = NMT(embed_size=embed_size,
                hidden_size=int(args["--hidden-size"]),
                num_classes=int(args["--num-classes"]),
                dropout_rate=float(args["--dropout"]))
    model.train()

    uniform_init = float(args["--uniform-init"])
    if np.abs(uniform_init) > 0.0:
        print_and_write(
            "uniformly initialize parameters [-%f, +%f]" %
            (uniform_init, uniform_init),
            f_long,
        )
        for p in model.parameters():
            p.data.uniform_(-uniform_init, uniform_init)

    device = torch.device("cuda:0" if args["--cuda"] else "cpu")
    print_and_write("use device: %s" % device, f_long)
    model = model.to(device)
    print_and_write("confirming model device %s" % model.device, f_long)

    optimizer = torch.optim.Adam(model.parameters(), lr=float(args["--lr"]))

    num_trial = 0
    train_iter = patience = cum_loss = report_loss = 0
    cum_examples = report_examples = epoch = valid_num = 0
    hist_valid_scores = []
    train_time = begin_time = time.time()
    print_and_write("begin Maximum Likelihood training", f_long)

    while True:
        epoch += 1

        for sentences, sentiments in batch_iter(train_data_aug,
                                                batch_size=train_batch_size,
                                                shuffle=True):
            train_iter += 1

            optimizer.zero_grad()

            example_losses = -model(sentences, sentiments)  # (batch_size,)
            batch_size = len(
                example_losses)  # in case data augmentation makes returned
            # number of examples > input batch size
            batch_loss = example_losses.sum()
            loss = batch_loss / batch_size

            loss.backward()

            # clip gradient
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                       clip_grad)

            optimizer.step()

            batch_losses_val = batch_loss.item()
            report_loss += batch_losses_val
            cum_loss += batch_losses_val

            report_examples += batch_size
            cum_examples += batch_size

            if train_iter % log_every == 0:
                # train_accuracy = model.compute_accuracy(sentences, sentiments)
                print_and_write(
                    "epoch %d, iter %d, avg. loss %.2f, "
                    "cum. examples %d, time elapsed %.2f sec" % (
                        epoch,
                        train_iter,
                        report_loss / report_examples,
                        cum_examples,
                        time.time() - begin_time,
                    ),
                    f_long,
                )
                f_train.write(
                    "%d, %d, %.2f\n" %
                    (epoch, train_iter, report_loss / report_examples))

                train_time = time.time()
                report_loss = report_examples = 0.0

            # perform validation
            if train_iter % valid_niter == 0:
                cum_loss = cum_examples = 0.0
                valid_num += 1

                print_and_write("begin validation ...", f_long)

                # compute dev
                dev_score, dev_accuracy = evaluate_dev(
                    model, dev_data,
                    batch_size=5000)  # dev batch size can be a bit larger
                valid_metric = -dev_score  # maybe use accuracy instead?

                print_and_write(
                    "validation: iter %d, dev. score %f, dev. accuracy %f" %
                    (train_iter, dev_score, dev_accuracy),
                    f_long,
                )
                f_dev.write("%d, %d, %f, %f\n" %
                            (epoch, train_iter, dev_score, dev_accuracy))

                is_better = len(hist_valid_scores
                                ) == 0 or valid_metric > max(hist_valid_scores)
                hist_valid_scores.append(valid_metric)

                # train_score = evaluate_dev(model, train_data, batch_size=100000)

                if is_better:
                    patience = 0
                    print_and_write(
                        "save currently the best model to [%s]" %
                        model_save_path,
                        f_long,
                    )
                    model.save(model_save_path)

                    # also save the optimizers' state
                    torch.save(optimizer.state_dict(),
                               model_save_path + ".optim")
                elif patience < int(args["--patience"]):
                    patience += 1
                    print_and_write("hit patience %d" % patience, f_long)

                    if patience == int(args["--patience"]):
                        num_trial += 1
                        print_and_write("hit #%d trial" % num_trial, f_long)
                        if num_trial == int(args["--max-num-trial"]):
                            print_and_write("early stop!", f_long)
                            exit(0)

                        # decay lr, and restore from previously best checkpoint
                        lr = optimizer.param_groups[0]["lr"] * float(
                            args["--lr-decay"])
                        print_and_write(
                            "load previously best model and decay learning rate to %f"
                            % lr,
                            f_long,
                        )

                        # load model
                        params = torch.load(
                            model_save_path,
                            map_location=lambda storage, loc: storage)
                        model.load_state_dict(params["state_dict"])
                        model = model.to(device)

                        print_and_write("restore parameters of the optimizers",
                                        f_long)
                        optimizer.load_state_dict(
                            torch.load(model_save_path + ".optim"))

                        # set new lr
                        for param_group in optimizer.param_groups:
                            param_group["lr"] = lr

                        # reset patience
                        patience = 0

                if epoch == int(args["--max-epoch"]):
                    print_and_write("reached maximum number of epochs!",
                                    f_long)
                    exit(0)
Code example #12
File: run.py  Project: aaniin/cs224n
def train(args: Dict):
    """ Train the NMT Model.
    @param args (Dict): args from cmd line
    """
    train_data_src = read_corpus(args['--train-src'], source='src')
    train_data_tgt = read_corpus(args['--train-tgt'], source='tgt')

    dev_data_src = read_corpus(args['--dev-src'], source='src')
    dev_data_tgt = read_corpus(args['--dev-tgt'], source='tgt')

    train_data = list(zip(train_data_src, train_data_tgt))
    dev_data = list(zip(dev_data_src, dev_data_tgt))

    train_batch_size = int(args['--batch-size'])
    clip_grad = float(args['--clip-grad'])
    valid_niter = int(args['--valid-niter'])
    log_every = int(args['--log-every'])
    model_save_path = args['--save-to']

    vocab = Vocab.load(args['--vocab'])

    model = NMT(embed_size=int(args['--embed-size']),
                hidden_size=int(args['--hidden-size']),
                dropout_rate=float(args['--dropout']),
                vocab=vocab)
    model.train()

    uniform_init = float(args['--uniform-init'])
    if np.abs(uniform_init) > 0.:
        print('uniformly initialize parameters [-%f, +%f]' %
              (uniform_init, uniform_init),
              file=sys.stderr)
        for p in model.parameters():
            p.data.uniform_(-uniform_init, uniform_init)

    vocab_mask = torch.ones(len(vocab.tgt))
    vocab_mask[vocab.tgt['<pad>']] = 0

    device = torch.device("cuda:0" if args['--cuda'] else "cpu")
    print('use device: %s' % device, file=sys.stderr)

    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=float(args['--lr']))

    # Set counters
    num_trial = 0
    train_iter = patience = cum_loss = report_loss = cum_tgt_words = report_tgt_words = 0
    cum_examples = report_examples = epoch = valid_num = 0
    hist_valid_scores = []
    fwd_time = train_time = begin_time = time.time()

    # Begin training
    print('begin Maximum Likelihood training')

    while True:
        epoch += 1

        #  Loop over all data in selection batches
        for src_sents, tgt_sents in batch_iter(train_data,
                                               batch_size=train_batch_size,
                                               shuffle=True):

            # Sentences must be sorted by length (that is, number of words).
            # Sort the (src, tgt) pairs together by source length so that the
            # alignment between source and target sentences is preserved.
            pairs = sorted(zip(src_sents, tgt_sents),
                           key=lambda e: len(e[0]), reverse=True)
            src_sents, tgt_sents = map(list, zip(*pairs))

            train_iter += 1
            # Zero out gradients, pytorch accumulates them
            optimizer.zero_grad()

            # Get loss
            train_batch_losses = (-model.forward(src_sents, tgt_sents))
            batch_loss = train_batch_losses.sum()
            loss = batch_loss / train_batch_size

            # Get gradients
            loss.backward()

            # clip gradient
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                       clip_grad)

            # step
            optimizer.step()

            # Report progress
            batch_losses_val = batch_loss.item()
            report_loss += batch_losses_val
            cum_loss += batch_losses_val

            # Get some report metric
            tgt_words_num_to_predict = sum(
                len(s[1:]) for s in tgt_sents)  # omitting leading `<s>`
            report_tgt_words += tgt_words_num_to_predict
            cum_tgt_words += tgt_words_num_to_predict
            report_examples += train_batch_size
            cum_examples += train_batch_size

            if train_iter % log_every == 0:
                print('epoch %d, iter %d, avg. loss %.2f, avg. ppl %.2f ' \
                        'cum. examples %d, speed %.2f words/sec, time elapsed %.2f sec' % (epoch, train_iter,
                                                                                            report_loss / report_examples,
                                                                                            math.exp(report_loss / report_tgt_words),
                                                                                            cum_examples,
                                                                                            report_tgt_words / (time.time() - train_time),
                                                                                            time.time() - begin_time), file=sys.stderr)

                train_time = time.time()
                report_loss = report_tgt_words = report_examples = 0.

                # Test saving and loading the model
                # test_save_load_model(model=model,optimizer=optimizer)

            # perform validation
            if train_iter % valid_niter == 0:
                print(
                    'epoch %d, iter %d, cum. loss %.2f, cum. ppl %.2f, cum. examples %d'
                    % (epoch, train_iter, cum_loss / cum_examples,
                       np.exp(cum_loss / cum_tgt_words), cum_examples),
                    file=sys.stderr)

                cum_loss = cum_examples = cum_tgt_words = 0.
                valid_num += 1

                print('begin validation ...', file=sys.stderr)

                # compute dev. ppl and bleu
                #dev_ppl = evaluate_ppl(model, dev_data, batch_size=128)   # dev batch size can be a bit larger
                dev_ppl = evaluate_ppl(model,
                                       dev_data,
                                       batch_size=train_batch_size *
                                       2)  # dev batch size can be a bit larger
                valid_metric = -dev_ppl

                print('validation: iter %d, dev. ppl %f' %
                      (train_iter, dev_ppl),
                      file=sys.stderr)

                is_better = len(hist_valid_scores
                                ) == 0 or valid_metric > max(hist_valid_scores)
                hist_valid_scores.append(valid_metric)

                if is_better:
                    patience = 0
                    print('save currently the best model to [%s]' %
                          model_save_path,
                          file=sys.stderr)
                    model.save(model_save_path)

                    # also save the optimizers' state
                    torch.save(optimizer.state_dict(),
                               model_save_path + '.optim')
                elif patience < int(args['--patience']):
                    patience += 1
                    print('hit patience %d' % patience, file=sys.stderr)

                    if patience == int(args['--patience']):
                        num_trial += 1
                        print('hit #%d trial' % num_trial, file=sys.stderr)
                        if num_trial == int(args['--max-num-trial']):
                            print('early stop!', file=sys.stderr)
                            exit(0)

                        # decay lr, and restore from previously best checkpoint
                        lr = optimizer.param_groups[0]['lr'] * float(
                            args['--lr-decay'])
                        print(
                            'load previously best model and decay learning rate to %f'
                            % lr,
                            file=sys.stderr)

                        # load model
                        #params = torch.load(model_save_path, map_location=lambda storage, loc: storage)
                        # See https://github.com/pytorch/pytorch/issues/7415 and
                        # https://discuss.pytorch.org/t/on-a-cpu-device-how-to-load-checkpoint-saved-on-gpu-device/349 and
                        # https://github.com/pytorch/pytorch/issues/9139
                        params = torch.load(model_save_path,
                                            map_location='cpu')
                        model.load_state_dict(params['state_dict'])
                        model = model.to(device)

                        print('restore parameters of the optimizers',
                              file=sys.stderr)
                        # optimizer.load_state_dict(torch.load(model_save_path + '.optim'))
                        optimizer.load_state_dict(
                            torch.load(model_save_path + '.optim',
                                       map_location='cpu'))
                        optimizer_to(optimizer, device)

                        # set new lr
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lr

                        # reset patience
                        patience = 0

                if epoch == int(args['--max-epoch']):
                    print('reached maximum number of epochs!', file=sys.stderr)
                    exit(0)
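
The loop above moves a restored optimizer back onto the training device with optimizer_to(optimizer, device), which is not defined in this snippet. A minimal sketch, assuming the optimizer state only holds plain tensors (as Adam's step/exp_avg/exp_avg_sq do) plus at most one level of nested dicts, could look like this:

import torch


def optimizer_to(optimizer, device):
    """Sketch: move every tensor in the optimizer's state onto `device`.

    Needed because the optimizer state was reloaded with map_location='cpu'
    above, while the model parameters live on `device`.
    """
    for state in optimizer.state.values():
        for key, value in state.items():
            if torch.is_tensor(value):
                state[key] = value.to(device)
            elif isinstance(value, dict):
                for sub_key, sub_value in value.items():
                    if torch.is_tensor(sub_value):
                        value[sub_key] = sub_value.to(device)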
Code example #13
def experiement(args: Dict, test_only, device):
    """ Train and test the NMT Model.
    @param args (Dict): args from cmd line
    @param test_only (bool): if True, skip training and only decode the test set
    @param device (torch.device): device on which to run the model
    """
    # train_data_src = read_corpus(args['--train-src'], source='src')
    # train_data_tgt = read_corpus(args['--train-tgt'], source='tgt')
    #
    # dev_data_src = read_corpus(args['--dev-src'], source='src')
    # dev_data_tgt = read_corpus(args['--dev-tgt'], source='tgt')
    #
    # train_data = list(zip(train_data_src, train_data_tgt))
    # dev_data = list(zip(dev_data_src, dev_data_tgt))

    train_batch_size = int(args['--batch-size'])
    clip_grad = float(args['--clip-grad'])
    valid_niter = int(args['--valid-niter'])
    log_every = int(args['--log-every'])
    model_save_path = args['--save-to']

    use_pos_embed = bool(args['--use-pos-embed'])
    use_copy = bool(args['--use-copy'])

    SRC, TRG, train_iterator, dev_iterator, test_iterator = load_data(
        args['--train-data'], args['--dev-data'], args['--test-data'], device,
        train_batch_size, (use_pos_embed or use_copy))

    vocab = Vocab(SRC, TRG)

    model = NMT(src_embed_size=int(args['--src-embed-size']),
                dst_embed_size=int(args['--dst-embed-size']),
                hidden_size=int(args['--hidden-size']),
                dropout_rate=float(args['--dropout']),
                vocab=vocab,
                use_pos_embed=use_pos_embed,
                use_copy=use_copy)

    model.load_pretrained_embeddings(vocab)

    # print("args: {}".format(args))

    uniform_init = float(args['--uniform-init'])
    if np.abs(uniform_init) > 0.:
        print('uniformly initialize parameters [-%f, +%f]' %
              (uniform_init, uniform_init),
              file=sys.stderr)
        for p in model.parameters():
            p.data.uniform_(-uniform_init, uniform_init)

    # def init_weights(m):
    #     for name, param in m.named_parameters():
    #         if 'weight' in name:
    #             nn.init.normal_(param.data, mean=0, std=0.01)
    #         else:
    #             nn.init.constant_(param.data, 0)
    #
    # model.apply(init_weights)

    # vocab_mask = torch.ones(len(vocab.tgt))
    # vocab_mask[vocab.tgt['<pad>']] = 0

    print('use device: %s' % device, file=sys.stderr)
    print(model)

    para_count = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f'The model has {para_count:,} trainable parameters')
    print("file path: {}".format(model_save_path))

    if test_only:
        model.eval()
        decode(args, test_iterator, vocab, device)
        exit(0)

    # perform training
    model.train()
    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=float(args['--lr']))

    num_trial = 0
    train_iter = patience = cum_loss = report_loss = cum_tgt_words = report_tgt_words = 0
    cum_examples = report_examples = epoch = valid_num = 0
    hist_valid_scores = []
    train_time = begin_time = time.time()
    print('begin Maximum Likelihood training')

    while True:
        epoch += 1

        #perform training
        model.train()
        # for src_sents, tgt_sents in batch_iter(train_data, batch_size=train_batch_size, shuffle=True):
        for i, batch in enumerate(train_iterator):
            train_iter += 1

            optimizer.zero_grad()
            src_sents, src_sents_lens = batch.src
            tgt_sents = batch.trg
            batch_size = src_sents.shape[1]

            example_losses = -model(src_sents, src_sents_lens,
                                    tgt_sents)  # (batch_size,)
            batch_loss = example_losses.sum()
            loss = batch_loss / batch_size

            loss.backward()

            # clip gradient
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                       clip_grad)

            optimizer.step()

            batch_losses_val = batch_loss.item()
            report_loss += batch_losses_val
            cum_loss += batch_losses_val

            # tgt_sents is a (seq_len, batch) tensor here, so iterating over its rows does not
            # count words; approximate the number of target words to predict by dropping the
            # leading `<s>` row (padding is still included)
            tgt_words_num_to_predict = tgt_sents[1:].numel()
            report_tgt_words += tgt_words_num_to_predict
            cum_tgt_words += tgt_words_num_to_predict
            report_examples += batch_size
            cum_examples += batch_size

        # NOTE: logging and validation now happen once per epoch rather than every
        # `log_every` / `valid_niter` iterations (the per-iteration checks are commented out below).
        print('epoch %d, iter %d, avg. loss %.2f, avg. ppl %.2f ' \
              'cum. examples %d, speed %.2f words/sec, time elapsed %.2f sec' % (epoch, train_iter, report_loss / report_examples,
                                                                                         math.exp(report_loss / report_tgt_words),
                                                                                         cum_examples,
                                                                                         report_tgt_words / (time.time() - train_time),
                                                                                         time.time() - begin_time), file=sys.stderr)

        train_time = time.time()
        report_loss = report_tgt_words = report_examples = 0.

        # perform validation
        # model.eval()
        # if train_iter % valid_niter == 0:
        # print('epoch %d, iter %d, cum. loss %.2f, cum. ppl %.2f cum. examples %d' % (epoch, train_iter,
        #                                                                                      cum_loss / cum_examples,
        #                                                                                      np.exp(cum_loss / cum_tgt_words),
        #                                                                                      cum_examples), file=sys.stderr)

        cum_loss = cum_examples = cum_tgt_words = 0.
        valid_num += 1

        # print('begin validation ...', file=sys.stderr)

        # compute dev. ppl and bleu
        dev_ppl = evaluate_ppl(
            model, dev_iterator,
            batch_size=128)  # dev batch size can be a bit larger
        valid_metric = -dev_ppl

        print('validation: iter %d, dev. ppl %f' % (train_iter, dev_ppl),
              file=sys.stderr)

        is_better = len(
            hist_valid_scores) == 0 or valid_metric > max(hist_valid_scores)
        hist_valid_scores.append(valid_metric)

        if is_better:
            patience = 0
            # print('save currently the best model to [%s]' % model_save_path, file=sys.stderr)
            model.save(model_save_path)

            # also save the optimizers' state
            torch.save(optimizer.state_dict(), model_save_path + '.optim')
        elif patience < int(args['--patience']):
            patience += 1
            print('hit patience %d' % patience, file=sys.stderr)

            if patience == int(args['--patience']):
                num_trial += 1
                print('hit #%d trial' % num_trial, file=sys.stderr)
                if num_trial == int(args['--max-num-trial']):
                    print('early stop!', file=sys.stderr)
                    # exit(0)
                    break

                # decay lr, and restore from previously best checkpoint
                lr = optimizer.param_groups[0]['lr'] * float(
                    args['--lr-decay'])
                print(
                    'load previously best model and decay learning rate to %f'
                    % lr,
                    file=sys.stderr)

                # load model
                params = torch.load(model_save_path,
                                    map_location=lambda storage, loc: storage)
                model.load_state_dict(params['state_dict'])
                model = model.to(device)

                print('restore parameters of the optimizers', file=sys.stderr)
                optimizer.load_state_dict(
                    torch.load(model_save_path + '.optim'))

                # set new lr
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr

                # reset patience
                patience = 0

        if epoch == int(args['--max-epoch']):
            print('reached maximum number of epochs!', file=sys.stderr)
            break

    # perform testing
    model.eval()
    decode(args, test_iterator, vocab, device)
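
evaluate_ppl(model, dev_iterator, ...) is not included in this snippet. A plausible sketch under the conventions visible above — torchtext-style batches exposing .src as a (tensor, lengths) pair and .trg as a (seq_len, batch) tensor, with model(...) returning per-example log-likelihoods — might be:

import numpy as np
import torch


def evaluate_ppl(model, dev_iterator, batch_size=None):
    """Sketch: dev-set perplexity. `batch_size` is accepted only for call-site
    compatibility; the iterator already fixes its own batch size."""
    was_training = model.training
    model.eval()
    cum_loss = 0.0
    cum_tgt_words = 0.0
    with torch.no_grad():
        for batch in dev_iterator:
            src_sents, src_sents_lens = batch.src
            tgt_sents = batch.trg
            # Same sign convention as the training loop: negate log-likelihoods to get losses
            loss = -model(src_sents, src_sents_lens, tgt_sents).sum()
            cum_loss += loss.item()
            # tgt_sents is (seq_len, batch); drop the leading <s> row (pads counted as an approximation)
            cum_tgt_words += tgt_sents[1:].numel()
    if was_training:
        model.train()
    return np.exp(cum_loss / cum_tgt_words)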
Code example #14
def train(args: Dict):
    """ Train the NMT Model.
    :param Dict args: arguments from command line
    """
    train_data_src = read_corpus(args['--train-src'], source='src')
    train_data_tgt = read_corpus(args['--train-tgt'], source='tgt')

    dev_data_src = read_corpus(args['--dev-src'], source='src')
    dev_data_tgt = read_corpus(args['--dev-tgt'], source='tgt')

    # Lists of (src_sent, tgt_sent) tuples
    train_data = list(zip(train_data_src, train_data_tgt))
    dev_data = list(zip(dev_data_src, dev_data_tgt))

    train_batch_size = int(args['--batch-size'])

    clip_grad = float(args['--clip-grad'])
    valid_niter = int(args['--valid-niter'])
    log_every = int(args['--log-every'])
    model_save_path = args['--save-to']

    vocab = Vocab.load(args['--vocab'])

    model = NMT(embed_size=int(args['--embed-size']),
                hidden_size=int(args['--hidden-size']),
                dropout_rate=float(args['--dropout']),
                vocab=vocab,
                no_char_decoder=args['--no-char-decoder'])
    model.train()  # Set to train mode

    uniform_init = float(args['--uniform-init'])
    if np.abs(uniform_init) > 0.:
        print('uniformly initialize parameters [-%f, +%f]' %
              (uniform_init, uniform_init),
              file=sys.stderr)
        for p in model.parameters():
            p.data.uniform_(-uniform_init, uniform_init)  # Initialize in-place

    # vocab_mask = torch.ones(len(vocab.tgt))
    # vocab_mask[vocab.tgt['<pad>']] = 0

    device = torch.device("cuda:0" if args['--cuda'] else "cpu")
    print('use device: %s' % device, file=sys.stderr)
    if device.type == 'cuda':
        print('device name: ', torch.cuda.get_device_name(device))
    print('cuda available: ', torch.cuda.is_available())

    model = model.to(device)  # Send model parameters to the chosen device

    optimizer = torch.optim.Adam(model.parameters(), lr=float(args['--lr']))

    # Initialize necessary variables
    num_trial = 0
    train_iter = patience = cum_loss = report_loss = cum_tgt_words = report_tgt_words = 0
    cum_examples = report_examples = epoch = valid_num = 0

    # To keep track of previous scores
    hist_valid_scores = []

    train_time = begin_time = time.time()
    print('begin Maximum Likelihood training')

    # Load the previous model parameters if they exist
    if os.path.isfile(model_save_path):
        print('Load previous best model...', file=sys.stderr)
        # load model
        params = torch.load(model_save_path,
                            map_location=lambda storage, loc: storage)
        model.load_state_dict(params['state_dict'])
        model = model.to(device)

        print('restore parameters of the optimizers', file=sys.stderr)
        optimizer.load_state_dict(torch.load(model_save_path + '.optim'))

        # score the loaded model on dev and record it as the current best - otherwise it would be overwritten by a worse model
        dev_ppl = evaluate_ppl(
            model, dev_data,
            batch_size=64)  # dev batch size can be a bit larger
        valid_metric = -dev_ppl
        hist_valid_scores.append(valid_metric)

    while True:
        epoch += 1
        # Iterate over lazily generated batches (lists) of sentences (each sentence is a list of words)
        for src_sents, tgt_sents in batch_iter(train_data,
                                               batch_size=train_batch_size,
                                               shuffle=True):
            train_iter += 1
            # Zero gradients, otherwise they would be accumulated across batches
            optimizer.zero_grad()

            batch_size = len(src_sents)

            # Calculate the losses for each example in the batch (i.e. forward propagation)
            example_losses = -model(src_sents, tgt_sents)  # Dim: (batch_size,)

            # Average losses over the entire batch
            batch_loss = example_losses.sum()
            loss = batch_loss / batch_size

            # Compute gradients
            loss.backward()

            # Clip gradients
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                       clip_grad)

            # Update parameters
            optimizer.step()

            # Get a number from a tensor containing a single scalar
            batch_losses_val = batch_loss.item()

            # Add to `report_loss` (zeroed every `log_every` iterations - for logging)
            report_loss += batch_losses_val

            # Add to `cum_loss` (zeroed every `valid_niter` iterations - for validation)
            cum_loss += batch_losses_val

            tgt_words_num_to_predict = sum(
                len(s[1:]) for s in tgt_sents)  # omitting leading `<s>`
            report_tgt_words += tgt_words_num_to_predict
            cum_tgt_words += tgt_words_num_to_predict
            report_examples += batch_size
            cum_examples += batch_size

            if train_iter % log_every == 0:
                print('epoch %d, iter %d, avg. loss %.2f, avg. ppl %.2f ' \
                      'cum. examples %d, speed %.2f words/sec, time elapsed %.2f sec' % (epoch, train_iter,
                                                                                         report_loss / report_examples,
                                                                                         math.exp(report_loss / report_tgt_words),
                                                                                         cum_examples,
                                                                                         report_tgt_words / (time.time() - train_time),
                                                                                         time.time() - begin_time), file=sys.stderr)

                train_time = time.time()
                report_loss = report_tgt_words = report_examples = 0.

            # Perform validation
            if train_iter % valid_niter == 0:
                print(
                    'epoch %d, iter %d, cum. loss %.2f, cum. ppl %.2f cum. examples %d'
                    % (epoch, train_iter, cum_loss / cum_examples,
                       np.exp(cum_loss / cum_tgt_words), cum_examples),
                    file=sys.stderr)

                cum_loss = cum_examples = cum_tgt_words = 0.
                valid_num += 1

                print('begin validation ...', file=sys.stderr)

                # compute dev. ppl
                dev_ppl = evaluate_ppl(
                    model, dev_data,
                    batch_size=64)  # dev batch size can be a bit larger
                valid_metric = -dev_ppl

                print('validation: iter %d, dev. ppl %f' %
                      (train_iter, dev_ppl),
                      file=sys.stderr)

                # Lower perplexity is better
                is_better = len(hist_valid_scores
                                ) == 0 or valid_metric > max(hist_valid_scores)
                hist_valid_scores.append(valid_metric)

                if is_better:
                    patience = 0
                    print('save currently the best model to [%s]' %
                          model_save_path,
                          file=sys.stderr)
                    model.save(model_save_path)

                    # also save the optimizers' state
                    torch.save(optimizer.state_dict(),
                               model_save_path + '.optim')
                elif patience < int(args['--patience']):
                    patience += 1
                    print('hit patience %d' % patience, file=sys.stderr)

                    if patience == int(args['--patience']):
                        num_trial += 1
                        print('hit #%d trial' % num_trial, file=sys.stderr)
                        if num_trial == int(args['--max-num-trial']):
                            print('early stop!', file=sys.stderr)
                            exit(0)

                        # decay lr, and restore from previously best checkpoint
                        lr = optimizer.param_groups[0]['lr'] * float(
                            args['--lr-decay'])
                        print(
                            'load previously best model and decay learning rate to %f'
                            % lr,
                            file=sys.stderr)

                        # load model
                        params = torch.load(
                            model_save_path,
                            map_location=lambda storage, loc: storage)
                        model.load_state_dict(params['state_dict'])
                        model = model.to(device)

                        print('restore parameters of the optimizers',
                              file=sys.stderr)
                        optimizer.load_state_dict(
                            torch.load(model_save_path + '.optim'))

                        # set new lr
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lr

                        # reset patience
                        patience = 0

            if epoch == int(args['--max-epoch']):
                print('reached maximum number of epochs!', file=sys.stderr)
                exit(0)
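
batch_iter(train_data, batch_size, shuffle=True) is used by this and the surrounding training loops but not shown. A minimal sketch consistent with how it is consumed — lists of aligned (src, tgt) sentence pairs, sorted within each batch by source length in decreasing order — is:

import math

import numpy as np


def batch_iter(data, batch_size, shuffle=False):
    """Sketch: yield (src_sents, tgt_sents) batches from a list of sentence pairs,
    keeping each pair aligned and sorting by source length, longest first."""
    batch_num = math.ceil(len(data) / batch_size)
    index_array = list(range(len(data)))
    if shuffle:
        np.random.shuffle(index_array)
    for i in range(batch_num):
        indices = index_array[i * batch_size: (i + 1) * batch_size]
        examples = sorted((data[idx] for idx in indices),
                          key=lambda e: len(e[0]), reverse=True)
        src_sents = [e[0] for e in examples]
        tgt_sents = [e[1] for e in examples]
        yield src_sents, tgt_sents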
Code example #15
def train(args: Dict):
    """ Train the NMT Model.
    @param args (Dict): args from cmd line
    """
    train_data_src = read_corpus(args['--train-src'], source='src')
    train_data_tgt = read_corpus(args['--train-tgt'], source='tgt')

    dev_data_src = read_corpus(args['--dev-src'], source='src')
    dev_data_tgt = read_corpus(args['--dev-tgt'], source='tgt')

    train_data = list(zip(train_data_src, train_data_tgt))
    dev_data = list(zip(dev_data_src, dev_data_tgt))

    train_batch_size = int(args['--batch-size'])
    clip_grad = float(args['--clip-grad'])
    valid_niter = int(args['--valid-niter'])
    log_every = int(args['--log-every'])
    model_save_path = args['--save-to']

    # NOTE: the train/val/test split is done here in code rather than in a separate preprocessing script.

    # data preprocessing for the question (abc) and answer (d) fields
    spacy_en = spacy.load('en')

    def tokenizer(text):  # create a tokenizer function
        return [tok.text for tok in spacy_en.tokenizer(text)]

    TEXT = Field(sequential=True,
                 tokenize=tokenizer,
                 lower=True,
                 include_lengths=True,
                 init_token='<s>',
                 eos_token='</s>')
    analogies_datafields = [("abc", TEXT), ("d", TEXT)]

    train, val, test = TabularDataset.splits(
        path="data",  # the root directory where the data lies
        train='ngram_train.csv',
        validation="ngram_val.csv",
        test='ngram_test.csv',
        format='csv',
        skip_header=False,  # if your csv file has a header row, set this to True so it isn't processed as data!
        fields=analogies_datafields)

    pretrained_vecs = torchtext.vocab.Vectors('../GloVe-1.2/life_vectors.txt')
    TEXT.build_vocab(
        vectors=pretrained_vecs)  # specials=['<pad>', '<s>', '</s>']

    # torchtext's legacy Iterator API expects device=-1 for CPU and a GPU index otherwise
    if args['--cuda'] == 'cpu':
        torch_text_device = -1
    else:
        torch_text_device = 0

    training_iter, val_iter, test_iter = Iterator.splits(
        (train, val, test),
        sort_key=lambda x: len(x.abc),
        batch_sizes=(100, 20, 1),
        device=torch_text_device,
        sort_within_batch=True)

    model = NMT(embed_size=int(args['--embed-size']),
                hidden_size=int(args['--hidden-size']),
                dropout_rate=float(args['--dropout']),
                vocab=TEXT.vocab)
    model.train()  # set the module to training mode

    uniform_init = float(args['--uniform-init'])
    if np.abs(uniform_init) > 0.:
        print('uniformly initialize parameters [-%f, +%f]' %
              (uniform_init, uniform_init),
              file=sys.stderr)
        for p in model.parameters():
            p.data.uniform_(-uniform_init, uniform_init)

    device = torch.device("cuda:0" if args['--cuda'] else "cpu")
    print('use device: %s' % device, file=sys.stderr)

    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=float(args['--lr']))

    num_trial = 0
    train_iter = patience = cum_loss = report_loss = cum_tgt_words = report_tgt_words = 0
    cum_examples = report_examples = epoch = valid_num = 0
    hist_valid_scores = []
    train_time = begin_time = time.time()
    print('begin Maximum Likelihood training')

    writer = SummaryWriter('logs')
    is_better_count = 0  # TODO: remove this workaround once the non-stopping issue is debugged
    while True:
        epoch += 1

        for _, data in enumerate(training_iter):
            (src_sents, src_lengths), (tgt_sents, _) = data.abc, data.d

            train_iter += 1

            optimizer.zero_grad()

            batch_size = src_sents.shape[1]

            example_losses = model(src_sents, src_lengths,
                                   tgt_sents)  # (batch_size,)
            batch_loss = example_losses.sum()
            loss = batch_loss / batch_size

            loss.backward()

            # clip gradient
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                       clip_grad)

            optimizer.step()

            batch_losses_val = batch_loss.item()
            report_loss += batch_losses_val
            cum_loss += batch_losses_val

            # tgt_sents is a (seq_len, batch) tensor here; approximate the number of
            # target words to predict by dropping the leading `<s>` row (padding included)
            tgt_words_num_to_predict = tgt_sents[1:].numel()
            report_tgt_words += tgt_words_num_to_predict
            cum_tgt_words += tgt_words_num_to_predict
            report_examples += batch_size
            cum_examples += batch_size

            if train_iter % log_every == 0:
                print('epoch %d, iter %d, avg. loss %.2f, avg. ppl %.2f ' \
                      'cum. examples %d, speed %.2f words/sec, time elapsed %.2f sec' % (epoch, train_iter,
                                                                                         report_loss / report_examples,
                                                                                         math.exp(report_loss / report_tgt_words),
                                                                                         cum_examples,
                                                                                         report_tgt_words / (time.time() - train_time),
                                                                                         time.time() - begin_time), file=sys.stderr)

                writer.add_scalar('Train/AvgLoss',
                                  report_loss / report_examples, epoch)
                writer.add_scalar('Train/AvgPPL',
                                  math.exp(report_loss / report_tgt_words),
                                  epoch)

                train_time = time.time()
                report_loss = report_tgt_words = report_examples = 0.

            # perform validation
            if train_iter % valid_niter == 0:
                print(
                    'epoch %d, iter %d, cum. loss %.2f, cum. ppl %.2f cum. examples %d'
                    % (epoch, train_iter, cum_loss / cum_examples,
                       np.exp(cum_loss / cum_tgt_words), cum_examples),
                    file=sys.stderr)

                cum_loss = cum_examples = cum_tgt_words = 0.
                valid_num += 1

                print('begin validation ...', file=sys.stderr)

                # compute dev. ppl and bleu
                dev_ppl, val_loss = evaluate_ppl(
                    model, val_iter)  # dev batch size can be a bit larger
                valid_metric = -dev_ppl

                print('validation: iter %d, dev. ppl %f, dev loss %f' %
                      (train_iter, dev_ppl, val_loss),
                      file=sys.stderr)
                writer.add_scalar('Val/AvgPPL', dev_ppl, epoch)
                writer.add_scalar('Val/AvgLoss', val_loss, epoch)

                is_better = len(hist_valid_scores
                                ) == 0 or valid_metric > max(hist_valid_scores)
                print(hist_valid_scores)
                print(valid_metric)
                hist_valid_scores.append(valid_metric)

                if is_better:
                    patience = 0
                    print('save currently the best model to [%s]' %
                          model_save_path,
                          file=sys.stderr)
                    model.save(model_save_path)
                    is_better_count = is_better_count + 1
                    print(is_better_count)
                    # also save the optimizers' state
                    torch.save(optimizer.state_dict(),
                               model_save_path + '.optim')
                    if is_better_count > 3:
                        print('validation improved more than 3 times; stopping early (see TODO above)',
                              file=sys.stderr)
                        writer.close()
                        exit(0)

                elif patience < int(args['--patience']):
                    patience += 1
                    print('hit patience %d' % patience, file=sys.stderr)

                    if patience == int(args['--patience']):
                        num_trial += 1
                        print('hit #%d trial' % num_trial, file=sys.stderr)
                        if num_trial == int(args['--max-num-trial']):
                            print('early stop!', file=sys.stderr)
                            exit(0)

                        # decay lr, and restore from previously best checkpoint
                        lr = optimizer.param_groups[0]['lr'] * float(
                            args['--lr-decay'])
                        print(
                            'load previously best model and decay learning rate to %f'
                            % lr,
                            file=sys.stderr)

                        # load model
                        params = torch.load(
                            model_save_path,
                            map_location=lambda storage, loc: storage)
                        model.load_state_dict(params['state_dict'])
                        model = model.to(device)

                        print('restore parameters of the optimizers',
                              file=sys.stderr)
                        optimizer.load_state_dict(
                            torch.load(model_save_path + '.optim'))

                        # set new lr
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lr

                        # reset patience
                        patience = 0

                if epoch == int(args['--max-epoch']):
                    print('reached maximum number of epochs!', file=sys.stderr)
                    writer.close()
                    exit(0)
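
This snippet expects evaluate_ppl(model, val_iter) to return both a perplexity and a dev loss. One plausible sketch under the conventions visible above — batches expose .abc and .d, model(...) already returns per-example losses without negation, and padded positions are counted as an approximation — is:

import numpy as np
import torch


def evaluate_ppl(model, val_iter):
    """Sketch: return (dev perplexity, average dev loss per example)."""
    was_training = model.training
    model.eval()
    cum_loss = cum_tgt_words = cum_examples = 0.0
    with torch.no_grad():
        for data in val_iter:
            (src_sents, src_lengths), (tgt_sents, _) = data.abc, data.d
            batch_size = src_sents.shape[1]
            losses = model(src_sents, src_lengths, tgt_sents)  # (batch_size,)
            cum_loss += losses.sum().item()
            # tgt_sents is (seq_len, batch); drop the leading <s> row (pads included)
            cum_tgt_words += tgt_sents[1:].numel()
            cum_examples += batch_size
    if was_training:
        model.train()
    return np.exp(cum_loss / cum_tgt_words), cum_loss / cum_examples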
Code example #16
File: run.py  Project: Aran00/CS224n2019A5
def train(args: Dict):
    """ Train the NMT Model.
    @param args (Dict): args from cmd line
    """
    train_data_src = read_corpus(args['--train-src'], source='src')
    train_data_tgt = read_corpus(args['--train-tgt'], source='tgt')

    dev_data_src = read_corpus(args['--dev-src'], source='src')
    dev_data_tgt = read_corpus(args['--dev-tgt'], source='tgt')

    train_data = list(zip(train_data_src, train_data_tgt))
    dev_data = list(zip(dev_data_src, dev_data_tgt))

    train_batch_size = int(args['--batch-size'])

    clip_grad = float(args['--clip-grad'])
    valid_niter = int(args['--valid-niter'])
    log_every = int(args['--log-every'])
    model_save_path = args['--save-to']

    vocab = Vocab.load(args['--vocab'])

    existing_model = args['--existing-model-path']
    start_from_existing_model = existing_model and os.path.isfile(
        existing_model)
    if start_from_existing_model:
        print("load model from {}".format(existing_model), file=sys.stderr)
        model = NMT.load(existing_model,
                         no_char_decoder=args['--no-char-decoder'])
    else:
        print("Create a new model from hyper parameters")
        model = NMT(embed_size=int(args['--embed-size']),
                    hidden_size=int(args['--hidden-size']),
                    dropout_rate=float(args['--dropout']),
                    vocab=vocab,
                    no_char_decoder=args['--no-char-decoder'])
    model.train()

    print_model_param_count(model)

    # TODO: How to print all the parameters of this model? And is it useful?
    if not start_from_existing_model:
        uniform_init = float(args['--uniform-init'])
        if np.abs(uniform_init) > 0.:
            print('uniformly initialize parameters [-%f, +%f]' %
                  (uniform_init, uniform_init),
                  file=sys.stderr)
            for p in model.parameters():
                p.data.uniform_(-uniform_init, uniform_init)

    vocab_mask = torch.ones(len(vocab.tgt))
    vocab_mask[vocab.tgt['<pad>']] = 0

    device = torch.device("cuda:0" if args['--cuda'] else "cpu")
    print('use device: %s' % device, file=sys.stderr)

    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=float(args['--lr']))

    num_trial = 0
    train_iter = patience = cum_loss = report_loss = cum_tgt_words = report_tgt_words = 0
    cum_examples = report_examples = epoch = valid_num = 0
    hist_valid_scores = []
    train_time = begin_time = time.time()
    print('begin Maximum Likelihood training')

    avg_train_ppls = []
    avg_valid_ppls = []

    # output_file_path = 'outputs/loss_%s' % datetime.datetime.now().strftime("%m-%d-%Y-%I:%M%p")
    output_file_path = os.path.join(
        args['--ppl-save-dir'],
        'ppl.json') if args['--ppl-save-dir'] else 'ppl.json'

    while True:
        epoch += 1

        for src_sents, tgt_sents in batch_iter(train_data,
                                               batch_size=train_batch_size,
                                               shuffle=True):
            train_iter += 1

            optimizer.zero_grad()

            batch_size = len(src_sents)

            example_losses = -model(src_sents, tgt_sents)  # (batch_size,)
            batch_loss = example_losses.sum()
            loss = batch_loss / batch_size

            loss.backward()

            # clip gradient
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                       clip_grad)

            optimizer.step()

            batch_losses_val = batch_loss.item()
            report_loss += batch_losses_val
            cum_loss += batch_losses_val

            tgt_words_num_to_predict = sum(
                len(s[1:]) for s in tgt_sents)  # omitting leading `<s>`
            report_tgt_words += tgt_words_num_to_predict
            cum_tgt_words += tgt_words_num_to_predict
            report_examples += batch_size
            cum_examples += batch_size

            if train_iter % log_every == 0:
                print('epoch %d, iter %d, avg. loss %.2f, avg. ppl %.2f ' \
                      'cum. examples %d, speed %.2f words/sec, time elapsed %.2f sec' % (epoch, train_iter,
                                                                                         report_loss / report_examples,
                                                                                         np.exp(report_loss / report_tgt_words),
                                                                                         cum_examples,
                                                                                         report_tgt_words / (time.time() - train_time),
                                                                                         time.time() - begin_time), file=sys.stderr)
                avg_train_ppls.append(np.exp(report_loss / report_tgt_words))
                train_time = time.time()
                report_loss = report_tgt_words = report_examples = 0.

            # perform validation
            if train_iter % valid_niter == 0:
                # The printed values are the train loss
                print(
                    'epoch %d, iter %d, cum. loss %.2f, cum. ppl %.2f cum. examples %d'
                    % (epoch, train_iter, cum_loss / cum_examples,
                       np.exp(cum_loss / cum_tgt_words), cum_examples),
                    file=sys.stderr)

                cum_loss = cum_examples = cum_tgt_words = 0.
                valid_num += 1

                print('begin validation ...', file=sys.stderr)

                # compute dev. ppl and bleu
                dev_ppl = evaluate_ppl(
                    model, dev_data,
                    batch_size=128)  # dev batch size can be a bit larger
                avg_valid_ppls.append(dev_ppl)

                valid_metric = -dev_ppl

                print('validation: iter %d, dev. ppl %f' %
                      (train_iter, dev_ppl),
                      file=sys.stderr)

                is_better = len(hist_valid_scores
                                ) == 0 or valid_metric > max(hist_valid_scores)
                hist_valid_scores.append(valid_metric)

                if is_better:
                    patience = 0
                    print('save currently the best model to [%s]' %
                          model_save_path,
                          file=sys.stderr)
                    model.save(model_save_path)

                    # also save the optimizers' state
                    torch.save(optimizer.state_dict(),
                               model_save_path + '.optim')
                elif patience < int(args['--patience']):
                    patience += 1
                    print('hit patience %d' % patience, file=sys.stderr)

                    if patience == int(args['--patience']):
                        num_trial += 1
                        print('hit #%d trial' % num_trial, file=sys.stderr)
                        if num_trial == int(args['--max-num-trial']):
                            print('early stop!', file=sys.stderr)
                            output_losses(args, log_every, valid_niter,
                                          avg_train_ppls, avg_valid_ppls,
                                          output_file_path)
                            exit(0)

                        # decay lr, and restore from previously best checkpoint
                        lr = optimizer.param_groups[0]['lr'] * float(
                            args['--lr-decay'])
                        print(
                            'load previously best model and decay learning rate to %f'
                            % lr,
                            file=sys.stderr)

                        # load model
                        params = torch.load(
                            model_save_path,
                            map_location=lambda storage, loc: storage)
                        model.load_state_dict(params['state_dict'])
                        model = model.to(device)

                        print('restore parameters of the optimizers',
                              file=sys.stderr)
                        optimizer.load_state_dict(
                            torch.load(model_save_path + '.optim'))

                        # TODO: is len(optimizer.param_groups) == 1 here? Otherwise updating every group below looks odd
                        # set new lr
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lr

                        # reset patience
                        patience = 0

            if epoch == int(args['--max-epoch']):
                print('reached maximum number of epochs!', file=sys.stderr)
                output_losses(args, log_every, valid_niter, avg_train_ppls,
                              avg_valid_ppls, output_file_path)
                exit(0)
        output_losses(args, log_every, valid_niter, avg_train_ppls,
                      avg_valid_ppls, output_file_path)
        if args['--is-google-colab'] and epoch % 2 == 0 and os.path.isfile(
                model_save_path):
            shutil.copy(model_save_path, args['--ppl-save-dir'])
            shutil.copy(model_save_path + '.optim', args['--ppl-save-dir'])
            print("copied model files to google drive!")
Code example #17
def train(args: Dict):
    """ Train the NMT Model.
    @param args (Dict): args from cmd line
    """
    sample_rate = 22000
    resample_rate = 8000
    train_records = 8
    max_epoch = 10
    vocab = Vocab.load('dataset/vocab_full.json')
    # train_voices_files, corpus = get_voice_files_and_corpus('dataset/train/wavs', train_records)
    # voices = load_voices_files(train_voices_files, sample_rate, resample_rate)
    # train_data = list(zip(voices, corpus))

    dev_files, dev_corpus = get_voice_files_and_corpus('dataset/dev', 2)
    dev_data = list(
        zip(load_voices_files(dev_files, sample_rate, resample_rate),
            dev_corpus))

    epoch_size = 4
    train_batch_size = 2

    clip_grad = 5.0
    valid_niter = 100
    log_every = 10
    model_save_path = 'model.bin'

    model = NMT(embed_size=1024, hidden_size=2048, vocab=vocab)
    model.train()
    vocab_mask = torch.ones(len(vocab.tgt))
    vocab_mask[vocab.tgt['<pad>']] = 0

    # device = torch.device("cuda:0")
    device = torch.device("cpu")
    print('use device: %s' % device, file=sys.stderr)

    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    num_trial = 0
    train_iter = patience = cum_loss = report_loss = cum_tgt_words = report_tgt_words = 0
    cum_examples = report_examples = epoch = valid_num = 0
    hist_valid_scores = []
    train_time = begin_time = time.time()
    print('begin Maximum Likelihood training')
    data_queue = Queue()
    batch_queue = Queue(1)
    loss_queue = Queue(2)

    train_data_to_queue_process = Process(target=load_train_data,
                                          args=('dataset/train', train_records,
                                                epoch_size, data_queue))
    train_data_to_queue_process.start()

    batch_iter_to_queue_process = Process(target=batch_iter_to_queue2,
                                          args=(data_queue, batch_queue,
                                                loss_queue, max_epoch,
                                                train_batch_size, True))
    batch_iter_to_queue_process.start()
    epoch, voices, tgt_sents = batch_queue.get(True)
    current_epoch = -1

    while voices is not None and tgt_sents is not None:

        train_iter += 1
        optimizer.zero_grad()

        # voices = load_voices_files(voice_files, sample_rate, resample_rate)
        # voices = voice_files
        batch_size = len(voices)

        example_losses = -model(voices, tgt_sents)  # (batch_size,)
        batch_loss = example_losses.sum()
        loss = batch_loss / batch_size

        loss.backward()

        # clip gradient
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   clip_grad)

        optimizer.step()

        batch_losses_val = batch_loss.item()
        report_loss += batch_losses_val
        cum_loss += batch_losses_val

        tgt_words_num_to_predict = sum(
            len(s[1:]) for s in tgt_sents)  # omitting leading `<s>`
        report_tgt_words += tgt_words_num_to_predict
        cum_tgt_words += tgt_words_num_to_predict
        report_examples += batch_size
        cum_examples += batch_size
        loss_queue.put(report_loss / report_examples)

        if train_iter % log_every == 0:
            print('epoch %d, iter %d, avg. loss %.2f, avg. ppl %.2f ' \
                'cum. examples %d, speed %.2f words/sec, time elapsed %.2f sec' % (epoch, train_iter,
                                                                                    report_loss / report_examples,
                                                                                    math.exp(report_loss / report_tgt_words),
                                                                                    cum_examples,
                                                                                    report_tgt_words / (time.time() - train_time),
                                                                                    time.time() - begin_time), file=sys.stderr)

            train_time = time.time()
            report_loss = report_tgt_words = report_examples = 0.

        # perform validation
        if train_iter % valid_niter == 0:
            print(
                'epoch %d, iter %d, cum. loss %.2f, cum. ppl %.2f cum. examples %d'
                % (epoch, train_iter, cum_loss / cum_examples,
                   np.exp(cum_loss / cum_tgt_words), cum_examples),
                file=sys.stderr)

            cum_loss = cum_examples = cum_tgt_words = 0.
            valid_num += 1

            print('begin validation ...', file=sys.stderr)

            # compute dev. ppl and bleu
            dev_ppl = evaluate_ppl(
                model, dev_data,
                batch_size=128)  # dev batch size can be a bit larger
            valid_metric = -dev_ppl

            print('validation: iter %d, dev. ppl %f' % (train_iter, dev_ppl),
                  file=sys.stderr)

            is_better = len(hist_valid_scores
                            ) == 0 or valid_metric > max(hist_valid_scores)
            hist_valid_scores.append(valid_metric)

            if is_better:
                patience = 0
                print('save currently the best model to [%s]' %
                      model_save_path,
                      file=sys.stderr)
                model.save(model_save_path)

                # also save the optimizers' state
                torch.save(optimizer.state_dict(), model_save_path + '.optim')
            elif patience < 10:
                patience += 1
                print('hit patience %d' % patience, file=sys.stderr)

                if patience == 5:
                    num_trial += 1
                    print('hit #%d trial' % num_trial, file=sys.stderr)
                    if num_trial == 3:
                        print('early stop!', file=sys.stderr)
                        exit(0)

                    # decay lr, and restore from previously best checkpoint
                    lr = optimizer.param_groups[0]['lr'] * 0.5
                    print(
                        'load previously best model and decay learning rate to %f'
                        % lr,
                        file=sys.stderr)

                    # load model
                    params = torch.load(
                        model_save_path,
                        map_location=lambda storage, loc: storage)
                    model.load_state_dict(params['state_dict'])
                    model = model.to(device)

                    print('restore parameters of the optimizers',
                          file=sys.stderr)
                    optimizer.load_state_dict(
                        torch.load(model_save_path + '.optim'))

                    # set new lr
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = lr

                    # reset patience
                    patience = 0

        epoch, voices, tgt_sents = batch_queue.get()
    batch_iter_to_queue_process.join()
    train_data_to_queue_process.join()
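
The producer side (load_train_data and batch_iter_to_queue2) is not shown; the consumer loop above only relies on receiving (epoch, voices, tgt_sents) tuples and a final tuple whose payload is None as a stop sentinel. A self-contained toy illustration of that handshake, with made-up data and a hypothetical toy_producer standing in for the real producer process, is:

from multiprocessing import Process, Queue


def toy_producer(batch_queue, max_epoch, batches_per_epoch=3):
    """Stand-in for batch_iter_to_queue2: emit (epoch, voices, tgt_sents) tuples,
    then a (epoch, None, None) sentinel so the consumer loop can exit."""
    for epoch in range(1, max_epoch + 1):
        for _ in range(batches_per_epoch):
            voices = [[0.0, 0.1, 0.2], [0.3, 0.4]]                        # fake audio frames
            tgt_sents = [['<s>', 'hello', '</s>'], ['<s>', 'hi', '</s>']]
            batch_queue.put((epoch, voices, tgt_sents))
    batch_queue.put((max_epoch, None, None))


if __name__ == '__main__':
    batch_queue = Queue(1)  # maxsize=1 keeps the producer exactly one batch ahead, as above
    producer = Process(target=toy_producer, args=(batch_queue, 2))
    producer.start()
    epoch, voices, tgt_sents = batch_queue.get(True)
    while voices is not None and tgt_sents is not None:
        print('epoch %d: got a batch of %d examples' % (epoch, len(voices)))
        epoch, voices, tgt_sents = batch_queue.get()
    producer.join()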