Example 1
def main():
    inputs, token_to_idx, idx_to_token = load_dataset(file_name=sys.argv[2])

    #coloredlogs.install(level='DEBUG')
    num_layers = 2
    rnn_type = 'lstm'
    dropout = 0.5
    emb_size = 50
    hidden_size = 256
    learning_rate = 0.001
    n_tokens = len(idx_to_token)

    model = CharRNN(num_layers=num_layers,
                    rnn_type=rnn_type,
                    dropout=dropout,
                    n_tokens=n_tokens,
                    emb_size=emb_size,
                    hidden_size=hidden_size,
                    pad_id=token_to_idx[PAD_TOKEN])
    if torch.cuda.is_available():
        model = model.cuda()

    optimiser = optim.Adam(model.parameters(), lr=learning_rate)

    try:
        model, optimiser, epoch, valid_loss_min = load_ckp(
            checkpoint_fpath=sys.argv[1], model=model, optimiser=optimiser)
        generate_sample(model, token_to_idx, idx_to_token, n_tokens=20)
    except KeyboardInterrupt:
        print('Aborted!')
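Examples 1, 3 and 5 rely on load_ckp / save_ckp helpers that are not shown here. Below is a minimal sketch of a compatible pair, assuming the checkpoint dictionary layout used in Example 5 ('epoch', 'valid_loss_min', 'state_dict', 'optimizer'); the names and signatures are inferred from the call sites, not taken from the original code.

import shutil
import torch

def save_ckp(state, is_best, checkpoint_path, best_model_path):
    # hypothetical sketch: persist the checkpoint dict and, optionally,
    # keep a separate copy of the best model so far
    torch.save(state, checkpoint_path)
    if is_best:
        shutil.copyfile(checkpoint_path, best_model_path)

def load_ckp(checkpoint_fpath, model, optimiser):
    # hypothetical sketch: restore model/optimiser state and return the
    # bookkeeping values expected by the callers above
    checkpoint = torch.load(checkpoint_fpath, map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'])
    optimiser.load_state_dict(checkpoint['optimizer'])
    return model, optimiser, checkpoint['epoch'], checkpoint['valid_loss_min']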
Example 2
def train(opt, th):
    ''' Train the model.
    Args:
        opt -- options / hyperparameters
        th -- TextConverter object
    Returns:
        None
    '''
    # 1. Training data
    data_set = TextDataset(opt.train_data_path, th)
    train_data = DataLoader(data_set,
                            opt.batch_size,
                            shuffle=True,
                            num_workers=opt.num_workers)
    # 2. Initialise the model
    model = CharRNN(th.vocab_size, opt.embed_size, opt.hidden_size,
                    opt.n_layers, opt.dropout_p, opt.bidir)
    if USE_CUDA:
        model = model.cuda(DEVICE_ID)

    # 3. Loss and optimiser
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.learning_rate)

    # 4. Training loop
    for e in range(opt.max_epochs):
        epoch_loss = 0
        hidden = None
        for input_seqs, labels in train_data:
            # input_seqs and labels are [b, seq_len]; the last batch may have fewer than b rows
            # prepare the input and hidden state
            b = input_seqs.shape[0]
            if hidden is not None:
                # truncate the hidden state to the current batch size and detach it,
                # so gradients do not propagate across batch boundaries
                hidden = hidden[:, :b, :].detach()
            labels = labels.long().view(-1)
            input_seqs, labels = get_variable(input_seqs), get_variable(labels)

            # forward pass
            probs, hidden = model(input_seqs, hidden)
            probs = probs.view(-1, th.vocab_size)

            # loss and backward pass
            loss = criterion(probs, labels)
            optimizer.zero_grad()
            loss.backward()

            # clip gradients and update parameters
            nn.utils.clip_grad_norm_(model.parameters(), 5)
            optimizer.step()

            epoch_loss += loss.item()
        # average cross-entropy per batch, then perplexity
        entropy_loss = epoch_loss / len(train_data)
        perplexity = np.exp(entropy_loss)
        info = "epoch: {}, perp: {:.3f}".format(e + 1, perplexity)
        print(info)
        if perplexity <= opt.min_perplexity or e == opt.max_epochs - 1:
            print("best model")
            torch.save(model, opt.model_path)
            break
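For reference, the perplexity printed at the end of each epoch is simply the exponential of the average cross-entropy per batch; a small standalone illustration with a hypothetical loss value:

import numpy as np

# if the average cross-entropy over an epoch is 1.2 nats per token,
# the model is about as uncertain as a uniform choice over exp(1.2) ≈ 3.3 tokens
entropy_loss = 1.2           # hypothetical average loss
perplexity = np.exp(entropy_loss)
print(round(perplexity, 2))  # 3.32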
Example 3
def main():

    logging.root.setLevel(logging.NOTSET)

    inputs, token_to_idx, idx_to_token = load_dataset(file_name=sys.argv[2])

    #coloredlogs.install(level='DEBUG')
    num_layers = 2
    rnn_type = 'lstm'
    dropout = 0.5
    emb_size = 50
    hidden_size = 256
    learning_rate = 0.001
    n_tokens = len(idx_to_token)

    model = CharRNN(num_layers=num_layers,
                    rnn_type=rnn_type,
                    dropout=dropout,
                    n_tokens=n_tokens,
                    emb_size=emb_size,
                    hidden_size=hidden_size,
                    pad_id=token_to_idx[PAD_TOKEN])
    if torch.cuda.is_available():
        model = model.cuda()

    optimiser = optim.Adam(model.parameters(), lr=learning_rate)

    s1 = "bababac bababa bacc bac bacc"
    s2 = "bababac baba bac bacc bac"
    s3 = "baba"
    s4 = "ccab cab ccab ababab cababab"

    try:
        model, optimiser, epoch, valid_loss_min = load_ckp(
            checkpoint_fpath=sys.argv[1], model=model, optimiser=optimiser)
        score(model, token_to_idx, idx_to_token, seed_phrase=s1)
        score(model, token_to_idx, idx_to_token, seed_phrase=s2)
        score(model, token_to_idx, idx_to_token, seed_phrase=s3)
        score(model, token_to_idx, idx_to_token, seed_phrase=s4)
    except KeyboardInterrupt:
        print('Aborted!')
Example 4
def main():

    # Parse command line arguments
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--train_set', type=str, required=True)
    argparser.add_argument('--valid_set', type=str, required=True)
    argparser.add_argument('--model', type=str, default="gru")
    argparser.add_argument('--model_file', type=str, default='None')
    argparser.add_argument('--n_epochs', type=int, default=30)
    argparser.add_argument('--hidden_size', type=int, default=200)
    argparser.add_argument('--n_layers', type=int, default=3)
    argparser.add_argument('--learning_rate', type=float, default=0.01)
    argparser.add_argument('--chunk_len', type=int, default=200)
    argparser.add_argument('--batch_size', type=int, default=300)
    argparser.add_argument('--num_workers', type=int, default=8)
    argparser.add_argument('--cuda', action='store_true')
    argparser.add_argument('--cpu', action='store_true')
    args = argparser.parse_args()

    # Initialize models and start training

    if args.model_file == 'None':
        decoder = CharRNN(
            n_characters,
            args.hidden_size,
            n_characters,
            model=args.model,
            n_layers=args.n_layers,
        )
        epoch_from = 1
        prev_valid_loss = sys.maxsize
        old_filename = None
    else:
        if args.cpu:
            decoder = torch.load(args.model_file,
                                 map_location=lambda storage, loc: storage)
        else:
            decoder = torch.load(args.model_file)
        # recover the hyperparameters encoded in the checkpoint filename
        # (underscore-separated fields, presumably written by model_file_name)
        info = args.model_file.split('_')
        args.model = info[0]
        epoch_from = int(info[1][5:]) + 1
        args.n_layers = int(info[2][7:])
        args.hidden_size = int(info[5][2:])
        prev_valid_loss = float(info[7][4:-3])
        old_filename = args.model_file

        print(
            "Successfully loaded model! Continuing from epoch {0} with valid loss {1}"
            .format(epoch_from, prev_valid_loss))

    optimizer = torch.optim.Adam(decoder.parameters(), lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss()

    if args.cuda:
        decoder.cuda()

    start = time.time()

    train_dataset = WordDataset(args.train_set, args.chunk_len)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  drop_last=True)

    valid_dataset = WordDataset(args.valid_set, args.chunk_len)
    valid_dataloader = DataLoader(valid_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  drop_last=True)

    try:

        print('Training for maximum {} epochs...'.format(args.n_epochs))
        for epoch in range(epoch_from, args.n_epochs + 1):

            train_loss, num_samples = 0, 0
            for s in tqdm(train_dataloader):
                input_, target = prep_data(s['input'], s['target'], args.cuda)
                train_loss += train(decoder, optimizer, criterion, input_,
                                    target, args.batch_size, args.chunk_len,
                                    args.cuda)
                num_samples += 1
            train_loss /= num_samples

            valid_loss, num_samples = 0, 0
            for s in valid_dataloader:
                input_, target = prep_data(s['input'], s['target'], args.cuda)
                valid_loss += evaluate(decoder, criterion, input_, target,
                                       args.batch_size, args.chunk_len,
                                       args.cuda)
                num_samples += 1
            valid_loss /= num_samples

            elapsed = time_since(start)
            pcnt = epoch / args.n_epochs * 100
            log = (
                '{} elapsed - epoch #{} ({:.1f}%) - training loss (BPC) {:.2f} '
                '- validation loss (BPC) {:.2f}')
            print(log.format(elapsed, epoch, pcnt, train_loss, valid_loss))

            if valid_loss > prev_valid_loss:
                print('No longer learning, just overfitting, stopping here.')
                break
            else:
                filename = model_file_name(decoder, epoch, train_loss,
                                           valid_loss)
                torch.save(decoder, filename)
                print('Saved as {}'.format(filename))
                if old_filename:
                    os.remove(old_filename)
                old_filename = filename

            prev_valid_loss = valid_loss

    except KeyboardInterrupt:
        print("Saving before quit...")
        try:
            valid_loss
        except NameError:
            valid_loss = 'no_val'
        filename = model_file_name(decoder, epoch, train_loss, valid_loss)
        torch.save(decoder, filename)
        print('Saved as {}'.format(filename))
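The time_since helper used in the progress line above is not defined in the snippet. Here is a minimal sketch of a compatible formatter, assuming it only turns the seconds elapsed since `start` into a human-readable string:

import math
import time

def time_since(since):
    # hypothetical sketch: format the seconds elapsed since `since` as "Xm Ys"
    s = time.time() - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)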
Example 5
def train(filename, rnn_type, num_layers, dropout, emb_size, hidden_size,
          num_epochs, batch_size, learning_rate, num_samples, seed_phrase,
          sample_every, checkpoint_path):
    """ Trains a character-level Recurrent Neural Network in PyTorch.

    Args: optional arguments [python train.py --help]
    """
    logging.info('reading `{}` for character sequences'.format(filename))
    inputs, token_to_idx, idx_to_token = load_dataset(file_name=filename)

    # pin '~' to the first index and '#' to the last index of the vocabulary
    idx_to_token.remove('~')
    idx_to_token.remove('#')
    idx_to_token = ['~'] + idx_to_token + ['#']
    token_to_idx = {token: idx for idx, token in enumerate(idx_to_token)}

    logging.info(idx_to_token)
    logging.info(token_to_idx)

    n_tokens = len(idx_to_token)
    max_length = inputs.size(1)

    logging.debug('creating char-level RNN model')
    model = CharRNN(num_layers=num_layers,
                    rnn_type=rnn_type,
                    dropout=dropout,
                    n_tokens=n_tokens,
                    emb_size=emb_size,
                    hidden_size=hidden_size,
                    pad_id=token_to_idx[PAD_TOKEN])
    if torch.cuda.is_available():
        model = model.cuda()

    logging.debug('defining model training operations')
    # define training procedures and operations for training the model
    criterion = nn.NLLLoss(reduction='mean')
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     mode='min',
                                                     min_lr=1e-6,
                                                     factor=0.1,
                                                     patience=7,
                                                     verbose=True)

    # train-val-test split of the dataset
    split_index = int(0.9 * inputs.size(0))
    train_tensors, inputs = inputs[:split_index], inputs[split_index:]
    split_index = int(0.5 * inputs.size(0))
    val_tensors, test_tensors = inputs[:split_index], inputs[split_index:]
    del inputs
    logging.info('train tensors: {}'.format(train_tensors.size()))
    logging.info('val tensors: {}'.format(val_tensors.size()))
    logging.info('test tensors: {}'.format(test_tensors.size()))

    logging.debug('training char-level RNN model')
    # loop over epochs
    for epoch in range(1, num_epochs + 1):
        epoch_loss, n_iter = 0.0, 0
        # loop over batches
        for tensors in tqdm(iterate_minibatches(train_tensors,
                                                batchsize=batch_size),
                            desc='Epoch[{}/{}]'.format(epoch, num_epochs),
                            leave=False,
                            total=train_tensors.size(0) // batch_size):
            # optimize model parameters
            epoch_loss += optimize(model, tensors, max_length, n_tokens,
                                   criterion, optimizer)
            n_iter += 1
        # evaluate model after every epoch
        val_loss = evaluate(model, val_tensors, max_length, n_tokens,
                            criterion)
        # lr_scheduler decreases lr when stuck at local minima
        scheduler.step(val_loss)
        # log epoch status info
        logging.info(
            'Epoch[{}/{}]: train_loss - {:.4f}   val_loss - {:.4f}'.format(
                epoch, num_epochs, epoch_loss / n_iter, val_loss))

        # sample from the model every few epochs
        if epoch % sample_every == 0:
            print(
                'Epoch[{}/{}]: train_loss - {:.4f}   val_loss - {:.4f}'.format(
                    epoch, num_epochs, epoch_loss / n_iter, val_loss))
            for _ in range(num_samples):
                sample = generate_sample(model,
                                         token_to_idx,
                                         idx_to_token,
                                         max_length,
                                         n_tokens,
                                         seed_phrase=seed_phrase)
                logging.debug(sample)

        checkpoint = {
            'epoch': epoch + 1,
            'valid_loss_min': val_loss,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        # save checkpoint
        best_model_path = checkpoint_path
        save_ckp(checkpoint, False, checkpoint_path, best_model_path)
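The iterate_minibatches generator used in the training loop is not shown. A minimal sketch, under the assumption that it simply shuffles the training tensor and yields fixed-size batches (the remainder batch is dropped, which matches the `total=train_tensors.size(0) // batch_size` passed to tqdm):

import torch

def iterate_minibatches(tensors, batchsize):
    # hypothetical sketch: shuffle rows and yield consecutive slices of `batchsize` sequences
    permutation = torch.randperm(tensors.size(0))
    for start in range(0, tensors.size(0) - batchsize + 1, batchsize):
        yield tensors[permutation[start:start + batchsize]]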
Example 6
def train(filename="poets.txt", hidden_size=128, n_layers=2,
          learning_rate=0.01, n_epochs=10000, chunk_len=20, batch_size=1024,
          print_every=100):
    #%% Global Configuration
    file, file_len, all_characters, n_characters = helpers.read_file(filename)

    sentences = file.split("\n")

    print("There are %d unique characters in the dataset" % n_characters)
    print("There are %d sentences in the dataset with a total of %d characters" % (len(sentences), len(file)))
    
    #%% Model Saving and Loading
    model_filename = helpers.pt_name

    if os.path.exists(model_filename):
        decoder = load(model_filename)
    else:
        decoder = CharRNN(
            n_characters,
            hidden_size,
            n_characters,
            model=helpers.mcell,
            n_layers=n_layers,
        )

    decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    if helpers.USE_CUDA:
        decoder.cuda()
    
    start = time.time()
    all_losses = []
    
    try:
        print("Training for %d epochs..." % n_epochs)
        for epoch in range(n_epochs):
            
            if epoch != 0 and epoch % 1000 == 0: 
                learning_rate /= 2
                decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=learning_rate)

            inp, target = random_training_set(sentences, chunk_len, batch_size)

            loss = train_one_entry(decoder, decoder_optimizer, criterion,
                                   inp, target, chunk_len, batch_size)

            all_losses.append(loss)

            if epoch != 0 and epoch % print_every == 0:
                print('%s: [%s (%d %d%%) %.4f]' % (time.ctime(), helpers.time_since(start), epoch, epoch / n_epochs * 100, loss))
                print(generate(decoder, '新年', 100, cuda=helpers.USE_CUDA), '\n')

                save(decoder, model_filename)
    
    except KeyboardInterrupt:
        save(decoder, model_filename)

    import matplotlib.pyplot as plt
    plt.plot(all_losses)
    plt.xlabel("iteration")
    plt.ylabel("train loss")
    plt.show()
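random_training_set, train_one_entry and generate are helpers that are not shown. As a rough illustration of the usual input/target construction for a character-level model, here is a hypothetical sketch of random_training_set in which the target is the input shifted left by one character; the space padding and the optional char_to_idx mapping are assumptions, not the original implementation.

import random
import torch

def random_training_set(sentences, chunk_len, batch_size, char_to_idx=None):
    # hypothetical sketch: sample random lines and build (input, target) pairs
    # where the target is the input shifted left by one character
    inp = torch.zeros(batch_size, chunk_len, dtype=torch.long)
    target = torch.zeros(batch_size, chunk_len, dtype=torch.long)
    for b in range(batch_size):
        line = random.choice(sentences)
        # pad short lines so that chunk_len + 1 characters are available
        line = (line + ' ' * (chunk_len + 1))[:chunk_len + 1]
        indices = [char_to_idx[c] for c in line] if char_to_idx else [ord(c) for c in line]
        inp[b] = torch.tensor(indices[:-1])
        target[b] = torch.tensor(indices[1:])
    return inp, target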
Example 7
    n_characters = len(string.printable)

    decoder = CharRNN(
        n_characters,
        args.hidden_size,
        n_characters,
        model=args.model,
        n_layers=args.n_layers,
    )

    decoder_optimizer = torch.optim.Adam(decoder.parameters(),
                                         lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss()

    if args.cuda:
        decoder.cuda()

    start = time.time()
    all_losses = []
    loss_avg = 0

    try:
        print("Training for %d epochs..." % args.n_epochs)
        for epoch in tqdm(range(1, args.n_epochs + 1)):
            loss = train(*random_training_set(args.chunk_len, args.batch_size))
            loss_avg += loss

            if epoch % args.print_every == 0:
                print('[%s (%d %d%%) %.4f]' %
                      (time_since(start), epoch, epoch / args.n_epochs * 100,
                       loss))