Exemple #1
0
def main(_):
    """Train the language model on the CoNLL training split.

    Reads the YAML file named by ``FLAGS.config``, builds the dataset, model,
    loss function and optimizer from it, then runs the training loop while
    periodically printing the mean loss and checkpointing the model.

    Args:
        _: Unused positional argument.
    """
    # Load configuration. safe_load only constructs plain Python objects, so
    # the config file cannot trigger arbitrary object construction, and it
    # does not need the explicit Loader argument newer PyYAML requires.
    with open(FLAGS.config, 'r') as f:
        config = yaml.safe_load(f)

    # Initialize CoNLL dataset.
    dataset = CoNLLDataset(fname=config['data']['train'], target='lm')
    vocab_size = len(dataset.token_vocab)
    use_cuda = torch.cuda.is_available()

    # Initialize model.
    language_model = LanguageModel(
        vocab_size=vocab_size,
        embedding_dim=config['model']['embedding_dim'],
        hidden_size=config['model']['hidden_size'],
        num_layers=config['model']['num_layers'])
    if use_cuda:
        language_model = language_model.cuda()

    # Initialize loss function. NOTE: Manually setting weight of padding to 0
    # (index 0 is treated as the padding class here).
    weight = torch.ones(vocab_size)
    weight[0] = 0
    if use_cuda:
        weight = weight.cuda()
    loss_function = torch.nn.NLLLoss(weight)
    optimizer = torch.optim.Adam(language_model.parameters())

    # Main training loop.
    data_loader = DataLoader(dataset,
                             batch_size=config['training']['batch_size'],
                             shuffle=True,
                             collate_fn=collate_annotations)
    checkpoint_path = config['data']['checkpoint']
    losses = []
    i = 0
    for epoch in range(config['training']['num_epochs']):
        for batch in data_loader:
            inputs, targets, lengths = batch
            optimizer.zero_grad()
            outputs, _ = language_model(inputs, lengths=lengths)

            # Flatten so every position is scored as one classification.
            outputs = outputs.view(-1, vocab_size)
            targets = targets.view(-1)

            loss = loss_function(outputs, targets)
            loss.backward()
            optimizer.step()

            # .item() extracts the Python float; indexing the 0-dim loss
            # tensor with [0] raises on current PyTorch releases.
            losses.append(loss.item())
            if (i % 100) == 0:
                # Report the mean loss since the previous report, then reset.
                average_loss = np.mean(losses)
                losses = []
                print('Iteration %i - Loss: %0.6f' % (i, average_loss),
                      end='\r')
            if (i % 1000) == 0:
                torch.save(language_model, checkpoint_path)
            i += 1
    # Final checkpoint once all epochs are done.
    torch.save(language_model, checkpoint_path)
Exemple #2
0
from model import LanguageModel

# Prefer the GPU whenever torch reports one as available; otherwise use the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# Only the first five directory entries returned by os.listdir are kept.
data_dir = 'data/Gutenberg/split/'
txt_files = [data_dir + entry for entry in os.listdir(data_dir)[:5]]


if __name__ == '__main__':

    # checkpoint = torch.load('models/lm/latest.pth')

    model = LanguageModel(n_vocab=10000).to(device)
    # model.load_state_dict(checkpoint['model_state_dict'])
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    # optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.95, patience=100, min_lr=1e-6)
    # lr_scheduler.load_state_dict(checkpoint['lr_scheduler_state_dict'])
    criterion = nn.CrossEntropyLoss()

    writer = SummaryWriter(f'runs/{time.strftime('%Y%m%d-%I:%M%p', time.localtime())}')
    dummy_input = torch.LongTensor([[1]]).to(device)
    writer.add_graph(model, dummy_input)

    # global_step = checkpoint['global_step']
    global_step = 0

    for epoch in range(10):
        pbar = tqdm(TextDataLoaderIterator(txt_files, batch_size=16, block_len=64))
        for data_loader in pbar:
Exemple #3
0
def train(settings, model_dir):
    """Train a LanguageModel, periodically validating, sampling and saving.

    Hyper-parameters are read from the module-level ``args`` object; the
    ``settings`` argument is only pickled into ``model_dir`` for reference.

    Args:
        settings: Object pickled to ``model_dir / 'settings.pkl'``.
        model_dir: Directory object supporting the ``/`` operator. It receives
            the settings, the progress log, the checkpoints (under
            ``checkpoints/``), the timing file and the pickled loss history.
    """
    # training and sampling
    temperature = 0.5
    how_many = 70
    vocab = generate.get_vocab(args.token, small=args.small)

    # create the vocab, model, (and embedding)
    if args.token == 'word':
        emb = generate.get_embedding('word2vec')
        input_size = emb.vectors.shape[1]
        output_size = emb.vectors.shape[0]
    elif args.token == 'character':
        emb = None
        input_size = vocab.size
        output_size = vocab.size

    model = LanguageModel(args.cell, input_size, args.hidden_size, output_size)

    # create criterion and optimiser
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)

    # create the validation set: only the first generated batch is kept
    n_valid = 10000
    valid_gen = generate.generate('valid', token=args.token, max_len=args.max_len, small=args.small, batch_size=n_valid)
    for valid_batch, valid_labels in valid_gen:
        # one hot encode
        if args.token == 'character':
            valid_batch = generate.one_hot_encode(valid_batch, vocab)
        # or embed
        elif args.token == 'word':
            valid_batch = generate.w2v_encode(valid_batch, emb, vocab)
        valid_batch, valid_labels = torch.Tensor(valid_batch), torch.Tensor(valid_labels).long()
        break

    # how many epochs do we need?
    batches_per_epoch = generate.get_n_batches_in_epoch('train', args.token, args.batch_size, args.max_len, args.small)

    # training settings
    # clamp to 1: with fewer batches than saves the interval would be 0 and
    # the modulo in the loop below would raise ZeroDivisionError
    every_n = max(1, int(batches_per_epoch/args.n_saves)) if not args.debug else 50
    running_loss = 0.0
    training_losses = []
    valid_losses = []
    t0 = time.time()

    # dump the settings (context manager so the handle is closed promptly)
    with open(model_dir / 'settings.pkl', 'wb') as f:
        pickle.dump(settings, f)
    out_stream = model_dir / 'out_stream.txt'

    # run the training loop
    for epoch in range(1, args.n_epochs+1):

        opening = ['', '#'*20, '# Epoch {} (t={:2.2f}h)'.format(epoch, (time.time() - t0)/3600.), '#'*20, '']
        for txt in opening:
            utils.report(txt, out_stream)

        # create the generator for each epoch
        train_gen = generate.generate('train', token=args.token, max_len=args.max_len,
                                      small=args.small, batch_size=args.batch_size)
        for i, (batch, labels) in enumerate(train_gen):

            # one hot encode
            if args.token == 'character':
                batch = generate.one_hot_encode(batch, vocab)
            # or embed
            elif args.token == 'word':
                batch = generate.w2v_encode(batch, emb, vocab)

            # turn into torch tensors
            batch = torch.Tensor(batch)
            labels = torch.Tensor(labels).long()

            # zero the gradients
            optimizer.zero_grad()

            # forward and backward pass and optimisation step
            outputs = model(batch)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # monitor the losses; .item() detaches the value so the autograd
            # graph of every batch is not kept alive by the running sum
            running_loss += loss.item()
            if i % every_n == (every_n-1):

                # append the training losses
                training_losses.append(running_loss/every_n)
                running_loss = 0.0

                # compute the valid loss; no gradients are needed here, so
                # skip building a graph over the whole validation batch
                with torch.no_grad():
                    valid_outputs = model(valid_batch)
                    valid_losses.append(float(criterion(valid_outputs, valid_labels)))

                # monitor progress
                monitor = ['\n{}/{} done'.format(i+1, batches_per_epoch)]
                monitor.append(generate.compose(model, vocab, emb, 'The Standard Model of', temperature, how_many))
                for m in monitor:
                    utils.report(m, out_stream)

                # save the model; (i+1)//every_n is the exact 1-based index of
                # this reporting step, whereas round(i/every_n) collided or
                # was off by one for every_n of 1 or 2
                torch.save(model.state_dict(), model_dir/'checkpoints'/'epoch{}_step_{}.pt'.format(epoch, (i+1)//every_n))

            if i >= 1000 and args.debug:
                break

    # save information
    dt = (time.time() - t0)
    time_txt = '\ntime taken: {:2.2f}h\n'.format(dt/3600.)
    utils.report(time_txt, out_stream)
    utils.report(str(dt/3600.), model_dir/'time.txt')

    loss_dict = {'train':training_losses, 'valid':valid_losses, 'time_taken':dt}
    with open(model_dir / 'losses.pkl', 'wb') as f:
        pickle.dump(loss_dict, f)

    # evaluate
    evaluate.plot_losses(model_dir)
Exemple #4
0
def train(opt):
    """Train (or resume training) a LanguageModel and track the best checkpoint.

    Loads the preprocessed training and validation datasets, builds a new
    model or reloads an existing one, then trains up to ``opt.epoch`` epochs.
    Every ``opt.save_freq`` batches the model is validated and saved, and the
    checkpoint with the lowest validation loss so far is copied to
    ``<opt.model_name>.best.pt``.

    Args:
        opt: Options object. Attributes read here: ``train_data``,
            ``val_data``, ``batch_size``, ``model_reload``, ``dim_word``,
            ``dim_rnn``, ``num_layers``, ``dropout_rate``, ``cuda``, ``lr``,
            ``optimizer``, ``epoch``, ``save_freq``, ``display_freq``,
            ``clip``, ``model_name`` and ``lr_decay``. ``save_freq`` is
            overwritten when it is 0.
    """

    # Read preprocessed data
    print_line()
    print('Loading training data ...')
    # Raw string: '\.' is an invalid escape sequence in a normal literal.
    check_name = re.compile(r'.*\.prep\.train\.pt')
    # NOTE(review): this passes when the file exists OR the name does not
    # match the pattern; the intent may have been "exists AND matches".
    assert os.path.exists(
        opt.train_data) or check_name.match(opt.train_data) is None
    train_dataset = torch.load(opt.train_data)
    train_dataset.set_batch_size(opt.batch_size)
    print('Done.')

    print_line()
    print('Loading validation data ...')
    check_name = re.compile(r'.*\.prep\.val\.pt')
    assert os.path.exists(
        opt.val_data) or check_name.match(opt.val_data) is None
    val_dataset = torch.load(opt.val_data)
    val_dataset.set_batch_size(opt.batch_size)
    print('Done.')

    # Build / load  Model
    if opt.model_reload is None:
        print_line()
        print('Build new model...')

        model = LanguageModel(train_dataset.num_vocb,
                              dim_word=opt.dim_word,
                              dim_rnn=opt.dim_rnn,
                              num_layers=opt.num_layers,
                              dropout_rate=opt.dropout_rate)

        model.dictionary = train_dataset.dictionary
        print('Done')
        train_dataset.describe_dataset()
        val_dataset.describe_dataset()

    else:
        print_line()
        print('Loading existing model...')
        model = torch.load(opt.model_reload)
        print('done')
        train_dataset.change_dict(model.dictionary)
        val_dataset.change_dict(model.dictionary)

    # train_info stores 1-based positions; convert to 0-based loop indices.
    model_start_epoch = model.train_info['epoch idx'] - 1
    model_start_batch = model.train_info['batch idx'] - 1

    # Use GPU / CPU
    print_line()
    if opt.cuda:
        model.cuda()
        print('Using GPU %d' % torch.cuda.current_device())
    else:
        print('Using CPU')

    # Criterion, mask padding (zero weight for the PAD class)
    criterion_weight = torch.ones(train_dataset.num_vocb + 1)
    criterion_weight[const.PAD] = 0
    criterion = nn.CrossEntropyLoss(weight=criterion_weight,
                                    size_average=False)
    if opt.cuda:
        criterion = criterion.cuda()

    # Optimizer (class looked up by name in torch.optim)
    lr = opt.lr
    optimizer = getattr(optim, opt.optimizer)(model.parameters(), lr=lr)

    if (model_start_epoch > opt.epoch):
        print(
            'This model has already trained more than %d epoch, add epoch parameter is you want to continue'
            % (opt.epoch + 1))
        return

    print_line()
    print('')
    if opt.model_reload is None:
        print('Start training new model, will go through %d epoch' % opt.epoch)
    else:
        print('Continue existing model, from epoch %d, batch %d to epoch %d' %
              (model_start_epoch, model_start_batch, opt.epoch))
    print('')

    # Snapshot (copy) of the best record so far. Keeping a reference to
    # model.train_info itself would make every later comparison compare the
    # new validation loss against itself, so no model could ever be "best".
    best_model = dict(model.train_info)

    if opt.save_freq == 0:
        # Clamp to 1 so a single-batch dataset does not produce a modulo by 0.
        opt.save_freq = max(1, train_dataset.num_batch - 1)

    # Train
    model.train()

    for epoch_idx in range(model_start_epoch, opt.epoch):
        # New epoch
        acc_loss = 0.0
        acc_count = 0
        start_time = time.time()
        train_dataset.shuffle()

        print_line()
        print('Start epoch %d, learning rate %f ' % (epoch_idx + 1, lr))
        print_line('-')
        epoch_start_time = start_time

        # If load model and continue training
        if epoch_idx == model_start_epoch and model_start_batch > 0:
            start_batch = model_start_batch
        else:
            start_batch = 0

        for batch_idx in range(start_batch, train_dataset.num_batch):
            # Generate batch data
            batch_data, batch_lengths, target_words = train_dataset[batch_idx]

            if opt.cuda:
                batch_data = batch_data.cuda()
                batch_lengths = batch_lengths.cuda()
                target_words = target_words.cuda()

            batch_data = Variable(batch_data, requires_grad=False)
            batch_lengths = Variable(batch_lengths, requires_grad=False)
            target_words = Variable(target_words, requires_grad=False)

            optimizer.zero_grad()

            # Forward
            output_flat = model.forward(batch_data, batch_lengths)

            # Calculate loss (summed over targets, see size_average=False)
            loss = criterion(output_flat, target_words.view(-1))

            # Backward
            loss.backward()

            # Prevent gradient explode (in-place variant; the un-suffixed
            # clip_grad_norm is deprecated)
            torch.nn.utils.clip_grad_norm_(model.parameters(), opt.clip)

            # Update parameters
            optimizer.step()

            # Accumulate loss as plain Python numbers; indexing the 0-dim
            # loss tensor with [0] raises on current PyTorch releases.
            acc_loss += loss.item()
            acc_count += batch_lengths.data.sum().item()

            # Display progress
            if batch_idx % opt.display_freq == 0:
                average_loss = acc_loss / acc_count
                print(
                    'Epoch : %d, Batch : %d / %d, Loss : %f, Perplexity : %f, Time : %f'
                    % (epoch_idx + 1, batch_idx,
                       train_dataset.num_batch, average_loss,
                       math.exp(average_loss), time.time() - start_time))

                acc_loss = 0.0
                acc_count = 0
                start_time = time.time()

            # Save and validate if it is necessary
            if (1 + batch_idx) % opt.save_freq == 0:

                print_line('-')
                print('Pause training for save and validate.')

                model.eval()
                val_loss = evaluate(model=model,
                                    eval_dataset=val_dataset,
                                    cuda=opt.cuda,
                                    criterion=criterion)
                model.train()

                print('Validation Loss : %f' % val_loss)
                print('Validation Perplexity : %f' % math.exp(val_loss))

                model_savename = opt.model_name + '-e_' + str(
                    epoch_idx +
                    1) + '-b_' + str(batch_idx + 1) + '-ppl_' + str(
                        int(math.exp(val_loss))) + '.pt'

                # Compare against the previous best before recording the new
                # values on the model.
                is_best = val_loss < best_model['val loss']

                model.val_loss = val_loss
                model.val_ppl = math.exp(val_loss)
                model.epoch_idx = epoch_idx + 1
                model.batch_idx = batch_idx + 1

                model.train_info['val loss'] = val_loss
                # NOTE(review): 'train loss' receives the validation
                # perplexity, kept as in the original; confirm the intent.
                model.train_info['train loss'] = math.exp(val_loss)
                model.train_info['epoch idx'] = epoch_idx + 1
                model.train_info['batch idx'] = batch_idx + 1
                model.train_info['val ppl'] = math.exp(model.val_loss)
                model.train_info['save name'] = model_savename

                # Saving is best-effort, but only ordinary errors are
                # swallowed so that Ctrl-C still interrupts training.
                saved = True
                try:
                    torch.save(model, model_savename)
                except Exception as exc:
                    saved = False
                    print('Failed to save model!')
                    print(exc)

                if is_best:
                    print_line('-')
                    print('New best model on validation set')
                    best_model = dict(model.train_info)
                    # The checkpoint path is stored under 'save name'; no
                    # 'name' key is ever written by this function.
                    if saved:
                        shutil.copy2(model_savename,
                                     opt.model_name + '.best.pt')

                print_line('-')
                print('Save model at %s' % (model_savename))
                print_line('-')
                print('Continue Training...')

        print_line('-')
        print('Epoch %d finished, spend %d s' %
              (epoch_idx + 1, time.time() - epoch_start_time))

        # Update lr if needed
        # NOTE(review): rebuilding the optimizer also discards any internal
        # optimizer state (e.g. momentum buffers); kept as in the original.
        lr *= opt.lr_decay
        optimizer = getattr(optim, opt.optimizer)(model.parameters(), lr=lr)

    # Finish training
    print_line()
    print(' ')
    print('Finish training %d epochs!' % opt.epoch)
    print(' ')
    print_line()
    print('Best model:')
    print('Epoch : %d, Batch : %d ,Loss : %f, Perplexity : %f' %
          (best_model['epoch idx'], best_model['batch idx'],
           best_model['val loss'], best_model['val ppl']))
    print_line('-')

    # Copy the best checkpoint once more, but only if one is actually on disk
    # (none exists if no save happened or every save failed).
    best_path = best_model.get('save name')
    if best_path and os.path.exists(best_path):
        print('Save best model at %s' % (opt.model_name + '.best.pt'))
        shutil.copy2(best_path, opt.model_name + '.best.pt')
    else:
        print('No saved checkpoint available for the best model.')
    print_line()
Exemple #5
0
    '/media/lytic/STORE/ru_open_stt_wav/text/public_youtube700.txt'
]

# Text files for the evaluation split (each path ends in `_val.txt`).
test = [
    '/media/lytic/STORE/ru_open_stt_wav/text/asr_calls_2_val.txt',
    '/media/lytic/STORE/ru_open_stt_wav/text/buriy_audiobooks_2_val.txt',
    '/media/lytic/STORE/ru_open_stt_wav/text/public_youtube700_val.txt'
]

# Wrap both file lists in TextDataset objects. Note that this rebinds `train`
# and `test` from lists of paths to dataset instances.
train = TextDataset(train, labels, batch_size)
test = TextDataset(test, labels, batch_size)

# NOTE(review): the argument looks like a shuffle seed (the training set is
# later shuffled with the epoch number) -- confirm against TextDataset.shuffle.
test.shuffle(0)

# Loss, optimizer and learning-rate schedule. StepLR multiplies the learning
# rate by `gamma` once every `step_size` calls to scheduler.step().
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4, weight_decay=1e-5)
scheduler = StepLR(optimizer, step_size=10000, gamma=0.99)

for epoch in range(20):

    model.train()

    hidden = model.step_init(batch_size)

    err = AverageMeter('loss')
    grd = AverageMeter('gradient')

    train.shuffle(epoch)

    loader = DataLoader(train,
                        pin_memory=True,
        pickle.dump({'data': test_input, 'label': test_label}, f)

# Load the pickled train/test dictionaries back from disk.
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only use it on files this pipeline produced itself.
with open(train_pkl_path, 'rb') as f:
    train_data = pickle.load(f)
with open(test_pkl_path, 'rb') as f:
    test_data = pickle.load(f)

# Build the model. args.hidden_size is passed for both the second and the
# third positional argument.
model = LanguageModel(dict_size,
                      args.hidden_size,
                      args.hidden_size,
                      n_layer=1,
                      drop_rate=args.drop_rate,
                      adaptive_softmax=with_adaptive,
                      cutoff=cutoff_list)
model  # no-op expression statement: the `.cuda()` call has been commented out
optimizer = optim.Adagrad(model.parameters(),
                          lr=args.learning_rate,
                          lr_decay=args.learning_rate_decay,
                          weight_decay=args.weight_decay)

# Choose the loss to match the model's output layer: AdaptiveLoss when the
# adaptive softmax is enabled, plain cross-entropy otherwise.
if with_adaptive:
    print('Use adaptive softmax.')
    criterion = AdaptiveLoss(cutoff_list)
else:
    print('Use common softmax.')
    criterion = nn.CrossEntropyLoss()


def train(batch_size, clip_global_norm_rate):
    pbar = tqdm.tqdm(zip(train_data['data'], train_data['label']))
    hidden = model.init_hidden(batch_size)
Exemple #7
0
    trainSet, vocab = creatDataSet('./data', 'ptb.train.txt')
    testSet, _ = creatDataSet('./data', 'ptb.test.txt')
    validSet, _ = creatDataSet('./data', 'ptb.valid.txt')

    word2idx, idx2word = word2index(vocab)

    ### Parameters Set ##########
    VOCAB_SIZE = len(word2idx)
    EMBEDDING_SIZE = 128
    HIDDEN_SIZE = 1024
    N_LAYERS = 1
    DOPROUT_P = 0.5
    BATCH_SIZE = 20
    SEQ_LENGTH = 30
    EPOCH = 40
    LEARNING_RATE = 0.01
    #############################

    train_data = batchify(prepare_sequence(trainSet, word2idx), BATCH_SIZE)
    test_data = batchify(prepare_sequence(testSet, word2idx), BATCH_SIZE)
    valid_data = batchify(prepare_sequence(validSet, word2idx), BATCH_SIZE)

    model = LanguageModel(VOCAB_SIZE, EMBEDDING_SIZE, HIDDEN_SIZE, N_LAYERS,
                          DOPROUT_P).to(device)
    model.weight_init()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    trainModel(model, train_data, valid_data, BATCH_SIZE, SEQ_LENGTH, EPOCH)
    testModel(model, test_data, BATCH_SIZE, SEQ_LENGTH)
Exemple #8
0
    cuda = config.use_cuda and torch.cuda.is_available()
    device = torch.device('cuda' if cuda else 'cpu')

    model = LanguageModel(n_class=len(char2id),
                          n_layers=config.n_layers,
                          rnn_cell='lstm',
                          hidden_size=config.hidden_size,
                          dropout_p=config.dropout_p,
                          max_length=config.max_len,
                          sos_id=SOS_token,
                          eos_id=EOS_token,
                          device=device)
    model.flatten_parameters()
    model = nn.DataParallel(model).to(device)

    for param in model.parameters():
        param.data.uniform_(-0.08, 0.08)

    # Prepare loss
    weight = torch.ones(len(char2id)).to(device)
    perplexity = Perplexity(weight, PAD_token, device)
    optimizer = optim.Adam(model.module.parameters(), lr=config.lr)

    corpus = load_corpus('./data/corpus_df.bin')
    total_time_step = math.ceil(len(corpus) / config.batch_size)

    train_set = CustomDataset(corpus[:-10000], SOS_token, EOS_token,
                              config.batch_size)
    valid_set = CustomDataset(corpus[-10000:], SOS_token, EOS_token,
                              config.batch_size)