Example no. 1
def freestyle(loc):  # TODO

    # load data
    model_dir = Path(loc)
    with open(model_dir / 'settings.pkl', 'rb') as handle:
        settings = pickle.load(handle)
    print(settings)

    # settings
    cell = settings['cell']
    hidden_size = settings['hidden_size']
    token = settings['token']
    small = settings['small']
    how_many = 100

    # set up the vocabulary and the embedding / model sizes
    vocab = generate.get_vocab(token, small)
    if token == 'word':
        emb = generate.get_embedding('word2vec')
        input_size = emb.vectors.shape[1]
        output_size = emb.vectors.shape[0]
    elif token == 'character':
        emb = None
        input_size = vocab.size
        output_size = vocab.size
    # pick the latest checkpoint (os.listdir order is arbitrary, so sort)
    fnames = sorted(os.listdir(model_dir / 'checkpoints'))
    fname = fnames[-1]

    # load the model
    model = LanguageModel(cell, input_size, hidden_size, output_size)
    model.load_state_dict(torch.load(model_dir / 'checkpoints' / fname))
    model.eval()

    # monitor
    sents = [
        'The Standard ', 'non-abelian', 'silicon pixel detector',
        'estimate the', '[23] ATLAS'
    ]
    temperatures = [0.01 + 0.1 * i for i in range(11)]
    eval_stream = model_dir / 'evaluate_stream.txt'

    for temperature in temperatures:
        txt = '\nTemperature = {}'.format(temperature)
        utils.report(txt, eval_stream)
        for sent in sents:
            txt = generate.compose(model, vocab, emb, sent, temperature,
                                   how_many)
            utils.report(txt, eval_stream)
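
Example no. 1 sweeps temperatures from 0.01 up to about 1.0 and hands each one to generate.compose, which is not shown here. The helper below is a minimal sketch of the temperature-scaled sampling such a function presumably performs, assuming the model returns unnormalized next-token logits; sample_next_token is a hypothetical name, not part of the original code.

import torch
import torch.nn.functional as F


def sample_next_token(logits, temperature=1.0):
    # Lower temperatures sharpen the distribution (near-greedy decoding),
    # higher temperatures flatten it and make the samples more random.
    scaled = logits / max(temperature, 1e-8)
    probs = F.softmax(scaled, dim=-1)
    # Draw one token index from the resulting categorical distribution.
    return torch.multinomial(probs, num_samples=1).item()

At temperature 0.01 this is effectively greedy decoding; near 1.0 it samples from the model's unmodified distribution.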
Example no. 2
def train(opt):

    # Read preprocessed data
    print_line()
    print('Loading training data ...')
    check_name = re.compile(r'.*\.prep\.train\.pt')
    # the preprocessed file must exist and follow the *.prep.train.pt naming
    assert os.path.exists(
        opt.train_data) and check_name.match(opt.train_data) is not None
    train_dataset = torch.load(opt.train_data)
    train_dataset.set_batch_size(opt.batch_size)
    print('Done.')

    print_line()
    print('Loading validation data ...')
    check_name = re.compile(r'.*\.prep\.val\.pt')
    # the preprocessed file must exist and follow the *.prep.val.pt naming
    assert os.path.exists(
        opt.val_data) and check_name.match(opt.val_data) is not None
    val_dataset = torch.load(opt.val_data)
    val_dataset.set_batch_size(opt.batch_size)
    print('Done.')

    # Build / load  Model
    if opt.model_reload is None:
        print_line()
        print('Build new model...')

        model = LanguageModel(train_dataset.num_vocb,
                              dim_word=opt.dim_word,
                              dim_rnn=opt.dim_rnn,
                              num_layers=opt.num_layers,
                              dropout_rate=opt.dropout_rate)

        model.dictionary = train_dataset.dictionary
        print('Done')
        train_dataset.describe_dataset()
        val_dataset.describe_dataset()

    else:
        print_line()
        print('Loading existing model...')
        model = torch.load(opt.model_reload)
        print('done')
        train_dataset.change_dict(model.dictionary)
        val_dataset.change_dict(model.dictionary)

    model_start_epoch = model.train_info['epoch idx'] - 1
    model_start_batch = model.train_info['batch idx'] - 1

    # Use GPU / CPU
    print_line()
    if opt.cuda:
        model.cuda()
        print('Using GPU %d' % torch.cuda.current_device())
    else:
        print('Using CPU')

    # Criterion: give padding tokens zero weight so they are masked out
    criterion_weight = torch.ones(train_dataset.num_vocb + 1)
    criterion_weight[const.PAD] = 0
    criterion = nn.CrossEntropyLoss(weight=criterion_weight,
                                    size_average=False)
    if opt.cuda:
        criterion = criterion.cuda()

    # Optimizer
    lr = opt.lr
    optimizer = getattr(optim, opt.optimizer)(model.parameters(), lr=lr)

    if model_start_epoch > opt.epoch:
        print(
            'This model has already been trained for %d or more epochs; '
            'increase the epoch parameter if you want to continue'
            % (opt.epoch + 1))
        return

    print_line()
    print('')
    if opt.model_reload is None:
        print('Start training new model, will go through %d epochs' % opt.epoch)
    else:
        print('Continuing existing model from epoch %d, batch %d up to epoch %d' %
              (model_start_epoch, model_start_batch, opt.epoch))
    print('')

    # Keep a copy (not a reference) of the best validation result so far
    best_model = dict(model.train_info)
    best_model.setdefault('val loss', float('inf'))

    if opt.save_freq == 0:
        opt.save_freq = train_dataset.num_batch - 1

    # Train
    model.train()

    for epoch_idx in range(model_start_epoch, opt.epoch):
        # New epoch
        acc_loss = 0
        acc_count = 0
        start_time = time.time()
        train_dataset.shuffle()

        print_line()
        print('Start epoch %d, learning rate %f ' % (epoch_idx + 1, lr))
        print_line('-')
        epoch_start_time = start_time

        # If load model and continue training
        if epoch_idx == model_start_epoch and model_start_batch > 0:
            start_batch = model_start_batch
        else:
            start_batch = 0

        for batch_idx in range(start_batch, train_dataset.num_batch):
            # Generate batch data
            batch_data, batch_lengths, target_words = train_dataset[batch_idx]

            if opt.cuda:
                batch_data = batch_data.cuda()
                batch_lengths = batch_lengths.cuda()
                target_words = target_words.cuda()

            batch_data = Variable(batch_data, requires_grad=False)
            batch_lengths = Variable(batch_lengths, requires_grad=False)
            target_words = Variable(target_words, requires_grad=False)

            optimizer.zero_grad()

            # Forward
            output_flat = model.forward(batch_data, batch_lengths)

            # Calculate loss
            loss = criterion(output_flat, target_words.view(-1))

            # Backward
            loss.backward()

            # Clip gradients to prevent them from exploding
            # (this function was renamed clip_grad_norm_ in newer PyTorch)
            torch.nn.utils.clip_grad_norm(model.parameters(), opt.clip)

            # Update parameters
            optimizer.step()

            # Accumulate loss
            acc_loss += loss.data
            acc_count += batch_lengths.data.sum()

            # Display progress
            if batch_idx % opt.display_freq == 0:
                average_loss = acc_loss[0] / acc_count.item()
                print(
                    'Epoch : %d, Batch : %d / %d, Loss : %f, Perplexity : %f, Time : %f'
                    % (epoch_idx + 1, batch_idx,
                       train_dataset.num_batch, average_loss,
                       math.exp(average_loss), time.time() - start_time))

                acc_loss = 0
                acc_count = 0
                start_time = time.time()

            # Save and validate if necessary
            if (1 + batch_idx) % opt.save_freq == 0:

                print_line('-')
                print('Pause training for save and validate.')

                model.eval()
                val_loss = evaluate(model=model,
                                    eval_dataset=val_dataset,
                                    cuda=opt.cuda,
                                    criterion=criterion)
                model.train()

                print('Validation Loss : %f' % val_loss)
                print('Validation Perplexity : %f' % math.exp(val_loss))

                model_savename = '%s-e_%d-b_%d-ppl_%d.pt' % (
                    opt.model_name, epoch_idx + 1, batch_idx + 1,
                    int(math.exp(val_loss)))

                model.val_loss = val_loss
                model.val_ppl = math.exp(val_loss)
                model.epoch_idx = epoch_idx + 1
                model.batch_idx = batch_idx + 1

                model.train_info['val loss'] = val_loss
                model.train_info['train loss'] = math.exp(val_loss)
                model.train_info['epoch idx'] = epoch_idx + 1
                model.train_info['batch idx'] = batch_idx + 1
                model.train_info['val ppl'] = math.exp(model.val_loss)
                model.train_info['save name'] = model_savename

                try:
                    torch.save(model, model_savename)
                except Exception as exc:
                    print('Failed to save model: %s' % exc)

                if model.val_loss < best_model['val loss']:
                    print_line('-')
                    print('New best model on validation set')
                    # Snapshot the info dict so later updates do not overwrite it
                    best_model = dict(model.train_info)
                    shutil.copy2(best_model['save name'],
                                 opt.model_name + '.best.pt')

                print_line('-')
                print('Save model at %s' % (model_savename))
                print_line('-')
                print('Continue Training...')

        print_line('-')
        print('Epoch %d finished, took %d s' %
              (epoch_idx + 1, time.time() - epoch_start_time))

        # Update lr if needed
        lr *= opt.lr_decay
        optimizer = getattr(optim, opt.optimizer)(model.parameters(), lr=lr)

    # Finish training
    print_line()
    print(' ')
    print('Finish training %d epochs!' % opt.epoch)
    print(' ')
    print_line()
    print('Best model:')
    print('Epoch : %d, Batch : %d, Loss : %f, Perplexity : %f' %
          (best_model['epoch idx'], best_model['batch idx'],
           best_model['val loss'], best_model['val ppl']))
    print_line('-')

    print('Save best model at %s' % (opt.model_name + '.best.pt'))
    shutil.copy2(best_model['save name'], opt.model_name + '.best.pt')
    print_line()
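
The criterion in Example no. 2 masks padding by assigning const.PAD a zero weight and summing per-token losses (size_average=False is the older spelling of reduction='sum'); the accumulated loss is then divided by the number of real tokens before taking exp() for perplexity. The standalone sketch below illustrates that bookkeeping with made-up sizes and a hypothetical PAD index.

import math

import torch
import torch.nn as nn

PAD = 0                                        # hypothetical padding index
vocab_size = 1000

# Zero weight on PAD: padded positions contribute nothing to the summed loss.
weight = torch.ones(vocab_size)
weight[PAD] = 0
criterion = nn.CrossEntropyLoss(weight=weight, reduction='sum')

logits = torch.randn(32, vocab_size)           # flattened (batch * seq, vocab)
targets = torch.randint(0, vocab_size, (32,))  # flattened target indices

loss_sum = criterion(logits, targets)
num_tokens = (targets != PAD).sum().item()
avg_loss = loss_sum.item() / max(num_tokens, 1)
print('per-token loss %.4f, perplexity %.2f' % (avg_loss, math.exp(avg_loss)))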
Example no. 3
        loss.backward()

        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), 5)

        optimizer.step()
        scheduler.step()

        err.update(loss.item())
        grd.update(grad_norm)

        lr = scheduler.get_last_lr()[0]

        progress.set_description('epoch %d lr %.6f %s %s' %
                                 (epoch + 1, lr, err, grd))

    model.eval()

    err = AverageMeter('loss')

    loader = DataLoader(test,
                        pin_memory=True,
                        num_workers=4,
                        batch_size=bptt,
                        drop_last=True)
    progress = tqdm(loader)

    hidden = model.step_init(batch_size)

    with torch.no_grad():
        for inputs, targets in progress:
            inputs = inputs.cuda(non_blocking=True)
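
Example no. 3 is a fragment of a training loop; err and grd are running-average trackers (an AverageMeter('loss') is constructed further down) that get updated with scalars and formatted straight into the tqdm description. A minimal sketch of such a meter, consistent with that usage but not taken from the original source:

class AverageMeter:
    """Tracks the running average of a scalar such as the loss."""

    def __init__(self, name):
        self.name = name
        self.sum = 0.0
        self.count = 0

    def update(self, value, n=1):
        self.sum += float(value) * n
        self.count += n

    @property
    def avg(self):
        return self.sum / self.count if self.count else 0.0

    def __str__(self):
        # Lets the meter be dropped directly into the progress-bar string.
        return '%s %.4f' % (self.name, self.avg)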
Example no. 4
def plot_switch_prob(loc):

    # load settings
    model_dir = Path(loc)
    with open(model_dir / 'settings.pkl', 'rb') as handle:
        settings = pickle.load(handle)
    cell = settings['cell']
    hidden_size = settings['hidden_size']
    token = settings['token']
    small = settings['small']
    max_len = settings['max_len']

    # load the final model
    vocab = generate.get_vocab(token, small)
    if token == 'word':
        emb = generate.get_embedding('word2vec')
        input_size = emb.vectors.shape[1]
        output_size = emb.vectors.shape[0]
    elif token == 'character':
        emb = None
        input_size = vocab.size
        output_size = vocab.size

    # pick the latest checkpoint (os.listdir order is arbitrary, so sort)
    fnames = sorted(os.listdir(model_dir / 'checkpoints'))
    fname = fnames[-1]

    # load the model
    model = LanguageModel(cell, input_size, hidden_size, output_size)
    model.load_state_dict(torch.load(model_dir / 'checkpoints' / fname))
    model.eval()

    # prepare the base and replacement batch
    N = 100
    gen = generate.generate('valid',
                            token=token,
                            max_len=max_len,
                            small=small,
                            batch_size=N)
    base_batch, _ = next(gen)
    repl_batch, _ = next(gen)

    # compute the average switch probability over the batch
    depths = list(range(max_len))
    switch_probs = [
        compute_switch_prob(model, base_batch, repl_batch, keep_depth, vocab,
                            emb) for keep_depth in depths
    ]

    # make the plot
    fig, ax = plt.subplots()
    ax.plot(depths, switch_probs, 'tomato')
    ax.plot(depths, [0.01] * len(depths), 'k')
    ax.set_yscale('log')
    ax.set_ylim(0.001, 1)
    ax.set_xlim(0, max_len)
    ax.set_title('Probability of switching predicted character\n{}'.format(
        model_dir.name),
                 fontsize=7)
    ax.set_xlabel('sequence keep-depth')
    ax.set_ylabel('Probability')
    ax.grid()
    plt.savefig(model_dir / 'SwitchProbability.pdf')
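
compute_switch_prob is not shown in Example no. 4. Judging from how it is called and plotted (switch probability versus "sequence keep-depth"), it appears to measure how often the model's predicted next token changes when everything except the most recent keep_depth tokens is replaced with context from another batch. The sketch below is a rough, hypothetical reconstruction of that idea; it ignores the vocab/embedding encoding the original performs.

import torch


def compute_switch_prob(model, base_batch, repl_batch, keep_depth, vocab, emb):
    # Hypothetical reconstruction: assumes both batches are already encoded
    # as model inputs of shape (batch, seq_len, features); vocab and emb,
    # which the original uses for encoding, are ignored here.
    seq_len = base_batch.shape[1]
    split = seq_len - keep_depth

    # Keep the last keep_depth steps of the base sequences and take the
    # earlier context from the replacement sequences.
    mixed_batch = torch.cat(
        [repl_batch[:, :split], base_batch[:, split:]], dim=1)

    with torch.no_grad():
        base_pred = model(base_batch).argmax(dim=-1)
        mixed_pred = model(mixed_batch).argmax(dim=-1)

    # Fraction of sequences whose predicted next token switched.
    return (base_pred != mixed_pred).float().mean().item()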
Example no. 5
def plot_losses(loc):

    # load data
    model_dir = Path(loc)
    with open(model_dir / 'settings.pkl', 'rb') as handle:
        settings = pickle.load(handle)

    # settings
    cell = settings['cell']
    hidden_size = settings['hidden_size']
    token = settings['token']
    small = settings['small']
    max_len = settings['max_len']
    n_epochs = settings['n_epochs']
    n_saves = settings['n_saves']
    criterion = nn.CrossEntropyLoss()

    # load the models
    models = []
    vocab = generate.get_vocab(token, small)
    if token == 'word':
        emb = generate.get_embedding('word2vec')
        input_size = emb.vectors.shape[1]
        output_size = emb.vectors.shape[0]
    elif token == 'character':
        emb = None
        input_size = vocab.size
        output_size = vocab.size

    # iterate over checkpoints in training order (os.listdir order is arbitrary)
    for fname in sorted(os.listdir(model_dir / 'checkpoints')):
        model = LanguageModel(cell, input_size, hidden_size, output_size)
        model.load_state_dict(torch.load(model_dir / 'checkpoints' / fname))
        model.eval()
        models.append(model)

    # prepare training and validation sets
    N = 10000
    splits = ['train', 'valid']
    gens = {
        split: generate.generate(split,
                                 token=token,
                                 max_len=max_len,
                                 small=small,
                                 batch_size=N)
        for split in splits
    }
    batch, labels = {}, {}
    for split in splits:
        # take a single batch from each generator
        b, l = next(gens[split])

        # one hot encode
        if token == 'character':
            b = generate.one_hot_encode(b, vocab)
        # or embed
        elif token == 'word':
            b = generate.w2v_encode(b, emb, vocab)

        batch[split] = torch.Tensor(b)
        labels[split] = torch.Tensor(l).long()

    # evaluate the models
    loss = {split: [] for split in splits}
    acc = {split: [] for split in splits}
    for i, model in enumerate(models):
        t0 = time.time()
        print(i)
        for split in splits:
            # evaluate without tracking gradients
            with torch.no_grad():
                # loss
                outputs = model(batch[split])
                l = criterion(outputs, labels[split])
                loss[split].append(float(l))
                # accuracy
                _, preds = torch.max(outputs, 1)
                a = (preds == labels[split]).sum().item() / float(N)
                acc[split].append(a)
        print('{:2.2f}s'.format(time.time() - t0))

    for split in splits:
        with open(model_dir / 'best_{}_acc.txt'.format(split), 'w') as handle:
            best = max(acc[split])
            handle.write('{}\n'.format(best))

    # plot both quantities
    for quantity, description in zip([loss, acc], ['Loss', 'Accuracy']):
        fig, ax = plt.subplots()
        for split in splits:
            xs = (1 + np.arange(len(quantity[split]))) / n_saves
            ax.plot(xs, quantity[split], label=split)
        ax.set_xlabel('Training epoch')
        if n_epochs > 1:
            ax.set_xlabel('Epoch')
        ax.set_ylabel(description)
        upper = ax.get_ylim()[1] if description == 'Loss' else 1
        ax.set_ylim(0, upper)
        ax.set_xlim(0, ax.get_xlim()[1])
        ax.set_title(model_dir.name, fontsize=7)
        ax.legend()
        ax.grid(alpha=0.5, which='both')
        plt.savefig(model_dir / '{}.pdf'.format(description))
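
The run-directory helpers in Examples 1, 4 and 5 all take the location of a directory containing settings.pkl and a checkpoints/ folder. A hypothetical driver tying them together might look like this:

import sys

if __name__ == '__main__':
    # The run directory is expected to hold settings.pkl and checkpoints/.
    run_dir = sys.argv[1]
    plot_losses(run_dir)       # loss and accuracy curves over checkpoints
    plot_switch_prob(run_dir)  # context-sensitivity (switch-probability) plot
    freestyle(run_dir)         # sample text at a range of temperatures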