def _build_model(config):
    # Single construction point so the train / resume / test branches cannot
    # drift apart in their hyperparameter wiring.
    return Transformer(config['n_heads'], config['n_stacks'],
                       config['dropout'], config['generation_limit'])


def _build_optimizer():
    # Adam with beta2=0.98, eps=1e-9 as in "Attention Is All You Need";
    # alpha=1 because the warmup schedule presumably scales it elsewhere.
    optimizer = O.Adam(alpha=1, beta2=0.98, eps=1e-9)
    optimizer.set_gradient_clipping(5)
    return optimizer


def main(config):
    """Entry point: dispatch on ``config['mode']``.

    Supported modes:
      * ``'preproc'`` -- run preprocessing only (no device needed).
      * ``'train'``   -- train a freshly initialized Transformer.
      * ``'resume'``  -- reload model/optimizer state and continue training.
      * ``'test'``    -- load a trained model and evaluate it.

    Raises:
        ValueError: if ``config['mode']`` is none of the above.
    """
    mode = config['mode']
    if mode == 'preproc':
        preproc(config)
        return

    print('initializing device ...', end='', file=sys.stderr, flush=True)
    # Negative GPU id selects the CPU (naive) backend.
    dev = D.Naive() if config['gpu'] < 0 else D.CUDA(config['gpu'])
    Device.set_default(dev)
    print("done.", file=sys.stderr, flush=True)

    prefix = config['model_prefix']
    if mode == 'train':
        model = _build_model(config)
        model.init(config['vocabulary_size'], config['d_model'],
                   config['d_ff'])
        # 1e10 acts as "infinity": any real validation perplexity beats it.
        train(model, _build_optimizer(), config, 1e10)
    elif mode == 'resume':
        print('loading model/optimizer ... ',
              end='',
              file=sys.stderr,
              flush=True)
        model = _build_model(config)
        model.load(prefix + '.model')
        optimizer = _build_optimizer()
        optimizer.load(prefix + '.optimizer')
        # NOTE(review): 'with_suffix' REPLACES any existing extension in
        # prefix, whereas the model/optimizer paths APPEND one -- these
        # disagree if prefix contains a dot. Kept as-is; verify against the
        # code that writes the '.valid' file.
        with Path(prefix).with_suffix('.valid').open() as f:
            valid_ppl = float(f.read().strip())
        print('done.', file=sys.stderr, flush=True)
        train(model, optimizer, config, valid_ppl)
    elif mode == 'test':
        model = _build_model(config)
        model.load(prefix + '.model')
        test(model, config)
    else:
        # Previously an unknown mode fell through silently after device
        # initialization; fail loudly instead.
        raise ValueError('unknown mode: {!r}'.format(mode))
# Beispiel #2
def run_validation(epoch, dataset_name: str):
    """Evaluate the checkpoint saved at *epoch* on the named dataset.

    Loads ``{args.save_path}model_{epoch}.bin``, computes corpus perplexity,
    prints it, and writes the decoded sentences (one per line) to
    ``{args.save_path}{dataset_name}_epoch_{epoch}.pred``.

    Relies on module-level ``args``, ``i2w``, ``w2i`` and ``load_data``.
    """
    dataset = load_data(dataset_name)
    print("Number of %s instances: %d" % (dataset_name, len(dataset)))

    model = Transformer(
        i2w=i2w, use_knowledge=args.use_knowledge, args=args, test=True
    ).cuda()
    model.load("{0}model_{1}.bin".format(args.save_path, epoch))
    model.transformer.eval()

    # Iterate over batches
    num_batches = math.ceil(len(dataset) / args.batch_size)
    cum_loss = 0
    cum_words = 0
    predicted_sentences = []
    for batch in tqdm(range(num_batches)):
        # Prepare batch (explicit indices rather than slicing, since the
        # dataset object may only support integer __getitem__).
        batch_indices = range(batch * args.batch_size,
                              min((batch + 1) * args.batch_size, len(dataset)))
        batch_rows = [dataset[i] for i in batch_indices]

        # Encode batch. If facts are being used, they'll be prepended to the input
        input_seq, input_lens, target_seq, target_lens = model.prep_batch(batch_rows)

        # Decode batch
        predicted_sentences += model.decode(input_seq, input_lens)

        # Evaluate batch: accumulate summed loss and non-padding token count
        # so the perplexity is normalized per word, not per batch.
        cum_loss += model.eval_ppl(input_seq, input_lens, target_seq, target_lens)
        cum_words += (target_seq != w2i["_pad"]).sum().item()

    ppl = math.exp(cum_loss / cum_words)
    print("{} Epoch: {} PPL: {}".format(dataset_name, epoch, ppl))

    # Save predictions. The original leaked the file handle by never closing
    # the file returned by open(); a context manager guarantees flush+close.
    pred_path = "{0}{1}_epoch_{2}.pred".format(args.save_path, dataset_name, str(epoch))
    with open(pred_path, "w") as f:
        f.writelines([l + "\n" for l in predicted_sentences])