def main(config):
    mode = config['mode']
    if mode == 'preproc':
        preproc(config)
        return

    # Select the compute device: CPU (Naive) when gpu < 0, otherwise the given CUDA device.
    print('initializing device ...', end='', file=sys.stderr, flush=True)
    dev = D.Naive() if config['gpu'] < 0 else D.CUDA(config['gpu'])
    Device.set_default(dev)
    print('done.', file=sys.stderr, flush=True)

    prefix = config['model_prefix']
    if mode == 'train':
        # Fresh model and optimizer; start with an effectively infinite best validation perplexity.
        model = Transformer(
            config['n_heads'], config['n_stacks'],
            config['dropout'], config['generation_limit'])
        model.init(config['vocabulary_size'], config['d_model'], config['d_ff'])
        optimizer = O.Adam(alpha=1, beta2=0.98, eps=1e-9)
        optimizer.set_gradient_clipping(5)
        train(model, optimizer, config, 1e10)
    elif mode == 'resume':
        # Restore model, optimizer, and the best validation perplexity seen so far, then continue training.
        print('loading model/optimizer ... ', end='', file=sys.stderr, flush=True)
        model = Transformer(
            config['n_heads'], config['n_stacks'],
            config['dropout'], config['generation_limit'])
        model.load(prefix + '.model')
        optimizer = O.Adam(alpha=1, beta2=0.98, eps=1e-9)
        optimizer.set_gradient_clipping(5)
        optimizer.load(prefix + '.optimizer')
        with Path(prefix).with_suffix('.valid').open() as f:
            valid_ppl = float(f.read().strip())
        print('done.', file=sys.stderr, flush=True)
        train(model, optimizer, config, valid_ppl)
    elif mode == 'test':
        # Load a trained model and run decoding only.
        model = Transformer(
            config['n_heads'], config['n_stacks'],
            config['dropout'], config['generation_limit'])
        model.load(prefix + '.model')
        test(model, config)
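# main() only consumes a plain dict, so a small command-line driver can build that dict
# and dispatch into it. The sketch below is illustrative only: the flag names and default
# values are assumptions, not the project's actual CLI, and train()/test() will likely
# read additional config keys (e.g. batch size and data paths) that are not shown here.
import argparse

def build_config():
    parser = argparse.ArgumentParser(description='Transformer driver (illustrative sketch)')
    parser.add_argument('mode', choices=['preproc', 'train', 'resume', 'test'])
    parser.add_argument('--gpu', type=int, default=-1)  # negative -> CPU device
    parser.add_argument('--model-prefix', default='transformer')
    parser.add_argument('--n-heads', type=int, default=8)
    parser.add_argument('--n-stacks', type=int, default=6)
    parser.add_argument('--dropout', type=float, default=0.1)
    parser.add_argument('--generation-limit', type=int, default=64)
    parser.add_argument('--vocabulary-size', type=int, default=32000)
    parser.add_argument('--d-model', type=int, default=512)
    parser.add_argument('--d-ff', type=int, default=2048)
    args = parser.parse_args()
    # Keys match exactly what main() reads from `config` above.
    return {
        'mode': args.mode,
        'gpu': args.gpu,
        'model_prefix': args.model_prefix,
        'n_heads': args.n_heads,
        'n_stacks': args.n_stacks,
        'dropout': args.dropout,
        'generation_limit': args.generation_limit,
        'vocabulary_size': args.vocabulary_size,
        'd_model': args.d_model,
        'd_ff': args.d_ff,
    }

if __name__ == '__main__':
    main(build_config())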
def run_validation(epoch, dataset_name: str):
    dataset = load_data(dataset_name)
    print("Number of %s instances: %d" % (dataset_name, len(dataset)))

    # Load the checkpoint for this epoch and put the transformer in evaluation mode.
    model = Transformer(
        i2w=i2w,
        use_knowledge=args.use_knowledge,
        args=args,
        test=True
    ).cuda()
    model.load("{0}model_{1}.bin".format(args.save_path, epoch))
    model.transformer.eval()

    # Iterate over batches
    num_batches = math.ceil(len(dataset) / args.batch_size)
    cum_loss = 0
    cum_words = 0
    predicted_sentences = []
    indices = list(range(len(dataset)))
    for batch in tqdm(range(num_batches)):
        # Prepare batch
        batch_indices = indices[batch * args.batch_size:(batch + 1) * args.batch_size]
        batch_rows = [dataset[i] for i in batch_indices]

        # Encode batch. If facts are being used, they'll be prepended to the input.
        input_seq, input_lens, target_seq, target_lens = model.prep_batch(batch_rows)

        # Decode batch
        predicted_sentences += model.decode(input_seq, input_lens)

        # Evaluate batch: accumulate summed loss and the number of non-padding target words.
        cum_loss += model.eval_ppl(input_seq, input_lens, target_seq, target_lens)
        cum_words += (target_seq != w2i["_pad"]).sum().item()

    # Log epoch perplexity
    ppl = math.exp(cum_loss / cum_words)
    print("{} Epoch: {} PPL: {}".format(dataset_name, epoch, ppl))

    # Save predictions (one decoded sentence per line).
    with open("{0}{1}_epoch_{2}.pred".format(args.save_path, dataset_name, epoch), "w") as f:
        f.writelines([l + "\n" for l in predicted_sentences])
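# How run_validation() is driven is not shown here; a minimal sketch is given below.
# It assumes an `args.num_epochs` field and split names "valid"/"test", both of which
# are illustrative assumptions rather than the project's actual interface.
def evaluate_all_checkpoints(args):
    for epoch in range(args.num_epochs):
        # Each call reloads "{save_path}model_{epoch}.bin", reports perplexity,
        # and writes decoded outputs to "{save_path}{split}_epoch_{epoch}.pred".
        run_validation(epoch, "valid")
        run_validation(epoch, "test")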