def print_components_pairwise(tellers, drawers, teller_splits='ab', drawer_splits='ab', limit=None, split='dev'):
    """Print a table of per-component evaluation metrics for every
    teller/drawer pairing whose split letters are permitted by
    `teller_splits` and `drawer_splits`.
    """
    print(f"Component evaluations [{split}]")
    print(
        "Teller \t Drawer \t Dir \t Expr(human)\t Pose(human)\t Depth \t xy (sq.)\t x-only \t y-only"
    )
    # Cross-split pairings first ('ab'/'ba'), then same-split ('aa'/'bb').
    for pairing_group in [('ab', 'ba'), ('aa', 'bb')]:
        for teller_name, teller_pair in tellers:
            for drawer_name, drawer_pair in drawers:
                for pairing in pairing_group:
                    teller_side, drawer_side = pairing
                    if teller_side not in teller_splits:
                        continue
                    if drawer_side not in drawer_splits:
                        continue
                    components = component_evaluator.eval_fns(
                        make_fns(pairing, teller_pair, drawer_pair),
                        limit=limit, split=split)
                    teller_caption = f"{teller_name}_{teller_side}"
                    drawer_caption = f"{drawer_name}_{drawer_side}"
                    print(f"{teller_caption:17s}\t {drawer_caption:17s}\t",
                          "\t".join(f"{num: .6f}" for num in components))
        print()
def print_script(drawers, drawer_splits='ab', limit=None, split='dev'):
    """Evaluate each drawer against the scripted teller and print the
    mean scene similarity per (drawer, split-letter) combination.
    """
    print("Drawer evaluations against script")
    print(f"Drawer \t Scene similarity [{split}]")
    for name, pair in drawers:
        for side in drawer_splits:
            fns = make_fns(side, model.scripted_tell, pair)
            sims = eval_fns(fns, limit=limit, split=split)
            caption = f"{name}_{side}"
            print(f"{caption:17s}\t {sims.mean():.2f}")
def print_components_script(drawers, drawer_splits='ab', limit=None, split='dev'):
    """Evaluate each drawer against the scripted teller and print the
    per-component metric columns for every (drawer, split-letter) combination.
    """
    print(f"Drawer evaluations against script [{split}]")
    print(
        "Drawer \t Dir \t Expr(human)\t Pose(human)\t Depth \t xy (sq.)\t x-only \t y-only"
    )
    for name, pair in drawers:
        for side in drawer_splits:
            fns = make_fns(side, model.scripted_tell, pair)
            components = component_evaluator.eval_fns(fns, limit=limit, split=split)
            caption = f"{name}_{side}"
            row = "\t".join(f"{num: .6f}" for num in components)
            print(f"{caption:17s}\t", row)
def train_teller(split, teller_pair, num_epochs=50, limit=100):
    """Train one teller of `teller_pair`, selected by `split`, with Adam.

    Reports perplexity and average split loss before training, then again
    every 5 epochs along with a scene-similarity evaluation against the
    lstmaddonly drawer pair.

    Args:
        split: 'a' or 'b' — which member of `teller_pair` to train.
        teller_pair: (teller_a, teller_b) model pair.
        num_epochs: number of passes over the training batches.
        limit: example cap for the periodic similarity evaluation.

    Raises:
        ValueError: if `split` is not 'a' or 'b'.
    """
    splits_pair = split + 'a', split + 'b'
    if split == 'a':
        teller = teller_pair[0]
    elif split == 'b':
        teller = teller_pair[1]
    else:
        # Raise instead of `assert False`: asserts are stripped under `python -O`.
        raise ValueError(f"split must be 'a' or 'b', got {split!r}")
    optimizer = torch.optim.Adam(teller.parameters())
    # Baseline metrics before any training.
    print('perplexity-dev', model.calc_perplexity(teller))
    print('perplexity-a', model.calc_perplexity(teller, 'a'))
    print('avg-loss-dev', teller.calc_split_loss())
    print('avg-loss-a', teller.calc_split_loss('a'))
    for epoch in range(num_epochs):
        teller.train()
        for ex in teller.datagen.get_examples_batch():
            optimizer.zero_grad()
            loss = teller(ex)
            loss.backward()
            optimizer.step()
        # NOTE: this reports the loss of the last batch only, not an epoch average.
        print(f'Done epoch {epoch} loss {float(loss)}')
        if epoch % 5 == 0:
            del ex, loss  # clean up memory before the (expensive) evaluations
            print('perplexity-dev', model.calc_perplexity(teller))
            print('perplexity-a', model.calc_perplexity(teller, 'a'))
            print('avg-loss-dev', teller.calc_split_loss())
            print('avg-loss-a', teller.calc_split_loss('a'))
            for splits in splits_pair:
                sims = eval_fns(
                    make_fns(splits, teller_pair,
                             (drawer_lstmaddonly_a, drawer_lstmaddonly_b)),
                    limit=limit)
                print(splits, sims.mean())
def print_pairwise(tellers, drawers, teller_splits='ab', drawer_splits='ab', limit=None, split='dev'):
    """Print mean scene similarity for every teller/drawer pairing whose
    split letters are permitted by `teller_splits` and `drawer_splits`.
    """
    print(
        f"Teller \t Drawer \t Scene similarity [{split}]")
    # Cross-split pairings first ('ab'/'ba'), then same-split ('aa'/'bb').
    for pairing_group in [('ab', 'ba'), ('aa', 'bb')]:
        for teller_name, teller_pair in tellers:
            for drawer_name, drawer_pair in drawers:
                for pairing in pairing_group:
                    teller_side, drawer_side = pairing
                    if teller_side not in teller_splits or drawer_side not in drawer_splits:
                        continue
                    sims = eval_fns(make_fns(pairing, teller_pair, drawer_pair),
                                    limit=limit, split=split)
                    teller_caption = f"{teller_name}_{teller_side}"
                    drawer_caption = f"{drawer_name}_{drawer_side}"
                    print(
                        f"{teller_caption:17s}\t {drawer_caption:17s}\t {sims.mean():.2f}"
                    )
        print()
print( f"Human scene similarity: mean={human_sims.mean():.6f} std={human_sims.std():.6f} median={np.median(human_sims):.6f}" ) # %% print() print() # %% limit = None print("Teller \t Drawer \t Scene similarity") for splits_group in [('ab', 'ba'), ('aa', 'bb')]: for teller_name, teller_pair in tellers: for drawer_name, drawer_pair in drawers: for splits in splits_group: sims = eval_fns(make_fns(splits, teller_pair, drawer_pair), limit=limit) teller_caption = f"{teller_name}_{splits[0]}" drawer_caption = f"{drawer_name}_{splits[1]}" print(f"{teller_caption:17s}\t {drawer_caption:17s}\t", sims.mean()) print() # %% print() print() # %% limit = None print("Drawer evaluations against script") print("Drawer \t Scene similarity")
def validate():
    """Print mean scene similarity for both inference methods across both
    split pairings (reads `teller`, `splits_pair`, `teller_pair`,
    `drawer_pair` and `limit` from the enclosing scope).
    """
    for method in ['greedy', 'sample']:
        teller.inference_method = method
        for pairing in splits_pair:
            scores = eval_fns(make_fns(pairing, teller_pair, drawer_pair),
                              limit=limit)
            print(pairing, f'[{method}]', scores.mean())
def train_teller(split, teller_pair, scenes,
                 utterance_penalty=0.1,
                 gamma=0.999,
                 uninformative_penalty=0.3,
                 batch_size=16,
                 num_batches=12500,
                 eval_every=2000,
                 lr=0.00007,
                 limit=100,
                 base_name="scene2seq_rl",
                 ):
    """Fine-tune one teller of `teller_pair` (selected by `split`) with an
    RL objective: sample dialog episodes against the lstmaddonly drawers,
    compute `calc_rl_loss`, and step Adam.

    Periodically prints sampled dialogs, validates on both split pairings,
    and serializes the teller spec to `rl_models/` every `eval_every`
    batches (when `base_name` is not None).
    """
    print("Training hyperparameters:")
    # Echo the hyperparameters via locals() so the log records the run config.
    for param in ['utterance_penalty', 'gamma', 'uninformative_penalty',
                  'batch_size', 'num_batches', 'lr', 'limit', ]:
        print(param, '=', locals()[param])
    drawer_pair = drawer_lstmaddonly_a, drawer_lstmaddonly_b
    splits_pair = split + 'a', split + 'b'
    if split == 'a':
        teller = teller_pair[0]
    elif split == 'b':
        teller = teller_pair[1]
    else:
        assert False  # `split` must be 'a' or 'b'
    teller.disable_dropout()
    # Self-play configuration: teller talks to the drawer on its own split.
    fns = make_fns(split + split, teller_pair, drawer_pair)
    optimizer = torch.optim.Adam(teller.parameters(), lr=lr)

    def validate():
        # Evaluate with both decoding strategies on both split pairings.
        for inference_method in ['greedy', 'sample']:
            teller.inference_method = inference_method
            for splits in splits_pair:
                sims = eval_fns(make_fns(splits, teller_pair, drawer_pair),
                                limit=limit)
                print(splits, f'[{inference_method}]', sims.mean())

    validate()
    # RL rollouts are collected with sampling (not greedy decoding).
    teller.inference_method = 'sample'
    for batch_num in range(num_batches):
        optimizer.zero_grad()
        # eval() during rollout collection, train() for the loss/backward pass.
        teller.eval()
        episodes, ex = collect_episodes(
            fns, teller.datagen,
            scenes=scenes,
            batch_size=batch_size,
            utterance_penalty=utterance_penalty,
            gamma=gamma,
            uninformative_penalty=uninformative_penalty,
        )
        teller.train()
        loss = teller.calc_rl_loss(ex)
        loss.backward()
        # grad_norm = nn.utils.clip_grad_norm_(teller.parameters(), float('inf'))
        # XXX(nikita): clip gradients in an attempt to stabilize. Need to see if
        # there's an underlying bug, though.
        grad_norm = nn.utils.clip_grad_norm_(teller.parameters(), 1.5)
        optimizer.step()
        # Mean reward per scene; 'brw_rewards'/'b_scene_mask' come from
        # collect_episodes (presumably batch-level reward tensors — TODO confirm).
        mean_reward = float(ex['brw_rewards'].sum().item()
                            / ex['b_scene_mask'].shape[0])
        # Average number of teller utterances (TellGroup events) per episode.
        mean_len = np.mean([
            len([event for event in episode
                 if isinstance(event, codraw_data.TellGroup)])
            for episode in episodes])
        sims = np.array([episode.scene_similarity() for episode in episodes])
        mean_sim = sims.mean()
        std_sim = sims.std()
        print(f'batch {batch_num} mean-reward {mean_reward} loss {float(loss)} grad {float(grad_norm)} mean-len {mean_len} mean-sim {mean_sim} std-sim {std_sim}')
        if batch_num % 5 == 0:
            # Show the last episode's utterances as a qualitative sample.
            for event in episodes[-1]:
                if isinstance(event, codraw_data.TellGroup):
                    print(' >', event.msg)
        if batch_num % 50 == 0:
            del episodes, ex, loss  # clean up memory
            validate()
        if batch_num > 0 and batch_num % eval_every == 0:
            teller.eval()
            print("Printing representative sampled dialogs")
            teller.inference_method = 'sample'
            episodes, ex = collect_episodes(fns, teller.datagen,
                                            scenes=scenes[:1], batch_size=5)
            for episode in episodes:
                for event in episode:
                    if isinstance(event, codraw_data.TellGroup):
                        print(' >', event.msg)
                print('similarity', episode.scene_similarity())
                print('-----')
            print("Evaluating on the full dev set")
            # NOTE(review): this evaluates the module-level (teller_rl_a,
            # teller_rl_b) pair, not `teller_pair` — presumably they alias the
            # same models; verify against the caller.
            for inference_method in ['greedy', 'sample']:
                teller.inference_method = inference_method
                for splits in splits_pair:
                    sims = eval_fns(make_fns(splits,
                                             (teller_rl_a, teller_rl_b),
                                             (drawer_lstmaddonly_a, drawer_lstmaddonly_b)),
                                    limit=None)
                    print(splits, f'[{inference_method}]', sims.mean())
            if base_name is not None:
                print("Serializing teller to disk")
                torch.save(teller.spec,
                           Path(f'rl_models/{base_name}_{split}_{batch_num}.pt'))
# %% # Episode.run(codraw_data.get_scenes('dev')[0], make_fns('aa', (teller_scenenn_a, teller_scenenn_b), (drawer_lstmaddonly_a, drawer_lstmaddonly_b))).display() # %% # %% # %% print() print() print("Final evaluation on full dev set") # %% for splits in ('aa', 'ab', 'ba', 'bb'): sims = eval_fns(make_fns(splits, (teller_scenenn_a, teller_scenenn_b), (drawer_lstmaddonly_a, drawer_lstmaddonly_b)), limit=None) print(splits, sims.mean()) # aa 1.3095491909624886 # ab 1.3115692170881366 # nohier aa 2.229799264350204 # nohier ab 2.255167911899865 # %% for splits in ('ba', 'bb'): sims = eval_fns(make_fns(splits, (teller_scenenn_a, teller_scenenn_b), (drawer_lstmaddonly_a, drawer_lstmaddonly_b)), limit=None) print(splits, sims.mean())
#%% for epoch in range(500): drawer_sim_a.train() for num, ex in enumerate(drawer_sim_a.datagen.get_examples_batch()): optimizer_sim_a.zero_grad() loss = drawer_sim_a.forward(ex) loss.backward() optimizer_sim_a.step() print(f'Done epoch {epoch} loss {float(loss)}') if epoch % 25 == 0: drawer_sim_a.prepare_for_inference() for splits in ('aa', 'ba'): sims = eval_fns(make_fns(splits, (teller_nn_a, teller_nn_b), (drawer_sim_a, drawer_sim_b)), limit=100) print(splits, sims.mean()) drawer_sim_a.prepare_for_inference() # %% for epoch in range(500): drawer_sim_b.train() for num, ex in enumerate(drawer_sim_b.datagen.get_examples_batch()): optimizer_sim_b.zero_grad() loss = drawer_sim_b.forward(ex) loss.backward() optimizer_sim_b.step() print(f'Done epoch {epoch} loss {float(loss)}')
drawers = [ ('drawer_nn', (models['drawer_nn_a'], models['drawer_nn_b'])), ('drawer_sim', (models['drawer_sim_a'], models['drawer_sim_b'])), ('drawer_bow2c', (models['drawer_bow2c_a'], models['drawer_bow2c_b'])), ('drawer_bow2bce', (models['drawer_bow2bce_a'], models['drawer_bow2bce_b'])), ('drawer_bowcanvas2bce', (models['drawer_bowcanvas2bce_a'], models['drawer_bowcanvas2bce_b'])), ] # %% limit = None print("Drawer evaluations against script") for drawer_name, drawer_pair in drawers: for split in ('a', 'b'): sims = eval_fns(make_fns(split, scripted_tell, drawer_pair), limit=limit) print(f"{drawer_name}_{split}", sims.mean()) # %% limit = None print("Drawer evaluations against script before peek") for drawer_name, drawer_pair in drawers: for split in ('a', 'b'): sims = eval_fns(make_fns(split, scripted_tell_before_peek, drawer_pair), limit=limit) print(f"{drawer_name}_{split}", sims.mean()) # %% limit = None print("Drawer evaluations against script after peek")
)
# %%
print()
print()
# Persist all scene2seq model specs in one file.
print("Saving models")
torch.save(scene2seq_specs, Path('models/scene2seq.pt'))
# %%
print()
# Evaluate each scene2seq teller variant with the lstmaddonly drawers on
# all four split pairings, full dev set (limit=None).
print("Final evaluation on full dev set (scene2seq)")
for splits in ('aa', 'ab', 'ba', 'bb'):
    sims = eval_fns(make_fns(splits,
                             (teller_scene2seq_a, teller_scene2seq_b),
                             (drawer_lstmaddonly_a, drawer_lstmaddonly_b)),
                    limit=None)
    print(splits, sims.mean())
print("Final evaluation on full dev set (scene2seq_aux)")
for splits in ('aa', 'ab', 'ba', 'bb'):
    sims = eval_fns(make_fns(splits,
                             (teller_scene2seq_aux_a, teller_scene2seq_aux_b),
                             (drawer_lstmaddonly_a, drawer_lstmaddonly_b)),
                    limit=None)
    print(splits, sims.mean())
print("Final evaluation on full dev set (scene2seq_aux2)")
for splits in ('aa', 'ab', 'ba', 'bb'):
    sims = eval_fns(
        make_fns(splits, (teller_scene2seq_aux2_a, teller_scene2seq_aux2_b),
optimizer_lstmaddonly_b = torch.optim.Adam(drawer_lstmaddonly_b.parameters()) #%% for epoch in range(15): drawer_lstmaddonly_a.train() for num, ex in enumerate(drawer_lstmaddonly_a.datagen.get_examples_batch()): optimizer_lstmaddonly_a.zero_grad() loss = drawer_lstmaddonly_a.forward(ex) loss.backward() optimizer_lstmaddonly_a.step() print(f'Done epoch {epoch} loss {float(loss)}') if epoch % 1 == 0: for split in ('a',): sims = eval_fns(make_fns(split, scripted_tell, (drawer_lstmaddonly_a, drawer_lstmaddonly_b)), limit=100) print(split, sims.mean()) sims = eval_fns(make_fns(split, scripted_tell_before_peek, (drawer_lstmaddonly_a, drawer_lstmaddonly_b)), limit=100) print(split, 'before', sims.mean()) sims = eval_fns(make_fns(split, scripted_tell_after_peek, (drawer_lstmaddonly_a, drawer_lstmaddonly_b)), limit=100) print(split, 'after', sims.mean()) #%% for epoch in range(15): drawer_lstmaddonly_b.train() for num, ex in enumerate(drawer_lstmaddonly_b.datagen.get_examples_batch()): optimizer_lstmaddonly_b.zero_grad() loss = drawer_lstmaddonly_b.forward(ex) loss.backward()