# NOTE(review): whitespace-collapsed fragment of a training loop; it is cut off
# inside the net.decode_chain_argmax(...) call and `optimiser` is created
# outside the visible text, so this line is not runnable as shown.
# Per epoch: shuffles train_data in place, clears dial_shown, and resets
# total_samples, skipped_samples, bleus_argmax and bleus_sample.
# Per batch: increments the global batch_idx, calls optimiser.zero_grad(),
# packs the batch with model.pack_batch_no_out(batch, net.emb, device),
# encodes input_seq with net.encode, resets net_policies / net_actions /
# net_advantages, and computes beg_embedding = net.emb(beg_token).
# Per item: counts the sample, builds ref_indices by dropping the first
# element of every sequence in output_batch[idx] (presumably the begin token
# -- confirm against pack_batch_no_out), fetches the item's encoding with
# net.get_encoded_item(enc, idx), and starts the argmax decode.
batch_idx = 0 best_bleu = None for epoch in range(MAX_EPOCHES): random.shuffle(train_data) dial_shown = False total_samples = 0 skipped_samples = 0 bleus_argmax = [] bleus_sample = [] for batch in data.iterate_batches(train_data, BATCH_SIZE): batch_idx += 1 optimiser.zero_grad() input_seq, input_batch, output_batch = \ model.pack_batch_no_out(batch, net.emb, device) enc = net.encode(input_seq) net_policies = [] net_actions = [] net_advantages = [] beg_embedding = net.emb(beg_token) for idx, inp_idx in enumerate(input_batch): total_samples += 1 ref_indices = [ indices[1:] for indices in output_batch[idx] ] item_enc = net.get_encoded_item(enc, idx) r_argmax, actions = net.decode_chain_argmax( item_enc,
# NOTE(review): whitespace-collapsed fragment of a training loop; it is cut off
# inside the net.decode_chain_argmax(...) call, so it is not runnable as shown.
# Setup: creates an Adam optimiser over net.parameters() with lr=LEARNING_RATE
# and eps=1e-3, and initialises batch_idx and best_bleu.
# Per epoch: shuffles train_data in place, clears dial_shown, and resets
# total_samples, skipped_samples, bleus_argmax and bleus_sample.
# Per batch: increments the global batch_idx, calls optimiser.zero_grad(),
# packs the batch with model.pack_batch_no_out(batch, net.emb, device),
# encodes input_seq with net.encode, resets net_policies / net_actions /
# net_advantages, and computes beg_embedding = net.emb(beg_token).
# Per item: counts the sample, builds ref_indices by dropping the first
# element of every sequence in output_batch[idx] (presumably the begin token
# -- confirm against pack_batch_no_out), fetches the item's encoding with
# net.get_encoded_item(enc, idx), and starts the argmax decode.
optimiser = optim.Adam(net.parameters(), lr=LEARNING_RATE, eps=1e-3) batch_idx = 0 best_bleu = None for epoch in range(MAX_EPOCHES): random.shuffle(train_data) dial_shown = False total_samples = 0 skipped_samples = 0 bleus_argmax = [] bleus_sample = [] for batch in data.iterate_batches(train_data, BATCH_SIZE): batch_idx += 1 optimiser.zero_grad() input_seq, input_batch, output_batch = model.pack_batch_no_out( batch, net.emb, device) enc = net.encode(input_seq) net_policies = [] net_actions = [] net_advantages = [] beg_embedding = net.emb(beg_token) for idx, inp_idx in enumerate(input_batch): total_samples += 1 ref_indices = [ indices[1:] for indices in output_batch[idx] ] item_enc = net.get_encoded_item(enc, idx) r_argmax, actions = net.decode_chain_argmax( item_enc,
# NOTE(review): whitespace-collapsed fragment of a training loop; it is cut off
# inside the net.decode_chain_argmax(...) call, so it is not runnable as shown.
# Setup: creates an Adam optimiser over net.parameters() with lr=LEARNING_RATE
# and eps=1e-3, and initialises batch_idx and best_bleu.
# Per epoch: shuffles train_data in place, clears dial_shown, and resets
# total_samples, skipped_samples, bleus_argmax and bleus_sample.
# Per batch: increments the global batch_idx, calls optimiser.zero_grad(),
# packs the batch with model.pack_batch_no_out(batch, net.emb,
# cuda=args.cuda) -- this fragment selects the device through a `cuda`
# keyword taken from args -- then encodes input_seq with net.encode, resets
# net_policies / net_actions / net_advantages, and computes
# beg_embedding = net.emb(beg_token).
# Per item: counts the sample, builds ref_indices by dropping the first
# element of every sequence in output_batch[idx] (presumably the begin token
# -- confirm against pack_batch_no_out), fetches the item's encoding with
# net.get_encoded_item(enc, idx), and starts the argmax decode.
optimiser = optim.Adam(net.parameters(), lr=LEARNING_RATE, eps=1e-3) batch_idx = 0 best_bleu = None for epoch in range(MAX_EPOCHES): random.shuffle(train_data) dial_shown = False total_samples = 0 skipped_samples = 0 bleus_argmax = [] bleus_sample = [] for batch in data.iterate_batches(train_data, BATCH_SIZE): batch_idx += 1 optimiser.zero_grad() input_seq, input_batch, output_batch = model.pack_batch_no_out( batch, net.emb, cuda=args.cuda) enc = net.encode(input_seq) net_policies = [] net_actions = [] net_advantages = [] beg_embedding = net.emb(beg_token) for idx, inp_idx in enumerate(input_batch): total_samples += 1 ref_indices = [ indices[1:] for indices in output_batch[idx] ] item_enc = net.get_encoded_item(enc, idx) r_argmax, actions = net.decode_chain_argmax( item_enc,
# NOTE(review): whitespace-collapsed fragment of a training loop; it is cut off
# inside the net.decode_chain_argmax(...) call (after data.MAX_TOKENS), so it
# is not runnable as shown.
# Setup: creates an Adam optimiser over net.parameters() with lr=LEARNING_RATE
# and eps=1e-3, and initialises batch_idx and best_bleu.
# Per epoch: shuffles train_data in place, clears dial_shown, and resets
# total_samples, skipped_samples, bleus_argmax and bleus_sample.
# Per batch: increments the global batch_idx, calls optimiser.zero_grad(),
# packs the batch with model.pack_batch_no_out(batch, net.emb, device),
# encodes input_seq with net.encode, resets net_policies / net_actions /
# net_advantages, and computes beg_embedding = net.emb(beg_token).
# Per item: counts the sample, builds ref_indices by dropping the first
# element of every sequence in output_batch[idx] (presumably the begin token
# -- confirm against pack_batch_no_out), fetches the item's encoding with
# net.get_encoded_item(enc, idx), and starts the argmax decode with
# item_enc, beg_embedding and data.MAX_TOKENS as the leading arguments.
optimiser = optim.Adam(net.parameters(), lr=LEARNING_RATE, eps=1e-3) batch_idx = 0 best_bleu = None for epoch in range(MAX_EPOCHES): random.shuffle(train_data) dial_shown = False total_samples = 0 skipped_samples = 0 bleus_argmax = [] bleus_sample = [] for batch in data.iterate_batches(train_data, BATCH_SIZE): batch_idx += 1 optimiser.zero_grad() input_seq, input_batch, output_batch = model.pack_batch_no_out(batch, net.emb, device) enc = net.encode(input_seq) net_policies = [] net_actions = [] net_advantages = [] beg_embedding = net.emb(beg_token) for idx, inp_idx in enumerate(input_batch): total_samples += 1 ref_indices = [ indices[1:] for indices in output_batch[idx] ] item_enc = net.get_encoded_item(enc, idx) r_argmax, actions = net.decode_chain_argmax(item_enc, beg_embedding, data.MAX_TOKENS,