def sample(self, batch, mode, word_based=True):
    """Sample an instruction; used by the actor in ELF and for visually evaluating the model.

    Args:
        batch: dict of tensors; must contain 'prev_inst_idx'.
        mode: forwarded to self.rl_forward.
        word_based: if True, return the instruction as parsed token ids;
            otherwise return the sampled instruction indices.

    Returns:
        inst: [batch, max_sentence_len] token ids (word based) or
            [batch, 1] instruction indices (when word_based is False).
        inst_len: [batch] parsed instruction lengths.
        cont: [batch, 1] continue-flag samples.
        reply: dict in the format needed by the executor
            (cont, cont_pi, inst, inst_pi, raw_inst).
        log_prob_reply: dict with raw samples, sampling probabilities and value.
    """
    output = self.rl_forward(batch, mode)
    samples = self.sampler.sample(
        output["cont_pi"], output["inst_pi"], batch["prev_inst_idx"]
    )
    log_prob_reply = {
        "samples": samples,
        "probs": {
            self.sampler.cont_prob_key: output["cont_pi"],
            self.sampler.prob_key: output["inst_pi"],
        },
        "value": output["v"],
    }
    reply = {
        "cont": samples["cont"].unsqueeze(1),
        "cont_pi": output["cont_pi"],
        "inst": samples["inst"].unsqueeze(1),
        "inst_pi": output["inst_pi"],
    }
    # convert format needed by executor
    # NOTE: renamed accumulator so it no longer shadows the sampler output above.
    token_rows = []
    lengths = []
    raws = []
    for idx in reply["inst"]:
        inst = self.inst_dict.get_inst(int(idx.item()))
        tokens, length = self.inst_dict.parse(inst, True)
        token_rows.append(tokens)
        lengths.append(length)
        # compute the raw instruction once and reuse it
        # (was computed twice, with the first result discarded)
        raws.append(convert_to_raw_instruction(inst, self.max_raw_chars))
    device = reply["cont"].device
    if word_based:
        # for word based
        inst = torch.LongTensor(token_rows).to(device)
    else:
        inst = reply["inst"]
    inst_len = torch.LongTensor(lengths).to(device)
    reply["raw_inst"] = torch.LongTensor(raws).to(device)
    return inst, inst_len, reply["cont"], reply, log_prob_reply
def _get_human_instruction(self, batch):
    """Prompt a human for an instruction and build the executor-format reply.

    Only supports batch size 1. An empty input repeats the previous
    instruction (self.prev_inst) with the continue flag set.

    Args:
        batch: dict of tensors; must contain 'prev_inst' with batch dim 1.

    Returns:
        inst: [1, sentence_len] parsed instruction token ids.
        inst_len: [1] parsed instruction length.
        inst_cont: [1] continue flag (1 when repeating the previous instruction).
        reply: dict with inst, inst_pi (uniform), cont, cont_pi (uniform), raw_inst.
    """
    assert_eq(batch['prev_inst'].size(0), 1)
    device = batch['prev_inst'].device
    inst = input('Please input your instruction\n')
    # removed leftover debugging breakpoint (import pdb; pdb.set_trace())
    # that unconditionally halted execution here
    inst_idx = torch.zeros((1, )).long().to(device)
    inst_idx[0] = self.executor.inst_dict.get_inst_idx(inst)
    inst_cont = torch.zeros((1, )).long().to(device)
    if len(inst) == 0:
        # empty input -> repeat the previous instruction with cont=1
        inst = self.prev_inst
        inst_cont[0] = 1
    self.prev_inst = inst
    raw_inst = convert_to_raw_instruction(inst, self.max_raw_chars)
    inst, inst_len = self.executor.inst_dict.parse(inst, True)
    inst = torch.LongTensor(inst).unsqueeze(0).to(device)
    inst_len = torch.LongTensor([inst_len]).to(device)
    raw_inst = torch.LongTensor([raw_inst]).to(device)
    reply = {
        'inst': inst_idx.unsqueeze(1),
        # uniform probabilities: the human choice has no model distribution
        'inst_pi': torch.ones(1, self.num_insts).to(device) / self.num_insts,
        'cont': inst_cont.unsqueeze(1),
        'cont_pi': torch.ones(1, 2).to(device) / 2,
        'raw_inst': raw_inst
    }
    return inst, inst_len, inst_cont, reply
def sample(self, batch, mode, word_based=True, agent_mask=None):
    """Sample an instruction; used by the actor in ELF and for visually evaluating the model.

    Args:
        batch: dict of tensors; must contain 'prev_inst_idx'.
        mode: forwarded to self.rl_forward.
        word_based: if True, return the instruction as parsed token ids;
            otherwise return the sampled instruction indices.
        agent_mask: optional mask forwarded to self.rl_forward.

    Returns:
        inst: [batch, max_sentence_len] token ids (word based) or
            [batch, 1] instruction indices (when word_based is False).
        inst_len: [batch] parsed instruction lengths.
        cont: [batch, 1] continue-flag samples.
        reply: dict in the format needed by the executor
            (cont, cont_pi, inst, inst_pi, raw_inst).
    """
    output = self.rl_forward(batch, mode, agent_mask=agent_mask)
    samples = self.sampler.sample(
        output['cont_pi'], output['inst_pi'], batch['prev_inst_idx']
    )
    reply = {
        'cont': samples['cont'].unsqueeze(1),
        'cont_pi': output['cont_pi'],
        'inst': samples['inst'].unsqueeze(1),
        'inst_pi': output['inst_pi'],
    }
    # convert format needed by executor
    # NOTE: renamed accumulator so it no longer shadows the sampler output above.
    token_rows = []
    lengths = []
    raws = []
    for idx in reply['inst']:
        inst = self.inst_dict.get_inst(int(idx.item()))
        tokens, length = self.inst_dict.parse(inst, True)
        token_rows.append(tokens)
        lengths.append(length)
        # compute the raw instruction once and reuse it
        # (was computed twice, with the first result discarded)
        raws.append(convert_to_raw_instruction(inst, self.max_raw_chars))
    device = reply['cont'].device
    if word_based:
        # for word based
        inst = torch.LongTensor(token_rows).to(device)
    else:
        inst = reply['inst']
    inst_len = torch.LongTensor(lengths).to(device)
    reply['raw_inst'] = torch.LongTensor(raws).to(device)
    return inst, inst_len, reply['cont'], reply