Example #1
0
    def sample(self, batch, mode, word_based=True):
        """Sample a continue flag and an instruction for each batch entry.

        Used for the actor in ELF and for visually evaluating the model.

        Args:
            batch: dict passed to ``self.rl_forward``; it must also contain
                ``"prev_inst_idx"``, which is handed to the sampler.
            mode: forwarded unchanged to ``self.rl_forward``.
            word_based: when True, ``inst`` holds the token sequences
                produced by ``self.inst_dict.parse``; otherwise it is the
                sampled instruction indices (``reply["inst"]``).

        Returns:
            A tuple ``(inst, inst_len, cont, reply, log_prob_reply)``:
                inst: [batch, max_sentence_len] when ``word_based`` is True,
                    otherwise the sampled indices with a trailing dim of 1.
                inst_len: [batch], lengths reported by ``inst_dict.parse``.
                cont: the sampled continue flags, same tensor as
                    ``reply["cont"]``.
                reply: dict with ``cont``, ``cont_pi``, ``inst``,
                    ``inst_pi`` and ``raw_inst``.
                log_prob_reply: dict with the untouched sampler output
                    (``samples``), both policies keyed by the sampler's
                    probability keys (``probs``), and ``value``.
        """
        output = self.rl_forward(batch, mode)
        samples = self.sampler.sample(output["cont_pi"], output["inst_pi"],
                                      batch["prev_inst_idx"])

        log_prob_reply = {
            "samples": samples,
            "probs": {
                self.sampler.cont_prob_key: output["cont_pi"],
                self.sampler.prob_key: output["inst_pi"],
            },
            "value": output["v"],
        }

        reply = {
            "cont": samples["cont"].unsqueeze(1),
            "cont_pi": output["cont_pi"],
            "inst": samples["inst"].unsqueeze(1),
            "inst_pi": output["inst_pi"],
        }

        # Convert each sampled instruction index into the formats needed by
        # the executor: token ids, sentence length and raw encoding.
        token_seqs = []
        lengths = []
        raws = []
        for idx in reply["inst"]:
            inst_str = self.inst_dict.get_inst(int(idx.item()))
            tokens, length = self.inst_dict.parse(inst_str, True)
            token_seqs.append(tokens)
            lengths.append(length)
            # Encode once; the result was previously computed twice and the
            # first copy discarded.
            raws.append(
                convert_to_raw_instruction(inst_str, self.max_raw_chars))

        device = reply["cont"].device
        if word_based:
            inst = torch.LongTensor(token_seqs).to(device)
        else:
            inst = reply["inst"]

        inst_len = torch.LongTensor(lengths).to(device)
        reply["raw_inst"] = torch.LongTensor(raws).to(device)

        return inst, inst_len, reply["cont"], reply, log_prob_reply
Example #2
0
    def _get_human_instruction(self, batch):
        """Read an instruction from stdin and package it like a sampled one.

        Only a batch of size one is supported (checked via ``assert_eq``).
        An empty input line means "continue": the previously entered
        instruction stored in ``self.prev_inst`` is reused and the continue
        flag is set to 1.

        Args:
            batch: dict containing a ``'prev_inst'`` tensor whose first
                dimension is 1; its device is used for every tensor
                created here.

        Returns:
            A tuple ``(inst, inst_len, inst_cont, reply)``:
                inst: parsed tokens of the instruction with a leading
                    batch dimension of 1.
                inst_len: length reported by ``inst_dict.parse``, shape [1].
                inst_cont: continue flag (0 or 1), shape [1].
                reply: dict with ``inst``, ``inst_pi`` (uniform over
                    ``self.num_insts``), ``cont``, ``cont_pi`` (uniform over
                    two outcomes) and ``raw_inst``.
        """
        assert_eq(batch['prev_inst'].size(0), 1)
        device = batch['prev_inst'].device

        inst = input('Please input your instruction\n')

        # NOTE(review): the index is resolved from the raw input, before the
        # empty-input fallback below, so for an empty line it is whatever
        # get_inst_idx('') returns rather than the index of the reused
        # instruction -- confirm this is intended.
        inst_idx = torch.zeros((1, )).long().to(device)
        inst_idx[0] = self.executor.inst_dict.get_inst_idx(inst)

        inst_cont = torch.zeros((1, )).long().to(device)
        if not inst:
            # Empty input: keep executing the previous instruction.
            inst = self.prev_inst
            inst_cont[0] = 1

        self.prev_inst = inst
        raw = convert_to_raw_instruction(inst, self.max_raw_chars)
        tokens, length = self.executor.inst_dict.parse(inst, True)

        inst_tensor = torch.LongTensor(tokens).unsqueeze(0).to(device)
        inst_len = torch.LongTensor([length]).to(device)
        raw_inst = torch.LongTensor([raw]).to(device)

        reply = {
            'inst': inst_idx.unsqueeze(1),
            'inst_pi':
            torch.ones(1, self.num_insts).to(device) / self.num_insts,
            'cont': inst_cont.unsqueeze(1),
            'cont_pi': torch.ones(1, 2).to(device) / 2,
            'raw_inst': raw_inst
        }

        return inst_tensor, inst_len, inst_cont, reply
Example #3
0
    def sample(self, batch, mode, word_based=True, agent_mask=None):
        """Sample a continue flag and an instruction for each batch entry.

        Used for the actor in ELF and for visually evaluating the model.

        Args:
            batch: dict passed to ``self.rl_forward``; it must also contain
                ``'prev_inst_idx'``, which is handed to the sampler.
            mode: forwarded unchanged to ``self.rl_forward``.
            word_based: when True, ``inst`` holds the token sequences
                produced by ``self.inst_dict.parse``; otherwise it is the
                sampled instruction indices (``reply['inst']``).
            agent_mask: forwarded unchanged to ``self.rl_forward`` as the
                ``agent_mask`` keyword argument.

        Returns:
            A tuple ``(inst, inst_len, cont, reply)``:
                inst: [batch, max_sentence_len] when ``word_based`` is True,
                    otherwise the sampled indices with a trailing dim of 1.
                inst_len: [batch], lengths reported by ``inst_dict.parse``.
                cont: the sampled continue flags, same tensor as
                    ``reply['cont']``.
                reply: dict with ``cont``, ``cont_pi``, ``inst``,
                    ``inst_pi`` and ``raw_inst``.
        """
        output = self.rl_forward(batch, mode, agent_mask=agent_mask)
        sampled = self.sampler.sample(output['cont_pi'], output['inst_pi'],
                                      batch['prev_inst_idx'])

        reply = {
            'cont': sampled['cont'].unsqueeze(1),
            'cont_pi': output['cont_pi'],
            'inst': sampled['inst'].unsqueeze(1),
            'inst_pi': output['inst_pi'],
        }

        # Convert each sampled instruction index into the formats needed by
        # the executor: token ids, sentence length and raw encoding.
        token_seqs = []
        lengths = []
        raws = []
        for idx in reply['inst']:
            inst_str = self.inst_dict.get_inst(int(idx.item()))
            tokens, length = self.inst_dict.parse(inst_str, True)
            token_seqs.append(tokens)
            lengths.append(length)
            # Encode once; the result was previously computed twice and the
            # first copy discarded.
            raws.append(
                convert_to_raw_instruction(inst_str, self.max_raw_chars))

        device = reply['cont'].device
        if word_based:
            inst = torch.LongTensor(token_seqs).to(device)
        else:
            inst = reply['inst']

        inst_len = torch.LongTensor(lengths).to(device)
        reply['raw_inst'] = torch.LongTensor(raws).to(device)
        return inst, inst_len, reply['cont'], reply