def __call__(self, obss, device=None):
    obs_ = torch_rl.DictList()

    if "image" in self.obs_space.keys():
        images = numpy.array([obs["image"] for obs in obss])
        images = torch.tensor(images, device=device, dtype=torch.float)
        obs_.image = images

    if "instr" in self.obs_space.keys():
        raw_instrs = []
        max_instr_len = 0

        # Tokenize each mission string and map tokens to vocabulary ids.
        for obs in obss:
            tokens = re.findall("([a-z]+)", obs["mission"].lower())
            instr = numpy.array([self.vocab[token] for token in tokens])
            raw_instrs.append(instr)
            max_instr_len = max(len(instr), max_instr_len)

        # Zero-pad every instruction to the length of the longest one.
        instrs = numpy.zeros((len(obss), max_instr_len))
        for i, instr in enumerate(raw_instrs):
            instrs[i, :len(instr)] = instr

        instrs = torch.tensor(instrs, device=device, dtype=torch.long)
        obs_.instr = instrs

    return obs_
def __call__(self, obss, device=None):
    """Converts a list of MiniGrid observations into PyTorch tensors.

    The images are stacked into one float tensor and the "carrying"
    flags into another.

    Returns
    -------
    preprocessed_obss : DictList
        Contains preprocessed images and preprocessed carrying flags.
    """
    preprocessed_obss = torch_rl.DictList()

    if "image" in self.obs_space.keys():
        images = numpy.array([obs["image"] for obs in obss])
        images = torch.tensor(images, device=device, dtype=torch.float)
        preprocessed_obss.image = images

    if "carrying" in self.obs_space:
        carryings = numpy.array([obs["carrying"] for obs in obss])
        carryings = torch.tensor(carryings, device=device, dtype=torch.float)
        preprocessed_obss.carrying = carryings

    return preprocessed_obss
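# The two methods above build a torch_rl.DictList and then read and write its
# fields as attributes. A minimal sketch of such a container, assuming only
# the behaviour used in these snippets (a hypothetical stand-in, not the
# library's actual implementation):
class DictList(dict):
    """Dict whose keys can also be accessed and assigned as attributes."""

    __getattr__ = dict.__getitem__

    def __setattr__(self, key, value):
        self[key] = value

# Usage matching the snippets: DictList({"image": t}) or d.image = t.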
def preprocess_obss(obss, device=None):
    return torch_rl.DictList({
        "image": preprocess_images([obs["image"] for obs in obss], device=device),
        "instr": preprocess_instrs([obs["mission"] for obs in obss], vocab, device=device)
    })
def preprocess_obss(obss, device=None):
    return torch_rl.DictList({
        "image": preprocess_matrix([obs["image"] for obs in obss], device=device),
        "text": preprocess_texts([obs["mission"] for obs in obss], vocab, device=device)
    })
def preprocess_obss(obss, device=None):
    return torch_rl.DictList({
        "image": preprocess_natural_images([obs["image"] for obs in obss], device=device),
        "goal": preprocess_visible_text([obs["mission"] for obs in obss], device=device),
        "rel_gps": preprocess_matrix([obs["rel_gps"] for obs in obss], device=device),
        "visible_text": preprocess_visible_text([obs["visible_text"] for obs in obss], device=device)
    })
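# The preprocess_obss variants above delegate to small helpers such as
# preprocess_matrix, preprocess_images, and preprocess_texts. A plausible
# minimal sketch of two of them, reusing the stack-then-tensor and
# tokenize-then-pad patterns from the __call__ methods above; the helper
# names come from the snippets, but these bodies are assumptions.
import re

import numpy
import torch

def preprocess_matrix(arrays, device=None):
    # Stack a list of equally-shaped arrays into one float tensor.
    return torch.tensor(numpy.array(arrays), device=device, dtype=torch.float)

def preprocess_texts(texts, vocab, device=None):
    # Tokenize each string, map tokens to vocabulary ids, and zero-pad
    # every id sequence to the length of the longest one.
    id_seqs = [[vocab[token] for token in re.findall("([a-z]+)", text.lower())]
               for text in texts]
    max_len = max(len(seq) for seq in id_seqs)
    ids = numpy.zeros((len(texts), max_len))
    for i, seq in enumerate(id_seqs):
        ids[i, :len(seq)] = seq
    return torch.tensor(ids, device=device, dtype=torch.long)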
def rollouts_batch(self, observations, introspect=False):
    batch_size = observations.size()[0]
    observations_shape = observations.size()[1:]

    # Replicate every observation once per action so the environment
    # model can imagine all actions of a step in a single forward pass.
    if batch_size == 1:
        old_observations = observations.expand(batch_size * self.n_actions, *observations_shape)
    else:
        old_observations = observations.unsqueeze(1)
        old_observations = old_observations.expand(batch_size, self.n_actions, *observations_shape)
        old_observations = old_observations.contiguous().view(-1, *observations_shape)

    actions = torch.tensor(np.tile(np.arange(0, self.n_actions, dtype=np.int64), batch_size))

    predicted_observations, predicted_rewards = [], []
    if introspect:
        predicted_actions = []

    for step_idx in range(self.imagination_steps):
        if introspect:
            predicted_actions.append(actions)
        new_observations, new_rewards = self.environment_model(old_observations, actions)
        predicted_observations.append(new_observations.detach())
        predicted_rewards.append(new_rewards.detach())

        # don't need actions for the last step
        if step_idx == self.imagination_steps - 1:
            break

        # combine the delta from EM into new observation (NCHW -> NHWC)
        old_observations = torch.transpose(torch.transpose(new_observations, 2, 3), 1, 3)

        # select actions for the next imagined step with the rollout policy
        dictlist = torch_rl.DictList()
        dictlist.image = old_observations
        distributions, _, _ = self.imagination_policy(dictlist, None)
        actions = distributions.sample()

    encoded = self.encoder(torch.stack(predicted_observations),
                           torch.stack(predicted_rewards)).view(batch_size, -1)

    if introspect:
        transposed = [torch.transpose(torch.transpose(observation, 2, 3), 1, 3)
                      for observation in predicted_observations]
        return encoded, predicted_actions, transposed, predicted_rewards
    else:
        return encoded
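# The expand/view bookkeeping at the top of rollouts_batch is easy to get
# wrong, so here is an isolated, runnable sketch of the replication trick it
# uses (dummy shapes, no environment model involved):
import torch

batch_size, n_actions, obs_dim = 2, 3, 4
observations = torch.arange(batch_size * obs_dim, dtype=torch.float).view(batch_size, obs_dim)

# Repeat each observation once per action so a single forward pass can score
# every (observation, action) pair.
old = observations.unsqueeze(1).expand(batch_size, n_actions, obs_dim)
old = old.contiguous().view(-1, obs_dim)

# Tile the action ids to match: row i of `old` pairs with actions[i].
actions = torch.arange(n_actions).repeat(batch_size)  # tensor([0, 1, 2, 0, 1, 2])

assert old.shape == (batch_size * n_actions, obs_dim)
assert torch.equal(old[:n_actions], observations[0].expand(n_actions, obs_dim))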
def __call__(self, obss, device=None):
    """Converts a list of MiniGrid observations, i.e. a list of dicts
    with "image" and "mission" fields, into two PyTorch tensors.

    The images are stacked into one tensor. The instructions are
    tokenized, the tokens are converted into lists of ids using a
    Vocabulary object, and finally the lists of ids are zero-padded
    to a common length and stacked.

    Returns
    -------
    preprocessed_obss : DictList
        Contains preprocessed images and preprocessed instructions.
    """
    preprocessed_obss = torch_rl.DictList()

    if "image" in self.obs_space.keys():
        images = numpy.array([obs["image"] for obs in obss])
        images = torch.tensor(images, device=device, dtype=torch.float)
        preprocessed_obss.image = images

    if "instr" in self.obs_space.keys():
        raw_instrs = []
        max_instr_len = 0

        # Tokenize each mission string and map tokens to vocabulary ids.
        for obs in obss:
            tokens = re.findall("([a-z]+)", obs["mission"].lower())
            instr = numpy.array([self.vocab[token] for token in tokens])
            raw_instrs.append(instr)
            max_instr_len = max(len(instr), max_instr_len)

        # Zero-pad every instruction to the length of the longest one.
        instrs = numpy.zeros((len(obss), max_instr_len))
        for i, instr in enumerate(raw_instrs):
            instrs[i, :len(instr)] = instr

        instrs = torch.tensor(instrs, device=device, dtype=torch.long)
        preprocessed_obss.instr = instrs

    return preprocessed_obss
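# A hedged usage sketch for the instruction-aware preprocessor above. The
# class name ObssPreprocessor and its constructor are assumptions; only the
# __call__ behaviour comes from the snippet itself.
import numpy
import torch

vocab = {"go": 1, "to": 2, "the": 3, "red": 4, "door": 5}
preprocessor = ObssPreprocessor(obs_space={"image": None, "instr": None}, vocab=vocab)  # hypothetical ctor

obss = [
    {"image": numpy.zeros((7, 7, 3)), "mission": "go to the red door"},
    {"image": numpy.ones((7, 7, 3)), "mission": "go to the door"},
]
batch = preprocessor(obss, device=torch.device("cpu"))
# batch.image: float tensor of shape [2, 7, 7, 3]
# batch.instr: long tensor of shape [2, 5]; the shorter mission is zero-padded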
def preprocess_obss(obss, device=None):
    return torch_rl.DictList({
        "image": preprocess_images([obs["image"] for obs in obss], device=device)
    })
def preprocess_obss(obss, device=None):
    return torch_rl.DictList(
        {"image": preprocess_matrix(obss, device=device)})
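# These standalone preprocess_obss functions are meant to be handed to, or
# called inside, a training loop rather than invoked by hand. A minimal
# consumer sketch (hypothetical wiring; envs, acmodel, and the model's output
# convention are assumptions, not taken from the snippets):
obss = [env.reset() for env in envs]               # raw MiniGrid obs dicts
preprocessed = preprocess_obss(obss, device=device)
dist, value = acmodel(preprocessed)                # model reads .image / .text etc.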