def __getitem__(self, index):
    file_index = index // self.traj_per_file
    path = self.filenames[file_index]

    try:
        with h5py.File(path, 'r') as F:
            ex_index = index % self.traj_per_file  # get the index
            key = 'traj{}'.format(ex_index)

            # Fetch data into a dict
            # NOTE: Dataset.value was removed in h5py 3.0; on newer h5py use F[...][()] instead
            if key + '/images' in F.keys():
                data_dict = AttrDict(images=(F[key + '/images'].value))
            else:
                data_dict = AttrDict()
            for name in F[key].keys():
                if name in ['states', 'actions', 'pad_mask']:
                    data_dict[name] = F[key + '/' + name].value.astype(np.float32)

            # Make length consistent
            end_ind = np.argmax(data_dict.pad_mask * np.arange(data_dict.pad_mask.shape[0], dtype=np.float32), 0)
            start_ind = np.random.randint(0, end_ind - 1) if self.randomize_start else 0
            start_ind, end_ind, data_dict = self.sample_max_len_video(data_dict, start_ind, end_ind)

            # Randomize length
            if self.randomize_length:
                end_ind = self._randomize_length(start_ind, end_ind, data_dict)

            # Collect data into the format the model expects
            data_dict.end_ind = end_ind
            data_dict.start_ind = start_ind

            self.process_data_dict(data_dict)
    except Exception as e:
        raise ValueError("Problem when loading file from {}".format(path)) from e

    return data_dict
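
# A minimal, self-contained sketch (an illustration added here, not from the original repo)
# of how the `end_ind` computation above works: multiplying the binary pad_mask by an arange
# and taking the argmax returns the index of the last frame whose mask entry is 1.
import numpy as np

pad_mask = np.array([1, 1, 1, 1, 0, 0], dtype=np.float32)  # 4 real frames, 2 padded
end_ind = np.argmax(pad_mask * np.arange(pad_mask.shape[0], dtype=np.float32), 0)
print(end_ind)  # -> 3, index of the last non-padded frame
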
def forward(self, inputs, phase='train'):
    """
    Forward pass at training time.
    :param inputs: AttrDict with
        images: shape = batch x time x height x width x channel
        pad_mask: shape = batch x time; 1 indicates an actual image, 0 indicates padding
    :param phase: 'train' or 'val'
    :return: AttrDict of model outputs
    """
    if self._hp.non_goal_conditioned:
        if 'demo_seq' in inputs:
            inputs.demo_seq[torch.arange(inputs.demo_seq.shape[0]), inputs.end_ind] = 0.0
            inputs.demo_seq_images[torch.arange(inputs.demo_seq.shape[0]), inputs.end_ind] = 0.0
        inputs.I_g = torch.zeros_like(inputs.I_g)
        if "I_g_image" in inputs:
            inputs.I_g_image = torch.zeros_like(inputs.I_g_image)
        if inputs.I_0.shape[-1] == 5:  # special hack for maze
            inputs.I_0[..., -2:] = 0.0
            if "demo_seq" in inputs:
                inputs.demo_seq[..., -2:] = 0.0

    # swap in actions if we want to train action sequence decoder
    if self._hp.train_on_action_seqs:
        inputs.demo_seq = torch.cat([inputs.actions, torch.zeros_like(inputs.actions[:, :1])], dim=1)

    model_output = AttrDict()
    inputs.reference_tensor = find_tensor(inputs)

    if 'start_ind' not in inputs:
        start_ind = torch.zeros(self._hp.batch_size, dtype=torch.long, device=inputs.reference_tensor.device)
    else:
        start_ind = inputs.start_ind

    self.run_encoder(inputs, start_ind)

    end_ind = inputs.end_ind if 'end_ind' in inputs else None
    if self._hp.regress_length:
        # predict total sequence length
        model_output.update(self.length_pred(inputs.enc_e_0, inputs.enc_e_g))
        if self._use_pred_length and (self._hp.length_pred_weight > 0 or end_ind is None):
            end_ind = torch.argmax(model_output.seq_len_pred.sample().long(), dim=1)
            if self._hp.action_conditioned_pred or self._hp.non_goal_conditioned:
                # don't use predicted length when action conditioned
                end_ind = torch.ones_like(end_ind) * (self._hp.max_seq_len - 1)

    # TODO clean this up. model_output.end_ind is not currently used anywhere
    model_output.end_ind = end_ind

    # Run the model to generate sequences
    model_output.update(self.predict_sequence(inputs, model_output, start_ind, end_ind, phase))

    if self.prune_sequences:
        if phase == 'train':
            inputs.model_enc_seq = self.get_matched_pruned_seqs(inputs, model_output)
        else:
            inputs.model_enc_seq = self.get_predicted_pruned_seqs(inputs, model_output)
        inputs.model_enc_seq = pad_sequence(inputs.model_enc_seq, batch_first=True)
        if len(inputs.model_enc_seq.shape) == 5:
            # strip trailing spatial dims (e.g. if encodings are batch x time x channel x 1 x 1)
            inputs.model_enc_seq = inputs.model_enc_seq[..., 0, 0]

    if self._hp.attach_inv_mdl and phase == 'train':
        model_output.update(self.inv_mdl(inputs, full_seq=self._inv_mdl_full_seq or self._hp.train_inv_mdl_full_seq))

    if self._hp.attach_state_regressor:
        regressor_inputs = inputs.model_enc_seq
        if not self._hp.supervised_decoder:
            regressor_inputs = regressor_inputs.detach()
        model_output.regressed_state = batch_apply(regressor_inputs, self.state_regressor)

    if self._hp.attach_cost_mdl and self._hp.run_cost_mdl and phase == 'train':
        # There is an issue here since SVG doesn't output a latent for the first image.
        # Beyond conceptual problems, this breaks if end_ind = 199.
        model_output.update(self.cost_mdl(inputs))

    return model_output
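
# A minimal, runnable sketch (an illustration added here, not from the original repo) of the
# advanced-indexing trick used in the non_goal_conditioned branch above: indexing a
# (batch x time x ...) tensor with (arange(batch), end_ind) selects exactly one time step per
# batch element, so the assignment zeroes out the goal frame of each sequence.
import torch

B, T, D = 2, 5, 3
demo_seq = torch.ones(B, T, D)
end_ind = torch.tensor([4, 2])            # last valid index of each sequence
demo_seq[torch.arange(B), end_ind] = 0.0  # zero the goal frame per sequence
print(demo_seq[0, 4].sum().item(), demo_seq[1, 2].sum().item())  # -> 0.0 0.0
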
def __getitem__(self, index):
    if 'one_datum' in self.data_conf and self.data_conf.one_datum:
        index = 1

    file_index = index // self.traj_per_file
    path = self.filenames[file_index]

    try:
        with h5py.File(path, 'r') as F:
            ex_index = index % self.traj_per_file  # get the index
            key = 'traj{}'.format(ex_index)

            # Fetch data into a dict
            if key + '/images' in F.keys():
                data_dict = AttrDict(images=(F[key + '/images'].value))
            else:
                data_dict = AttrDict()
            for name in F[key].keys():
                if name in ['states', 'actions', 'pad_mask']:
                    data_dict[name] = F[key + '/' + name].value.astype(np.float32)

            # remove spurious states at end of trajectory
            if self.filter_repeated_tail:
                data_dict = self._filter_tail(data_dict)

            # maybe subsample seqs
            if self.subsampler is not None:
                data_dict = self._subsample_data(data_dict)

            if 'robosuite_full_state' in F[key].keys():
                data_dict.robosuite_full_state = F[key + '/robosuite_full_state'].value
            if 'regression_state' in F[key].keys():
                data_dict.states = F[key + '/regression_state'].value.astype(np.float32)

            # Make length consistent
            end_ind = np.argmax(data_dict.pad_mask * np.arange(data_dict.pad_mask.shape[0], dtype=np.float32), 0)
            start_ind = np.random.randint(0, end_ind - 1) if self.randomize_start else 0
            start_ind, end_ind, data_dict = self.sample_max_len_video(data_dict, start_ind, end_ind)

            # Randomize length
            if self.randomize_length:
                end_ind = self._randomize_length(start_ind, end_ind, data_dict)

            # repeat last frame until end of sequence
            data_dict.norep_end_ind = end_ind
            if self.repeat_tail:
                data_dict, end_ind = self._repeat_tail(data_dict, end_ind)

            # Collect data into the format the model expects
            data_dict.end_ind = end_ind
            data_dict.start_ind = start_ind

            # for roboturk env rendering
            if 'robosuite_env_name' in F[key].keys():
                data_dict.robosuite_env_name = F[key + '/robosuite_env_name'].value
            if 'robosuite_xml' in F[key].keys():
                data_dict.robosuite_xml = F[key + '/robosuite_xml'].value

            self.process_data_dict(data_dict)
    except Exception as e:
        raise ValueError("Problem when loading file from {}".format(path)) from e

    return data_dict
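
# A minimal, runnable sketch (an illustration, not from the original repo) of the HDF5 layout
# the loader above expects: one 'traj{i}' group per trajectory containing 'images', 'states',
# 'actions' and 'pad_mask' datasets, with optional 'robosuite_*' entries. The file name and
# array shapes below are placeholders.
import h5py
import numpy as np

T, H, W = 10, 64, 64
with h5py.File('example_trajs.h5', 'w') as F:
    g = F.create_group('traj0')
    g.create_dataset('images', data=np.zeros((T, H, W, 3), dtype=np.uint8))
    g.create_dataset('states', data=np.zeros((T, 7), dtype=np.float32))
    g.create_dataset('actions', data=np.zeros((T, 4), dtype=np.float32))
    g.create_dataset('pad_mask', data=np.ones((T,), dtype=np.float32))
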