def forward(self, inputs):
    self.get_timesteps(inputs)
    # Encode the channel-wise concatenated image pair (img_t0, img_t1).
    enc = self.encoder.forward(torch.cat([inputs.img_t0, inputs.img_t1], dim=1))[0]

    output = AttrDict()
    out = self.action_pred(enc)
    if self._hp.pred_states:
        output.actions, output.states = torch.split(torch.squeeze(out), [2, 2], 1)
    else:
        output.actions = torch.squeeze(out)
    return output
def forward(self, inputs, phase='train'):
    """Forward pass at training time."""
    if 'enc_traj_seq' not in inputs:
        enc_traj_seq, _ = self.encoder(inputs.traj_seq[:, 0]) if self._hp.train_first_action_only \
            else batch_apply(self.encoder, inputs.traj_seq)
        if self._hp.train_first_action_only:
            enc_traj_seq = enc_traj_seq[:, None]
    else:
        # Use precomputed encodings when they are passed in.
        enc_traj_seq = inputs.enc_traj_seq
    enc_traj_seq = enc_traj_seq.detach() if self.detach_enc else enc_traj_seq

    enc_goal, _ = self.encoder(inputs.I_g)
    n_dim = len(enc_goal.shape)
    # Tile the goal encoding across time and fuse it with the per-step encodings.
    fused_enc = torch.cat(
        (enc_traj_seq, enc_goal[:, None].repeat(1, enc_traj_seq.shape[1], *([1] * (n_dim - 1)))), dim=2)

    if self._hp.reactive:
        actions_pred = batch_apply(self.policy, fused_enc)
    else:
        actions_pred = self.policy(fused_enc)

    # Remove the last time step to match ground truth when training on the full sequence.
    if not self._hp.train_first_action_only:
        actions_pred = actions_pred[:, :-1]

    output = AttrDict()
    output.actions = remove_spatial(actions_pred) if len(actions_pred.shape) > 3 else actions_pred
    return output
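# `batch_apply` is used throughout these forward passes but is not shown in this file.
# Below is a minimal sketch of its assumed behavior, inferred from the call sites above:
# fold the batch and time dimensions so a per-frame module can process a whole sequence
# in one call, then unfold the result. This is an assumption, not the repository's
# actual implementation.
import torch

def batch_apply(fn, tensor):
    """Apply `fn` to a (batch, time, ...) tensor by merging the first two dims."""
    batch, time = tensor.shape[:2]
    folded = tensor.reshape(batch * time, *tensor.shape[2:])    # (B*T, ...)
    out = fn(folded)
    unfold = lambda x: x.reshape(batch, time, *x.shape[1:])     # back to (B, T, ...)
    # Some modules (e.g. encoders) return tuples; unfold each element in that case.
    return tuple(map(unfold, out)) if isinstance(out, tuple) else unfold(out)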
def forward(self, inputs):
    self.get_timesteps(inputs)
    actions_pred = self.action_pred(inputs.state_t0[:, :, None, None],
                                    inputs.state_t1[:, :, None, None])
    output = AttrDict()
    output.actions = torch.squeeze(actions_pred)
    return output
def full_seq_forward(self, inputs):
    if 'model_enc_seq' in inputs:
        enc_seq_1 = inputs.model_enc_seq[:, 1:]
        if self._hp.train_im0_enc and 'enc_traj_seq' in inputs:
            enc_seq_0 = inputs.enc_traj_seq.reshape(
                inputs.enc_traj_seq.shape[:2] + (self._hp.nz_enc,))[:, :-1]
            enc_seq_0 = enc_seq_0[:, :enc_seq_1.shape[1]]
        else:
            enc_seq_0 = inputs.model_enc_seq[:, :-1]
    else:
        enc_seq = batch_apply(self.encoder, inputs.traj_seq)
        enc_seq_0, enc_seq_1 = enc_seq[:, :-1], enc_seq[:, 1:]

    if self.detach_enc:
        enc_seq_0 = enc_seq_0.detach()
        enc_seq_1 = enc_seq_1.detach()

    # Predict an action for every pair of consecutive encodings.
    # TODO: quite sure the concatenation is automatic
    actions_pred = batch_apply(self.action_pred, torch.cat([enc_seq_0, enc_seq_1], dim=2))

    output = AttrDict()
    output.actions = actions_pred
    if 'actions' in inputs:
        output.action_targets = inputs.actions
        output.pad_mask = inputs.pad_mask
    return output
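# `remove_spatial` appears in several forward passes above but is not defined in this
# file. One plausible implementation, assuming the action head outputs convolutional
# features whose spatial resolution has already been reduced to 1x1; the repository's
# version may differ (e.g. it might flatten non-unit spatial dims instead).
import torch

def remove_spatial(tensor):
    """Squeeze away trailing 1x1 spatial dims, e.g. (B, C, 1, 1) -> (B, C)."""
    assert tensor.shape[-2:] == (1, 1)
    return tensor.reshape(tensor.shape[:-2])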
def make_traj(self, agent_data, obs, policy_out):
    traj = AttrDict()
    if not self.do_not_save_images:
        traj.images = obs['images']
    traj.states = obs['state']

    action_list = [action['actions'] for action in policy_out]
    traj.actions = np.stack(action_list, 0)
    traj.pad_mask = get_pad_mask(traj.actions.shape[0], self.max_num_actions)
    traj = pad_traj_timesteps(traj, self.max_num_actions)

    if 'robosuite_xml' in obs:
        traj.robosuite_xml = obs['robosuite_xml'][0]
    if 'robosuite_env_name' in obs:
        traj.robosuite_env_name = obs['robosuite_env_name'][0]
    if 'robosuite_full_state' in obs:
        # Minimal state that contains all information to position entities in the env.
        traj.robosuite_full_state = obs['robosuite_full_state']
    if 'regression_state' in obs:
        traj.regression_state = obs['regression_state']
    return traj
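# `get_pad_mask` and `pad_traj_timesteps` are not shown here. Minimal sketches under the
# assumption that trajectories are zero-padded to a fixed length and the mask marks which
# steps are real, and that AttrDict behaves like a dict; the repository's versions may differ.
import numpy as np

def get_pad_mask(traj_len, max_len):
    """Return a float mask: 1.0 for real time steps, 0.0 for padding."""
    mask = np.zeros(max_len, dtype=np.float32)
    mask[:traj_len] = 1.0
    return mask

def pad_traj_timesteps(traj, max_len):
    """Zero-pad every time-indexed array in the trajectory to `max_len` steps."""
    for key, val in traj.items():
        if isinstance(val, np.ndarray) and val.shape[0] < max_len:
            pad_width = [(0, max_len - val.shape[0])] + [(0, 0)] * (val.ndim - 1)
            traj[key] = np.pad(val, pad_width)
    return traj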
def act(self, t=None, i_tr=None, images=None, state=None, goal=None, goal_image=None):
    # Note: goal_image provides n (2) images starting from the last images of the trajectory.
    self.t = t
    self.i_tr = i_tr
    self.goal_image = goal_image

    if self.policy.has_image_input:
        inputs = AttrDict(
            I_0=self._preprocess_input(images[t]),
            I_g=self._preprocess_input(goal_image[-1] if len(goal_image.shape) > 4 else goal_image),
            hidden_var=self.hidden_var)
    else:
        current = state[-1:, :2]
        goal = goal[-1:, :2]
        inputs = AttrDict(I_0=current, I_g=goal, hidden_var=self.hidden_var)

    actions, self.hidden_var = self.policy(inputs)

    output = AttrDict()
    output.actions = actions.data.cpu().numpy()[0]
    return output
def act(self, t=None, i_tr=None, qpos_full=None, goal=None):
    self.i_tr = i_tr
    output = AttrDict()

    # Re-plan if no plan exists or the executed state deviated too far from the planned one.
    if self.action_plan is None or \
            self._check_deviate(qpos_full[t, :2],
                                self.state_plan[:, min(self.current_action, self.state_plan.shape[1] - 1)]):
        self._plan(qpos_full[t], goal[t], t)
        self.current_action = 0

    done = False
    if self.current_action < self.action_plan.shape[1]:
        output.actions = self.action_plan[:, self.current_action]
    else:
        # The required number of steps exceeds the number of planned steps.
        done = True
        output.actions = np.zeros(2)
    self.current_action = self.current_action + 1
    output.done = done
    return output
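# `_check_deviate` compares the current (x, y) position against the corresponding planned
# state. A minimal sketch, assuming a simple Euclidean-distance threshold; the hyperparameter
# name `deviation_threshold` is hypothetical and not taken from this repository.
import numpy as np

def _check_deviate(self, current_pos, planned_state):
    """Return True if the executed state strayed too far from the plan."""
    return np.linalg.norm(current_pos - planned_state[:2]) > self._hp.deviation_threshold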
def forward(self, inputs, full_seq=None):
    """Forward pass at training time.

    :arg full_seq: if True, outputs actions for the full sequence; expects input encodings.
    """
    if full_seq is None:
        full_seq = self._hp.train_full_seq
    if full_seq:
        return self.full_seq_forward(inputs)

    # Sample a random pair of time steps from each sequence.
    t0, t1 = self.sample_offsets(inputs.norep_end_ind if 'norep_end_ind' in inputs else inputs.end_ind)
    im0 = self.index_input(inputs.traj_seq, t0)
    im1 = self.index_input(inputs.traj_seq, t1)

    if 'model_enc_seq' in inputs:
        if self._hp.train_im0_enc and 'enc_traj_seq' in inputs:
            enc_im0 = self.index_input(inputs.enc_traj_seq, t0).reshape(
                inputs.enc_traj_seq.shape[:1] + (self._hp.nz_enc,))
        else:
            enc_im0 = self.index_input(inputs.model_enc_seq, t0)
        enc_im1 = self.index_input(inputs.model_enc_seq, t1)
    else:
        assert self._hp.build_encoder   # need encoder if no encoded latents are given
        enc_im0 = self.encoder.forward(im0)[0]
        enc_im1 = self.encoder.forward(im1)[0]

    if self.detach_enc:
        enc_im0 = enc_im0.detach()
        enc_im1 = enc_im1.detach()

    selected_actions = self.index_input(inputs.actions, t0, aggregate=self._hp.aggregate_actions, t1=t1)
    selected_states = self.index_input(inputs.traj_seq_states, t0)

    if self._hp.pred_states:
        actions_pred, states_pred = torch.split(self.action_pred(enc_im0, enc_im1), 2, 1)
    else:
        actions_pred = self.action_pred(enc_im0, enc_im1)

    output = AttrDict()
    output.actions = remove_spatial(actions_pred)
    output.action_targets = selected_actions
    output.state_targets = selected_states
    output.img_t0, output.img_t1 = im0, im1
    return output
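# `sample_offsets` is a class helper not shown here. A rough sketch of the behavior
# assumed from its call site above: draw a random pair (t0, t1) with t0 < t1 for each
# sequence in the batch, bounded by that sequence's end index. This is an assumption,
# not the actual implementation.
import torch

def sample_offsets(self, end_ind):
    """Sample per-sequence time-step pairs (t0, t1), t0 < t1 <= end_ind (assumes end_ind >= 1)."""
    batch_size = end_ind.shape[0]
    t0 = (torch.rand(batch_size, device=end_ind.device) * end_ind.float()).long()
    max_gap = (end_ind - t0 - 1).float().clamp(min=0)
    t1 = t0 + 1 + (torch.rand(batch_size, device=end_ind.device) * max_gap).long()
    return t0, t1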
def act(self, t=None, i_tr=None, goal=None, qpos_full=None, images=None):
    self.t = t
    self.i_tr = i_tr
    self.log_dir_verb = self.log_dir + '/verbose/traj{}'.format(self.i_tr)
    output = AttrDict()

    # Re-plan if no plan exists, the replanning interval was reached, or the plan is exhausted.
    if self.image_plan is None \
            or (t % self._hp.replan_interval == 0) \
            or self.current_action >= self.image_plan.shape[0]:
        self.image_plan = self._plan(qpos_full, goal)

    output.actions = self.get_action(images[t])
    self.current_action = self.current_action + 1
    return output
def rollout(self, state, goal_state, samples, rollout_len, prune=False):
    """Performs one model rollout."""
    # prepare inputs
    batch_size = samples.shape[0]
    state, goal_state = state.repeat(batch_size, 0), goal_state.repeat(batch_size, 0)
    input_dict = AttrDict(
        I_0=torch.tensor(state, device=self._model.device, dtype=torch.float32),
        I_g=torch.tensor(goal_state, device=self._model.device, dtype=torch.float32),
        start_ind=torch.tensor(np.zeros((batch_size,)), device=self._model.device).long(),
        end_ind=torch.tensor(np.ones((batch_size,)) * (rollout_len - 1), device=self._model.device).long(),
        z=torch.tensor(samples, device=self._model.device, dtype=torch.float32))
    input_dict = self._postprocess_inputs(input_dict)

    # perform rollout, collect outputs
    outputs = AttrDict()
    with self._model.val_mode():
        model_output = self._model(input_dict)
        end_ind = torch.max(model_output.end_ind, torch.ones_like(model_output.end_ind))

        if prune:
            outputs.predictions = self._list2np(model_output.pruned_prediction)
        else:
            outputs.predictions = self._list2np(self._get_state_rollouts(input_dict, model_output, end_ind))

        outputs.actions = self._list2np(self._cap_to_length(model_output.actions, end_ind))
        outputs.states = self._list2np(self._cap_to_length(model_output.regressed_state, end_ind))
        outputs.latents = self._list2np(self._cap_to_length(input_dict.model_enc_seq, end_ind))

    return outputs
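# Hypothetical usage of `rollout` above: sample a batch of latent sequences and roll the
# model out from a start/goal state pair. `planner` is an illustrative stand-in for the
# object owning `rollout`, and all shapes and the latent size `nz_vae` are assumptions,
# not values from this repository.
import numpy as np

n_samples, rollout_len, nz_vae = 100, 30, 32
state = np.random.rand(1, 5).astype(np.float32)         # current low-dim state
goal_state = np.random.rand(1, 5).astype(np.float32)    # target state
samples = np.random.randn(n_samples, rollout_len, nz_vae).astype(np.float32)

outputs = planner.rollout(state, goal_state, samples, rollout_len, prune=True)
# outputs.predictions / outputs.actions / outputs.states hold one array per sample,
# each capped to the model's predicted sequence end.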
def act(self, t=None, i_tr=None, images=None, goal_image=None):
    # Note: goal_image provides n (2) images starting from the last images of the trajectory.
    self.t = t
    self.i_tr = i_tr
    self.goal_image = goal_image
    self.log_dir_verb = self.log_dir + '/verbose/traj{}'.format(self.i_tr)
    output = AttrDict()

    # Re-plan if no plan exists, the current plan is exhausted, the replanning interval
    # was reached, or the executed trajectory deviated too far from the plan.
    if self.image_plan is None \
            or self.image_plan.shape[0] - 1 <= self.current_exec_step \
            or (t % self._hp.replan_interval == 0 and self.num_replans < self._hp.num_max_replans) \
            or (self._hp.replan_if_deviated
                and self._deviated(images[t], self.image_plan[self.current_exec_step])
                and self.num_replans < self._hp.num_max_replans):
        self._plan(images[t], goal_image, t)
        self.num_replans += 1

    output.actions = self.get_action(images[t])
    self.current_exec_step = self.current_exec_step + 1
    return output
def act(self, t=None, i_tr=None, images=None, goal_image=None):
    """
    Triggers planning if no plan has been made yet or the last plan is fully executed,
    then executes the current plan.
    :param t: current time step in task execution
    :param i_tr: index of the currently executed task
    :param images: images of the so-far executed trajectory
    :param goal_image: goal image that should be planned towards
    """
    self.t = t
    self.i_tr = i_tr
    self.goal_image = goal_image
    self.log_dir_verb = self.log_dir + '/verbose/traj{}'.format(self.i_tr)
    output = AttrDict()

    if self.image_plan is None \
            or self.image_plan.shape[0] - 1 <= self.current_exec_step \
            or (t % self._hp.replan_interval == 0 and self.num_replans < self._hp.num_max_replans):
        self._plan(images[t], goal_image, t)
        self.num_replans += 1

    output.actions = self.get_action(images[t])
    self.current_exec_step = self.current_exec_step + 1
    return output
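# Hypothetical driver loop for the `act` methods above: query the agent once per time
# step and feed its action to the environment. `env`, `agent`, `goal_image`, and
# `max_steps` are illustrative stand-ins, not objects from this repository.
import numpy as np

obs = env.reset()
frames = [obs['images']]
for t in range(max_steps):
    out = agent.act(t=t, i_tr=0, images=np.stack(frames), goal_image=goal_image)
    obs = env.step(out.actions)
    frames.append(obs['images'])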