Code example #1
File: inverse_mdl.py  Project: orybkin/video-gcp
    def forward(self, inputs):
        self.get_timesteps(inputs)
        enc = self.encoder.forward(
            torch.cat([inputs.img_t0, inputs.img_t1], dim=1))[0]
        output = AttrDict()
        out = self.action_pred(enc)
        if self._hp.pred_states:
            output.actions, output.states = torch.split(
                torch.squeeze(out), [2, 2], 1)
        else:
            output.actions = torch.squeeze(out)
        return output
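All of the examples on this page build their inputs and outputs as AttrDict containers and expose the result as output.actions. The sketch below illustrates that calling pattern in isolation; it assumes AttrDict is simply a dict with attribute access, and ToyInverseModel is a hypothetical stand-in for the encoder/action-prediction modules above, not the video-gcp model.

# Minimal sketch of the AttrDict input/output pattern (assumptions noted above).
import torch
import torch.nn as nn

class AttrDict(dict):
    """Dict with attribute access, assumed to match the AttrDict used in these examples."""
    __getattr__ = dict.__getitem__
    __setattr__ = dict.__setitem__

class ToyInverseModel(nn.Module):
    def __init__(self, action_dim=2):
        super().__init__()
        # Predict an action from two stacked 3-channel images (6 channels total).
        self.action_pred = nn.Sequential(
            nn.Conv2d(6, 16, 3, stride=2, padding=1),
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(16, action_dim),
        )

    def forward(self, inputs):
        out = self.action_pred(torch.cat([inputs.img_t0, inputs.img_t1], dim=1))
        output = AttrDict()
        output.actions = out
        return output

inputs = AttrDict(img_t0=torch.randn(4, 3, 32, 32),
                  img_t1=torch.randn(4, 3, 32, 32))
actions = ToyInverseModel()(inputs).actions  # shape: (4, 2)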
Code example #2
File: bc_mdl.py  Project: orybkin/video-gcp
    def forward(self, inputs, phase='train'):
        """
        forward pass at training time
        """
        if 'enc_traj_seq' not in inputs:
            enc_traj_seq, _ = self.encoder(inputs.traj_seq[:, 0]) if self._hp.train_first_action_only \
                else batch_apply(self.encoder, inputs.traj_seq)
            if self._hp.train_first_action_only:
                enc_traj_seq = enc_traj_seq[:, None]
            enc_traj_seq = enc_traj_seq.detach() if self.detach_enc else enc_traj_seq

        enc_goal, _ = self.encoder(inputs.I_g)
        n_dim = len(enc_goal.shape)
        fused_enc = torch.cat((enc_traj_seq, enc_goal[:, None].repeat(
            1, enc_traj_seq.shape[1], *([1] * (n_dim - 1)))),
                              dim=2)
        #fused_enc = torch.cat((enc_traj_seq, enc_goal[:, None].repeat(1, enc_traj_seq.shape[1], 1, 1, 1)), dim=2)

        if self._hp.reactive:
            actions_pred = batch_apply(self.policy, fused_enc)
        else:
            policy_output = self.policy(fused_enc)
            actions_pred = policy_output

        # remove last time step to match ground truth if training on full sequence
        actions_pred = actions_pred[:, :-1] if not self._hp.train_first_action_only else actions_pred

        output = AttrDict()
        output.actions = remove_spatial(actions_pred) if len(actions_pred.shape) > 3 else actions_pred
        return output
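Several of these examples apply a per-frame module to an entire trajectory through batch_apply. The helper below is an illustrative guess at that behavior (fold the time dimension into the batch dimension, apply the function, unfold the result); it is named batch_apply_sketch to make clear it is an assumption, not the helper from video-gcp.

import torch

def batch_apply_sketch(fn, x):
    # Fold (batch, time, ...) into (batch * time, ...), apply fn, then unfold.
    b, t = x.shape[:2]
    out = fn(x.reshape(b * t, *x.shape[2:]))
    return out.reshape(b, t, *out.shape[1:])

frames = torch.randn(4, 10, 3, 32, 32)              # batch of 10-step image sequences
enc = batch_apply_sketch(lambda im: im.mean(dim=(2, 3)), frames)
print(enc.shape)                                    # torch.Size([4, 10, 3])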
Code example #3
File: inverse_mdl.py  Project: orybkin/video-gcp
    def forward(self, inputs):
        self.get_timesteps(inputs)
        actions_pred = self.action_pred(inputs.state_t0[:, :, None, None],
                                        inputs.state_t1[:, :, None, None])
        output = AttrDict()
        output.actions = torch.squeeze(actions_pred)
        return output
Code example #4
File: inverse_mdl.py  Project: orybkin/video-gcp
    def full_seq_forward(self, inputs):
        if 'model_enc_seq' in inputs:
            enc_seq_1 = inputs.model_enc_seq[:, 1:]
            if self._hp.train_im0_enc and 'enc_traj_seq' in inputs:
                enc_seq_0 = inputs.enc_traj_seq.reshape(
                    inputs.enc_traj_seq.shape[:2] +
                    (self._hp.nz_enc, ))[:, :-1]
                enc_seq_0 = enc_seq_0[:, :enc_seq_1.shape[1]]
            else:
                enc_seq_0 = inputs.model_enc_seq[:, :-1]
        else:
            enc_seq = batch_apply(self.encoder, inputs.traj_seq)
            enc_seq_0, enc_seq_1 = enc_seq[:, :-1], enc_seq[:, 1:]

        if self.detach_enc:
            enc_seq_0 = enc_seq_0.detach()
            enc_seq_1 = enc_seq_1.detach()

        # TODO quite sure the concatenation is automatic
        actions_pred = batch_apply(self.action_pred,
                                   torch.cat([enc_seq_0, enc_seq_1], dim=2))

        output = AttrDict()
        output.actions = actions_pred  #remove_spatial(actions_pred)
        if 'actions' in inputs:
            output.action_targets = inputs.actions
            output.pad_mask = inputs.pad_mask
        return output
Code example #5
    def make_traj(self, agent_data, obs, policy_out):
        traj = AttrDict()

        if not self.do_not_save_images:
            traj.images = obs['images']
        traj.states = obs['state']
        
        action_list = [action['actions'] for action in policy_out]
        traj.actions = np.stack(action_list, 0)
        
        traj.pad_mask = get_pad_mask(traj.actions.shape[0], self.max_num_actions)
        traj = pad_traj_timesteps(traj, self.max_num_actions)

        if 'robosuite_xml' in obs:
            traj.robosuite_xml = obs['robosuite_xml'][0]
        if 'robosuite_env_name' in obs:
            traj.robosuite_env_name = obs['robosuite_env_name'][0]
        if 'robosuite_full_state' in obs:
            traj.robosuite_full_state = obs['robosuite_full_state']

        # minimal state that contains all information to position entities in the env
        if 'regression_state' in obs:
            traj.regression_state = obs['regression_state']

        return traj
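make_traj pads every trajectory to max_num_actions and records which timesteps are real through get_pad_mask. The helpers below sketch that padding logic under the assumption that the mask is 1 for executed steps and 0 for padding; the actual get_pad_mask / pad_traj_timesteps in the project may differ.

import numpy as np

def get_pad_mask_sketch(n_actions, max_num_actions):
    # 1 for executed steps, 0 for padded steps (assumed semantics).
    mask = np.zeros(max_num_actions, dtype=np.float32)
    mask[:n_actions] = 1.0
    return mask

def pad_actions_sketch(actions, max_num_actions):
    # Zero-pad the action sequence along the time axis to a fixed length.
    pad = max_num_actions - actions.shape[0]
    return np.concatenate(
        [actions, np.zeros((pad,) + actions.shape[1:], dtype=actions.dtype)], axis=0)

actions = np.random.randn(7, 2).astype(np.float32)  # 7 executed 2-D actions
padded = pad_actions_sketch(actions, 10)            # shape (10, 2)
mask = get_pad_mask_sketch(actions.shape[0], 10)    # [1]*7 + [0]*3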
Code example #6
File: bc_policy.py  Project: codeaudit/video-gcp
    def act(self,
            t=None,
            i_tr=None,
            images=None,
            state=None,
            goal=None,
            goal_image=None):
        # Note: goal_image provides n (2) images starting from the last images of the trajectory
        self.t = t
        self.i_tr = i_tr
        self.goal_image = goal_image

        if self.policy.has_image_input:
            inputs = AttrDict(I_0=self._preprocess_input(images[t]),
                              I_g=self._preprocess_input(goal_image[-1] if len(
                                  goal_image.shape) > 4 else goal_image),
                              hidden_var=self.hidden_var)
        else:
            current = state[-1:, :2]
            goal = goal[-1:, :2]  # goal_state = np.concatenate([state[-1:, -2:], state[-1:, 2:]], axis=-1)
            inputs = AttrDict(I_0=current,
                              I_g=goal,
                              hidden_var=self.hidden_var)

        actions, self.hidden_var = self.policy(inputs)

        output = AttrDict()
        output.actions = actions.data.cpu().numpy()[0]
        return output
Code example #7
File: prm_policy.py  Project: orybkin/video-gcp
    def act(self, t=None, i_tr=None, qpos_full=None, goal=None):
        self.i_tr = i_tr
        output = AttrDict()

        if self.action_plan is None or \
                self._check_deviate(qpos_full[t, :2],
                                    self.state_plan[:, min(self.current_action, self.state_plan.shape[1]-1)]):
            self._plan(qpos_full[t], goal[t], t)
            self.current_action = 0

        done = False
        if self.current_action < self.action_plan.shape[1]:
            output.actions = self.action_plan[:, self.current_action]
        else:  # if required number of steps > planned steps
            done = True
            output.actions = np.zeros(2)
        self.current_action = self.current_action + 1
        output.done = done
        return output
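The PRM policy reports plan exhaustion through output.done and falls back to a zero action once the plan is used up. A hedged sketch of how a rollout loop might consume that contract is below; DummyPrmPolicy only mimics the act() interface above and is not the real prm_policy.

import numpy as np

class DummyPrmPolicy:
    def __init__(self, plan_len=3):
        self.action_plan = np.random.randn(2, plan_len)  # (action_dim, n_steps)
        self.current_action = 0

    def act(self, t=None, i_tr=None, qpos_full=None, goal=None):
        done = self.current_action >= self.action_plan.shape[1]
        actions = np.zeros(2) if done else self.action_plan[:, self.current_action]
        self.current_action += 1
        return {'actions': actions, 'done': done}

policy = DummyPrmPolicy()
for t in range(10):
    out = policy.act(t=t)
    if out['done']:   # plan exhausted: stop executing
        break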
Code example #8
File: inverse_mdl.py  Project: orybkin/video-gcp
    def forward(self, inputs, full_seq=None):
        """
        forward pass at training time
        :arg full_seq: if True, outputs actions for the full sequence, expects input encodings
        """
        if full_seq is None:
            full_seq = self._hp.train_full_seq

        if full_seq:
            return self.full_seq_forward(inputs)

        t0, t1 = self.sample_offsets(
            inputs.norep_end_ind if 'norep_end_ind' in inputs else inputs.end_ind)
        im0 = self.index_input(inputs.traj_seq, t0)
        im1 = self.index_input(inputs.traj_seq, t1)
        if 'model_enc_seq' in inputs:
            if self._hp.train_im0_enc and 'enc_traj_seq' in inputs:
                enc_im0 = self.index_input(inputs.enc_traj_seq, t0).reshape(
                    inputs.enc_traj_seq.shape[:1] + (self._hp.nz_enc,))
            else:
                enc_im0 = self.index_input(inputs.model_enc_seq, t0)
            enc_im1 = self.index_input(inputs.model_enc_seq, t1)
        else:
            assert self._hp.build_encoder  # need encoder if no encoded latents are given
            enc_im0 = self.encoder.forward(im0)[0]
            enc_im1 = self.encoder.forward(im1)[0]

        if self.detach_enc:
            enc_im0 = enc_im0.detach()
            enc_im1 = enc_im1.detach()

        selected_actions = self.index_input(
            inputs.actions, t0, aggregate=self._hp.aggregate_actions, t1=t1)
        selected_states = self.index_input(inputs.traj_seq_states, t0)

        if self._hp.pred_states:
            actions_pred, states_pred = torch.split(
                self.action_pred(enc_im0, enc_im1), 2, 1)
        else:
            actions_pred = self.action_pred(enc_im0, enc_im1)

        output = AttrDict()
        output.actions = remove_spatial(actions_pred)
        output.action_targets = selected_actions
        output.state_targets = selected_states
        output.img_t0, output.img_t1 = im0, im1

        return output
Code example #9
    def act(self, t=None, i_tr=None, goal=None, qpos_full=None, images=None):
        # Note: goal_image provides n (2) images starting from the last images of the trajectory

        self.t = t
        self.i_tr = i_tr
        self.log_dir_verb = self.log_dir + '/verbose/traj{}'.format(self.i_tr)
        output = AttrDict()

        if self.image_plan is None \
                or t % self._hp.replan_interval == 0 \
                or self.current_action >= self.image_plan.shape[0]:
            self.image_plan = self._plan(qpos_full, goal)

        output.actions = self.get_action(images[t])
        self.current_action = self.current_action + 1
        return output
Code example #10
File: cem_simulator.py  Project: codeaudit/video-gcp
    def rollout(self, state, goal_state, samples, rollout_len, prune=False):
        """Performs one model rollout."""
        # prepare inputs
        batch_size = samples.shape[0]
        state, goal_state = state.repeat(batch_size, 0), goal_state.repeat(batch_size, 0)
        input_dict = AttrDict(
            I_0=torch.tensor(state,
                             device=self._model.device,
                             dtype=torch.float32),
            I_g=torch.tensor(goal_state,
                             device=self._model.device,
                             dtype=torch.float32),
            start_ind=torch.tensor(np.zeros((batch_size, )),
                                   device=self._model.device).long(),
            end_ind=torch.tensor(np.ones((batch_size, )) * (rollout_len - 1),
                                 device=self._model.device).long(),
            z=torch.tensor(samples,
                           device=self._model.device,
                           dtype=torch.float32))
        input_dict = self._postprocess_inputs(input_dict)

        # perform rollout, collect outputs
        outputs = AttrDict()
        with self._model.val_mode():
            model_output = self._model(input_dict)
            end_ind = torch.max(model_output.end_ind,
                                torch.ones_like(model_output.end_ind))
        # self._logs.append(model_output)

        if prune:
            outputs.predictions = self._list2np(model_output.pruned_prediction)
        else:
            outputs.predictions = self._list2np(
                self._get_state_rollouts(input_dict, model_output, end_ind))

        outputs.actions = self._list2np(
            self._cap_to_length(model_output.actions, end_ind))
        outputs.states = self._list2np(
            self._cap_to_length(model_output.regressed_state, end_ind))
        outputs.latents = self._list2np(
            self._cap_to_length(input_dict.model_enc_seq, end_ind))

        return outputs
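rollout broadcasts one start/goal state across a batch of latent samples and returns per-sample predictions, actions, and states, which makes it natural to wrap in a CEM-style sampling loop. The sketch below shows the generic CEM refit step with a placeholder cost; the scoring function and simulator interface are assumptions, not the cem_simulator API.

import numpy as np

def rollout_cost(samples, goal):
    # Placeholder for scoring each sample's rollout against the goal.
    return np.linalg.norm(samples - goal, axis=1)

def cem_plan(goal, n_iters=5, n_samples=64, n_elites=8, z_dim=4, rng=np.random):
    mean, std = np.zeros(z_dim), np.ones(z_dim)
    for _ in range(n_iters):
        samples = rng.normal(mean, std, size=(n_samples, z_dim))  # one rollout per sample
        elites = samples[np.argsort(rollout_cost(samples, goal))[:n_elites]]
        mean, std = elites.mean(axis=0), elites.std(axis=0) + 1e-6  # refit to elites
    return mean

best_z = cem_plan(goal=np.array([1.0, -1.0, 0.5, 0.0]))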
Code example #11
    def act(self, t=None, i_tr=None, images=None, goal_image=None):
        # Note: goal_image provides n (2) images starting from the last images of the trajectory

        self.t = t
        self.i_tr = i_tr
        self.goal_image = goal_image
        self.log_dir_verb = self.log_dir + '/verbose/traj{}'.format(self.i_tr)
        output = AttrDict()

        if self.image_plan is None \
              or self.image_plan.shape[0] - 1 <= self.current_exec_step \
              or (t % self._hp.replan_interval == 0 and self.num_replans < self._hp.num_max_replans)\
              or (self._hp.replan_if_deviated and self._deviated(images[t], self.image_plan[self.current_exec_step]) and \
                  self.num_replans < self._hp.num_max_replans):
            self._plan(images[t], goal_image, t)
            self.num_replans += 1

        output.actions = self.get_action(images[t])
        self.current_exec_step = self.current_exec_step + 1
        return output
Code example #12
    def act(self, t=None, i_tr=None, images=None, goal_image=None):
        """
        Triggers planning if no plan is made yet / last plan is completely executed. Then executes current plan.
        :param t: current time step in task execution
        :param i_tr: index of currently executed task
        :param images: images of so-far executed trajectory
        :param goal_image: goal-image that should be planned towards
        """
        self.t = t
        self.i_tr = i_tr
        self.goal_image = goal_image
        self.log_dir_verb = self.log_dir + '/verbose/traj{}'.format(self.i_tr)
        output = AttrDict()

        if self.image_plan is None \
              or self.image_plan.shape[0] - 1 <= self.current_exec_step \
              or (t % self._hp.replan_interval == 0 and self.num_replans < self._hp.num_max_replans):
            self._plan(images[t], goal_image, t)
            self.num_replans += 1

        output.actions = self.get_action(images[t])
        self.current_exec_step = self.current_exec_step + 1
        return output