Ejemplo n.º 1
0
    def step(self,
             obs,
             expert_qv,
             expert_action,
             apply_noise=True,
             compute_Q=True):
        """Evaluate the actor on one observation together with expert inputs.

        The perturbed actor is used when parameter noise is configured and
        ``apply_noise`` is set; otherwise the plain actor is used. Each input
        is wrapped in a list before being shape-adjusted to its placeholder.

        Returns:
            ``(action, q, None, None)``. ``q`` is ``None`` unless
            ``compute_Q`` is true. The action is clipped to
            ``self.action_range``.
        """
        use_perturbed = apply_noise and self.param_noise is not None
        policy_op = self.perturbed_actor_tf if use_perturbed else self.actor_tf

        inputs = (
            (self.obs0, obs),
            (self.expert_qv, expert_qv),
            (self.expert_actions, expert_action),
        )
        feeds = {ph: U.adjust_shape(ph, [value]) for ph, value in inputs}

        q = None
        if compute_Q:
            action, q = self.sess.run([policy_op, self.critic_with_actor_tf],
                                      feed_dict=feeds)
        else:
            action = self.sess.run(policy_op, feed_dict=feeds)

        if apply_noise and self.action_noise is not None:
            sampled = self.action_noise()
            # The noise sample must match the shape of a single action row.
            assert sampled.shape == action[0].shape
            action += sampled

        low, high = self.action_range[0], self.action_range[1]
        return np.clip(action, low, high), q, None, None
Ejemplo n.º 2
0
    def act_interpolate(self, obs, states, b_states, dones):
        """Run the main and burn-in act models on the same observations.

        ``obs`` is fed to both models' ``X`` placeholders. When ``states`` is
        not ``None``, ``states`` and ``b_states`` are fed to the respective
        ``S`` placeholders. ``dones`` is accepted but not used here.

        Returns:
            ``(a, v, b_v, state, b_state, neglogp, lm, b_lm, b_a)``; the
            ``b_`` values are fetched from the burn-in model. A fetched state
            whose ``size`` is 0 is returned as ``None``.
        """
        session = self.model.sess
        main = self.model.act_model
        burnin = self.model_burnin.act_model

        feeds = {
            main.X: adjust_shape(main.X, obs),
            burnin.X: adjust_shape(burnin.X, obs),
        }
        if states is not None:
            feeds[main.S] = adjust_shape(main.S, states)
            feeds[burnin.S] = adjust_shape(burnin.S, b_states)

        # Fetch order must match the unpacking order below.
        fetches = [
            main.action_run, main.vf_run, main.state,
            burnin.vf_run, burnin.state,
            main.neglogp_run, main.latent_mean,
            burnin.latent_mean, burnin.action_run,
        ]
        (a, v, state, b_v, b_state, neglogp, lm, b_lm,
         b_a) = session.run(fetches, feeds)

        state = None if state.size == 0 else state
        b_state = None if b_state.size == 0 else b_state

        return a, v, b_v, state, b_state, neglogp, lm, b_lm, b_a
Ejemplo n.º 3
0
    def step(self,
             obs,
             apply_noise=True,
             compute_Q=True,
             states=None,
             masks=None):
        """Evaluate the actor on one observation, optionally with state/mask.

        ``masks`` and ``states`` are only fed (to ``self.mask0`` and
        ``self.state0``) when both are provided. The perturbed actor is used
        when parameter noise is configured and ``apply_noise`` is set.

        Returns:
            ``(action, q, None, None)``. ``q`` is ``None`` unless
            ``compute_Q`` is true. The action is clipped to
            ``self.action_range``.
        """
        if apply_noise and self.param_noise is not None:
            policy_op = self.perturbed_actor_tf
        else:
            policy_op = self.actor_tf

        feeds = {self.obs0: U.adjust_shape(self.obs0, [obs])}
        if states is not None and masks is not None:
            feeds[self.mask0] = U.adjust_shape(self.mask0, [masks])
            feeds[self.state0] = U.adjust_shape(self.state0, [states])

        q = None
        if compute_Q:
            action, q = self.sess.run([policy_op, self.critic_with_actor_tf],
                                      feed_dict=feeds)
        else:
            action = self.sess.run(policy_op, feed_dict=feeds)

        if apply_noise and self.action_noise is not None:
            sampled = self.action_noise()
            assert sampled.shape == action[0].shape
            action += sampled

        clipped = np.clip(action, self.action_range[0], self.action_range[1])
        return clipped, q, None, None
Ejemplo n.º 4
0
 def prob(self, observation, a):
     """Evaluate ``self.pdf`` for the given observation and action.

     Uses ``self.sess`` when it is truthy, otherwise TensorFlow's default
     session. Both inputs are shape-adjusted to their placeholders.
     """
     session = self.sess or tf.get_default_session()
     feeds = {}
     feeds[self.X] = adjust_shape(self.X, observation)
     feeds[self.action_ph] = adjust_shape(self.action_ph, a)
     # A one-element fetch list is run; its single result is returned.
     fetched = session.run([self.pdf], feeds)
     return fetched[0]
Ejemplo n.º 5
0
 def __call__(self, obs, action):
     """Run ``self.critic_tf`` on an observation/action pair.

     A marker line is printed before and after the session call. The
     return value is whatever ``sess.run`` gives for the one-element
     fetch list ``[self.critic_tf]``.
     """
     print("Expert call")
     feeds = {}
     feeds[self.obs0] = U.adjust_shape(self.obs0, obs)
     feeds[self.actions] = U.adjust_shape(self.actions, action)
     result = self.sess.run([self.critic_tf], feed_dict=feeds)
     print("Expert return")
     return result
Ejemplo n.º 6
0
 def get_batch_bonus_and_update(self, observation, **extra_feed):
     """Fetch ``self.bonus`` and ``self.train`` in one session call.

     ``observation`` feeds ``self.X``. Each keyword in ``extra_feed`` that
     names an instance attribute holding a placeholder tensor is fed too;
     any other keyword is ignored.

     Returns:
         The value fetched for ``self.bonus``.
     """
     session = self.sess
     feeds = {self.X: adjust_shape(self.X, observation)}
     attrs = self.__dict__
     for name, value in extra_feed.items():
         candidate = attrs.get(name)
         if not isinstance(candidate, tf.Tensor):
             continue
         if candidate._op.type != 'Placeholder':
             continue
         feeds[candidate] = adjust_shape(candidate, value)
     fetched = session.run({"bonus": self.bonus, "train": self.train}, feeds)
     return fetched["bonus"]
Ejemplo n.º 7
0
    def _evaluate(self, variables, observation, **extra_feed):
        """Run ``variables`` in ``self.sess`` with ``observation`` fed to X.

        Keywords in ``extra_feed`` are fed only when they name an instance
        attribute that is a placeholder tensor; all others are skipped.
        """
        session = self.sess
        feeds = {self.X: adjust_shape(self.X, observation)}
        attrs = self.__dict__
        for name, value in extra_feed.items():
            if name not in attrs:
                continue
            target = attrs[name]
            is_placeholder = (isinstance(target, tf.Tensor)
                              and target._op.type == 'Placeholder')
            if is_placeholder:
                feeds[target] = adjust_shape(target, value)

        return session.run(variables, feeds)
Ejemplo n.º 8
0
    def _evaluate(self, variables, observation, **extra_feed):
        """Evaluate ``variables`` for ``observation`` plus optional feeds.

        ``observation`` is shape-adjusted and fed to ``self.X``. An entry of
        ``extra_feed`` is used only if its key is an instance attribute whose
        value is a ``tf.Tensor`` produced by a ``Placeholder`` op.
        """
        session = self.sess
        own = self.__dict__
        feeds = {self.X: adjust_shape(self.X, observation)}
        feeds.update({
            own[key]: adjust_shape(own[key], payload)
            for key, payload in extra_feed.items()
            if key in own and isinstance(own[key], tf.Tensor)
            and own[key]._op.type == 'Placeholder'
        })
        return session.run(variables, feeds)
Ejemplo n.º 9
0
    def _evaluate(self, variables, observation, **extra_feed):
        """Run ``variables`` with ``observation`` and matching extra feeds.

        Uses ``self.sess`` when it is truthy, otherwise TensorFlow's default
        session. Extra keywords are fed only when they name an instance
        attribute that is a placeholder tensor.
        """
        session = self.sess or tf.get_default_session()
        feeds = {self.X: adjust_shape(self.X, observation)}
        attrs = self.__dict__
        for name in extra_feed:
            target = attrs.get(name)
            if not (isinstance(target, tf.Tensor)
                    and target._op.type == 'Placeholder'):
                continue
            feeds[target] = adjust_shape(target, extra_feed[name])

        # `variables` is passed to sess.run as the fetch argument.
        return session.run(variables, feeds)
Ejemplo n.º 10
0
 def _evaluate(self, variables, observation, **extra_feed):
     """Run ``variables`` in ``self.sess`` for the given observation.

     ``observation`` feeds ``self.X``; entries of ``extra_feed`` are added
     only when the key names an instance attribute that is a placeholder
     tensor.
     """
     session = self.sess
     feeds = {self.X: adjust_shape(self.X, observation)}
     attrs = self.__dict__
     for name, value in extra_feed.items():
         if name not in attrs:
             continue
         target = attrs[name]
         if isinstance(target, tf.Tensor):
             if target._op.type == 'Placeholder':
                 feeds[target] = adjust_shape(target, value)
     # A disabled snippet used to feed self.dropoutpi_keep_prob directly from
     # extra_feed['dropoutpi_keep_prob']; it is intentionally not active.
     return session.run(variables, feeds)
Ejemplo n.º 11
0
    def step(self, obs, apply_noise=True, compute_Q=True):
        """Evaluate the actor on one observation and return a single action.

        The perturbed actor is used when parameter noise is configured and
        ``apply_noise`` is set. Action noise, when configured and enabled, is
        added before clipping to ``self.action_range``.

        Returns:
            ``(action[0], q, None, None)``: the first row of the clipped
            action. ``q`` is ``None`` unless ``compute_Q`` is true.
        """
        noisy_params = apply_noise and self.param_noise is not None
        policy_op = self.perturbed_actor_tf if noisy_params else self.actor_tf

        # Only the raw observation placeholder is fed.
        feeds = {self.obs0: U.adjust_shape(self.obs0, [obs])}

        if not compute_Q:
            action = self.sess.run(policy_op, feed_dict=feeds)
            q = None
        else:
            action, q = self.sess.run([policy_op, self.critic_with_actor_tf],
                                      feed_dict=feeds)

        if apply_noise and self.action_noise is not None:
            sampled = self.action_noise()
            assert sampled.shape == action[0].shape
            action += sampled

        clipped = np.clip(action, self.action_range[0], self.action_range[1])
        return clipped[0], q, None, None
Ejemplo n.º 12
0
    def step(self, obs, apply_noise=True, compute_Q=True):
        """Apply the policy to an observation.

        About noise: when *deploying* DDPG (for example with the ``--play``
        option) ``apply_noise`` should probably be False.

        Returns:
            ``(action, q, None, None)``. ``q`` is ``None`` unless
            ``compute_Q`` is true. The action is clipped to
            ``self.action_range``.
        """
        if apply_noise and self.param_noise is not None:
            policy_op = self.perturbed_actor_tf
        else:
            policy_op = self.actor_tf

        feeds = {self.obs0: U.adjust_shape(self.obs0, [obs])}

        q = None
        if compute_Q:
            action, q = self.sess.run([policy_op, self.critic_with_actor_tf],
                                      feed_dict=feeds)
        else:
            action = self.sess.run(policy_op, feed_dict=feeds)

        if apply_noise and self.action_noise is not None:
            sampled = self.action_noise()
            # Unlike variants that compare against action[0], this one
            # requires the noise to match the full action array's shape.
            assert sampled.shape == action.shape, '{} {}'.format(
                sampled.shape, action.shape)
            action += sampled

        low, high = self.action_range[0], self.action_range[1]
        return np.clip(action, low, high), q, None, None
Ejemplo n.º 13
0
    def _evaluate(self, variables, observation, **extra_feed):
        """Run ``variables`` for single or list-valued observations.

        When ``self.is_list_obs`` is true, ``self.X`` is iterated and the
        i-th placeholder is fed ``observation[i]``; otherwise ``observation``
        feeds ``self.X`` directly. Extra keywords are fed only when they name
        an instance attribute that is a placeholder tensor.
        """
        session = self.sess
        if self.is_list_obs:
            feeds = {
                ph: adjust_shape(ph, observation[position])
                for position, ph in enumerate(self.X)
            }
        else:
            feeds = {self.X: adjust_shape(self.X, observation)}

        attrs = self.__dict__
        for name, value in extra_feed.items():
            target = attrs.get(name)
            if not isinstance(target, tf.Tensor):
                continue
            if target._op.type == 'Placeholder':
                feeds[target] = adjust_shape(target, value)

        return session.run(variables, feeds)
Ejemplo n.º 14
0
    def adv_gradient(self, obs, reward, actions, old_obs):
        """Fetch ``self.grads`` for the given batch.

        Each argument is shape-adjusted and fed to its placeholder:
        ``obs`` -> ``self.X``, ``reward`` -> ``self.reward``,
        ``actions`` -> ``self.action``, ``old_obs`` -> ``self.old_X``.
        """
        bindings = (
            (self.X, obs),
            (self.reward, reward),
            (self.action, actions),
            (self.old_X, old_obs),
        )
        feeds = {ph: adjust_shape(ph, data) for ph, data in bindings}
        return self.sess.run(self.grads, feeds)
Ejemplo n.º 15
0
    def step(self, obs, apply_noise=True, compute_Q=True):
        """Evaluate the actor on one observation, with optional noise.

        The perturbed actor is used when parameter noise is configured and
        ``apply_noise`` is set.

        Returns:
            ``(action, q, None, None)``. ``q`` is ``None`` unless
            ``compute_Q`` is true. The action is clipped to
            ``self.action_range``.
        """
        perturbed = apply_noise and self.param_noise is not None
        policy_op = self.perturbed_actor_tf if perturbed else self.actor_tf

        # The observation is fed to obs0 only.
        feeds = {self.obs0: U.adjust_shape(self.obs0, [obs])}

        q = None
        if compute_Q:
            action, q = self.sess.run([policy_op, self.critic_with_actor_tf],
                                      feed_dict=feeds)
        else:
            action = self.sess.run(policy_op, feed_dict=feeds)

        # Add noise to the action for exploration.
        if apply_noise and self.action_noise is not None:
            sampled = self.action_noise()
            assert sampled.shape == action[0].shape
            action += sampled

        bounded = np.clip(action, self.action_range[0], self.action_range[1])
        return bounded, q, None, None
Ejemplo n.º 16
0
 def __call__(self, obs):
     """Run ``self.actor_tf`` on ``obs``.

     A marker line is printed before and after the session call. The
     return value is whatever ``sess.run`` gives for the one-element
     fetch list ``[self.actor_tf]``.
     """
     print("Expert Actor call")
     placeholder = self.obs0
     feeds = {placeholder: U.adjust_shape(placeholder, obs)}
     result = self.sess.run([self.actor_tf], feed_dict=feeds)
     print("Expert Actor return")
     return result
Ejemplo n.º 17
0
    def learnt_step(self, obs):
        """Evaluate the unperturbed actor on one observation.

        No noise is applied and no Q value is computed.

        Returns:
            ``(action, None, None, None)`` with the action clipped to
            ``self.action_range``.
        """
        feeds = {self.obs0: U.adjust_shape(self.obs0, [obs])}
        raw_action = self.sess.run(self.actor_tf, feed_dict=feeds)
        low, high = self.action_range[0], self.action_range[1]
        return np.clip(raw_action, low, high), None, None, None
Ejemplo n.º 18
0
    def _evaluate(self, variables, observation, **extra_feed):
        """Run ``variables`` in ``self.sess`` for the given observation.

        ``observation`` is shape-adjusted and fed to ``self.X``. A keyword in
        ``extra_feed`` is fed only when it names an instance attribute that
        is a placeholder tensor.
        """
        session = self.sess
        feeds = {self.X: adjust_shape(self.X, observation)}

        def _placeholder_for(name):
            # Return the placeholder stored under `name`, or None.
            target = self.__dict__.get(name)
            if isinstance(target, tf.Tensor) and target._op.type == 'Placeholder':
                return target
            return None

        for name, value in extra_feed.items():
            target = _placeholder_for(name)
            if target is not None:
                feeds[target] = adjust_shape(target, value)

        return session.run(variables, feeds)
Ejemplo n.º 19
0
 def cal_neglogp(self, observation, a):
     """Fetch ``self.neglogp_modified`` for a given observation and action.

     Per the original author's note, this computes the negative log
     probability of ``a``. ``observation`` is shape-adjusted for ``self.X``;
     ``a`` is fed to ``self.action_modified`` as-is.
     """
     session = self.sess
     feeds = {self.X: adjust_shape(self.X, observation)}
     feeds[self.action_modified] = a
     return session.run(self.neglogp_modified, feeds)
Ejemplo n.º 20
0
    def step(self, obs, compute_Q=True):
        """Evaluate the actor on one observation without any noise.

        Returns:
            ``(action, q, None, None)``. ``q`` is ``None`` unless
            ``compute_Q`` is true. The action is clipped to
            ``self.action_range``.
        """
        feeds = {self.obs0: U.adjust_shape(self.obs0, [obs])}

        q = None
        if compute_Q:
            action, q = self.sess.run(
                [self.actor_tf, self.critic_with_actor_tf], feed_dict=feeds)
        else:
            action = self.sess.run(self.actor_tf, feed_dict=feeds)

        low, high = self.action_range[0], self.action_range[1]
        return np.clip(action, low, high), q, None, None
Ejemplo n.º 21
0
    def _evaluate(self,
                  variables,
                  observation,
                  action_mask=None,
                  **extra_feed):
        """Run ``variables`` with ``observation`` and an action mask.

        Args:
            variables: Fetches passed to ``sess.run``.
            observation: Shape-adjusted and fed to ``self.X``.
            action_mask: Fed to ``self._action_mask_ph`` unchanged. When
                ``None``, an all-True mask of shape
                ``(5, len(observation[0]))`` is used.
            **extra_feed: Fed only when the key names an instance attribute
                that is a placeholder tensor; other keys are ignored.

        Returns:
            The result of ``sess.run(variables, feed_dict)``.
        """
        sess = self.sess

        if action_mask is None:
            # The `np.bool` alias was removed in NumPy 1.24; the builtin
            # `bool` yields the same dtype on every NumPy version.
            # NOTE(review): the leading dimension 5 is hard-coded — confirm
            # it matches the mask placeholder.
            action_mask = np.ones((5, len(observation[0])), dtype=bool)

        feed_dict = {
            self.X: adjust_shape(self.X, observation),
            # The mask is always fed, so this method requires
            # self._action_mask_ph to exist (original note: this "messes
            # things up for other algs").
            self._action_mask_ph: action_mask
        }
        for inpt_name, data in extra_feed.items():
            inpt = self.__dict__.get(inpt_name)
            if isinstance(inpt, tf.Tensor) and inpt._op.type == 'Placeholder':
                feed_dict[inpt] = adjust_shape(inpt, data)

        return sess.run(variables, feed_dict)
Ejemplo n.º 22
0
    def _evaluate(self, variables, observation,
                  **extra_feed):
        """Run ``variables`` for a single or a list of observation inputs.

        When ``self.X`` is a list, the i-th placeholder is fed
        ``observation[i]``. Previously the list branch reset its counter and
        overwrote the feed dict on every iteration, and the result was then
        replaced by a dict keyed on the (unhashable) list itself, so list
        inputs could not work. Otherwise ``observation`` feeds ``self.X``
        directly.

        Args:
            variables: Fetches passed to ``sess.run``.
            observation: A single observation, or a sequence indexed in
                parallel with ``self.X`` when ``self.X`` is a list.
            **extra_feed: Fed only when the key names an instance attribute
                that is a placeholder tensor; other keys are ignored.

        Returns:
            The result of ``sess.run(variables, feed_dict)``.
        """
        sess = self.sess

        if isinstance(self.X, list):
            # One placeholder per observation component, matched by position.
            feed_dict = {
                placeholder: adjust_shape(placeholder, observation[idx])
                for idx, placeholder in enumerate(self.X)
            }
        else:
            feed_dict = {self.X: adjust_shape(self.X, observation)}

        for inpt_name, data in extra_feed.items():
            inpt = self.__dict__.get(inpt_name)
            if isinstance(inpt, tf.Tensor) and inpt._op.type == 'Placeholder':
                feed_dict[inpt] = adjust_shape(inpt, data)

        return sess.run(variables, feed_dict)
Ejemplo n.º 23
0
    def step(self, obs, apply_noise=True, compute_Q=True):
        """Evaluate the actor on one observation, with optional noise.

        The perturbed actor is used when parameter noise is configured and
        ``apply_noise`` is set. Action noise, when configured and enabled, is
        added in place before clipping.

        Returns:
            ``(action, q, None, None)``. ``q`` is ``None`` unless
            ``compute_Q`` is true. The action is clipped to
            ``self.action_range``.
        """
        if apply_noise and self.param_noise is not None:
            policy_op = self.perturbed_actor_tf
        else:
            policy_op = self.actor_tf

        placeholder = self.obs0
        feeds = {placeholder: U.adjust_shape(placeholder, [obs])}

        if not compute_Q:
            q = None
            action = self.sess.run(policy_op, feed_dict=feeds)
        else:
            action, q = self.sess.run(
                [policy_op, self.critic_with_actor_tf], feed_dict=feeds)

        if apply_noise and self.action_noise is not None:
            sampled = self.action_noise()
            assert sampled.shape == action[0].shape
            action += sampled

        bounded = np.clip(action, self.action_range[0], self.action_range[1])
        return bounded, q, None, None
Ejemplo n.º 24
0
    def step(self, obs, apply_noise=True, compute_Q=True):
        """Evaluate the actor on one observation and return a single action.

        The perturbed actor is used when parameter noise is configured and
        ``apply_noise`` is set. Action noise, when configured and enabled, is
        added before clipping to ``self.action_range``.

        Returns:
            ``(action, q, None, None)`` where ``action`` is the first row of
            the clipped result. ``q`` is ``None`` unless ``compute_Q`` is
            true.
        """
        perturbed = apply_noise and self.param_noise is not None
        policy_op = self.perturbed_actor_tf if perturbed else self.actor_tf

        feeds = {self.obs0: U.adjust_shape(self.obs0, [obs])}

        q = None
        if compute_Q:
            action, q = self.sess.run(
                [policy_op, self.critic_with_actor_tf], feed_dict=feeds)
        else:
            action = self.sess.run(policy_op, feed_dict=feeds)

        if apply_noise and self.action_noise is not None:
            sampled = self.action_noise()
            assert sampled.shape == action[0].shape
            action += sampled

        clipped = np.clip(action, self.action_range[0], self.action_range[1])
        # Only the first row is handed back to the caller.
        return clipped[0], q, None, None
Ejemplo n.º 25
0
    def step(self, obs, apply_noise=False):
        """Apply the policy; no noise is added.

        Args:
            obs: Batch of observations. When ``self.use_keras`` is false each
                ``obs[b]`` is resized with ``cv2.resize`` first, so ``obs``
                must then expose ``.shape`` and be indexable on axis 0.
            apply_noise: Unused; kept for compatibility with
                ``baselines/run.py``.

        Returns:
            ``(action, None, None, None)``. Only the action matters; the
            4-tuple exists for compatibility with other code. The action is
            clipped to ``self.action_range``.
        """
        if not self.use_keras:
            # Observations from the env are larger than what the model takes,
            # so every image in the batch is resized to obs_shape first.
            height, width = self.obs_shape[0], self.obs_shape[1]
            # cv2.resize takes dsize as (width, height) and returns an array
            # of shape (height, width, channels). Passing (height, width), as
            # before, only satisfied the assertion below for square targets.
            obs_new = np.array([
                cv2.resize(obs[b], (width, height))
                for b in range(obs.shape[0])
            ])
            # NOTE(review): 4 channels are required here although the
            # original comment spoke of 3-channel images — confirm.
            assert obs_new.shape == (obs.shape[0], height, width, 4)
            obs = obs_new

        actor_tf = self.actor_tf
        feed_dict = {self.obs0: U.adjust_shape(self.obs0, [obs])}
        action = self.sess.run(actor_tf, feed_dict=feed_dict)
        action = np.clip(action, self.action_range[0], self.action_range[1])
        return action, None, None, None
Ejemplo n.º 26
0
 def make_feed_dict(self, data):
     """Build a feed dict for ``self._placeholder`` from ``data``.

     ``data`` is shape-adjusted against the placeholder(s). When the
     placeholder is a list, its entries are zipped with the adjusted data;
     otherwise a single-entry dict is returned.
     """
     target = self._placeholder
     adjusted = adjust_shape(target, data)
     if not isinstance(target, list):
         return {target: adjusted}
     return dict(zip(target, adjusted))
Ejemplo n.º 27
0
    def evaluate(self, vars, input):
        """Run ``vars`` in the session from ``get_session()``.

        ``input`` is shape-adjusted and fed to ``self.X``.
        """
        session = get_session()
        placeholder = self.X
        feeds = {placeholder: adjust_shape(placeholder, input)}
        return session.run(vars, feeds)
Ejemplo n.º 28
0
 def compute_Q(self, obs0_n, actions_n):
     """Fetch ``self.critic_with_actor_tf`` and return element ``[0, 0]``.

     Both arguments are wrapped in a list and shape-adjusted before being
     fed to ``self.obs0_n`` and ``self.actions_n`` respectively.
     """
     bindings = ((self.obs0_n, obs0_n), (self.actions_n, actions_n))
     feeds = {ph: U.adjust_shape(ph, [value]) for ph, value in bindings}
     q_values = self.sess.run(self.critic_with_actor_tf, feed_dict=feeds)
     return q_values[0, 0]
Ejemplo n.º 29
0
 def make_feed_dict(self, data):
     """Return a one-entry feed dict: placeholder -> shape-adjusted data."""
     placeholder = self._placeholder
     return {placeholder: adjust_shape(placeholder, data)}
Ejemplo n.º 30
0
 def make_feed_dict(self, data):
     """Map ``self._placeholder`` to ``data`` after shape adjustment."""
     feed = {}
     feed[self._placeholder] = adjust_shape(self._placeholder, data)
     return feed
Ejemplo n.º 31
0
 def evaluate(self, obs):
     """Run ``self.out`` with ``obs`` fed to a newly created placeholder.

     The placeholder is built from ``self.ob_space`` with batch size
     ``self.nenvs``, and the session comes from ``get_session()``.
     """
     # NOTE(review): a new placeholder is created on every call; whether
     # self.out actually depends on it cannot be seen from here — confirm.
     placeholder = observation_placeholder(self.ob_space,
                                           batch_size=self.nenvs)
     session = get_session()
     feeds = {placeholder: adjust_shape(placeholder, obs)}
     return session.run(self.out, feeds)