Ejemplo n.º 1
0
 def choose_action(self, s, visual_s, evaluation=False):
     """Select one action per entry of ``s``, with optional random exploration.

     When ``self.use_epsilon`` is truthy, a uniform sample is compared with
     the threshold returned by ``self.expl_expt_mng.get_esp``; if the sample
     is below it, ``len(s)`` random integers in ``[0, self.a_counts)`` are
     used instead of the policy output.

     Args:
         s: Batch of observations; only its length is used directly here.
         visual_s: Visual observations, forwarded to ``self._get_action``.
         evaluation: Forwarded to ``get_esp`` as a keyword argument.

     Returns:
         Whatever ``sth.int2action_index`` returns for the chosen actions and
         ``self.a_dim_or_list``.
     """
     explore = False
     if self.use_epsilon:
         # Sample before querying the schedule so the evaluation order
         # stays the same as a single short-circuit expression.
         sample = np.random.uniform()
         threshold = self.expl_expt_mng.get_esp(self.episode, evaluation=evaluation)
         explore = sample < threshold

     if explore:
         flat_actions = np.random.randint(0, self.a_counts, len(s))
     else:
         # The last element of the _get_action result holds the action tensor.
         flat_actions = self._get_action(s, visual_s)[-1].numpy()
     return sth.int2action_index(flat_actions, self.a_dim_or_list)
Ejemplo n.º 2
0
 def choose_inference_action(self, s, visual_s):
     """Run ``self.action`` in the session and return the inference action.

     The feed always includes ``self.sigma_offset`` filled with ``0.01``
     (``self.a_counts`` entries).

     Args:
         s: Value fed to ``self.pl_s``.
         visual_s: Value fed to ``self.pl_visual_s``.

     Returns:
         The raw session output when ``self.action_type`` is ``'continuous'``;
         otherwise the output passed through ``sth.int2action_index`` with
         ``self.a_dim_or_list``.
     """
     feed = {
         self.pl_visual_s: visual_s,
         self.pl_s: s,
         self.sigma_offset: np.full(self.a_counts, 0.01),
     }
     raw_action = self.sess.run(self.action, feed_dict=feed)
     if self.action_type == 'continuous':
         return raw_action
     return sth.int2action_index(raw_action, self.a_dim_or_list)
Ejemplo n.º 3
0
Archivo: dqn.py Proyecto: kasimte/RLs
 def choose_inference_action(self, s, visual_s):
     """Run ``self.action`` in the session and convert the result.

     Args:
         s: Value fed to ``self.pl_s``.
         visual_s: Value fed to ``self.pl_visual_s``.

     Returns:
         The session output passed through ``sth.int2action_index`` together
         with ``self.a_dim_or_list``.
     """
     feed = {self.pl_visual_s: visual_s, self.pl_s: s}
     raw_action = self.sess.run(self.action, feed_dict=feed)
     return sth.int2action_index(raw_action, self.a_dim_or_list)
Ejemplo n.º 4
0
 def step(self, actions):
     """Convert discrete entries of ``actions`` and step the wrapped env.

     The input mapping is deep-copied first, so the caller's object is never
     modified. The i-th key (in iteration order) is treated as continuous
     or discrete according to ``self.is_continuous[i]``.

     Args:
         actions: Mapping from key to action value.

     Returns:
         Whatever ``self._env.step`` returns for the converted mapping.
     """
     converted = deepcopy(actions)
     for idx, key in enumerate(converted):
         if self.is_continuous[idx]:
             # Continuous entries are forwarded untouched.
             continue
         converted[key] = sth.int2action_index(
             converted[key], self.discrete_action_dim_list[idx])
     return self._env.step(converted)
Ejemplo n.º 5
0
 def choose_action(self, s, visual_s):
     """Select actions for a batch of observations.

     For ``'continuous'`` action types the output of ``self._get_action`` is
     returned directly. Otherwise, if a uniform sample falls below
     ``self.epsilon``, ``len(s)`` random integers in ``[0, self.a_counts)``
     are used in place of the policy output, and the result is passed
     through ``sth.int2action_index``.

     Args:
         s: Batch of observations.
         visual_s: Visual observations, forwarded to ``self._get_action``.

     Returns:
         The policy output (continuous case) or the result of
         ``sth.int2action_index`` (any other action type).
     """
     if self.action_type == 'continuous':
         return self._get_action(s, visual_s).numpy()

     explore = np.random.uniform() < self.epsilon
     if explore:
         flat_actions = np.random.randint(0, self.a_counts, len(s))
     else:
         flat_actions = self._get_action(s, visual_s).numpy()
     return sth.int2action_index(flat_actions, self.a_dim_or_list)
Ejemplo n.º 6
0
 def choose_action(self, s, visual_s):
     """Select one action per entry of ``s``, exploring with ``self.epsilon``.

     If a uniform sample is below ``self.epsilon``, ``len(s)`` random
     integers in ``[0, self.a_counts)`` are drawn; otherwise ``self.action``
     is evaluated in the session.

     Args:
         s: Value fed to ``self.pl_s``; its length sets the random batch size.
         visual_s: Value fed to ``self.pl_visual_s``.

     Returns:
         The result of ``sth.int2action_index`` for the chosen actions and
         ``self.a_dim_or_list``.
     """
     explore = np.random.uniform() < self.epsilon
     if explore:
         flat_actions = np.random.randint(0, self.a_counts, len(s))
     else:
         feed = {self.pl_visual_s: visual_s, self.pl_s: s}
         flat_actions = self.sess.run(self.action, feed_dict=feed)
     return sth.int2action_index(flat_actions, self.a_dim_or_list)
Ejemplo n.º 7
0
 def choose_action(self, s, visual_s, evaluation=False):
     """Compute actions by splitting ``s`` across ``self.cem_models``.

     ``s`` is split along axis 0 according to ``self.populations`` and each
     piece is paired, in order, with one model from ``self.cem_models``.

     Args:
         s: Batch of observations; validated by ``self._check_agents`` first.
         visual_s: Unused by this method.
         evaluation: Unused by this method.

     Returns:
         The vertically stacked model outputs when ``self.is_continuous`` is
         truthy; otherwise the horizontally stacked outputs passed through
         ``sth.int2action_index`` with ``self.a_dim_or_list``.
     """
     self._check_agents(s)
     shards = np.split(s, self.populations, axis=0)
     outputs = []
     for member, shard in zip(self.cem_models, shards):
         outputs.append(member(shard).numpy())

     if self.is_continuous:
         return np.vstack(outputs)
     return sth.int2action_index(np.hstack(outputs), self.a_dim_or_list)
Ejemplo n.º 8
0
Archivo: a2c.py Proyecto: kasimte/RLs
 def choose_action(self, s, visual_s):
     """Select actions by evaluating ``self.action`` in the session.

     For any action type other than ``'continuous'``, a uniform sample below
     ``self.epsilon`` replaces the session output with ``len(s)`` random
     integers in ``[0, self.a_counts)``, and the result is passed through
     ``sth.int2action_index``. For ``'continuous'`` the session is run
     with ``self.sigma_offset`` additionally fed as ``0.01`` values.

     Args:
         s: Value fed to ``self.pl_s``.
         visual_s: Value fed to ``self.pl_visual_s``.

     Returns:
         The raw session output (continuous case) or the result of
         ``sth.int2action_index`` (any other action type).
     """
     if self.action_type != 'continuous':
         if np.random.uniform() < self.epsilon:
             flat_actions = np.random.randint(0, self.a_counts, len(s))
         else:
             feed = {self.pl_visual_s: visual_s, self.pl_s: s}
             flat_actions = self.sess.run(self.action, feed_dict=feed)
         return sth.int2action_index(flat_actions, self.a_dim_or_list)

     # Only the continuous path feeds sigma_offset.
     feed = {
         self.pl_visual_s: visual_s,
         self.pl_s: s,
         self.sigma_offset: np.full(self.a_counts, 0.01),
     }
     return self.sess.run(self.action, feed_dict=feed)
Ejemplo n.º 9
0
 def step(self, actions):
     """Step every environment in ``self.envs`` and collect the results.

     Non-continuous actions are first converted with
     ``sth.int2action_index`` and then reshaped: flattened for
     ``'discrete'``, or turned into a nested list with ``self.n`` rows for
     ``'Tuple(Discrete)'``.

     Args:
         actions: Array-like of actions; converted with ``np.array``.

     Returns:
         A tuple ``(obs, reward, done, info, correct_new_obs)``. ``reward``
         is cast to ``float32``. ``correct_new_obs`` is the result of
         ``self.partial_reset`` when any ``done`` entry is truthy, and
         ``obs`` otherwise. For ``'visual'`` observations both observation
         arrays get a new axis inserted at position 1.
     """
     actions = np.array(actions)
     if not self.is_continuous:
         actions = sth.int2action_index(actions, self.a_dim_or_list)
         kind = self.action_type
         if kind == 'discrete':
             actions = actions.reshape(-1)
         elif kind == 'Tuple(Discrete)':
             actions = actions.reshape(self.n, -1).tolist()

     results = Asyn.op_func(self.envs, Asyn.OP.STEP, actions)
     # Transpose the per-environment results into four per-field arrays.
     obs, reward, done, info = map(np.asarray, zip(*results))
     reward = reward.astype('float32')

     dones_index = np.where(done)[0]
     correct_new_obs = (
         self.partial_reset(obs, dones_index)
         if dones_index.shape[0] > 0
         else obs
     )

     if self.obs_type == 'visual':
         obs = obs[:, np.newaxis, ...]
         correct_new_obs = correct_new_obs[:, np.newaxis, ...]
     return obs, reward, done, info, correct_new_obs
Ejemplo n.º 10
0
 def choose_action(self, adj, x, visual_s, evaluation=False):
     """Select actions from ``self._get_action``.

     Args:
         adj: Forwarded to ``self._get_action`` as the first argument.
         x: Forwarded to ``self._get_action`` as the second argument.
         visual_s: Forwarded to ``self._get_action`` as the third argument.
         evaluation: Forwarded to ``self._get_action`` as the fourth argument.

     Returns:
         The policy output when ``self.is_continuous`` is truthy; otherwise
         that output passed through ``sth.int2action_index`` with
         ``self.a_dim_or_list``.
     """
     policy_out = self._get_action(adj, x, visual_s, evaluation).numpy()
     if self.is_continuous:
         return policy_out
     return sth.int2action_index(policy_out, self.a_dim_or_list)
Ejemplo n.º 11
0
 def choose_inference_action(self, s, visual_s):
     """Return the inference action from ``self._get_action``.

     Only the first element of the ``_get_action`` result is used.

     Args:
         s: Batch of observations, forwarded to ``self._get_action``.
         visual_s: Visual observations, forwarded to ``self._get_action``.

     Returns:
         The policy output when ``self.action_type`` is ``'continuous'``;
         otherwise that output passed through ``sth.int2action_index`` with
         ``self.a_dim_or_list``.
     """
     policy_out = self._get_action(s, visual_s)[0].numpy()
     if self.action_type == 'continuous':
         return policy_out
     return sth.int2action_index(policy_out, self.a_dim_or_list)
Ejemplo n.º 12
0
 def choose_inference_action(self, s, visual_s):
     """Return the inference action from ``self._get_action``, converted.

     Args:
         s: Batch of observations, forwarded to ``self._get_action``.
         visual_s: Visual observations, forwarded to ``self._get_action``.

     Returns:
         The policy output passed through ``sth.int2action_index`` with
         ``self.a_dim_or_list``.
     """
     policy_out = self._get_action(s, visual_s).numpy()
     return sth.int2action_index(policy_out, self.a_dim_or_list)
Ejemplo n.º 13
0
 def choose_action(self, s, visual_s):
     """Select actions from ``self._get_action`` without random exploration.

     Args:
         s: Batch of observations, forwarded to ``self._get_action``.
         visual_s: Visual observations, forwarded to ``self._get_action``.

     Returns:
         The policy output when ``self.action_type`` is ``'continuous'``;
         otherwise that output passed through ``sth.int2action_index`` with
         ``self.a_dim_or_list``.
     """
     # Read the action type before querying the policy.
     is_continuous = self.action_type == 'continuous'
     policy_out = self._get_action(s, visual_s).numpy()
     if is_continuous:
         return policy_out
     return sth.int2action_index(policy_out, self.a_dim_or_list)
Ejemplo n.º 14
0
 def choose_action(self, s, visual_s):
     """Select one action per entry of ``s``, with optional exploration.

     When ``self.use_epsilon`` is truthy and a uniform sample is below
     ``self.epsilon``, ``len(s)`` random integers in ``[0, self.a_counts)``
     are used; otherwise the last element of the ``self._get_action``
     result is used.

     Args:
         s: Batch of observations; its length sets the random batch size.
         visual_s: Visual observations, forwarded to ``self._get_action``.

     Returns:
         The result of ``sth.int2action_index`` for the chosen actions and
         ``self.a_dim_or_list``.
     """
     # Short-circuit: no random sample is drawn when use_epsilon is falsy.
     explore = self.use_epsilon and np.random.uniform() < self.epsilon
     if explore:
         flat_actions = np.random.randint(0, self.a_counts, len(s))
     else:
         flat_actions = self._get_action(s, visual_s)[-1].numpy()
     return sth.int2action_index(flat_actions, self.a_dim_or_list)