Code example #1
    def step(self, action):
        # Sample the next state from the transition distribution P(. | state, action).
        ps = self.Ps[self.state, action]
        next_state = special.weighted_sample(ps, np.arange(self.n_states))
        # Reward is the mean reward for (state, action) plus Gaussian noise with precision tau.
        reward_mean = self.Rs[self.state, action]
        reward = reward_mean + np.random.normal() * 1 / np.sqrt(self.tau)
        self.ts += 1
        self.state = next_state
        done = self.ts >= self.max_path_length
        # Observation: one-hot encoding of the new state, followed by the
        # reward and the termination flag.
        state = np.zeros((2 + self.n_states,))
        state[self.state] = 1
        state[self.n_states] = reward
        state[self.n_states + 1] = done
        return Step(state, reward, done)
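
Every example on this page routes sampling through special.weighted_sample(weights, items). The module itself is not shown here, but its behaviour can be read off from the call sites: draw one element of items with probability proportional to the corresponding entry of weights. A minimal, self-contained sketch of that assumed behaviour (not the library's actual source):

import numpy as np

def weighted_sample(weights, objects):
    # Assumed behaviour of special.weighted_sample: normalize the weights
    # into a probability vector and draw one element of `objects` from it.
    probs = np.asarray(weights, dtype=float)
    probs = probs / probs.sum()
    idx = np.random.choice(len(probs), p=probs)
    return objects[idx]

Under this reading, example #1 samples the next MDP state from the row self.Ps[self.state, action] of the transition matrix, and Step(observation, reward, done) packages the transition for the caller.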
Code example #2
    def get_action(self, observation):
        if self.state_include_action:
            # Feed the flattened previous action alongside the current observation.
            if self.prev_action is None:
                prev_action = np.zeros((self.action_space.flat_dim,))
            else:
                prev_action = self.action_space.flatten(self.prev_action)
            all_input = np.concatenate([
                self.observation_space.flatten(observation),
                prev_action
            ])
        else:
            all_input = self.observation_space.flatten(observation)
            # should not be used
            prev_action = np.nan
        # One recurrent step: action probabilities and the new hidden state.
        probs, hidden_vec = [x[0] for x in self.f_step_prob([all_input], [self.prev_hidden])]
        # Sample a discrete action according to the predicted probabilities.
        action = special.weighted_sample(probs, range(self.action_space.n))
        self.prev_action = action
        self.prev_hidden = hidden_vec
        agent_info = dict(prob=probs)
        if self.state_include_action:
            agent_info["prev_action"] = prev_action
        return action, agent_info
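
For context, here is a hedged sketch of how such a recurrent policy is typically driven during a rollout; env, policy.reset(), and the field names on the step result are assumptions based on the conventions visible in examples #1 and #2, not a specific rllab API:

def rollout(env, policy, max_path_length=100):
    observation = env.reset()
    policy.reset()  # assumed to clear prev_action / prev_hidden between episodes
    total_reward = 0.0
    for _ in range(max_path_length):
        action, agent_info = policy.get_action(observation)
        step = env.step(action)
        observation, reward, done = step.observation, step.reward, step.done
        total_reward += reward
        if done:
            break
    return total_reward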
Code example #3
File: discrete.py  Project: yaohuic/accel_rl
 def weighted_sample(self, weights):
     return special.weighted_sample(weights, self._items_arr)
Code example #4
File: discrete.py  Project: AtousaTorabi/rllab
 def weighted_sample(self, weights):
     return special.weighted_sample(weights, xrange(self.n))
Code example #5
 def weighted_sample(self, weights):
     return special.weighted_sample(weights, range(self.n))
Code example #6
 def weighted_sample(self, weights):
     if config.TF_NN_SETTRACE:
         ipdb.set_trace()
     return special.weighted_sample(weights, range(self.n))
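
Examples #3 through #6 are the same one-line wrapper that a discrete action space exposes around the shared helper; the variants differ only in whether the item list is a prebuilt array (self._items_arr), a Python 2 xrange, a Python 3 range, or is preceded by a debugging hook. A minimal, hypothetical Discrete space illustrating the pattern (the class and attribute names are assumptions, not any project's exact code):

import numpy as np

class Discrete(object):
    def __init__(self, n):
        self.n = n
        self._items_arr = np.arange(n)  # mirrors the accel_rl variant above

    def weighted_sample(self, weights):
        # Equivalent to special.weighted_sample(weights, range(self.n)):
        # pick one index in 0..n-1 with probability proportional to its weight.
        probs = np.asarray(weights, dtype=float)
        probs = probs / probs.sum()
        return int(np.random.choice(self._items_arr, p=probs))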
Code example #7
File: product.py  Project: thuang/voltvar_package
 def weighted_sample_normalized(self, weights):
     return [
         (special.weighted_sample(weights[s:s + n], range(n)) - b) / b
         for s, n, b in zip(self._slice, self._comp_dim, self._comp_base)
     ]
Code example #8
File: product.py  Project: thuang/voltvar_package
 def weighted_sample(self, weights):
     return [
         special.weighted_sample(weights[s:s + n], range(n))
         for s, n, b in zip(self._slice, self._comp_dim, self._comp_base)
     ]
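
Examples #7 and #8 split one flat weight vector across the components of a product space: each component owns the slice weights[s:s + n], samples within it independently, and weighted_sample_normalized additionally maps the drawn index to (index - b) / b using the component's base b. A hedged sketch of how the _slice, _comp_dim, and _comp_base bookkeeping could be set up (the constructor is an assumption, not the project's actual code):

import numpy as np

class ProductSpace(object):
    def __init__(self, comp_dims, comp_bases):
        self._comp_dim = list(comp_dims)    # number of choices n per component
        self._comp_base = list(comp_bases)  # base b used for normalization
        # Start offset s of each component's block in the flat weight vector.
        self._slice = list(np.cumsum([0] + self._comp_dim[:-1]))

    def weighted_sample(self, weights):
        # One independent categorical draw per component over its own slice.
        samples = []
        for s, n in zip(self._slice, self._comp_dim):
            probs = np.asarray(weights[s:s + n], dtype=float)
            probs = probs / probs.sum()
            samples.append(int(np.random.choice(n, p=probs)))
        return samples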