Example 1
    def get_actions(self, observations):
        flat_obs = self.observation_space.flatten_n(observations)
        if self.state_include_action:
            assert self.prev_actions is not None
            all_input = np.concatenate([flat_obs, self.prev_actions], axis=-1)
        else:
            all_input = flat_obs

        probs, hidden_vec = self.f_step_prob(all_input, self.prev_hiddens)
        # print("probs: ",probs)
        # R0,z_os = self.debug(all_input, self.prev_hiddens)
        # print('R0: ',R0)
        # print("z_os: ",z_os)
        # sess = tf.get_default_session()
        # print('q*w: ',sess.run(tf.matmul(q,w)))
        # print("action_pred: ",action_pred)

        actions = special.weighted_sample_n(probs,
                                            np.arange(self.action_space.n))
        #print("actions: ",actions)
        prev_actions = self.prev_actions
        self.prev_actions = self.action_space.flatten_n(actions)
        #print("prve_actions: ",self.prev_actions)
        self.prev_hiddens = hidden_vec
        agent_info = dict(prob=probs)
        if self.state_include_action:
            agent_info["prev_action"] = np.copy(prev_actions)
        return actions, agent_info
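
Example 1 feeds the previously taken actions back into the recurrent step when `state_include_action` is set, which is why the sampled indices are immediately flattened via `self.action_space.flatten_n(actions)`. A minimal sketch of that flattening, under the assumption that the action space is discrete and `flatten_n` produces one-hot rows (the helper name `flatten_n_onehot` is hypothetical):

import numpy as np

# Assumed behaviour: for a Discrete action space of size n, flatten_n turns an
# array of integer action indices into one-hot rows, matching the shape that
# prev_actions must have when concatenated with the flattened observations.
def flatten_n_onehot(action_indices, n):
    onehot = np.zeros((len(action_indices), n))
    onehot[np.arange(len(action_indices)), action_indices] = 1.0
    return onehot

# flatten_n_onehot(np.array([2, 0]), 4) -> [[0., 0., 1., 0.], [1., 0., 0., 0.]]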
Example 2
    def get_actions(self, observations):
        # Figure out which agents need valid actions
        agents_to_act = [
            i for i, j in enumerate(observations) if j != [None] * len(j)
        ]
        agents_not_to_act = [
            x for x in list(range(len(observations))) if x not in agents_to_act
        ]

        if len(agents_to_act) == 0:
            # no agents are acting (shouldn't happen)
            return [None] * len(observations)
        else:
            # copy a valid observation into locations that have [None]
            valid_obs = next(obs for obs in observations
                             if obs != [None] * len(obs))
            observations = [
                obs if obs != [None] * len(obs) else valid_obs
                for obs in observations
            ]

        flat_obs = self.observation_space.flatten_n(observations)
        if self.state_include_action:
            assert self.prev_actions is not None
            try:
                all_input = np.concatenate([flat_obs, self.prev_actions],
                                           axis=-1)
            except ValueError:
                # shapes may mismatch; retry with the transpose of prev_actions
                all_input = np.concatenate([flat_obs, self.prev_actions.T],
                                           axis=-1)

        else:
            all_input = flat_obs
        probs, hidden_vec = self.f_step_prob(all_input, self.prev_hiddens)
        actions = special.weighted_sample_n(probs,
                                            np.arange(self.action_space.n))

        #  don't update prev_actions or hidden_vec for non-acting agents;
        #  replace their actions with None before returning
        prev_actions = self.prev_actions
        prev_actions_flattened = self.action_space.flatten_n(actions)
        actions = actions.tolist()
        for i in agents_not_to_act:
            hidden_vec[i] = self.prev_hiddens[i]
            prev_actions_flattened[i, :] = prev_actions[i, :]
            actions[i] = None

        self.prev_actions = prev_actions_flattened
        self.prev_hiddens = hidden_vec

        agent_info = dict(prob=probs)
        if self.state_include_action:
            agent_info["prev_action"] = np.copy(prev_actions)
        return actions, agent_info
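
Example 2 handles agents whose observation is a list of `None` values: they are given a copied valid observation so the batched step still runs, but afterwards their recurrent state and previous action are restored and their sampled action is replaced with `None`. A small standalone sketch of that masking step, using made-up toy shapes:

import numpy as np

# Toy shapes, purely illustrative: three agents, a 4-dim hidden state and a
# 2-dim flattened action. Agent 2 is assumed not to act on this step.
n_agents, hidden_dim, action_dim = 3, 4, 2
hidden_vec = np.random.randn(n_agents, hidden_dim)      # freshly computed states
prev_hiddens = np.zeros((n_agents, hidden_dim))         # states from the last step
prev_actions = np.zeros((n_agents, action_dim))         # flattened actions from the last step
actions_flat = np.ones((n_agents, action_dim))          # freshly sampled, flattened actions
actions = [1, 0, 1]

agents_not_to_act = [2]
for i in agents_not_to_act:
    hidden_vec[i] = prev_hiddens[i]          # freeze the recurrent state
    actions_flat[i, :] = prev_actions[i, :]  # keep the previous action encoding
    actions[i] = None                        # signal "no action" to the caller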
Example 3

    def get_actions(self, observations):
        flat_obs = self.observation_space.flatten_n(observations)
        if self.state_include_action:
            assert self.prev_actions is not None
            all_input = np.concatenate([flat_obs, self.prev_actions], axis=-1)
        else:
            all_input = flat_obs
        probs, hidden_vec = self.f_step_prob(all_input, self.prev_hiddens)
        actions = special.weighted_sample_n(probs,
                                            np.arange(self.action_space.n))
        prev_actions = self.prev_actions
        self.prev_actions = self.action_space.flatten_n(actions)
        self.prev_hiddens = hidden_vec
        agent_info = dict(prob=probs)
        if self.state_include_action:
            agent_info["prev_action"] = np.copy(prev_actions)
        return actions, agent_info
Example 4
    def get_actions(self, observations):
        flat_obs = self.observation_space.flatten_n(observations)
        if self.hardcoded_q is not None:
            q_func = self.hardcoded_q
        else:
            q_func = tf.get_default_session().run(self.q_func)
        q_vals = flat_obs.dot(q_func)

        # softmax
        qv = (1.0 / self.ent_wt) * q_vals
        qv = qv - np.max(qv, axis=1, keepdims=True)
        probs = np.exp(qv)
        probs = probs / np.sum(probs, axis=1, keepdims=True)

        actions = special.weighted_sample_n(probs,
                                            np.arange(self.action_space.n))
        agent_info = dict(prob=probs)
        return actions, agent_info
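
Example 4 turns Q-values into a Boltzmann (softmax) policy: the Q-values are scaled by the inverse entropy weight, shifted by their row maximum for numerical stability, exponentiated, and normalised per row. A self-contained sketch of that computation (`boltzmann_probs` is a hypothetical helper, not part of the example's class):

import numpy as np

# Standalone sketch of the softmax step above: scale Q-values by 1/ent_wt,
# subtract the row maximum so exp() cannot overflow, then normalise each row
# into a probability distribution over actions.
def boltzmann_probs(q_vals, ent_wt):
    qv = q_vals / ent_wt
    qv = qv - np.max(qv, axis=1, keepdims=True)
    probs = np.exp(qv)
    return probs / np.sum(probs, axis=1, keepdims=True)

# e.g. boltzmann_probs(np.array([[1.0, 2.0, 0.5]]), ent_wt=0.5)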
Example 5

    def get_actions(self, observations):
        flat_obs = self.observation_space.flatten_n(observations)
        if self.state_include_action:
            assert self.prev_actions is not None
            all_input = np.concatenate([
                flat_obs,
                self.prev_actions
            ], axis=-1)
        else:
            all_input = flat_obs
        probs, hidden_vec = self.f_step_prob(all_input, self.prev_hiddens)
        actions = special.weighted_sample_n(probs, np.arange(self.action_space.n))
        prev_actions = self.prev_actions
        self.prev_actions = self.action_space.flatten_n(actions)
        self.prev_hiddens = hidden_vec
        agent_info = dict(prob=probs)
        if self.state_include_action:
            agent_info["prev_action"] = np.copy(prev_actions)
        return actions, agent_info
Example 6
    def weighted_sample_n(self, weights_matrix):
        return special.weighted_sample_n(weights_matrix, self._items_arr)
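
Example 6 only wraps `special.weighted_sample_n`, which draws one item per row of the weight matrix with probability proportional to that row's weights. A hedged sketch of what such a row-wise sampler could look like (an illustration of the expected behaviour, not the library's actual implementation):

import numpy as np

# For each row of weights_matrix, draw one element of items with probability
# proportional to that row's weights, via a per-row cumulative sum and a
# uniform random draw.
def weighted_sample_n_sketch(weights_matrix, items):
    cumulative = weights_matrix.cumsum(axis=1)
    cumulative = cumulative / cumulative[:, -1:]   # normalise each row to end at 1
    draws = np.random.rand(weights_matrix.shape[0], 1)
    idx = (cumulative < draws).sum(axis=1)         # first bin whose cumulative mass exceeds the draw
    return np.asarray(items)[np.minimum(idx, len(items) - 1)]

# e.g. weighted_sample_n_sketch(np.array([[0.1, 0.9], [0.8, 0.2]]), np.arange(2))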