def choose_action_softmax(self, state, epsilon):
    """Sample an action index from a softmax (Boltzmann) distribution.

    The Q-values returned by ``self.q_vals(state)`` are divided by the
    temperature, exponentiated, and handed to ``weightedRandomChoice`` as a
    ``{action_index: weight}`` dict.

    Args:
        state: State forwarded unchanged to ``self.q_vals``.
        epsilon: Used directly as the softmax temperature ``tau``. Must be
            non-zero, since the Q-values are divided by it.

    Returns:
        The value returned by ``weightedRandomChoice`` for the weight dict,
        whose keys are the positional indices of the Q-values.
    """
    q_vals = self.q_vals(state)
    tau = epsilon
    scaled_q_vals = q_vals / tau

    # Shift so the largest exponent is exactly 0 before calling np.exp.
    # Without the shift, large Q-values (or a small tau) overflow to inf and
    # very negative ones underflow so that every weight is 0. The shift
    # multiplies all weights by the same positive constant, so their ratios
    # are unchanged.
    # NOTE(review): relies on weightedRandomChoice sampling proportionally to
    # the (unnormalised) weights -- confirm against its definition.
    scaled_q_vals = scaled_q_vals - np.max(scaled_q_vals)
    exp_q_vals = np.exp(scaled_q_vals)

    weights = dict(enumerate(exp_q_vals))
    return weightedRandomChoice(weights)
def choose_action_softmax(self, state, epsilon):
    """Sample an action index from a softmax whose temperature grows with compression loss.

    The temperature is ``epsilon`` plus the compression loss for ``state``
    divided by ``self.max_compression_loss``. The Q-values are divided by
    that temperature, exponentiated, and handed to ``weightedRandomChoice``
    as a ``{action_index: weight}`` dict.

    Args:
        state: State forwarded unchanged to
            ``self.q_vals_and_compression_loss``.
        epsilon: Base temperature, added to the normalised compression loss.
            The resulting sum must be non-zero, since the Q-values are
            divided by it.

    Returns:
        The value returned by ``weightedRandomChoice`` for the weight dict,
        whose keys are the positional indices of the Q-values.
    """
    q_vals, compression_loss = self.q_vals_and_compression_loss(state)

    # A larger compression loss raises the temperature, which flattens the
    # resulting distribution.
    # NOTE(review): assumes self.max_compression_loss is non-zero -- confirm.
    compression_loss_component = compression_loss / self.max_compression_loss
    tau = epsilon + compression_loss_component
    scaled_q_vals = q_vals / tau

    # Shift so the largest exponent is exactly 0 before calling np.exp.
    # Without the shift, large Q-values (or a small tau) overflow to inf and
    # very negative ones underflow so that every weight is 0. The shift
    # multiplies all weights by the same positive constant, so their ratios
    # are unchanged.
    # NOTE(review): relies on weightedRandomChoice sampling proportionally to
    # the (unnormalised) weights -- confirm against its definition.
    scaled_q_vals = scaled_q_vals - np.max(scaled_q_vals)
    exp_q_vals = np.exp(scaled_q_vals)

    weights = dict(enumerate(exp_q_vals))
    return weightedRandomChoice(weights)