def epsilon_greedy_linear_constant(self, state, eps_=0.1):
    """
    Pick an action epsilon-greedily using the linear value estimate.

    A value is drawn from ``random()``; if it is below ``eps_``, or if
    ``state.terminal`` is truthy, the result of ``Action.getRandomAction()``
    is returned. Otherwise each action is scored by summing the element-wise
    product of ``self.get_feature_vector(state, action)`` and
    ``self.weights``, and the higher-scoring action is returned.

    Args:
        state: object exposing a ``terminal`` flag; it is also forwarded to
            ``self.get_feature_vector``.
        eps_: constant exploration threshold compared against ``random()``
            (default 0.1).

    Returns:
        ``Action.hit`` only when its score is strictly greater than the
        score of ``Action.stick``; ``Action.stick`` otherwise (ties
        included); or whatever ``Action.getRandomAction()`` returns on the
        exploration / terminal path.
    """
    # The draw happens before the terminal check, so one random number is
    # consumed on every call regardless of the state.
    explore = random() < eps_
    if explore or state.terminal:
        return Action.getRandomAction()

    hit_score = sum(self.get_feature_vector(state, Action.hit) * self.weights)
    stick_score = sum(self.get_feature_vector(state, Action.stick) * self.weights)

    if hit_score > stick_score:
        return Action.hit
    return Action.stick
def epsilon_greedy(self, state):
    """
    Pick an action epsilon-greedily with a visit-count-based epsilon.

    The exploration threshold is ``N0 / (N0 + m)``, where ``m`` is the
    smallest entry of ``self.N[dealer_card - 1, player_sum - 1, :]`` for a
    non-terminal state and ``0`` for a terminal one (so a terminal state
    yields ``N0 / N0``). A value drawn from ``random()`` below that threshold
    selects ``Action.getRandomAction()``; otherwise the index of the largest
    entry of ``self.Q[dealer_card - 1, player_sum - 1, :]`` is mapped to an
    action through ``Action.get_action``.

    Args:
        state: object exposing ``terminal``, ``dealer_card`` and
            ``player_sum``; the latter two are used as 1-based indices into
            ``self.N`` and ``self.Q``.

    Returns:
        The result of ``Action.getRandomAction()`` on the exploration path,
        or of ``Action.get_action(best_index)`` on the greedy path.
    """
    # Terminal states skip the table lookup and use a count of zero.
    fewest_tries = (
        0
        if state.terminal
        else min(self.N[state.dealer_card - 1, state.player_sum - 1, :])
    )
    explore_threshold = self.N0 / (self.N0 + fewest_tries)

    if random() < explore_threshold:
        return Action.getRandomAction()

    best_index = np.argmax(self.Q[state.dealer_card - 1, state.player_sum - 1, :])
    return Action.get_action(best_index)