Esempio n. 1
0
 def epsilon_greedy_linear_constant(self, state, eps_=0.1):
     """
     Choose an action epsilon-greedily with a fixed exploration rate.

     When ``random()`` falls below ``eps_``, or when ``state.terminal`` is
     truthy, the result of ``Action.getRandomAction()`` is returned.
     Otherwise each action is scored by summing the element-wise product of
     its feature vector (``self.get_feature_vector``) and ``self.weights``.
     ``Action.hit`` is returned only when its score is strictly greater, so
     ties resolve to ``Action.stick``.
     """
     # random() is evaluated before the terminal check, so it is called on
     # every invocation, terminal state or not.
     explore = random() < eps_ or state.terminal
     if explore:
         return Action.getRandomAction()

     def score(candidate):
         # Linear value estimate: sum of features times weights.
         return sum(self.get_feature_vector(state, candidate) * self.weights)

     hit_score = score(Action.hit)
     stick_score = score(Action.stick)
     if hit_score > stick_score:
         return Action.hit
     return Action.stick
Esempio n. 2
0
    def epsilon_greedy(self, state):
        """
        Choose an action epsilon-greedily with a count-dependent epsilon.

        Epsilon is ``self.N0 / (self.N0 + n_min)``. ``n_min`` is 0 when
        ``state.terminal`` is truthy; otherwise it is the smallest entry of
        ``self.N`` at indices ``state.dealer_card - 1`` and
        ``state.player_sum - 1``. When ``random()`` falls below epsilon the
        result of ``Action.getRandomAction()`` is returned; otherwise the
        argmax over the matching ``self.Q`` entries is passed to
        ``Action.get_action``.
        """
        def cell(table):
            # Slice of a table for the current state; only evaluated when
            # actually needed, so terminal states never index N.
            return table[state.dealer_card - 1, state.player_sum - 1, :]

        least_visits = 0 if state.terminal else min(cell(self.N))
        exploration_rate = self.N0 / (self.N0 + least_visits)

        if random() < exploration_rate:
            return Action.getRandomAction()

        best_index = np.argmax(cell(self.Q))
        return Action.get_action(best_index)