Example #1
0
 def update(self, last_state, state):
   '''Updates/learns feature weights based on a transition from one state to another.  We learn from the transition from the previous state to this one, now that we can see what happened after our last decision and how it turned out.'''
   
   #Calculate the correction term:
   current_state_value = qLearn.state_value(state, self)
   previous_state_value = qLearn.state_value(self.previous_game_state, self)
   reward = qLearn.transition_reward(self.previous_game_state, state, self)
   correction = (reward + current_state_value) - previous_state_value
   print "current state value: \t", current_state_value
   print "previous state value: \t", previous_state_value
   print "correction: \t\t", correction
   #Calculate the feature values for the current state:
   feature_values = qLearn.state_feature_values(state, self)
   #What if we calculated for previous state and took the difference?
   feature_values_old = qLearn.state_feature_values(self.previous_game_state, self)
   
   #Update learned weights:
   for f_name in feature_values.keys():
     f_val = feature_values[f_name]
     f_val_old = feature_values_old[f_name]
     f_val_delta = f_val - f_val_old
     #Theoretically correct:
     self.weights[f_name] += self.alpha * correction * f_val
     #Desperate hack, which at least sets the signs correctly on the feature weights:
     # self.weights[f_name] += self.alpha * correction * f_val_delta
   self.save_weights_to_db()
   
Example #2
0
 def getPolicy(self, state):
   '''Get the best action to take in this state.  If there are no legal actions, return None.'''
   print "In getPolicy.  "
   
   
   #Return None if there are no legal actions:
   if not len(self.getLegalActions(state)) > 0:
     return None
   
   #Calculate the best action(s)
   chosen_action = None
   max_val = None
   best_actions = []
   for action in self.getLegalActions(state):
     #Set what the state would look like if we took this action:
     successor_state = self.getSuccessor(state, action)
     #Evaluate how good that state would be PLUS THE MOTHERFUCKING REWARD THAT HE WOULD GET FOR GOING THERE
     print "\n\n\t\t(Calculating state value for successor state that comes after action: "+str(action)+")"
     successor_val = qLearn.state_value(successor_state, self)
     transition_reward = qLearn.transition_reward(state, successor_state, self)
     val = transition_reward + successor_val
     print "\t\tIf action is: "+str(action)
     print "\t\tthen transition reward will be: "+str(transition_reward)
     print "\t\tand new state value will be: "+str(successor_val)
     print "\t\tfor a total worth of: "+str(val)
     
     if val == max_val:
       best_actions.append(action)
     elif val > max_val:
       best_actions = [action]
       max_val = val
       
   #Choose a random action from the best actions
   chosen_action = random.choice(best_actions)
   return chosen_action