def agent_step(self, reward, observation):
    """Perform one SARSA step: update Q(s,a) and choose the next action.

    Args:
        reward: scalar reward for the previous action. If it equals
            self.Bad_Action_Penalty, the previous action is treated as
            illegal and pruned instead of learned from.
        observation: new observation; its intArray encodes the state.

    Returns:
        Action: the next action to execute (also cached in self.lastAction).
    """
    lastState = self.lastObservation.intArray
    lastAction = self.lastAction.intArray
    lastStateId = SamplingUtility.getStateId(lastState)
    lastActionIdx = self.all_allowed_actions[lastStateId].index(tuple(lastAction))

    if reward == self.Bad_Action_Penalty:
        # Last action was illegal in that state: drop it (and its Q entry)
        # so it can never be selected again, then re-pick an action from
        # the same, unchanged observation.
        self.all_allowed_actions[lastStateId].pop(lastActionIdx)
        self.Q_value_function[lastStateId].pop(lastActionIdx)
        newAction = self.egreedy(self.lastObservation.intArray)
        if isinstance(newAction, tuple):
            # Bug fix: the normal path converts tuple actions to lists
            # before storing them in Action.intArray; this branch did not.
            newAction = list(newAction)
        returnAction = Action()
        returnAction.intArray = newAction
        self.lastAction = copy.deepcopy(returnAction)
        return returnAction

    newState = observation.intArray
    newAction = self.egreedy(newState)
    if isinstance(newAction, tuple):
        newAction = list(newAction)

    if not self.policyFrozen:
        # SARSA update: Q(s,a) += alpha * (r + gamma * Q(s',a') - Q(s,a)).
        # egreedy(newState) above guarantees table entries exist for newState.
        newStateId = SamplingUtility.getStateId(newState)
        newActionIdx = self.all_allowed_actions[newStateId].index(tuple(newAction))
        Q_sa = self.Q_value_function[lastStateId][lastActionIdx]
        Q_sprime_aprime = self.Q_value_function[newStateId][newActionIdx]
        self.Q_value_function[lastStateId][lastActionIdx] = (
            Q_sa + self.sarsa_stepsize * (reward + self.sarsa_gamma * Q_sprime_aprime - Q_sa))

    returnAction = Action()
    returnAction.intArray = newAction
    self.lastAction = copy.deepcopy(returnAction)
    self.lastObservation = copy.deepcopy(observation)
    return returnAction
Esempio n. 2
0
 def agent_end(self, reward):
     """Final SARSA update at episode end.

     There is no successor state, so the update target is just the reward:
     Q(s,a) += alpha * (r - Q(s,a)).

     Args:
         reward: terminal reward for the last (state, action) pair.
     """
     lastState = self.lastObservation.intArray
     lastAction = self.lastAction.intArray
     # Resolve the (state, action) cell once instead of recomputing the
     # state id and action index for every access, as the original did.
     lastStateId = SamplingUtility.getStateId(lastState)
     lastActionIdx = self.all_allowed_actions[lastStateId].index(tuple(lastAction))
     if not self.policyFrozen:
         Q_sa = self.Q_value_function[lastStateId][lastActionIdx]
         self.Q_value_function[lastStateId][lastActionIdx] = (
             Q_sa + self.sarsa_stepsize * (reward - Q_sa))
Esempio n. 3
0
    def random_player(self, state):
        """Return a uniformly random legal action for *state*.

        The legal-action cache for this state is refreshed on every call.
        """
        state_id = SamplingUtility.getStateId(state)
        legal = InvasiveUtility.getActions(state, self.nbrReaches, self.habitatSize)
        self.all_allowed_actions[state_id] = legal
        # randint bounds are inclusive, hence len - 1.
        pick = self.randGenerator.randint(0, len(legal) - 1)
        return legal[pick]
Esempio n. 4
0
    def agent_step(self, reward, observation):
        """Perform one on-policy learning step and choose the next action.

        Args:
            reward: scalar reward for the previous action. If it equals
                self.Bad_Action_Penalty, the previous action is treated as
                illegal and pruned instead of learned from.
            observation: new observation; its intArray encodes the state.

        Returns:
            Action: the next action to execute (also cached in self.lastAction).
        """
        lastState = self.lastObservation.intArray
        lastAction = self.lastAction.intArray
        lastStateId = SamplingUtility.getStateId(lastState)
        lastActionIdx = self.all_allowed_actions[lastStateId].index(tuple(lastAction))

        if reward == self.Bad_Action_Penalty:
            # Last action was illegal: drop it (and its Q entry) and re-pick
            # an action from the same, unchanged observation.
            self.all_allowed_actions[lastStateId].pop(lastActionIdx)
            self.Q_value_function[lastStateId].pop(lastActionIdx)
            newAction = self.egreedy(self.lastObservation.intArray)
            if isinstance(newAction, tuple):
                # Bug fix: the normal path converts tuple actions to lists
                # before storing them in Action.intArray; this branch did not.
                newAction = list(newAction)
            returnAction = Action()
            returnAction.intArray = newAction
            self.lastAction = copy.deepcopy(returnAction)
            return returnAction

        newState = observation.intArray
        # NOTE: for a pure random player, self.egreedy is bound to
        # random_player elsewhere (per the original comments).
        newAction = self.egreedy(newState)
        if isinstance(newAction, tuple):
            newAction = list(newAction)

        if not self.policyFrozen:
            # Q(s,a) += alpha * (r + gamma * Q(s',a') - Q(s,a)).
            # egreedy(newState) above guarantees entries exist for newState.
            newStateId = SamplingUtility.getStateId(newState)
            newActionIdx = self.all_allowed_actions[newStateId].index(tuple(newAction))
            Q_sa = self.Q_value_function[lastStateId][lastActionIdx]
            Q_sprime_aprime = self.Q_value_function[newStateId][newActionIdx]
            self.Q_value_function[lastStateId][lastActionIdx] = (
                Q_sa + self.stepsize * (reward + self.discount * Q_sprime_aprime - Q_sa))

        returnAction = Action()
        returnAction.intArray = newAction
        self.lastAction = copy.deepcopy(returnAction)
        self.lastObservation = copy.deepcopy(observation)
        return returnAction
Esempio n. 5
0
 def egreedy(self, state):
     """Epsilon-greedy action selection for *state*.

     With probability sarsa_epsilon (while exploration is not frozen) a
     uniformly random allowed action is returned; otherwise the action
     with the highest Q-value (lowest index on ties).

     Args:
         state: raw state vector (intArray contents).

     Returns:
         The selected action (a tuple from self.all_allowed_actions).
     """
     stateId = SamplingUtility.getStateId(state)
     # Lazily initialize this state's allowed actions and zeroed Q-values.
     # 'not in' replaces the Python-2-only dict.has_key; the original
     # 'len(...) == 0 or' guard was redundant with the membership test.
     if stateId not in self.Q_value_function:
         self.all_allowed_actions[stateId] = InvasiveUtility.getActions(state, self.nbrReaches, self.habitatSize)
         self.Q_value_function[stateId] = len(self.all_allowed_actions[stateId]) * [0.0]
     if not self.exploringFrozen and self.randGenerator.random() < self.sarsa_epsilon:
         # Explore: uniform over allowed actions (randint bounds inclusive).
         index = self.randGenerator.randint(0, len(self.all_allowed_actions[stateId]) - 1)
     else:
         # Exploit: first action achieving the maximal Q-value.
         index = self.Q_value_function[stateId].index(max(self.Q_value_function[stateId]))
     return self.all_allowed_actions[stateId][index]
Esempio n. 6
0
    def random_player(self, state):
        """Choose an action for *state* uniformly at random.

        Recomputes and caches the allowed-action list for the state on
        each call before sampling from it.
        """
        stateId = SamplingUtility.getStateId(state)
        self.all_allowed_actions[stateId] = InvasiveUtility.getActions(
            state, self.nbrReaches, self.habitatSize)
        actions = self.all_allowed_actions[stateId]
        # Upper bound is inclusive for randint, so subtract one.
        return actions[self.randGenerator.randint(0, len(actions) - 1)]
Esempio n. 7
0
    def agent_step(self, reward, observation):
        """Perform one on-policy learning step and choose the next action.

        Args:
            reward: scalar reward for the previous action. If it equals
                self.Bad_Action_Penalty, the previous action is treated as
                illegal and pruned instead of learned from.
            observation: new observation; its intArray encodes the state.

        Returns:
            Action: the next action to execute (also cached in self.lastAction).
        """
        lastState = self.lastObservation.intArray
        lastAction = self.lastAction.intArray
        lastStateId = SamplingUtility.getStateId(lastState)
        lastActionIdx = self.all_allowed_actions[lastStateId].index(tuple(lastAction))

        if reward == self.Bad_Action_Penalty:
            # Last action was illegal: drop it (and its Q entry) and re-pick
            # an action from the same, unchanged observation.
            self.all_allowed_actions[lastStateId].pop(lastActionIdx)
            self.Q_value_function[lastStateId].pop(lastActionIdx)
            newAction = self.egreedy(self.lastObservation.intArray)
            if isinstance(newAction, tuple):
                # Bug fix: the normal path converts tuple actions to lists
                # before storing them in Action.intArray; this branch did not.
                newAction = list(newAction)
            returnAction = Action()
            returnAction.intArray = newAction
            self.lastAction = copy.deepcopy(returnAction)
            return returnAction

        newState = observation.intArray
        # NOTE: for a pure random player, self.egreedy is bound to
        # random_player elsewhere (per the original comments).
        newAction = self.egreedy(newState)
        if isinstance(newAction, tuple):
            newAction = list(newAction)

        if not self.policyFrozen:
            # Q(s,a) += alpha * (r + gamma * Q(s',a') - Q(s,a)).
            # egreedy(newState) above guarantees entries exist for newState.
            newStateId = SamplingUtility.getStateId(newState)
            newActionIdx = self.all_allowed_actions[newStateId].index(tuple(newAction))
            Q_sa = self.Q_value_function[lastStateId][lastActionIdx]
            Q_sprime_aprime = self.Q_value_function[newStateId][newActionIdx]
            self.Q_value_function[lastStateId][lastActionIdx] = (
                Q_sa + self.stepsize * (reward + self.discount * Q_sprime_aprime - Q_sa))

        returnAction = Action()
        returnAction.intArray = newAction
        self.lastAction = copy.deepcopy(returnAction)
        self.lastObservation = copy.deepcopy(observation)
        return returnAction
Esempio n. 8
0
    def agent_step(self, reward, observation):
        """Perform one SARSA step: update Q(s,a) and choose the next action.

        Args:
            reward: scalar reward for the previous action. If it equals
                self.Bad_Action_Penalty, the previous action is treated as
                illegal and pruned instead of learned from.
            observation: new observation; its intArray encodes the state.

        Returns:
            Action: the next action to execute (also cached in self.lastAction).
        """
        lastState = self.lastObservation.intArray
        lastAction = self.lastAction.intArray
        lastStateId = SamplingUtility.getStateId(lastState)
        lastActionIdx = self.all_allowed_actions[lastStateId].index(tuple(lastAction))

        if reward == self.Bad_Action_Penalty:
            # Last action was illegal: drop it (and its Q entry) and re-pick
            # an action from the same, unchanged observation.
            self.all_allowed_actions[lastStateId].pop(lastActionIdx)
            self.Q_value_function[lastStateId].pop(lastActionIdx)
            newAction = self.egreedy(self.lastObservation.intArray)
            # Debug output of the rejected action's budget cost; converted
            # from the Python-2-only print statement to a call, which is
            # valid in both Python 2 and 3 for a single argument.
            print(InvasiveUtility.get_budget_cost_actions(lastAction, lastState, self.actionParameterObj))
            if isinstance(newAction, tuple):
                # Bug fix: the normal path converts tuple actions to lists
                # before storing them in Action.intArray; this branch did not.
                newAction = list(newAction)
            returnAction = Action()
            returnAction.intArray = newAction
            self.lastAction = copy.deepcopy(returnAction)
            return returnAction

        newState = observation.intArray
        newAction = self.egreedy(newState)
        if isinstance(newAction, tuple):
            newAction = list(newAction)

        if not self.policyFrozen:
            # SARSA update: Q(s,a) += alpha * (r + gamma * Q(s',a') - Q(s,a)).
            # egreedy(newState) above guarantees entries exist for newState.
            newStateId = SamplingUtility.getStateId(newState)
            newActionIdx = self.all_allowed_actions[newStateId].index(tuple(newAction))
            Q_sa = self.Q_value_function[lastStateId][lastActionIdx]
            Q_sprime_aprime = self.Q_value_function[newStateId][newActionIdx]
            self.Q_value_function[lastStateId][lastActionIdx] = (
                Q_sa + self.sarsa_stepsize * (reward + self.sarsa_gamma * Q_sprime_aprime - Q_sa))

        returnAction = Action()
        returnAction.intArray = newAction
        self.lastAction = copy.deepcopy(returnAction)
        self.lastObservation = copy.deepcopy(observation)
        return returnAction