Code example #1
    def __init__(self, seed=12345, alpha=0.9, epsilon=0.1):

        self.functions = Agent_Utilities()
        self.alpha = alpha
        self.epsilon = epsilon
        self.qTable = {}

        super(QLearning, self).__init__(seed=seed)
        self.activatedTL = False
Code example #2
File: dooq.py  Project: queenstina/TiRL_Leno_et_al
    def __init__(self, seed=12345, numAg=3, gamma=0.9, T=0.4):
        self.sortFriends = None
        self.gamma = gamma
        self.T = T
        self.functions = Agent_Utilities()
        self.qTable = {}
        self.policy = {}
        self.friends = None
        super(DOOQ, self).__init__(seed=seed, numAg=numAg)
Code example #3
File: maql.py  Project: queenstina/TiRL_Leno_et_al
    def __init__(self, seed=12345, numAg=3, alpha=0.1, gamma=0.9, T=0.4):
        self.sortFriends = True
        self.alpha = alpha
        self.gamma = gamma
        self.T = T
        self.functions = Agent_Utilities()
        self.qTable = {}
        self.friends = None
        self.lastAction = None
        super(MAQL, self).__init__(seed=seed, numAg=numAg)
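The T parameter above is the temperature for Boltzmann (softmax) exploration, which the later examples implement inside exp_strategy. A minimal standalone sketch of that selection rule over a dict-keyed Q-table (the name boltzmann_select is illustrative, not part of the project):

import math
import random

def boltzmann_select(qTable, state, allActions, T=0.4):
    """Samples an action with probability proportional to exp(Q(s,a)/T)."""
    weights = [math.exp(qTable.get((state, a), 0.0) / T) for a in allActions]
    rndVal = random.random() * sum(weights)
    cumulative = 0.0
    for action, weight in zip(allActions, weights):
        cumulative += weight
        if rndVal <= cumulative:
            return action
    return allActions[-1]  #guard against floating-point rounding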
Code example #4
    def __init__(self,
                 seed=12345,
                 numAg=3,
                 alpha=0.1,
                 decayRate=0.9,
                 initialEpsilon=0.5,
                 epsilonDecay=0.999):

        self.functions = Agent_Utilities()
        self.stateActionTrace = {}
        self.alpha = alpha
        self.epsilon = initialEpsilon
        self.epsilonDecay = epsilonDecay
        self.decayRate = decayRate
        super(QLearning, self).__init__(seed=seed, numAg=numAg)
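This constructor only initializes the eligibility-trace fields (stateActionTrace, decayRate, epsilonDecay); the matching update appears only as commented-out code in code example #7. A hedged, self-contained sketch of the Q(lambda)-style step those fields suggest (q_lambda_update is an illustrative name, not a method of the project):

def q_lambda_update(qTable, trace, state, action, statePrime, reward,
                    possibleActions, alpha=0.1, gamma=0.9, decayRate=0.9):
    """One eligibility-trace Q-learning step over dict-based tables."""
    qValue = qTable.get((state, action), 0.0)
    V = max(qTable.get((statePrime, a), 0.0) for a in possibleActions)
    tdError = reward + gamma * V - qValue
    trace[(state, action)] = trace.get((state, action), 0.0) + 1.0
    for sa in list(trace):
        #Every visited pair is updated in proportion to its trace value
        qTable[sa] = qTable.get(sa, 0.0) + alpha * tdError * trace[sa]
        trace[sa] = gamma * decayRate * trace[sa]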
Code example #5
File: ofqlearning.py  Project: amoliu/Curriculum_HFO
    def __init__(self, seed=12345, alpha=0.5, epsilon=0.1, initQ=0):

        self.functions = Agent_Utilities()
        self.alpha = alpha
        self.epsilon = epsilon
        self.qTables = {}
        self.initQ = initQ
        super(OFQLearning, self).__init__(seed=seed)
Code example #6
File: saql.py  Project: queenstina/TiRL_Leno_et_al
import math
import random

#Agent, Agent_Utilities, and the actions module are project-specific imports in the original repository
class SAQL(Agent):

    alpha = None
    gamma = None
    T = None
    qTable = None
    functions = None
    lastStateAction = None
    lastState = None

    def __init__(self, seed=12345, numAg=3, alpha=0.1, gamma=0.9, T=0.4):
        self.sortFriends = True
        self.alpha = alpha
        self.gamma = gamma
        self.T = T
        self.functions = Agent_Utilities()
        self.qTable = {}
        self.lastStateAction = None
        self.lastState = None

        super(SAQL, self).__init__(seed=seed, numAg=numAg)

    def get_proc_state(self, agentIndex):
        """ Returns a processed version of the current state """
        if self.lastState is None:
            state = []
            for i in range(self.numAg):
                state.append(self.environment.get_state(i, self.sortFriends))
            self.lastState = tuple(state)
        return self.lastState

    def initiate_agent_refs(self, numAg, seed):
        """ Create the references to be executed by experiment.py """
        agents = []
        for i in range(numAg):
            agents.append(self)

        return agents

    def select_action(self, state, agentIndex):
        """ When this method is called, the agent executes an action. """
        #Computes the best action for each agent
        if self.lastStateAction is None:
            self.compute_action(state)
        #Returns the best action
        return self.lastStateAction[agentIndex]

    def compute_action(self, state):
        """Computes the best action for all agents"""
        #If the agent is exploring, the exploration strategy is called
        if self.exploring:
            self.lastStateAction = self.exp_strategy(state)
        #Else the best action is picked
        else:
            self.lastStateAction = self.max_Q_action(state)
        #return self.max_Q_action(state)

    def max_Q_action(self, state):
        """Returns the action that corresponds to the highest Q-value"""
        actions = self.getPossibleActions()
        v, a = self.functions.get_max_Q_value_action(self.qTable, state,
                                                     actions, self.exploring)
        return a

    def get_max_Q_value(self, state):
        """Returns the maximum Q value for a state"""
        actions = self.getPossibleActions()
        v, a = self.functions.get_max_Q_value_action(self.qTable, state,
                                                     actions, self.exploring)
        return v

    def exp_strategy(self, state):
        """Returns the result of the exploration strategy"""
        useBoltz = False
        allActions = self.getPossibleActions()
        if useBoltz:
            #Boltzmann exploration strategy
            valueActions = []
            sumActions = 0

            for action in allActions:
                qValue = self.qTable.get((state, action), 0.0)
                vBoltz = math.pow(math.e, qValue / self.T)
                valueActions.append(vBoltz)
                sumActions += vBoltz

            probAct = []
            for index in range(len(allActions)):
                probAct.append(valueActions[index] / sumActions)

            rndVal = random.random()

            sumProbs = 0
            i = -1

            while sumProbs <= rndVal:
                i = i + 1
                sumProbs += probAct[i]

            return allActions[i]
        else:
            prob = random.random()
            if prob <= 0.1:
                return random.choice(allActions)
            return self.max_Q_action(state)

    def get_Q_size(self, agentIndex):
        """Returns the size of the QTable"""
        return len(self.qTable)

    def observe_reward(self, state, action, statePrime, reward, agentIndex):
        """Performs the standard Q-Learning Update"""
        if self.exploring and self.lastState:
            state = self.lastState
            qValue = self.qTable.get((self.lastState, self.lastStateAction),
                                     None)
            self.lastState = None
            statePrime = self.get_proc_state(agentIndex)
            V = self.get_max_Q_value(statePrime)
            if qValue is None:
                newQ = reward
            else:
                newQ = qValue + (self.alpha) * (reward + self.gamma * V -
                                                qValue)
            self.qTable[(state, self.lastStateAction)] = newQ
        self.lastState = None
        self.lastStateAction = None

    def getPossibleActions(self):
        """Returns the possible actions"""
        #Cartesian product of the action sets of all agents
        allActions = []
        for ag in range(0, self.numAg):
            allActions.append(tuple(actions.all_agent_actions()))
        #Returns all possible combined actions
        listAct = self.cartesian(allActions).tolist()
        ret = []
        for ele in listAct:
            ret.append(tuple(ele))
        return ret

    def cartesian(self, arrays, out=None):
        """
        Generate a cartesian product of input arrays.
        http://stackoverflow.com/questions/1208118/using-numpy-to-build-an-array-of-all-combinations-of-two-arrays
        """
        import numpy as np
        arrays = [np.asarray(x) for x in arrays]
        dtype = arrays[0].dtype

        n = np.prod([x.size for x in arrays])
        if out is None:
            out = np.zeros([n, len(arrays)], dtype=dtype)

        m = n // arrays[0].size  #integer division so the result can be used as a slice index
        out[:, 0] = np.repeat(arrays[0], m)
        if arrays[1:]:
            self.cartesian(arrays[1:], out=out[0:m, 1:])
            for j in range(1, arrays[0].size):
                out[j * m:(j + 1) * m, 1:] = out[0:m, 1:]
        return out
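getPossibleActions builds the joint action space as the Cartesian product of each agent's individual action set. The same result can be sketched without the numpy recursion above, using the standard library (the action tuple below is hypothetical):

from itertools import product

agent_actions = (0, 1, 2)  #hypothetical per-agent action set
numAg = 3
joint_actions = [tuple(a) for a in product(agent_actions, repeat=numAg)]
#3 agents with 3 actions each -> 27 joint actions: (0, 0, 0), (0, 0, 1), ...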
Code example #7
import math
import random

#Agent, Agent_Utilities, and the actions module are project-specific imports in the original repository
class QLearning(Agent):

    alpha = None
    epsilon = None
    functions = None
    policy = None
    qTable = None
    initQ = None  #Value used to initialize the Q-table

    def __init__(self, seed=12345, alpha=0.9, epsilon=0.1):

        self.functions = Agent_Utilities()
        self.alpha = alpha
        self.epsilon = epsilon
        self.qTable = {}

        super(QLearning, self).__init__(seed=seed)
        self.activatedTL = False

    def select_action(self, state):
        """ When this method is called, the agent executes an action based on its Q-table """

        #If exploring, an exploration strategy is executed
        if self.exploring:
            action = self.exp_strategy(state)
        #Else the best action is selected
        else:
            #action =  self.exp_strategy(state)
            action = self.policy_check(state)

        return action

    def policy_check(self, state):
        """In case a fixed action is included in the policy cache, that action is returned
        else, the maxQ action is returned"""
        return self.max_Q_action(state)

    def max_Q_action(self, state):
        """Returns the action that corresponds to the highest Q-value"""
        actions = self.getPossibleActions()
        v, a = self.functions.get_max_Q_value_action(self.qTable, state,
                                                     actions, self.exploring,
                                                     self)
        return a

    def get_max_Q_value(self, state):
        """Returns the maximum Q value for a state"""
        actions = self.getPossibleActions()
        v, a = self.functions.get_max_Q_value_action(self.qTable, state,
                                                     actions, self.exploring,
                                                     self)
        return v

    def exp_strategy(self, state):
        """Returns the result of the exploration strategy"""
        useBoltz = False
        allActions = self.getPossibleActions()
        if useBoltz:
            #Boltzmann exploration strategy
            valueActions = []
            sumActions = 0

            for action in allActions:
                qValue = self.readQTable(state, action)
                vBoltz = math.pow(math.e, qValue / self.T)
                valueActions.append(vBoltz)
                sumActions += vBoltz

            probAct = []
            for index in range(len(allActions)):
                probAct.append(valueActions[index] / sumActions)

            rndVal = random.random()

            sumProbs = 0
            i = -1

            while sumProbs <= rndVal:
                i = i + 1
                sumProbs += probAct[i]

            return allActions[i]
        else:
            prob = random.random()
            if prob <= self.epsilon:
                return random.choice(allActions)
            return self.max_Q_action(state)

    def get_Q_size(self):
        """Returns the size of the QTable"""
        return len(self.qTable)

    def observe_reward(self, state, action, statePrime, reward):
        """Performs the standard Q-Learning Update"""
        if self.exploring:
            qValue = self.readQTable(state, action)
            V = self.get_max_Q_value(statePrime)
            newQ = qValue + self.alpha * (reward + self.gamma * V - qValue)
            self.qTable[(state, action)] = newQ

#        if self.exploring:
#            qValue= self.readQTable(state,action)
#            V = self.get_max_Q_value(statePrime)
#            TDError = reward + self.gamma * V - qValue
#            self.stateActionTrace[(state, action)] = self.stateActionTrace.get((state, action), 0) + 1
#            for stateAction in self.stateActionTrace:
#                # update update ALL Q values and eligibility trace values
#                newQ = qValue + self.alpha * TDError * self.stateActionTrace.get(stateAction, 0)
#                self.qTable[stateAction] = newQ
#                # update eligibility trace Function for state and action
#                self.stateActionTrace[stateAction] = self.gamma * self.decayRate * self.stateActionTrace.get(stateAction, 0)
#            if self.environment.is_terminal_state():
#                    self.stateActionTrace = {}
#                    self.epsilon = self.epsilon #* self.epsilonDecay

#        if self.exploring:
#            qValue= self.readQTable(state,action)
#            V = self.get_max_Q_value(statePrime)
#            newQ = qValue + self.alpha * (reward + self.gamma * V - qValue)
#            self.qTable[(state,action)] = newQ
#

    def getPossibleActions(self):
        """Returns the possible actions"""

        return actions.all_agent_actions()
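The update in observe_reward is the textbook tabular rule Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a)). A minimal dict-based sketch of that single step (q_learning_update is an illustrative standalone name):

def q_learning_update(qTable, state, action, statePrime, reward,
                      possibleActions, alpha=0.9, gamma=0.9):
    """One tabular Q-learning step over a dict keyed by (state, action)."""
    qValue = qTable.get((state, action), 0.0)
    V = max(qTable.get((statePrime, a), 0.0) for a in possibleActions)
    qTable[(state, action)] = qValue + alpha * (reward + gamma * V - qValue)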
Code example #8
File: dooq.py  Project: queenstina/TiRL_Leno_et_al
import copy
import math
import random

#Agent, Agent_Utilities, and the actions module are project-specific imports in the original repository
class DOOQ(Agent):

    gamma = None
    T = None
    qTable = None
    functions = None
    policy = None
    friends = None

    def __init__(self, seed=12345, numAg=3, gamma=0.9, T=0.4):
        self.sortFriends = None
        self.gamma = gamma
        self.T = T
        self.functions = Agent_Utilities()
        self.qTable = {}
        self.policy = {}
        self.friends = None
        super(DOOQ, self).__init__(seed=seed, numAg=numAg)

    def initiate_agent_refs(self, numAg, seed):
        """ Create the references to be executed by experiment.py """
        agents = []
        for i in range(numAg):
            agents.append(copy.deepcopy(self))
            agents[i].sortFriends = True

        return agents

    def get_proc_state(self, agentIndex):
        """ Returns a processed version of the current state """
        return self.environment.get_state(agentIndex, self.sortFriends)

    def select_action(self, state, agentIndex):
        """ When this method is called, the agent executes an action. """

        if self.blind_state(state):
            return random.choice(self.getPossibleActions())
        #Computes the best action for each agent
        if self.exploring:
            action = self.exp_strategy(state)
        #Else the best action is picked
        else:
            action = self.policy_check(state)

        return action

    def blind_state(self, state):
        """Returns if the agent can see anything"""
        for i in range(len(state)):
            if state[i] != float('inf'):
                return False
        return True

    def policy_check(self, state):
        """In case a fixed action is included in the policy cache, that action is returned
        else, the maxQ action is returned"""
        if state in self.policy:
            return self.policy[state]
        return self.max_Q_action(state)

    def max_Q_action(self, state):
        """Returns the action that corresponds to the highest Q-value"""
        actions = self.getPossibleActions()
        v, a = self.functions.get_max_Q_value_action(self.qTable, state,
                                                     actions, self.exploring)
        return a

    def get_max_Q_value(self, state):
        """Returns the maximum Q value for a state"""
        actions = self.getPossibleActions()
        v, a = self.functions.get_max_Q_value_action(self.qTable, state,
                                                     actions, self.exploring)
        return v

    def exp_strategy(self, state):
        """Returns the result of the exploration strategy"""
        useBoltz = False
        allActions = self.getPossibleActions()
        if useBoltz:
            #Boltzmann exploration strategy
            valueActions = []
            sumActions = 0

            for action in allActions:
                qValue = self.qTable.get((state, action), 0.0)
                vBoltz = math.pow(math.e, qValue / self.T)
                valueActions.append(vBoltz)
                sumActions += vBoltz

            probAct = []
            for index in range(len(allActions)):
                probAct.append(valueActions[index] / sumActions)

            rndVal = random.random()

            sumProbs = 0
            i = -1

            while sumProbs <= rndVal:
                i = i + 1
                sumProbs += probAct[i]

            return allActions[i]
        else:
            prob = random.random()
            if prob <= 0.1:
                return random.choice(allActions)
            return self.max_Q_action(state)

    def get_Q_size(self, agentIndex):
        """Returns the size of the QTable"""
        return len(self.qTable)

    def observe_reward(self, state, action, statePrime, reward, agentIndex):
        """Performs the standard Q-Learning Update"""
        if self.exploring:
            qValue = self.qTable.get((state, action), None)
            V = self.get_max_Q_value(statePrime)
            if qValue is None:
                newQ = reward
            else:
                newQ = max(qValue, reward + self.gamma * V)
                if newQ > qValue:
                    #Check if the policy needs to be updated
                    if self.functions.check_various_max_Q(
                            self.qTable, state, self.getPossibleActions()):
                        self.policy[state] = action
                    else:
                        if state in self.policy:
                            del self.policy[state]
            self.qTable[(state, action)] = newQ

    def getPossibleActions(self):
        """Returns the possible actions"""

        return actions.all_agent_actions()
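Unlike the standard rule in code example #7, DOOQ's observe_reward is optimistic: a Q-value is only ever raised, via newQ = max(Q(s,a), r + gamma * max_a' Q(s',a')). A hedged standalone sketch of that step (optimistic_q_update is an illustrative name):

def optimistic_q_update(qTable, state, action, statePrime, reward,
                        possibleActions, gamma=0.9):
    """Keeps the larger of the stored Q-value and the new return estimate."""
    V = max(qTable.get((statePrime, a), 0.0) for a in possibleActions)
    target = reward + gamma * V
    old = qTable.get((state, action), None)
    #An unseen pair is initialized with the raw reward, mirroring the class above
    qTable[(state, action)] = reward if old is None else max(old, target)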