コード例 #1
0
ファイル: saql.py プロジェクト: queenstina/TiRL_Leno_et_al
 def getPossibleActions(self):
     """Return every joint action as a list of tuples.

     One tuple of individual actions is collected per agent
     (``self.numAg`` of them) and the collection is handed to
     ``self.cartesian``; each row of its ``tolist()`` result becomes
     one tuple in the returned list.
     """
     # One entry per agent, each holding that agent's full action set.
     perAgentActions = [
         tuple(actions.all_agent_actions()) for _ in range(self.numAg)
     ]
     # Combine the per-agent sets into joint actions and make every
     # row a tuple.
     combined = self.cartesian(perAgentActions).tolist()
     return [tuple(row) for row in combined]
コード例 #2
0
ファイル: qbias.py プロジェクト: queenstina/TiRL_Leno_et_al
    def initiateFromTL(self, state, action):
        """Initialise the Q-entries of ``state`` from the stored Q-table.

        The state is translated with ``self.translate_state`` and mapped
        through ``PITAMUtil.pitam_mappings``. Every action is scored with
        ``self.calculateQValue``; all entries ``(state, act)`` are set to
        the highest score, and the highest-scoring action additionally
        receives ``self.bias``.

        Returns the resulting ``self.qTable`` entry for ``(state, action)``.
        """
        mappedStates = self.translate_state(state)

        # PITAM mapping over the stored (source) Q-table.
        similarStates, similaritySum = PITAMUtil.pitam_mappings(
            mappedStates, self.storedQTable)

        # Score every action; the first action reaching the top score wins.
        topValue = -float('inf')
        topAction = None
        for candidate in actions.all_agent_actions():
            value = self.calculateQValue(
                similarStates, candidate, similaritySum)
            if topValue < value:
                topValue, topAction = value, candidate

        # Every action starts from the best transferred value ...
        for candidate in actions.all_agent_actions():
            self.qTable[(state, candidate)] = topValue
        # ... and only the winner is raised by the bias.
        self.qTable[(state, topAction)] = self.bias + topValue

        return self.qTable[(state, action)]
コード例 #3
0
    def initiateFromTL(self, state, action):
        """Initialise the Q-entries of ``state`` using a vote over source states.

        For each state returned by ``self.translate_state``, the action
        with the highest value in ``self.storedQTable`` (missing entries
        count as 0.0) gets that value added to its vote, provided the
        value is positive. Missing ``(state, act)`` entries of
        ``self.qTable`` are created as 0.0, and if any vote was cast the
        most-voted action's entry is set to ``self.bias``.

        Returns 0 for the ``('e', 'n', 'd')`` state, otherwise the
        ``self.qTable`` entry for ``(state, action)``.
        """
        # Guard against the last state transition: nothing to initialise.
        if state == ('e', 'n', 'd'):
            return 0

        mappedStates = self.translate_state(state)
        candidates = actions.all_agent_actions()

        # NOTE(review): votes are indexed by the action itself, so this
        # assumes actions are integers in 0..len(candidates)-1 -- confirm.
        votes = [0.0] * len(candidates)
        anyVote = False

        for srcState in mappedStates:
            # Best stored action for this source state (first one on ties).
            topValue = -float('inf')
            topAction = None
            for candidate in candidates:
                stored = self.storedQTable.get((srcState, candidate), 0.0)
                if stored > topValue:
                    topValue, topAction = stored, candidate

            # Only strictly positive values count as a vote.
            if topValue > 0.0:
                votes[topAction] += topValue
                anyVote = True

        # Create the entries that do not exist yet; keep existing ones.
        for candidate in candidates:
            key = (state, candidate)
            if key not in self.qTable:
                self.qTable[key] = 0.0

        # Bias transfer: the most-voted action gets the bias value.
        if anyVote:
            winner = votes.index(max(votes))
            self.qTable[(state, winner)] = self.bias

        return self.qTable[(state, action)]
コード例 #4
0
    def getPossibleActions(self):
        """Return whatever ``actions.all_agent_actions()`` provides."""
        availableActions = actions.all_agent_actions()
        return availableActions
コード例 #5
0
 def select_action(self, state, agentIndex):
     """Pick the action to execute, uniformly at random.

     ``state`` and ``agentIndex`` are accepted but not used: the choice
     is made with ``random.choice`` over ``actions.all_agent_actions()``.
     """
     availableActions = actions.all_agent_actions()
     return random.choice(availableActions)