Beispiel #1
0
    def getReward(self, oldstate, newstate):
        """Look up the reward for moving from ``oldstate`` to ``newstate``.

        The first table index comes from
        ``ConfigTable.getIndexForDirtyCellState(oldstate)`` and the second from
        ``ConfigTable.getCellIndex(newstate)``.

        Returns:
            The entry of ``self.rtable`` stored at that index pair.
        """
        source_idx = ConfigTable.getIndexForDirtyCellState(oldstate)
        target_idx = ConfigTable.getCellIndex(newstate)
        return self.rtable[source_idx, target_idx]
Beispiel #2
0
 def __init__(self) -> None:
     """Create the reward table, Q-table, policy and bookkeeping lists."""
     self.reward = RewardsTable()
     # The Q-table is constructed from the reward table's ``rtable``.
     self.qTable = QTable(self.reward.rtable)
     self.policy = Policy()
     # presumably collects the step count of each run -- TODO confirm against
     # the code that appends to it
     self.noOfStepsList = []
     # NOTE(review): createTableIds() runs only after RewardsTable/QTable have
     # been built; confirm neither constructor relies on the ids existing.
     ConfigTable.createTableIds()
     self.log = []
Beispiel #3
0
 def explore(self):
     """Take one exploration step and feed its reward into the Q-table.

     Increments ``self.noExplore``, snapshots the current state, runs
     ``self.actionAndReward()`` and updates the Q-table with the transition
     to ``self.newState``. A reward equal to ``ConfigRewards.cell_dirty``
     additionally triggers ``ConfigTable.dirtyCellIndexIncrement`` for
     ``self.newState``. Finally ``self.checkIfFinishCell`` is called with the
     reward.
     """
     self.noExplore += 1
     # Shallow-copy the state first, in case actionAndReward() changes it.
     previous = copy.copy(self.state)
     gained = self.actionAndReward()
     self.qTable.update(self.newState, previous, gained)
     hit_dirty_cell = gained == ConfigRewards.cell_dirty
     if hit_dirty_cell:
         ConfigTable.dirtyCellIndexIncrement(self.newState)
     self.checkIfFinishCell(gained)
Beispiel #4
0
 def update(self, newState, oldState, reward):
     """Apply one Q-learning update for the move ``oldState`` -> ``newState``.

     The entry ``Q_Table[s, a]`` (``s`` derived from ``oldState``, ``a`` from
     ``newState``) is moved towards ``reward + gamma * q_max`` by the learning
     rate, where ``q_max`` is ``self.bestQ`` as left behind by
     ``self.getMaxQAction(newState)``. ``self.q_max`` is overwritten as a
     side effect.
     """
     row = ConfigTable.getIndexForDirtyCellState(oldState)
     col = ConfigTable.getCellIndex(newState)
     current_q = self.Q_Table[row, col]

     # getMaxQAction() publishes its result through self.bestQ.
     self.getMaxQAction(newState)
     self.q_max = self.bestQ

     learning_rate = Hyperparam.learning_rate
     discount = Hyperparam.discount_factor
     td_error = reward + discount * self.q_max - current_q
     self.Q_Table[row, col] = current_q + learning_rate * td_error
Beispiel #5
0
    def getNextQValue(self, state, nextState):
        """Consider ``nextState`` as a candidate move and track the best Q-value.

        Candidates whose row or column lies outside ``ConfigTable.rows`` /
        ``ConfigTable.columns`` are ignored. Otherwise, when the Q-value for
        moving from ``state`` to ``nextState`` is strictly greater than
        ``self.bestQ``, ``self.bestQ`` is replaced by it and ``self.bestState``
        becomes a shallow copy of ``nextState``.

        Returns:
            None; results are stored on ``self``.
        """
        row_off_grid = nextState.row < 0 or nextState.row >= ConfigTable.rows
        if row_off_grid:
            return

        column_off_grid = (
            nextState.column < 0 or nextState.column >= ConfigTable.columns
        )
        if column_off_grid:
            return

        origin_idx = ConfigTable.getIndexForDirtyCellState(state)
        candidate_idx = ConfigTable.getCellIndex(nextState)
        candidate_q = self.Q_Table[origin_idx, candidate_idx]
        if candidate_q > self.bestQ:
            self.bestQ = candidate_q
            self.bestState = copy.copy(nextState)
Beispiel #6
0
    def exploit(self):
        """Move to the state the Q-table currently rates best.

        When ``self.qTable.getBestQvalue`` returns a state equal to the current
        one there is nothing to exploit, so the step is delegated to
        ``self.explore()``. Otherwise ``self.noExploit`` is incremented, the
        reward for the move is looked up, the Q-table is updated, a
        ``ConfigRewards.cell_dirty`` reward triggers
        ``ConfigTable.dirtyCellIndexIncrement``, ``self.checkIfFinishCell`` is
        called, and ``self.state`` becomes a shallow copy of the chosen state.
        """
        origin = copy.copy(self.state)
        target = self.qTable.getBestQvalue(self.state)
        no_better_move = origin == target
        if no_better_move:
            # The Q-table offers no new location, so explore to one instead.
            self.explore()
            return

        self.noExploit += 1
        gained = self.reward.getReward(self.state, target)
        self.qTable.update(target, origin, gained)
        hit_dirty_cell = gained == ConfigRewards.cell_dirty
        if hit_dirty_cell:
            ConfigTable.dirtyCellIndexIncrement(target)
        self.checkIfFinishCell(gained)
        # The move is committed only after the finish check has run.
        self.state = copy.copy(target)
Beispiel #7
0
    def isInaccessible(self, state):
        """Return True when ``state`` maps to one of the three blocked cells.

        The cell index from ``ConfigTable.getCellIndex(state)`` is compared, in
        order, against ``self.inaccessible1Cell``, ``self.inaccessible2Cell``
        and ``self.inaccessible3Cell``.
        """
        index = ConfigTable.getCellIndex(state)
        return bool(
            index == self.inaccessible1Cell
            or index == self.inaccessible2Cell
            or index == self.inaccessible3Cell
        )