Example #1
0
class World():
    def __init__(self, N, M, r, specials, p1, p2, possibleActions):
        self.buildMap(N, M, r, specials)
        self.p1 = p1
        self.p2 = p2

        self.actions = Actions(possibleActions)

    def buildMap(self, N, M, r, specials):
        self.rewards = [[r for x in range(N)] for y in range(M)]  # [y][x]
        self.terminals = []

        for s in specials:
            self.rewards[s[1]][s[0]] = specials[s][1]
            if specials[s][0] == "F":
                self.rewards[s[1]][s[0]] = None
            elif specials[s][0] == "G":
                self.terminals.append(s)
            elif specials[s][0] == "S":
                self.start = s

    def getY(self):
        return len(self.rewards)

    def getX(self):
        return len(self.rewards[0])

    # state : tuple(x,y)
    def getReward(self, state):
        return self.rewards[state[1]][state[0]]

    def getActions(self, state):
        return self.actions.getActions() if state not in self.terminals else [
            self.actions.terminalAction
        ]

    def bellmanOperator(self, state, action):
        if self.actions.isTerminalAction(action):
            return [(0, state)]  #state is reachable for sure
        else:
            possibleAct = self.actions.getOtherPossibleActionsActions(action)
            probState = [
                (self.p2, self.getNewState(state, pa)) for pa in possibleAct
            ] + [(self.p1, self.getNewState(state, action))]
            return [s for s in probState if self.isStateReachable(s[1])]

    def getNewState(self, state, action):
        x = state[0] + action['x']
        y = state[1] + action['y']
        if x in range(self.getX()) and y in range(self.getY()):
            return x, y
        return state

    def isStateReachable(self, state):
        return True if self.rewards[state[1]][state[0]] is not None else False