Beispiel #1
0
def encodeMazeProbabilistic(filename, p=1):
    """Encode a grid maze read from a text file as an MDP with noisy moves.

    The file holds one maze row per line as whitespace-separated integers.
    A cell equal to ``1`` is a wall, ``2`` marks the start cell and ``3``
    the goal cell; any other value is an ordinary open cell.  Blank lines
    are ignored.  Open cells are numbered 0, 1, 2, ... in row-major order
    and become the MDP states.

    Actions are indexed by the cell they aim at, relative to the current
    cell at ``(row, col)``: 0 -> ``(row - 1, col)``, 1 -> ``(row, col + 1)``,
    2 -> ``(row + 1, col)``, 3 -> ``(row, col - 1)``.

    For a cell with ``n`` open neighbours, an action aiming at an open
    neighbour reaches it with probability ``p + (1 - p) / n`` and reaches
    each of the other open neighbours with probability ``(1 - p) / n``.
    Actions aiming at a wall (or outside the grid) get no transitions.
    Each transition is handed to ``State.addTransition`` as the list
    ``[state, action, nextState, -1, probability]``; the constant ``-1`` is
    presumably the per-step reward (confirm against ``State``).

    Args:
        filename: Path of the maze text file.
        p: Weight given to the intended move; the remaining ``1 - p`` is
            spread evenly over all open neighbours.

    Returns:
        An ``MDP`` with ``numStates``, ``numActions`` (4), ``startState``,
        ``endStates``, ``allStates`` and ``gamma`` (1) filled in.
        ``startState`` / ``endStates[0]`` stay ``-1`` when the maze has no
        ``2`` / ``3`` cell; if several cells carry the same marker, the last
        one in row-major order wins.

    Raises:
        ValueError: If the non-blank rows differ in length, or a token is
            not an integer.
    """
    with open(filename, 'r') as handle:
        rows = [[int(token) for token in line.split()] for line in handle]

    # A blank (e.g. trailing) line would otherwise make the grid ragged.
    rows = [row for row in rows if row]
    if len({len(row) for row in rows}) > 1:
        raise ValueError("maze rows must all have the same number of cells")
    maze = np.array(rows)

    # Number the open cells in row-major order; walls keep the sentinel -1.
    states = np.full(maze.shape, -1, dtype=int)
    currState = 0
    startState = -1
    endState = -1
    for i, row in enumerate(maze):
        for j, cell in enumerate(row):
            if cell == 1:
                continue
            states[i][j] = currState
            if cell == 2:
                startState = currState
            if cell == 3:
                endState = currState
            currState += 1

    mdp = MDP()
    mdp.numStates = currState
    mdp.numActions = 4
    mdp.startState = startState
    mdp.endStates = [endState]
    mdp.allStates = [
        State(idx, mdp.numActions) for idx in range(mdp.numStates)
    ]

    # (row, column) offset of the cell each action aims at, indexed by action.
    offsets = ((-1, 0), (0, 1), (1, 0), (0, -1))
    numRows = len(maze)

    for i in range(numRows):
        numCols = len(maze[i])
        for j in range(numCols):
            if states[i][j] == -1:
                continue

            here = int(states[i][j])

            # State id reached by each action, or None for a wall.  Cells
            # outside the grid count as walls, so open cells on the border
            # neither wrap around to the opposite edge nor index out of range.
            neighbours = []
            for di, dj in offsets:
                ni, nj = i + di, j + dj
                inside = 0 <= ni < numRows and 0 <= nj < numCols
                if inside and states[ni][nj] != -1:
                    neighbours.append(int(states[ni][nj]))
                else:
                    neighbours.append(None)

            validStates = float(
                sum(target is not None for target in neighbours))
            if not validStates:
                # Fully enclosed cell: nothing to add (and nothing to divide).
                continue

            # Probability of ending in one particular open neighbour when the
            # move does not simply follow the chosen action.
            slip = (1.0 - p) / validStates
            state = mdp.allStates[here]

            for action, target in enumerate(neighbours):
                if target is None:
                    continue
                # The action aiming at `target` reaches it most of the time.
                state.addTransition([here, action, target, -1, p + slip])
                # Every other usable action may slip into `target` instead.
                for other, reachable in enumerate(neighbours):
                    if other != action and reachable is not None:
                        state.addTransition([here, other, target, -1, slip])

    mdp.gamma = 1
    return mdp
Beispiel #2
0
def encodeMazeDeterministic(filename):
    """Encode a grid maze read from a text file as a deterministic MDP.

    The file holds one maze row per line as whitespace-separated integers.
    A cell equal to ``1`` is a wall, ``2`` marks the start cell and ``3``
    the goal cell; any other value is an ordinary open cell.  Blank lines
    are ignored.  Open cells are numbered 0, 1, 2, ... in row-major order
    and become the MDP states.

    Actions are indexed by the cell they aim at, relative to the current
    cell at ``(row, col)``: 0 -> ``(row - 1, col)``, 1 -> ``(row, col + 1)``,
    2 -> ``(row + 1, col)``, 3 -> ``(row, col - 1)``.  An action aiming at
    an open cell gets a single transition to that cell with probability 1;
    an action aiming at a wall (or outside the grid) gets none.  Each
    transition is handed to ``State.addTransition`` as the list
    ``[state, action, nextState, -1, 1]``; the constant ``-1`` is presumably
    the per-step reward (confirm against ``State``).

    Args:
        filename: Path of the maze text file.

    Returns:
        An ``MDP`` with ``numStates``, ``numActions`` (4), ``startState``,
        ``endStates``, ``allStates`` and ``gamma`` (0.9) filled in.
        ``startState`` / ``endStates[0]`` stay ``-1`` when the maze has no
        ``2`` / ``3`` cell; if several cells carry the same marker, the last
        one in row-major order wins.

    Raises:
        ValueError: If the non-blank rows differ in length, or a token is
            not an integer.
    """
    with open(filename, 'r') as handle:
        rows = [[int(token) for token in line.split()] for line in handle]

    # A blank (e.g. trailing) line would otherwise make the grid ragged.
    rows = [row for row in rows if row]
    if len({len(row) for row in rows}) > 1:
        raise ValueError("maze rows must all have the same number of cells")
    maze = np.array(rows)

    # Number the open cells in row-major order; walls keep the sentinel -1.
    states = np.full(maze.shape, -1, dtype=int)
    currState = 0
    startState = -1
    endState = -1
    for i, row in enumerate(maze):
        for j, cell in enumerate(row):
            if cell == 1:
                continue
            states[i][j] = currState
            if cell == 2:
                startState = currState
            if cell == 3:
                endState = currState
            currState += 1

    mdp = MDP()
    mdp.numStates = currState
    mdp.numActions = 4
    mdp.startState = startState
    mdp.endStates = [endState]
    mdp.allStates = [
        State(idx, mdp.numActions) for idx in range(mdp.numStates)
    ]

    # (row, column) offset of the cell each action aims at, indexed by action.
    offsets = ((-1, 0), (0, 1), (1, 0), (0, -1))
    numRows = len(maze)

    for i in range(numRows):
        numCols = len(maze[i])
        for j in range(numCols):
            if states[i][j] == -1:
                continue

            here = int(states[i][j])
            state = mdp.allStates[here]

            for action, (di, dj) in enumerate(offsets):
                ni, nj = i + di, j + dj
                # Cells outside the grid count as walls, so open cells on the
                # border neither wrap around nor index out of range.
                if not (0 <= ni < numRows and 0 <= nj < numCols):
                    continue
                if states[ni][nj] == -1:
                    continue
                state.addTransition(
                    [here, action, int(states[ni][nj]), -1, 1])

    mdp.gamma = 0.9
    return mdp