Esempio n. 1
0
    # Build the environment and an initial policy for it.
    # NOTE(review): π is not read anywhere in the visible lines — confirm it is
    # used further down before keeping it.
    maze = Maze()

    π = create_policy_s(maze)

    # Value Iteration
    #
    # Repeatedly sweep over all non-terminal states, updating the value table V
    # in place, until the largest per-state change seen in a sweep (Δ) falls
    # below the threshold Θ.

    # Convergence threshold compared against Δ after every sweep.
    Θ = 0.00000000001
    # Value table indexed by state (read and written as V[state] below).
    V = create_value_funtion(maze)

    # Number of full sweeps performed so far.
    iterations_number = 0

    while True:
        iterations_number += 1
        # Largest |old value - new value| observed during this sweep.
        Δ = 0  
        for state in maze.all_states():
            if not maze.is_terminal(state):
                # Remember the value at the start of this state's update so
                # the change can be measured afterwards.
                v = V[state]
    
                # Actions available in this state (read from a private
                # attribute of the maze) and a uniform weight over them.
                actions = maze._actions[state]
                p = 1 / len(actions)
    
                for action in actions:
                    # Put the maze into `state`, apply the action, and read the
                    # returned reward; the successor state is then available
                    # via maze.current_state().
                    maze.set_state(state)
                    r = maze.move(action)
                    # One-step backup for this action, scaled by p.
                    # γ is defined outside the visible lines (presumably the
                    # discount factor — confirm).
                    # NOTE(review): the textbook value-iteration backup is
                    # max over actions of (r + γ * V[s']) without the uniform
                    # weight p — confirm that scaling by p is intended.
                    calculation = p * (r + γ * V[maze.current_state()])
                    # V[state] is only ever raised, never lowered.
                    # NOTE(review): because the comparison is against the
                    # current V[state], a state whose stored value is already
                    # above every candidate is left unchanged — confirm this
                    # works with the initial values and reward signs in use.
                    if V[state] < calculation:
                        V[state] = calculation 
                    # Track the biggest change relative to the value this
                    # state had before its update began.
                    Δ = max(Δ, abs(v - V[state]))
        # Sweep finished: report Δ once it has dropped below the threshold.
        # The statements that follow this print (e.g. leaving the loop) are
        # outside the visible lines.
        if Δ < Θ:
            print("Δ:",Δ)