コード例 #1
0
ファイル: smoothness.py プロジェクト: Sephora-M/graph-rl
def obstacles_room(plotV=True, num_sample=100, computeV=False):
    """Build the "obstacles room" maze and optionally its value function.

    A ``domains.GridMazeDomain`` with height 10 and width 10 is created with
    the reward at state 18, the obstacle and wall states listed below and an
    obstacle transition probability of 0.2, then wrapped in a
    ``LearningMazeDomain``.

    Args:
        plotV: If true *and* a value function has been computed, plot it on
            the domain graph and save it to 'graphs/obstacleRoom_trueV.pdf'.
        num_sample: Forwarded to ``LearningMazeDomain`` as ``num_sample``.
        computeV: If true, run ``value_iteration`` on the domain graph.

    Returns:
        A ``(maze, V)`` tuple; ``V`` is ``None`` unless ``computeV`` is true.
    """
    height = 10
    width = 10
    reward_location = 18
    initial_state = None
    obstacles_location = [
        12, 13, 22, 23, 35, 36, 45, 46, 62, 63, 72, 73, 67, 77
    ]
    # NOTE(review): these indices look like the border cells of a row-major
    # 10x10 grid (some corners are listed twice) -- confirm against
    # GridMazeDomain's state numbering.
    walls_location = [
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 60, 70, 80, 90, 9,
        19, 29, 39, 49, 59, 69, 79, 89, 99, 90, 91, 92, 93, 94, 95, 96, 97, 98,
        99
    ]
    obstacles_transition_probability = .2
    domain = domains.GridMazeDomain(height, width, reward_location,
                                    walls_location, obstacles_location,
                                    initial_state,
                                    obstacles_transition_probability)
    maze = LearningMazeDomain(domain=domain, num_sample=num_sample)

    V = None
    if computeV:
        V = value_iteration(maze.domain.graph, reward_location,
                            obstacles_location, walls_location,
                            obstacles_transition_probability)

    # With the default arguments (plotV=True, computeV=False) there is no
    # value function to draw; plotting np.array(None) is meaningless, so the
    # plot is only produced when V exists.
    if plotV and V is not None:
        _, ax = plt.subplots(1, 1)
        maze.domain.graph.plot_signal(np.array(V), vertex_size=60, ax=ax)
        plt.savefig('graphs/obstacleRoom_trueV.pdf')
        plt.close()

    return maze, V
コード例 #2
0
ファイル: smoothness.py プロジェクト: Sephora-M/graph-rl
def oneroom(plotV=True, num_sample=100, computeV=False):
    """Build an empty single-room maze and optionally its value function.

    A ``domains.GridMazeDomain`` with height 10 and width 10, the reward at
    state 9 and no walls or obstacles is created and wrapped in a
    ``LearningMazeDomain``.

    Args:
        plotV: If true *and* a value function has been computed, plot it on
            the domain graph and save it to 'plots/one_room_trueV.pdf'.
        num_sample: Forwarded to ``LearningMazeDomain`` as ``num_sample``.
        computeV: If true, run ``value_iteration`` on the domain graph.

    Returns:
        A ``(maze, V)`` tuple; ``V`` is ``None`` unless ``computeV`` is true.
    """
    height = 10
    width = 10
    reward_location = 9
    initial_state = None
    obstacles_location = []
    walls_location = []
    obstacles_transition_probability = .2
    domain = domains.GridMazeDomain(height, width, reward_location,
                                    walls_location, obstacles_location,
                                    initial_state,
                                    obstacles_transition_probability)
    maze = LearningMazeDomain(domain=domain, num_sample=num_sample)

    V = None
    if computeV:
        V = value_iteration(maze.domain.graph, reward_location,
                            obstacles_location, walls_location,
                            obstacles_transition_probability)

    # With the default arguments (plotV=True, computeV=False) there is no
    # value function to draw, so plotting is skipped unless V exists.
    if plotV and V is not None:
        _, ax = plt.subplots(1, 1)
        maze.domain.graph.plot_signal(np.array(V), vertex_size=60, ax=ax)
        plt.savefig('plots/one_room_trueV.pdf')
        plt.close()

    return maze, V
コード例 #3
0
def example_grid_maze(plotV=True):
    """Build the example grid maze and compute its value function.

    A ``LearningMazeDomain`` is created with height 10, width 10, the reward
    at state 9, the obstacle and wall states listed below, an obstacle
    transition probability of 0.2 and 2000 samples. The value function is
    then computed on ``maze.domain.graph`` with the local ``value_iteration``.

    Args:
        plotV: If true, plot the value function on the domain graph and save
            it to 'graphs/simpleMaze_trueV.pdf'.

    Returns:
        A ``(maze, V)`` tuple, where ``V`` is a list with one value per graph
        vertex.
    """
    height = 10
    width = 10
    reward_location = 9
    initial_state = None
    obstacles_location = [14, 13, 24, 23, 29, 28, 39, 38]
    walls_location = [50, 51, 52, 53, 54, 55, 56, 74, 75, 76, 77, 78, 79]
    obstacles_transition_probability = .2
    maze = LearningMazeDomain(height,
                              width,
                              reward_location,
                              walls_location,
                              obstacles_location,
                              initial_state,
                              obstacles_transition_probability,
                              num_sample=2000)

    def value_iteration(G, finish_state, obstacles, walls):
        """Run in-place value-iteration sweeps over the vertices of ``G``.

        ``G`` must expose ``N`` (number of vertices) and ``W`` with a
        ``getcol(s).nonzero()`` interface (presumably a scipy sparse weight
        matrix -- confirm). Sweeps repeat until the summed absolute change
        of ``V`` over one sweep drops to 1e-4 or below.
        """
        V = [0] * G.N
        R = [0] * G.N
        R[finish_state] = 100
        gamma = 0.9
        # Per-state multiplier applied to the backed-up value: 1 by default,
        # the obstacle probability on obstacles and 0 on walls.
        success_prob = [1] * G.N
        for i in obstacles:
            success_prob[i] = obstacles_transition_probability
        for i in walls:
            success_prob[i] = .0
        epsilon = .0001
        diff = 100
        iterations = 0
        while diff > epsilon:
            iterations += 1
            diff = 0
            # range() instead of xrange() so the loop runs on Python 2 and 3.
            for s in range(G.N):
                if s == finish_state:
                    max_a = success_prob[s] * R[s]
                else:
                    neighbours = G.W.getcol(s).nonzero()[0]
                    if len(neighbours) == 0:
                        # A vertex without neighbours used to keep the
                        # -inf sentinel, which turned `diff` into inf and
                        # then nan and ended the loop prematurely. With no
                        # successor only the immediate reward remains.
                        max_a = success_prob[s] * R[s]
                    else:
                        max_a = max(
                            success_prob[s] * (R[s] + gamma * V[s_prime])
                            for s_prime in neighbours)
                diff += abs(V[s] - max_a)
                V[s] = max_a
        # Single-argument print() calls produce the same output on Python 2
        # and Python 3.
        print("number of iterations in Value Iteration:")
        print(iterations)
        return V

    V = value_iteration(maze.domain.graph, reward_location, obstacles_location,
                        walls_location)

    if plotV:
        _, ax = plt.subplots(1, 1)
        maze.domain.graph.plot_signal(np.array(V), vertex_size=60, ax=ax)
        plt.savefig('graphs/simpleMaze_trueV.pdf')
        plt.close()

    return maze, V
コード例 #4
0
ファイル: PVF_simulation.py プロジェクト: SongFGH/Graph-RL
def main():
    """Run the proto-value-function simulation for every configuration.

    Iterates over the module-level ``DISCOUNT``, ``DIMENSION`` and
    ``GRID_SIZES`` collections. For each combination it builds a square
    ``LearningMazeDomain`` without walls or obstacles whose reward is at
    state ``grid_size - 1``, learns a proto-value-basis policy ten times,
    evaluates each learned policy with ``simulate`` and passes the collected
    results to ``plot_results``.
    """
    for discount in DISCOUNT:
        for dimension in DIMENSION:
            for grid_size in GRID_SIZES:
                print('>>>>>>>>>>>>>>>>>>>>>>>>>> Simulation grid of size : ' +
                      str(grid_size) + 'x' + str(grid_size))
                print(
                    '>>>>>>>>>>>>>>>>>>>>>>>>>> dimension basis function : ' +
                    str(dimension))
                print('>>>>>>>>>>>>>>>>>>>>>>>>>> discount factor : ' +
                      str(discount))
                height = width = grid_size
                num_states = grid_size * grid_size
                reward_location = grid_size - 1
                obstacles_location = []
                walls_location = []
                maze = LearningMazeDomain(height,
                                          width,
                                          reward_location,
                                          walls_location,
                                          obstacles_location,
                                          num_sample=num_samples)

                # Results of the repeated runs, keyed by run index.
                pvf_all_results = {}

                # range() instead of xrange(): identical iteration on
                # Python 2 and also defined on Python 3.
                for k in range(10):
                    (pvf_num_steps, pvf_learned_policy, pvf_samples,
                     pvf_distances) = maze.learn_proto_values_basis(
                         num_basis=dimension,
                         explore=0,
                         discount=discount,
                         max_steps=500,
                         max_iterations=200)

                    (pvf_all_steps_to_goal, pvf_all_samples,
                     pvf_all_cumulative_rewards) = simulate(
                         num_states, reward_location, walls_location, maze,
                         pvf_learned_policy)
                    pvf_all_results[k] = {
                        'steps_to_goal': pvf_all_steps_to_goal,
                        'samples': pvf_all_samples,
                        'cumul_rewards': pvf_all_cumulative_rewards,
                        'learning_distances': pvf_distances
                    }

                plot_results(pvf_all_results, grid_size, reward_location,
                             dimension, discount, num_samples)
コード例 #5
0
def low_stretch_tree_maze(num_sample=100, length_sample=100):
    """Create a learning maze on top of a ``SymmetricMazeDomain``.

    The domain is built with a single reward location (state 15) and no
    obstacles.

    Args:
        num_sample: Forwarded to ``LearningMazeDomain`` as ``num_sample``.
        length_sample: Forwarded to ``LearningMazeDomain`` as
            ``length_sample``.

    Returns:
        The ``LearningMazeDomain`` wrapping the symmetric maze domain.
    """
    symmetric_domain = domains.SymmetricMazeDomain(
        rewards_locations=[15],
        obstacles_location=[],
    )
    return LearningMazeDomain(
        domain=symmetric_domain,
        num_sample=num_sample,
        length_sample=length_sample,
    )
コード例 #6
0
ファイル: PVF_simulation.py プロジェクト: Sephora-M/graph-rl
def tworooms_nowalls(num_sample=3000):
    """Create the two-room maze that has only the dividing wall.

    The ``LearningMazeDomain`` is built with height 10, width 10, the reward
    at state 18, no obstacles, no explicit initial state, an obstacle
    transition probability of 0.2 and walls on states 40-49 except 45.

    Args:
        num_sample: Forwarded to ``LearningMazeDomain`` as ``num_sample``.

    Returns:
        The configured ``LearningMazeDomain``.
    """
    side = 10
    goal_state = 18
    # States 40..49 with state 45 left out of the wall list.
    dividing_wall = [state for state in range(40, 50) if state != 45]
    no_obstacles = []
    start_state = None
    obstacle_probability = .2
    return LearningMazeDomain(side, side, goal_state, dividing_wall,
                              no_obstacles, start_state,
                              obstacle_probability, num_sample=num_sample)
コード例 #7
0
def example_grid_maze():
    """Create the example grid maze used for learning experiments.

    A ``domains.GridMazeDomain`` with height 10 and width 10 is created with
    the reward at state 9, eight obstacle states, thirteen wall states, no
    explicit initial state and an obstacle transition probability of 0.2.
    It is wrapped in a ``LearningMazeDomain`` with ``num_sample=2000``.

    Returns:
        The configured ``LearningMazeDomain``.
    """
    side = 10
    goal_state = 9
    obstacle_states = [14, 13, 24, 23, 29, 28, 39, 38]
    wall_states = [50, 51, 52, 53, 54, 55, 56, 74, 75, 76, 77, 78, 79]
    start_state = None
    obstacle_probability = .2

    grid_domain = domains.GridMazeDomain(
        side, side, goal_state, wall_states, obstacle_states, start_state,
        obstacle_probability)
    return LearningMazeDomain(domain=grid_domain, num_sample=2000)
コード例 #8
0
def oneroom(plotV=True, num_sample=100, length_sample=100, computeV=False):
    """Create an empty single-room maze for learning experiments.

    A ``domains.GridMazeDomain`` with height 10 and width 10, the reward at
    state 9, no walls, no obstacles, no explicit initial state and an
    obstacle transition probability of 0.2 is wrapped in a
    ``LearningMazeDomain``.

    Args:
        plotV: Accepted but not used by this function.
        num_sample: Forwarded to ``LearningMazeDomain`` as ``num_sample``.
        length_sample: Forwarded to ``LearningMazeDomain`` as
            ``length_sample``.
        computeV: Accepted but not used by this function.

    Returns:
        The configured ``LearningMazeDomain``.
    """
    side = 10
    goal_state = 9
    wall_states = []
    obstacle_states = []
    start_state = None
    obstacle_probability = .2

    grid_domain = domains.GridMazeDomain(
        side, side, goal_state, wall_states, obstacle_states, start_state,
        obstacle_probability)
    return LearningMazeDomain(domain=grid_domain,
                              num_sample=num_sample,
                              length_sample=length_sample)
コード例 #9
0
ファイル: PVF_simulation.py プロジェクト: Sephora-M/graph-rl
def tworooms(num_sample=3000):
    """Create the walled two-room maze for learning experiments.

    The ``LearningMazeDomain`` is built with height 10, width 10, the reward
    at state 18, no obstacles, no explicit initial state and an obstacle
    transition probability of 0.2. The wall list is the concatenation of the
    five groups assembled below (a few states appear in more than one group).

    Args:
        num_sample: Forwarded to ``LearningMazeDomain`` as ``num_sample``.

    Returns:
        The configured ``LearningMazeDomain``.
    """
    side = 10
    goal_state = 18

    # Group names assume row-major numbering of the 10x10 grid -- TODO
    # confirm against the domain's state indexing.
    top_edge = list(range(0, 10))            # 0, 1, ..., 9
    left_edge = list(range(10, 100, 10))     # 10, 20, ..., 90
    right_edge = list(range(9, 100, 10))     # 9, 19, ..., 99
    bottom_edge = list(range(90, 100))       # 90, 91, ..., 99
    divider = [41, 42, 43, 44, 46, 47, 48, 49]
    wall_states = top_edge + left_edge + right_edge + bottom_edge + divider

    obstacle_states = []
    start_state = None
    obstacle_probability = .2
    return LearningMazeDomain(side, side, goal_state, wall_states,
                              obstacle_states, start_state,
                              obstacle_probability, num_sample=num_sample)
コード例 #10
0
def obstacles_room(plotV=True, num_sample=100, length_sample=100, computeV=False):
    """Create the "obstacles room" maze for learning experiments.

    A ``domains.GridMazeDomain`` with height 10 and width 10 is created with
    the reward at state 18, fourteen obstacle states, the wall groups
    assembled below, no explicit initial state and an obstacle transition
    probability of 0.2. It is wrapped in a ``LearningMazeDomain``.

    Args:
        plotV: Accepted but not used by this function.
        num_sample: Forwarded to ``LearningMazeDomain`` as ``num_sample``.
        length_sample: Forwarded to ``LearningMazeDomain`` as
            ``length_sample``.
        computeV: Accepted but not used by this function.

    Returns:
        The configured ``LearningMazeDomain``.
    """
    side = 10
    goal_state = 18
    obstacle_states = [12, 13, 22, 23, 35, 36, 45, 46, 62, 63, 72, 73, 67, 77]

    # Group names assume row-major numbering of the 10x10 grid -- TODO
    # confirm against the domain's state indexing.
    top_edge = list(range(0, 10))            # 0, 1, ..., 9
    left_edge = list(range(10, 100, 10))     # 10, 20, ..., 90
    right_edge = list(range(9, 100, 10))     # 9, 19, ..., 99
    bottom_edge = list(range(90, 100))       # 90, 91, ..., 99
    wall_states = top_edge + left_edge + right_edge + bottom_edge

    start_state = None
    obstacle_probability = .2
    grid_domain = domains.GridMazeDomain(
        side, side, goal_state, wall_states, obstacle_states, start_state,
        obstacle_probability)
    return LearningMazeDomain(domain=grid_domain,
                              num_sample=num_sample,
                              length_sample=length_sample)
コード例 #11
0
ファイル: smoothness.py プロジェクト: Sephora-M/graph-rl
def low_stretch_tree_maze(plotV=True, num_sample=100, computeV=False):
    """Build the symmetric maze and optionally its value function.

    A ``domains.SymmetricMazeDomain`` with a single reward location
    (state 15) and no obstacles is wrapped in a ``LearningMazeDomain``.

    Args:
        plotV: If true *and* a value function has been computed, plot it on
            the domain graph and save it to 'graphs/lowStretchTree_trueV.pdf'.
        num_sample: Forwarded to ``LearningMazeDomain`` as ``num_sample``.
        computeV: If true, run ``value_iteration`` on the domain graph with
            the first reward location as goal and an empty wall list.

    Returns:
        A ``(maze, V)`` tuple; ``V`` is ``None`` unless ``computeV`` is true.
    """
    reward_location = [15]
    obstacles_location = []
    obstacles_transition_probability = .2
    domain = domains.SymmetricMazeDomain(rewards_locations=reward_location,
                                         obstacles_location=obstacles_location)
    maze = LearningMazeDomain(domain=domain, num_sample=num_sample)

    V = None
    if computeV:
        V = value_iteration(maze.domain.graph, reward_location[0],
                            obstacles_location, [],
                            obstacles_transition_probability)

    # With the default arguments (plotV=True, computeV=False) there is no
    # value function to draw, so plotting is skipped unless V exists.
    if plotV and V is not None:
        _, ax = plt.subplots(1, 1)
        maze.domain.graph.plot_signal(np.array(V), vertex_size=60, ax=ax)
        plt.savefig('graphs/lowStretchTree_trueV.pdf')
        plt.close()

    return maze, V
コード例 #12
0
def threerooms(num_sample=5000, length_sample=100):
    """Create the three-room maze for learning experiments.

    A ``domains.GridMazeDomain`` with height 50 and width 100 is created with
    the reward at state 198, no obstacles, no explicit initial state, an
    obstacle transition probability of 0.2 and walls taken from the index
    ranges listed below. It is wrapped in a ``LearningMazeDomain``.

    Args:
        num_sample: Forwarded to ``LearningMazeDomain`` as ``num_sample``.
        length_sample: Forwarded to ``LearningMazeDomain`` as
            ``length_sample``.

    Returns:
        The configured ``LearningMazeDomain``.
    """
    # (start, stop, step) triples; the wall list is the concatenation of the
    # corresponding ranges, in this order.
    wall_ranges = (
        (0, 100, 1),
        (4900, 5000, 1),
        (0, 5000, 100),
        (99, 5000, 100),
        (1600, 1670, 1),
        (1680, 1700, 1),
        (3200, 3220, 1),
        (3230, 3300, 1),
    )
    wall_states = []
    for start, stop, step in wall_ranges:
        wall_states += range(start, stop, step)

    rows, columns = 50, 100
    goal_state = 198
    obstacle_states = []
    start_state = None
    obstacle_probability = .2
    grid_domain = domains.GridMazeDomain(
        rows, columns, goal_state, wall_states, obstacle_states, start_state,
        obstacle_probability)
    return LearningMazeDomain(domain=grid_domain,
                              num_sample=num_sample,
                              length_sample=length_sample)
コード例 #13
0
ファイル: smoothness.py プロジェクト: Sephora-M/graph-rl
def threerooms(plotV=True, num_sample=5000, computeV=False):
    """Build the three-room maze and optionally its value function.

    A ``domains.GridMazeDomain`` with height 50 and width 100 is created with
    the reward at state 198, no obstacles, an obstacle transition probability
    of 0.2 and walls taken from the index ranges below, then wrapped in a
    ``LearningMazeDomain``.

    Args:
        plotV: If true *and* a value function has been computed, plot it on
            the domain graph and save it to 'graphs/threeRooms_trueV.pdf'.
        num_sample: Forwarded to ``LearningMazeDomain`` as ``num_sample``.
        computeV: If true, run ``value_iteration`` on the domain graph.

    Returns:
        A ``(maze, V)`` tuple; ``V`` is ``None`` unless ``computeV`` is true.
    """
    height = 50
    width = 100
    reward_location = 198
    initial_state = None
    obstacles_location = []
    # NOTE(review): the first four ranges look like the outer border of a
    # row-major 50x100 grid and the last four like two dividing rows with a
    # gap each -- confirm against GridMazeDomain's state numbering.
    walls_location = []
    walls_location.extend(range(100))
    walls_location.extend(range(4900, 5000))
    walls_location.extend(range(0, 5000, 100))
    walls_location.extend(range(99, 5000, 100))
    walls_location.extend(range(1600, 1670))
    walls_location.extend(range(1680, 1700))
    walls_location.extend(range(3200, 3220))
    walls_location.extend(range(3230, 3300))

    obstacles_transition_probability = .2
    domain = domains.GridMazeDomain(height, width, reward_location,
                                    walls_location, obstacles_location,
                                    initial_state,
                                    obstacles_transition_probability)
    maze = LearningMazeDomain(domain=domain, num_sample=num_sample)

    V = None
    if computeV:
        V = value_iteration(maze.domain.graph, reward_location,
                            obstacles_location, walls_location,
                            obstacles_transition_probability)

    # With the default arguments (plotV=True, computeV=False) there is no
    # value function to draw, so plotting is skipped unless V exists.
    if plotV and V is not None:
        _, ax = plt.subplots(1, 1)
        maze.domain.graph.plot_signal(np.array(V), vertex_size=60, ax=ax)
        plt.savefig('graphs/threeRooms_trueV.pdf')
        plt.close()

    return maze, V
コード例 #14
0
ファイル: smoothness.py プロジェクト: Sephora-M/graph-rl
def example_grid_maze(plotV=True):
    """Build the example grid maze and compute its value function.

    A ``domains.GridMazeDomain`` with height 10 and width 10 is created with
    the reward at state 9, eight obstacle states, thirteen wall states, no
    explicit initial state and an obstacle transition probability of 0.2.
    It is wrapped in a ``LearningMazeDomain`` with ``num_sample=2000`` and
    ``value_iteration`` is run on the domain graph.

    Args:
        plotV: If true, plot the value function on the domain graph and save
            it to 'graphs/simpleMaze_trueV.pdf'.

    Returns:
        A ``(maze, V)`` tuple.
    """
    side = 10
    goal_state = 9
    obstacle_states = [14, 13, 24, 23, 29, 28, 39, 38]
    wall_states = [50, 51, 52, 53, 54, 55, 56, 74, 75, 76, 77, 78, 79]
    start_state = None
    obstacle_probability = .2

    grid_domain = domains.GridMazeDomain(
        side, side, goal_state, wall_states, obstacle_states, start_state,
        obstacle_probability)
    maze = LearningMazeDomain(domain=grid_domain, num_sample=2000)
    V = value_iteration(maze.domain.graph, goal_state, obstacle_states,
                        wall_states, obstacle_probability)

    if not plotV:
        return maze, V

    _figure, axis = plt.subplots(1, 1)
    maze.domain.graph.plot_signal(np.array(V), vertex_size=60, ax=axis)
    plt.savefig('graphs/simpleMaze_trueV.pdf')
    plt.close()
    return maze, V