Code Example #1
    def __init__(self, question, test_dict):
        super(QLearningTest, self).__init__(question, test_dict)
        self.discount = float(test_dict['discount'])
        self.grid = grid_world.GridWorld(parse_grid(test_dict['grid']))
        if 'noise' in test_dict:
            self.grid.set_noise(float(test_dict['noise']))
        if 'living_reward' in test_dict:
            self.grid.set_living_reward(float(test_dict['living_reward']))
        self.env = grid_world.GridWorldEnvironment(self.grid)
        self.epsilon = float(test_dict['epsilon'])
        self.learning_rate = float(test_dict['learning_rate'])
        self.opts = {
            'action_fn': self.env.get_possible_actions,
            'epsilon': self.epsilon,
            'gamma': self.discount,
            'alpha': self.learning_rate
        }
        num_experiences = int(test_dict['num_experiences'])
        max_pre_experiences = 10
        # list(...) so the final count can be appended below; a bare range
        # object has no append method in Python 3.
        self.nums_experiences_for_display = list(range(
            min(num_experiences, max_pre_experiences)))
        self.test_out_file = test_dict['test_out_file']
        if max_pre_experiences < num_experiences:
            self.nums_experiences_for_display.append(num_experiences)
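
For context, a grading harness like this typically unpacks the opts dict straight into the agent under test. A hedged sketch of such a test method; QLearningAgent and the environment methods get_current_state/do_action are assumed names, not shown in the snippet above:

        # Sketch only: QLearningAgent and the env methods are assumptions.
        agent = QLearningAgent(**self.opts)  # action_fn, epsilon, gamma, alpha
        for _ in range(num_experiences):
            state = self.env.get_current_state()
            action = agent.get_action(state)
            next_state, reward = self.env.do_action(action)
            agent.update(state, action, next_state, reward)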
Code Example #2
    def __init__(self, question, test_dict):
        super(GridPolicyTest, self).__init__(question, test_dict)

        # Function in the analysis module that returns (discount, noise)
        self.parameter_fn = test_dict['parameter_fn']
        self.question2 = test_dict.get('question2', 'false').lower() == 'true'

        # GridWorld specification
        #    _ is empty space
        #    numbers are terminal states with that value
        #    # is a wall
        #    S is a start state
        #
        self.grid_text = test_dict['grid']
        self.grid = grid_world.GridWorld(parse_grid(test_dict['grid']))
        self.grid_name = test_dict['grid_name']

        # Policy specification
        #    _           policy choice not checked
        #    N, E, S, W  policy action must be north, east, south, west
        #
        self.policy = parse_grid(test_dict['policy'])

        # State that the most probable path must visit
        #    (x,y) for a particular location; (0,0) is bottom left
        #    terminal for the terminal state
        self.path_visits = test_dict.get('path_visits', None)

        # State that the most probable path must not visit
        #    (x,y) for a particular location; (0,0) is bottom left
        #    terminal for the terminal state
        self.path_not_visits = test_dict.get('path_not_visits', None)
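
To make the two legends above concrete, a grid and policy string might look like this (illustrative values, not taken from any actual test file):

# Illustrative strings following the legends above (made-up values).
grid_text = """
    _    _    _    1
    _    #    _   -1
    S    _    _    _
"""
policy_text = """
    E    E    E    _
    N    _    N    _
    N    E    N    _
"""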
Code Example #3
def create_environment(environment_type, grid_dimension_size, reward_function,
                       state_encoding):
    if environment_type == "grid-world":
        return grid_world.GridWorld(grid_dimension_size, reward_function,
                                    state_encoding)
    elif environment_type == "package-grid-world":
        return package_grid_world.PackageGridWorld(grid_dimension_size,
                                                   reward_function)
    # Fail loudly instead of silently returning None for an unknown type.
    raise ValueError("unknown environment_type: {!r}".format(environment_type))
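
A call site for this factory might look as follows; the argument values here are purely illustrative:

# Illustrative call; all argument values are made up.
env = create_environment(
    environment_type="grid-world",
    grid_dimension_size=5,
    reward_function=lambda state: -1.0,  # e.g. a constant step cost
    state_encoding="one-hot",
)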
Code Example #4
    def __init__(self, question, test_dict):
        super(EpsilonGreedyTest, self).__init__(question, test_dict)
        self.discount = float(test_dict['discount'])
        self.grid = grid_world.GridWorld(parse_grid(test_dict['grid']))
        if 'noise' in test_dict:
            self.grid.set_noise(float(test_dict['noise']))
        if 'living_reward' in test_dict:
            self.grid.set_living_reward(float(test_dict['living_reward']))
        self.env = grid_world.GridWorldEnvironment(self.grid)
        self.epsilon = float(test_dict['epsilon'])
        self.learning_rate = float(test_dict['learning_rate'])
        self.num_experiences = int(test_dict['num_experiences'])
        self.num_iterations = int(test_dict['iterations'])
        self.opts = {
            'action_fn': self.env.get_possible_actions,
            'epsilon': self.epsilon,
            'gamma': self.discount,
            'alpha': self.learning_rate
        }
Code Example #5
    def __init__(self, question, test_dict):
        super(ValueIterationTest, self).__init__(question, test_dict)
        self.discount = float(test_dict['discount'])
        self.grid = grid_world.GridWorld(parse_grid(test_dict['grid']))
        iterations = int(test_dict['value_iterations'])
        if 'noise' in test_dict:
            self.grid.set_noise(float(test_dict['noise']))
        if 'living_reward' in test_dict:
            self.grid.set_living_reward(float(test_dict['living_reward']))
        max_pre_iterations = 10
        # list(...) so the final iteration count can be appended below;
        # a bare range object has no append method in Python 3.
        self.nums_iterations_for_display = list(range(
            min(iterations, max_pre_iterations)))
        self.test_out_file = test_dict['test_out_file']
        if max_pre_iterations < iterations:
            self.nums_iterations_for_display.append(iterations)
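
The list built above drives checkpointed grading: values are recorded after each listed iteration count and compared against the expected output file. A hedged sketch; ValueIterationAgent and grid.get_states() are assumed names, not shown in the snippet:

        # Sketch only: ValueIterationAgent and get_states() are assumptions.
        for n in self.nums_iterations_for_display:
            agent = ValueIterationAgent(self.grid, self.discount, iterations=n)
            values = {s: agent.get_value(s) for s in self.grid.get_states()}
            # ... compare `values` against the expectations in self.test_out_file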
Code Example #6
import numpy as np

import grid_world


def first_visit_mc(num_episodes):
    """Estimate Q(s, a) with first-visit Monte Carlo under an equiprobable policy."""
    gw = grid_world.GridWorld()
    N = np.zeros((len(gw.STATES), len(gw.ACTIONS)))        # first-visit counts
    returns = np.zeros((len(gw.STATES), len(gw.ACTIONS)))  # summed returns
    for ep in range(num_episodes):
        states, actions, rewards = run_equiprobable(gw)
        # Snapshot the counts so each (s, a) pair is credited at most once
        # per episode -- the defining property of *first-visit* MC.
        N_at_start = N.copy()
        for t in range(len(states)):
            s = states[t]
            a = actions[t]
            gt = sum(rewards[t:])  # undiscounted return from time t onward
            if N_at_start[s, a] == N[s, a]:
                N[s, a] += 1
                returns[s, a] += gt

    # Average only where a pair was actually visited; leave unvisited pairs
    # at zero instead of dividing by zero.
    Q = np.divide(returns, N, out=np.zeros_like(returns), where=N > 0)
    return Q
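
first_visit_mc relies on a run_equiprobable helper that is not shown. A plausible sketch, assuming this grid_world module exposes a start state and a step(state, action) -> (next_state, reward, done) method (both of which are assumptions):

import random

def run_equiprobable(gw):
    """Roll out one episode, picking each action uniformly at random.

    Sketch only: gw.START and gw.step() are assumed, not taken from the
    snippet above.
    """
    states, actions, rewards = [], [], []
    s = gw.START
    done = False
    while not done:
        a = random.randrange(len(gw.ACTIONS))  # equiprobable policy
        next_s, r, done = gw.step(s, a)
        states.append(s)
        actions.append(a)
        rewards.append(r)
        s = next_s
    return states, actions, rewards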
Code Example #7
File: main.py  Project: mullevik/gridword
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.w, self.h = style.WIDTH, style.HEIGHT
        self.model = grid_world.GridWorld()

        # Size the Tk window, then stack the world view above the options bar.
        self.geometry("{}x{}".format(self.w, self.h))
        self.world = Frame(self, bg=style.WORLD_BG, height=style.WORLD_HEIGHT)
        self.options = Frame(self,
                             bg=style.OPTIONS_BG,
                             height=style.OPTIONS_HEIGHT)

        self.world.pack(expand=True, fill="both")
        self.options.pack(fill="both")

        # The renderer draws the GridWorld model into the world frame.
        self.renderer = SimpleRenderer(self.model, self.world)
        self.time_delta_ms = 0
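
Elsewhere in main.py this window class would be instantiated and handed to Tk's event loop; a hypothetical entry point (the class name App is an assumption, it is not shown in the snippet):

if __name__ == "__main__":
    app = App()     # hypothetical name for the window class above
    app.mainloop()  # hand control to Tk's event loop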
Code Example #8
File: main.py  Project: IssamLaradji/MachineLearning
import numpy as np
import grid_world
import algorithms as alg

from prettyplots import PrettyPlot
import pandas as pd

if __name__ == "__main__":
    # 1. Load a Grid World environment of size 5 x 5
    n_rows = 5
    n_cols = 5
    env = grid_world.GridWorld(n_rows, n_cols)

    # 4 actions are possible: left, right, up, down
    n_actions = env.n_actions

    # 25 states are possible since the grid is of size 5 x 5
    n_states = env.n_states

    # Transition probabilities: entry (i, k, j) of P is the probability of
    # moving from state i to state j when action k is taken.
    P = env.probability_transition_matrix

    # Per-state rewards. There are 4 terminal states: 2 bad terminal states
    # with reward -1 and 2 good terminal states with reward +1.
    R = env.rewards
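
Given P's (i, k, j) indexing, one synchronous sweep of value iteration reduces to a single tensor contraction. A minimal sketch using only the names above; the discount gamma is an arbitrary illustrative value:

# Sketch: one value-iteration sweep; gamma is an arbitrary choice here.
gamma = 0.9
V = np.zeros(n_states)
# Q[i, k] = sum_j P[i, k, j] * (R[j] + gamma * V[j])
Q = np.einsum('ikj,j->ik', P, R + gamma * V)
V_new = Q.max(axis=1)  # greedy backup over the 4 actions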