def __init__(self, question, test_dict):
    super(QLearningTest, self).__init__(question, test_dict)
    self.discount = float(test_dict['discount'])
    self.grid = grid_world.GridWorld(parse_grid(test_dict['grid']))
    if 'noise' in test_dict:
        self.grid.set_noise(float(test_dict['noise']))
    if 'living_reward' in test_dict:
        self.grid.set_living_reward(float(test_dict['living_reward']))
    self.env = grid_world.GridWorldEnvironment(self.grid)
    self.epsilon = float(test_dict['epsilon'])
    self.learning_rate = float(test_dict['learning_rate'])
    self.opts = {
        'action_fn': self.env.get_possible_actions,
        'epsilon': self.epsilon,
        'gamma': self.discount,
        'alpha': self.learning_rate
    }
    num_experiences = int(test_dict['num_experiences'])
    max_pre_experiences = 10
    # list(...) so the append below works under Python 3, where range()
    # returns a lazy range object rather than a list.
    self.nums_experiences_for_display = list(range(
        min(num_experiences, max_pre_experiences)))
    self.test_out_file = test_dict['test_out_file']
    if max_pre_experiences < num_experiences:
        self.nums_experiences_for_display.append(num_experiences)
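# Hedged sketch, not part of the test harness: the tabular Q-learning update
# that self.opts parameterizes (alpha is the learning rate, gamma the
# discount). The names `q_values` and `legal_actions_fn` are assumptions made
# for illustration, not identifiers from this codebase.
def q_update(q_values, legal_actions_fn, state, action, next_state, reward,
             alpha, gamma):
    next_actions = legal_actions_fn(next_state)
    # Value of the successor state; zero if it is terminal (no legal actions).
    next_value = max((q_values.get((next_state, a), 0.0)
                      for a in next_actions), default=0.0)
    sample = reward + gamma * next_value
    old = q_values.get((state, action), 0.0)
    q_values[(state, action)] = (1 - alpha) * old + alpha * sample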
def __init__(self, question, test_dict):
    super(GridPolicyTest, self).__init__(question, test_dict)
    # Function in the analysis module that returns (discount, noise)
    self.parameter_fn = test_dict['parameter_fn']
    self.question2 = test_dict.get('question2', 'false').lower() == 'true'
    # GridWorld specification:
    #   _ is empty space
    #   numbers are terminal states with that value
    #   # is a wall
    #   S is a start state
    self.grid_text = test_dict['grid']
    self.grid = grid_world.GridWorld(parse_grid(test_dict['grid']))
    self.grid_name = test_dict['grid_name']
    # Policy specification:
    #   _ policy choice not checked
    #   N, E, S, W policy action must be north, east, south, west
    self.policy = parse_grid(test_dict['policy'])
    # State the most probable path must visit:
    #   (x, y) for a particular location; (0, 0) is bottom left
    #   'terminal' for the terminal state
    self.path_visits = test_dict.get('path_visits', None)
    # State the most probable path must not visit:
    #   (x, y) for a particular location; (0, 0) is bottom left
    #   'terminal' for the terminal state
    self.path_not_visits = test_dict.get('path_not_visits', None)
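# Illustration only (the grid text below is invented for this example, not
# taken from any test file): a policy specification in the format documented
# above, requiring the agent to head east along the top row toward the +10
# terminal while leaving the remaining cells unchecked.
#
#     example_policy = parse_grid("""
#         E E E 10
#         N # _ -10
#         N _ _ _
#     """)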
def create_environment(environment_type, grid_dimension_size,
                       reward_function, state_encoding):
    if environment_type == "grid-world":
        return grid_world.GridWorld(grid_dimension_size, reward_function,
                                    state_encoding)
    elif environment_type == "package-grid-world":
        return package_grid_world.PackageGridWorld(grid_dimension_size,
                                                   reward_function)
    # Fail loudly instead of silently returning None for unknown types.
    raise ValueError("Unknown environment type: {!r}".format(environment_type))
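# Hedged usage sketch: constructing an environment through the factory. The
# argument values here are placeholders chosen for illustration, not values
# confirmed against this codebase's GridWorld constructor.
env = create_environment("grid-world", grid_dimension_size=5,
                         reward_function=None, state_encoding="tabular")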
def __init__(self, question, test_dict):
    super(EpsilonGreedyTest, self).__init__(question, test_dict)
    self.discount = float(test_dict['discount'])
    self.grid = grid_world.GridWorld(parse_grid(test_dict['grid']))
    if 'noise' in test_dict:
        self.grid.set_noise(float(test_dict['noise']))
    if 'living_reward' in test_dict:
        self.grid.set_living_reward(float(test_dict['living_reward']))
    self.env = grid_world.GridWorldEnvironment(self.grid)
    self.epsilon = float(test_dict['epsilon'])
    self.learning_rate = float(test_dict['learning_rate'])
    self.num_experiences = int(test_dict['num_experiences'])
    self.num_iterations = int(test_dict['iterations'])
    self.opts = {
        'action_fn': self.env.get_possible_actions,
        'epsilon': self.epsilon,
        'gamma': self.discount,
        'alpha': self.learning_rate
    }
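# Hedged illustration of the epsilon-greedy rule this test exercises: with
# probability epsilon take a uniformly random legal action, otherwise take
# the greedy one. `q_values` (a dict keyed by (state, action)) and
# `legal_actions` are names assumed for illustration.
import random

def epsilon_greedy(q_values, state, legal_actions, epsilon):
    if random.random() < epsilon:
        return random.choice(legal_actions)
    return max(legal_actions, key=lambda a: q_values.get((state, a), 0.0))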
def __init__(self, question, test_dict):
    super(ValueIterationTest, self).__init__(question, test_dict)
    self.discount = float(test_dict['discount'])
    self.grid = grid_world.GridWorld(parse_grid(test_dict['grid']))
    iterations = int(test_dict['value_iterations'])
    if 'noise' in test_dict:
        self.grid.set_noise(float(test_dict['noise']))
    if 'living_reward' in test_dict:
        self.grid.set_living_reward(float(test_dict['living_reward']))
    max_pre_iterations = 10
    # list(...) so the append below works under Python 3.
    self.nums_iterations_for_display = list(range(
        min(iterations, max_pre_iterations)))
    self.test_out_file = test_dict['test_out_file']
    if max_pre_iterations < iterations:
        self.nums_iterations_for_display.append(iterations)
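# Hedged sketch of the batch value-iteration backup this test checks. The MDP
# accessors used below (get_states, get_possible_actions,
# get_transition_states_and_probs, get_reward) are assumptions modeled on the
# snake_case API visible elsewhere in this codebase, not confirmed signatures.
def value_iteration_step(mdp, values, discount):
    new_values = {}
    for state in mdp.get_states():
        actions = mdp.get_possible_actions(state)
        if not actions:
            # Terminal states have no actions and value zero.
            new_values[state] = 0.0
            continue
        new_values[state] = max(
            sum(prob * (mdp.get_reward(state, action, next_state)
                        + discount * values[next_state])
                for next_state, prob in
                mdp.get_transition_states_and_probs(state, action))
            for action in actions)
    return new_values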
import numpy as np

import grid_world


def first_visit_mc(num_episodes):
    """First-visit Monte Carlo estimation of Q under the equiprobable policy."""
    gw = grid_world.GridWorld()
    N = np.zeros((len(gw.STATES), len(gw.ACTIONS)))
    returns = np.zeros((len(gw.STATES), len(gw.ACTIONS)))
    for ep in range(num_episodes):
        states, actions, rewards = run_equiprobable(gw)
        # Snapshotting the visit counts lets us detect the *first* visit to
        # each (s, a) pair within this episode: a pair is counted only if its
        # count has not already been bumped during this episode.
        N_at_start = N.copy()
        for t in range(len(states)):
            s = states[t]
            a = actions[t]
            # Undiscounted return from time t onward.
            gt = sum(rewards[t:])
            if N_at_start[s, a] == N[s, a]:
                N[s, a] += 1
                returns[s, a] += gt
    # Guard against division by zero for (s, a) pairs that were never visited.
    Q = np.divide(returns, N, out=np.zeros_like(returns), where=N > 0)
    return Q
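# Hedged sketch of the run_equiprobable helper used above, which is not shown
# in this file. It assumes GridWorld exposes STATES, ACTIONS, a start_state,
# and a step(state, action) -> (next_state, reward, done) method; all of these
# names are assumptions about the local grid_world module.
def run_equiprobable(gw):
    states, actions, rewards = [], [], []
    state = gw.start_state
    done = False
    while not done:
        action = np.random.randint(len(gw.ACTIONS))  # equiprobable policy
        next_state, reward, done = gw.step(state, action)
        states.append(state)
        actions.append(action)
        rewards.append(reward)
        state = next_state
    return states, actions, rewards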
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.w, self.h = style.WIDTH, style.HEIGHT
    self.model = grid_world.GridWorld()
    self.geometry("{}x{}".format(self.w, self.h))
    self.world = Frame(self, bg=style.WORLD_BG, height=style.WORLD_HEIGHT)
    self.options = Frame(self, bg=style.OPTIONS_BG,
                         height=style.OPTIONS_HEIGHT)
    self.world.pack(expand=True, fill="both")
    self.options.pack(fill="both")
    self.renderer = SimpleRenderer(self.model, self.world)
    self.time_delta_ms = 0
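# Hedged usage sketch: this __init__ appears to belong to a tkinter Tk
# subclass. Assuming the class is named App (an assumption; the real class
# name is not shown here), the application would be launched like this:
#
#     if __name__ == "__main__":
#         app = App()
#         app.mainloop()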
import numpy as np
import pandas as pd

import algorithms as alg
import grid_world
from prettyplots import PrettyPlot

if __name__ == "__main__":
    # 1. Load a grid-world environment of size 5 x 5.
    n_rows = 5
    n_cols = 5
    env = grid_world.GridWorld(n_rows, n_cols)

    # 4 actions are possible: left, right, up, down.
    n_actions = env.n_actions

    # 25 states are possible since the grid is of size 5 x 5.
    n_states = env.n_states

    # Transition probabilities: entry (i, k, j) of P is the probability of
    # transitioning from state i to state j when action k is taken.
    P = env.probability_transition_matrix

    # Reward for each state. There are 4 terminal states: 2 bad terminal
    # states with reward -1 and 2 good terminal states with reward +1.
    R = env.rewards
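    # Hedged sanity check, not in the original script: each P[i, k, :] should
    # be a valid probability distribution over successor states, so the
    # probabilities must sum to 1 for every (state, action) pair.
    assert np.allclose(P.sum(axis=2), 1.0)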