# Experiment driver (fragment): reads the settings stored in
# "experiments/exp_1_linear_learning.json" into `parameters`, creates a Mapper,
# a PendulumEnv and a Saver, and asks the Mapper for the forward and reverse
# state/action maps using the step and decimal values held in `parameters`.
# It then initialises three empty lists (steps, rewards, final_mean_reward)
# and, once per parameters["n_simulations"], starts constructing a
# LowRankLearning learner from the environment, the maps and those settings.
# NOTE(review): the original line breaks and indentation are missing here, and
# the text stops in the middle of the LowRankLearning(...) call, so the
# remaining arguments and the rest of the loop body are not visible.
# NOTE(review): LowRankLearning is called, but the visible import from `utils`
# lists LinearLearning instead -- confirm which class is intended; the name
# used in the loop is not imported anywhere in this fragment.
# NOTE(review): `np` is imported but not used in the visible part.
import json import numpy as np from utils import Mapper, LinearLearning, Saver, PendulumEnv parameters_file = "experiments/exp_1_linear_learning.json" with open(parameters_file) as j: parameters = json.loads(j.read()) mapping = Mapper() env = PendulumEnv() saver = Saver() state_map, state_reverse_map = mapping.get_state_map( parameters["step_state"], parameters["decimal_state"]) action_map, action_reverse_map = mapping.get_action_map( parameters["step_action"], parameters["decimal_action"]) steps = [] rewards = [] final_mean_reward = [] for i in range(parameters["n_simulations"]): lr_learner = LowRankLearning(env=env, state_set=parameters["state_set"], state_map=state_map, action_map=action_map, state_reverse_map=state_reverse_map, action_reverse_map=action_reverse_map, decimal_state=parameters["decimal_state"], decimal_action=parameters["decimal_action"], step_state=parameters["step_state"],
# Q-learning setup (fragment): creates a Saver, defines the run settings as
# numeric literals (step, decimal, episodes, max_steps, alpha_q, alpha_lr,
# gamma, epsilon, k, lambda_l, lambda_r), and builds the forward and reverse
# state/action maps through `mapping`, passing the same `step` and `decimal`
# for both states and actions. n_states and n_actions are the lengths of the
# two forward maps. It then starts constructing a QLearning learner from the
# environment, the maps, the two counts and the step/decimal values.
# NOTE(review): the original line breaks are missing and the text stops in the
# middle of the QLearning(...) call, so the remaining arguments are not visible.
# NOTE(review): QLearning is not imported anywhere in the visible text, and
# `mapping` and `env` are only assigned on the preceding line -- confirm that
# all three are in scope where this fragment actually runs.
# NOTE(review): episodes, max_steps, alpha_q, alpha_lr, gamma, epsilon, k,
# lambda_l and lambda_r are not used in the visible part of the call;
# presumably they are passed in the part that is cut off -- verify.
saver = Saver() step = .1 decimal = 1 episodes = 30000 max_steps = 100 alpha_q = .1 alpha_lr = .005 gamma = .9 epsilon = .2 k = 5 lambda_l = .1 lambda_r = .1 state_map, state_reverse_map = mapping.get_state_map(step, decimal) action_map, action_reverse_map = mapping.get_action_map(step, decimal) n_states = len(state_map) n_actions = len(action_map) q_learner = QLearning(env=env, state_map=state_map, action_map=action_map, state_reverse_map=state_reverse_map, action_reverse_map=action_reverse_map, n_states=n_states, n_actions=n_actions, decimal_state=decimal, decimal_action=decimal, step_state=step, step_action=step,