Esempio n. 1
0
import json
import numpy as np
from utils import Mapper, LinearLearning, Saver, PendulumEnv

# Load the experiment configuration from its JSON file.
parameters_file = "experiments/exp_1_linear_learning.json"
# JSON is UTF-8 by specification; naming the encoding keeps the load from
# depending on the platform's locale default. json.load parses straight from
# the file object, so no intermediate string is needed.
with open(parameters_file, encoding="utf-8") as j:
    parameters = json.load(j)

# Instantiate the helpers imported from utils.
mapping = Mapper()
env = PendulumEnv()
saver = Saver()

# get_state_map / get_action_map each return a pair, unpacked here into a map
# and its reverse map. Their step and decimal arguments are read from the
# configuration loaded above.
state_map, state_reverse_map = mapping.get_state_map(
    parameters["step_state"], parameters["decimal_state"])
action_map, action_reverse_map = mapping.get_action_map(
    parameters["step_action"], parameters["decimal_action"])

# Result accumulators, empty at this point.
# NOTE(review): presumably filled once per simulation by the loop that
# follows -- confirm against the loop body.
steps = []
rewards = []
final_mean_reward = []

for i in range(parameters["n_simulations"]):
    lr_learner = LowRankLearning(env=env,
                                 state_set=parameters["state_set"],
                                 state_map=state_map,
                                 action_map=action_map,
                                 state_reverse_map=state_reverse_map,
                                 action_reverse_map=action_reverse_map,
                                 decimal_state=parameters["decimal_state"],
                                 decimal_action=parameters["decimal_action"],
                                 step_state=parameters["step_state"],
Esempio n. 2
0
saver = Saver()

# Run constants. `step` and `decimal` are used for both the state map and the
# action map below, and are later handed to QLearning as step_state/step_action
# and decimal_state/decimal_action.
step = 0.1
decimal = 1

# NOTE(review): the consumers of the remaining constants are not visible in
# this excerpt; the names suggest episode counts, learning rates, a discount
# factor, an exploration rate, a rank and regularisation weights -- confirm
# against the learner classes before relying on that reading.
episodes = 30000
max_steps = 100
alpha_q = 0.1
alpha_lr = 0.005
gamma = 0.9
epsilon = 0.2
k = 5
lambda_l = 0.1
lambda_r = 0.1

# Each mapper call returns a pair, unpacked into a map and its reverse map.
state_map, state_reverse_map = mapping.get_state_map(step, decimal)
action_map, action_reverse_map = mapping.get_action_map(step, decimal)

# Sizes are taken as the number of entries in each map.
n_states, n_actions = len(state_map), len(action_map)

q_learner = QLearning(env=env,
                      state_map=state_map,
                      action_map=action_map,
                      state_reverse_map=state_reverse_map,
                      action_reverse_map=action_reverse_map,
                      n_states=n_states,
                      n_actions=n_actions,
                      decimal_state=decimal,
                      decimal_action=decimal,
                      step_state=step,
                      step_action=step,