Beispiel #1
0
# Exploration schedule settings.
# NOTE(review): e0/e1 look like the start and end values of the schedule
# (presumably epsilon, since the schedule is handed to EpsilonGreedyPolicy) --
# confirm against Schedule's signature, which is not visible in this excerpt.
e0, e1 = 0.90, 0.10
decay_fun = sch_exp_decay
# Cosine-annealing toggle and cycle count, forwarded to Schedule as keywords.
cos_ann, ann_cyc = True, 5

# t0 and t1 are defined outside this excerpt.
schedule = Schedule(
    t0, t1, e0, e1, decay_fun,
    cosine_annealing=cos_ann,
    annealing_cycles=ann_cyc,
)

# Policy: built from the exploration schedule and the value function Q
# (Q is defined outside this excerpt).
policy = EpsilonGreedyPolicy(
    value_function=Q,
    schedule=schedule,
)

# Reward function used by the agent.
reward_fun = rf_info2d_pos

# Action pre-/post-processing function.
# NOTE(review): going by its name, this presumably converts discrete actions
# to continuous ones -- confirm against its definition.
act_fun = act_disc2cont

# Agent hyperparameters.
# NOTE(review): the code consuming these values is outside this excerpt; the
# meanings noted below are inferred from the names and should be confirmed.
lr = 1e-4  # presumably the learning rate
gamma = 0.99  # presumably the discount factor
# Run doubleQ-DQN sampling from Q_target and bootstrapping from Q
doubleQ = True
rb = True  # presumably enables the replay buffer
# Capacity is an element count, so keep it an int: a float such as 1e6 is
# rejected by deque(maxlen=...), range() and array-shape arguments.
rb_max_size = int(1e6)
rb_batch_size = 64
tau = 0.1  # presumably the target-network update coefficient