import gym
import torch

# DQN, ACNet, TSNet, MAX_EPISODES, Env, HParams and DMEnvFromGym are assumed to
# be project-local and imported/defined elsewhere in this repo.


class HDQN(object):
    def __init__(self):
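        # Two DQNs: `ac` is the high-level controller that proposes sub-goals
        # (choose_goal); `ts` is the low-level controller that picks primitive
        # actions for a given sub-goal (choose_action).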
        self.ac = DQN(ACNet)
        self.ts = DQN(TSNet)
        self.ac_agent, self.ts_agent = self.ac.eval_net, self.ts.eval_net

    def learn(self, env, max_episodes=MAX_EPISODES):
        for episode in range(0, max_episodes):
            print(episode)
            state = env.init()
            done = False
            while not done:
                # High level: the meta-controller proposes a sub-goal for the current state.
                goal = self.ac_agent.choose_goal(state)
                # Low level: learn an action that achieves the chosen sub-goal.
                best_action, r_in_his, train_limit = None, [], 500
                while len(r_in_his) < train_limit:
                    action = self.ts_agent.choose_action(state, [goal])
                    r_in = env.intrinsic_reward(state, goal, action)
                    # Keep the action with the highest intrinsic reward seen so far.
                    if not r_in_his or r_in > max(r_in_his):
                        best_action = action
                    r_in_his.append(r_in)
                    self.ts.memory.push(state, action, r_in)
                    self.ts.train()
                # High level: execute the best action and learn sub-goal selection
                # from the extrinsic reward.
                action = best_action
                next_state, reward, done, env_state = env.step(state, action)
                print(reward, env_state)
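                # Note: terminal transitions are not stored in the high-level replay buffer.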
                if not done:
                    self.ac.memory.push(state, action, reward, next_state)
                    self.ac.train()
                state = next_state
            # anneal exploration probability
            self.ac.anneal_epsilon()
            self.ts.anneal_epsilon()
        # save model
        self.save_model()

    def save_model(self, ac_path="model/ac_agent", ts_path="model/ts_agent"):
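        # Serializes the entire eval networks via pickle (torch.save on the
        # modules themselves), not just their state_dicts.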
        torch.save(self.ac_agent, ac_path)
        torch.save(self.ts_agent, ts_path)

    def load_model(self, ac_path="model/ac_agent", ts_path="model/ts_agent"):
        self.ac_agent = torch.load(ac_path)
        self.ts_agent = torch.load(ts_path)
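
# Usage sketch (assumes a project-local Env whose instances provide init(),
# step() and intrinsic_reward(), and that the model/ directory exists):
#
#     h_dqn = HDQN()
#     h_dqn.learn(Env())      # trains both levels and saves the models at the end
#     h_dqn.load_model()      # later: restore the saved eval networks
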
def test_epsilon_annealing():
    gym_env = gym.make("Pong-v4")
    env = DMEnvFromGym(gym_env)
    in_shape = (4, 84, 84)
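    # With these hyper-parameters, the expected linear schedule makes epsilon
    # numerically equal to the iteration count while it is annealed between
    # replay_start and final_exploration_frame, so the assertions below are
    # easy to read off.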
    hparams = HParams(
        replay_start=10,
        final_exploration_frame=20,
        initial_exploration=10.0,
        final_exploration=20.0,
    )
    agent = DQN(env.action_spec().num_values, in_shape, hparams, 0)

    agent.iteration = 10
    assert round(agent.anneal_epsilon(), 1) == 10.0

    agent.iteration = 20
    assert round(agent.anneal_epsilon(), 1) == 20.0

    agent.iteration = 15
    assert round(agent.anneal_epsilon(), 1) == 15.0
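

# Reference sketch, not the implementation under test: a linear schedule that is
# consistent with the three assertions above. `hp` stands in for the HParams
# object; the attribute names mirror the ones constructed in the test.
def _expected_epsilon(iteration, hp):
    span = hp.final_exploration_frame - hp.replay_start
    frac = (iteration - hp.replay_start) / span
    frac = min(max(frac, 0.0), 1.0)  # clamp outside the annealing window
    return hp.initial_exploration + frac * (
        hp.final_exploration - hp.initial_exploration
    )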