# Training loop: each outer iteration runs one episode (epoch) to termination,
# accumulating experience in the agent's replay memory.
while e < n_epochs:
    frame = 0
    loss = 0.0
    Q_max = 0.0
    env.reset()
    state_t_1, reward_t, terminal = env.observe()
    # Act until the episode ends, accumulating experience;
    # once enough experience is stored, learning from replay begins.
    loops += 1
    while not terminal:
        state_t = state_t_1
        # When resuming from a saved model, pin exploration low (0.1) until
        # experience replay has restarted; otherwise use the agent's own
        # (presumably annealed -- confirm in Agent) exploration rate.
        exploration = 0.1 if args.load and not start_replay else agent.exploration
        action_t, is_random = agent.select_action([state_t], exploration)
        env.execute_action(action_t)
        state_t_1, reward_t, terminal = env.observe()
        # Optimistic initial values: for the first K loops (and only when not
        # resuming from a saved model), force an exploratory action and pretend
        # every action earned the same reward.
        if is_optimistic_epoch(loops, optimistic_num) and not args.load:
            action_t = optimistic_action(env, loops, optimistic)
            # Original read `1 if action_t == 0 else 1` -- both branches were 1,
            # matching the stated "same reward for every action" intent, so the
            # redundant conditional is dropped.
            reward_t = 1
        # store_experience() returns whether enough experience has accumulated
        # to start replay. (The original pre-set `start_replay = False` here,
        # which was immediately overwritten -- dead code, removed.)
        start_replay = agent.store_experience([state_t], action_t, reward_t,
                                              [state_t_1], terminal)
        if start_replay:
            do_replay_count += 1
# Evaluation run: replay one episode with the trained model, acting greedily
# (exploration rate 0.0), and report per-step progress plus the final score.
start_date = utils.format(args.start_date)
end_date = utils.format(args.end_date)
env = Env(start_date, end_date)
agent = Agent(env.actions, len(env.columns), env.state_size, args.memory_size)
agent.load_model()

total_frame = 0
max_step = 0
frame = 0
# observe() unconditionally rebinds `terminal` before the loop test, so the
# original's `terminal = False` seed was dead code and is removed.
state_t, reward_t, terminal = env.observe()
while not terminal:
    # exploration = 0.0 -> always the greedy action during evaluation
    action_t, is_random = agent.select_action([state_t], 0.0)
    env.execute_action(action_t)
    state_t, reward_t, terminal = env.observe()
    frame += 1
    total_frame += 1
    # Track the deepest step the environment reached during the episode.
    if max_step < env.step:
        max_step = env.step
    print("frame: %s, total_frame: %s, terminal: %s, action: %s, reward: %s"
          % (frame, total_frame, terminal, action_t, reward_t))
# Release the Keras/TensorFlow session resources before reporting results.
backend.clear_session()
print("max_step: %s, score: %s" % (max_step, env.score))