def main():
    env = gym.make("LunarLander-v2")
    timestamp = '{:%Y-%m-%d-%H:%M}'.format(datetime.datetime.now())
    o_dir = "LunarLander-v2/{}/models".format(timestamp)
    if not os.path.exists(o_dir):
        os.makedirs(o_dir)

    nof_episodes = 500
    # 8 values in [0, 1]
    state_size = env.observation_space.shape[0]
    # 0, 1, 2, 3
    action_size = env.action_space.n
    agent = DeepQAgent(state_size, action_size, model=2)
    batch_size = 32

    for episode in range(nof_episodes):
        state = env.reset()
        state = np.reshape(state, [1, state_size])
        done = False
        t = 0
        episode_reward = 0

        # Iterate over the timesteps
        while not done:
            env.render()
            # Instruct the agent to choose an action based on the current state of the environment.
            # This may be a random action depending on the value of the exploration_rate (epsilon).
            action = agent.act(state)
            # Execute said action
            next_state, reward, done, _ = env.step(action)
            episode_reward += reward
            next_state = np.reshape(next_state, [1, state_size])
            agent.memorize(state, action, reward, next_state, done)
            state = next_state
            if done:
                print("episode: {}/{}, time: {}, total_reward: {}".format(
                    episode, nof_episodes - 1, t, episode_reward))
            t += 1
            if len(agent.memory) / batch_size > 1:
                agent.train(batch_size)

        # Save model after training
        if episode % batch_size == 1:
            agent.save(o_dir + "/model_" + str(episode) + ".hdf5")
def main(_):
    if not tf.test.is_gpu_available() and FLAGS.use_gpu:
        raise Exception("use_gpu flag is true when no GPUs are available")

    assert FLAGS.checkpoint_dir != '', 'Checkpoint directory must be specified'
    if not FLAGS.to_train and not os.path.isfile(
            os.path.join(FLAGS.checkpoint_dir, 'ckpt.index')):
        raise Exception(
            "Checkpoint directory must contain a trained model to do testing")

    # Cap the fraction of GPU memory this process may claim and allow it to grow as needed.
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=calc_gpu_fraction(FLAGS.gpu_fraction),
        allow_growth=True)
    sess_config = tf.ConfigProto(
        log_device_placement=False,
        allow_soft_placement=FLAGS.allow_soft_placement,
        gpu_options=gpu_options)

    with tf.Session(config=sess_config) as sess:
        config = get_config(FLAGS)
        env = AtariEnvironment(config)
        agent = DeepQAgent(env, sess, config)
        # Train from scratch or play back from the checkpoint, depending on the config.
        if config.to_train:
            agent.train()
        else:
            agent.play()
import numpy as np
import time

import actor
from env.puzzle import PAD
from agent import DeepQAgent

shape = [5, 6]
moves = 100

board = PAD(shape=shape, max_moves=moves, show=False)
print('board set up')

print('setting up agent')
agent = DeepQAgent(board, n_moves=moves, batch_size=64, memory=128,
                   sample_mode='e_greedy', reward_type='combo')
print('agent set up')
print('Max moves: ', agent.n_moves)

agent.observe()
actor.train_loop(agent)

## Replace board so we can watch some play
board = PAD(shape=shape, max_moves=moves, show=True, sleep_time=0.05)
agent.swap_board(board)
actor.run_loop(agent)
from agent import TabularQAgent, DeepQAgent
import numpy as np
import gym
import matplotlib.pyplot as plt
from utils import plot_learning_curve

env = gym.make('CartPole-v1')
n_actions = env.action_space.n
n_states = env.observation_space.shape

A = DeepQAgent(lr=0.001, gamma=0.9, eps_max=1.0, eps_min=0.01,
               eps_dec=0.9999995, n_actions=n_actions, n_states=n_states,
               input_dims=n_states)

n_episodes = 10000
win_pct_list = []
scores = []
eps_history = []

for i in range(n_episodes):
    done = False
    score = 0
    s = env.reset()
    while not done:
        # The rest of this loop was missing from the snippet; the lines below
        # are a reconstruction, and the agent method names (choose_action,
        # store_transition, learn) are assumptions about DeepQAgent's API.
        a = A.choose_action(s)
        s_, reward, done, info = env.step(a)
        A.store_transition(s, a, reward, s_, done)
        A.learn()
        score += reward
        s = s_
    scores.append(score)
    eps_history.append(A.epsilon)
def main(): print("Start Atari games") environment_name = "PongNoFrameskip-v4" env = make_env(environment_name) best_score = -np.inf load_checkpoint = False n_games = 500 lr = 0.0001 epsilon = 1 gamma = 0.99 input_dims = env.observation_space.shape n_actions = env.action_space.n eps_min = 0.01 eps_dec = 5e-7 replace = 1000 algo = None mem_size = 50000 batch_size = 32 chkpt_dir = "models/" algo = "DeepQAgent" agent = DeepQAgent(lr, n_actions, input_dims, chkpt_dir, epsilon, gamma, mem_size, batch_size, eps_min, eps_dec, replace, algo, environment_name) if load_checkpoint: agent.load_models() fname = agent.algo + "_" + agent.env_name + '_lr' + str( agent.lr) + "_" + str(n_games) + "_games" figure_file = "plots/" + fname + ".png" n_steps = 0 scores, eps_history, steps_array = [], [], [] for i in range(n_games): done = False score = 0 observation = env.reset() while not done: action = agent.get_action(observation) new_observation, reward, done, info = env.step(action) score += reward if not load_checkpoint: agent.store_transition(observation, action, reward, new_observation, int(done)) agent.learn() observation = new_observation n_steps += 1 scores.append(score) steps_array.append(n_steps) avg_score = np.mean(scores[-100:]) print( "episode ", i + 1, "score: ", score, "average score %.1f best score %.1f epsilon %.2f" % (avg_score, best_score, agent.epsilon), " steps ", n_steps) if avg_score > best_score: if not load_checkpoint: agent.save_models() best_score = avg_score eps_history.append(agent.epsilon) plot_learning_curve(steps_array, scores, eps_history, figure_file) print("End Atari games")
from agent import DDPG, DeepQAgent
from environment import Environment

done_comparison_data = {
    'coords_done_fail': [45, 60, 118, 180],
    'coords_done_success': [5, 16, 122, 174],
    'img_done_fail': 'data/s8_cut_try_again.png',
    'img_done_success': 'data/game_score_s8.png',
    'restart_btn_coords': [640, 1110],
    'restart_ongame': [(2764, 93), (2624, 552)],
}

scores = {
    'coords_diamonds_gathered': [11, 27, 25, 35],
    'digits_mask_addr': 'data/digits',
    'match_threshold': 10,
    'state_area': [28, 112, 0, 296],
    'time_importance': 0.7,
    'diamonds_importance': 0.3,
    'episode_time_limit': 60,
    'diamonds_total': 7
}

env = Environment(device_ref_elements_data={
    'done_comparison_data': done_comparison_data,
    'scores': scores
})

# agent = DDPG(env)
agent = DeepQAgent(env)

train(agent, env, episode_seconds_constrain=45)