Code example #1
File: play.py Project: osmanylc/dqn-implementation
import sys
sys.path.append('.')

import time

import torch
import gym

import agent, utils

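# Create the agent, load the trained Q-network weights, and switch to evaluation mode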
agt = agent.DQNAgent()
agt.qnet.load_state_dict(torch.load('dqn_agt.pt'))
agt.qnet.eval()

obs_history = utils.ObsHistory()

env = gym.envs.make('Pong-v4')
obs = env.reset()
obs_history.reset(obs)

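# Act from the stacked observation history and render each frame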
while True:
    env.render()
    phi = obs_history.phi
    a = agt.act(phi)
    obs, r, done, _ = env.step(a)
    obs_history.store(obs)

    time.sleep(.003)
    if done:
        # Start a new episode and re-seed the observation history
        obs = env.reset()
        obs_history.reset(obs)
Code example #2
File: main.py Project: oktopus97/CartPoleDeepQ
parser.add_argument('--model')
# number of training episodes
parser.add_argument('--eps')

parser.add_argument('--render')

args = parser.parse_args()


# TensorBoard logging is optional; writer stays None when it is disabled
writer = None
if args.tensorboard:
    writer = SummaryWriter()
    # Launch a TensorBoard server pointed at the chosen log directory
    write_proc = subprocess.Popen(['tensorboard', '--logdir', '{}'.format(args.tensorboard)])

env = env.Environment(args.env)

if args.alg == 'DQN':
    agent = agent.DQNAgent(env, args.mode, args.model, writer)

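# Train or play depending on --mode; always close the environment and stop TensorBoard on exit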
try:
    if args.mode == 'train':
        agent.train(int(args.eps), args.render)
    elif args.mode == 'play':
        agent.play(int(args.eps))
except KeyboardInterrupt:
    print('PROCESS KILLED BY USER')
finally:
    env.close()
    if args.tensorboard:
        write_proc.terminate()
Code example #3
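        # Run one episode: render, act, step the environment, and learn from each transition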
        while not done:
            self.env.render()
            a = agent.act(s)
            s_, r, done, _ = self.env.step(a)
            agent.learn((s, a, s_, r, done))
            self.reward += r
            s = s_

        self.episode_count += 1
        self.reward_buffer.append(self.reward)
        average = sum(self.reward_buffer) / len(self.reward_buffer)

        print("Episode Nr. {} \nScore: {} \nAverage: {}".format(
            self.episode_count, self.reward, average))


if __name__ == "__main__":
    import gym
    import agent
    import observer
    # Set up the CartPole experiment, attach an epsilon-decay observer to the agent, and run
    key = 'CartPole-v0'
    exp = Experiment(key)
    agent = agent.DQNAgent(exp.env)
    epsilon = observer.EpsilonUpdater(agent)
    agent.add_observer(epsilon)
    exp.run_experiment(agent)
Code example #4
File: 7_RND_DQN.py Project: qiu1234567/DRL_Pytorch
        elif str(name).startswith('model_frozen'):
            param.requires_grad = False
            param_frozen_list.append(param)
        else:
            continue

    # print(f"param_active_list : {param_active_list}")
    # print(f"param_frozen_list : {param_frozen_list}")

    # print(f"model : {list(model_.parameters())}")
    # print(f"model_a : {list(param_active_list)}")

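    # Optimize the base model's parameters together with the parameters that were left unfrozen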
    optimizer = optim.Adam(list(model_.parameters()) + list(param_active_list),
                           lr=config.learning_rate)
    algorithm = "_RND"
    agent = agent.DQNAgent(models, target_model_, optimizer, device, algorithm)

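    # Counters and lists for per-episode logging of rewards, losses, and max Q-values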
    step = 0
    episode = 0
    reward_list = []
    loss_list = []
    max_Q_list = []
    r_i_list = []
    loss_rl_list = []
    loss_fm_list = []

    # Reset Unity environment and set the train mode according to the environment setting (env_config)
    env_info = env.reset(train_mode=train_mode,
                         config=config.env_config)[default_brain]

    # Game loop
Code example #5
def mean(values):
    # Average rounded to 2 decimal places; returns 0.0 for an empty or non-list input
    return (round(sum(values) / len(values), 2)
            if type(values) == list and len(values) > 0 else 0.0)


if __name__ == "__main__":
    env = gym.make("CartPole-v1")
    state_size = env.observation_space.shape[0]

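    # Ask which saved model to load and what exploration rate to play with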
    model_name = input("Model name -> ")
    my_model = "models/" + model_name + "/model_trained.h5"
    epsilon = input("Epsilon -> ")

    print("Loading", my_model, "with epsilon", epsilon)
    agent = agent.DQNAgent(my_model, float(epsilon))

    episode_count = int(input("Episode count -> "))
    done = False

    max_score = None
    highest_score = 0
    scores = []

    start = time.time()
    first_start = start

    for e in range(episode_count):
        # at each episode, reset environment to starting position
        state = env.reset()
        state = np.reshape(state, [1, state_size])
Code example #6
    env = gym.make("Pendulum-v0")
    state_size = env.observation_space.shape[0]

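    # Choose which saved model to load: the trained weights or the base model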
    model_name = input("Model name -> ")
    load_trained = input("Load trained (y/n)? ")
    load_trained = load_trained.lower() == "y"

    my_model_location = "models/" + model_name + "/"
    my_model = my_model_location + ("model_trained.h5"
                                    if load_trained else "model.h5")

    epsilon = float(input("Epsilon -> "))
    # if load_trained else 1.0;

    print("Loading", my_model, "with epsilon", epsilon)
    agent = agent.DQNAgent(my_model, epsilon)

    # Restore the replay memory saved alongside the model, if it exists
    try:
        with open(my_model.replace(".h5", ".json")) as f:
            agent.memory = json.load(f)
    except (OSError, ValueError):
        agent.memory = []

    episode_count = int(input("Episode count -> "))
    batch_size = 16

    max_score = None
    highest_score = None
    scores = []
    rewards = []

    start = time.time()
Code example #7
"""This is just a sample main file to call the non-distributed
   implementation of the agent, the agent is very easy to create
   and train without any config needed."""
import sys

import agent

# Windows CUDA Issue on my Laptop
import tensorflow as tf
gpus = tf.config.experimental.list_physical_devices('GPU')
if len(gpus) > 0:
    tf.config.experimental.set_memory_growth(gpus[0], True)

if __name__ == '__main__':
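    # Default number of training steps, optionally overridden by the first CLI argument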
    total_steps = 1000
    if len(sys.argv) == 2:
        total_steps = int(sys.argv[1])
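    # Train the agent and save both the online and target networks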
    a = agent.DQNAgent()
    a.train(total_steps)
    a.model.save('model.h5')
    a.target_model.save('target_model.h5')
Code example #8
if __name__ == "__main__":
    env = gym.make("MountainCar-v0")
    state_size = env.observation_space.shape[0]

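    # Choose the saved model to load and whether to start from the trained weights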
    model_name = input("Model name -> ")
    load_trained = input("Load trained (y/n)? ")
    load_trained = load_trained.lower() == "y"

    my_model_location = "models/" + model_name + "/"
    my_model = my_model_location + ("model_trained.h5"
                                    if load_trained else "model.h5")

    epsilon = input("Epsilon -> ")

    print("Loading", my_model, "with epsilon", epsilon)
    agent = agent.DQNAgent(my_model)

    # Restore the replay memory saved alongside the model, if it exists
    try:
        with open(my_model.replace(".h5", ".json")) as f:
            agent.memory = json.load(f)
    except (OSError, ValueError):
        agent.memory = []

    episode_count = int(input("Episode count -> "))
    batch_size = 16

    max_score = None
    highest_score = 0
    scores = []
    rewards = []

    start = time.time()
Code example #9
import agent
from environment import GymEnvironment
import tensorflow as tf

env_agent = GymEnvironment()
agent = agent.DQNAgent(environment=env_agent)

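# Build the DQN graph inside a TF1 session, initialize the variables, and train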
with tf.Session() as sess:
    agent.build_dqn(sess)
    sess.run(tf.global_variables_initializer())

    agent.train(episodes=50000)
Code example #10
import agent
import tensorflow as tf
import argparse
from environment import GymEnvironment

env_agent = GymEnvironment(display=True)
agent = agent.DQNAgent(environment=env_agent, display=True)

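# Build the DQN graph, restore the saved model, and play 10 episodes with rendering enabled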
with tf.Session() as sess:
    agent.build_dqn(sess)
    sess.run(tf.global_variables_initializer())
    agent.load_model()
    agent.play(10)
                        help="Boltzmann exploration")

    parser.add_argument("--render",
                        action="store_true",
                        help="Visualize training")

    args = parser.parse_args()

    config_info = {
        "config_param": args.config,
        "prefix_path": args.prefix_path,
        "path_ckpts": args.path_ckpts,
        "resume": args.resume,
    }

    # Create environment
    env = gym.make(args.env)

    # Initialize agent
    if args.boltzmann:
        agent = agent_boltzmann.DQNAgent(env, args.render, config_info)
    else:
        agent = agent.DQNAgent(env, args.render, config_info)

    # Launch training
    print(f"\nTraining on {env.unwrapped.spec.id}..\n")
    agent.train()

    # Visualize reward evolution
    agent.plot_reward()