Example #1
def assemble_training(seed,
                      weights=None,
                      lr=cfg.LEARNING_RATE,
                      er=cfg.EPS_START):
    """
    Configure everything needed to start the training. The parameter weights is used to continue training 
    and set the weights. This function wraps the environment with all the preprocessing steps, sets the 
    type of policy and the Replay Buffer.
    """
    if weights:
        checkpoint = torch.load(weights)
        env = getWrappedEnv(seed=checkpoint["info"]["seed"])
        dqn = DuelingDQN(env, lr=lr)
        eval_net = DuelingDQN(env)

        load_checkpoint(dqn, weights, dqn.device)
        load_checkpoint(eval_net, weights, dqn.device)

        policy = eGreedyPolicyDecay(env, seed, checkpoint["info"]["er"], er,
                                    cfg.EPS_END, cfg.DECAY_STEPS, dqn)
        buffer = ReplayBuffer(seed=seed)
        agent = DDQNAgent(dqn, eval_net, policy, buffer)
        with open(checkpoint["info"]["buffer"], "rb") as f:
            preloaded_buffer = pickle.load(f)
        agent.buffer = preloaded_buffer
        print(
            "Resume training at Episode",
            checkpoint["info"]["episodes"],
            "after",
            checkpoint["info"]["frames"],
            "frames.\n",
            "Learning rate is",
            checkpoint["info"]["lr"],
            "\nExploration rate is",
            checkpoint["info"]["er"],
        )
        return (env, agent, checkpoint["info"]["episodes"],
                checkpoint["info"]["frames"])

    env = getWrappedEnv(seed=seed)
    dqn = DuelingDQN(env, lr=lr)
    eval_net = DuelingDQN(env)

    policy = eGreedyPolicyDecay(env, seed, er, er, cfg.EPS_END,
                                cfg.DECAY_STEPS, dqn)
    buffer = ReplayBuffer(seed=seed)
    agent = DDQNAgent(dqn, eval_net, policy, buffer)
    return env, agent, 0, 0
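As a usage note, a minimal and purely hypothetical way to call assemble_training could look like the sketch below; the checkpoint path, episode budget, and loop body are illustrative assumptions rather than part of the project above.

# Hypothetical usage sketch for assemble_training (path and budget are assumptions).
env, agent, start_episode, start_frames = assemble_training(seed=42)            # fresh run
# env, agent, start_episode, start_frames = assemble_training(
#     seed=42, weights="checkpoints/ddqn_latest.pt")                            # resume (placeholder path)

for episode in range(start_episode, start_episode + 100):   # episode budget is an assumption
    ...  # roll out one episode with `agent` in `env`, then log/checkpoint as needed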
Example #2
def main(_):

    if check_path_validity() == -1:
        exit(1)

    FLAGS.logdir = FLAGS.logdir if FLAGS.logdir.endswith(
        '/') else FLAGS.logdir + '/'
    # Make a new directory to store checkpoints and tensorboard summaries;
    # this is only necessary if we are going to train a new model.
    if FLAGS.training:
        os.makedirs(FLAGS.logdir)

    # Setup tensorflow and tensorboard writers
    tf.reset_default_graph()
    session = tf.Session()
    writer = tf.summary.FileWriter(FLAGS.logdir,
                                   session.graph) if FLAGS.visualize else None
    summary_ops, summary_placeholders = setup_summary()

    # Initialize key objects: environment, agent and preprocessor
    env = Environment("127.0.0.1", 9090)
    agent = DDQNAgent(session, num_actions, width, height, FLAGS.logdir,
                      writer)
    preprocessor = Preprocessor(width, height)

    if FLAGS.training:
        summarize_func = partial(summarize, session, writer, summary_ops,
                                 summary_placeholders)
        train(agent, env, preprocessor, summarize_func)
    else:
        play(agent, env, preprocessor)
Example #3
def play(**kwargs):
    env = BananaEnvironment(file_name=kwargs['env_file'],
                            num_stacked_frames=kwargs['num_stacked_frames'])
    agent_name = kwargs['agent_fname']
    is_per = 'PER' in agent_name
    if 'ddqn' in agent_name:
        agent = DDQNAgentPER.load(agent_name) if is_per else DDQNAgent.load(
            agent_name)
    elif 'dqn' in agent_name:
        agent = DQNAgentPER.load(agent_name) if is_per else DQNAgent.load(
            agent_name)
    else:
        raise KeyError('Unknown agent type')

    for i in range(kwargs['num_plays']):
        done = False
        score = 0
        state = env.reset(train_mode=False)
        while not done:
            action = agent.act(state, eps=0.)
            state, reward, done = env.step(action)  # roll out transition
            score += reward
            print("\r play #{}, reward: {} | score: {}".format(
                i + 1, reward, score),
                  end='')
        print()
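A hypothetical invocation of play could pass the keyword arguments read above; the file names below are illustrative placeholders, not paths from the original project.

# Hypothetical call; file names are placeholders.
play(env_file='path/to/Banana.x86_64',
     num_stacked_frames=4,
     agent_fname='models/ddqn_agent_PER.pt',   # the 'ddqn' / 'PER' substrings select the agent class
     num_plays=5)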
Example #4
def main():

    print("Creating model...")
    model = create_model()
    model.summary()

    print("Creating environment...")
    environment = gym.make("CartPole-v0")
    environment._max_episode_steps = 500

    print("Creating agent...")
    if agent_type == "dqn":
        agent = DQNAgent(name="cartpole-dqn",
                         model=model,
                         environment=environment,
                         observation_frames=1,
                         observation_transformation=observation_transformation,
                         reward_transformation=reward_transformation,
                         gamma=0.95,
                         final_epsilon=0.01,
                         initial_epsilon=1.0,
                         number_of_iterations=1000000,
                         replay_memory_size=2000,
                         minibatch_size=32)
    elif agent_type == "ddqn":
        agent = DDQNAgent(
            name="cartpole-ddqn",
            model=model,
            environment=environment,
            observation_frames=1,
            observation_transformation=observation_transformation,
            reward_transformation=reward_transformation,
            gamma=0.95,
            final_epsilon=0.01,
            initial_epsilon=1.0,
            number_of_iterations=1000000,
            replay_memory_size=2000,
            minibatch_size=32,
            model_copy_interval=100)
    agent.enable_rewards_tracking(rewards_running_means_length=10000)
    agent.enable_episodes_tracking(episodes_running_means_length=10000)
    agent.enable_maxq_tracking(maxq_running_means_length=10000)
    agent.enable_model_saving(model_save_frequency=100000)
    agent.enable_tensorboard_for_tracking()

    print("Training ...")
    agent.fit(verbose=True, headless="render" not in sys.argv)
Example #5
def main():

    print("Creating environment...")
    environment = gym_tetris.make('Tetris-v0')

    print("Creating model...")
    model = modelutils.create_model(number_of_actions)
    model.summary()

    print("Creating agent...")
    if agent_type == "dqn":
        agent = DQNAgent(
            name="tetris-dqn",
            environment=environment,
            model=model,
            observation_transformation=utils.resize_and_bgr2gray,
            observation_frames=4,
            number_of_iterations=1000000,
            gamma=0.95,
            final_epsilon=0.01,
            initial_epsilon=1.0,
            replay_memory_size=2000,
            minibatch_size=32
        )
    elif agent_type == "ddqn":
        agent = DDQNAgent(
            name="tetris-ddqn",
            environment=environment,
            model=model,
            observation_transformation=utils.resize_and_bgr2gray,
            observation_frames=4,
            number_of_iterations=1000000,
            gamma=0.95,
            final_epsilon=0.01,
            initial_epsilon=1.0,
            replay_memory_size=2000,
            minibatch_size=32,
            model_copy_interval=100
        )
    agent.enable_rewards_tracking(rewards_running_means_length=10000)
    agent.enable_episodes_tracking(episodes_running_means_length=100)
    agent.enable_maxq_tracking(maxq_running_means_length=10000)
    agent.enable_model_saving(model_save_frequency=10000)
    agent.enable_plots_saving(plots_save_frequency=10000)

    print("Training ...")
    agent.fit(verbose=True, headless="headless" in sys.argv, render_states=True)
Example #6
def main():

    print("Creating model...")
    model = modelutils.create_model(number_of_actions=4)
    model.summary()

    print("Creating agent...")
    if agent_type == "dqn":
        agent = DQNAgent(name="doom-dqn",
                         model=model,
                         number_of_actions=4,
                         gamma=0.99,
                         final_epsilon=0.0001,
                         initial_epsilon=0.1,
                         number_of_iterations=200000,
                         replay_memory_size=10000,
                         minibatch_size=32)
    elif agent_type == "ddqn":
        agent = DDQNAgent(name="doom-ddqn",
                          model=model,
                          number_of_actions=4,
                          gamma=0.99,
                          final_epsilon=0.0001,
                          initial_epsilon=0.1,
                          number_of_iterations=200000,
                          replay_memory_size=10000,
                          minibatch_size=32,
                          model_copy_interval=100)
    agent.enable_rewards_tracking(rewards_running_means_length=1000)
    agent.enable_episodes_tracking(episodes_running_means_length=1000)
    agent.enable_maxq_tracking(maxq_running_means_length=1000)
    agent.enable_model_saving(model_save_frequency=10000)
    agent.enable_plots_saving(plots_save_frequency=10000)

    print("Creating game...")
    #environment = Environment(headless=("headless" in sys.argv))
    # Create an instance of the Doom game.
    environment = DoomGame()
    environment.load_config("scenarios/basic.cfg")
    environment.set_screen_format(ScreenFormat.GRAY8)
    environment.set_window_visible("headless" not in sys.argv)
    environment.init()

    print("Training ...")
    train(agent, environment, verbose="verbose" in sys.argv)
Example #7
def main():

    print("Creating model...")
    model = modelutils.create_model(number_of_actions)
    model.summary()

    print("Creating agent...")
    if agent_type == "dqn":
        agent = DQNAgent(name="supermario-dqn",
                         model=model,
                         number_of_actions=number_of_actions,
                         gamma=0.95,
                         final_epsilon=0.01,
                         initial_epsilon=1.0,
                         number_of_iterations=1000000,
                         replay_memory_size=2000,
                         minibatch_size=32)
    elif agent_type == "ddqn":
        agent = DDQNAgent(name="supermario-ddqn",
                          model=model,
                          number_of_actions=number_of_actions,
                          gamma=0.95,
                          final_epsilon=0.01,
                          initial_epsilon=1.0,
                          number_of_iterations=1000000,
                          replay_memory_size=2000,
                          minibatch_size=32,
                          model_copy_interval=100)
    agent.enable_rewards_tracking(rewards_running_means_length=10000)
    agent.enable_episodes_tracking(episodes_running_means_length=100)
    agent.enable_maxq_tracking(maxq_running_means_length=10000)
    agent.enable_model_saving(model_save_frequency=10000)
    agent.enable_plots_saving(plots_save_frequency=10000)

    print("Creating game...")
    environment = gym_super_mario_bros.make("SuperMarioBros-v0")
    environment = BinarySpaceToDiscreteSpaceEnv(environment, actions)

    print("Training ...")
    train(agent,
          environment,
          verbose="verbose" in sys.argv,
          headless="headless" in sys.argv)
Example #8
    algo = 'DDQNAgent'
    render_game = True
    load_checkpoint = False
    train_model = True
    n_games = 500
    gamma = 0.99
    epsilon = 1.0
    lr = 1e-4
    eps_min = 1e-5
    eps_dec = 1e-5
    replace = 1000
    mem_size = 20000
    batch_size = 32

    agent = DDQNAgent(gamma=gamma, epsilon=epsilon, lr=lr, input_dims=input_dims,
                      n_actions=n_actions, mem_size=mem_size, eps_min=eps_min,
                      batch_size=batch_size, replace=replace, eps_dec=eps_dec,
                      chkpt_dir=chkpt_dir, algo=algo, env_name=env_name)

    if load_checkpoint:
        agent.load_models()
        agent.epsilon = eps_min

    fname = agent.algo + '_' + agent.env_name + '_lr' + str(agent.lr) + \
            '_' + str(n_games) + 'games'
    figure_file = 'plots/' + fname + '.png'
    scores_file = fname + '_scores.npy'

    n_steps = 0
    scores, eps_history, steps_array = [], [], []

    for i in range(n_games):
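        # NOTE: the original excerpt stops at the episode loop. The lines below are a
        # hypothetical sketch of a typical loop body; the agent/env method names
        # (choose_action, store_transition, learn, render) are assumptions.
        observation = env.reset()
        done = False
        score = 0
        while not done:
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            score += reward
            if render_game:
                env.render()
            if train_model:
                agent.store_transition(observation, action, reward, observation_, int(done))
                agent.learn()
            observation = observation_
            n_steps += 1
        scores.append(score)
        steps_array.append(n_steps)
        eps_history.append(agent.epsilon)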
Example #9
def train(**kwargs):
    kwargs['worker_id'] = 0
    if kwargs['env_type'] == 'visual':
        env = VisualBananaEnvironment(
            file_name=kwargs['env_file'],
            num_stacked_frames=kwargs['num_stacked_frames'],
            worker_id=kwargs['worker_id'])
    elif kwargs['env_type'] == 'simple':
        env = BananaEnvironment(file_name=kwargs['env_file'],
                                worker_id=kwargs['worker_id'])
    else:
        raise KeyError('unknown env type')
    state_dim = env.get_state_dim()
    action_dim = env.get_action_dim()

    kwargs['device'] = "cuda:0" if torch.cuda.is_available() and kwargs['use_gpu'] else "cpu"
    torch.manual_seed(0)
    torch.cuda.manual_seed_all(0)
    random.seed(0)
    np.random.seed(0)
    if kwargs['env_type'] == 'visual':
        net = ConvQNetwork(state_dim, action_dim).to(kwargs['device'])
        target_net = ConvQNetwork(state_dim, action_dim).to(kwargs['device'])
    elif kwargs['env_type'] == 'simple':
        net = MlpQNetwork(state_dim, action_dim).to(kwargs['device'])
        target_net = MlpQNetwork(state_dim, action_dim).to(kwargs['device'])
    else:
        raise KeyError('unknown env type')

    kwargs['action_dim'] = action_dim

    if kwargs['agent_type'] == 'ddqn':
        agent_cls = DDQNAgentPER if kwargs['use_prioritized_buffer'] else DDQNAgent
    elif kwargs['agent_type'] == 'dqn':
        agent_cls = DQNAgentPER if kwargs['use_prioritized_buffer'] else DQNAgent
    else:
        raise KeyError('Unknown agent type')
    agent = agent_cls(net, target_net, **kwargs)

    dqn = DQN(env=env, agent=agent, **kwargs)
    scores, losses = dqn.train(kwargs['num_episodes'])

    # save agent
    dt = str(datetime.datetime.now().strftime("%m_%d_%Y_%I_%M_%p"))
    per = 'PER' if kwargs['use_prioritized_buffer'] else ''
    model_fname = kwargs['model_dir'] + '/' + kwargs['env_type'] + \
        '/{}_agent_{}_{}.pt'.format(kwargs['agent_type'], per, dt)
    agent.save(model_fname)

    # save scores
    scores_fname = kwargs['reports_dir'] + '/' + kwargs['env_type'] + \
        '/{}_agent_{}_{}'.format(kwargs['agent_type'], per, dt)
    np.save(scores_fname, np.array(scores))

    # save losses
    losses_fname = kwargs['reports_dir'] + '/' + kwargs['env_type'] + \
        '/{}_agent_{}_loss_{}'.format(kwargs['agent_type'], per, dt)
    np.save(losses_fname, np.array(losses))

    env.close()
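For reference, train could be driven by a keyword dictionary along these lines; only the key names are taken from the function body above, while the values and paths are illustrative placeholders.

# Hypothetical invocation of train(**kwargs); values and paths are placeholders.
params = {
    'env_type': 'simple',               # 'simple' or 'visual'
    'env_file': 'path/to/Banana.x86_64',
    'num_stacked_frames': 4,            # only used by the visual environment
    'use_gpu': True,
    'agent_type': 'ddqn',               # 'ddqn' or 'dqn'
    'use_prioritized_buffer': False,
    'num_episodes': 1000,
    'model_dir': 'models',
    'reports_dir': 'reports',
}
train(**params)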
Example #10
import numpy as np

from agent import DDQNAgent, DDQNArgs
from trainer import DistributedTrainer
from numeric_env import MultiEnv

import torch
torch.set_num_threads(1)

env = MultiEnv(2, 2)
action_space = [(-5 + x * 2, -5 + y * 2) for x in range(6) for y in range(6)]

args = DDQNArgs(state_dim=env.STATE_DIM + 1,
                discrete_action_space=action_space)

agents = [DDQNAgent(args) for _ in range(2)]
trainer = DistributedTrainer(agents,
                             env,
                             parameter_share=True,
                             state_transformer=np.append,
                             log_dir='../logs/ddqn_d_ps')
trainer.train(1000000)
Example #11
    kwargs['device'] = "cuda:0" if torch.cuda.is_available() and kwargs['use_gpu'] else "cpu"

    torch.manual_seed(0)
    random.seed(0)


    if kwargs['env_type'] == 'visual':
        net = ConvQNetwork(state_dim, action_dim).to(kwargs['device'])
        target_net = ConvQNetwork(state_dim, action_dim).to(kwargs['device'])
    elif kwargs['env_type'] == 'simple':
        net = MlpQNetwork(state_dim, action_dim).to(kwargs['device'])
        target_net = MlpQNetwork(state_dim, action_dim).to(kwargs['device'])
    else:
        raise KeyError('unknown env type')

    kwargs['action_dim'] = action_dim

    if kwargs['agent_type'] == 'ddqn':
        agent = DDQNAgentPER(net, target_net, **kwargs) if kwargs['use_prioritized_buffer'] else DDQNAgent(net,
                                                                                                           target_net,
                                                                                                           **kwargs)
    elif kwargs['agent_type'] == 'dqn':
        agent = DQNAgentPER(net, target_net, **kwargs) if kwargs['use_prioritized_buffer'] else DQNAgent(net,
                                                                                                         target_net,
                                                                                                         **kwargs)
    else:
        raise KeyError('Unknown agent type')

    train(agent, env)
Example #12
from agent import DDQNAgent, DDQNArgs
from trainer import CentralizedTrainer
from numeric_env import MultiEnv

import torch
torch.set_num_threads(1)

env = MultiEnv(2, 2)
action_space = [(-5 + x * 2, -5 + y * 2, -5 + a * 2, -5 + b * 2)
                for x in range(6) for y in range(6) for a in range(6)
                for b in range(6)]

args = DDQNArgs(state_dim=env.STATE_DIM, discrete_action_space=action_space)

agent = DDQNAgent(args)
trainer = CentralizedTrainer(agent, env, log_dir='../logs/ddqn_c')
trainer.train(1000000)
Example #13
        print(full_data[i].head(5))
        print("(rows, columns):", full_data[i].shape)
        full_train_data.append(full_data[i].iloc[:data_split, :])

# Training phase

    print(
        "\n===========================  Training Mode  ======================================"
    )

    env = TradingEnv(train_data, args.initial_invest)
    state_size = env.observation_space.shape
    print("state size", state_size)
    action_size = env.action_space.n
    print("action size", action_size)
    trade_agent = DDQNAgent(state_size, action_size)
    scaler = get_scaler(env)

    labels = ['episode', 'total_reward']
    results = {x: [] for x in labels}
    acts = []
    portfolio_value = []

    train_results = {}

    for e in range(args.episode):
        state = env.reset()
        score = 0
        total_profit = 0
        state = scaler.transform([state])