Beispiel #1
0
    def __init__(self, config, logger=None):
        """
        Initialize Q Network and envs

        Creates the output directory, stores the configuration and logger,
        builds one preprocessed environment per entry of
        ``config.env_names`` and finally calls ``self.build()``.

        Args:
            config: class with hyperparameters; this method reads
                ``output_path``, ``log_path``, ``env_names``,
                ``overwrite_render`` and ``recon`` from it
            logger: logger instance from logging module; when None, one is
                created with ``get_logger(config.log_path)``
        """
        # directory for training outputs; exist_ok=True replaces the former
        # exists()-then-makedirs() pair, which could fail if the directory
        # appeared between the check and the creation
        os.makedirs(config.output_path, exist_ok=True)

        # store hyper params
        self.config = config
        self.logger = get_logger(config.log_path) if logger is None else logger

        # one environment per configured name, each wrapped the same way:
        # gym env -> wrap_dqn -> greyscale preprocessing to (84, 84, 1)
        self.envs = [
            PreproWrapper(wrap_dqn(gym.make(env_name)),
                          prepro=greyscale,
                          shape=(84, 84, 1),
                          overwrite_render=config.overwrite_render)
            for env_name in config.env_names
        ]

        self.recon = self.config.recon

        # build model
        self.build()
Beispiel #2
0
 def record(self):
     """
     Record a video of a single episode on a freshly created env.

     The env named by ``self.config.env_names[self.index]`` is rebuilt,
     wrapped in a gym Monitor writing to ``self.config.record_path``,
     given the same DQN / greyscale preprocessing wrappers, and then
     passed to ``self.evaluate`` for one episode.
     """
     cfg = self.config

     base_env = gym.make(cfg.env_names[self.index])

     # the video callable always returns True, so every episode is recorded
     monitored_env = gym.wrappers.Monitor(
         base_env,
         cfg.record_path,
         video_callable=lambda _episode_id: True,
         resume=True,
     )

     recording_env = PreproWrapper(
         wrap_dqn(monitored_env),
         prepro=greyscale,
         shape=(84, 84, 1),
         overwrite_render=cfg.overwrite_render,
     )

     self.evaluate(recording_env, 1)
Beispiel #3
0
import utils.utils as utils
from utils.srl_algorithms import ls_step, ls_step_dueling
import utils.wrappers as wrappers
import numpy as np
import random
import copy

if __name__ == "__main__":
    # Script settings for a training run; the code that consumes these
    # values lies outside this excerpt.
    # Hyperparameter set: the 'boxing' entry of HYPERPARAMS.
    params = HYPERPARAMS['boxing']
    # Use the first CUDA device when available, otherwise fall back to CPU.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    # conditional_update:
    # if True, test the updated weights before replacing the old ones;
    # the old weights are replaced only if the new ones perform better (bool)
    conditional_update = True
    # Training environment, wrapped with the project's DQN wrappers.
    env = gym.make(params['env_name'])
    env = wrappers.wrap_dqn(env)
    if conditional_update:
        # Second environment built the same way as `env`; only created when
        # conditional updates are enabled.
        # NOTE(review): presumably used to evaluate candidate weights -
        # confirm against the code that follows this excerpt.
        test_env = gym.make(params['env_name'])
        test_env = wrappers.wrap_dqn(test_env)

    training_random_seed = 2019
    save_freq = 50000
    n_drl = 100000  # steps of DRL between SRL
    n_srl = params['replay_size']  # size of batch in SRL step
    num_srl_updates = 3  # number of SRL updates to perform
    use_double_dqn = False
    use_dueling_dqn = True
    use_boosting = True
    use_ls_dqn = True
    use_constant_seed = True  # to compare performance independently of the randomness
    save_for_analysis = False  # also save the replay buffer for later analysis
You'll find the results, log and video recordings of your agent every 250k under
the corresponding file in the results folder. A good way to monitor the progress
of the training is to use Tensorboard. The starter code writes summaries of different
variables.

To launch tensorboard, open a Terminal window and run 
tensorboard --logdir=results/
Then, connect remotely to
ip-address-of-the-server:6006
(6006 is the default port used by TensorBoard).
"""
if __name__ == '__main__':
    # environment pipeline: gym env -> DQN wrappers -> greyscale frames of
    # shape (84, 84, 1)
    # NOTE: MaxAndSkipEnv(env, skip=config.skip_frame) is not applied here
    env = wrap_dqn(gym.make(config.env_name))
    env = PreproWrapper(
        env,
        prepro=greyscale,
        shape=(84, 84, 1),
        overwrite_render=config.overwrite_render,
    )

    # exploration schedule built from the eps_* settings of config
    exp_schedule = LinearExploration(
        env, config.eps_begin, config.eps_end, config.eps_nsteps)

    # learning rate schedule built from the lr_* settings of config
    lr_schedule = LinearSchedule(
        config.lr_begin, config.lr_end, config.lr_nsteps)

    # create the model and run it with both schedules
    model = NatureQN(env, config)
    model.run(exp_schedule, lr_schedule)
Beispiel #5
0
                     "--env",
                     default=BOXING_ENV_NAME,
                     help="Environment name to use, default=" +
                     DEFAULT_ENV_NAME)
 parser.add_argument("-r",
                     "--record",
                     help="Directory to store video recording")
 parser.add_argument("--no-visualize",
                     default=True,
                     action='store_false',
                     dest='visualize',
                     help="Disable visualization of the game play")
 args = parser.parse_args()
 use_dueling = True
 env = gym.make(args.env)
 env = wrappers.wrap_dqn(env)
 device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
 # env = wrappers.make_env(DEFAULT_ENV_NAME)
 if args.record:
     env = gym.wrappers.Monitor(env, args.record)
 if use_dueling:
     net = dqn_model.DuelingLSDQN(env.observation_space.shape,
                                  env.action_space.n).to(device)
 else:
     net = dqn_model.LSDQN(env.observation_space.shape,
                           env.action_space.n).to(device)
 # net.load_state_dict(torch.load(args.model, map_location=lambda storage, loc: storage))
 path_to_model_ckpt = './agent_ckpt/agent_ls_dqn_-boxing.pth'
 exists = os.path.isfile(path_to_model_ckpt)
 if exists:
     if not torch.cuda.is_available():