def __init__(self, config):
    config.model_type = Config.MODEL_TYPE_CONV2D

    print(config.device)
    self.config = config
    # DeepMind-style Atari preprocessing with frame stacking.
    self.env = wrap_deepmind(make_atari(config.env), frame_stack=True)
    self.num_states = self.env.observation_space.shape[-1]  # stacked-frame (channel) count
    self.num_actions = self.env.action_space.n
    self.agent = Agent(config, self.num_states, self.num_actions, self.config.num_atoms)
    self.total_step = np.zeros(100)

    self.data_path = config.data_path
    # Resume from a saved model when a non-default data path is supplied.
    if self.data_path != Config.DATA_PATH_DEFAULT:
        self.agent.load_model()
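For reference, a minimal sketch of what this make_atari/wrap_deepmind combination exposes, assuming the OpenAI baselines atari_wrappers helpers; the environment id below is only an illustration, not taken from the example:

from baselines.common.atari_wrappers import make_atari, wrap_deepmind

env = wrap_deepmind(make_atari('PongNoFrameskip-v4'), frame_stack=True)
print(env.observation_space.shape)  # (84, 84, 4): four stacked 84x84 grayscale frames
print(env.action_space.n)           # number of discrete actions for the chosen game
# The constructor above passes shape[-1] (the stacked-frame count) to its Agent as num_states.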
Example No. 3
def get_env():
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[3]
    env_id = task.env_id

    env = gym.make(env_id)
    env.seed(0)  # fixed seed for reproducibility

    expt_dir = '/tmp/hw3_vid_dir2/'
    # env = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True)
    env = utils.wrap_deepmind(env)  # standard DeepMind-style Atari preprocessing
    return env
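A hedged usage sketch, not part of the original snippet, of driving the environment returned by get_env with the classic gym API the snippet already uses (env.seed, four-value step return):

env = get_env()
obs = env.reset()
done = False
while not done:
    action = env.action_space.sample()          # random policy, purely for illustration
    obs, reward, done, info = env.step(action)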
Example No. 4
                log_freq=args.log_freq,
                mode=args.mode)
plotter = Plotter(save_dirs=save_dirs,
                  plot_types=[
                      'avg_scores_ep', 'avg_scores_ts', 'avg_scores_100_ep',
                      'avg_scores_100_ts', 'scores_ep', 'scores_ts',
                      'high_scores_ep', 'high_scores_ts', 'low_scores_ep',
                      'low_scores_ts', 'avg_loss_ep', 'avg_acc_ep',
                      'timesteps_ep'
                  ],
                  interval_types=['overall', 'window'],
                  plot_freq=args.plot_freq,
                  mode=args.mode)

env = make_atari(ENV_GYM)
env = wrap_deepmind(env, frame_stack=True, scale=False)

if args.mode == 'train':
    agent = DDQNLearner(
        env=env,
        save_dirs=save_dirs,
        save_freq=args.save_freq,
        gamma=args.gamma,
        batch_size=args.batch_size,
        learning_rate=args.learning_rate,
        buffer_size=args.buffer_size,
        learn_start=args.learn_start,
        target_network_update_freq=args.target_network_update_freq,
        train_freq=args.train_freq,
        tot_steps=args.total_step_lim)
else:
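Side note on the wrap_deepmind(env, frame_stack=True, scale=False) call above: under the usual baselines behaviour the wrapped environment returns uint8 frames (LazyFrames), which keeps a replay buffer compact, and a learner normalizes them only when feeding the network. A sketch under that assumption:

import numpy as np

obs = env.reset()                                    # LazyFrames, shape (84, 84, 4), dtype uint8
state = np.asarray(obs, dtype=np.float32) / 255.0    # convert and normalize just before the forward pass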
Example No. 5
        '--freeze_layers',
        type=int,
        default=0,
        help=
        'Number of initial layers to freeze when fine-tuning | Choose from 1, 2, 3'
    )

    args = parser.parse_args()

    logs_dir = 'logs'
    if not os.path.exists(logs_dir):
        os.makedirs(logs_dir)

    args.save_dir = utils.get_save_dir(logs_dir, args.name)

    # Training
    if not torch.cuda.is_available() and args.cuda:
        print(
            '--cuda is passed but torch.cuda.is_available() returned False. Will use CPU instead.'
        )

    env = utils.wrap_deepmind(utils.make_atari(
        args.env,
        max_episode_steps=args.episode_length,
        frameskip=args.frameskip),
                              frame_stack=True,
                              stacks=args.agent_history_length)
    agent = Agent(env, args)

    agent.train(args.episodes)
Example No. 6
def _thunk():
    env = make_atari('BreakoutNoFrameskip-v4')
    env.seed(0 + rank)
    return wrap_deepmind(env)
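A hedged sketch of how per-rank thunks like this are commonly consumed; the make_env factory and the use of baselines' SubprocVecEnv are assumptions for illustration, not part of the original snippet:

from baselines.common.atari_wrappers import make_atari, wrap_deepmind
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv

def make_env(rank):
    def _thunk():
        env = make_atari('BreakoutNoFrameskip-v4')
        env.seed(0 + rank)          # distinct seed per worker
        return wrap_deepmind(env)
    return _thunk

envs = SubprocVecEnv([make_env(rank) for rank in range(4)])  # 4 environments, one per worker process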