def a2c_sequence(name='a2c_sequence', task=None, body=None):
    config = Config()
    config.num_workers = batch_size  # same thing as batch size
    config.task_fn = lambda: task
    config.optimizer_fn = lambda params: torch.optim.RMSprop(params, lr=0.0007)
    config.network_fn = lambda state_dim, action_dim: \
        to_gpu(CategoricalActorCriticNet(state_dim, action_dim, body, gpu=0, mask_gen=mask_gen))
    # config.policy_fn = SamplePolicy  # not used
    config.state_normalizer = lambda x: x
    config.reward_normalizer = lambda x: x
    config.discount = 0.99
    config.use_gae = False  # TODO: for now, MUST be False as our RNN network isn't compatible with GAE
    config.gae_tau = 0.97
    config.entropy_weight = 0.01
    config.rollout_length = 5
    config.gradient_clip = 0.5
    config.logger = logging.getLogger()  # get_logger(file_name='deep_rl_a2c', skip=True)
    config.logger.info('test')
    config.iteration_log_interval  # NOTE: no value assigned here, so this line is a no-op and the Config default applies
    config.max_steps = 100000

    dash_name = 'DeepRL'
    visdom = Dashboard(dash_name)
    run_iterations(MyA2CAgent(config), visdom, invalid_value=invalid_value)
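# a2c_sequence relies on module-level names (batch_size, mask_gen, invalid_value) that are
# defined elsewhere in the original script. A minimal, hypothetical sketch of how they might
# be supplied; the values below, and the my_task / my_body placeholders, are illustrative
# assumptions, not taken from the original code.
batch_size = 16                # assumed: number of parallel workers / rollout batch size
mask_gen = None                # assumed: optional action-mask generator passed to the network
invalid_value = float('-inf')  # assumed: sentinel value passed to run_iterations for invalid actions

# a2c_sequence(task=my_task, body=my_body)  # my_task / my_body: a concrete env and network body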
def td_aux_many(config: Config, **kwargs):
    """
    :param config: experiment configuration to populate.
    :param kwargs: kwargs used to generate the experiment tag name used for saving.
    :return:
    """
    generate_tag(kwargs)
    kwargs.setdefault('log_level', 0)
    config.merge(kwargs)

    mkdir(os.path.join(config.data_dir, 'log'))
    mkdir(os.path.join(config.data_dir, 'data'))

    config.task_fn = lambda: Task(config.game)
    config.eval_env = config.task_fn()

    config.optimizer_fn = lambda params: torch.optim.Adam(params, lr=1e-4)
    # I'm just hard-coding the shape of the target; aux_dict is a module-level name defined
    # elsewhere in the original script.
    config.network_fn = lambda: TDAuxNet((4, 84, 84), config.action_dim,
                                         NatureConvBody(in_channels=config.history_length),
                                         aux_dict)
    config.random_action_prob = LinearSchedule(1.0, 0.01, 1e6)
    config.replay_fn = lambda: AsyncReplay(memory_size=int(5e5), batch_size=32)
    config.batch_size = 32
    config.state_normalizer = ImageNormalizer()
    config.reward_normalizer = SignNormalizer()
    config.discount = 0.99
    config.target_network_update_freq = 10000
    config.exploration_steps = 50000
    config.sgd_update_frequency = 4
    config.gradient_clip = 5
    config.history_length = 4
    # config.double_q = True
    config.double_q = False
    config.max_steps = int(2e7)
    run_steps(TDAuxAgent(config))
from deep_rl import Config

if __name__ == '__main__':
    config = Config()
    config.merge()
    print(config)
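# Config.merge() with no arguments parses whatever command-line flags have been registered
# via Config.add_argument (the same pattern the td_aux_many entry point below uses). A minimal
# sketch; the function name, the '--game' flag, and its default are illustrative assumptions,
# not part of the snippet above.
def merge_from_cli_example():
    cf = Config()
    cf.add_argument('--game', default='BreakoutNoFrameskip-v4')
    cf.merge()      # parsed flags become attributes on the Config instance
    return cf.game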
if __name__ == "__main__":
    cf = Config()
    cf.add_argument('--game', required=True)
    cf.add_argument('--run', type=int, required=True)
    cf.add_argument('--data_dir', type=str, required=True)
    cf.add_argument('--save_interval', type=int, default=1000000)
    cf.merge()

    set_one_thread()
    select_device(0)
    td_aux_many(cf, game=cf.game, run=cf.run, remark="og")
def ppo_pixel(log_name='ppo-dmlab-image', render=False):
    config = Config()
    log_dir = get_default_log_dir(ppo_pixel.__name__)
    config.num_workers = 8
    config.task_fn = lambda: Task(
        use_vision=True,
        use_pos=False,
        num_envs=config.num_workers,
        render=render,
    )
    config.eval_env = Task(
        use_vision=True,
        use_pos=False,
        num_envs=config.num_workers,
        log_dir=log_dir,
        render=render,
    )
    config.optimizer_fn = lambda params: torch.optim.RMSprop(
        params, lr=0.00025, alpha=0.99, eps=1e-5)
    config.network_fn = lambda: CategoricalActorCriticNet(
        config.state_dim, config.action_dim, NatureConvBody(in_channels=3))
    config.state_normalizer = ImageNormalizer()
    config.reward_normalizer = SignNormalizer()
    config.discount = 0.99
    config.use_gae = True
    config.gae_tau = 0.95
    config.entropy_weight = 0.01
    config.gradient_clip = 0.5
    config.rollout_length = 128
    config.optimization_epochs = 3
    config.mini_batch_size = 32 * 8
    config.ppo_ratio_clip = 0.1
    config.log_interval = 128 * 8
    config.logger = get_logger(tag=log_name)
    config.tag = log_name  # this name must be unique; anything with the same name will be overwritten
    config.max_steps = int(2e7)
    run_steps(PPOAgent(config))
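# A possible entry point for the PPO experiment above, mirroring the td_aux_many entry point:
# pin PyTorch to a single thread, select a GPU, then launch the run. The device index and the
# argument values are illustrative assumptions, not taken from the original.
if __name__ == '__main__':
    set_one_thread()
    select_device(0)
    ppo_pixel(log_name='ppo-dmlab-image', render=False)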