Example #1
def a2c_sequence(name='a2c_sequence', task=None, body=None):
    config = Config()
    config.num_workers = batch_size  # batch_size comes from module scope; num_workers doubles as the batch size
    config.task_fn = lambda: task
    config.optimizer_fn = lambda params: torch.optim.RMSprop(params, lr=0.0007)
    config.network_fn = lambda state_dim, action_dim: \
                            to_gpu(CategoricalActorCriticNet(state_dim,
                                                      action_dim,
                                                      body,
                                                      gpu=0,
                                                      mask_gen=mask_gen))  # mask_gen comes from module scope
    #config.policy_fn = SamplePolicy # not used
    config.state_normalizer = lambda x: x
    config.reward_normalizer = lambda x: x
    config.discount = 0.99
    config.use_gae = False  # TODO: for now, MUST be False, as our RNN network isn't compatible with it yet
    config.gae_tau = 0.97
    config.entropy_weight = 0.01
    config.rollout_length = 5
    config.gradient_clip = 0.5
    config.logger = logging.getLogger()  # get_logger(file_name='deep_rl_a2c', skip=True)
    config.logger.info('test')
    config.iteration_log_interval = 30  # iterations between log messages (value assumed)
    config.max_steps = 100000
    dash_name = 'DeepRL'
    visdom = Dashboard(dash_name)
    run_iterations(MyA2CAgent(config), visdom, invalid_value=invalid_value)  # invalid_value comes from module scope
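The function above relies on several names it does not define (batch_size, mask_gen, invalid_value); they are expected to live at module scope in the original project. A purely illustrative sketch of such module-level values follows; the concrete types and numbers are assumptions, not part of the original code.

# Hypothetical module-scope values assumed by a2c_sequence (all values illustrative):
batch_size = 16        # feeds config.num_workers; one worker per sequence in the batch
mask_gen = None        # validity-mask generator handed to CategoricalActorCriticNet
invalid_value = -1.0   # passed through to run_iterations for invalid sequences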
Example #2
def td_aux_many(config: Config, **kwargs):
    """

    :param config:
    :param kwargs: kwargs used to generate the experiment tag name uses for saving.
    :return:
    """
    generate_tag(kwargs)
    kwargs.setdefault('log_level', 0)
    config.merge(kwargs)

    mkdir(os.path.join(config.data_dir, 'log'))
    mkdir(os.path.join(config.data_dir, 'data'))

    config.task_fn = lambda: Task(config.game)
    config.eval_env = config.task_fn()
    config.optimizer_fn = lambda params: torch.optim.Adam(params, lr=1e-4)

    # The target shape (4, 84, 84) is hard-coded; aux_dict (the auxiliary-target spec) comes from module scope.
    config.network_fn = lambda: TDAuxNet((4, 84, 84), config.action_dim,
                                         NatureConvBody(in_channels=config.history_length), aux_dict)
    config.random_action_prob = LinearSchedule(1.0, 0.01, 1e6)

    config.replay_fn = lambda: AsyncReplay(memory_size=int(5e5), batch_size=32)

    config.batch_size = 32
    config.state_normalizer = ImageNormalizer()
    config.reward_normalizer = SignNormalizer()
    config.discount = 0.99
    config.target_network_update_freq = 10000
    config.exploration_steps = 50000
    config.sgd_update_frequency = 4
    config.gradient_clip = 5
    config.history_length = 4
    # config.double_q = True
    config.double_q = False
    config.max_steps = int(2e7)
    run_steps(TDAuxAgent(config))
Example #3
from deep_rl import Config

if __name__ == '__main__':
    config = Config()
    config.merge()

    print(config)
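Config also wraps an argument parser, so the same object can be populated from the command line, as Example #4 does. A minimal sketch, assuming merge() parses the registered flags and copies them onto the config as attributes; the flag names and defaults below are illustrative.

from deep_rl import Config

if __name__ == '__main__':
    config = Config()
    # Register CLI flags; merge() parses them and sets matching attributes on the config.
    config.add_argument('--game', default='BreakoutNoFrameskip-v4')  # illustrative default
    config.add_argument('--max_steps', type=int, default=int(2e7))
    config.merge()

    print(config.game, config.max_steps)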
Example #4
if __name__ == "__main__":
    cf = Config()
    cf.add_argument('--game', required=True)
    cf.add_argument('--run', type=int, required=True)
    cf.add_argument('--data_dir', type=str, required=True)
    cf.add_argument('--save_interval', type=int, default=1000000)
    cf.merge()

    set_one_thread()
    select_device(0)

    td_aux_many(cf, game=cf.game, run=cf.run, remark="og")
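If this script were saved as, say, td_aux.py (the filename and game name are illustrative), the required flags make an invocation look like: python td_aux.py --game BreakoutNoFrameskip-v4 --run 0 --data_dir ./data, with --save_interval optional (default 1000000).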
Example #5
def ppo_pixel(log_name='ppo-dmlab-image', render=False):

    config = Config()
    log_dir = get_default_log_dir(ppo_pixel.__name__)

    config.num_workers = 8

    config.task_fn = lambda: Task(
        use_vision=True,
        use_pos=False,
        num_envs=config.num_workers,
        render=render,
    )
    config.eval_env = Task(
        use_vision=True,
        use_pos=False,
        num_envs=config.num_workers,
        log_dir=log_dir,
        render=render,
    )

    config.optimizer_fn = lambda params: torch.optim.RMSprop(
        params, lr=0.00025, alpha=0.99, eps=1e-5)
    config.network_fn = lambda: CategoricalActorCriticNet(
        config.state_dim, config.action_dim, NatureConvBody(in_channels=3))
    config.state_normalizer = ImageNormalizer()
    config.reward_normalizer = SignNormalizer()
    config.discount = 0.99
    config.use_gae = True
    config.gae_tau = 0.95
    config.entropy_weight = 0.01
    config.gradient_clip = 0.5
    config.rollout_length = 128
    config.optimization_epochs = 3
    config.mini_batch_size = 32 * 8
    config.ppo_ratio_clip = 0.1
    config.log_interval = 128 * 8
    config.logger = get_logger(tag=log_name)
    config.tag = log_name  # this name must be unique. Anything with the same name will be overwritten
    config.max_steps = int(2e7)
    run_steps(PPOAgent(config))
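ppo_pixel is defined but never invoked in this snippet. A minimal entry point in the style of Example #4 might look like the sketch below; the choice of device 0 and the reuse of the default log_name are assumptions, not part of the original.

if __name__ == '__main__':
    # Entry-point sketch mirroring Example #4; device id and arguments are illustrative.
    set_one_thread()
    select_device(0)  # run on GPU 0, as Example #4 does
    ppo_pixel(log_name='ppo-dmlab-image', render=False)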