def get_conf():
    lr = 3e-5
    return {
        **get_base_ppo_conf(num_workers=10),
        'lr': lr,
        'env_config': ENV_CONF_COMMUNISM,
    }

def get_conf():
    lr = 1e-4
    return {
        **get_base_ppo_conf(num_workers=10),
        "lr_schedule": [
            [0, lr],
            [10_000_000, lr],
            [15_000_000, 0],
        ],
    }

def get_conf():
    lr = 1e-4
    return {
        **get_base_ppo_conf(num_workers=10),
        'lr_schedule': [
            [0, lr],
            [10_000_000, lr],
            [15_000_000, 0],
        ],
        'framework': 'torch',
    }

def get_conf():
    return {
        **get_base_ppo_conf(num_workers=10),
        "sgd_minibatch_size": 3000,  # 60 * 200 * 4 / 3000 = 16 steps of (B=60, L=50, dim)
        "lr": 3e-4,
        "multiagent": {
            "policies_to_train": ["learned"],
            "policies": {
                "learned": (None, OBS_SPACE_AGENT, ACT_SPACE_AGENT, {
                    "model": {
                        "custom_model": "my_model",
                        'max_seq_len': 50,
                    },
                }),
            },
            "policy_mapping_fn": lambda x: 'learned',
        },
    }

def get_conf():
    return {
        **get_base_ppo_conf(num_workers=10),
        'vf_loss_coeff': 0.1,
        'env_config': ENV_CONF_DYSTOPIA,
    }
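
# Usage sketch (an assumption, not part of these configs): the keys above
# ("sgd_minibatch_size", "lr_schedule", "multiagent", ...) match the legacy
# RLlib trainer config dict, so one way such a get_conf() could be consumed is:
import ray
from ray.rllib.agents.ppo import PPOTrainer

ray.init()
trainer = PPOTrainer(config=get_conf())  # pick whichever get_conf() variant applies
for i in range(5):
    result = trainer.train()
    print(i, result["episode_reward_mean"])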