Example #1
def regression_test(local_mode=False):
    num_agents = 3
    local_dir = tempfile.mkdtemp()
    initialize_ray(test_mode=True, local_mode=local_mode)
    train(
        DiCESACTrainer,
        {
            "gamma": 0.95,
            "target_network_update_freq": 32,
            "tau": 1.0,
            "train_batch_size": 200,
            "rollout_fragment_length": 50,
            "optimization": {
                "actor_learning_rate": 0.005,
                "critic_learning_rate": 0.005,
                "entropy_learning_rate": 0.0001
            },
            **get_marl_env_config(
                "CartPole-v0", num_agents, normalize_actions=False)
        },
        {"episode_reward_mean": 150 * num_agents},
        exp_name="DELETEME",
        local_dir=local_dir,
        test_mode=True)
    shutil.rmtree(local_dir, ignore_errors=True)
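The train() and initialize_ray() helpers above are project utilities; assuming they are thin wrappers around ray.init and ray.tune.run, a minimal standalone sketch of the same experiment flow (trainer, config, stop criteria, experiment name, output directory) could look like this:

import tempfile

import ray
from ray import tune

# Hypothetical stand-in for the project's train() helper: the trainer, config
# and stopping criteria are passed straight to tune.run, with exp_name and
# local_dir mapping onto tune.run's name and local_dir arguments.
local_dir = tempfile.mkdtemp()
ray.init(local_mode=True)
tune.run(
    "PPO",  # any registered RLlib trainer name or trainer class works here
    name="DELETEME",
    stop={"episode_reward_mean": 150},
    config={"env": "CartPole-v0", "gamma": 0.95},
    local_dir=local_dir,
)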
Example #2
def regression_test2(local_mode=False):
    from ray import tune
    num_agents = 3
    local_dir = tempfile.mkdtemp()
    initialize_ray(test_mode=True, local_mode=local_mode)
    train(
        DiCESACTrainer,
        {
            "soft_horizon":
            True,
            "clip_actions":
            False,
            "normalize_actions":
            False,  # <<== Handle in MARL env
            "metrics_smoothing_episodes":
            5,
            "no_done_at_end":
            True,
            "train_batch_size":
            1000,
            "rollout_fragment_length":
            50,
            constants.DELAY_UPDATE:
            tune.grid_search([True, False]),
            # constants.NOR: tune.grid_search([True, False]),

            # "optimization": {
            #     "actor_learning_rate": 0.005,
            #     "critic_learning_rate": 0.005,
            #     "entropy_learning_rate": 0.0001
            # },
            **get_marl_env_config("Pendulum-v0",
                                  num_agents,
                                  normalize_actions=True)
        },
        {
            "episode_reward_mean": -300 * num_agents,
            "timesteps_total": 13000 * num_agents
        },
        exp_name="DELETEME",
        local_dir=local_dir,
        test_mode=True)
    shutil.rmtree(local_dir, ignore_errors=True)
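The tune.grid_search([True, False]) entry on constants.DELAY_UPDATE makes Tune launch one trial per listed value; a minimal sketch of that behaviour with a toy trainable (all names here are illustrative, not part of the project):

from ray import tune

# Each value in a grid_search list becomes its own trial; several grid_search
# keys would produce the cross product of their values.
def toy_trainable(config):
    # Report a dummy metric so the trial terminates cleanly.
    tune.report(score=1.0 if config["delay_update"] else 0.0)

analysis = tune.run(
    toy_trainable,
    config={"delay_update": tune.grid_search([True, False])},  # -> 2 trials
)
print(analysis.dataframe())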
Example #3
    elif difficulty == "easy":
        stop = int(1e6)
        config["env"] = tune.grid_search([
            "BipedalWalker-v3",
            # 'ReacherBulletEnv-v0',
            # 'PusherBulletEnv-v0',
            # 'ThrowerBulletEnv-v0',
            # 'StrikerBulletEnv-v0',
            'Walker2DBulletEnv-v0',
            'HalfCheetahBulletEnv-v0',
            'AntBulletEnv-v0',
            'HopperBulletEnv-v0',
        ])
    else:
        raise ValueError("args.set must be in ['hard', 'easy'].")

    train(
        algo,
        config=config,
        stop=stop,
        exp_name=exp_name,
        num_seeds=args.num_seeds,
        test_mode=args.test,
        start_seed=args.start_seed,
        verbose=1,
        keep_checkpoints_num=10,

        # We mainly run on the CUHK cluster, so we need to specify the local
        # directory used to store results
        local_dir=args.local_dir)
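The args object is not shown in this fragment; a hypothetical argparse setup providing the fields it reads (exp_name, set, num_seeds, start_seed, test, local_dir) might look like:

import argparse

# Hypothetical CLI definition matching the attributes the fragment reads
# from args; names and defaults are assumptions, not the project's own.
parser = argparse.ArgumentParser()
parser.add_argument("--exp-name", type=str, required=True)
parser.add_argument("--set", type=str, default="easy", choices=["hard", "easy"])
parser.add_argument("--num-seeds", type=int, default=3)
parser.add_argument("--start-seed", type=int, default=0)
parser.add_argument("--test", action="store_true")
parser.add_argument("--local-dir", type=str, default="~/ray_results")
args = parser.parse_args()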
Example #4
                'AntBulletEnv-v0',
                'HopperBulletEnv-v0',
            ]),
            "train_batch_size":
            4000,
            "num_workers":
            10,
            "optimizer_type":
            tune.grid_search(["sgd", "adam"]),
            "num_gpus":
            0,
            "lr":
            2.5e-4,
            "episodes_per_batch":
            1,
            "num_cpus_per_worker":
            0.5
        }
        run = GaussianESTrainer
        stop = int(1e8)

    train(run,
          stop=int(args.stop),
          verbose=1,
          config=config,
          exp_name=args.exp_name,
          num_seeds=args.num_seeds,
          num_gpus=args.num_gpus)

    print("Test finished! Cost time: ", time.time() - now)
Example #5
        dice_utils.TWO_SIDE_CLIP_LOSS: False,
        dice_utils.ONLY_TNB: True,
        dice_utils.NORMALIZE_ADVANTAGE: True,  # May need to be set to False
    }
)


DiESTrainer = DiCETrainer.with_updates(
    name="DiES",
    default_config=dies_default_config,
    after_train_result=run_evolution_strategies
)

if __name__ == '__main__':
    env_name = "CartPole-v0"
    num_agents = 3
    config = {
        "num_sgd_iter": 2,
        "train_batch_size": 400,
        "update_steps": 1000,
        **get_marl_env_config(env_name, num_agents)
    }
    initialize_ray(test_mode=True, local_mode=True)
    train(
        DiESTrainer,
        config,
        exp_name="DELETE_ME_TEST",
        stop={"timesteps_total": 10000},
        test_mode=True
    )
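DiCETrainer.with_updates uses the Trainer.with_updates hook from older RLlib releases (pre-Ray 2.0), which returns a new trainer class with the supplied name, default config, and callbacks swapped in. A minimal sketch of the same pattern on the built-in PPOTrainer, under the assumption that such a Ray version is installed:

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer, DEFAULT_CONFIG

# with_updates copies the base trainer class and overrides the given pieces;
# here only the name and a couple of default config values change.
my_default_config = DEFAULT_CONFIG.copy()
my_default_config.update({"train_batch_size": 400, "num_sgd_iter": 2})

MyPPOTrainer = PPOTrainer.with_updates(
    name="MyPPO",
    default_config=my_default_config,
)

tune.run(
    MyPPOTrainer,
    config={"env": "CartPole-v0"},
    stop={"timesteps_total": 10000},
)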