Example #1
def test_a2c_exec_impl(ray_start_regular):
    config = {"min_iter_time_s": 0}
    for _ in framework_iterator(config):
        trainer = a3c.A2CTrainer(env="CartPole-v0", config=config)
        assert isinstance(trainer.train(), dict)
        check_compute_single_action(trainer)
        trainer.stop()
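The test excerpts in this list (Examples #1, #3 through #7, and #11) come from RLlib's own test suite and omit their imports and pytest setup. A minimal sketch of the surroundings they assume, using the pre-2.0 ray.rllib.agents module layout:

# Sketch of the test harness these excerpts rely on; not part of the original snippets.
import ray
import ray.rllib.agents.a3c as a3c
from ray.rllib.utils.test_utils import (
    check_compute_single_action,
    check_train_results,
    framework_iterator,
)

ray.init(num_cpus=4)  # stands in for the ray_start_regular pytest fixture used as the test argument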
Example #2
def game_eval():
    env = PomFFA()
    obs = env.reset()

    model_path = "/home/subill/ray_results/A2C_pom_2020-03-04_16-49-377fgvw_gr/checkpoint_601/checkpoint-601"

    config = a3c.DEFAULT_CONFIG.copy()
    config["num_gpus"] = 1
    config["num_workers"] = 1
    config["eager"] = False
    config["use_pytorch"] = True
    config["env_config"] = {"is_training": False}
    config["model"] = model_config

    trainer = a3c.A2CTrainer(env="pom", config=config)
    trainer.restore(model_path)
    for i in range(500):
        env.render()
        actions = trainer.compute_action(obs)
        print(actions)
        obs, reward, done, _ = env.step(actions)
        if done:
            break
        time.sleep(0.5)

    env.render()
    time.sleep(10)
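Examples #2 and #8 are from a Pommerman project and reference names defined elsewhere in it: the PomFFA environment class, the model_config dict, and the registered env id "pom". A hypothetical sketch of the setup they assume; the import path for PomFFA and the contents of model_config are project-specific:

# Hypothetical preamble for the "pom" snippets; PomFFA and model_config belong to the project.
import time

import ray
import ray.rllib.agents.a3c as a3c
from ray.tune.registry import register_env

ray.init()
register_env("pom", lambda env_config: PomFFA())  # PomFFA: the project's Pommerman FFA env class

model_config = {}  # placeholder; the project defines its own model settings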
Example #3
    def test_a2c_exec_impl(self):
        config = a3c.A2CConfig().reporting(min_time_s_per_reporting=0)
        for _ in framework_iterator(config):
            trainer = a3c.A2CTrainer(env="CartPole-v0", config=config)
            results = trainer.train()
            check_train_results(results)
            print(results)
            check_compute_single_action(trainer)
            trainer.stop()
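Example #3 uses the newer A2CConfig builder, where min_time_s_per_reporting replaces the older min_iter_time_s key seen in Examples #1, #4, and #6. Outside of the framework_iterator test helper, roughly the same run can be written as follows (a sketch, assuming a Ray version that ships A2CConfig):

# Sketch: driving A2C through the config-object API instead of the legacy config dict.
import ray
import ray.rllib.agents.a3c as a3c

ray.init()
config = (
    a3c.A2CConfig()
    .reporting(min_time_s_per_reporting=0)
    .framework("torch")  # pick a single framework instead of iterating over all of them
)
trainer = config.build(env="CartPole-v0")
print(trainer.train())
trainer.stop()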
Example #4
def test_a2c_exec_impl_microbatch(ray_start_regular):
    config = {
        "min_iter_time_s": 0,
        "microbatch_size": 10,
    }
    for _ in framework_iterator(config, ("tf", "torch")):
        trainer = a3c.A2CTrainer(env="CartPole-v0", config=config)
        assert isinstance(trainer.train(), dict)
        check_compute_action(trainer)
Example #5
def test_a2c_exec_impl(ray_start_regular):
    config = {"min_time_s_per_reporting": 0}
    for _ in framework_iterator(config):
        trainer = a3c.A2CTrainer(env="CartPole-v0", config=config)
        results = trainer.train()
        check_train_results(results)
        print(results)
        check_compute_single_action(trainer)
        trainer.stop()
Example #6
def test_a2c_exec_impl_microbatch(ray_start_regular):
    config = {
        "min_iter_time_s": 0,
        "microbatch_size": 10,
    }
    for _ in framework_iterator(config):
        trainer = a3c.A2CTrainer(env="CartPole-v0", config=config)
        results = trainer.train()
        check_train_results(results)
        print(results)
        check_compute_single_action(trainer)
        trainer.stop()
Example #7
    def test_a2c_compilation(self):
        """Test whether an A2CTrainer can be built with both frameworks."""
        config = a3c.a2c.A2C_DEFAULT_CONFIG.copy()
        config["num_workers"] = 2
        config["num_envs_per_worker"] = 2

        num_iterations = 1

        # Test against all frameworks.
        for _ in framework_iterator(config):
            for env in ["PongDeterministic-v0"]:
                trainer = a3c.A2CTrainer(config=config, env=env)
                for i in range(num_iterations):
                    results = trainer.train()
                    print(results)
                check_compute_single_action(trainer)
                trainer.stop()
Example #8
def game_train():
    config = a3c.DEFAULT_CONFIG.copy()
    config["num_gpus"] = 1
    config["num_workers"] = 6
    config["eager"] = False
    config["use_pytorch"] = True
    config["model"] = model_config
    print(config)
    trainer = a3c.A2CTrainer(env="pom", config=config)

    # Can optionally call trainer.restore(path) to load a checkpoint.

    for i in range(10000):
        result = trainer.train()
        print(pretty_print(result))

        if i % 200 == 0:
            checkpoint = trainer.save()
            print("checkpoint saved at", checkpoint)
Example #9
def get_rl_agent(agent_name, config, env_to_agent):
    if agent_name == A2C:
        import ray.rllib.agents.a3c as a2c
        agent = a2c.A2CTrainer(config=config, env=env_to_agent)
    elif agent_name == A3C:
        import ray.rllib.agents.a3c as a3c
        agent = a3c.A3CTrainer(config=config, env=env_to_agent)
    elif agent_name == BC:
        import ray.rllib.agents.marwil as bc
        agent = bc.BCTrainer(config=config, env=env_to_agent)
    elif agent_name == DQN:
        import ray.rllib.agents.dqn as dqn
        agent = dqn.DQNTrainer(config=config, env=env_to_agent)
    elif agent_name == APEX_DQN:
        import ray.rllib.agents.dqn as dqn
        agent = dqn.ApexTrainer(config=config, env=env_to_agent)
    elif agent_name == IMPALA:
        import ray.rllib.agents.impala as impala
        agent = impala.ImpalaTrainer(config=config, env=env_to_agent)
    elif agent_name == MARWIL:
        import ray.rllib.agents.marwil as marwil
        agent = marwil.MARWILTrainer(config=config, env=env_to_agent)
    elif agent_name == PG:
        import ray.rllib.agents.pg as pg
        agent = pg.PGTrainer(config=config, env=env_to_agent)
    elif agent_name == PPO:
        import ray.rllib.agents.ppo as ppo
        agent = ppo.PPOTrainer(config=config, env=env_to_agent)
    elif agent_name == APPO:
        import ray.rllib.agents.ppo as ppo
        agent = ppo.APPOTrainer(config=config, env=env_to_agent)
    elif agent_name == SAC:
        import ray.rllib.agents.sac as sac
        agent = sac.SACTrainer(config=config, env=env_to_agent)
    elif agent_name == LIN_UCB:
        import ray.rllib.contrib.bandits.agents.lin_ucb as lin_ucb
        agent = lin_ucb.LinUCBTrainer(config=config, env=env_to_agent)
    elif agent_name == LIN_TS:
        import ray.rllib.contrib.bandits.agents.lin_ts as lin_ts
        agent = lin_ts.LinTSTrainer(config=config, env=env_to_agent)
    else:
        raise Exception("Invalid agent name")
    return agent
Example #10
def get_rllib_agent(agent_name, env_name, env, env_to_agent):
    config = get_config(env_name, env, 1) if is_rllib_agent(agent_name) else {}
    if agent_name == RLLIB_A2C:
        import ray.rllib.agents.a3c as a2c
        agent = a2c.A2CTrainer(config=config, env=env_to_agent)
    elif agent_name == RLLIB_A3C:
        import ray.rllib.agents.a3c as a3c
        agent = a3c.A3CTrainer(config=config, env=env_to_agent)
    elif agent_name == RLLIB_BC:
        import ray.rllib.agents.marwil as bc
        agent = bc.BCTrainer(config=config, env=env_to_agent)
    elif agent_name == RLLIB_DQN:
        import ray.rllib.agents.dqn as dqn
        agent = dqn.DQNTrainer(config=config, env=env_to_agent)
    elif agent_name == RLLIB_APEX_DQN:
        import ray.rllib.agents.dqn as dqn
        agent = dqn.ApexTrainer(config=config, env=env_to_agent)
    elif agent_name == RLLIB_IMPALA:
        import ray.rllib.agents.impala as impala
        agent = impala.ImpalaTrainer(config=config, env=env_to_agent)
    elif agent_name == RLLIB_MARWIL:
        import ray.rllib.agents.marwil as marwil
        agent = marwil.MARWILTrainer(config=config, env=env_to_agent)
    elif agent_name == RLLIB_PG:
        import ray.rllib.agents.pg as pg
        agent = pg.PGTrainer(config=config, env=env_to_agent)
    elif agent_name == RLLIB_PPO:
        import ray.rllib.agents.ppo as ppo
        agent = ppo.PPOTrainer(config=config, env=env_to_agent)
    elif agent_name == RLLIB_APPO:
        import ray.rllib.agents.ppo as ppo
        agent = ppo.APPOTrainer(config=config, env=env_to_agent)
    elif agent_name == RLLIB_SAC:
        import ray.rllib.agents.sac as sac
        agent = sac.SACTrainer(config=config, env=env_to_agent)
    elif agent_name == RLLIB_LIN_UCB:
        import ray.rllib.contrib.bandits.agents.lin_ucb as lin_ucb
        agent = lin_ucb.LinUCBTrainer(config=config, env=env_to_agent)
    elif agent_name == RLLIB_LIN_TS:
        import ray.rllib.contrib.bandits.agents.lin_ts as lin_ts
        agent = lin_ts.LinTSTrainer(config=config, env=env_to_agent)
    else:
        raise Exception("Invalid agent name")
    return agent
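Both dispatcher functions above key on agent-name constants (A2C, A3C, ..., RLLIB_A2C, ...) and helpers (get_config, is_rllib_agent) defined elsewhere in the project. A hypothetical usage sketch, assuming the constants are plain strings:

# Hypothetical call into the dispatcher; A2C normally comes from the project's constants module.
import ray

A2C = "A2C"

ray.init()
config = {"num_workers": 1, "framework": "torch"}
agent = get_rl_agent(A2C, config, env_to_agent="CartPole-v0")
print(agent.train()["episode_reward_mean"])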
Example #11
    def test_a2c_fake_multi_gpu_learning(self):
        """Test whether A2CTrainer can learn CartPole w/ faked multi-GPU."""
        config = copy.deepcopy(a3c.a2c.A2C_DEFAULT_CONFIG)

        # Fake GPU setup.
        config["num_gpus"] = 2
        config["_fake_gpus"] = True

        config["framework"] = "tf"
        # Mimic tuned_example for A2C CartPole.
        config["lr"] = 0.001

        trainer = a3c.A2CTrainer(config=config, env="CartPole-v0")
        num_iterations = 100
        learnt = False
        for i in range(num_iterations):
            results = trainer.train()
            print("reward={}".format(results["episode_reward_mean"]))
            if results["episode_reward_mean"] > 100.0:
                learnt = True
                break
        assert learnt, "A2C multi-GPU (with fake-GPUs) did not learn CartPole!"
        trainer.stop()
Example #12
def create_agent(args):
    """Create XXX agent.

    Args:
        args (argparse.Namespace): argparse arguments.

    Returns:
        agent (ray.rllib.agents.trainer_template.XXX): XXX agent.
    """
    # A3C
    # ------------------------------------------

    if args.agent == "A2C":
        # Custom configuration
        config = a3c.DEFAULT_CONFIG.copy()
        config["framework"] = "torch"
        config["lr"] = 5e-4
        config["num_gpus"] = 1
        config["num_workers"] = 1
        config["train_batch_size"] = 128
        config["use_critic"] = True
        config["use_gae"] = False

        # Custom model
        config["model"]["fcnet_activation"] = "tanh"
        config["model"]["fcnet_hiddens"] = [64, 64, 64]

        # Agent creation
        agent = a3c.A2CTrainer(env=GymEnv, config=config)

    # DQN
    # ------------------------------------------

    elif args.agent == "DQN":
        # Custom configuration
        config = dqn.DEFAULT_CONFIG.copy()
        config["double_q"] = False
        config["dueling"] = False
        config["framework"] = "torch"
        config["lr"] = 5e-3
        config["num_gpus"] = 1
        config["num_workers"] = 1
        config["train_batch_size"] = 128

        # Custom model
        config["model"]["fcnet_activation"] = "tanh"
        config["model"]["fcnet_hiddens"] = [128, 128, 128]

        # Agent creation
        agent = dqn.DQNTrainer(env=GymEnv, config=config)

    # To optionally load a checkpoint
    if args.checkpoint:
        agent.restore(args.checkpoint)

    # Print model
    if args.verbose > 0:
        model = agent.get_policy().model
        if config["framework"] == "tf":
            print(type(model.base_model.summary()))
        elif config["framework"] == "torch":
            print(model)

    return agent
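create_agent() expects an argparse.Namespace with at least agent, checkpoint, and verbose attributes, and relies on a GymEnv class and the a3c/dqn modules imported elsewhere. A hypothetical driver matching those assumptions:

# Hypothetical driver for create_agent(); GymEnv stands in for the project's environment class.
import argparse

import ray.rllib.agents.a3c as a3c
import ray.rllib.agents.dqn as dqn

parser = argparse.ArgumentParser()
parser.add_argument("--agent", choices=["A2C", "DQN"], default="A2C")
parser.add_argument("--checkpoint", default=None, help="optional checkpoint path to restore")
parser.add_argument("--verbose", type=int, default=0)
args = parser.parse_args()

agent = create_agent(args)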
Example #13
# config["sample_batch_size"]=256
config["gamma"] = 0.995
# config["entropy_coeff"] = 0.01

config['env_config'] = {
    # "num_rigid": 0,
    "reward": {
        "version": "v3"
    },
}
config["model"] = {
    "custom_model": "my_model",
    "custom_options": {},  # extra options to pass to your model
}

trainer = a3c.A2CTrainer(env="my_env", config=config)

# trainer = ppo.PPOTrainer(env="my_env", config=config)
# trainer = dqn.DQNTrainer(env="my_env", config=config)
policy = trainer.get_policy()
print(policy.model.base_model.summary())
model_path = "/home/charlieqiu818_gmail_com/ray_results/A2C_my_env_2020-04-12_21-36-42vs43fldq/checkpoint_7005/checkpoint-7005"
trainer.restore(model_path)

for i in range(10000):
    result = trainer.train()
    print(pretty_print(result))

    if i % 50 == 0:
        checkpoint = trainer.save()
        print("checkpoint saved at", checkpoint)
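The snippet above assumes a custom model was registered as "my_model", an environment was registered as "my_env", and config and pretty_print were set up earlier in the script. A sketch of that preamble, using RLlib's built-in fully connected network and CartPole as stand-ins for the project's own model and environment:

# Preamble assumed by the snippet; the real custom model and environment are project-specific.
import gym
import ray
import ray.rllib.agents.a3c as a3c
from ray.rllib.models import ModelCatalog
from ray.rllib.models.tf.fcnet import FullyConnectedNetwork
from ray.tune.logger import pretty_print
from ray.tune.registry import register_env

ray.init()
ModelCatalog.register_custom_model("my_model", FullyConnectedNetwork)  # stand-in custom model
register_env("my_env", lambda env_config: gym.make("CartPole-v0"))     # stand-in environment

config = a3c.DEFAULT_CONFIG.copy()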
Example #14
    ray.shutdown(True)
    ray.init(num_gpus=1, temp_dir='/home/baitianxiang/ray_tmp')

    config = {
        'env_config': env_config,
        'num_workers': 2,
        'log_level': 'ERROR',
        'framework': 'tf',
        'model': model_config,
    }

    register_env('DirectCnnEnv-v0',
                 lambda env_config: DirectCnnEnv(env_config))

    a2c_trainer = a3c.A2CTrainer(config=config, env='DirectCnnEnv-v0')

    policy = a2c_trainer.get_policy()
    cur_model = policy.model.base_model
    cur_model.summary()

    for i in tqdm(range(1000)):
        result = a2c_trainer.train()
        print(f"{result['episode_reward_max']:.4f}  |  "
              f"{result['episode_reward_mean']:.4f}  |  "
              f"{result['episode_reward_min']:.4f}")

        if i % 10 == 0:
            checkpoint = a2c_trainer.save()

            print("checkpoint saved at", checkpoint)
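Like the others, this last excerpt omits its imports and the definitions of DirectCnnEnv, env_config, and model_config. A hypothetical preamble:

# Hypothetical preamble; DirectCnnEnv is the project's custom environment class.
import ray
import ray.rllib.agents.a3c as a3c
from ray.tune.registry import register_env
from tqdm import tqdm

env_config = {}    # placeholder; the project supplies its own env_config
model_config = {}  # placeholder; the project supplies its own model settings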