Example #1
def test_es():
    ray.init()

    config = {
        'policy': DefaultPolicy,
        'num_workers': 2,
        'episodes_per_batch': 1,
        'train_batch_size': 1,
        'noise_size': 100000000,
        'env_config': {
            'max_num_steps': 100,
        },
        'model': {
            'sparse': False,
            's_dim': 128,
        }
    }
    register_env('my_env', env_creator)
    trainer = ESTrainer(config, 'my_env')

    res = dict()
    for i in range(5):
        res = trainer.train()

    assert res['training_iteration'] == 5
    assert res['timesteps_total'] == 2000
    assert res['info']['update_ratio'] > 0
Example #2
def register_all_environments():
    """Register all custom environments in Tune."""
    from ray.tune import register_env
    from raylab.envs.registry import ENVS

    for name, env_creator in ENVS.items():
        register_env(name, env_creator)
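
A minimal usage sketch to go with the helper above, assuming raylab's ENVS registry is importable and contains an environment named "Navigation" (the name here is only illustrative):

import ray
from ray import tune

ray.init()
register_all_environments()

# Any key of raylab.envs.registry.ENVS can now be referenced by name.
tune.run(
    "PPO",
    stop={"training_iteration": 10},
    config={
        "env": "Navigation",  # illustrative entry from the ENVS registry
        "num_workers": 1,
    },
)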
Example #3
    def test_avail_actions_qmix(self):
        grouping = {
            "group_1": ["agent_1", "agent_2"],
        }
        obs_space = Tuple([
            AvailActionsTestEnv.observation_space,
            AvailActionsTestEnv.observation_space,
        ])
        act_space = Tuple([
            AvailActionsTestEnv.action_space, AvailActionsTestEnv.action_space
        ])
        register_env(
            "action_mask_test",
            lambda config: AvailActionsTestEnv(config).with_agent_groups(
                grouping, obs_space=obs_space, act_space=act_space),
        )

        config = (QMixConfig().framework(framework="torch").environment(
            env="action_mask_test",
            env_config={
                "avail_actions": [3, 4, 8]
            },
        ).rollouts(num_envs_per_worker=5))  # Test with vectorization on.

        trainer = config.build()

        for _ in range(4):
            trainer.train()  # OK if it doesn't trip the action assertion error

        assert trainer.train()["episode_reward_mean"] == 30.0
        trainer.stop()
        ray.shutdown()
Example #4
def fine_tune(config, run, env: RailEnv):
    """
    Fine-tune the agent on a static env at evaluation time
    """
    RailEnvPersister.save(env, CURRENT_ENV_PATH)
    num_agents = env.get_num_agents()
    tune_time = get_tune_time(num_agents)

    def env_creator(env_config):
        return FlatlandSparse(env_config,
                              fine_tune_env_path=CURRENT_ENV_PATH,
                              max_steps=num_agents * 100)

    register_env("flatland_sparse", env_creator)
    config['num_workers'] = 3
    config['num_envs_per_worker'] = 1
    config['lr'] = 0.00001 * num_agents
    exp_an = ray.tune.run(run["agent"],
                          reuse_actors=True,
                          verbose=1,
                          stop={"time_since_restore": tune_time},
                          checkpoint_freq=1,
                          keep_checkpoints_num=1,
                          checkpoint_score_attr="episode_reward_mean",
                          config=config,
                          restore=run["checkpoint_path"])

    trial: Trial = exp_an.trials[0]
    agent_config = trial.config
    agent_config['num_workers'] = 0
    agent = trial.get_trainable_cls()(env=config["env"], config=trial.config)
    checkpoint = exp_an.get_trial_checkpoints_paths(
        trial, metric="episode_reward_mean")
    agent.restore(checkpoint[0][0])
    return agent
Example #5
def test_sac():
    ray.init()

    config = {
        "model": {
            "use_lstm": False,
        },
        "use_state_preprocessor": False,
        "learning_starts": 200,
        "normalize_actions": True,
        "rollout_fragment_length": 10,
        "timesteps_per_iteration": 100,
        "num_gpus": 1,
        "num_workers": 0,
        "evaluation_interval": 0,
        "monitor": False,
    }
    register_env("MinitaurEnv", lambda env_config: MinitaurGymEnv(**env_config))
    trainer = SACTrainer(config, "MinitaurEnv")

    print(trainer.get_policy().model)

    for i in range(5):
        res = trainer.train()
        print(res)

    print(trainer._logdir)

    ray.shutdown()
Example #6
def test_dynamics():
    ray.init()

    config = {
        'env': {
            'max_num_steps': 200,
        },
        'use_dynamics': True,
        'num_workers': 2,
        'plan_horizon': 10,
        'rollout_fragment_length': 50,
        "learning_starts": 200,
        "train_batch_size": 200,
        "train_every": 200,
        "num_sgd_iter": 3,
        "monitor": False
    }
    register_env('CheetahEnv', lambda env_config: CheetahEnv(**env_config))
    trainer = A2CTrainer(config, 'CheetahEnv')

    print(trainer.get_policy().num_params)

    for i in range(5):
        res = trainer.train()
        assert res['timesteps_this_iter'] == config[
            'rollout_fragment_length'] * config['num_workers']
        logger.info(res['info'])

    print(trainer._logdir)

    ray.shutdown()
Example #7
def create_expe_spec(config, n_cpu, n_gpu, exp_dir):
    def _trial_name_creator(trial):
        return "{}_{}_123".format(trial.trainable_name, trial.trial_id)

    # Create the env and register it so Ray and RLlib can use it
    register_env(config["env_config"]["env"],
                 lambda env_config: env_basic_creator(env_config))

    expe_config = merge_env_algo_config(config)

    # No longer needed: resource allocation is handled automatically by RLlib
    #trial_resources = {"cpu": expe_config["num_workers"]+3, "gpu": expe_config["num_gpus"]}

    # expe_config["lr"] = grid_search([1e-3, 1e-4, 5e-4, 1e-5, 5e-5])
    # expe_config["target_network_update_freq"] = grid_search([20000, 40000])

    experiment = Experiment(
        name=config["name_expe"],
        run=config["algo"],
        stop=config["stop"],
        config=expe_config,
        num_samples=config.get("num_samples", 1),
        checkpoint_freq=10,
        max_failures=2,
        local_dir=exp_dir,
        # trial_name_creator=tune.function(_trial_name_creator)
        # todo : add when available
    )

    return experiment
Example #8
    def test_avail_actions_qmix(self):
        grouping = {
            "group_1": ["agent_1", "agent_2"],
        }
        obs_space = Tuple([
            AvailActionsTestEnv.observation_space,
            AvailActionsTestEnv.observation_space
        ])
        act_space = Tuple([
            AvailActionsTestEnv.action_space, AvailActionsTestEnv.action_space
        ])
        register_env(
            "action_mask_test",
            lambda config: AvailActionsTestEnv(config).with_agent_groups(
                grouping, obs_space=obs_space, act_space=act_space))

        trainer = QMixTrainer(
            env="action_mask_test",
            config={
                "num_envs_per_worker": 5,  # test with vectorization on
                "env_config": {
                    "avail_actions": [3, 4, 8],
                },
                "framework": "torch",
            })
        for _ in range(4):
            trainer.train()  # OK if it doesn't trip the action assertion error
        assert trainer.train()["episode_reward_mean"] == 30.0
        trainer.stop()
        ray.shutdown()
Example #9
def main():
    arg_params = parse_args()
    register_env("malmo", create_malmo)
    config = {
        'mission': arg_params.mission,
        'port': arg_params.port,
        'server': arg_params.server,
        'port2': arg_params.port2,
        'server2': arg_params.server2,
        'episodes': arg_params.episodes,
        'episode': arg_params.episode,
        'role': arg_params.role,
        'episodemaxsteps': arg_params.episodemaxsteps,
        'saveimagesteps': arg_params.saveimagesteps,
        'resync': arg_params.resync,
        'experimentUniqueId': arg_params.experimentUniqueId
    }
    env = create_malmo(config)

    ray.init(num_cpus=20)

    tune.run("IMPALA",
             stop={
                 "timesteps_total": 10000,
             },
             config={
                 "env_config": config,
                 "env": "malmo",
                 "num_workers": 1,
                 "num_gpus": 0
             })

    ray.shutdown()
Example #10
def register_env(env_name, env_config={}, model_name=None):
    env = utils.create_env(env_name)
    tune.register_env(env_name,
                      lambda env_name: env(env_name, env_config=env_config))
    if model_name is None:
        model_name = env_name
    return model_name
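
A hedged usage sketch for the wrapper above, assuming utils.create_env returns an environment class compatible with the lambda shown (the env name and config keys are illustrative):

from ray import tune

model_name = register_env("my_env", env_config={"max_steps": 200})
# model_name defaults to the env name when not passed explicitly.

tune.run(
    "PPO",
    stop={"training_iteration": 1},
    config={"env": "my_env"},
)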
Example #11
    def test_counting_by_agent_steps(self):
        """Test whether a PPOTrainer can be built with all frameworks."""
        config = copy.deepcopy(ppo.DEFAULT_CONFIG)
        action_space = Discrete(2)
        obs_space = Box(float("-inf"), float("inf"), (4, ), dtype=np.float32)

        config["num_workers"] = 2
        config["num_sgd_iter"] = 2
        config["framework"] = "torch"
        config["rollout_fragment_length"] = 21
        config["train_batch_size"] = 147
        config["multiagent"] = {
            "policies": {
                "p0": (None, obs_space, action_space, {}),
                "p1": (None, obs_space, action_space, {}),
            },
            "policy_mapping_fn": lambda aid: "p{}".format(aid),
            "count_steps_by": "agent_steps",
        }
        tune.register_env("ma_cartpole",
                          lambda _: MultiAgentCartPole({"num_agents": 2}))
        num_iterations = 2
        trainer = ppo.PPOTrainer(config=config, env="ma_cartpole")
        results = None
        for i in range(num_iterations):
            results = trainer.train()
        self.assertGreater(results["agent_timesteps_total"],
                           num_iterations * config["train_batch_size"])
        self.assertLess(results["agent_timesteps_total"],
                        (num_iterations + 1) * config["train_batch_size"])
        trainer.stop()
Example #12
def register_retro(game, state, **kwargs):
    """Registers a given retro game as a ray environment

    The environment is registered with name 'retro-v0'
    """
    env_creator = lambda env_config: make_env(game=game, state=state, **kwargs)
    register_env("retro-v0", env_creator)
    return partial(env_creator, {})
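
A short usage sketch for register_retro, assuming gym-retro and the data for the chosen game are installed locally (game and state names are illustrative):

import ray
from ray.rllib.agents.dqn import DQNTrainer

ray.init()
# Registers the env under the fixed name 'retro-v0' and returns a local creator.
make_local_env = register_retro("Airstriker-Genesis", state="Level1")
local_env = make_local_env()  # handy for inspecting observation/action spaces

trainer = DQNTrainer(config={"num_workers": 1}, env="retro-v0")
print(trainer.train())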
Example #13
def main():
    args = parser.parse_args()
    config = generate_config(args)

    # env = CityFlowEnvRay(config)
    # eng = cityflow.Engine(config["cityflow_config_file"], thread_num = config["thread_num"])
    # config["eng"] = [eng,]
    # print(config["eng"])
    num_agents = len(config["intersection_id"])
    grouping = {"group_1": [id_ for id_ in config["intersection_id"]]}
    obs_space = Tuple(
        [CityFlowEnvRay.observation_space for _ in range(num_agents)])
    act_space = Tuple([CityFlowEnvRay.action_space for _ in range(num_agents)])
    register_env(
        "cityflow_multi", lambda config_: CityFlowEnvRay(config_).
        with_agent_groups(grouping, obs_space=obs_space, act_space=act_space))

    if args.algo == "QMIX":
        config_ = {
            # "num_workers": 2,
            "num_gpus_per_worker": 0,
            "sample_batch_size": 4,
            "num_cpus_per_worker": 3,
            "train_batch_size": 32,
            "exploration_final_eps": 0.0,
            "num_workers": 8,
            "mixer": grid_search(["qmix"]),
            "env_config": config
        }
        group = True
    elif args.algo == "APEX_QMIX":
        config_ = {
            "num_gpus": 1,
            "num_workers": 2,
            "optimizer": {
                "num_replay_buffer_shards": 1,
            },
            "min_iter_time_s": 3,
            "buffer_size": 2000,
            "learning_starts": 300,
            "train_batch_size": 64,
            "sample_batch_size": 32,
            "target_network_update_freq": 100,
            "timesteps_per_iteration": 1000,
            "env_config": config
        }
        group = True
    else:
        config_ = {}
        group = False

    ray.init()
    tune.run(
        args.algo,
        stop={"timesteps_total": args.epoch * args.num_step},
        checkpoint_freq=args.save_freq,
        config=dict(config_, **{"env": "cityflow_multi"}),
    )
Example #14
def test_job(model: Model, checkpoint: Path, outputs_dir: Path) -> None:
    logger.info(
        "Initializing ray with 2 cpus and %d GPUs",
        model.executor.gpus,
    )
    ray.init(
        num_cpus=2,
        num_gpus=model.executor.gpus,
        include_dashboard=False,
    )

    tune.register_env(model.environment.rllib_id,
                      lambda _: model.environment.make_env())
    agent = model.agent.make_agent(model.environment)

    logger.info(
        "Restoring %s agent with %s trainable params from %s",
        model.agent.type,
        f"{model.agent.trainable_parameters_count(agent):,}",
        checkpoint,
    )
    agent.restore(str(checkpoint))

    # Run inference on all of the test benchmarks.
    results: List[InferenceResult] = []

    with model.environment.make_env() as env:
        test_benchmarks = list(model.testing.benchmark_uris_iterator(env))
        for i, benchmark in enumerate(test_benchmarks, start=1):
            env.reset(benchmark=benchmark)
            result = InferenceResult.from_agent(
                env,
                agent,
                runtime=model.environment.reward_space == "Runtime")
            logger.info(
                "Test %s of %s: %s",
                f"{i:,d}",
                f"{len(test_benchmarks):,d}",
                result,
            )
            results.append(result)

    # Do this once the actual work has been done so that failed jobs
    # don't leave meta files lying around.
    with open(outputs_dir / "test-results.json", "w") as f:
        json.dump([r.dict() for r in results], f)
    with open(outputs_dir / "test-meta.json", "w") as f:
        json.dump(
            {
                "timestamp": datetime.now().isoformat(),
                "checkpoint": checkpoint.name,
            },
            f,
        )

    # Explicit call to ray shutdown here so that multiple consecutive
    # jobs can initialize ray with different resource requirements.
    ray.shutdown()
Example #15
    def test_pg_compilation(self):
        """Test whether a PGTrainer can be built with all frameworks."""
        config = pg.DEFAULT_CONFIG.copy()
        config["num_workers"] = 1
        config["rollout_fragment_length"] = 500
        # Test with an observation filter to check that it works w/o preprocessing.
        config["observation_filter"] = "MeanStdFilter"
        num_iterations = 1

        image_space = Box(-1.0, 1.0, shape=(84, 84, 3))
        simple_space = Box(-1.0, 1.0, shape=(3,))

        tune.register_env(
            "random_dict_env",
            lambda _: RandomEnv(
                {
                    "observation_space": Dict(
                        {
                            "a": simple_space,
                            "b": Discrete(2),
                            "c": image_space,
                        }
                    ),
                    "action_space": Box(-1.0, 1.0, shape=(1,)),
                }
            ),
        )
        tune.register_env(
            "random_tuple_env",
            lambda _: RandomEnv(
                {
                    "observation_space": Tuple(
                        [simple_space, Discrete(2), image_space]
                    ),
                    "action_space": Box(-1.0, 1.0, shape=(1,)),
                }
            ),
        )

        for _ in framework_iterator(config, with_eager_tracing=True):
            # Test for different env types (discrete w/ and w/o image, + cont).
            for env in [
                "random_dict_env",
                "random_tuple_env",
                "MsPacmanNoFrameskip-v4",
                "CartPole-v0",
                "FrozenLake-v1",
            ]:
                print(f"env={env}")
                trainer = pg.PGTrainer(config=config, env=env)
                for i in range(num_iterations):
                    results = trainer.train()
                    check_train_results(results)
                    print(results)

                check_compute_single_action(trainer, include_prev_action_reward=True)
Example #16
def register(env_config):
    ModelCatalog.register_custom_model("1st_model", TorchRNNModel)
    ModelCatalog.register_custom_model("2nd_model", SecondModel)
    ModelCatalog.register_custom_model("3rd_model", ThirdModel)
    ModelCatalog.register_custom_model("4th_model", FourthModel)
    ModelCatalog.register_custom_model("5th_model", FifthModel)
    ModelCatalog.register_custom_model("6th_model", SixthModel)
    ModelCatalog.register_custom_model("7th_model", SeventhModel)

    tune.register_env("MinerEnv-v0", lambda x: v0.RllibMinerEnv(env_config))
Example #17
    def __init__(self, trainer: Trainer.__class__, weights: str):
        if not ray.is_initialized():
            ray.init()
        self.obs_state_processor = SimpleObsStateProcessor(
            infected_population_sorting_per_city)
        self.act_state_processor = SimpleActStateProcessor(
            sort_pathogens=self.obs_state_processor.sort_pathogens)
        register_env(
            "ic20env", lambda _: InferenceIC20Environment(
                self.obs_state_processor, self.act_state_processor))

        self.trainer = self._load_trainer(trainer(env="ic20env"), weights)
Example #18
def main():
    register_env('MinitaurEnv',
                 lambda env_config: MinitaurGymEnv(**env_config))

    if args.eval:
        config = {
            'env_config': {
                'render': True,
            },
            'num_workers': 0,
            'seed': 123,
            'skill_input': random.randint(0, 9),
        }

        trainer = PPOTrainer(config, 'MinitaurEnv')
        state = pickle.load(open(args.restore_checkpoint, "rb"))
        states = pickle.loads(state['worker'])['state']
        trainer.set_weights(states)

        while True:
            trainer.evaluate()
            time.sleep(0.01)
    else:
        log_interval = args.log_interval
        num_episodes = args.num_episodes

        ray.init()

        config = {
            'num_workers': args.num_workers,
            'policy': args.policy,
            'rollout_fragment_length': 200,
            'sgd_minibatch_size': 256,
            'num_sgd_iter': 20,
            'train_batch_size': 3200,
            'use_env_rewards': False,
        }

        trainer = PPOTrainer(config=config, env='MinitaurEnv')

        if args.restore_checkpoint:
            logger.info('Resuming from checkpoint path: {}'.format(
                args.restore_checkpoint))
            trainer.restore(args.restore_checkpoint)

        for epi_counter in range(num_episodes):
            res = trainer.train()
            logger.info(res['info'])

            if (epi_counter + 1) % log_interval == 0:
                ckp = trainer.save()
                logger.info('model saved to: {}'.format(ckp))
Example #19
def load_agent():

    # Initialize training environment

    ray.init()

    def environment_creater(params=None):
        agent = SimpleAvoidAgent(noise=0.05)
        return TronRaySinglePlayerEnvironment(board_size=13,
                                              num_players=4,
                                              agent=agent)

    env = environment_creater()
    tune.register_env("tron_single_player", environment_creater)
    ModelCatalog.register_custom_preprocessor("tron_prep", TronExtractBoard)

    # Configure Deep Q Learning with reasonable values
    config = DEFAULT_CONFIG.copy()
    config['num_workers'] = 4
    ## config['num_gpus'] = 1
    #config["timesteps_per_iteration"] = 1024
    #config['target_network_update_freq'] = 256
    #config['buffer_size'] = 100_000
    #config['schedule_max_timesteps'] = 200_000
    #config['exploration_fraction'] = 0.02
    #config['compress_observations'] = False
    #config['n_step'] = 2
    #config['seed'] = SEED

    #Configure for PPO
    #config["sample_batch_size"]= 100
    #config["train_batch_size"]=200
    #config["sgd_minibatch_size"]=60
    #Configure A3C with reasonable values

    # We will use a simple convolution network with 3 layers as our feature extractor
    config['model']['vf_share_layers'] = True
    config['model']['conv_filters'] = [(512, 5, 1), (256, 3, 2), (128, 3, 2)]
    config['model']['fcnet_hiddens'] = [256]
    config['model']['custom_preprocessor'] = 'tron_prep'

    # Begin training or evaluation
    #trainer = DDPGTrainer(config, "tron_single_player")
    #trainer = A3CTrainer(config, "tron_single_player")
    trainer = DQNTrainer(config, "tron_single_player")
    #trainer = PPOTrainer(config, "tron_single_player")

    trainer.restore("./dqn_checkpoint_3800/checkpoint-3800")

    return trainer  #.get_policy("trainer")
Example #20
    def setup_grouping(config: dict):
        grouping = {
            "group_1": list(range(config["env_config"]["max_n_agents"])),
        }

        obs_space = Tuple([make_obs(config["env_config"]["observation"],
                                    config["env_config"]["observation_config"]).observation_space()
                           for _ in range(config["env_config"]["max_n_agents"])])

        act_space = Tuple([GlobalFlatlandGymEnv.action_space for _ in range(config["env_config"]["max_n_agents"])])

        register_env(
            "flatland_sparse_grouped",
            lambda config: FlatlandSparse(config).with_agent_groups(
                grouping, obs_space=obs_space, act_space=act_space))
Example #21
def check_support_multiagent(alg, config):
    register_env("multi_agent_mountaincar",
                 lambda _: MultiAgentMountainCar({"num_agents": 2}))
    register_env("multi_agent_cartpole",
                 lambda _: MultiAgentCartPole({"num_agents": 2}))
    config["log_level"] = "ERROR"
    for _ in framework_iterator(config, frameworks=("torch", "tf")):
        if alg in ["DDPG", "APEX_DDPG", "SAC"]:
            a = get_agent_class(alg)(
                config=config, env="multi_agent_mountaincar")
        else:
            a = get_agent_class(alg)(config=config, env="multi_agent_cartpole")

        print(a.train())
        a.stop()
Example #22
    def test_sac_dict_obs_order(self):
        dict_space = Dict({
            "img": Box(low=0, high=1, shape=(42, 42, 3)),
            "cont": Box(low=0, high=100, shape=(3, )),
        })

        # Dict space .sample() returns an ordered dict.
        # Make sure the keys in samples are ordered differently.
        dict_samples = [{
            k: v
            for k, v in reversed(dict_space.sample().items())
        } for _ in range(10)]

        class NestedDictEnv(Env):
            def __init__(self):
                self.action_space = Box(low=-1.0, high=1.0, shape=(2, ))
                self.observation_space = dict_space
                self._spec = EnvSpec("NestedDictEnv-v0")
                self.steps = 0

            def reset(self):
                self.steps = 0
                return dict_samples[0]

            def step(self, action):
                self.steps += 1
                return dict_samples[self.steps], 1, self.steps >= 5, {}

        tune.register_env("nested", lambda _: NestedDictEnv())
        config = (sac.SACConfig().training(
            replay_buffer_config={
                "learning_starts": 0,
                "capacity": 10
            },
            train_batch_size=5,
        ).rollouts(
            num_rollout_workers=0,
            rollout_fragment_length=5,
        ).experimental(_disable_preprocessor_api=True))
        num_iterations = 1

        for _ in framework_iterator(config, with_eager_tracing=True):
            trainer = config.build(env="nested")
            for _ in range(num_iterations):
                results = trainer.train()
                check_train_results(results)
                print(results)
            check_compute_single_action(trainer)
Example #23
    def test_dreamer_compilation(self):
        """Test whether an DreamerTrainer can be built with all frameworks."""
        config = dreamer.DEFAULT_CONFIG.copy()
        tune.register_env("dm_control_hopper_hop", lambda _: hopper_hop())

        num_iterations = 1

        # Test against all frameworks.
        for _ in framework_iterator(config, frameworks="torch"):
            for env in ["dm_control_hopper_hop"]:
                trainer = dreamer.DREAMERTrainer(config=config, env=env)
                for i in range(num_iterations):
                    results = trainer.train()
                    print(results)
                check_compute_single_action(trainer)
                trainer.stop()
Example #24
def main():
    env_args = {
        "forest_data_path": "/Users/anmartin/Projects/summer_project/hl_planner/forest_data.tiff",
        "simulation_data_path": "/Users/anmartin/Projects/FormationSimulation/fastsimulation.json",
        "num_measurements": 6,
        "max_forest_heights": [60, 90, 45, 38, 30, 76],
        "orbit_altitude": 757000,
        "draw_plot": True
    }

    parser = rollout.create_parser()
    args = parser.parse_args()

    register_env("offline-orekit", lambda _: OfflineOrekitEnv(env_args))

    rollout.run(args, parser)
Example #25
    def test_sac_dict_obs_order(self):
        dict_space = Dict({
            "img": Box(low=0, high=1, shape=(42, 42, 3)),
            "cont": Box(low=0, high=100, shape=(3, )),
        })

        # Dict space .sample() returns an ordered dict.
        # Make sure the keys in samples are ordered differently.
        dict_samples = [{
            k: v
            for k, v in reversed(dict_space.sample().items())
        } for _ in range(10)]

        class NestedDictEnv(Env):
            def __init__(self):
                self.action_space = Box(low=-1.0, high=1.0, shape=(2, ))
                self.observation_space = dict_space
                self._spec = EnvSpec("NestedDictEnv-v0")
                self.steps = 0

            def reset(self):
                self.steps = 0
                return dict_samples[0]

            def step(self, action):
                self.steps += 1
                return dict_samples[self.steps], 1, self.steps >= 5, {}

        tune.register_env("nested", lambda _: NestedDictEnv())

        config = sac.DEFAULT_CONFIG.copy()
        config["num_workers"] = 0  # Run locally.
        config["learning_starts"] = 0
        config["rollout_fragment_length"] = 5
        config["train_batch_size"] = 5
        config["replay_buffer_config"]["capacity"] = 10
        # Disable preprocessors.
        config["_disable_preprocessor_api"] = True
        num_iterations = 1

        for _ in framework_iterator(config, with_eager_tracing=True):
            trainer = sac.SACTrainer(env="nested", config=config)
            for _ in range(num_iterations):
                results = trainer.train()
                check_train_results(results)
                print(results)
            check_compute_single_action(trainer)
Example #26
def main():
    register_env("MinitaurEnv",
                 lambda env_config: MinitaurGymEnv(**env_config))

    if args.eval:
        config = {
            "env_config": {
                "render": True,
            },
            "num_workers": 0,
            "seed": 123,
        }

        trainer = A2CTrainer(config, "MinitaurEnv")
        state = pickle.load(open(args.restore_checkpoint, "rb"))
        states = pickle.loads(state["worker"])["state"]
        trainer.set_weights(states)

        while True:
            trainer.workers.local_worker().sample()
            time.sleep(0.01)
    else:
        ray.init()

        config = {
            "num_workers": args.num_workers,
            "rollout_fragment_length": 50,
            "train_batch_size": 2500,
            "num_sgd_iter": 80
        }
        trainer = A2CTrainer(config, "MinitaurEnv")

        if args.restore_checkpoint:
            logger.info("Resuming from checkpoint path: {}".format(
                args.restore_checkpoint))
            trainer.restore(args.restore_checkpoint)

        for epi_counter in range(args.num_episodes):
            res = trainer.train()
            logger.info(res["info"])

            if (epi_counter + 1) % args.log_interval == 0:
                ckp = trainer.save()
                logger.info("model saved to: {}".format(ckp))

        ray.shutdown()
Example #27
def register_pettingzoo_env(env_name):
    """ Register the Env including preprocessing pipeline, s.t. it can be easily
    imported using ray. """
    def get_env(config):
        name = env_name.replace('-', '_')
        env = __import__(f'pettingzoo.atari.{name}', fromlist=[None])
        env = env.parallel_env(obs_type='grayscale_image')
        env = frame_skip_v0(env, 4)
        env = resize_v0(env, 84, 84)
        env = frame_stack_v1(env, 4)
        env = agent_indicator_v0(env)
        return ParallelPettingZooEnv(
            env,
            random_action=config['random_action'],
            random_proba=config['random_action_probability'])

    print(f'Registering env with name {env_name}')
    register_env(env_name, lambda config: get_env(config))
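
A brief usage sketch for the PettingZoo helper above, assuming the pong_v2 Atari environment is available in the installed PettingZoo version (name and version are illustrative):

import ray
from ray import tune

ray.init()
register_pettingzoo_env("pong-v2")

tune.run(
    "PPO",
    stop={"timesteps_total": 100000},
    config={
        "env": "pong-v2",
        "env_config": {
            "random_action": False,
            "random_action_probability": 0.0,
        },
        "num_workers": 2,
    },
)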
Example #28
def test_model_free():
    ray.init()

    config = {
        'use_dynamics': False,
        'num_workers': 2,
        'train_batch_size': 200,
        "train_every": 200,
    }
    register_env('MinitaurEnv',
                 lambda env_config: MinitaurGymEnv(**env_config))
    trainer = A2CTrainer(config, 'MinitaurEnv')

    for i in range(5):
        res = trainer.train()
        logger.info(res)

    ray.shutdown()
Example #29
def check_support_multiagent(alg, config):
    register_env("multi_agent_mountaincar",
                 lambda _: MultiAgentMountainCar({"num_agents": 2}))
    register_env("multi_agent_cartpole",
                 lambda _: MultiAgentCartPole({"num_agents": 2}))
    config["log_level"] = "ERROR"
    for fw in framework_iterator(config):
        if fw in ["tf2", "tfe"] and \
                alg in ["A3C", "APEX", "APEX_DDPG", "IMPALA"]:
            continue
        if alg in ["DDPG", "APEX_DDPG", "SAC"]:
            a = get_trainer_class(alg)(config=config,
                                       env="multi_agent_mountaincar")
        else:
            a = get_trainer_class(alg)(config=config,
                                       env="multi_agent_cartpole")

        print(a.train())
        a.stop()
Example #30
def register_doom_envs_rllib(**kwargs):
    """Register env factories in RLLib system."""
    for spec in DOOM_ENVS:

        def make_env_func(env_config):
            print('Creating env!!!')
            cfg = default_cfg(env=spec.name)
            cfg.pixel_format = 'HWC'  # tensorflow models expect HWC by default

            if 'skip_frames' in env_config:
                cfg.env_frameskip = env_config['skip_frames']
            if 'res_w' in env_config:
                cfg.res_w = env_config['res_w']
            if 'res_h' in env_config:
                cfg.res_h = env_config['res_h']
            if 'wide_aspect_ratio' in env_config:
                cfg.wide_aspect_ratio = env_config['wide_aspect_ratio']

            env = make_doom_env(spec.name,
                                env_config=env_config,
                                cfg=cfg,
                                **kwargs)

            # we lock the global mutex here, otherwise Doom instances may crash on first reset when too many of them are reset simultaneously
            lock = FileLock(DOOM_LOCK_PATH)
            attempt = 0
            while True:
                attempt += 1
                try:
                    with lock.acquire(timeout=10):
                        print('Env created, resetting...')
                        env.reset()
                        print('Env reset completed! Config:', env_config)
                        break
                except Timeout:
                    print(
                        'Another instance of this application currently holds the lock, attempt:',
                        attempt)

            return env

        register_env(spec.name, make_env_func)
Example #31
env_name = "carla_env"
env_config = ENV_CONFIG.copy()
env_config.update({
    "verbose": False,
    "x_res": 80,
    "y_res": 80,
    "use_depth_camera": False,
    "discrete_actions": False,
    "server_map": "/Game/Maps/Town02",
    "reward_function": "lane_keep",
    "enable_planner": False,
    "scenarios": [LANE_KEEP],
})

register_env(env_name, lambda env_config: CarlaEnv(env_config))
register_carla_model()

ray.init()
run_experiments({
    "carla-a3c": {
        "run": "A3C",
        "env": "carla_env",
        "resources": {"cpu": 4, "gpu": 1},
        "config": {
            "env_config": env_config,
            "model": {
                "custom_model": "carla",
                "custom_options": {
                    "image_shape": [80, 80, 6],
                },
Example #32
        self.poletrans.set_rotation(-x[2])

        return self.viewer.render(return_rgb_array=mode == 'rgb_array')

    def close(self):
        if self.viewer:
            self.viewer.close()


if __name__ == "__main__":
    import ray
    from ray import tune

    args = parser.parse_args()

    tune.register_env("cartpole_stateless", lambda _: CartPoleStatelessEnv())

    ray.init()

    configs = {
        "PPO": {
            "num_sgd_iter": 5,
            "vf_share_layers": True,
            "vf_loss_coeff": 0.0001,
        },
        "IMPALA": {
            "num_workers": 2,
            "num_gpus": 0,
            "vf_loss_coeff": 0.01,
        },
    }
Example #33
if __name__ == "__main__":
    args = parser.parse_args()

    grouping = {
        "group_1": ["agent_1", "agent_2"],
    }
    obs_space = Tuple([
        TwoStepGame.observation_space,
        TwoStepGame.observation_space,
    ])
    act_space = Tuple([
        TwoStepGame.action_space,
        TwoStepGame.action_space,
    ])
    register_env(
        "grouped_twostep",
        lambda config: TwoStepGame(config).with_agent_groups(
            grouping, obs_space=obs_space, act_space=act_space))

    if args.run == "QMIX":
        config = {
            "sample_batch_size": 4,
            "train_batch_size": 32,
            "exploration_final_eps": 0.0,
            "num_workers": 0,
            "mixer": grid_search([None, "qmix", "vdn"]),
        }
        group = True
    elif args.run == "APEX_QMIX":
        config = {
            "num_gpus": 0,
            "num_workers": 2,