Example #1
def check_support_multiagent(alg, config):
    register_env("multi_mountaincar", lambda _: MultiMountainCar(2))
    register_env("multi_cartpole", lambda _: MultiCartpole(2))
    if "DDPG" in alg:
        a = get_agent_class(alg)(config=config, env="multi_mountaincar")
    else:
        a = get_agent_class(alg)(config=config, env="multi_cartpole")
    try:
        a.train()
    finally:
        a.stop()
Example #2
def check_support_multiagent(alg, config):
    register_env("multi_mountaincar", lambda _: MultiMountainCar(2))
    register_env("multi_cartpole", lambda _: MultiCartpole(2))
    config["log_level"] = "ERROR"
    if "DDPG" in alg:
        a = get_agent_class(alg)(config=config, env="multi_mountaincar")
    else:
        a = get_agent_class(alg)(config=config, env="multi_cartpole")
    try:
        a.train()
    finally:
        a.stop()
Example #3
def check_support_multiagent(alg, config):
    register_env("multi_agent_mountaincar",
                 lambda _: MultiAgentMountainCar({"num_agents": 2}))
    register_env("multi_agent_cartpole",
                 lambda _: MultiAgentCartPole({"num_agents": 2}))
    config["log_level"] = "ERROR"
    for _ in framework_iterator(config, frameworks=("torch", "tf")):
        if alg in ["DDPG", "APEX_DDPG", "SAC"]:
            a = get_agent_class(alg)(
                config=config, env="multi_agent_mountaincar")
        else:
            a = get_agent_class(alg)(config=config, env="multi_agent_cartpole")

        print(a.train())
        a.stop()
Example #4
def check_support(alg, config, test_eager=False, test_trace=True):
    config["framework"] = "tfe"
    config["log_level"] = "ERROR"
    # Test both continuous and discrete actions.
    for cont in [True, False]:
        if cont and alg in ["DQN", "APEX", "SimpleQ"]:
            continue
        elif not cont and alg in ["DDPG", "APEX_DDPG", "TD3"]:
            continue

        if cont:
            config["env"] = "Pendulum-v0"
        else:
            config["env"] = "CartPole-v0"

        a = get_agent_class(alg)
        if test_eager:
            print("tf-eager: alg={} cont.act={}".format(alg, cont))
            config["eager_tracing"] = False
            tune.run(a,
                     config=config,
                     stop={"training_iteration": 1},
                     verbose=1)
        if test_trace:
            config["eager_tracing"] = True
            print("tf-eager-tracing: alg={} cont.act={}".format(alg, cont))
            tune.run(a,
                     config=config,
                     stop={"training_iteration": 1},
                     verbose=1)
Example #5
def run(args, config):
    local_mode = False
    if args.dbg:
        local_mode = True

    ray.init(local_mode=local_mode)

    cls = get_agent_class(args._run)
    agent = cls(env=args.env, config=config)
    agent.restore(args.checkpoint)
    num_steps = int(1e9)

    render_frameskip = args.render_action_repeat
    if render_frameskip == -1:
        # default - read from config
        # fallback to default if env config does not have it
        render_frameskip = cfg_param('skip_frames',
                                     config.get('env_config', None))

    log.info('Using render frameskip %d! \n\n\n', render_frameskip)

    rollout_loop(
        agent,
        args.env,
        num_steps,
        num_episodes=args.num_episodes,
        no_render=args.no_render,
        fps=args.fps,
        frameskip=render_frameskip,
    )
Example #6
def check_support(alg, config, stats, check_bounds=False, name=None):
    covered_a = set()
    covered_o = set()
    config["log_level"] = "ERROR"
    for a_name, action_space in ACTION_SPACES_TO_TEST.items():
        for o_name, obs_space in OBSERVATION_SPACES_TO_TEST.items():
            print("=== Testing", alg, action_space, obs_space, "===")
            stub_env = make_stub_env(action_space, obs_space, check_bounds)
            register_env("stub_env", lambda c: stub_env())
            stat = "ok"
            a = None
            try:
                if a_name in covered_a and o_name in covered_o:
                    stat = "skip"  # speed up tests by avoiding full grid
                else:
                    a = get_agent_class(alg)(config=config, env="stub_env")
                    a.train()
                    covered_a.add(a_name)
                    covered_o.add(o_name)
            except UnsupportedSpaceException:
                stat = "unsupported"
            except Exception as e:
                stat = "ERROR"
                print(e)
                print(traceback.format_exc())
            finally:
                if a:
                    try:
                        a.stop()
                    except Exception as e:
                        print("Ignoring error stopping agent", e)
                        pass
            print(stat)
            print()
            stats[name or alg, a_name, o_name] = stat
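The space grids and stub-env factory referenced by check_support (ACTION_SPACES_TO_TEST, OBSERVATION_SPACES_TO_TEST, make_stub_env) live elsewhere in the test module. A minimal sketch of what such grids could look like, built from standard gym spaces; the entries below are illustrative assumptions, not the exact grids from RLlib's test suite:

import numpy as np
from gym.spaces import Box, Dict, Discrete, Tuple

# Hypothetical grids: each maps a short name to a space to exercise.
ACTION_SPACES_TO_TEST = {
    "discrete": Discrete(5),
    "vector": Box(-1.0, 1.0, (5,), dtype=np.float32),
    "tuple": Tuple([Discrete(2), Box(-1.0, 1.0, (5,), dtype=np.float32)]),
}
OBSERVATION_SPACES_TO_TEST = {
    "discrete": Discrete(5),
    "vector": Box(-1.0, 1.0, (5,), dtype=np.float32),
    "image": Box(-1.0, 1.0, (84, 84, 1), dtype=np.float32),
    "dict": Dict({"sensors": Box(-1.0, 1.0, (5,), dtype=np.float32)}),
}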
Example #7
    def __init__(self, sim_config, algoConfig, checkPointPath):

        import ray
        from ray.tune import run_experiments
        from ray.tune.registry import register_env
        from ray.rllib.agents.registry import get_agent_class

        from v2i import V2I

        # Do Essentials
        algoConfig["EXP_NAME"]["config"]["num_workers"] = 2
        algoConfig["EXP_NAME"]["config"]["num_envs_per_worker"] = 1
        algoConfig["EXP_NAME"]["config"]["train_batch_size"] = algoConfig[
            "EXP_NAME"]["config"]["num_workers"] * algoConfig["EXP_NAME"][
                "config"]["sgd_minibatch_size"]
        simConfigYaml = readYaml(sim_config)
        self.lstmEnabled = False

        if simConfigYaml['config']['enable-lstm']:
            algoConfig['EXP_NAME']['config']['model']['use_lstm'] = True
            self.lstmEnabled = True
        else:
            algoConfig['EXP_NAME']['config']['model']['use_lstm'] = False

        env_creator_name = "v2i-v0"
        register_env(env_creator_name,
                     lambda config: V2I.V2I(sim_config, "train"))

        ray.init()
        cls = get_agent_class('PPO')
        self.agent = cls(env=env_creator_name,
                         config=algoConfig["EXP_NAME"]["config"])
        self.agent.restore(checkPointPath)
        print("Loaded Checkpoint -> %s" % (checkPointPath))
Example #8
def test_export(algo_name, failures):
    cls = get_agent_class(algo_name)
    if "DDPG" in algo_name:
        algo = cls(config=CONFIGS[name], env="Pendulum-v0")
    else:
        algo = cls(config=CONFIGS[name], env="CartPole-v0")

    for _ in range(3):
        res = algo.train()
        print("current status: " + str(res))

    export_dir = "/tmp/export_dir_%s" % algo_name
    print("Exporting model ", algo_name, export_dir)
    algo.export_policy_model(export_dir)
    if not os.path.exists(os.path.join(export_dir, "saved_model.pb")) \
            or not os.listdir(os.path.join(export_dir, "variables")):
        failures.append(algo_name)
    shutil.rmtree(export_dir)

    print("Exporting checkpoint", algo_name, export_dir)
    algo.export_policy_checkpoint(export_dir)
    if not os.path.exists(os.path.join(export_dir, "model.meta")) \
            or not os.path.exists(os.path.join(export_dir, "model.index")) \
            or not os.path.exists(os.path.join(export_dir, "checkpoint")):
        failures.append(algo_name)
    shutil.rmtree(export_dir)
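CONFIGS in this test (and in the checkpoint-restore tests further down) maps algorithm names to small, fast training configs. A hypothetical sketch using standard RLlib config keys; the exact entries and values are assumptions:

CONFIGS = {
    "PPO": {"num_workers": 1, "train_batch_size": 128, "num_sgd_iter": 1},
    "DQN": {"num_workers": 1, "timesteps_per_iteration": 100},
    "DDPG": {"num_workers": 1, "timesteps_per_iteration": 100},
}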
Example #9
def _restore(agent_type,
             run_name,
             ckpt,
             env_name,
             extra_config=None,
             existing_agent=None):
    assert isinstance(agent_type, str) or issubclass(agent_type, Trainer)
    if existing_agent is not None:
        agent = existing_agent
    else:
        change_model = None
        use_activation_model = False
        if agent_type == "PPOAgentWithActivation":
            cls = PPOAgentWithActivation
            change_model = "fc_with_activation"
            use_activation_model = True
        elif agent_type == "PPOAgentWithMask":
            cls = PPOAgentWithMask
            change_model = "fc_with_mask"
            use_activation_model = True
        elif (not isinstance(agent_type, str)) and issubclass(
                agent_type, Trainer):
            cls = agent_type
        else:
            cls = get_agent_class(run_name)
        is_es_agent = run_name == "ES"
        config = build_config(ckpt, extra_config, is_es_agent, change_model,
                              use_activation_model)
        logger.info("The config of restored agent: ", config)
        agent = cls(env=env_name, config=config)
    if ckpt is not None:
        ckpt = os.path.abspath(os.path.expanduser(ckpt))  # Resolve ~ and relative paths
        agent.restore(ckpt)
    return agent
Example #10
def run(args, parser):
    config = {}
    # Load configuration from file
    config_dir = os.path.dirname(args.checkpoint)
    config_path = os.path.join(config_dir, "params.pkl")
    if not os.path.exists(config_path):
        config_path = os.path.join(config_dir, "../params.pkl")
    if not os.path.exists(config_path):
        if not args.config:
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory.")
    else:
        with open(config_path, 'rb') as f:
            config = pickle.load(f)
    if "num_workers" in config:
        config["num_workers"] = min(2, config["num_workers"])
    config = merge_dicts(config, args.config)
    if not args.env:
        if not config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = config.get("env")

    ray.init()

    cls = get_agent_class(args.run)
    agent = cls(env=args.env, config=config)
    agent.restore(args.checkpoint)
    num_steps = int(args.steps)
    rollout(agent, args.env, num_steps, args.out, args.no_render)
Example #11
    def __init__(self, sim_config, algoConfig, checkPointPath):

        import ray
        from ray.tune import run_experiments
        from ray.tune.registry import register_env
        from ray.rllib.agents.registry import get_agent_class

        from v2i import V2I

        # Do Essentials
        algoConfig["EXP_NAME"]["config"]["num_workers"] = 2
        algoConfig["EXP_NAME"]["config"]["num_envs_per_worker"] = 1
        algoConfig["EXP_NAME"]["config"]["train_batch_size"] = algoConfig[
            "EXP_NAME"]["config"]["num_workers"] * algoConfig["EXP_NAME"][
                "config"]["sample_batch_size"]
        env_creator_name = "v2i-v0"
        register_env(env_creator_name,
                     lambda config: V2I.V2I(sim_config, "train"))

        ray.init()
        cls = get_agent_class('IMPALA')
        self.agent = cls(env=env_creator_name,
                         config=algoConfig["EXP_NAME"]["config"])
        self.agent.restore(checkPointPath)
        print("Loaded Checkpoint -> %s" % (checkPointPath))
Example #12
def _register_all():
    from ray.rllib.agents.trainer import Trainer, with_common_config
    from ray.rllib.agents.registry import ALGORITHMS, get_agent_class
    from ray.rllib.contrib.registry import CONTRIBUTED_ALGORITHMS

    for key in list(ALGORITHMS.keys()) + list(CONTRIBUTED_ALGORITHMS.keys(
    )) + ["__fake", "__sigmoid_fake_data", "__parameter_tuning"]:
        register_trainable(key, get_agent_class(key))

    def _see_contrib(name):
        """Returns dummy agent class warning algo is in contrib/."""

        class _SeeContrib(Trainer):
            _name = "SeeContrib"
            _default_config = with_common_config({})

            def setup(self, config):
                raise NameError(
                    "Please run `contrib/{}` instead.".format(name))

        return _SeeContrib

    # also register the aliases minus contrib/ to give a good error message
    for key in list(CONTRIBUTED_ALGORITHMS.keys()):
        assert key.startswith("contrib/")
        alias = key.split("/", 1)[1]
        register_trainable(alias, _see_contrib(alias))
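RLlib normally runs this registration itself on import, so the string keys are already usable with Tune. A minimal usage sketch (the stopping criterion is arbitrary):

from ray import tune

_register_all()
tune.run("PPO", config={"env": "CartPole-v0"}, stop={"training_iteration": 1})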
Example #13
def get_default_config(params, env):
    """
    Return the default configuration for a specific type of algorithm
    :param params: (dict)  general dictionary containing every configuration parameter (env, netwrok, inflow ...)
    :return:(dict)
    """

    # get original config from alg
    config = get_agent_class(Params.training_alg)._default_config.copy()

    # apply alg-free changes
    config = env_config(config)
    config = eval_config(config)
    config = model_config(config)
    config = flow_config(params, config)
    config = performance_config(config)

    if Params.training_alg == "PPO":
        config = ppo_config(config)

    elif Params.training_alg == "MARWIL":
        config = marwil_config(config)


    elif Params.training_alg == "contrib/MADDPG":
        config = maddpg_config(config, env)

    else:
        raise NotImplementedError(f"{Params.training_alg} has not been implemented")

    return config
Example #14
    def backtest(self, checkpoint_path):
        agent_config, assets, currency, datapoints, granularity, _ = get_instruments_from_checkpoint(
            checkpoint_path)

        config = {
            'assets': assets,
            'currency': currency,
            'granularity': granularity,
            'datapoints': datapoints,
            'df_complete': {},
            'df_features': {},
            'variables': self.config_spec_variables
        }

        for asset in assets:
            config['df_complete'][asset] = self.df[asset]['rollout']
            config['df_features'][asset] = self.df[asset][
                'rollout'].loc[:, self.df[asset]['rollout'].columns != 'Date']

        register_env(env_name, lambda _: TradingEnv(config))
        ray.init()
        cls = get_agent_class('PPO')
        agent = cls(env=env_name, config=agent_config)
        agent.restore(checkpoint_path)

        num_steps = int(len(config['df_complete'][assets[0]]))
        no_render = False

        rollout(agent, env_name, num_steps, no_render)
Example #15
def check_support(alg, config, stats, check_bounds=False):
    for a_name, action_space in ACTION_SPACES_TO_TEST.items():
        for o_name, obs_space in OBSERVATION_SPACES_TO_TEST.items():
            print("=== Testing", alg, action_space, obs_space, "===")
            stub_env = make_stub_env(action_space, obs_space, check_bounds)
            register_env("stub_env", lambda c: stub_env())
            stat = "ok"
            a = None
            try:
                a = get_agent_class(alg)(config=config, env="stub_env")
                a.train()
            except UnsupportedSpaceException:
                stat = "unsupported"
            except Exception as e:
                stat = "ERROR"
                print(e)
                print(traceback.format_exc())
            finally:
                if a:
                    try:
                        a.stop()
                    except Exception as e:
                        print("Ignoring error stopping agent", e)
                        pass
            print(stat)
            print()
            stats[alg, a_name, o_name] = stat
Example #16
    def test_pettingzoo_env(self):
        register_env("prison", lambda _: PettingZooEnv(simple_spread_v0.env()))

        agent_class = get_agent_class("PPO")

        config = deepcopy(agent_class._default_config)

        test_env = PettingZooEnv(simple_spread_v0.env())
        obs_space = test_env.observation_space
        act_space = test_env.action_space
        test_env.close()

        config["multiagent"] = {
            "policies": {
                # the first tuple value is None -> uses default policy
                "av": (None, obs_space, act_space, {}),
            },
            "policy_mapping_fn": lambda agent_id: "av"
        }

        config["log_level"] = "DEBUG"
        config["num_workers"] = 0
        config["rollout_fragment_length"] = 30
        config["train_batch_size"] = 200
        config["horizon"] = 200  # After n steps, force reset simulation
        config["no_done_at_end"] = False

        agent = agent_class(env="prison", config=config)
        agent.train()
Example #17
    def create_tf_serving_model(self, algorithm=None, env_string=None):
        self.register_env_creator()
        self.register_algorithms_and_preprocessors()
        if ray.__version__ >= "0.6.5":
            from ray.rllib.agents.registry import get_agent_class
        else:
            from ray.rllib.agents.agent import get_agent_class
        cls = get_agent_class(algorithm)
        with open(os.path.join(MODEL_OUTPUT_DIR,
                               "params.json")) as config_json:
            config = json.load(config_json)
        use_torch = config.get("use_pytorch", False)
        if not use_torch:
            if "callbacks" in config:
                callback_cls_str = config["callbacks"]
                callback_cls = callback_cls_str.split("'")[-2].split(".")[-1]
                config["callbacks"] = ast.literal_eval(callback_cls)
            print("Loaded config for TensorFlow serving.")
            config["monitor"] = False
            config["num_workers"] = 1
            config["num_gpus"] = 0
            agent = cls(env=env_string, config=config)
            checkpoint = os.path.join(MODEL_OUTPUT_DIR, "checkpoint")
            agent.restore(checkpoint)
            export_tf_serving(agent, MODEL_OUTPUT_DIR)
Example #18
def training_workflow(config_, reporter):
    # build trainer
    cls = get_agent_class(args.algo)
    trainer = cls(env=CityflowGymEnv, config=config_)
    for i in range(args.epoch):
        res = trainer.train()
        reporter(**res)
Example #19
def setup_PPO_exp():

    alg_run = 'PPO'
    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config['num_workers'] = n_cpus
    config['train_batch_size'] = horizon * rollouts
    config['gamma'] = discount_rate
    config['use_gae'] = True
    config['lambda'] = 0.97
    config['kl_target'] = 0.02
    config['num_sgd_iter'] = 10
    config['clip_actions'] = False  # FIXME(ev) temporary ray bug
    config['horizon'] = horizon
    config['model'].update({'fcnet_hiddens': [32, 32]})

    # save the flow params for replay
    flow_json = json.dumps(flow_params,
                           cls=FlowParamsEncoder,
                           sort_keys=True,
                           indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(gym_name, create_env)

    return alg_run, gym_name, config
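A typical way to consume the returned triple, following the usual Flow training pattern; the experiment tag, checkpoint frequency, and stopping criterion below are illustrative assumptions:

import ray
from ray.tune import run_experiments

alg_run, gym_name, config = setup_PPO_exp()
ray.init(num_cpus=n_cpus + 1)
run_experiments({
    "ring_road_ppo": {  # hypothetical experiment tag
        "run": alg_run,
        "env": gym_name,
        "config": dict(config),
        "checkpoint_freq": 20,
        "stop": {"training_iteration": 200},
    },
})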
Example #20
def run(args, parser):
    config = {}
    # Load configuration from file
    config_dir = os.path.dirname(args.checkpoint)
    config_path = os.path.join(config_dir, "params.pkl")
    if not os.path.exists(config_path):
        config_path = os.path.join(config_dir, "../params.pkl")
    if not os.path.exists(config_path):
        if not args.config:
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory.")
    else:
        with open(config_path, 'rb') as f:
            config = pickle.load(f)
    if "num_workers" in config:
        config["num_workers"] = min(2, config["num_workers"])
    config = merge_dicts(config, args.config)
    if not args.env:
        if not config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = config.get("env")

    ray.init()

    cls = get_agent_class(args.run)
    agent = cls(env=args.env, config=config)
    agent.restore(args.checkpoint)
    num_steps = int(args.steps)
    rollout(agent, args.env, num_steps, args.out, args.no_render)
Example #21
def test_ckpt_restore(use_object_store, alg_name, failures):
    cls = get_agent_class(alg_name)
    if "DDPG" in alg_name:
        alg1 = cls(config=CONFIGS[name], env="Pendulum-v0")
        alg2 = cls(config=CONFIGS[name], env="Pendulum-v0")
    else:
        alg1 = cls(config=CONFIGS[name], env="CartPole-v0")
        alg2 = cls(config=CONFIGS[name], env="CartPole-v0")

    for _ in range(3):
        res = alg1.train()
        print("current status: " + str(res))

    # Sync the models
    if use_object_store:
        alg2.restore_from_object(alg1.save_to_object())
    else:
        alg2.restore(alg1.save())

    for _ in range(10):
        if "DDPG" in alg_name:
            obs = np.random.uniform(size=3)
        else:
            obs = np.random.uniform(size=4)
        a1 = get_mean_action(alg1, obs)
        a2 = get_mean_action(alg2, obs)
        print("Checking computed actions", alg1, obs, a1, a2)
        if abs(a1 - a2) > .1:
            failures.append((alg_name, [a1, a2]))
Example #22
def check_support(alg, config, stats, check_bounds=False, name=None):
    for a_name, action_space in ACTION_SPACES_TO_TEST.items():
        for o_name, obs_space in OBSERVATION_SPACES_TO_TEST.items():
            print("=== Testing", alg, action_space, obs_space, "===")
            stub_env = make_stub_env(action_space, obs_space, check_bounds)
            register_env("stub_env", lambda c: stub_env())
            stat = "ok"
            a = None
            try:
                a = get_agent_class(alg)(config=config, env="stub_env")
                a.train()
            except UnsupportedSpaceException:
                stat = "unsupported"
            except Exception as e:
                stat = "ERROR"
                print(e)
                print(traceback.format_exc())
            finally:
                if a:
                    try:
                        a.stop()
                    except Exception as e:
                        print("Ignoring error stopping agent", e)
                        pass
            print(stat)
            print()
            stats[name or alg, a_name, o_name] = stat
Example #23
def ckpt_restore_test(use_object_store, alg_name, failures):
    cls = get_agent_class(alg_name)
    if "DDPG" in alg_name or "SAC" in alg_name:
        alg1 = cls(config=CONFIGS[alg_name], env="Pendulum-v0")
        alg2 = cls(config=CONFIGS[alg_name], env="Pendulum-v0")
        env = gym.make("Pendulum-v0")
    else:
        alg1 = cls(config=CONFIGS[alg_name], env="CartPole-v0")
        alg2 = cls(config=CONFIGS[alg_name], env="CartPole-v0")
        env = gym.make("CartPole-v0")

    for _ in range(2):
        res = alg1.train()
        print("current status: " + str(res))

    # Sync the models
    if use_object_store:
        alg2.restore_from_object(alg1.save_to_object())
    else:
        alg2.restore(alg1.save())

    for _ in range(5):
        if "DDPG" in alg_name or "SAC" in alg_name:
            obs = np.clip(np.random.uniform(size=3), env.observation_space.low,
                          env.observation_space.high)
        else:
            obs = np.clip(np.random.uniform(size=4), env.observation_space.low,
                          env.observation_space.high)
        a1 = get_mean_action(alg1, obs)
        a2 = get_mean_action(alg2, obs)
        print("Checking computed actions", alg1, obs, a1, a2)
        if abs(a1 - a2) > .1:
            failures.append((alg_name, [a1, a2]))
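get_mean_action is not shown in these snippets; conceptually it averages the trainer's sampled action over many compute_action calls so that stochastic exploration does not dominate the comparison. A minimal sketch, with an arbitrary sample count:

import numpy as np

def get_mean_action(alg, obs):
    out = []
    for _ in range(2000):
        out.append(float(alg.compute_action(obs)))
    return np.mean(out)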
Example #24
def _register_all():

    from ray.rllib.agents.registry import ALGORITHMS
    from ray.rllib.contrib.registry import CONTRIBUTED_ALGORITHMS
    for key in list(ALGORITHMS.keys()) + list(CONTRIBUTED_ALGORITHMS.keys(
    )) + ["__fake", "__sigmoid_fake_data", "__parameter_tuning"]:
        from ray.rllib.agents.registry import get_agent_class
        register_trainable(key, get_agent_class(key))
Example #25
def get_agent_class(agent_name):
    """
    Returns the class that corresponds to the agent_name.
    """
    if agent_name in CUSTOM_ALGORITHMS:
        return CUSTOM_ALGORITHMS[agent_name]
    else:
        return reg.get_agent_class(agent_name)
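CUSTOM_ALGORITHMS here is just a name-to-trainer mapping consulted before falling back to RLlib's own registry. A hypothetical sketch of how it might be populated and used (the custom trainer below is an assumption for illustration):

from ray.rllib.agents.ppo import PPOTrainer

class MyCustomPPOTrainer(PPOTrainer):  # hypothetical custom trainer
    pass

CUSTOM_ALGORITHMS = {"MY_PPO": MyCustomPPOTrainer}

trainer_cls = get_agent_class("MY_PPO")    # resolved from CUSTOM_ALGORITHMS
trainer_cls = get_agent_class("IMPALA")    # falls through to reg.get_agent_class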
Example #26
def check_support(alg, config):
    config["eager"] = True
    if alg in ["APEX_DDPG", "TD3", "DDPG", "SAC"]:
        config["env"] = "Pendulum-v0"
    else:
        config["env"] = "CartPole-v0"
    a = get_agent_class(alg)
    tune.run(a, config=config, stop={"training_iteration": 0})
Example #27
def _register_all():

    from ray.rllib.agents.registry import ALGORITHMS
    from ray.rllib.contrib.registry import CONTRIBUTED_ALGORITHMS
    for key in list(ALGORITHMS.keys()) + list(CONTRIBUTED_ALGORITHMS.keys(
    )) + ["__fake", "__sigmoid_fake_data", "__parameter_tuning"]:
        from ray.rllib.agents.registry import get_agent_class
        register_trainable(key, get_agent_class(key))
Example #28
def get_agent(agent_name):
    try:
        agent_class = get_agent_class(agent_name.upper())
    except Exception as e:
        print("%s Loading basic algorithm" % e)
        # Fall back to a custom trainer class built on MaxAgent for experiments
        agent_class = type(agent_name.upper(), (MaxAgent, ), {})
    return agent_class
Example #29
def check_support(alg, config, stats, check_bounds=False, name=None):
    covered_a = set()
    covered_o = set()
    config["log_level"] = "ERROR"
    first_error = None
    torch = config.get("use_pytorch", False)
    for a_name, action_space in ACTION_SPACES_TO_TEST.items():
        for o_name, obs_space in OBSERVATION_SPACES_TO_TEST.items():
            print("=== Testing {} (torch={}) A={} S={} ===".format(
                alg, torch, action_space, obs_space))
            stub_env = make_stub_env(action_space, obs_space, check_bounds)
            register_env("stub_env", lambda c: stub_env())
            stat = "ok"
            a = None
            try:
                if a_name in covered_a and o_name in covered_o:
                    stat = "skip"  # speed up tests by avoiding full grid
                else:
                    a = get_agent_class(alg)(config=config, env="stub_env")
                    if alg not in ["DDPG", "ES", "ARS", "SAC"]:
                        if o_name in ["atari", "image"]:
                            if torch:
                                assert isinstance(a.get_policy().model,
                                                  TorchVisionNetV2)
                            else:
                                assert isinstance(a.get_policy().model,
                                                  VisionNetV2)
                        elif o_name in ["vector", "vector2"]:
                            if torch:
                                assert isinstance(a.get_policy().model,
                                                  TorchFCNetV2)
                            else:
                                assert isinstance(a.get_policy().model,
                                                  FCNetV2)
                    a.train()
                    covered_a.add(a_name)
                    covered_o.add(o_name)
            except UnsupportedSpaceException:
                stat = "unsupported"
            except Exception as e:
                stat = "ERROR"
                print(e)
                print(traceback.format_exc())
                first_error = first_error if first_error is not None else e
            finally:
                if a:
                    try:
                        a.stop()
                    except Exception as e:
                        print("Ignoring error stopping agent", e)
                        pass
            print(stat)
            print()
            stats[name or alg, a_name, o_name] = stat

    # If anything happened, raise error.
    if first_error is not None:
        raise first_error
Example #30
def run_heuristic_vs_learned(args, use_lstm=False, trainer="PG"):
    """Run heuristic policies vs a learned agent.

    The learned agent should eventually reach a reward of ~5 with
    use_lstm=False, and ~7 with use_lstm=True. The LSTM policy can perform
    better because it can distinguish between the always_same and beat_last
    heuristics.
    """
    def select_policy(agent_id):
        if agent_id == "player1":
            return "learned"
        else:
            return random.choice(["always_same", "beat_last"])

    config = {
        "env": RockPaperScissors,
        "gamma": 0.9,
        # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
        "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
        "num_workers": 0,
        "num_envs_per_worker": 4,
        "rollout_fragment_length": 10,
        "train_batch_size": 200,
        "multiagent": {
            "policies_to_train": ["learned"],
            "policies": {
                "always_same":
                (AlwaysSameHeuristic, Discrete(3), Discrete(3), {}),
                "beat_last": (BeatLastHeuristic, Discrete(3), Discrete(3), {}),
                "learned": (None, Discrete(3), Discrete(3), {
                    "model": {
                        "use_lstm": use_lstm
                    },
                    "framework": "torch" if args.torch else "tf",
                }),
            },
            "policy_mapping_fn": select_policy,
        },
        "framework": "torch" if args.torch else "tf",
    }
    cls = get_agent_class(trainer) if isinstance(trainer, str) else trainer
    trainer_obj = cls(config=config)
    env = trainer_obj.workers.local_worker().env
    for _ in range(args.stop_iters):
        results = trainer_obj.train()
        print(results)
        # Timesteps reached.
        if results["timesteps_total"] > args.stop_timesteps:
            break
        # Reward (difference) reached -> all good, return.
        elif env.player1_score - env.player2_score > args.stop_reward:
            return

    # Reward (difference) not reached: Error if `as_test`.
    if args.as_test:
        raise ValueError(
            "Desired reward difference ({}) not reached! Only got to {}.".
            format(args.stop_reward, env.player1_score - env.player2_score))
Example #31
def run(args, parser, env_config={}):

    if not args.config:
        # Load configuration from file
        config_dir = os.path.dirname(args.checkpoint)
        # params.json is saved in the model directory during ray training by default
        config_path = os.path.join(config_dir, "params.json")
        with open(config_path) as f:
            args.config = json.load(f)

    if not args.env:
        if not args.config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = args.config.get("env")

    ray.init()

    config = args.config
    config["monitor"] = False
    config["num_workers"] = 1
    config["num_gpus"] = 0
    env_config = config["env_config"]

    from gameserver_env import GameServerEnv

    env = GameServerEnv(env_config)

    if ray.__version__ >= "0.6.5":
        from ray.rllib.agents.registry import get_agent_class
    else:
        from ray.rllib.agents.agent import get_agent_class

    cls = get_agent_class(args.algorithm)
    agent = cls(env=GameServerEnv, config=config)
    agent.restore(args.checkpoint)
    num_episodes = int(args.evaluate_episodes)

    env = wrappers.Monitor(env,
                           OUTPUT_DIR,
                           force=True,
                           video_callable=lambda episode_id: True)
    all_rewards = []
    for episode in range(num_episodes):
        steps = 0
        state = env.reset()
        done = False
        reward_total = 0.0
        while not done:
            action = agent.compute_action(state)
            next_state, reward, done, _ = env.step(action)
            reward_total += reward
            steps += 1
            state = next_state
        all_rewards.append(reward_total)
        print("Episode reward: %s. Episode steps: %s" % (reward_total, steps))
    print("Mean Reward:", np.mean(all_rewards))
    print("Max Reward:", np.max(all_rewards))
    print("Min Reward:", np.min(all_rewards))
Example #32
    def create_tf_serving_model(self, algorithm=None, env_string=None, config=None):
        self.register_env_creator()
        cls = get_agent_class(algorithm)
        config["monitor"] = False
        config["num_workers"] = 1
        config["num_gpus"] = 0
        agent = cls(env=env_string, config=config)
        checkpoint = os.path.join(MODEL_OUTPUT_DIR, "checkpoint")
        agent.restore(checkpoint)
        export_tf_serving(agent, MODEL_OUTPUT_DIR)
Example #33
def train_and_export(algo_name, num_steps, model_dir, ckpt_dir, prefix):
    cls = get_agent_class(algo_name)
    alg = cls(config={}, env="CartPole-v0")
    for _ in range(num_steps):
        alg.train()

    # Export tensorflow checkpoint for fine-tuning
    alg.export_policy_checkpoint(ckpt_dir, filename_prefix=prefix)
    # Export tensorflow SavedModel for online serving
    alg.export_policy_model(model_dir)
Example #34
def train_and_export(algo_name, num_steps, model_dir, ckpt_dir, prefix):
    cls = get_agent_class(algo_name)
    alg = cls(config={}, env="CartPole-v0")
    for _ in range(num_steps):
        alg.train()

    # Export tensorflow checkpoint for fine-tuning
    alg.export_policy_checkpoint(ckpt_dir, filename_prefix=prefix)
    # Export tensorflow SavedModel for online serving
    alg.export_policy_model(model_dir)
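A hypothetical invocation of the function above; the step count and output paths are illustrative:

import ray

ray.init()
train_and_export("PPO", num_steps=1, model_dir="/tmp/model_export",
                 ckpt_dir="/tmp/ckpt_export", prefix="model.ckpt")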
Example #35
def run(args, parser):
    def create_environment(env_config={}):
        return RoboschoolReacher()

    if not args.config:
        # Load configuration from file
        config_dir = os.path.dirname(args.checkpoint)
        # params.json is saved in the model directory during ray training by default
        config_path = os.path.join(config_dir, "params.json")
        with open(config_path) as f:
            args.config = json.load(f)

    if not args.env:
        if not args.config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = args.config.get("env")

    ray.init()

    register_env(args.env, create_environment)

    if ray.__version__ >= "0.6.5":
        from ray.rllib.agents.registry import get_agent_class
    else:
        from ray.rllib.agents.agent import get_agent_class

    cls = get_agent_class(args.algorithm)
    config = args.config
    config["monitor"] = False
    config["num_workers"] = 1
    config["num_gpus"] = 0
    agent = cls(env=args.env, config=config)
    agent.restore(args.checkpoint)
    num_episodes = int(args.evaluate_episodes)

    env = RoboschoolReacher()
    all_rewards = []
    max_steps = 100  # use max_steps as the stopping condition since this env does not return done=True

    for episode in range(num_episodes):
        steps = 0
        state = env.reset()
        done = False
        reward_total = 0.0
        while steps < max_steps:
            action = agent.compute_action(state)
            next_state, reward, done, _ = env.step(action)
            reward_total += reward
            steps += 1
            state = next_state
        all_rewards.append(reward_total)
        print("Episode reward: %s. Episode steps: %s" % (reward_total, steps))
    print("Mean Reward:", np.mean(all_rewards))
    print("Max Reward:", np.max(all_rewards))
    print("Min Reward:", np.min(all_rewards))
Example #36
    def _doTestFaultFatal(self, alg, config):
        register_env("fault_env", lambda c: FaultInjectEnv(c))
        agent_cls = get_agent_class(alg)

        # Test that a real error is raised once all workers have failed
        config["num_workers"] = 2
        config["ignore_worker_failures"] = True
        config["env_config"] = {"bad_indices": [1, 2]}
        a = agent_cls(config=config, env="fault_env")
        self.assertRaises(Exception, lambda: a.train())
        a.stop()
Example #37
def test_export(algo_name, failures):
    def valid_tf_model(model_dir):
        return os.path.exists(os.path.join(model_dir, "saved_model.pb")) \
            and os.listdir(os.path.join(model_dir, "variables"))

    def valid_tf_checkpoint(checkpoint_dir):
        return os.path.exists(os.path.join(checkpoint_dir, "model.meta")) \
            and os.path.exists(os.path.join(checkpoint_dir, "model.index")) \
            and os.path.exists(os.path.join(checkpoint_dir, "checkpoint"))

    cls = get_agent_class(algo_name)
    if "DDPG" in algo_name:
        algo = cls(config=CONFIGS[name], env="Pendulum-v0")
    else:
        algo = cls(config=CONFIGS[name], env="CartPole-v0")

    for _ in range(3):
        res = algo.train()
        print("current status: " + str(res))

    export_dir = "/tmp/export_dir_%s" % algo_name
    print("Exporting model ", algo_name, export_dir)
    algo.export_policy_model(export_dir)
    if not valid_tf_model(export_dir):
        failures.append(algo_name)
    shutil.rmtree(export_dir)

    print("Exporting checkpoint", algo_name, export_dir)
    algo.export_policy_checkpoint(export_dir)
    if not valid_tf_checkpoint(export_dir):
        failures.append(algo_name)
    shutil.rmtree(export_dir)

    print("Exporting default policy", algo_name, export_dir)
    algo.export_model([ExportFormat.CHECKPOINT, ExportFormat.MODEL],
                      export_dir)
    if not valid_tf_model(os.path.join(export_dir, ExportFormat.MODEL)) \
            or not valid_tf_checkpoint(os.path.join(export_dir,
                                                    ExportFormat.CHECKPOINT)):
        failures.append(algo_name)
    shutil.rmtree(export_dir)