Code example #1
File: loader.py  Project: spMohanty/rl-experiments
def load_preprocessors(local_dir="."):
    """
    This function takes a path to a local directory
    and looks for an `envs` folder, and imports
    all the available files in there.
    """
    for _file_path in glob.glob(
            os.path.join(local_dir, "preprocessors", "*.py")):
        """
        Determine the filename, preprocessor_name and class_name

        # Convention :
            - filename : snake_case
            - classname : PascalCase

            the class implementation, should be an inheritance
            of the rllib class Preprocessor for it to used in rllib
        """
        preprocessor_name, class_name, _class = load_class_from_file(
            _file_path)
        CustomPreprocessor = _class
        # Validate the class
        if not issubclass(CustomPreprocessor, Preprocessor):
            raise Exception(
                "We expected the class named {} to be "
                "a subclass of Preprocessor. "
                "Please read more here : <insert-link>".format(class_name))
        # Finally Register Preprocessor in Tune
        ModelCatalog.register_custom_preprocessor(preprocessor_name,
                                                  CustomPreprocessor)
        print("-    Successfully Loaded custom Preprocessor \
                class {} from {}".format(class_name,
                                         os.path.basename(_file_path)))
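The docstring above spells out the convention the loader expects: a snake_case file under `preprocessors/` containing a PascalCase class that subclasses rllib's Preprocessor. A minimal sketch of such a file (the file and class names here are hypothetical) might look like this:

# preprocessors/my_preprocessor.py -- hypothetical example of the convention above
import numpy as np

from ray.rllib.models.preprocessors import Preprocessor


class MyPreprocessor(Preprocessor):
    def _init_shape(self, obs_space, options):
        # Keep the original observation shape
        return obs_space.shape

    def transform(self, observation):
        # Identity transform, cast to float32
        return np.asarray(observation, dtype=np.float32)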
Code example #2
def load_preprocessors(CUSTOM_PREPROCESSORS):
    """Function to register custom preprocessors
    """
    from ray.rllib.models import ModelCatalog

    for _preprocessor_name, _preprocessor_class in CUSTOM_PREPROCESSORS.items():
        ModelCatalog.register_custom_preprocessor(_preprocessor_name,
                                                  _preprocessor_class)
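Calling this helper would then look roughly like the following (the dictionary keys and preprocessor classes here are hypothetical placeholders):

# Hypothetical usage of the helper above
CUSTOM_PREPROCESSORS = {
    "my_prep": MyPreprocessor,
    "other_prep": OtherPreprocessor,
}
load_preprocessors(CUSTOM_PREPROCESSORS)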
Code example #3
 def test_custom_preprocessor(self):
     ray.init(object_store_memory=1000 * 1024 * 1024)
     ModelCatalog.register_custom_preprocessor("foo", CustomPreprocessor)
     ModelCatalog.register_custom_preprocessor("bar", CustomPreprocessor2)
     env = gym.make("CartPole-v0")
     p1 = ModelCatalog.get_preprocessor(env, {"custom_preprocessor": "foo"})
     self.assertEqual(str(type(p1)), str(CustomPreprocessor))
     p2 = ModelCatalog.get_preprocessor(env, {"custom_preprocessor": "bar"})
     self.assertEqual(str(type(p2)), str(CustomPreprocessor2))
     p3 = ModelCatalog.get_preprocessor(env)
     self.assertEqual(type(p3), NoPreprocessor)
Code example #4
File: test_catalog.py  Project: robertnishihara/ray
 def testCustomPreprocessor(self):
     ray.init()
     ModelCatalog.register_custom_preprocessor("foo", CustomPreprocessor)
     ModelCatalog.register_custom_preprocessor("bar", CustomPreprocessor2)
     env = gym.make("CartPole-v0")
     p1 = ModelCatalog.get_preprocessor(env, {"custom_preprocessor": "foo"})
     self.assertEqual(str(type(p1)), str(CustomPreprocessor))
     p2 = ModelCatalog.get_preprocessor(env, {"custom_preprocessor": "bar"})
     self.assertEqual(str(type(p2)), str(CustomPreprocessor2))
     p3 = ModelCatalog.get_preprocessor(env)
     self.assertEqual(type(p3), NoPreprocessor)
Code example #5
File: test_catalog.py  Project: qyccc/rllibddpg
 def testCustomPreprocessor(self):
     ray.init()
     ModelCatalog.register_custom_preprocessor("foo", CustomPreprocessor)
     ModelCatalog.register_custom_preprocessor("bar", CustomPreprocessor2)
     env = gym.make("CartPole-v0")
     p1 = ModelCatalog.get_preprocessor(
         get_registry(), env, {"custom_preprocessor": "foo"})
     self.assertEqual(str(type(p1)), str(CustomPreprocessor))
     p2 = ModelCatalog.get_preprocessor(
         get_registry(), env, {"custom_preprocessor": "bar"})
     self.assertEqual(str(type(p2)), str(CustomPreprocessor2))
     p3 = ModelCatalog.get_preprocessor(get_registry(), env)
     self.assertEqual(type(p3), NoPreprocessor)
Code example #6
 def testCustomPreprocessor(self):
     ray.init()
     ModelCatalog.register_custom_preprocessor("foo", CustomPreprocessor)
     ModelCatalog.register_custom_preprocessor("bar", CustomPreprocessor2)
     env = gym.make("CartPole-v0")
     p1 = ModelCatalog.get_preprocessor(
         get_registry(), env, {"custom_preprocessor": "foo"})
     assert type(p1) == CustomPreprocessor
     p2 = ModelCatalog.get_preprocessor(
         get_registry(), env, {"custom_preprocessor": "bar"})
     assert type(p2) == CustomPreprocessor2
     p3 = ModelCatalog.get_preprocessor(get_registry(), env)
     assert type(p3) == NoPreprocessor
Code example #7
def load_agent():

    # Initialize training environment

    ray.init()

    def environment_creater(params=None):
        agent = SimpleAvoidAgent(noise=0.05)
        return TronRaySinglePlayerEnvironment(board_size=13,
                                              num_players=4,
                                              agent=agent)

    env = environment_creater()
    tune.register_env("tron_single_player", environment_creater)
    ModelCatalog.register_custom_preprocessor("tron_prep", TronExtractBoard)

    # Configure Deep Q Learning with reasonable values
    config = DEFAULT_CONFIG.copy()
    config['num_workers'] = 4
    ## config['num_gpus'] = 1
    #config["timesteps_per_iteration"] = 1024
    #config['target_network_update_freq'] = 256
    #config['buffer_size'] = 100_000
    #config['schedule_max_timesteps'] = 200_000
    #config['exploration_fraction'] = 0.02
    #config['compress_observations'] = False
    #config['n_step'] = 2
    #config['seed'] = SEED

    #Configure for PPO
    #config["sample_batch_size"]= 100
    #config["train_batch_size"]=200
    #config["sgd_minibatch_size"]=60
    #Configure A3C with reasonable values

    # We will use a simple convolution network with 3 layers as our feature extractor
    config['model']['vf_share_layers'] = True
    config['model']['conv_filters'] = [(512, 5, 1), (256, 3, 2), (128, 3, 2)]
    config['model']['fcnet_hiddens'] = [256]
    config['model']['custom_preprocessor'] = 'tron_prep'

    # Begin training or evaluation
    #trainer = DDPGTrainer(config, "tron_single_player")
    #trainer = A3CTrainer(config, "tron_single_player")
    trainer = DQNTrainer(config, "tron_single_player")
    #trainer = PPOTrainer(config, "tron_single_player")

    trainer.restore("./dqn_checkpoint_3800/checkpoint-3800")

    return trainer  #.get_policy("trainer")
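Once restored, the returned trainer could be used for rollouts, roughly as sketched below (assuming the Tron environment follows the usual gym-style reset/step API):

# Sketch: rolling out the restored DQN trainer in the environment created above
trainer = load_agent()
env = TronRaySinglePlayerEnvironment(board_size=13, num_players=4,
                                     agent=SimpleAvoidAgent(noise=0.05))
obs = env.reset()
done = False
while not done:
    action = trainer.compute_action(obs)
    obs, reward, done, info = env.step(action)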
Code example #8


import ray
from ray import tune
from ray.rllib.utils.seed import seed as rllib_seed
import rl_toy
from rl_toy.envs import RLToyEnv
from ray.tune.registry import register_env
register_env("RLToy-v0", lambda config: RLToyEnv(config))



from ray.rllib.models.preprocessors import OneHotPreprocessor
from ray.rllib.models import ModelCatalog
ModelCatalog.register_custom_preprocessor("ohe", OneHotPreprocessor)



#rllib_seed(0, 0, 0) ####IMP Doesn't work due to multi-process I think; so use config["seed"]
# np.random.seed(0)
# import random
# random.seed(0)
# import tensorflow as tf
# tf.set_random_seed(0)
ray.init(local_mode=True)#, object_id_seed=0)


# Old config space
# algorithms = ["DQN"]
# state_space_sizes = [2**i for i in range(4,6)]
Code example #9
    def _init_shape(self, obs_space, options):
        return (2,13)  # can vary depending on inputs

    def transform(self, observation):
        player_obs = observation[0].reshape((4,13))
        player_obs = np.sum(player_obs,axis = 0).reshape((1,13))
        #print(player_obs.shape)
        dealer_obs = np.zeros((1,13))
        dealer_obs[0][observation[1]%13] = 1
        #print(dealer_obs.shape)

        new_obs = np.concatenate([player_obs, dealer_obs], axis=0)
        return new_obs  # return the preprocessed observation

ModelCatalog.register_custom_preprocessor("my_prep", MyPreprocessorClass)
ModelCatalog.register_custom_preprocessor("my_prep2", MyPreprocessorClass2)

#train function
def train_zero(config, reporter):
    agent = SACTrainer(config)
    #agent.restore("/home/yunke/ray_results/AlphaZero_BlackjackEnv_zero_2020-05-01_22-50-303ae70oaq/checkpoint_1981/checkpoint-1981") #continue training
    #training curriculum, start with phase 0

    episodes = 0
    i = 0
    while True:
        result = agent.train()
        if reporter is None:
            continue
        else:
Code example #10
            else:
                zString = "_".join(key_list)

                if isinstance(v, list):
                    v_count = len(d[k])
                    for j in range(v_count):
                        tempString = zString + "_" + str(j)
                        ret_dict[tempString] = v[j]
                else:
                    ret_dict[zString] = v

            key_list.pop()
        return ret_dict


ModelCatalog.register_custom_preprocessor("osim_prep", OsimPreprocessor)

# class OsimEnv(gym.Env):
#     def __init__(self,env_config):
#         self.accuracy_setting = 1e-1
#         self.nstep_hold = 4
#         self.env = ProstheticsEnv(visualize=False,integrator_accuracy=self.accuracy_setting)
#
#     # def step(self,action):
#     #     for j in range(self.nstep_hold):
#     #         obs, reward, done, info = env.step(action, project=False)
#     #         if done:
#     #             break

#register_env(env_name, lambda c: OsimEnv(c))
self.accuracy_setting = 1e-1
Code example #11
File: utils.py  Project: valldabo2/orderbookrl

def env_creator_marketorderenvbuysell(env_config):
    if 'data_path' not in env_config:
        path = get_default_path()
    else:
        path = env_config['data_path']
    env = MarketOrderEnvBuySell(order_paths=path + 'feather/',
                                snapshot_paths=path + 'snap_json/', **env_config)
    return env  # or return your own custom env


env_creator_name = "MarketOrderEnvBuySell-v0"
register_env(env_creator_name, env_creator_marketorderenvbuysell)

ModelCatalog.register_custom_preprocessor('mv', MarketVariables)

ModelCatalog.register_custom_preprocessor('mv_l', MarketVariablesSingleL)

ModelCatalog.register_custom_preprocessor('mv_pred', PredictiveMarketVariables)

ModelCatalog.register_custom_preprocessor('zeros', Zeros)

#register_trainable('PPOADV', PPOAdv)
#
#register_trainable('PPOCUMRET', PPOCumRet)


def get_env(env_id, env_config):
    if env_id == "DistEnv-v0":
        return env_creator_distenv(env_config)
Code example #12
# Initialize training environment
ray.shutdown()
ray.init()

def environment_creater(params=None):
    agent = SimpleAvoidAgent(noise=0.05)
    #agent = DQNTrainer(DEFAULT_CONFIG,"tron_single_player")
    #agent.load("./dqn_model_v2/checkpoint_6700/checkpoint-6700")
    return TronRaySinglePlayerEnvironment(board_size=13, num_players=4, agent=agent)

env = environment_creater()
print("***************************************************************************************************************************************************************************")
tune.register_env("tron_single_player", environment_creater)

ModelCatalog.register_custom_preprocessor("tron_prep", TronExtractBoard)

# Configure Deep Q Learning with reasonable values
config = DEFAULT_CONFIG.copy()
#config['num_workers'] = 4
# config['num_gpus'] = 1
#config["timesteps_per_iteration"] = 1024
#config['lambda'] = .7
#config['target_network_update_freq'] = 256
#config['buffer_size'] = 100_000
#config['schedule_max_timesteps'] = 200_000
#config['exploration_fraction'] = 0.4
#config['compress_observations'] = False
#config['n_step'] = 3
#config['seed'] = SEED
Code example #13
    "--redis-address", default=None, type=str,
    help="The Redis address of the cluster.")
parser.add_argument(
    "--num-cpus", default=2, type=int,
    help="Number of CPUs to allocate to Ray.")
parser.add_argument(
    "--num-gpus", default=1, type=int,
    help="Number of GPUs to allocate to Ray.")
parser.add_argument(
    "--experiment-name", default="default", type=str,
    help="Name of the subdirectory under `local_dir` to put results in.")
parser.add_argument(
    "--env", default=None, type=str, help="The gym environment to use.")


ModelCatalog.register_custom_preprocessor("sc_prep", StarCraftPreprocessor)
register_env("sc2", lambda config: StarCraft(config))
register_trainable("SC_A3C", A3CAgent)


if __name__ == "__main__":
    args = parser.parse_args(sys.argv[1:])

    experiments = {
                'experiment_name': {
                    "run" : 'SC_A3C',
                    "env" : 'sc2',
                    "trial_resources" : resources_to_json(args.trial_resources),
                    "config": dict(args.config, env=args.env),
                }
            }
Code example #14
        self.check_shape(observation)
        array = np.zeros(self.shape)
        self.write(observation, array, 0)
        return array

    @override(Preprocessor)
    def write(self, observation, array, offset):
        if not isinstance(observation, OrderedDict):
            observation = OrderedDict(sorted(list(observation.items())))
        assert len(observation) == len(self.preprocessors), \
            (len(observation), len(self.preprocessors))
        for o, p in zip(observation.values(), self.preprocessors):
            p.write(o, array, offset)
            offset += p.size

    @property
    @override(Preprocessor)
    def observation_space(self):
        obs_space = gym.spaces.Box(np.finfo(np.float32).min,
                                   np.finfo(np.float32).max,
                                   self.shape,
                                   dtype=np.float32)
        # Stash the unwrapped space so that we can unwrap dict spaces
        # automatically in models
        obs_space.original_space = self._obs_space
        return obs_space


ModelCatalog.register_custom_preprocessor(STRATEGO_PREPROCESSOR,
                                          StrategoDictFlatteningPreprocessor)
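As in the other snippets on this page, the registered name would then be referenced from a model config; a minimal illustration (the config shown here is only a placeholder):

# Illustrative only: pointing a model config at the registered preprocessor
config = {
    "model": {
        "custom_preprocessor": STRATEGO_PREPROCESSOR,
    },
}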
Code example #15
def load_agent():

    # Initialize training environment

    ray.init()

    def environment_creater(params=None):
        agent = SimpleAvoidAgent(noise=0.05)
        return TronRayEnvironment(board_size=13, num_players=4)

    env = environment_creater()
    tune.register_env("tron_multi_player", environment_creater)
    ModelCatalog.register_custom_preprocessor("tron_prep", TronExtractBoard)

    # Configure Deep Q Learning with reasonable values
    config = DEFAULT_CONFIG.copy()
    config['num_workers'] = 4
    ## config['num_gpus'] = 1
    #config["timesteps_per_iteration"] = 1024
    #config['target_network_update_freq'] = 256
    #config['buffer_size'] = 100_000
    #config['schedule_max_timesteps'] = 200_000
    #config['exploration_fraction'] = 0.02
    #config['compress_observations'] = False
    #config['n_step'] = 2
    #config['seed'] = SEED

    #Configure for PPO
    #config["sample_batch_size"]= 100
    #config["train_batch_size"]=200
    #config["sgd_minibatch_size"]=60
    #Configure A3C with reasonable values

    # We will use a simple convolution network with 3 layers as our feature extractor
    config['model']['vf_share_layers'] = True
    config['model']['conv_filters'] = [(512, 5, 1), (256, 3, 2), (128, 3, 2)]
    config['model']['fcnet_hiddens'] = [256]
    config['model']['custom_preprocessor'] = 'tron_prep'

    # All of the models will use the same network as before
    agent_config = {
        "model": {
            "vf_share_layers": True,
            "conv_filters": [(512, 5, 1), (256, 3, 2), (128, 3, 2)],
            "fcnet_hiddens": [256],
            "custom_preprocessor": 'tron_prep'
        }
    }

    def policy_mapping_function(x):
        if x == '0':
            return "trainer"
        return "opponent"

    config['multiagent'] = {
        "policy_mapping_fn": policy_mapping_function,
        "policies": {
            "trainer":
            (None, env.observation_space, env.action_space, agent_config),
            "opponent":
            (None, env.observation_space, env.action_space, agent_config)
        },
        "policies_to_train": ["trainer"]
    }

    # Begin training or evaluation
    #trainer = DDPGTrainer(config, "tron_single_player")
    #trainer = A3CTrainer(config, "tron_single_player")
    #trainer = MARWILTrainer(config, "tron_single_player")
    trainer = PPOTrainer(config, "tron_multi_player")

    trainer.restore("./sp_checkpoint_1802/checkpoint-1802")

    return trainer.get_policy("trainer")
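The policy returned here could then be queried directly; a rough sketch, assuming the multi-player env returns a per-agent observation dict and that this RLlib version exposes compute_single_action on policies:

# Sketch: querying the trained "trainer" policy for agent "0"
policy = load_agent()
env = TronRayEnvironment(board_size=13, num_players=4)
obs = env.reset()  # dict keyed by agent id in the multi-agent env
action, _, _ = policy.compute_single_action(obs["0"], state=[])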
Code example #16
from ray.rllib.agents.pg import PGAgent
from ray.rllib.agents.pg.pg_policy_graph import PGPolicyGraph


def shutdown():
    print('Shutting down Ray...')
    ray.shutdown()


atexit.register(shutdown)

if __name__ == '__main__':
    # register environment
    register_env('Pomme_v0', lambda config: Pomme_v0(config))
    # register preprocessor
    ModelCatalog.register_custom_preprocessor('Featurize_Preprocessor',
                                              Featurize_Preprocessor)

    ray.init()

    # get env config and create dummy instance to retrieve observation & action space
    env_config = team_competition_env()
    p = Pomme_v0(env_config)
    obs_space = p.pomme.observation_space
    act_space = p.pomme.action_space
    p.pomme.close()

    # initialize trainer - since all agents use the same policy graph, one trainer is fine here;
    # otherwise we'd need one trainer per policy graph used for training
    trainer = PGAgent(env='Pomme_v0',
                      config={
                          'multiagent': {
Code example #17

register_env(env_name, lambda config: env_creator(config))


# Placeholder to enable use of a custom pre-processor
class ImagePreproc(Preprocessor):
    def _init_shape(self, obs_space, options):
        shape = (84, 84, 3)  # Adjust third dim if stacking frames
        return shape

    def transform(self, observation):
        return observation


ModelCatalog.register_custom_preprocessor("sq_im_84", ImagePreproc)

if __name__ == "__main__":
    args = parser.parse_args()
    ray.init()

    obs_space = Box(0.0, 255.0, shape=(84, 84, 3))
    act_space = Discrete(9)

    def gen_policy():
        config = {
            # Model and preprocessor options.
            "model": {
                "custom_model": model_name,
                "custom_options": {
                    # Custom notes for the experiment
Code example #18
env_name = "ple_env"

#screen dimensions, keep consistent for PLE env and ray
screen_wh = 80


class PLEPreprocessor(Preprocessor):
    def _init(self):
        self.shape = self._obs_space.shape  #can vary this based on options

    def transform(self, observation):
        observation = observation / 255.0
        return observation


ModelCatalog.register_custom_preprocessor("ple_prep", PLEPreprocessor)


class PLEEnv(gym.Env):
    def __init__(self, env_config):
        game = Catcher(width=screen_wh, height=screen_wh)

        fps = 30  # fps we want to run at
        frame_skip = 2
        num_steps = 2
        force_fps = True  # False for slower speed
        display_screen = False
        # make a PLE instance.
        self.env = PLE(game,
                       fps=fps,
                       frame_skip=frame_skip,