def load_preprocessors(local_dir="."):
    """Register every custom preprocessor found under ``<local_dir>/preprocessors``.

    Each ``*.py`` file in that folder is handed to ``load_class_from_file``,
    which yields ``(preprocessor_name, class_name, class)``. The class is
    validated and then registered with ``ModelCatalog`` under
    ``preprocessor_name``.

    Convention for the files:
        - filename  : snake_case
        - classname : PascalCase
        - the class must inherit from the rllib ``Preprocessor`` class so
          that rllib can use it.

    Args:
        local_dir: Directory that contains the ``preprocessors`` folder.

    Raises:
        TypeError: If a loaded class is not a subclass of ``Preprocessor``.
    """
    pattern = os.path.join(local_dir, "preprocessors", "*.py")
    # glob returns files in arbitrary order; sort so that registration
    # (and the log output) is reproducible between runs.
    for _file_path in sorted(glob.glob(pattern)):
        preprocessor_name, class_name, custom_preprocessor = \
            load_class_from_file(_file_path)

        # Validate the class
        if not issubclass(custom_preprocessor, Preprocessor):
            raise TypeError(
                "We expected the class named {} to be "
                "a subclass of Preprocessor. "
                "Please read more here : <insert-link>".format(class_name))

        # Finally Register Preprocessor in Tune
        ModelCatalog.register_custom_preprocessor(preprocessor_name,
                                                  custom_preprocessor)
        # Adjacent literals are used instead of a backslash continuation so
        # that source indentation does not leak into the printed message.
        print("- Successfully Loaded custom Preprocessor "
              "class {} from {}".format(class_name,
                                        os.path.basename(_file_path)))
def load_preprocessors(CUSTOM_PREPROCESSORS):
    """Register each custom preprocessor with the RLlib ``ModelCatalog``.

    Args:
        CUSTOM_PREPROCESSORS: Mapping whose ``items()`` yields
            ``(name, preprocessor_class)`` pairs.
    """
    # Imported inside the function, so ray is only imported when this runs.
    from ray.rllib.models import ModelCatalog

    register = ModelCatalog.register_custom_preprocessor
    for name, preprocessor_cls in CUSTOM_PREPROCESSORS.items():
        register(name, preprocessor_cls)
def test_custom_preprocessor(self):
    """Check that preprocessors registered by name are the ones returned.

    Without any options, ``get_preprocessor`` is expected to return a
    ``NoPreprocessor`` for the CartPole environment.
    """
    ray.init(object_store_memory=1000 * 1024 * 1024)

    registered = (("foo", CustomPreprocessor), ("bar", CustomPreprocessor2))
    for key, preprocessor_cls in registered:
        ModelCatalog.register_custom_preprocessor(key, preprocessor_cls)

    env = gym.make("CartPole-v0")
    for key, preprocessor_cls in registered:
        prep = ModelCatalog.get_preprocessor(
            env, {"custom_preprocessor": key})
        # Compared through their string form, exactly as the check is
        # specified for the named preprocessors.
        self.assertEqual(str(type(prep)), str(preprocessor_cls))

    default_prep = ModelCatalog.get_preprocessor(env)
    self.assertEqual(type(default_prep), NoPreprocessor)
def testCustomPreprocessor(self):
    """Look up two registered preprocessors by name, then the default one."""
    ray.init()

    register = ModelCatalog.register_custom_preprocessor
    register("foo", CustomPreprocessor)
    register("bar", CustomPreprocessor2)

    cartpole = gym.make("CartPole-v0")

    foo_options = {"custom_preprocessor": "foo"}
    foo_prep = ModelCatalog.get_preprocessor(cartpole, foo_options)
    self.assertEqual(str(type(foo_prep)), str(CustomPreprocessor))

    bar_options = {"custom_preprocessor": "bar"}
    bar_prep = ModelCatalog.get_preprocessor(cartpole, bar_options)
    self.assertEqual(str(type(bar_prep)), str(CustomPreprocessor2))

    # No options given: the catalog is expected to hand back NoPreprocessor.
    fallback_prep = ModelCatalog.get_preprocessor(cartpole)
    self.assertEqual(type(fallback_prep), NoPreprocessor)
def testCustomPreprocessor(self):
    """Check name-based preprocessor lookup when a registry is passed in."""
    ray.init()

    registered = (("foo", CustomPreprocessor), ("bar", CustomPreprocessor2))
    for key, preprocessor_cls in registered:
        ModelCatalog.register_custom_preprocessor(key, preprocessor_cls)

    env = gym.make("CartPole-v0")
    for key, preprocessor_cls in registered:
        # get_registry() is called afresh for every lookup.
        prep = ModelCatalog.get_preprocessor(
            get_registry(), env, {"custom_preprocessor": key})
        self.assertEqual(str(type(prep)), str(preprocessor_cls))

    default_prep = ModelCatalog.get_preprocessor(get_registry(), env)
    self.assertEqual(type(default_prep), NoPreprocessor)
def testCustomPreprocessor(self):
    """Assert the exact type of each preprocessor returned by the catalog."""
    ray.init()

    expectations = (("foo", CustomPreprocessor), ("bar", CustomPreprocessor2))
    for key, expected_cls in expectations:
        ModelCatalog.register_custom_preprocessor(key, expected_cls)

    env = gym.make("CartPole-v0")
    for key, expected_cls in expectations:
        prep = ModelCatalog.get_preprocessor(
            get_registry(), env, {"custom_preprocessor": key})
        # Exact type equality (not isinstance) is what this test checks.
        assert type(prep) == expected_cls

    default_prep = ModelCatalog.get_preprocessor(get_registry(), env)
    assert type(default_prep) == NoPreprocessor
def load_agent():
    """Build the single-player Tron DQN trainer and restore its checkpoint.

    Registers the ``tron_single_player`` environment and the ``tron_prep``
    preprocessor, configures a ``DQNTrainer`` with a three-layer
    convolutional feature extractor, and restores it from
    ``./dqn_checkpoint_3800/checkpoint-3800``.

    Returns:
        The restored ``DQNTrainer`` instance.
    """
    # ray.init() fails when Ray is already running in this process, so only
    # start it when needed; this makes repeated calls to load_agent() safe.
    if not ray.is_initialized():
        ray.init()

    def environment_creater(params=None):
        """Create a Tron environment with a SimpleAvoidAgent; ``params`` is unused."""
        agent = SimpleAvoidAgent(noise=0.05)
        return TronRaySinglePlayerEnvironment(board_size=13, num_players=4, agent=agent)

    tune.register_env("tron_single_player", environment_creater)
    ModelCatalog.register_custom_preprocessor("tron_prep", TronExtractBoard)

    # Configure Deep Q Learning with reasonable values
    config = DEFAULT_CONFIG.copy()
    config['num_workers'] = 4

    # DEFAULT_CONFIG.copy() is shallow: config['model'] would still be the
    # dict owned by DEFAULT_CONFIG. Build a new dict so that the settings
    # below do not leak into the module-level defaults.
    # We will use a simple convolution network with 3 layers as our feature extractor
    config['model'] = dict(
        config['model'],
        vf_share_layers=True,
        conv_filters=[(512, 5, 1), (256, 3, 2), (128, 3, 2)],
        fcnet_hiddens=[256],
        custom_preprocessor='tron_prep',
    )

    # Begin training or evaluation
    trainer = DQNTrainer(config, "tron_single_player")
    trainer.restore("./dqn_checkpoint_3800/checkpoint-3800")
    return trainer
import ray
from ray import tune
from ray.rllib.utils.seed import seed as rllib_seed
import rl_toy
from rl_toy.envs import RLToyEnv
from ray.tune.registry import register_env

# Register the toy environment under the name "RLToy-v0"; the creator simply
# forwards the env config to the RLToyEnv constructor.
register_env("RLToy-v0", lambda config: RLToyEnv(config))

from ray.rllib.models.preprocessors import OneHotPreprocessor
from ray.rllib.models import ModelCatalog

# Register OneHotPreprocessor as the custom preprocessor named "ohe".
ModelCatalog.register_custom_preprocessor("ohe", OneHotPreprocessor)

# IMPORTANT: seeding through rllib_seed did not work (the author suspects
# this is due to multi-processing), so set config["seed"] instead. The
# global seeding attempts are kept below for reference only.
#rllib_seed(0, 0, 0)
# np.random.seed(0)
# import random
# random.seed(0)
# import tensorflow as tf
# tf.set_random_seed(0)

# Start Ray with local_mode=True (object_id_seed was tried and disabled).
ray.init(local_mode=True)#, object_id_seed=0)

# Old config space
# algorithms = ["DQN"]
# state_space_sizes = [2**i for i in range(4,6)]
def _init_shape(self, obs_space, options): return (2,13) # can vary depending on inputs def transform(self, observation): player_obs = observation[0].reshape((4,13)) player_obs = np.sum(player_obs,axis = 0).reshape((1,13)) #print(player_obs.shape) dealer_obs = np.zeros((1,13)) dealer_obs[0][observation[1]%13] = 1 #print(dealer_obs.shape) #print(dealer_obs.shape) new_obs = np.concatenate([player_obs,dealer_obs],axis = 0) return new_obs# return the preprocessed observation ModelCatalog.register_custom_preprocessor("my_prep", MyPreprocessorClass) ModelCatalog.register_custom_preprocessor("my_prep2", MyPreprocessorClass2) #train function def train_zero(config, reporter): agent = SACTrainer(config) #agent.restore("/home/yunke/ray_results/AlphaZero_BlackjackEnv_zero_2020-05-01_22-50-303ae70oaq/checkpoint_1981/checkpoint-1981") #continue training #training curriculum, start with phase 0 episodes = 0 i = 0 while True: result = agent.train() if reporter is None: continue else:
else: zString = "_".join(key_list) if isinstance(v, list): v_count = len(d[k]) for j in range(v_count): tempString = zString + "_" + str(j) ret_dict[tempString] = v[j] else: ret_dict[zString] = v key_list.pop() return ret_dict ModelCatalog.register_custom_preprocessor("osim_prep", OsimPreprocessor) # class OsimEnv(gym.Env): # def __init__(self,env_config): # self.accuracy_setting = 1e-1 # self.nstep_hold = 4 # self.env = ProstheticsEnv(visualize=False,integrator_accuracy=self.accuracy_setting) # # # def step(self,action): # # for j in range(self.nstep_hold): # # obs, reward, done, info = env.step(action, project=False) # # if done: # # break #register_env(env_name, lambda c: OsimEnv(c)) self.accuracy_setting = 1e-1
def env_creator_marketorderenvbuysell(env_config):
    """Create a ``MarketOrderEnvBuySell`` environment from ``env_config``.

    The data root is ``env_config['data_path']`` when that key is present,
    otherwise the value returned by ``get_default_path()``.

    Args:
        env_config: Environment configuration mapping. Every entry is
            forwarded to ``MarketOrderEnvBuySell`` as a keyword argument
            (including ``data_path`` when it is present).

    Returns:
        The constructed ``MarketOrderEnvBuySell`` instance.
    """
    if not 'data_path' in env_config:
        path = get_default_path()
    else:
        path = env_config['data_path']
    # The sub-folder names are appended by plain string concatenation, so
    # `path` has to end with a path separator already.
    env = MarketOrderEnvBuySell(order_paths=path + 'feather/', snapshot_paths=path + 'snap_json/', **env_config)
    return env # or return your own custom env


# Register the environment creator under its public name.
env_creator_name = "MarketOrderEnvBuySell-v0"
register_env(env_creator_name, env_creator_marketorderenvbuysell)

# Register the custom preprocessors under the short names used in configs.
ModelCatalog.register_custom_preprocessor('mv', MarketVariables)
ModelCatalog.register_custom_preprocessor('mv_l', MarketVariablesSingleL)
ModelCatalog.register_custom_preprocessor('mv_pred', PredictiveMarketVariables)
ModelCatalog.register_custom_preprocessor('zeros', Zeros)

# Custom trainables, currently disabled:
#register_trainable('PPOADV', PPOAdv)
#register_trainable('PPOCUMRET', PPOCumRet)


def get_env(env_id, env_config):
    # "DistEnv-v0" is built by env_creator_distenv.
    if env_id == "DistEnv-v0":
        return env_creator_distenv(env_config)
# Initialize training environment ray.shutdown() ray.init() def environment_creater(params=None): agent = SimpleAvoidAgent(noise=0.05) #agent = DQNTrainer(DEFAULT_CONFIG,"tron_single_player") #agent.load("./dqn_model_v2/checkpoint_6700/checkpoint-6700") return TronRaySinglePlayerEnvironment(board_size=13, num_players=4, agent=agent) env = environment_creater() print("***************************************************************************************************************************************************************************") tune.register_env("tron_single_player", environment_creater) ModelCatalog.register_custom_preprocessor("tron_prep", TronExtractBoard) # Configure Deep Q Learning with reasonable values config = DEFAULT_CONFIG.copy() #config['num_workers'] = 4 # config['num_gpus'] = 1 #config["timesteps_per_iteration"] = 1024 #config['lambda'] = .7 #config['target_network_update_freq'] = 256 #config['buffer_size'] = 100_000 #config['schedule_max_timesteps'] = 200_000 #config['exploration_fraction'] = 0.4 #config['compress_observations'] = False #config['n_step'] = 3 #config['seed'] = SEED
"--redis-address", default=None, type=str, help="The Redis address of the cluster.") parser.add_argument( "--num-cpus", default=2, type=int, help="Number of CPUs to allocate to Ray.") parser.add_argument( "--num-gpus", default=1, type=int, help="Number of GPUs to allocate to Ray.") parser.add_argument( "--experiment-name", default="default", type=str, help="Name of the subdirectory under `local_dir` to put results in.") parser.add_argument( "--env", default=None, type=str, help="The gym environment to use.") ModelCatalog.register_custom_preprocessor("sc_prep", StarCraftPreprocessor) register_env("sc2", lambda config: StarCraft(config)) register_trainable("SC_A3C", A3CAgent) if __name__ == "__main__": args = parser.parse_args(sys.argv[1:]) experiments = { 'experiment_name': { "run" : 'SC_A3C', "env" : 'sc2', "trial_resources" : resources_to_json(args.trial_resources), "config": dict(args.config, env=args.env), } }
self.check_shape(observation) array = np.zeros(self.shape) self.write(observation, array, 0) return array @override(Preprocessor) def write(self, observation, array, offset): if not isinstance(observation, OrderedDict): observation = OrderedDict(sorted(list(observation.items()))) assert len(observation) == len(self.preprocessors), \ (len(observation), len(self.preprocessors)) for o, p in zip(observation.values(), self.preprocessors): p.write(o, array, offset) offset += p.size @property @override(Preprocessor) def observation_space(self): obs_space = gym.spaces.Box(np.finfo(np.float32).min, np.finfo(np.float32).max, self.shape, dtype=np.float32) # Stash the unwrapped space so that we can unwrap dict spaces # automatically in models obs_space.original_space = self._obs_space return obs_space ModelCatalog.register_custom_preprocessor(STRATEGO_PREPROCESSOR, StrategoDictFlatteningPreprocessor)
def load_agent():
    """Restore the multi-player Tron PPO trainer and return its "trainer" policy.

    Registers the ``tron_multi_player`` environment and the ``tron_prep``
    preprocessor, configures a two-policy ("trainer" / "opponent")
    ``PPOTrainer`` in which only "trainer" is trained, and restores it from
    ``./sp_checkpoint_1802/checkpoint-1802``.

    Returns:
        The policy registered under the id ``"trainer"``.
    """
    # ray.init() fails when Ray is already running in this process, so only
    # start it when needed; this makes repeated calls to load_agent() safe.
    if not ray.is_initialized():
        ray.init()

    def environment_creater(params=None):
        """Create the multi-player Tron environment; ``params`` is unused."""
        return TronRayEnvironment(board_size=13, num_players=4)

    # One instance is needed up front to read the observation/action spaces.
    env = environment_creater()
    tune.register_env("tron_multi_player", environment_creater)
    ModelCatalog.register_custom_preprocessor("tron_prep", TronExtractBoard)

    def model_settings():
        """Return a fresh dict of the network settings shared by all policies."""
        # We will use a simple convolution network with 3 layers as our feature extractor
        return {
            "vf_share_layers": True,
            "conv_filters": [(512, 5, 1), (256, 3, 2), (128, 3, 2)],
            "fcnet_hiddens": [256],
            "custom_preprocessor": 'tron_prep',
        }

    config = DEFAULT_CONFIG.copy()
    config['num_workers'] = 4
    # DEFAULT_CONFIG.copy() is shallow: config['model'] would still be the
    # dict owned by DEFAULT_CONFIG. Build a new dict so that these settings
    # do not leak into the module-level defaults.
    config['model'] = dict(config['model'], **model_settings())

    # All of the models will use the same network as before
    agent_config = {"model": model_settings()}

    def policy_mapping_function(x):
        """Map agent id '0' to the trained policy and all others to the opponent."""
        if x == '0':
            return "trainer"
        return "opponent"

    config['multiagent'] = {
        "policy_mapping_fn": policy_mapping_function,
        "policies": {
            "trainer": (None, env.observation_space, env.action_space, agent_config),
            "opponent": (None, env.observation_space, env.action_space, agent_config),
        },
        "policies_to_train": ["trainer"],
    }

    # Begin training or evaluation
    trainer = PPOTrainer(config, "tron_multi_player")
    trainer.restore("./sp_checkpoint_1802/checkpoint-1802")
    return trainer.get_policy("trainer")
from ray.rllib.agents.pg import PGAgent from ray.rllib.agents.pg.pg_policy_graph import PGPolicyGraph def shutdown(): print('Shutting down Ray...') ray.shutdown() atexit.register(shutdown) if __name__ == '__main__': # register environment register_env('Pomme_v0', lambda config: Pomme_v0(config)) # register preprocessor ModelCatalog.register_custom_preprocessor('Featurize_Preprocessor', Featurize_Preprocessor) ray.init() # get env config and create dummy instance to retrieve observation & action space env_config = team_competition_env() p = Pomme_v0(env_config) obs_space = p.pomme.observation_space act_space = p.pomme.action_space p.pomme.close() # initialize trainer - since all agent's use the same policy graph one trainer is fine here # otherwise we'd need one trainer per policy graph used for training trainer = PGAgent(env='Pomme_v0', config={ 'multiagent': {
register_env(env_name, lambda config: env_creator(config)) # Placeholder to enable use of a custom pre-processor class ImagePreproc(Preprocessor): def _init_shape(self, obs_space, options): shape = (84, 84, 3) # Adjust third dim if stacking frames return shape def transform(self, observation): return observation ModelCatalog.register_custom_preprocessor("sq_im_84", ImagePreproc) if __name__ == "__main__": args = parser.parse_args() ray.init() obs_space = Box(0.0, 255.0, shape=(84, 84, 3)) act_space = Discrete(9) def gen_policy(): config = { # Model and preprocessor options. "model": { "custom_model": model_name, "custom_options": { # Custom notes for the experiment
env_name = "ple_env" #screen dimensions, keep consistent for PLE env and ray screen_wh = 80 class PLEPreprocessor(Preprocessor): def _init(self): self.shape = self._obs_space.shape #can vary this based on options def transform(self, observation): observation = observation / 255.0 return observation ModelCatalog.register_custom_preprocessor("ple_prep", PLEPreprocessor) class PLEEnv(gym.Env): def __init__(self, env_config): game = Catcher(width=screen_wh, height=screen_wh) fps = 30 # fps we want to run at frame_skip = 2 num_steps = 2 force_fps = True # False for slower speed display_screen = False # make a PLE instance. self.env = PLE(game, fps=fps, frame_skip=frame_skip,