def _init(self):
    """Build the master policy, optimizer, shared noise table, and rollout actors.

    Reads all hyperparameters from ``self.config`` and spawns
    ``num_workers`` remote ``Worker`` actors that share one noise table.
    """
    cfg = self.config
    env = self.env_creator(cfg["env_config"])
    from ray.rllib import models
    obs_prep = models.ModelCatalog.get_preprocessor(env)

    # Master copy of the policy plus the SGD optimizer that updates it.
    self.sess = utils.make_session(single_threaded=False)
    self.policy = policies.GenericPolicy(
        self.sess, env.action_space, env.observation_space, obs_prep,
        cfg["observation_filter"], cfg["model"])
    self.optimizer = optimizers.SGD(self.policy, cfg["sgd_stepsize"])

    # Rollout bookkeeping parameters.
    self.rollouts_used = cfg["rollouts_used"]
    self.num_rollouts = cfg["num_rollouts"]
    self.report_length = cfg["report_length"]

    # Create the shared noise table.
    logger.info("Creating shared noise table.")
    noise_id = create_shared_noise.remote(cfg["noise_size"])
    self.noise = SharedNoiseTable(ray.get(noise_id))

    # Create the actors.
    logger.info("Creating actors.")
    self.workers = [
        Worker.remote(cfg, self.env_creator, noise_id)
        for _ in range(cfg["num_workers"])
    ]

    # Running statistics for result reporting.
    self.episodes_so_far = 0
    self.reward_list = []
    self.tstart = time.time()
def __init__(self, config, policy_params, env_creator, noise, min_task_runtime=0.2):
    """Rollout worker: owns its own env, TF session, and policy replica.

    The policy class is selected by ``config["policy_type"]``; any extra
    keyword arguments for it arrive via ``policy_params``.
    """
    self.min_task_runtime = min_task_runtime
    self.config = config
    self.policy_params = policy_params
    self.noise = SharedNoiseTable(noise)

    self.env = env_creator(config["env_config"])
    from ray.rllib import models
    self.preprocessor = models.ModelCatalog.get_preprocessor(self.env)

    # Workers run single-threaded; parallelism comes from many actors.
    self.sess = utils.make_session(single_threaded=True)

    # Both policy classes take the same leading positional arguments.
    common_args = (self.sess, self.env.action_space, self.preprocessor,
                   config["observation_filter"])
    if config["policy_type"] == "LinearPolicy":
        self.policy = policies.LinearPolicy(*common_args, **policy_params)
    else:
        self.policy = policies.MLPPolicy(
            *common_args, config["fcnet_hiddens"], **policy_params)
def _init(self, config, env_creator):
    """Initialize the trainer: policy, optimizer, noise table, and workers.

    ``config`` supplies every hyperparameter; ``env_creator`` builds the
    environment used to size the policy's observation/action spaces.
    """
    env = env_creator(config["env_config"])
    from ray.rllib import models
    prep = models.ModelCatalog.get_preprocessor(env)

    # Master policy and the optimizer that applies perturbation gradients.
    self.sess = utils.make_session(single_threaded=False)
    self.policy = policies.GenericPolicy(
        self.sess, env.action_space, env.observation_space, prep,
        config["observation_filter"], config["model"])
    self.optimizer = optimizers.SGD(self.policy, config["sgd_stepsize"])

    # Copy the rollout-control hyperparameters onto the trainer.
    for key in ("rollouts_used", "num_rollouts", "report_length"):
        setattr(self, key, config[key])

    # Create the shared noise table.
    logger.info("Creating shared noise table.")
    noise_id = create_shared_noise.remote(config["noise_size"])
    self.noise = SharedNoiseTable(ray.get(noise_id))

    # Create the actors.
    logger.info("Creating actors.")
    self.workers = [
        Worker.remote(config, env_creator, noise_id)
        for _ in range(config["num_workers"])
    ]

    # Reporting state.
    self.episodes_so_far = 0
    self.reward_list = []
    self.tstart = time.time()
def _init(self, config, env_creator):
    """Initialize the ARS trainer with optional domain-randomization config.

    Builds the master policy and optimizer, the shared noise table, and the
    rollout workers. If ``config["env_config"]["extra_trainer_configs"]``
    carries a ``"domain_randomization"`` section with an ``"angle"`` entry,
    the per-joint min/max random angle bounds are extracted here.
    """
    # PyTorch check. BUG FIX: the original forced use_pytorch to False
    # *before* testing it, which made the ValueError unreachable. Check the
    # caller's setting first, then normalize the flag for downstream code.
    if config.get("use_pytorch", False):
        raise ValueError(
            "ARS does not support PyTorch yet! Use tf instead."
        )
    config["use_pytorch"] = False

    env = env_creator(config["env_config"])
    from ray.rllib import models
    preprocessor = models.ModelCatalog.get_preprocessor(env)
    self.sess = utils.make_session(single_threaded=False)
    self.policy = policies.GenericPolicy(
        self.sess, env.action_space, env.observation_space, preprocessor,
        config["observation_filter"], config["model"])
    self.optimizer = optimizers.SGD(self.policy, config["sgd_stepsize"])
    self.rollouts_used = config["rollouts_used"]
    self.num_rollouts = config["num_rollouts"]
    self.report_length = config["report_length"]

    # Create the shared noise table.
    logger.info("Creating shared noise table.")
    noise_id = create_shared_noise.remote(config["noise_size"])
    self.noise = SharedNoiseTable(ray.get(noise_id))

    # Getting the configurations of the random environments.
    # BUG FIX: the original indexed extra_trainer_configs and
    # domain_randomization unconditionally and only *afterwards* checked
    # whether they existed, so a missing key raised KeyError before the
    # guards ever ran. Use .get() so the guards below are meaningful.
    self.extra_config = config["env_config"].get("extra_trainer_configs")
    self.domain_randomization_config = (
        self.extra_config.get("domain_randomization")
        if self.extra_config is not None else None)
    self.domain_randomization_flag = False
    if self.domain_randomization_config is not None:
        domain_randomization = self.domain_randomization_config
        self.domain_randomization_flag = True
        if "angle" in domain_randomization:
            # domain_randomization["angle"] is (per-joint enable flags,
            # min angle, max angle); the same min/max bound applies to
            # every enabled joint.
            self.min_random_angles = [0, 0]
            self.max_random_angles = [0, 0]
            flags, min_angle, max_angle = domain_randomization["angle"]
            for i, flag in enumerate(flags):
                if flag:  # assumes flags are booleans — TODO confirm
                    self.min_random_angles[i] = min_angle
                    self.max_random_angles[i] = max_angle

    # Create the actors.
    # TODO: Change Worker and add the config
    logger.info("Creating actors.")
    self.workers = [
        Worker.remote(config, env_creator, noise_id)
        for _ in range(config["num_workers"])
    ]
    self.episodes_so_far = 0
    self.reward_list = []
    self.tstart = time.time()
def __init__(self, config, env_creator, noise, min_task_runtime=0.2):
    """Per-actor rollout worker: owns an env, a TF session, and a policy copy."""
    self.min_task_runtime = min_task_runtime
    self.config = config
    self.noise = SharedNoiseTable(noise)
    self.env = env_creator(config["env_config"])
    from ray.rllib import models
    self.preprocessor = models.ModelCatalog.get_preprocessor(self.env)
    # One thread per worker; parallelism comes from running many actors.
    self.sess = utils.make_session(single_threaded=True)
    self.policy = policies.GenericPolicy(
        self.sess, self.env.action_space, self.env.observation_space,
        self.preprocessor, config["observation_filter"], config["model"])
def __init__(self, config, env_creator, noise, min_task_runtime=0.2):
    """Worker actor setup: environment, preprocessor, session, and policy."""
    self.config = config
    self.min_task_runtime = min_task_runtime
    self.noise = SharedNoiseTable(noise)

    # Each worker instantiates its own copy of the environment.
    env = env_creator(config["env_config"])
    self.env = env
    from ray.rllib import models
    self.preprocessor = models.ModelCatalog.get_preprocessor(env)

    # Single-threaded session: concurrency is handled at the actor level.
    self.sess = utils.make_session(single_threaded=True)
    self.policy = policies.GenericPolicy(
        self.sess,
        env.action_space,
        env.observation_space,
        self.preprocessor,
        config["observation_filter"],
        config["model"])
def _init(self):
    """Build the master policy, optimizer, shared noise table, and workers.

    Selects ``LinearPolicy`` or ``MLPPolicy`` from
    ``self.config["policy_type"]`` and forwards ``policy_params`` (here,
    zero action noise for the master copy) to the chosen policy class.
    """
    policy_params = {"action_noise_std": 0.0}

    # Register the linear network with the model catalog before use.
    utils.register_linear_network()

    env = self.env_creator(self.config["env_config"])
    from ray.rllib import models
    preprocessor = models.ModelCatalog.get_preprocessor(env)
    self.sess = utils.make_session(single_threaded=False)
    if self.config["policy_type"] == "LinearPolicy":
        self.policy = policies.LinearPolicy(
            self.sess, env.action_space, preprocessor,
            self.config["observation_filter"], self.config["model"],
            **policy_params)
    else:
        self.policy = policies.MLPPolicy(
            self.sess, env.action_space, preprocessor,
            self.config["observation_filter"], self.config["model"],
            self.config["fcnet_hiddens"], **policy_params)
    self.optimizer = optimizers.SGD(self.policy,
                                    self.config["sgd_stepsize"])

    self.rollouts_used = self.config["rollouts_used"]
    self.num_rollouts = self.config["num_rollouts"]
    self.report_length = self.config["report_length"]

    # Create the shared noise table.
    # CONSISTENCY FIX: use the module logger like the other _init variants
    # in this file instead of bare print().
    logger.info("Creating shared noise table.")
    noise_id = create_shared_noise.remote(self.config["noise_size"])
    self.noise = SharedNoiseTable(ray.get(noise_id))

    # Create the actors.
    logger.info("Creating actors.")
    self.workers = [
        Worker.remote(self.config, policy_params, self.env_creator, noise_id)
        for _ in range(self.config["num_workers"])
    ]

    # Progress counters for result reporting.
    self.episodes_so_far = 0
    self.timesteps_so_far = 0
    self.reward_list = []
    self.tstart = time.time()