def create_env(env, seed):
    """Build a single, seeded environment from its string identifier.

    Args:
        env: Environment identifier. A name starting with ``'rllab.'`` is
            resolved with ``rllab_env_from_name`` and wrapped in
            ``Rllab2GymWrapper``. Any other name is classified with
            ``get_env_type`` and built with ``make_atari`` followed by
            ``wrap_deepmind`` when the type is ``'atari'``, or with
            ``gym.make`` otherwise.
        seed: Value passed to the ``seed`` method of the created environment.

    Returns:
        The created environment, after ``seed(seed)`` has been called on it.

    Raises:
        AssertionError: If ``get_env_type`` returns ``None`` for a
            non-rllab name.
    """
    if env.startswith('rllab.'):
        # The rllab environment name is the run of non-whitespace characters
        # that follows the 'rllab.' prefix.
        env_name = re.match(r'rllab\.(\S+)', env).group(1)
        env_rllab_class = rllab_env_from_name(env_name)
        created_env = Rllab2GymWrapper(env_rllab_class())
    else:
        # Plain gym identifier: find out whether it is an Atari game.
        env_type = get_env_type(env)
        assert env_type is not None, "Env not recognized."
        if env_type == 'atari':
            created_env = wrap_deepmind(make_atari(env))
        else:
            created_env = gym.make(env)

    created_env.seed(seed)
    return created_env
def train(env, policy, seed, njobs=1, **alg_args):
    """Build ``njobs`` parallel environments plus a policy factory and run
    ``pois2.learn`` on them.

    Args:
        env: Environment identifier. A name starting with ``'rllab.'`` is
            resolved with ``rllab_env_from_name`` and wrapped in
            ``Rllab2GymWrapper``. Any other name is classified with
            ``get_env_type`` and built with ``make_atari`` +
            ``wrap_deepmind`` (type ``'atari'``) or ``gym.make``.
        policy: ``'linear'`` or ``'nn'`` for an ``MlpPolicy``, ``'cnn'`` for
            a ``CnnPolicy``.
        seed: Base seed. Worker ``i`` seeds its environment with
            ``i + seed``; ``seed`` itself is passed to ``set_global_seeds``.
        njobs: Number of environments handed to ``SubprocVecEnv``. Also the
            fallback passed to ``U.make_session`` when the CPU affinity
            cannot be queried.
        **alg_args: Forwarded unchanged to ``pois2.learn``.

    Raises:
        AssertionError: If ``get_env_type`` returns ``None`` for a
            non-rllab name.
        Exception: If ``policy`` is not one of the recognized names.
    """
    # Resolve how one worker environment is created. Nothing is spawned yet.
    if env.startswith('rllab.'):
        # The rllab environment name is the run of word characters that
        # follows the 'rllab.' prefix.
        env_name = re.match(r'rllab\.(\w+)', env).group(1)
        env_rllab_class = rllab_env_from_name(env_name)
        env_type = 'rllab'

        def create_worker_env(worker_seed):
            worker_env = Rllab2GymWrapper(env_rllab_class())
            worker_env.seed(worker_seed)
            return worker_env
    else:
        # Plain gym identifier: find out whether it is an Atari game.
        env_type = get_env_type(env)
        assert env_type is not None, "Env not recognized."
        if env_type == 'atari':
            def create_worker_env(worker_seed):
                # The seed is applied to the raw Atari env, before wrapping.
                worker_env = make_atari(env)
                worker_env.seed(worker_seed)
                return wrap_deepmind(worker_env)
        else:
            def create_worker_env(worker_seed):
                worker_env = gym.make(env)
                worker_env.seed(worker_seed)
                return worker_env

    def make_env(seed=0):
        # SubprocVecEnv takes zero-argument callables, one per worker.
        def _thunk():
            return create_worker_env(seed)
        return _thunk

    # Create the policy factory. This is validated before any worker process
    # is started, so an unknown policy name does not leave subprocesses behind.
    if policy == 'linear':
        hid_size = num_hid_layers = 0
    elif policy == 'nn':
        hid_size = [100, 50, 25]
        num_hid_layers = 3

    if policy in ('linear', 'nn'):
        def make_policy(name, ob_space, ac_space):
            return MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                             hid_size=hid_size,
                             num_hid_layers=num_hid_layers,
                             gaussian_fixed_var=True, use_bias=False,
                             use_critic=False,
                             hidden_W_init=tf.contrib.layers.xavier_initializer(),
                             output_W_init=tf.contrib.layers.xavier_initializer())
    elif policy == 'cnn':
        def make_policy(name, ob_space, ac_space):
            return CnnPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                             gaussian_fixed_var=True, use_bias=False,
                             use_critic=False,
                             hidden_W_init=tf.contrib.layers.xavier_initializer(),
                             output_W_init=tf.contrib.layers.xavier_initializer())
    else:
        raise Exception('Unrecognized policy type.')

    # Start the worker environments, one distinct seed per worker.
    parallel_env = SubprocVecEnv([make_env(i + seed) for i in range(njobs)])
    if env_type == 'atari':
        # Atari environments are additionally wrapped in a 4-frame stack.
        parallel_env = VecFrameStack(parallel_env, 4)

    try:
        affinity = len(os.sched_getaffinity(0))
    except (AttributeError, OSError):
        # os.sched_getaffinity is missing on some platforms (AttributeError)
        # and the underlying call may fail (OSError); fall back to njobs.
        affinity = njobs
    sess = U.make_session(affinity)
    # Entered and deliberately never exited, exactly as before.
    sess.__enter__()

    set_global_seeds(seed)

    gym.logger.setLevel(logging.WARN)

    pois2.learn(parallel_env, make_policy, **alg_args)
def train(env, policy, policy_init, n_episodes, horizon, seed, njobs=1,
          save_weights=False, **alg_args):
    """Build an environment factory, a policy factory and a
    ``ParallelSampler``, then run ``pois.learn``.

    Args:
        env: Environment identifier. A name starting with ``'rllab.'`` is
            resolved with ``rllab_env_from_name`` and wrapped in
            ``Rllab2GymWrapper``. Any other name is classified with
            ``get_env_type`` and built with ``make_atari`` +
            ``wrap_deepmind`` (type ``'atari'``) or ``gym.make``.
        policy: ``'linear'`` or ``'nn'`` for an ``MlpPolicy``, ``'cnn'`` for
            a ``CnnPolicy``.
        policy_init: ``'xavier'`` or ``'zeros'``; selects the initializer
            used for both hidden and output weights.
        n_episodes: Passed to both ``ParallelSampler`` and ``pois.learn``.
        horizon: Passed to both ``ParallelSampler`` and ``pois.learn``.
        seed: Passed to ``ParallelSampler`` and ``set_global_seeds``.
        njobs: Passed as ``n_workers`` to the sampler. Also the fallback
            passed to ``U.make_session`` when the CPU affinity cannot be
            queried.
        save_weights: Forwarded to ``pois.learn``.
        **alg_args: Forwarded unchanged to ``pois.learn``.

    Raises:
        AssertionError: If ``get_env_type`` returns ``None`` for a
            non-rllab name.
        Exception: If ``policy_init`` or ``policy`` is not recognized.
    """
    # Define the environment maker; no environment is created here.
    if env.startswith('rllab.'):
        # The rllab environment name is the run of non-whitespace characters
        # that follows the 'rllab.' prefix.
        env_name = re.match(r'rllab\.(\S+)', env).group(1)
        env_rllab_class = rllab_env_from_name(env_name)

        def make_env():
            return Rllab2GymWrapper(env_rllab_class())
    else:
        # Plain gym identifier: find out whether it is an Atari game.
        env_type = get_env_type(env)
        assert env_type is not None, "Env not recognized."
        if env_type == 'atari':
            def make_env():
                return wrap_deepmind(make_atari(env))
        else:
            def make_env():
                return gym.make(env)

    # Hidden-layer layout for the MLP policies (unused by 'cnn').
    if policy == 'linear':
        hid_size = num_hid_layers = 0
    elif policy == 'nn':
        hid_size = [100, 50, 25]
        num_hid_layers = 3

    if policy_init == 'xavier':
        policy_initializer = tf.contrib.layers.xavier_initializer()
    elif policy_init == 'zeros':
        policy_initializer = U.normc_initializer(0.0)
    else:
        raise Exception('Unrecognized policy initializer.')

    if policy in ('linear', 'nn'):
        def make_policy(name, ob_space, ac_space):
            return MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                             hid_size=hid_size,
                             num_hid_layers=num_hid_layers,
                             gaussian_fixed_var=True, use_bias=False,
                             use_critic=False,
                             hidden_W_init=policy_initializer,
                             output_W_init=policy_initializer)
    elif policy == 'cnn':
        def make_policy(name, ob_space, ac_space):
            return CnnPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                             gaussian_fixed_var=True, use_bias=False,
                             use_critic=False,
                             hidden_W_init=policy_initializer,
                             output_W_init=policy_initializer)
    else:
        raise Exception('Unrecognized policy type.')

    sampler = ParallelSampler(make_policy, make_env, n_episodes, horizon,
                              True, n_workers=njobs, seed=seed)

    # From here on the sampler exists, so close it even if session setup or
    # learning raises.
    try:
        try:
            affinity = len(os.sched_getaffinity(0))
        except (AttributeError, OSError):
            # os.sched_getaffinity is missing on some platforms
            # (AttributeError) and the underlying call may fail (OSError);
            # fall back to njobs.
            affinity = njobs
        sess = U.make_session(affinity)
        # Entered and deliberately never exited, exactly as before.
        sess.__enter__()

        set_global_seeds(seed)

        gym.logger.setLevel(logging.WARN)

        pois.learn(make_env, make_policy, n_episodes=n_episodes,
                   horizon=horizon, sampler=sampler,
                   save_weights=save_weights, **alg_args)
    finally:
        sampler.close()
def create_sampler(env=None, policy='linear', n_episodes=100, horizon=500,
                   njobs=1, seed=42):
    """Create a ``ParallelSampler`` for the given environment and policy.

    Besides building the sampler, this opens a session with
    ``U.make_session`` (entered and never exited) and calls
    ``set_global_seeds(seed)``.

    Args:
        env: Environment identifier; must be a string. The ``None`` default
            is not usable, because the first statement calls
            ``env.startswith``. A name starting with ``'rllab.'`` is resolved
            with ``rllab_env_from_name`` and wrapped in ``Rllab2GymWrapper``.
            Any other name is classified with ``get_env_type`` and built
            with ``make_atari`` + ``wrap_deepmind`` (type ``'atari'``) or
            ``gym.make``.
        policy: ``'linear'``, ``'simple-nn'`` or ``'nn'`` for an
            ``MlpPolicy``, ``'cnn'`` for a ``CnnPolicy``.
        n_episodes: Passed to ``ParallelSampler``.
        horizon: Passed to ``ParallelSampler``.
        njobs: Passed as ``n_workers`` to the sampler. Also the fallback
            passed to ``U.make_session`` when the CPU affinity cannot be
            queried.
        seed: Passed to ``ParallelSampler`` and ``set_global_seeds``.

    Returns:
        The created ``ParallelSampler``.

    Raises:
        AssertionError: If ``get_env_type`` returns ``None`` for a
            non-rllab name.
        Exception: If ``policy`` is not one of the recognized names.
    """
    # Define the environment maker; no environment is created here.
    if env.startswith('rllab.'):
        # The rllab environment name is the run of non-whitespace characters
        # that follows the 'rllab.' prefix.
        env_name = re.match(r'rllab\.(\S+)', env).group(1)
        env_rllab_class = rllab_env_from_name(env_name)

        def make_env():
            return Rllab2GymWrapper(env_rllab_class())
    else:
        # Plain gym identifier: find out whether it is an Atari game.
        env_type = get_env_type(env)
        assert env_type is not None, "Env not recognized."
        if env_type == 'atari':
            def make_env():
                return wrap_deepmind(make_atari(env))
        else:
            def make_env():
                return gym.make(env)

    # MLP architectures: policy name -> (hid_size, num_hid_layers, use_bias).
    mlp_architectures = {
        'linear': (0, 0, False),
        'simple-nn': ([16], 1, True),
        'nn': ([100, 50, 25], 3, True),
    }

    policy_initializer = U.normc_initializer(0.0)

    if policy in mlp_architectures:
        hid_size, num_hid_layers, use_bias = mlp_architectures[policy]

        def make_policy(name, ob_space, ac_space):
            return MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                             hid_size=hid_size,
                             num_hid_layers=num_hid_layers,
                             gaussian_fixed_var=True, use_bias=use_bias,
                             use_critic=False,
                             hidden_W_init=policy_initializer,
                             output_W_init=policy_initializer)
    elif policy == 'cnn':
        def make_policy(name, ob_space, ac_space):
            return CnnPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                             gaussian_fixed_var=True, use_bias=False,
                             use_critic=False,
                             hidden_W_init=policy_initializer,
                             output_W_init=policy_initializer)
    else:
        raise Exception('Unrecognized policy type.')

    # Create the sampler
    sampler = ParallelSampler(make_policy, make_env, n_episodes, horizon,
                              True, n_workers=njobs, seed=seed)

    try:
        affinity = len(os.sched_getaffinity(0))
    except (AttributeError, OSError):
        # os.sched_getaffinity is missing on some platforms (AttributeError)
        # and the underlying call may fail (OSError); fall back to njobs.
        affinity = njobs
    sess = U.make_session(affinity)
    # Entered and deliberately never exited, so the session stays the
    # default one after this function returns.
    sess.__enter__()

    # Set random seed
    set_global_seeds(seed)

    return sampler