def train(env_id, num_timesteps, seed):
    from baselines.ppo1 import pposgd_simple, cnn_policy
    import baselines.common.tf_util as U
    rank = MPI.COMM_WORLD.Get_rank()
    sess = U.single_threaded_session()
    sess.__enter__()
    if rank == 0:
        logger.configure()
    else:
        logger.configure(format_strs=[])
    workerseed = seed + 10000 * MPI.COMM_WORLD.Get_rank() if seed is not None else None
    set_global_seeds(workerseed)
    env = make_atari(env_id)

    def policy_fn(name, ob_space, ac_space):  # pylint: disable=W0613
        return cnn_policy.CnnPolicy(name=name, ob_space=ob_space, ac_space=ac_space)

    env = bench.Monitor(env, logger.get_dir() and osp.join(logger.get_dir(), str(rank)))
    env.seed(workerseed)
    env = wrap_deepmind(env)
    env.seed(workerseed)

    pposgd_simple.learn(env, policy_fn,
                        max_timesteps=int(num_timesteps * 1.1),
                        timesteps_per_actorbatch=256,
                        clip_param=0.2, entcoeff=0.01,
                        optim_epochs=4, optim_stepsize=1e-3, optim_batchsize=64,
                        gamma=0.99, lam=0.95,
                        schedule='linear')
    env.close()
def make_env(env_id, env_type, mpi_rank=0, subrank=0, seed=None, reward_scale=1.0,
             gamestate=None, flatten_dict_observations=True, wrapper_kwargs=None,
             logger_dir=None):
    wrapper_kwargs = wrapper_kwargs or {}
    if env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(game=env_id, max_episode_steps=10000,
                                        use_restricted_actions=retro.Actions.DISCRETE,
                                        state=gamestate)
    else:
        env = gym.make(env_id)

    if flatten_dict_observations and isinstance(env.observation_space, gym.spaces.Dict):
        keys = env.observation_space.spaces.keys()
        env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        if 'frame_stack' not in wrapper_kwargs:
            wrapper_kwargs['frame_stack'] = 1
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)
    return env
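# A minimal usage sketch for the factory above; the environment ID, seed, and
# log path are illustrative, not from the original source. With these arguments
# the Monitor log lands in /tmp/logs/0.0 and wrap_deepmind stacks frames.
env = make_env('PongNoFrameskip-v4', 'atari', mpi_rank=0, subrank=0, seed=42,
               wrapper_kwargs={'frame_stack': True}, logger_dir='/tmp/logs')
obs = env.reset()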
# Assumed imports for this standalone script (standard OpenAI Baselines paths):
from baselines import bench, deepq, logger
from baselines.common.atari_wrappers import make_atari

def main():
    logger.configure()
    env = make_atari('PongNoFrameskip-v4')
    env = bench.Monitor(env, logger.get_dir())
    env = deepq.wrap_atari_dqn(env)
    model = deepq.learn(
        env,
        "conv_only",
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=True,
        lr=1e-4,
        total_timesteps=int(1e7),
        buffer_size=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
    )
    model.save('pong_model.pkl')
    env.close()
def _thunk():
    env = make_atari(env_id)
    env.seed(seed + rank)
    obs_shape = env.observation_space.shape
    if log_dir is not None:
        env = bench.Monitor(env, os.path.join(log_dir, str(rank)))
    env = wrap_deepmind(env, episode_life=True, clip_rewards=True,
                        frame_stack=False, scale=True)
    obs_shape = env.observation_space.shape
    env = WrapPyTorch(env)
    return env
def _thunk():
    env = make_atari(env_id)
    env.seed(seed + rank)
    obs_shape = env.observation_space.shape
    if log_dir is not None:
        env = bench.Monitor(env, os.path.join(log_dir, str(rank)))
    env = wrap_deepmind(env)
    obs_shape = env.observation_space.shape
    env = WrapPyTorch(env)
    return env
def _thunk():
    env = gym.make(env_id)
    is_atari = hasattr(gym.envs, 'atari') and isinstance(
        env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
    if is_atari:
        env = make_atari(env_id)
    env.seed(seed + rank)
    if log_dir is not None:
        env = bench.Monitor(env, os.path.join(log_dir, str(rank)))
    if is_atari:
        env = wrap_deepmind(env)
    # If the input has shape (W,H,3), wrap for PyTorch convolutions
    obs_shape = env.observation_space.shape
    if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
        env = WrapPyTorch(env)
    return env
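# The _thunk factories above return closures instead of envs so a vectorized
# runner can build each env inside its own worker process. Below is a minimal
# sketch of that consumption pattern using Baselines' SubprocVecEnv; the
# make_thunk helper and the IDs are illustrative, not from the original code.
from baselines.common.atari_wrappers import make_atari, wrap_deepmind
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv

def make_thunk(env_id, seed, rank):
    def _thunk():
        env = make_atari(env_id)
        env.seed(seed + rank)
        return wrap_deepmind(env)
    return _thunk

envs = SubprocVecEnv([make_thunk('PongNoFrameskip-v4', 0, i) for i in range(4)])
obs = envs.reset()  # batched observations, one row per parallel worker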
def _thunk():
    if env_type == 'unity':
        worker_id = 32 + rank
        print("***** UnityEnv", env_id, worker_id, rank)
        env = UnityEnv(env_id, worker_id)
    else:
        env = make_atari(env_id) if env_type == 'atari' else gym.make(env_id)
    env.seed(seed + 10000 * mpi_rank + rank if seed is not None else None)
    env = Monitor(env,
                  logger.get_dir() and os.path.join(logger.get_dir(),
                                                    str(mpi_rank) + '.' + str(rank)),
                  allow_early_resets=True)
    if env_type == 'atari':
        return wrap_deepmind(env, **wrapper_kwargs)
    elif reward_scale != 1:
        return RewardScaler(env, reward_scale)
    else:
        return env
def make_env(env_id, env_type, subrank=0, seed=None, reward_scale=1.0,
             distance_threshold=None, gamestate=None,
             flatten_dict_observations=True, wrapper_kwargs=None):
    mpi_rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    wrapper_kwargs = wrapper_kwargs or {}
    if env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(
            game=env_id, max_episode_steps=10000,
            use_restricted_actions=retro.Actions.DISCRETE, state=gamestate)
    else:
        if distance_threshold is not None:
            env = gym.make(env_id, distance_threshold=distance_threshold)
        else:
            env = gym.make(env_id)

    if flatten_dict_observations and isinstance(env.observation_space, gym.spaces.Dict):
        keys = env.observation_space.spaces.keys()
        env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger.get_dir() and os.path.join(logger.get_dir(),
                                                    str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)
    return env
def train(env_id, num_timesteps, seed, give_state, vf_iters, trial, nsteps, method, hist_len):
    from baselines.guidedcarla.nosharing_cnn_policy import CnnPolicy
    from baselines.guidedcarla import copos_mpi
    import baselines.common.tf_util as U
    rank = MPI.COMM_WORLD.Get_rank()
    sess = U.single_threaded_session()
    sess.__enter__()
    # if rank == 0:
    #     logger.configure()
    # else:
    #     logger.configure(format_strs=[])
    workerseed = seed * 10000
    set_global_seeds(workerseed)
    # TODO: change the environment to carla
    env = make_atari(env_id)

    def policy_fn(name, ob_space, ac_space, ob_name, hist_len):  # pylint: disable=W0613
        return CnnPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                         ob_name=ob_name, hist_len=hist_len)

    # TODO: check if monitor can deal with carla
    env = bench.Monitor(env, logger.get_dir() and osp.join(logger.get_dir(), str(rank)))
    env.seed(workerseed)
    # TODO: check wrap deepmind and carla
    env = wrap_deepmind(env)
    env.seed(workerseed)

    timesteps_per_batch = nsteps
    beta = -1
    if beta < 0:
        nr_episodes = num_timesteps // timesteps_per_batch
        # Automatically compute beta based on initial entropy and number of iterations
        tmp_pi = policy_fn("tmp_pi", env.observation_space, env.action_space,
                           ob_name="tmp_ob", hist_len=hist_len)
        sess.run(tf.global_variables_initializer())
        tmp_ob = np.zeros((1,) + env.observation_space.shape)
        entropy = sess.run(tmp_pi.pd.entropy(), feed_dict={tmp_pi.ob: tmp_ob})
        beta = 2 * entropy / nr_episodes
        print("Initial entropy: " + str(entropy) + ", episodes: " + str(nr_episodes))
        print("Automatically set beta: " + str(beta))

    copos_mpi.learn(env, policy_fn, timesteps_per_batch=timesteps_per_batch,
                    epsilon=0.01, beta=beta, cg_iters=10, cg_damping=0.1,
                    method=method, max_timesteps=num_timesteps, gamma=0.99,
                    lam=0.98, vf_iters=vf_iters, vf_stepsize=1e-3, trial=trial,
                    crosskl_coeff=0.01, kl_target=0.01, sess=sess)
    env.close()
def _thunk(): #print("Make envs", params) if env_id.startswith("dm"): _, domain, task = env_id.split('.') env = dm_control2gym.make(domain_name=domain, task_name=task) else: env = gym.make(env_id, **TspParams.current().ENVIRONMENT_KWARGS) is_atari = (hasattr(gym.envs, 'atari') and isinstance( env.unwrapped, gym.envs.atari.atari_env.AtariEnv)) if is_atari: env = make_atari(env_id) is_minigrid = "MiniGrid" in env_id if set_time_limit is not None: env = TimeLimit(env, set_time_limit) env.seed(seed + rank) obs_shape = env.observation_space.shape if str(env.__class__.__name__).find( 'TimeLimit') >= 0 or set_time_limit is not None: env = TimeLimitMask(env) if log_dir is not None: env = bench.Monitor(env, os.path.join(log_dir, str(rank)), allow_early_resets=allow_early_resets) if is_atari: if len(env.observation_space.shape) == 3: env = wrap_deepmind(env) elif is_minigrid: pass elif len(env.observation_space.shape) == 3: raise NotImplementedError( "CNN models work only for atari,\n" "please use a custom wrapper for a custom pixel input env.\n" "See wrap_deepmind for an example.") # If the input has shape (W,H,3), wrap for PyTorch convolutions obs_shape = env.observation_space.shape if len(obs_shape) == 3 and obs_shape[2] in [1, 3]: env = TransposeImage(env, op=[2, 0, 1]) return env
def _thunk():
    if env_type == 'unity':
        from gym_unity.envs import UnityEnv
        import random
        r = random.randint(64, 164)
        print("***** HELLO", mpi_rank + r)
        env = UnityEnv(env_id, mpi_rank + r)
    else:
        env = make_atari(env_id) if env_type == 'atari' else gym.make(env_id)
    env.seed(seed + 10000 * mpi_rank + rank if seed is not None else None)
    env = Monitor(env,
                  logger.get_dir() and os.path.join(logger.get_dir(),
                                                    str(mpi_rank) + '.' + str(rank)),
                  allow_early_resets=True)
    if env_type == 'atari':
        return wrap_deepmind(env, **wrapper_kwargs)
    elif reward_scale != 1:
        return RewardScaler(env, reward_scale)
    else:
        return env
def main(): env = make_atari("BreakoutNoFrameskip-v0") env = WarpFrame(env) env = FrameStack(env, k=4) act = deepq.load("breakout_model.pkl") while True: obs, done = env.reset(), False episode_rew = 0 while not done: env.render() obs, rew, done, _ = env.step(act(obs[None])[0]) episode_rew += rew print("Episode reward", episode_rew)
def _thunk():
    env = make_atari(env_id)
    env.seed(seed + rank)
    env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
    if rank == start_index and 'video_log_dir' in kwargs:
        env = VideoLogMonitor(env, kwargs['video_log_dir'] + '_rgb',
                              write_attention_video=kwargs['write_attention_video'],
                              hparams=hparams, nsteps=nsteps)
    return wrap_deepmind(env, **wrapper_kwargs)
def train(env_id, num_timesteps, seed): """ Train TRPO model for the atari environment, for testing purposes :param env_id: (str) Environment ID :param num_timesteps: (int) The total number of samples :param seed: (int) The initial seed for training """ rank = MPI.COMM_WORLD.Get_rank() if rank == 0: logger.configure() else: logger.configure(format_strs=[]) workerseed = seed + 10000 * MPI.COMM_WORLD.Get_rank() set_global_seeds(workerseed) env = make_atari(env_id) def policy_fn(name, ob_space, ac_space, sess=None, placeholders=None): # pylint: disable=W0613 return CnnPolicy(name=name, ob_space=ob_space, ac_space=ac_space, sess=sess, placeholders=placeholders) env = bench.Monitor( env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank))) env.seed(workerseed) env = wrap_deepmind(env) env.seed(workerseed) trpo_mpi.learn(env, policy_fn, timesteps_per_batch=512, max_kl=0.001, cg_iters=10, cg_damping=1e-3, max_timesteps=int(num_timesteps * 1.1), gamma=0.98, lam=1.0, vf_iters=3, vf_stepsize=1e-4, entcoeff=0.00) env.close()
def make_dataset(env_name):
    # Hyper-parameters
    total_frames_to_generate = 100000
    # env_id = ['PongNoFrameskip-v4', 'SeaquestNoFrameskip-v4'][1]
    env_id = env_name
    save_path = './data/{}/sfmnet/episodes'.format(env_id)
    seed = 0

    # Track how many frames we have created.
    total_frames_generated = 0
    episode_index = 0

    # Create and set up the environment.
    env = make_atari(env_id)
    env = wrap_deepmind(env)
    env.seed(seed)
    set_global_seeds(seed)

    # Generate frames.
    while total_frames_generated < total_frames_to_generate:
        print("Starting episode {}".format(episode_index))
        obs = env.reset()
        frame_index = 0
        done = False
        while not done and total_frames_generated < total_frames_to_generate:
            # Take a random action.
            action = env.action_space.sample()
            obs, reward, done, info = env.step(action)

            # Create a directory to save frames to for this episode.
            episode_save_path = os.path.join(save_path, str(episode_index))
            if not os.path.exists(episode_save_path):
                os.makedirs(episode_save_path)

            # Save the frame.
            img = Image.fromarray(np.squeeze(obs), mode='L')
            img.save(os.path.join(episode_save_path,
                                  '{}_{}_{}.png'.format(frame_index, action, reward)))
            frame_index += 1
            total_frames_generated += 1

        # Start a new episode.
        episode_index += 1
def main():
    global abort_training
    global q_pressed
    abort_training = False
    q_pressed = False
    listener = keyboard.Listener(on_press=on_press, on_release=on_release)
    listener.start()

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env', help='environment ID', default='BreakoutNoFrameskip-v4')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--prioritized', type=int, default=1)
    parser.add_argument('--dueling', type=int, default=1)
    parser.add_argument('--num-timesteps', type=int, default=int(10e6))
    args = parser.parse_args()

    logger.configure()
    set_global_seeds(args.seed)
    env = make_atari(args.env)
    env = bench.Monitor(env, logger.get_dir())
    env = deepq.wrap_atari_dqn(env)
    model = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=bool(args.dueling),
    )
    act = deepq.learn(env,
                      q_func=model,
                      lr=1e-4,
                      max_timesteps=args.num_timesteps,
                      buffer_size=10000,
                      exploration_fraction=0.1,
                      exploration_final_eps=0.01,
                      train_freq=4,
                      learning_starts=10000,
                      target_network_update_freq=1000,
                      gamma=0.99,
                      print_freq=1,
                      prioritized_replay=bool(args.prioritized),
                      callback=callback)
    print("Saving model to pong_model.pkl")
    act.save("pong_model.pkl")
    env.close()
def main(seed=0, n_episodes=100, epsilon=0.05, occlusion=0):
    np.random.seed(seed)
    logger.configure(dir="breakout_train_log")
    env = make_atari('BreakoutNoFrameskip-v4')
    env = bench.Monitor(env, logger.get_dir(), allow_early_resets=True)
    env = wrap_deepmind(env, frame_stack=True, scale=False,
                        episode_life=False, clip_rewards=False)

    ANN = Net()
    ANN.load_state_dict(torch.load('../trained_networks/pytorch_breakout_dqn.pt'))

    if not os.path.isdir("results"):
        os.mkdir("results")

    rewards = np.zeros(n_episodes)
    # outputs = []
    for episode in range(n_episodes):
        obs, done = env.reset(), False
        episode_rew = 0
        index_array = np.array(range(80 * 80))
        index_array = np.reshape(index_array, [80, 80])
        positions = np.random.choice(80 * 80, size=int(6400 * occlusion / 100),
                                     replace=False)
        indices = np.isin(index_array, positions)
        indices = np.repeat(np.expand_dims(indices, axis=0), 4, axis=0)
        indices = np.expand_dims(indices, axis=0)
        while not done:
            state = torch.tensor(obs[None], dtype=torch.float).permute(0, 3, 1, 2)
            state[np.where(indices)] = 0
            probabilities = policy(ANN(state)[0], epsilon)
            action = np.random.choice(np.arange(len(probabilities)), p=probabilities)
            obs, rew, done, _ = env.step(action)
            episode_rew += rew
        rewards[episode] = episode_rew
        print("Episode " + str(episode) + " reward", episode_rew)
    np.savetxt('results/occlusion_' + str(occlusion) + '_' + str(seed) + '.txt', rewards)
    env.close()
def get_player(self, train=False):
    if self.env:
        # Bug fix: the original returned the undefined local `env` here,
        # which would raise NameError; the cached env is what is meant.
        return self.env
    if self.config['ENV_TYPE'] == 'Classic':
        env = gym.make(self.config['ENV_NAME'])
    elif self.config['ENV_TYPE'] == 'Atari':
        if train:
            env = make_atari(self.config['ENV_NAME'])
            env = bench.Monitor(env, self.logger.get_dir())
            env = deepq.wrap_atari_dqn(env)
        else:
            env = gym.make(self.config['ENV_NAME'])
            env = deepq.wrap_atari_dqn(env)
    else:
        raise Exception('Environment Type %s - Not Supported' % self.config['ENV_TYPE'])
    return env
def _thunk():
    # Bug fix: the original used `is` to compare against a string literal,
    # which relies on interning; equality is what is meant.
    if env_type == 'CustomEnv':
        env = make_virtual_env(steps_until_done, rank)
    else:
        env = make_atari(env_id) if env_type == 'atari' else gym.make(env_id)
    env.seed(seed + 10000 * mpi_rank + rank if seed is not None else None)
    env = Monitor(env,
                  logger.get_dir() and os.path.join(logger.get_dir(),
                                                    str(mpi_rank) + '.' + str(rank)),
                  allow_early_resets=True)
    if env_type == 'atari':
        return wrap_deepmind(env, **wrapper_kwargs)
    elif reward_scale != 1:
        return RewardScaler(env, reward_scale)
    else:
        return env
def make_env(env_id, env_type, mpi_rank=0, subrank=0, seed=None, reward_scale=1.0,
             gamestate=None, flatten_dict_observations=True, wrapper_kwargs=None,
             logger_dir=None):
    wrapper_kwargs = wrapper_kwargs or {}
    if env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(
            game=env_id, max_episode_steps=10000,
            use_restricted_actions=retro.Actions.DISCRETE, state=gamestate)
    else:
        env = gym.make(env_id)

    if flatten_dict_observations and isinstance(env.observation_space, gym.spaces.Dict):
        env = FlattenObservation(env)

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        if 'frame_stack' not in wrapper_kwargs:
            wrapper_kwargs['frame_stack'] = 1
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)
    return env
def train(env_id, args):
    from baselines.ppo1 import cnn_policy
    import baselines.common.tf_util as U
    if args.nokl:
        from baselines.ppo1 import nokl_pposgd_simple as pposgd_simple
    else:
        from baselines.ppo1 import pposgd_simple
    rank = MPI.COMM_WORLD.Get_rank()
    sess = U.single_threaded_session()
    sess.__enter__()

    print('_'.join([str(arg) for arg in vars(args)]))
    logdir = osp.join('./result/',
                      '_'.join([str(getattr(args, arg)) for arg in vars(args)]))
    logger.configure(dir=logdir)

    workerseed = args.seed + 10000 * MPI.COMM_WORLD.Get_rank()
    set_global_seeds(workerseed)
    env = make_atari(env_id)

    def policy_fn(name, ob_space, ac_space):  # pylint: disable=W0613
        return cnn_policy.CnnPolicy(name=name, ob_space=ob_space, ac_space=ac_space)

    env = bench.Monitor(env, logger.get_dir() and osp.join(logger.get_dir(), str(rank)))
    env.seed(workerseed)
    gym.logger.setLevel(logging.WARN)
    env = wrap_deepmind(env)
    env.seed(workerseed)

    pposgd_simple.learn(env, policy_fn,
                        max_timesteps=int(args.num_timesteps * 1.1),
                        timesteps_per_actorbatch=args.timesteps_per_actorbatch,
                        clip_param=args.clip, entcoeff=args.entcoeff,
                        optim_epochs=args.optim_epochs,
                        optim_stepsize=args.optim_stepsize,
                        optim_batchsize=args.optim_batchsize,
                        gamma=0.99, lam=0.95, schedule='linear')
    env.close()
def make_env(env_id, env_type, mpi_rank=0, subrank=0, seed=None, reward_scale=1.0,
             gamestate=None, flatten_dict_observations=True, wrapper_kwargs=None,
             env_kwargs=None, logger_dir=None, initializer=None):
    if initializer is not None:
        initializer(mpi_rank=mpi_rank, subrank=subrank)

    wrapper_kwargs = wrapper_kwargs or {}
    env_kwargs = env_kwargs or {}
    if ':' in env_id:
        import re
        import importlib
        module_name = re.sub(':.*', '', env_id)
        env_id = re.sub('.*:', '', env_id)
        importlib.import_module(module_name)
    if env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(game=env_id, max_episode_steps=10000,
                                        use_restricted_actions=retro.Actions.DISCRETE,
                                        state=gamestate)
    else:
        env = gym.make(env_id, **env_kwargs)

    if flatten_dict_observations and isinstance(env.observation_space, gym.spaces.Dict):
        env = FlattenObservation(env)
        # keys = env.observation_space.spaces.keys()
        # env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        if 'frame_stack' not in wrapper_kwargs:
            wrapper_kwargs['frame_stack'] = 1
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if isinstance(env.action_space, gym.spaces.Box):
        env = ClipActionsWrapper(env)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    return env
def make_eval_env(env_id, dumpdir=None, wrapper_kwargs=None, seed=None):
    if wrapper_kwargs is None:
        wrapper_kwargs = {}
    wrapper_kwargs['is_monte'] = 'MontezumaRevenge' in env_id
    wrapper_kwargs['is_pong'] = 'Pong' in env_id
    env = make_atari(env_id)
    if seed is not None:
        env.seed(seed)
    env = LimitLength(env, 20000, timeout_penalty=0.0)
    env = gym.wrappers.Monitor(env, dumpdir, video_callable=lambda x: True, force=True)
    return wrap_deepmind(env, frame_stack=True, save_original_reward=True,
                         **wrapper_kwargs)
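# A hedged usage sketch for make_eval_env above: every episode of the
# length-capped run is recorded to ./videos (the ID and path are illustrative).
eval_env = make_eval_env('MontezumaRevengeNoFrameskip-v4', dumpdir='./videos', seed=0)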
def _thunk():
    env = make_atari(env_id)
    env.seed(seed + rank if seed is not None else None)
    if random_action:
        env = RandomRepeat(env)
    if eps_random:
        env = EpsRandom(env)
    if random_fix:
        env = RandomFix(env)
    env = Monitor(env,
                  logger.get_dir() and os.path.join(logger.get_dir(), str(rank)),
                  allow_early_resets=True)
    return wrap_deepmind_custom(env, episode_life=episode_life,
                                clip_rewards=clip_rewards, **wrapper_kwargs)
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env', help='environment ID', default='BreakoutNoFrameskip-v4')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--prioritized', type=int, default=1)
    parser.add_argument('--prioritized-replay-alpha', type=float, default=0.6)
    parser.add_argument('--dueling', type=int, default=1)
    parser.add_argument('--num-timesteps', type=int, default=int(10e6))
    parser.add_argument('--checkpoint-freq', type=int, default=10000)
    parser.add_argument('--checkpoint-path', type=str, default=None)
    args = parser.parse_args()

    logger.configure()
    set_global_seeds(args.seed)
    env = make_atari(args.env)
    env = bench.Monitor(env, logger.get_dir())
    env = deepq.wrap_atari_dqn(env)
    model = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=bool(args.dueling),
    )
    deepq.learn(
        env,
        q_func=model,
        lr=1e-4,
        max_timesteps=args.num_timesteps,
        buffer_size=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
        prioritized_replay=bool(args.prioritized),
        prioritized_replay_alpha=args.prioritized_replay_alpha,
        checkpoint_freq=args.checkpoint_freq,
        checkpoint_path=args.checkpoint_path,
    )
    env.close()
def _thunk(): print("CUSTOM GYM:", custom_gym) if custom_gym is not None and custom_gym != "": module = importlib.import_module(custom_gym, package=None) print("imported env '{}'".format((custom_gym))) if env_id.startswith("dm"): _, domain, task = env_id.split('.') env = dm_control2gym.make(domain_name=domain, task_name=task) else: env = gym.make(env_id) is_atari = hasattr(gym.envs, 'atari') and isinstance( env.unwrapped, gym.envs.atari.atari_env.AtariEnv) if is_atari: env = make_atari(env_id) if not is_atari and scale_img: env = WarpFrame(env, color_img) if duckietown: env = DuckietownRewardWrapper(env) if dt_discrete: env = DuckietownDiscreteWrapper(env) env = Normalize(env) env.seed(seed + rank) obs_shape = env.observation_space.shape if add_timestep and len( obs_shape) == 1 and str(env).find('TimeLimit') > -1: env = AddTimestep(env) if log_dir is not None: env = bench.Monitor(env, os.path.join(log_dir, str(rank)), allow_early_resets=True) if is_atari: env = wrap_deepmind(env) # If the input has shape (W,H,3), wrap for PyTorch convolutions obs_shape = env.observation_space.shape if len(obs_shape) == 3 and obs_shape[2] in [1, 3]: env = WrapPyTorch(env) return env
def _thunk(): print("CUSTOM GYM:", custom_gym) if custom_gym is not None and custom_gym != "": module = importlib.import_module(custom_gym, package=None) print("imported env '{}'".format((custom_gym))) if env_id.startswith("dm"): _, domain, task = env_id.split('.') env = dm_control2gym.make(domain_name=domain, task_name=task) else: env = gym.make(env_id) is_atari = hasattr(gym.envs, 'atari') and isinstance( env.unwrapped, gym.envs.atari.atari_env.AtariEnv) if is_atari: env = make_atari(env_id) env.seed(seed + rank) obs_shape = env.observation_space.shape if str(env.__class__.__name__).find('TimeLimit') >= 0: env = TimeLimitMask(env) if log_dir is not None: env = bench.Monitor(env, os.path.join(log_dir, str(rank)), allow_early_resets=allow_early_resets) if not navi: if is_atari: if len(env.observation_space.shape) == 3: env = wrap_deepmind(env) elif len(env.observation_space.shape) == 3: raise NotImplementedError( "CNN models work only for atari,\n" "please use a custom wrapper for a custom pixel input env.\n" "See wrap_deepmind for an example.") # If the input has shape (W,H,3), wrap for PyTorch convolutions obs_shape = env.observation_space.shape if len(obs_shape) == 3 and obs_shape[2] in [1, 3]: env = TransposeImage(env, op=[2, 0, 1]) return env
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env', help='environment ID', default='BreakoutNoFrameskip-v4')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--prioritized', type=int, default=1)
    parser.add_argument('--dueling', type=int, default=1)
    parser.add_argument('--num-timesteps', type=int, default=int(10e6))
    parser.add_argument('--train-with-latency', type=int, default=0)
    parser.add_argument('--train-with-all-latency-mode', type=int, default=0)
    args = parser.parse_args()

    loggerid = "L" + (("M" + str(args.train_with_all_latency_mode))
                      if (args.train_with_all_latency_mode != 0)
                      else str(args.train_with_latency))
    loggerdir = "./data." + loggerid + "/"
    logger.configure(dir=loggerdir)
    set_global_seeds(args.seed)
    env = make_atari(args.env)
    env = bench.Monitor(env, logger.get_dir())
    env = deepq.wrap_atari_dqn(env)
    model = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=bool(args.dueling),
    )
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-4,
        max_timesteps=args.num_timesteps,
        buffer_size=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
        prioritized_replay=bool(args.prioritized),
        print_freq=1,
        train_with_latency=args.train_with_latency,
        train_with_all_latency_mode=args.train_with_all_latency_mode)
    act.save(loggerdir + args.env + "." + loggerid + ".pkl")
    env.close()
def test(model, save_path):
    # Setting a seed
    seed = 42
    # Setting the video save path
    video_save_path = '/'.join((save_path, 'video'))
    # Initializing the environment
    env = make_atari("BreakoutNoFrameskip-v4")
    # Wrapping the env with deepmind wrapper
    env = wrap_deepmind(env, frame_stack=True, scale=True)
    env.seed(seed)
    # Adding the monitor as a wrapper to the environment
    env = gym.wrappers.Monitor(env, video_save_path,
                               video_callable=lambda episode_id: True, force=True)
    # Setting the return parameters
    n_episodes = 10
    rewards = np.zeros(n_episodes, dtype=float)
    for i in range(n_episodes):
        # Resetting the state for each episode
        state = np.array(env.reset())
        done = False
        while not done:
            # Choosing an action based on greedy policy
            state_tensor = tf.convert_to_tensor(state)
            state_tensor = tf.expand_dims(state_tensor, 0)
            action_values = model.predict(state_tensor)
            action = np.argmax(action_values)
            # Perform action and get next state, reward and done
            state_next, reward, done, _ = env.step(action)
            state = np.array(state_next)
            # Update the reward observed at episode i
            rewards[i] += reward
    env.close()
    return rewards
def train(env_id, num_timesteps, seed):
    from baselines.ppo1_cmaes_layer_pl import pposgd_simple, cnn_policy
    import baselines.common.tf_util as U
    rank = MPI.COMM_WORLD.Get_rank()
    sess = U.single_threaded_session()
    sess.__enter__()
    if rank == 0:
        logger.configure(filename="PPO1-" + env_id,
                         format_strs=['stdout', 'log', 'csv'])
    else:
        logger.configure(format_strs=[])
    workerseed = seed + 10000 * MPI.COMM_WORLD.Get_rank()
    set_global_seeds(workerseed)
    env = make_atari(env_id)

    def policy_fn(name, ob_space, ac_space):  # pylint: disable=W0613
        return cnn_policy.CnnPolicy(name=name, ob_space=ob_space, ac_space=ac_space)

    env = bench.Monitor(env, logger.get_dir() and osp.join(logger.get_dir(), str(rank)))
    # test_env = bench.Monitor(test_env, logger.get_dir() and
    #                          osp.join(logger.get_dir(), str(rank)))
    env.seed(workerseed)
    env = wrap_deepmind(env)
    env.seed(workerseed)

    pposgd_simple.learn(env, policy_fn,
                        max_timesteps=int(num_timesteps * 1.1),
                        timesteps_per_actorbatch=256,
                        clip_param=0.1, entcoeff=0.01,
                        optim_epochs=4, optim_stepsize=1e-6, optim_batchsize=64,
                        gamma=0.99, lam=0.95,
                        schedule='linear',
                        seed=seed,
                        env_id=env_id)
    env.close()
def worker_process(remote: multiprocessing.connection.Connection, parameters, worker_id, seed):
    """
    This function is used as the target by each of the threads in the multiprocess
    to build environment instances and define the commands that can be executed
    by each of the workers.
    """
    # The Atari wrappers are now imported from openAI baselines
    # https://github.com/openai/baselines
    log_dir = './log'
    if parameters['env_type'] == 'atari':
        env = make_atari(parameters['scene'])
        env = bench.Monitor(env, os.path.join(log_dir, str(worker_id)),
                            allow_early_resets=False)
        env = wrap_deepmind(env, True)
    if parameters['env_type'] == 'warehouse':
        env = Warehouse(seed, parameters)
    if parameters['env_type'] == 'sumo':
        env = LoopNetwork(parameters, seed)
    if parameters['env_type'] == 'minigrid':
        env = gym.make(parameters['scene'])
        # env = RGBImgPartialObsWrapper(env, tile_size=12)  # Get pixel observations
        env = ImgObsWrapper(env)  # Get rid of the 'mission' field
        env = wrappers.GrayScaleObservation(env, keep_dim=True)  # Gray scale
        env = FeatureVectorWrapper(env)
    env.seed(seed)

    while True:
        cmd, data = remote.recv()
        if cmd == 'step':
            obs, reward, done, info = env.step(data)
            if done:
                obs = env.reset()
            remote.send((obs, reward, done, info))
        elif cmd == 'reset':
            remote.send(env.reset())
        elif cmd == 'action_space':
            remote.send(env.action_space.n)
        elif cmd == 'close':
            remote.close()
            break
        else:
            raise NotImplementedError
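# A minimal parent-side sketch of the command protocol worker_process expects;
# the parameters dict, seed, and env ID are illustrative, and only the 'atari'
# branch of the worker is exercised here.
import multiprocessing

parent_conn, child_conn = multiprocessing.Pipe()
params = {'env_type': 'atari', 'scene': 'PongNoFrameskip-v4'}
proc = multiprocessing.Process(target=worker_process,
                               args=(child_conn, params, 0, 42))
proc.start()

parent_conn.send(('action_space', None))
n_actions = parent_conn.recv()                # number of discrete actions
parent_conn.send(('reset', None))
obs = parent_conn.recv()                      # initial observation
parent_conn.send(('step', 0))                 # take action 0
obs, reward, done, info = parent_conn.recv()
parent_conn.send(('close', None))
proc.join()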
def _thunk():
    info_keywords = ()
    if env_id.startswith("dm"):
        _, domain, task = env_id.split('.')
        env = dm_control2gym.make(domain_name=domain, task_name=task)
    elif env_id.startswith("osim"):
        info_keywords = ('rb',)
        # https://github.com/stanfordnmbl/osim-rl
        _, task = env_id.split('.')
        if task == "Prosthetics":
            env = MyProstheticsEnv(integrator_accuracy=1e-4, **kwargs)
        elif task == "Arm2D":
            env = Arm2DEnv(integrator_accuracy=1e-4, **kwargs)
        else:  # task == "L2Run"
            assert task == "L2Run"
            env = L2RunEnv(integrator_accuracy=1e-4, **kwargs)
    else:
        env = gym.make(env_id)
    is_atari = hasattr(gym.envs, 'atari') and isinstance(
        env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
    if is_atari:
        env = make_atari(env_id)
    env.seed(seed + rank)
    obs_shape = env.observation_space.shape
    if add_timestep and len(obs_shape) == 1 and str(env).find('TimeLimit') > -1:
        env = AddTimestep(env)
    if log_dir is not None:
        env = Monitor(env, os.path.join(log_dir, str(rank)),
                      info_keywords=info_keywords,
                      allow_early_resets=allow_early_resets)
    if is_atari:
        env = wrap_deepmind(env)
    # If the input has shape (W,H,3), wrap for PyTorch convolutions
    obs_shape = env.observation_space.shape
    if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
        env = TransposeImage(env)
    return env
def _thunk(): if env_id.startswith("dm"): _, domain, task = env_id.split('.') env = dm_control2gym.make(domain_name=domain, task_name=task) else: env = gym.make(env_id) is_atari = hasattr(gym.envs, 'atari') and isinstance( env.unwrapped, gym.envs.atari.atari_env.AtariEnv) if is_atari: env = make_atari(env_id) env.seed(seed + rank) #추가코드 env.init_dart() env.init_sim(True, False) #env.start_render() obs_shape = env.observation_space.shape if str(env.__class__.__name__).find('TimeLimit') >= 0: env = TimeLimitMask(env) if log_dir is not None: env = bench.Monitor(env, os.path.join(log_dir, str(rank)), allow_early_resets=allow_early_resets) if is_atari: if len(env.observation_space.shape) == 3: env = wrap_deepmind(env) elif len(env.observation_space.shape) == 3: raise NotImplementedError( "CNN models work only for atari,\n" "please use a custom wrapper for a custom pixel input env.\n" "See wrap_deepmind for an example.") # If the input has shape (W,H,3), wrap for PyTorch convolutions obs_shape = env.observation_space.shape if len(obs_shape) == 3 and obs_shape[2] in [1, 3]: env = TransposeImage(env, op=[2, 0, 1]) return env
def _thunk():
    env = make_atari(env_id)
    env.seed(seed + rank)
    env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
    return wrap_deepmind(env, **wrapper_kwargs)
import gym
from baselines.common.atari_wrappers import make_atari, wrap_deepmind

ENV = 'BreakoutNoFrameskip-v4'

# env = gym.make(ENV)
env = wrap_deepmind(make_atari(ENV), frame_stack=True)
env.reset()

# for _ in range(1000):
index = 0
while True:
    env.render()
    _, reward, done, info = env.step(env.action_space.sample())
    print(index, reward, done, info)
    if done:
        break
    index += 1
env.close()