def main():
    """Run evaluation episodes for the configured game and report rewards.

    Reads CLI options via parse_args(); in render mode only one episode is
    played and per-episode stats are printed, otherwise aggregate reward
    statistics are printed at the end.
    """
    arglist = parse_args()
    N_episode = arglist.episodes
    if arglist.render_mode:
        # Rendering is interactive; a single episode is enough.
        N_episode = 1
    reward_list = []
    prey_reward_list = []
    predator_reward_list = []

    if arglist.game == "Pong-2p-v0":
        with U.single_threaded_session():
            model = make_model(arglist, action_space=1, scope="pong",
                               model_path=arglist.model_path,
                               load_model=arglist.use_model)
            print('model size', model.param_count)
            env = make_env(arglist)
            U.initialize()
            for i in range(N_episode):
                reward, steps_taken = pong_simulate(model, env, arglist)
                if arglist.render_mode:
                    print("terminal reward", reward,
                          "average steps taken", np.mean(steps_taken) + 1)
                reward_list.append(reward[0])
            if not arglist.render_mode:
                print("seed", arglist.seed,
                      "average_reward", np.mean(reward_list),
                      "stdev", np.std(reward_list))

    if arglist.game == "prey_predator":
        with U.single_threaded_session():
            prey_model = make_model(arglist, action_space=2, scope="prey",
                                    model_path=arglist.model_path,
                                    load_model=arglist.use_model)
            predator_model = make_model(arglist, action_space=2, scope="predator",
                                        model_path=arglist.model_path,
                                        load_model=arglist.use_model)
            print('model size', prey_model.param_count, predator_model.param_count)
            env = make_env(arglist)
            U.initialize()
            for i in range(N_episode):
                prey_reward, predator_reward, steps_taken = pp_simulate(
                    [prey_model, predator_model], env, arglist)
                if arglist.render_mode:
                    # BUG FIX: the original printed an undefined name
                    # `rewards` here, raising NameError in render mode.
                    print("terminal reward", (prey_reward, predator_reward),
                          "average steps taken", np.mean(steps_taken) + 1)
                prey_reward_list.append(prey_reward)
                predator_reward_list.append(predator_reward)
            if not arglist.render_mode:
                print("Seed", arglist.seed,
                      "prey average_reward", np.mean(prey_reward_list),
                      "predator average_reward", np.mean(predator_reward_list))
def python_env(env=None, merge_env=None):
    """Return an environment dict whose PATH and PYTHONPATH include the
    bundled libdeno build output and third_party python packages.

    The site-package paths are probed once and memoized in the module-level
    python_site_env, then grafted onto a fresh environment on every call.
    """
    global python_site_env
    merge_env = {} if merge_env is None else merge_env
    # Lazily probe site.addsitedir() to find the search paths that would be
    # considered if 'third_party/python_packages' were a site-packages dir.
    # PATH is probed too, so Windows can find the DLLs shipped with pywin32.
    if python_site_env is None:
        python_site_env = {}
        saved_path = os.environ["PATH"]
        saved_sys_path = sys.path
        os.environ["PATH"] = ""
        sys.path = []
        site.addsitedir(os.path.join(libdeno_path, "build"))  # Modifies PATH and sys.path.
        site.addsitedir(python_packages_path)  # Modifies PATH and sys.path.
        python_site_env = {"PATH": os.environ["PATH"], "PYTHONPATH": sys.path}
        os.environ["PATH"] = saved_path
        sys.path = saved_sys_path
    # Start from a fresh environment object, then apply the probed
    # PATH and PYTHONPATH entries from the site-packages environment.
    env = make_env(env=env, merge_env=merge_env)
    add_env_path(python_site_env["PATH"], env=env, key="PATH")
    add_env_path(python_site_env["PYTHONPATH"], env=env, key="PYTHONPATH")
    return env
def google_env(env=None, merge_env=None, depot_tools_path=depot_tools_path):
    """Return an environment dict suitable for running depot_tools commands.

    Args:
        env: base environment mapping (None lets make_env pick its default).
        merge_env: extra entries merged into the resulting environment.
        depot_tools_path: directory of the depot_tools checkout to prepend
            to PATH.
    """
    # BUG FIX: the original used a mutable default argument (merge_env={});
    # any mutation of that shared dict would leak across calls. Normalize to
    # a fresh dict per call, matching the pattern used by python_env().
    if merge_env is None:
        merge_env = {}
    env = make_env(env=env, merge_env=merge_env)
    # Depot_tools to be in the PATH, before Python.
    path_prefix = depot_tools_path + os.path.pathsep
    if not env['PATH'].startswith(path_prefix):
        env['PATH'] = path_prefix + env['PATH']
    # We're not using Google's internal infrastructure.
    if os.name == 'nt' and 'DEPOT_TOOLS_WIN_TOOLCHAIN' not in env:
        env['DEPOT_TOOLS_WIN_TOOLCHAIN'] = "0"
    return env
def python_env(env=None, merge_env=None):
    """Return an environment dict whose PATH and PYTHONPATH include the
    third_party python packages directory.

    The site-package paths are probed once and memoized in the module-level
    python_site_env, then grafted onto a fresh environment on every call.
    """
    global python_site_env
    merge_env = {} if merge_env is None else merge_env
    # Lazily probe site.addsitedir() to find the search paths that would be
    # considered if 'third_party/python_packages' were a site-packages dir.
    # PATH is probed too, so Windows can find the DLLs shipped with pywin32.
    if python_site_env is None:
        python_site_env = {}
        saved_path = os.environ["PATH"]
        saved_sys_path = sys.path
        os.environ["PATH"] = ""
        sys.path = []
        site.addsitedir(python_packages_path)  # Modifies PATH and sys.path.
        python_site_env = {"PATH": os.environ["PATH"], "PYTHONPATH": sys.path}
        os.environ["PATH"] = saved_path
        sys.path = saved_sys_path
    # Start from a fresh environment object, then apply the probed
    # PATH and PYTHONPATH entries from the site-packages environment.
    env = make_env(env=env, merge_env=merge_env)
    add_env_path(python_site_env["PATH"], env=env, key="PATH")
    add_env_path(python_site_env["PYTHONPATH"], env=env, key="PYTHONPATH")
    return env
def google_env(env=None, merge_env=None, depot_tools_path=depot_tools_path):
    """Return an environment dict suitable for running depot_tools commands,
    with Windows-specific toolchain variables normalized.

    Args:
        env: base environment mapping (None lets make_env pick its default).
        merge_env: extra entries merged into the resulting environment.
        depot_tools_path: directory of the depot_tools checkout to prepend
            to PATH.
    """
    # BUG FIX: the original used a mutable default argument (merge_env={});
    # any mutation of that shared dict would leak across calls. Normalize to
    # a fresh dict per call, matching the pattern used by python_env().
    if merge_env is None:
        merge_env = {}
    env = make_env(env=env, merge_env=merge_env)
    # Depot_tools to be in the PATH, before Python.
    path_prefix = depot_tools_path + os.path.pathsep
    if not env['PATH'].startswith(path_prefix):
        env['PATH'] = path_prefix + env['PATH']
    if os.name == "nt":  # Windows-only environment tweaks.
        # We're not using Google's internal infrastructure.
        # (Removed a redundant re-check of os.name == "nt" here.)
        if "DEPOT_TOOLS_WIN_TOOLCHAIN" not in env:
            env["DEPOT_TOOLS_WIN_TOOLCHAIN"] = "0"
        # The 'setup_toolchain.py' script does a good job finding the Windows
        # SDK. Unfortunately, if any of the environment variables below are set
        # (as vcvarsall.bat typically would), setup_toolchain absorbs them too,
        # adding multiple identical -imsvc<path> items to CFLAGS.
        # This small variation has no effect on compiler output, but it
        # makes ninja rebuild everything, and causes sccache cache misses.
        # TODO(piscisaureus): fix this upstream.
        env["INCLUDE"] = ""
        env["LIB"] = ""
        env["LIBPATH"] = ""
    return env
if __name__ == '__main__': arglist = parse_args() if not os.path.exists(arglist.data_dir): os.makedirs(arglist.data_dir) total_frames = 0 if arglist.game == "Pong-2p-v0": with U.single_threaded_session(): model = make_model(arglist, action_space=1, scope="pong", model_path=arglist.model_path, load_model=arglist.use_model) env = make_env(arglist, full_episode=True) model.render_mode = arglist.render_mode U.initialize() for trial in range(arglist.max_trials): # 200 trials per worker try: random_generated_int = random.randint(0, 2**31 - 1) filename = arglist.data_dir + "/" + str( random_generated_int) + ".npz" recording_obs = [] recording_action = [] recording_oppo_action = [] np.random.seed(random_generated_int) env.seed(random_generated_int) # random policy model.init_random_model_params(stdev=np.random.rand() *
import numpy as np from dqn_agent import DQNAgent from util import make_env, plot_learning_curve from gym import wrappers if __name__ == "__main__": env = make_env('PongNoFrameskip-v4') best_score = -np.inf load_checkpoint = True n_games = 5 agent = DQNAgent(gamma=0.99, epsilon=0.1, lr=0.0001, input_dims=(env.observation_space.shape), # we have modified the default env settings to get env.observation_space.shape n_actions=env.action_space.n, mem_size=50000, eps_min=0.1, batch_size=32, replace=1000, eps_dec=1e-5, chkpt_dir='.\\models\\', algo='DQNAgent', env_name='PongNoFrameskip-v4') if load_checkpoint: agent.load_models() #env = wrappers.Monitor(env, '.\\video\\', video_callable=lambda episode_id: True, force=True) fname= agent.algo+'_'+agent.env_name+'_lr'+str(agent.lr)+'_'+str(n_games)+'games' figure_file = '.\\plots\\'+fname+'.png' n_steps= 0 scores, eps_history, steps_array = [], [], [] for i in range(n_games): done = False score = 0 observation = env.reset() # reset() method is overloaded in class StackFrames(gym.ObservationWrapper) , observation.shape = (4, 84, 84) # here (1, 84, 84) == (2, 84, 84) == (3, 84, 84) == (4, 84, 84)