def benchmark_score_from_local(benchmark_id, training_dir):
    spec = gym.benchmark_spec(benchmark_id)

    directories = []
    for name, _, files in os.walk(training_dir):
        manifests = gym.monitoring.detect_training_manifests(name, files=files)
        if manifests:
            directories.append(name)

    benchmark_results = defaultdict(list)
    for training_dir in directories:
        results = gym.monitoring.load_results(training_dir)
        env_id = results['env_info']['env_id']
        benchmark_result = spec.score_evaluation(
            env_id,
            results['data_sources'],
            results['initial_reset_timestamps'],
            results['episode_lengths'],
            results['episode_rewards'],
            results['episode_types'],
            results['timestamps'])
        # from pprint import pprint
        # pprint(benchmark_result)
        benchmark_results[env_id].append(benchmark_result)

    return gym.benchmarks.scoring.benchmark_aggregate_score(
        spec, benchmark_results)
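# A minimal usage sketch for benchmark_score_from_local. The 'tmp/gym-results'
# path below is hypothetical; point it at whatever directory your gym Monitor
# wrapper wrote its manifests to during training.
if __name__ == '__main__':
    print(benchmark_score_from_local('Atari40M', 'tmp/gym-results'))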
def q1_run(num_timesteps):
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[3]

    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed, expt_dir='tmp/gym-results2')

    optimizer_spec = OptimizerSpec(
        constructor=optim.RMSprop,
        kwargs=dict(lr=LEARNING_RATE, alpha=ALPHA, eps=EPS),
    )

    exploration_schedule = LinearSchedule(1000000, 0.1)

    dqn_learning(
        env=env,
        q_func=DQN,
        runname="normal_run",
        optimizer_spec=optimizer_spec,
        exploration=exploration_schedule,
        stopping_criterion=stopping_criterion2(num_timesteps),
        replay_buffer_size=REPLAY_BUFFER_SIZE,
        batch_size=BATCH_SIZE,
        gamma=GAMMA,
        learning_starts=LEARNING_STARTS,
        learning_freq=LEARNING_FREQ,
        frame_history_len=FRAME_HISTORY_LEN,
        target_update_freq=TARGET_UPDATE_FREQ
    )
def main():
    # Games that we'll be testing.
    game_to_ID = {'BeamRider': 0, 'Breakout': 1, 'Enduro': 2, 'Pong': 3, 'Qbert': 4}

    # Get some arguments here. Note: the num_timesteps default matches the task's default.
    parser = argparse.ArgumentParser()
    parser.add_argument('--game', type=str, default='Pong')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--num_timesteps', type=int, default=40000000)
    args = parser.parse_args()

    # Choose the game to play and set log file.
    benchmark = gym.benchmark_spec('Atari40M')
    task = benchmark.tasks[game_to_ID[args.game]]
    log_name = args.game + "_s" + str(args.seed).zfill(3) + ".pkl"

    # Run training. Should change the seed if possible!
    # Also, the actual # of iterations run is _roughly_ num_timesteps/4.
    seed = args.seed
    env = get_env(task, seed)
    session = get_session()
    print("task = {}".format(task))
    atari_learn(env, session, num_timesteps=args.num_timesteps, log_file=log_name)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--lr_multiplier', type=float, default=1.0)
    parser.add_argument('--target_update_freq', type=float, default=10000)
    parser.add_argument('--exp_name', type=str, default='Knapsack')
    parser.add_argument('--boltzmann_exploration', action='store_true')
    args = parser.parse_args()

    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[3]

    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    # env = get_env(task, seed)
    env = Knapsack(10, 3)
    # test_env = Knapsack(5, 1)
    # session = get_session()
    knapsack_learn(env,
                   None,
                   num_timesteps=task.max_timesteps,
                   lr_multiplier=args.lr_multiplier,
                   target_update_freq=args.target_update_freq,
                   exp_name=args.exp_name,
                   boltzmann_exploration=args.boltzmann_exploration)
def main():
    # os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    # os.environ["CUDA_VISIBLE_DEVICES"] = "4"

    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[6]
    id2game = {
        'QbertNoFrameskip-v4': 'qbert',
        'SpaceInvadersNoFrameskip-v4': 'spaceinvaders'
    }
    g = id2game[task.env_id]
    # task = 'SpaceInvadersNoFrameskip-v4'

    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed)
    session = get_session()
    log_dir = os.path.join(
        './logs', env.spec.id,
        datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S"))
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    atari_learn(env, g, session,
                num_timesteps=task.max_timesteps,
                log_dir=log_dir,
                double_q=True,
                soft_q=True,
                use_expert=False)
def main(_):
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        global_step = tf.Variable(0, name='global_step', trainable=False)
        benchmark = gym.benchmark_spec('Atari40M')

        # Change the index to select a different game.
        task = benchmark.tasks[3]

        # Run training
        seed = 0  # Use a seed of zero (you may want to randomize the seed!)
        env = get_env(task, seed)
        np.random.seed(RANDOM_SEED)
        tf.set_random_seed(RANDOM_SEED)

        # state_dim = np.prod(env.observation_space.shape)
        state_dim = env.reset().shape
        print('state_dim:', state_dim)
        # Integer division so action_dim stays an int under Python 3.
        action_dim = env.action_space.n // 2  # 3 actions: 1: hold, 2: up, 3: down
        print('action_dim:', action_dim)

        pg = PGNetwork(sess, state_dim, action_dim, PG_LEARNING_RATE)
        train(sess, env, pg, global_step)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_norm', action='store_true')
    parser.add_argument('--max_pool', action='store_true')
    parser.add_argument('--doubleQ', action='store_true')
    parser.add_argument('--log_name', type=str, default='default')
    parser.add_argument('--buf_size', type=int, default=1000000)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--learn_start', type=int, default=50000)
    parser.add_argument('--learn_freq', type=int, default=4)
    parser.add_argument('--frame_hist', type=int, default=4)
    parser.add_argument('--targ_up_freq', type=int, default=10000)
    parser.add_argument('--grad_clip', type=float, default=10.0)
    parser.add_argument('--lr_multiplier', type=float, default=1.0)
    parser.add_argument('--ep_start', type=float, default=1.0)
    args = parser.parse_args()

    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[3]

    # Run training
    seed = random.randint(0, 1000)
    print("Seed: {}".format(seed))
    env = get_env(task, seed)
    atari_learn(env, args, num_timesteps=task.max_timesteps)
def main():
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    # VRR: We can use this to try multiple games.
    # (0) Flat 3d space invaders (1) Breakout (2) Racing Car (3) Pong
    # (4) Weird pyramid (5) Seaquest (6) Old looking space invaders
    task = benchmark.tasks[5]

    # Run training
    seed = round(time.time())  # Seed from the current time so each run differs.
    env = get_env(task, seed)

    # #["[10, 30]_[20, 50]","[25, 10]_[15, 70]","[42, 10]_[20, 42]","[10, 10]_[60, 20]"]
    # #origin = [[10,30],[25,10],[42,10],[10,10]]
    # #h_and_w = [[20,50],[15,70],[20,42],[60,20]]
    # origin = [[50,10],[10,50],[40,20],[10,15]]
    # h_and_w = [[15,70],[60,20],[20,50],[20,42]]
    #
    # obs = sensor_noise(env.reset(),origin[0][0],origin[0][1],h_and_w[0][0],h_and_w[0][1])
    # obs = sensor_noise(obs,origin[1][0],origin[1][1],h_and_w[1][0],h_and_w[1][1])
    # obs = sensor_noise(obs,origin[2][0],origin[2][1],h_and_w[2][0],h_and_w[2][1])
    # obs = sensor_noise(obs,origin[3][0],origin[3][1],h_and_w[3][0],h_and_w[3][1])
    # plt.imshow(obs[:,:,0])
    # plt.pause(10.0)
    # env.reset()

    session = get_session()
    # atari_learn(env, session, num_timesteps=task.max_timesteps/2)
    robust_rl(env, session)
def q2_run(num_timesteps):
    schedulers = {"no_explore": ConstantSchedule(0.1),
                  "delayed_decay": PiecewiseSchedule([(0, 1.0), (0.25e6, 1.0), (1.25e6, 0.1)],
                                                     outside_value=0.1),
                  "slower_decay": LinearSchedule(1500000, 0.1)}

    for name, exploration_schedule in schedulers.items():
        # Get Atari games.
        benchmark = gym.benchmark_spec('Atari40M')

        # Change the index to select a different game.
        task = benchmark.tasks[3]

        # Run training
        seed = 0  # Use a seed of zero (you may want to randomize the seed!)
        env = get_env(task, seed)
        env.reset()

        optimizer_spec = OptimizerSpec(constructor=optim.RMSprop,
                                       kwargs=dict(lr=LEARNING_RATE, alpha=ALPHA, eps=EPS))

        dqn_learning(
            env=env,
            q_func=DQN,
            runname=name,
            optimizer_spec=optimizer_spec,
            exploration=exploration_schedule,
            stopping_criterion=stopping_criterion2(num_timesteps),
            replay_buffer_size=REPLAY_BUFFER_SIZE,
            batch_size=BATCH_SIZE,
            gamma=GAMMA,
            learning_starts=LEARNING_STARTS,
            learning_freq=LEARNING_FREQ,
            frame_history_len=FRAME_HISTORY_LEN,
            target_update_freq=TARGET_UPDATE_FREQ
        )
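# Quick sanity check of the three exploration schedules compared in q2_run.
# This is a sketch only: it assumes the schedule classes expose the value(t)
# accessor used by the course's dqn_utils helpers.
def print_schedule_values():
    schedules = {"no_explore": ConstantSchedule(0.1),
                 "delayed_decay": PiecewiseSchedule([(0, 1.0), (0.25e6, 1.0), (1.25e6, 0.1)],
                                                    outside_value=0.1),
                 "slower_decay": LinearSchedule(1500000, 0.1)}
    for t in (0, int(0.5e6), int(1e6), int(2e6)):
        # Epsilon each schedule would use at timestep t.
        print(t, {name: s.value(t) for name, s in schedules.items()})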
def atari_main():
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    # ['BeamRiderNoFrameskip-v4', 'BreakoutNoFrameskip-v4', 'EnduroNoFrameskip-v4',
    #  'PongNoFrameskip-v4', 'QbertNoFrameskip-v4', 'SeaquestNoFrameskip-v4',
    #  'SpaceInvadersNoFrameskip-v4']
    task = benchmark.tasks[1]
    print('available tasks: ', [t.env_id for t in benchmark.tasks])
    print('task: ', task.env_id, 'max steps: ', task.max_timesteps)

    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed)
    last_obs = env.reset()

    exploration_schedule = PiecewiseSchedule(
        [
            (0, 1.0),
            (1e6, 0.1),
            (task.max_timesteps / 2, 0.01),
        ],
        outside_value=0.01
    )

    dqn = DoubleDQN(image_shape=(84, 84, 1),
                    num_actions=env.action_space.n,
                    training_starts=50000,
                    target_update_freq=10000,
                    training_batch_size=32,
                    # training_starts=2000,
                    # target_update_freq=500,
                    # training_batch_size=3,
                    exploration=exploration_schedule)

    reward_sum_episode = 0
    num_episodes = 0
    episode_rewards = deque(maxlen=100)
    for step in range(task.max_timesteps):
        if step > 0 and step % 1000 == 0:
            print('step: ', step,
                  'episodes:', num_episodes,
                  'epsilon:', exploration_schedule.value(step),
                  'learning rate:', dqn.get_learning_rate(),
                  'last 100 training loss mean', dqn.get_avg_loss(),
                  'last 100 episode mean rewards: ',
                  np.mean(np.array(episode_rewards, dtype=np.float32)))

        env.render()
        action = dqn.choose_action(step, last_obs)
        obs, reward, done, info = env.step(action)
        reward_sum_episode += reward
        dqn.learn(step, action, reward, done, info)
        if done:
            last_obs = env.reset()
            episode_rewards.append(reward_sum_episode)
            reward_sum_episode = 0
            num_episodes += 1
        else:
            last_obs = obs
def _upload_benchmark(training_dir, algorithm_id, benchmark_id, benchmark_run_tags, api_key, ignore_open_monitors, skip_videos): # We're uploading a benchmark run. directories = [] env_ids = [] for name, _, files in os.walk(training_dir): manifests = monitoring.detect_training_manifests(name, files=files) if manifests: env_info = monitoring.load_env_info_from_manifests(manifests, training_dir) env_ids.append(env_info['env_id']) directories.append(name) # Validate against benchmark spec try: spec = benchmark_spec(benchmark_id) except error.UnregisteredBenchmark: raise error.Error("Invalid benchmark id: {}. Are you using a benchmark registered in gym/benchmarks/__init__.py?".format(benchmark_id)) spec_env_ids = [task.env_id for task in spec.tasks for _ in range(task.trials)] if not env_ids: raise error.Error("Could not find any evaluations in {}".format(training_dir)) # This could be more stringent about mixing evaluations if sorted(env_ids) != sorted(spec_env_ids): logger.info("WARNING: Evaluations do not match spec for benchmark %s. In %s, we found evaluations for %s, expected %s", benchmark_id, training_dir, sorted(env_ids), sorted(spec_env_ids)) tags = json.dumps(benchmark_run_tags) _create_with_retries = util.retry_exponential_backoff( resource.BenchmarkRun.create, (error.APIConnectionError,), max_retries=5, interval=3, ) benchmark_run = _create_with_retries(benchmark_id=benchmark_id, algorithm_id=algorithm_id, tags=tags) benchmark_run_id = benchmark_run.id # Actually do the uploads. for training_dir in directories: # N.B. we don't propagate algorithm_id to Evaluation if we're running as part of a benchmark _upload_with_retries = util.retry_exponential_backoff( _upload, (error.APIConnectionError,), max_retries=5, interval=3, ) _upload_with_retries(training_dir, None, None, benchmark_run_id, api_key, ignore_open_monitors, skip_videos) logger.info(""" **************************************************** You successfully uploaded your benchmark on %s to OpenAI Gym! You can find it at: %s **************************************************** """.rstrip(), benchmark_id, benchmark_run.web_url()) return benchmark_run_id
def main():
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[4]

    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed)
    session = get_session()
    atari_learn(env, session, num_timesteps=task.max_timesteps)
def run_model(model_path, log_path, max_episode_count=500):
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[3]

    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed, log_path)
    session = get_session()
    atari_run(env, session, model_path, max_episode_count)
def main():
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    # 3 is pong, 4 is Q*bert, 1 is breakout, 5 is seaquest, 6 is space invaders
    task = benchmark.tasks[6]

    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed)
    session = get_session()
    atari_learn(env, session, num_timesteps=task.max_timesteps)
def gen_pong_env(seed):
    """Generate a pong environment, with all the bells and whistles."""
    benchmark = gym.benchmark_spec('Atari40M')
    task = benchmark.tasks[3]
    env_id = task.env_id
    env = gym.make(env_id)
    env.seed(seed)
    # Can wrap in gym.wrappers.Monitor here if we want to record.
    env = wrap_deepmind(env)
    return env
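# Usage sketch for gen_pong_env: roll one random-policy episode. The loop is
# illustrative only and assumes gym and wrap_deepmind are imported as in the
# snippet above; with wrap_deepmind an "episode" ends on life loss.
env = gen_pong_env(seed=0)
obs, done, total_reward = env.reset(), False, 0.0
while not done:
    obs, reward, done, _ = env.step(env.action_space.sample())
    total_reward += reward
print("random-policy episode reward:", total_reward)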
def main():
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[3]

    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed)
    session = get_session()
    atari_learn(env, session, num_timesteps=task.max_timesteps)
def main():
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[2]

    # Run training
    env = mvc_env.MVC_env(7)
    graph_learn(env,
                num_timesteps=task.max_timesteps,
                q_func=Q_function_graph_model.Q_func)
def _upload_benchmark(training_dir, algorithm_id, benchmark_id, benchmark_run_tags, api_key, ignore_open_monitors, skip_videos): # We're uploading a benchmark run. directories = [] env_ids = [] for name, _, files in os.walk(training_dir): manifests = monitoring.detect_training_manifests(name, files=files) if manifests: env_info = monitoring.load_env_info_from_manifests(manifests, training_dir) env_ids.append(env_info['env_id']) directories.append(name) # Validate against benchmark spec try: spec = benchmark_spec(benchmark_id) except error.UnregisteredBenchmark: raise error.Error("Invalid benchmark id: {}. Are you using a benchmark registered in gym/benchmarks/__init__.py?".format(benchmark_id)) spec_env_ids = [task.env_id for task in spec.tasks for _ in range(task.trials)] if not env_ids: raise error.Error("Could not find any evaluations in {}".format(training_dir)) # This could be more stringent about mixing evaluations if sorted(env_ids) != sorted(spec_env_ids): logger.info("WARNING: Evaluations do not match spec for benchmark %s. In %s, we found evaluations for %s, expected %s", benchmark_id, training_dir, sorted(env_ids), sorted(spec_env_ids)) benchmark_run = resource.BenchmarkRun.create(benchmark_id=benchmark_id, algorithm_id=algorithm_id, tags=json.dumps(benchmark_run_tags)) benchmark_run_id = benchmark_run.id # Actually do the uploads. for training_dir in directories: # N.B. we don't propagate algorithm_id to Evaluation if we're running as part of a benchmark _upload_with_retries = util.retry_exponential_backoff( _upload, (error.APIConnectionError,), max_retries=5, interval=3, ) _upload_with_retries(training_dir, None, None, benchmark_run_id, api_key, ignore_open_monitors, skip_videos) logger.info(""" **************************************************** You successfully uploaded your benchmark on %s to OpenAI Gym! You can find it at: %s **************************************************** """.rstrip(), benchmark_id, benchmark_run.web_url()) return benchmark_run_id
def main():
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[2]

    # Run training
    # env = get_env(task, seed)
    env = mvc_env.MVC_env(7)
    # env = tsp_env.TSP_env(5, no_move_penalty=0)
    graph_learn(env,
                num_timesteps=task.max_timesteps,
                q_func=Q_function_graph_model.Q_func)
def main():
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[2]

    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    # env = get_env(task, seed)
    env = Knapsack(10, 3)
    # env = tsp_env.TSP_env(5, no_move_penalty=0,
    #                       use_alternative_state=True)
    knapsack_learn(env, num_timesteps=task.max_timesteps)
def main():
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[3]

    PROJECT_ROOT = os.path.dirname(os.path.realpath(__file__))
    logz.configure_output_dir(os.path.join(
        PROJECT_ROOT, "log/" + "_RAM_" + time.strftime("%d-%m-%Y_%H-%M-%S")))

    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed)
    session = get_session()
    atari_learn(env, session, num_timesteps=task.max_timesteps)
def main():
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[3]

    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed)
    session = get_session()
    atari_learn(env, session,
                num_timesteps=task.max_timesteps,
                dir_name='/Users/anil/Code/ai/deeprlcourse/hw3/dqn/atari/')
def gen_vectorized_pong_env(n):
    """
    Generate a vectorized pong environment, with n simultaneous
    differently-seeded envs.

    For deterministic seeding, you should seed np.random.seed beforehand.
    """
    benchmark = gym.benchmark_spec('Atari40M')
    task = benchmark.tasks[3]
    env_id = task.env_id
    envs = [wrap_deepmind(gym.make(env_id)) for _ in range(n)]
    env = MultiprocessingEnv(envs)
    seeds = [int(s) for s in np.random.randint(0, 2 ** 30, size=n)]
    env.seed(seeds)
    return env
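# Usage sketch for gen_vectorized_pong_env. It assumes MultiprocessingEnv
# follows the common vectorized-env convention (reset() returns a batch of
# observations, step() takes one action per sub-env and returns batched
# results); adjust if its actual interface differs.
np.random.seed(0)      # make the per-env seeds deterministic, per the docstring
venv = gen_vectorized_pong_env(4)
obs = venv.reset()
actions = [0] * 4      # NOOP in every sub-env; purely illustrative
obs, rewards, dones, infos = venv.step(actions)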
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--cuda', '-gpu', action='store_true')
    args = parser.parse_args()

    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[3]

    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed)
    atari_learn(args, env, num_timesteps=task.max_timesteps)
def main():
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[3]

    # Run training
    if len(sys.argv) > 1:
        seed = int(sys.argv[1])
    else:
        seed = int(time.time())  # No seed given, so fall back to a time-based seed.
    print("Seed: " + str(seed))
    env = get_env(task, seed)
    session = get_session()
    atari_learn(env, session, num_timesteps=task.max_timesteps)
def main():
    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    # task = benchmark.tasks[3]
    task = benchmark.tasks[0]  # beam rider
    # task = benchmark.tasks[1]  # breakout

    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed)
    session = get_session()
    print("task:" + task.env_id + " max_timesteps:" + str(task.max_timesteps))
    atari_learn(env, session, num_timesteps=task.max_timesteps)
def main():
    args = parse_args()

    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[3]

    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed, args["model_fn"])
    session = get_session()
    max_timesteps = args.get("max_timesteps", None) or task.max_timesteps
    results_file = args["results_file"]
    model_fn = getattr(models, args["model_fn"])
    atari_learn(env, session, num_timesteps=max_timesteps, model_fn=model_fn)
def main():
    parser = argparse.ArgumentParser(description='RL agents for atari')
    subparsers = parser.add_subparsers(title="subcommands", dest="subcommand")

    train_parser = subparsers.add_parser("train", help="train an RL agent for atari games")
    train_parser.add_argument("--task-id", type=int, required=True,
                              help="0 = BeamRider, 1 = Breakout, 2 = Enduro, 3 = Pong, 4 = Qbert, 5 = Seaquest, 6 = Spaceinvaders")
    train_parser.add_argument("--gpu", type=int, default=None, help="ID of GPU to be used")
    train_parser.add_argument("--double-dqn", type=int, default=0, help="double dqn - 0 = No, 1 = Yes")
    train_parser.add_argument("--dueling-dqn", type=int, default=0, help="dueling dqn - 0 = No, 1 = Yes")
    args = parser.parse_args()  # command

    if args.gpu is not None:
        if torch.cuda.is_available():
            torch.cuda.set_device(args.gpu)
            print("CUDA Device: %d" % torch.cuda.current_device())

    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    # 0 = BeamRider
    # 1 = Breakout
    # 2 = Enduro
    # 3 = Pong
    # 4 = Qbert
    # 5 = Seaquest
    # 6 = Spaceinvaders
    # for i in benchmark.tasks:
    #     print(i)
    task = benchmark.tasks[args.task_id]
    # task = benchmark_class('Pong-v0')

    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    double_dqn = (args.double_dqn == 1)
    dueling_dqn = (args.dueling_dqn == 1)
    env = get_env(task, seed, task.env_id, double_dqn, dueling_dqn)
    print("Training on %s, double_dqn %d, dueling_dqn %d" % (task.env_id, double_dqn, dueling_dqn))
    atari_learn(env, task.env_id,
                num_timesteps=task.max_timesteps,
                double_dqn=double_dqn,
                dueling_dqn=dueling_dqn)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--frame', type=int, default=4)
    parser.add_argument('--log', type=str, default='progress.pkl')
    args = parser.parse_args()

    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')
    # print(benchmark)

    # Change the index to select a different game.
    task = benchmark.tasks[3]
    print('task.max_timesteps', task.max_timesteps)  # 40000000

    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed)
    session = get_session()
    atari_learn(env, session,
                num_timesteps=task.max_timesteps,
                frame_history_len=args.frame,
                log_file=args.log)
def main():
    # Logger
    # https://github.com/mwhittaker/homework/commit/cb043dbc980d898547f552e07f475696ce57f1d3
    format = "[%(asctime)-15s %(pathname)s:%(lineno)-3s] %(message)s"
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter(format))
    logger = logging.getLogger("dqn")
    logger.propagate = False
    logger.addHandler(handler)
    logger.setLevel(logging.DEBUG)

    # Get Atari games.
    benchmark = gym.benchmark_spec('Atari40M')

    # Change the index to select a different game.
    task = benchmark.tasks[3]

    # Run training
    seed = 0  # Use a seed of zero (you may want to randomize the seed!)
    env = get_env(task, seed)
    session = get_session()
    atari_learn(env, session, num_timesteps=task.max_timesteps)
def bonus_run(num_timesteps):
    def make_range_black(arr: np.ndarray, start, end):
        arr[:, start:end, :] = 0

    # Each filter blacks out a vertical strip of the frame in place.
    frame_filters = {"no_left_side": lambda x: make_range_black(x, 0, x.shape[1] // 4),
                     "no_middle_side": lambda x: make_range_black(x, x.shape[1] // 4, x.shape[1] // 2)}

    for name, frame_filter in frame_filters.items():
        # Get Atari games.
        benchmark = gym.benchmark_spec('Atari40M')

        # Change the index to select a different game.
        task = benchmark.tasks[3]

        # Run training
        seed = 0  # Use a seed of zero (you may want to randomize the seed!)
        env = get_env(task, seed)
        env.reset()

        optimizer_spec = OptimizerSpec(constructor=optim.RMSprop,
                                       kwargs=dict(lr=LEARNING_RATE, alpha=ALPHA, eps=EPS))

        dqn_learning(
            env=env,
            q_func=DQN,
            runname=name,
            frame_filter=frame_filter,
            optimizer_spec=optimizer_spec,
            exploration=LinearSchedule(1000000, 0.1),
            stopping_criterion=stopping_criterion2(num_timesteps),
            replay_buffer_size=REPLAY_BUFFER_SIZE,
            batch_size=BATCH_SIZE,
            gamma=GAMMA,
            learning_starts=LEARNING_STARTS,
            learning_freq=LEARNING_FREQ,
            frame_history_len=FRAME_HISTORY_LEN,
            target_update_freq=TARGET_UPDATE_FREQ
        )
def get_env(task):
    env_id = task.env_id
    env = gym.make(env_id)
    env = wrap_deepmind(env)
    return env


gamma = 0.99  # discount rate for advantage estimation and reward discounting
s_size = 7056  # Observations are greyscale frames of 84 * 84 * 1
load_model = False
N = 20
k = 1.
model_path = './qrdqn'

# Get Atari games.
benchmark = gym.benchmark_spec('Atari40M')

# Change the index to select a different game.
task = benchmark.tasks[3]

tf.reset_default_graph()

if not os.path.exists(model_path):
    os.makedirs(model_path)

env = get_env(task)
a_size = env.action_space.n
batch_size = 10

global_episodes = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
trainer = tf.train.AdamOptimizer(learning_rate=0.00015)
master_network = Q_Network(s_size, a_size, 'global', None)  # Generate global network
import gym

benchmark = gym.benchmark_spec('Atari40M')
task = benchmark.tasks[3]
env = gym.make(task.env_id)
# env = gym.make('Pong-ram-v0')

for i_episode in range(20):
    observation = env.reset()
    for t in range(100):
        env.render()
        # print(observation)
        # action = env.action_space.sample()
        action = 2
        observation, reward, done, info = env.step(action)
        print(action)
        if done:
            print("Episode finished after {} timesteps".format(t + 1))
            break
def upload(training_dir, algorithm_id=None, writeup=None, tags=None, benchmark_id=None,
           api_key=None, ignore_open_monitors=False):
    """Upload the results of training (as automatically recorded by your env's monitor)
    to OpenAI Gym.

    Args:
        training_dir (Optional[str]): A directory containing the results of a training run.
        algorithm_id (Optional[str]): An algorithm id indicating the particular version of
            the algorithm (including choices of parameters) you are running (visit
            https://gym.openai.com/algorithms to create an id). If the id doesn't match an
            existing server id it will create a new algorithm using algorithm_id as the name.
        benchmark_id (Optional[str]): The benchmark that these evaluations belong to. Will
            recursively search through training_dir for any Gym manifests. This feature is
            currently pre-release.
        writeup (Optional[str]): A Gist URL (of the form https://gist.github.com/<user>/<id>)
            containing your writeup for this evaluation.
        tags (Optional[dict]): A dictionary of key/values to store with the benchmark run
            (ignored for non-benchmark evaluations). Must be jsonable.
        api_key (Optional[str]): Your OpenAI API key. Can also be provided as an environment
            variable (OPENAI_GYM_API_KEY).
    """
    if benchmark_id:
        # We're uploading a benchmark run.
        directories = []
        env_ids = []
        for name, _, files in os.walk(training_dir):
            manifests = monitoring.detect_training_manifests(name, files=files)
            if manifests:
                env_info = monitoring.load_env_info_from_manifests(manifests, training_dir)
                env_ids.append(env_info['env_id'])
                directories.append(name)

        # Validate against benchmark spec
        try:
            spec = benchmark_spec(benchmark_id)
        except error.UnregisteredBenchmark as e:
            raise error.Error("Invalid benchmark id: {}. Are you using a benchmark registered in gym/benchmarks/__init__.py?".format(benchmark_id))

        # TODO: verify that the number of trials matches
        spec_env_ids = [task.env_id for task in spec.tasks for _ in range(task.trials)]

        if not env_ids:
            raise error.Error("Could not find any evaluations in {}".format(training_dir))

        # This could be more stringent about mixing evaluations
        if sorted(env_ids) != sorted(spec_env_ids):
            logger.info("WARNING: Evaluations do not match spec for benchmark {}. In {}, we found evaluations for {}, expected {}".format(
                benchmark_id, training_dir, sorted(env_ids), sorted(spec_env_ids)))

        benchmark_run = resource.BenchmarkRun.create(benchmark_id=benchmark_id, algorithm_id=algorithm_id, tags=json.dumps(tags))
        benchmark_run_id = benchmark_run.id

        # Actually do the uploads.
        for training_dir in directories:
            # N.B. we don't propagate algorithm_id to Evaluation if we're running as part of a benchmark
            _upload(training_dir, None, writeup, benchmark_run_id, api_key, ignore_open_monitors)

        logger.info("""
****************************************************
You successfully uploaded your benchmark on %s to
OpenAI Gym! You can find it at:

    %s

****************************************************
        """.rstrip(), benchmark_id, benchmark_run.web_url())

        return benchmark_run_id
    else:
        if tags is not None:
            logger.warn("Tags will NOT be uploaded for this submission.")

        # Single evaluation upload
        benchmark_run_id = None
        evaluation = _upload(training_dir, algorithm_id, writeup, benchmark_run_id, api_key, ignore_open_monitors)

        logger.info("""
****************************************************
You successfully uploaded your evaluation on %s to
OpenAI Gym! You can find it at:

    %s

****************************************************
        """.rstrip(), evaluation.env, evaluation.web_url())

        return None