def upload_training_data(training_dir, api_key=None):
    # Could have multiple manifests
    results = monitoring.load_results(training_dir)
    if not results:
        raise error.Error('''Could not find any manifest files in {}. (HINT: this usually means you did not yet close() your env.monitor and have not yet exited the process. You should call 'env.monitor.start(training_dir)' at the start of training and 'env.monitor.close()' at the end, or exit the process.)'''.format(training_dir))

    manifests = results['manifests']
    env_info = results['env_info']
    timestamps = results['timestamps']
    episode_lengths = results['episode_lengths']
    episode_rewards = results['episode_rewards']
    videos = results['videos']
    env_id = env_info['env_id']

    logger.debug('[%s] Uploading data from manifest %s', env_id, ', '.join(manifests))

    # Do the relevant uploads
    if len(episode_lengths) > 0:
        training_episode_batch = upload_training_episode_batch(episode_lengths, episode_rewards, timestamps, api_key, env_id=env_id)
    else:
        training_episode_batch = None

    if len(videos) > MAX_VIDEOS:
        logger.warn('[%s] You recorded videos for %s episodes, but the scoreboard only supports up to %s. We will automatically subsample for you, but you also might wish to adjust your video recording rate.', env_id, len(videos), MAX_VIDEOS)
        subsample_inds = np.linspace(0, len(videos) - 1, MAX_VIDEOS).astype('int')
        videos = [videos[i] for i in subsample_inds]

    if len(videos) > 0:
        training_video = upload_training_video(videos, api_key, env_id=env_id)
    else:
        training_video = None

    return env_info, training_episode_batch, training_video
def test_benchmarks():
    for benchmark_id in ['BernoulliBandit-v0', 'RandomTabularMDP-v0']:
        benchmark = registration.benchmark_spec(benchmark_id)
        for env_id in benchmark.env_ids:
            with helpers.tempdir() as temp:
                env = gym.make(env_id)
                env.seed(0)
                env.monitor.start(temp, video_callable=False)

                env.monitor.configure(mode='evaluation')
                rollout(env)

                env.monitor.configure(mode='training')
                for i in range(2):
                    rollout(env)

                env.monitor.configure(mode='evaluation')
                rollout(env, good=True)

                env.monitor.close()
                results = monitoring.load_results(temp)
                evaluation_score = benchmark.score_evaluation(
                    env_id, results['data_sources'], results['initial_reset_timestamps'],
                    results['episode_lengths'], results['episode_rewards'],
                    results['episode_types'], results['timestamps'])
                benchmark.score_benchmark({
                    env_id: evaluation_score['scores'],
                })
def upload_training_data(training_dir, api_key=None):
    # Could have multiple manifests
    results = monitoring.load_results(training_dir)
    if not results:
        raise error.Error('''Could not find any manifest files in {}. (HINT: this usually means you did not yet close() your env.monitor and have not yet exited the process. You should call 'env.monitor.start(training_dir)' at the start of training and 'env.monitor.close()' at the end, or exit the process.)'''.format(training_dir))

    manifests = results['manifests']
    env_info = results['env_info']
    timestamps = results['timestamps']
    episode_lengths = results['episode_lengths']
    episode_rewards = results['episode_rewards']
    videos = results['videos']

    logger.debug('Uploading data from manifest %s', ', '.join(manifests))

    # Do the relevant uploads
    if len(episode_lengths) > 0:
        training_episode_batch = upload_training_episode_batch(episode_lengths, episode_rewards, timestamps, api_key)
    else:
        training_episode_batch = None

    if len(videos) > MAX_VIDEOS:
        logger.warn('You recorded videos for {} episodes, but the scoreboard only supports up to {}. We will automatically subsample for you, but you also might wish to adjust your video recording rate.'.format(len(videos), MAX_VIDEOS))
        # Integer division so the result is a valid slice step under Python 3 as well
        skip = len(videos) // (MAX_VIDEOS - 1)
        videos = videos[::skip]

    if len(videos) > 0:
        training_video = upload_training_video(videos, api_key)
    else:
        training_video = None

    return env_info, training_episode_batch, training_video
def plot(self, full=True, dots=False, average=0, interpolated=0):
    print(self.outdir)
    results = monitoring.load_results(self.outdir)
    data = results[self.data_key]
    steps = results['episode_lengths']
    # print(steps)

    # Convert per-episode lengths into cumulative step counts for the x-axis
    count_steps = 0
    for i in range(len(steps)):
        count_steps += steps[i]
        steps[i] = count_steps

    avg_data = []
    if full:
        plt.plot(steps, data, color='blue')
    if dots:
        plt.plot(steps, data, '.', color='black')

    if average > 0:
        average = int(average)
        for i, val in enumerate(data):
            '''if i%average==0: if (i+average) < len(data)+average: avg = sum(data[i:i+average])/average avg_data.append(avg)'''
            if i < average:
                avg = np.array(data[:average]).mean()
            else:
                avg = np.array(data[(i - average):i]).mean()
            avg_data.append(avg)
        # new_data = expand(avg_data, average)
        plt.plot(steps, avg_data, color='red', linewidth=2.5)

    if interpolated > 0:
        avg_data = []
        avg_data_points = []
        n = len(data) // interpolated
        if n == 0:
            n = 1
        data_fix = 0
        for i, val in enumerate(data):
            if i % n == 0:
                if (i + n) <= len(data) + n:
                    avg = sum(data[i:i + n]) / n
                    avg_data.append(avg)
                    avg_data_points.append(i)
                if (i + n) == len(data):
                    data_fix = n
        x = np.arange(len(avg_data))
        y = np.array(avg_data)
        interp = pchip(avg_data_points, avg_data)
        xx = np.linspace(0, len(data) - data_fix, 1000)
        plt.plot(xx, interp(xx), color='green', linewidth=3.5)

    # pause so matplotlib will display
    # may want to figure out matplotlib animation or use a different library in the future
    plt.pause(0.000001)
    plt.savefig('result.png')
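# Aside (not part of the plot() method above): a rolling mean similar to the one
# computed in that loop can also be expressed with np.convolve. This is a generic
# numpy sketch; the default window size of 10 is illustrative, not taken from the
# original code.
import numpy as np

def moving_average(values, window=10):
    # Rolling mean over `values`; mode='valid' drops the first (window - 1)
    # points instead of padding, so the output is shorter than the input.
    kernel = np.ones(window) / window
    return np.convolve(values, kernel, mode='valid')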
def test_video_callable_records_videos():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env.monitor.start(temp)
        env.reset()
        env.monitor.close()
        results = monitoring.load_results(temp)
        assert len(results['videos']) == 1, "Videos: {}".format(results['videos'])
def test_video_callable_false_does_not_record():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env.monitor.start(temp, video_callable=False)
        env.reset()
        env.monitor.close()
        results = monitoring.load_results(temp)
        assert len(results['videos']) == 0
def upload_training_data(training_dir, api_key=None):
    # Could have multiple manifests
    results = monitoring.load_results(training_dir)
    if not results:
        raise error.Error('''Could not find any manifest files in {}. (HINT: this usually means you did not yet close() your env.monitor and have not yet exited the process. You should call 'env.monitor.start(training_dir)' at the start of training and 'env.monitor.close()' at the end, or exit the process.)'''.format(training_dir))

    manifests = results['manifests']
    env_info = results['env_info']
    data_sources = results['data_sources']
    timestamps = results['timestamps']
    episode_lengths = results['episode_lengths']
    episode_rewards = results['episode_rewards']
    episode_types = results['episode_types']
    initial_reset_timestamps = results['initial_reset_timestamps']
    main_seeds = results['main_seeds']
    seeds = results['seeds']
    videos = results['videos']
    env_id = env_info['env_id']

    logger.debug('[%s] Uploading data from manifest %s', env_id, ', '.join(manifests))

    # Do the relevant uploads
    if len(episode_lengths) > 0:
        training_episode_batch = upload_training_episode_batch(
            data_sources, episode_lengths, episode_rewards, episode_types,
            initial_reset_timestamps, timestamps, main_seeds, seeds,
            api_key, env_id=env_id)
    else:
        training_episode_batch = None

    if len(videos) > MAX_VIDEOS:
        logger.warn(
            '[%s] You recorded videos for %s episodes, but the scoreboard only supports up to %s. We will automatically subsample for you, but you also might wish to adjust your video recording rate.',
            env_id, len(videos), MAX_VIDEOS)
        subsample_inds = np.linspace(0, len(videos) - 1, MAX_VIDEOS).astype('int')
        videos = [videos[i] for i in subsample_inds]

    if len(videos) > 0:
        training_video = upload_training_video(videos, api_key, env_id=env_id)
    else:
        training_video = None

    return env_info, training_episode_batch, training_video
def select_specs(self):
    specs = self.specs

    selected_specs = []
    for i, spec in enumerate(specs):
        training_dir = self.env_dir(spec.id)
        results = monitoring.load_results(training_dir)
        if results and self.complete_callable(results):
            logger.info('Skipping already-processed %s', spec.id)
            continue
        elif os.path.exists(training_dir):
            shutil.rmtree(training_dir)
        selected_specs.append((spec, training_dir))

    self.selected_specs = selected_specs
def test():
    benchmark = registration.Benchmark(
        id='MyBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(),
        tasks=[{
            'env_id': 'CartPole-v0',
            'trials': 1,
            'max_timesteps': 5
        }, {
            'env_id': 'CartPole-v0',
            'trials': 1,
            'max_timesteps': 100,
        }])

    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = wrappers.Monitor(directory=temp, video_callable=False)(env)
        env.seed(0)

        env.set_monitor_mode('evaluation')
        rollout(env)

        env.set_monitor_mode('training')
        for i in range(2):
            rollout(env)

        env.set_monitor_mode('evaluation')
        rollout(env, good=True)

        env.close()
        results = monitoring.load_results(temp)
        evaluation_score = benchmark.score_evaluation(
            'CartPole-v0', results['data_sources'], results['initial_reset_timestamps'],
            results['episode_lengths'], results['episode_rewards'],
            results['episode_types'], results['timestamps'])
        benchmark_score = benchmark.score_benchmark({
            'CartPole-v0': evaluation_score['scores'],
        })

        assert np.all(np.isclose(evaluation_score['scores'], [0.00089999999999999998, 0.0054000000000000003])), "evaluation_score={}".format(evaluation_score)
        assert np.isclose(benchmark_score, 0.00315), "benchmark_score={}".format(benchmark_score)
def test_only_complete_episodes_written():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(env, temp, video_callable=False)
        env.reset()

        d = False
        while not d:
            _, _, d, _ = env.step(env.action_space.sample())

        env.reset()
        env.step(env.action_space.sample())

        env.close()

        # Only 1 episode should be written
        results = monitoring.load_results(temp)
        assert len(results['episode_lengths']) == 1, "Found {} episodes written; expecting 1".format(len(results['episode_lengths']))
def test():
    benchmark = registration.Benchmark(
        id='MyBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(),
        task_groups={
            'CartPole-v0': [{
                'seeds': 1,
                'timesteps': 5
            }, {
                'seeds': 1,
                'timesteps': 100
            }],
        })

    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env.monitor.start(temp, video_callable=False, seed=0)

        env.monitor.configure(mode='evaluation')
        rollout(env)

        env.monitor.configure(mode='training')
        for i in range(2):
            rollout(env)

        env.monitor.configure(mode='evaluation')
        rollout(env, good=True)

        env.monitor.close()
        results = monitoring.load_results(temp)
        evaluation_score = benchmark.score_evaluation(
            'CartPole-v0', results['episode_lengths'], results['episode_rewards'],
            results['episode_types'], results['timestamps'],
            results['initial_reset_timestamp'])
        benchmark_score = benchmark.score_benchmark({
            'CartPole-v0': evaluation_score['scores'],
        })

        assert np.all(np.isclose(evaluation_score['scores'], [0.00089999999999999998, 0.0054000000000000003])), "evaluation_score={}".format(evaluation_score)
        assert np.isclose(benchmark_score, 0.00315), "benchmark_score={}".format(benchmark_score)
def test_only_complete_episodes_written():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env.monitor.start(temp, video_callable=False)
        env.reset()

        d = False
        while not d:
            _, _, d, _ = env.step(env.action_space.sample())

        env.reset()
        env.step(env.action_space.sample())

        env.monitor.close()

        # Only 1 episode should be written
        results = monitoring.load_results(temp)
        assert len(results['episode_lengths']) == 1, "Found {} episodes written; expecting 1".format(len(results['episode_lengths']))
def test():
    benchmark = registration.Benchmark(
        id='MyBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(),
        tasks=[{
            'env_id': 'CartPole-v0',
            'trials': 1,
            'max_timesteps': 5
        }, {
            'env_id': 'CartPole-v0',
            'trials': 1,
            'max_timesteps': 100,
        }])

    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = wrappers.Monitor(env, directory=temp, video_callable=False)
        env.seed(0)

        env.set_monitor_mode('evaluation')
        rollout(env)

        env.set_monitor_mode('training')
        for i in range(2):
            rollout(env)

        env.set_monitor_mode('evaluation')
        rollout(env, good=True)

        env.close()
        results = monitoring.load_results(temp)
        evaluation_score = benchmark.score_evaluation(
            'CartPole-v0', results['data_sources'], results['initial_reset_timestamps'],
            results['episode_lengths'], results['episode_rewards'],
            results['episode_types'], results['timestamps'])
        benchmark_score = benchmark.score_benchmark({
            'CartPole-v0': evaluation_score['scores'],
        })

        assert np.all(np.isclose(evaluation_score['scores'], [0.00089999999999999998, 0.0054000000000000003])), "evaluation_score={}".format(evaluation_score)
        assert np.isclose(benchmark_score, 0.00315), "benchmark_score={}".format(benchmark_score)
def test():
    benchmark = registration.Benchmark(
        id='MyBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(),
        task_groups={
            'CartPole-v0': [{
                'seeds': 1,
                'timesteps': 5
            }, {
                'seeds': 1,
                'timesteps': 100
            }],
        })

    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env.monitor.start(temp, video_callable=False, seed=0)

        env.monitor.configure(mode='evaluation')
        rollout(env)

        env.monitor.configure(mode='training')
        for i in range(2):
            rollout(env)

        env.monitor.configure(mode='evaluation')
        rollout(env, good=True)

        env.monitor.close()
        results = monitoring.load_results(temp)
        evaluation_score = benchmark.score_evaluation(
            'CartPole-v0', results['episode_lengths'], results['episode_rewards'],
            results['episode_types'], results['timestamps'],
            results['initial_reset_timestamp'])
        benchmark_score = benchmark.score_benchmark({
            'CartPole-v0': evaluation_score['scores'],
        })

        assert np.all(np.isclose(evaluation_score['scores'], [0.00089999999999999998, 0.0054000000000000003])), "evaluation_score={}".format(evaluation_score)
        assert np.isclose(benchmark_score, 0.00315), "benchmark_score={}".format(benchmark_score)
import pickle
pickle.dump(metrics, open('sarsa_%d_%s_metrics.p' % (scale, ENV_NAME), "wb"))

# load model for testing
sarsa.load_weights('/home/am/Desktop/set_tests/final/sarsa_%d_%s_weights.h5f' % (scale, ENV_NAME))

# setting up monitoring tools to record the testing episodes
from gym import monitoring
from gym.wrappers import Monitor

def episode5(episode_id):
    if episode_id < 5:
        return True
    else:
        return False

# rec = StatsRecorder(env, "sarsa_1")
# rec.capture_frame()
temp = '/home/am/Desktop/set_tests/final/sarsa_%d_%s' % (scale, ENV_NAME)
env = Monitor(env, temp, force=True, video_callable=episode5)

# testing
sarsa.test(env, nb_episodes=5, visualize=False, nb_max_episode_steps=2000)
env.close()
results = monitoring.load_results(temp)
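# Minimal end-to-end sketch of the pattern shared by the examples above:
# monitor an environment, close the monitor, then read the recorded statistics
# back with monitoring.load_results. The directory name and episode count are
# illustrative, and this assumes an older gym version in which
# gym.monitoring.load_results and gym.wrappers.Monitor are available.
import gym
from gym import monitoring
from gym.wrappers import Monitor

env = gym.make('CartPole-v0')
env = Monitor(env, '/tmp/cartpole-monitor', force=True, video_callable=False)

for _ in range(3):
    env.reset()
    done = False
    while not done:
        _, _, done, _ = env.step(env.action_space.sample())
env.close()

results = monitoring.load_results('/tmp/cartpole-monitor')
for length, reward in zip(results['episode_lengths'], results['episode_rewards']):
    print('episode length: {}, total reward: {}'.format(length, reward))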