Example #1
File: api.py Project: 1234-/gym
def upload_training_data(training_dir, api_key=None):
    # Could have multiple manifests
    results = monitoring.load_results(training_dir)
    if not results:
        raise error.Error('''Could not find any manifest files in {}.

(HINT: this usually means you did not yet close() your env.monitor and have not yet exited the process. You should call 'env.monitor.start(training_dir)' at the start of training and 'env.monitor.close()' at the end, or exit the process.)'''.format(training_dir))

    manifests = results['manifests']
    env_info = results['env_info']
    timestamps = results['timestamps']
    episode_lengths = results['episode_lengths']
    episode_rewards = results['episode_rewards']
    videos = results['videos']

    env_id = env_info['env_id']
    logger.debug('[%s] Uploading data from manifest %s', env_id, ', '.join(manifests))

    # Do the relevant uploads
    if len(episode_lengths) > 0:
        training_episode_batch = upload_training_episode_batch(episode_lengths, episode_rewards, timestamps, api_key, env_id=env_id)
    else:
        training_episode_batch = None

    if len(videos) > MAX_VIDEOS:
        logger.warn('[%s] You recorded videos for %s episodes, but the scoreboard only supports up to %s. We will automatically subsample for you, but you also might wish to adjust your video recording rate.', env_id, len(videos), MAX_VIDEOS)
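        # np.linspace picks MAX_VIDEOS evenly spaced episode indices; only those videos are kept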
        subsample_inds = np.linspace(0, len(videos)-1, MAX_VIDEOS).astype('int')
        videos = [videos[i] for i in subsample_inds]

    if len(videos) > 0:
        training_video = upload_training_video(videos, api_key, env_id=env_id)
    else:
        training_video = None

    return env_info, training_episode_batch, training_video
Example #2
def test_benchmarks():
    for benchmark_id in ['BernoulliBandit-v0', 'RandomTabularMDP-v0']:

        benchmark = registration.benchmark_spec(benchmark_id)

        for env_id in benchmark.env_ids:

            with helpers.tempdir() as temp:
                env = gym.make(env_id)
                env.seed(0)
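                # Disable per-episode video recording; only episode statistics are needed for scoring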
                env.monitor.start(temp, video_callable=False)

                env.monitor.configure(mode='evaluation')
                rollout(env)

                env.monitor.configure(mode='training')
                for i in range(2):
                    rollout(env)

                env.monitor.configure(mode='evaluation')
                rollout(env, good=True)

                env.monitor.close()
                results = monitoring.load_results(temp)
                evaluation_score = benchmark.score_evaluation(
                    env_id, results['data_sources'],
                    results['initial_reset_timestamps'],
                    results['episode_lengths'], results['episode_rewards'],
                    results['episode_types'], results['timestamps'])
                benchmark.score_benchmark({
                    env_id: evaluation_score['scores'],
                })
Example #3
def upload_training_data(training_dir, api_key=None):
    # Could have multiple manifests
    results = monitoring.load_results(training_dir)
    if not results:
        raise error.Error('''Could not find any manifest files in {}.

(HINT: this usually means you did not yet close() your env.monitor and have not yet exited the process. You should call 'env.monitor.start(training_dir)' at the start of training and 'env.monitor.close()' at the end, or exit the process.)'''.format(training_dir))

    manifests = results['manifests']
    env_info = results['env_info']
    timestamps = results['timestamps']
    episode_lengths = results['episode_lengths']
    episode_rewards = results['episode_rewards']
    videos = results['videos']

    logger.debug('Uploading data from manifest %s', ', '.join(manifests))

    # Do the relevant uploads
    if len(episode_lengths) > 0:
        training_episode_batch = upload_training_episode_batch(episode_lengths, episode_rewards, timestamps, api_key)
    else:
        training_episode_batch = None

    if len(videos) > MAX_VIDEOS:
        logger.warn('You recorded videos for {} episodes, but the scoreboard only supports up to {}. We will automatically subsample for you, but you also might wish to adjust your video recording rate.'.format(len(videos), MAX_VIDEOS))
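        # Keep every `skip`-th video so roughly MAX_VIDEOS remain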
        skip = len(videos) // (MAX_VIDEOS - 1)
        videos = videos[::skip]

    if len(videos) > 0:
        training_video = upload_training_video(videos, api_key)
    else:
        training_video = None

    return env_info, training_episode_batch, training_video
Example #4
    def plot(self, full=True, dots=False, average=0, interpolated=0):
        print(self.outdir)
        results = monitoring.load_results(self.outdir)

        data = results[self.data_key]
        steps = results['episode_lengths']
        # Convert per-episode lengths into cumulative step counts for the x-axis
        count_steps = 0
        for i in range(len(steps)):
            count_steps += steps[i]
            steps[i] = count_steps

        avg_data = []

        if full:
            plt.plot(steps, data, color='blue')
        if dots:
            plt.plot(steps, data, '.', color='black')
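        # Overlay a rolling-average curve over the last `average` episodes when requested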
        if average > 0:
            average = int(average)
            for i, val in enumerate(data):
                # Rolling mean over the previous `average` episodes
                if i < average:
                    avg = np.array(data[:average]).mean()
                else:
                    avg = np.array(data[(i - average):i]).mean()
                avg_data.append(avg)
            plt.plot(steps, avg_data, color='red', linewidth=2.5)
        if interpolated > 0:
            avg_data = []
            avg_data_points = []
            n = len(data) // interpolated
            if n == 0:
                n = 1
            data_fix = 0
            for i, val in enumerate(data):
                if i % n == 0:
                    if (i + n) <= len(data) + n:
                        avg = sum(data[i:i + n]) / n
                        avg_data.append(avg)
                        avg_data_points.append(i)
                if (i + n) == len(data):
                    data_fix = n

            # Smooth the block averages with a monotonic cubic (PCHIP) interpolation
            interp = pchip(avg_data_points, avg_data)
            xx = np.linspace(0, len(data) - data_fix, 1000)
            plt.plot(xx, interp(xx), color='green', linewidth=3.5)

        # pause so matplotlib will display
        # may want to figure out matplotlib animation or use a different library in the future
        plt.pause(0.000001)
        plt.savefig('result.png')
Example #5
def test_video_callable_records_videos():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
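        # With the default video_callable, the monitor records the very first episode, so exactly one video is expected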
        env.monitor.start(temp)
        env.reset()
        env.monitor.close()
        results = monitoring.load_results(temp)
        assert len(results['videos']) == 1, "Videos: {}".format(results['videos'])
Example #6
def test_video_callable_false_does_not_record():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
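        # video_callable=False turns off video recording, so no videos should appear in the results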
        env.monitor.start(temp, video_callable=False)
        env.reset()
        env.monitor.close()
        results = monitoring.load_results(temp)
        assert len(results['videos']) == 0
Example #7
File: api.py Project: yebi2013/gym
def upload_training_data(training_dir, api_key=None):
    # Could have multiple manifests
    results = monitoring.load_results(training_dir)
    if not results:
        raise error.Error('''Could not find any manifest files in {}.

(HINT: this usually means you did not yet close() your env.monitor and have not yet exited the process. You should call 'env.monitor.start(training_dir)' at the start of training and 'env.monitor.close()' at the end, or exit the process.)'''
                          .format(training_dir))

    manifests = results['manifests']
    env_info = results['env_info']
    data_sources = results['data_sources']
    timestamps = results['timestamps']
    episode_lengths = results['episode_lengths']
    episode_rewards = results['episode_rewards']
    episode_types = results['episode_types']
    initial_reset_timestamps = results['initial_reset_timestamps']
    main_seeds = results['main_seeds']
    seeds = results['seeds']
    videos = results['videos']

    env_id = env_info['env_id']
    logger.debug('[%s] Uploading data from manifest %s', env_id,
                 ', '.join(manifests))

    # Do the relevant uploads
    if len(episode_lengths) > 0:
        training_episode_batch = upload_training_episode_batch(
            data_sources,
            episode_lengths,
            episode_rewards,
            episode_types,
            initial_reset_timestamps,
            timestamps,
            main_seeds,
            seeds,
            api_key,
            env_id=env_id)
    else:
        training_episode_batch = None

    if len(videos) > MAX_VIDEOS:
        logger.warn(
            '[%s] You recorded videos for %s episodes, but the scoreboard only supports up to %s. We will automatically subsample for you, but you also might wish to adjust your video recording rate.',
            env_id, len(videos), MAX_VIDEOS)
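        # Evenly subsample the recorded videos down to MAX_VIDEOS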
        subsample_inds = np.linspace(0,
                                     len(videos) - 1, MAX_VIDEOS).astype('int')
        videos = [videos[i] for i in subsample_inds]

    if len(videos) > 0:
        training_video = upload_training_video(videos, api_key, env_id=env_id)
    else:
        training_video = None

    return env_info, training_episode_batch, training_video
Example #8
    def select_specs(self):
        specs = self.specs
        selected_specs = []
        for i, spec in enumerate(specs):
            training_dir = self.env_dir(spec.id)
            results = monitoring.load_results(training_dir)
            # Skip environments whose results are already complete; clear out stale partial runs
            if results and self.complete_callable(results):
                logger.info('Skipping already-processed %s', spec.id)
                continue
            elif os.path.exists(training_dir):
                shutil.rmtree(training_dir)
            selected_specs.append((spec, training_dir))
        self.selected_specs = selected_specs
Example #9
def test():
    benchmark = registration.Benchmark(id='MyBenchmark-v0',
                                       scorer=scoring.ClipTo01ThenAverage(),
                                       tasks=[{
                                           'env_id': 'CartPole-v0',
                                           'trials': 1,
                                           'max_timesteps': 5
                                       }, {
                                           'env_id': 'CartPole-v0',
                                           'trials': 1,
                                           'max_timesteps': 100,
                                       }])

    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = wrappers.Monitor(directory=temp, video_callable=False)(env)
        env.seed(0)

        env.set_monitor_mode('evaluation')
        rollout(env)

        env.set_monitor_mode('training')
        for i in range(2):
            rollout(env)

        env.set_monitor_mode('evaluation')
        rollout(env, good=True)

        env.close()
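        # Closing the wrapper flushes the monitor's files so load_results can read them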
        results = monitoring.load_results(temp)
        evaluation_score = benchmark.score_evaluation(
            'CartPole-v0', results['data_sources'],
            results['initial_reset_timestamps'], results['episode_lengths'],
            results['episode_rewards'], results['episode_types'],
            results['timestamps'])
        benchmark_score = benchmark.score_benchmark({
            'CartPole-v0':
            evaluation_score['scores'],
        })

        assert np.all(
            np.isclose(evaluation_score['scores'],
                       [0.00089999999999999998, 0.0054000000000000003
                        ])), "evaluation_score={}".format(evaluation_score)
        assert np.isclose(
            benchmark_score,
            0.00315), "benchmark_score={}".format(benchmark_score)
Example #10
def test_only_complete_episodes_written():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(env, temp, video_callable=False)
        env.reset()
        d = False
        while not d:
            _, _, d, _ = env.step(env.action_space.sample())

        env.reset()
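        # Start a second episode but take only a single step; this incomplete episode should not be written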
        env.step(env.action_space.sample())

        env.close()

        # Only 1 episode should be written
        results = monitoring.load_results(temp)
        assert len(results['episode_lengths']) == 1, "Found {} episodes written; expecting 1".format(len(results['episode_lengths']))
Example #11
def test():
    benchmark = registration.Benchmark(id='MyBenchmark-v0',
                                       scorer=scoring.ClipTo01ThenAverage(),
                                       task_groups={
                                           'CartPole-v0': [{
                                               'seeds': 1,
                                               'timesteps': 5
                                           }, {
                                               'seeds': 1,
                                               'timesteps': 100
                                           }],
                                       })

    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
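        # Start the monitor with video disabled and a fixed seed so the resulting scores are reproducible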
        env.monitor.start(temp, video_callable=False, seed=0)

        env.monitor.configure(mode='evaluation')
        rollout(env)

        env.monitor.configure(mode='training')
        for i in range(2):
            rollout(env)

        env.monitor.configure(mode='evaluation')
        rollout(env, good=True)

        env.monitor.close()
        results = monitoring.load_results(temp)
        evaluation_score = benchmark.score_evaluation(
            'CartPole-v0', results['episode_lengths'],
            results['episode_rewards'], results['episode_types'],
            results['timestamps'], results['initial_reset_timestamp'])
        benchmark_score = benchmark.score_benchmark({
            'CartPole-v0':
            evaluation_score['scores'],
        })

        assert np.all(
            np.isclose(evaluation_score['scores'],
                       [0.00089999999999999998, 0.0054000000000000003
                        ])), "evaluation_score={}".format(evaluation_score)
        assert np.isclose(
            benchmark_score,
            0.00315), "benchmark_score={}".format(benchmark_score)
Example #12
def test_only_complete_episodes_written():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')

        env.monitor.start(temp, video_callable=False)
        env.reset()
        d = False
        while not d:
            _, _, d, _ = env.step(env.action_space.sample())

        env.reset()
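        # Take a single step of a new episode without finishing it; the monitor should drop it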
        env.step(env.action_space.sample())

        env.monitor.close()

        # Only 1 episode should be written
        results = monitoring.load_results(temp)
        assert len(results['episode_lengths']) == 1, "Found {} episodes written; expecting 1".format(len(results['episode_lengths']))
Example #13
def test():
    benchmark = registration.Benchmark(
        id='MyBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(),
        tasks=[
            {'env_id': 'CartPole-v0',
             'trials': 1,
             'max_timesteps': 5
            },
            {'env_id': 'CartPole-v0',
             'trials': 1,
             'max_timesteps': 100,
            }])

    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = wrappers.Monitor(env, directory=temp, video_callable=False)
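        # The Monitor wrapper writes episode stats to the temp directory; video recording is disabled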
        env.seed(0)

        env.set_monitor_mode('evaluation')
        rollout(env)

        env.set_monitor_mode('training')
        for i in range(2):
            rollout(env)

        env.set_monitor_mode('evaluation')
        rollout(env, good=True)

        env.close()
        results = monitoring.load_results(temp)
        evaluation_score = benchmark.score_evaluation('CartPole-v0', results['data_sources'], results['initial_reset_timestamps'], results['episode_lengths'], results['episode_rewards'], results['episode_types'], results['timestamps'])
        benchmark_score = benchmark.score_benchmark({
            'CartPole-v0': evaluation_score['scores'],
        })

        assert np.all(np.isclose(evaluation_score['scores'], [0.00089999999999999998, 0.0054000000000000003])), "evaluation_score={}".format(evaluation_score)
        assert np.isclose(benchmark_score, 0.00315), "benchmark_score={}".format(benchmark_score)
Example #14
def test():
    benchmark = registration.Benchmark(
        id='MyBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(),
        task_groups={
            'CartPole-v0': [{
                'seeds': 1,
                'timesteps': 5
            }, {
                'seeds': 1,
                'timesteps': 100
            }],
        })

    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env.monitor.start(temp, video_callable=False, seed=0)

        env.monitor.configure(mode='evaluation')
        rollout(env)

        env.monitor.configure(mode='training')
        for i in range(2):
            rollout(env)

        env.monitor.configure(mode='evaluation')
        rollout(env, good=True)

        env.monitor.close()
        results = monitoring.load_results(temp)
        evaluation_score = benchmark.score_evaluation('CartPole-v0', results['episode_lengths'], results['episode_rewards'], results['episode_types'], results['timestamps'], results['initial_reset_timestamp'])
        benchmark_score = benchmark.score_benchmark({
            'CartPole-v0': evaluation_score['scores'],
        })

        assert np.all(np.isclose(evaluation_score['scores'], [0.00089999999999999998, 0.0054000000000000003])), "evaluation_score={}".format(evaluation_score)
        assert np.isclose(benchmark_score, 0.00315), "benchmark_score={}".format(benchmark_score)
Example #15
import pickle
pickle.dump(metrics, open('sarsa_%d_%s_metrics.p' % (scale, ENV_NAME), "wb"))

# load model for testing
sarsa.load_weights('/home/am/Desktop/set_tests/final/sarsa_%d_%s_weights.h5f' %
                   (scale, ENV_NAME))

# setting up monitoring tools to record the testing episodes
from gym import monitoring
from gym.wrappers import Monitor


def episode5(episode_id):
    # Record video only for the first five test episodes
    return episode_id < 5



temp = '/home/am/Desktop/set_tests/final/sarsa_%d_%s' % (scale, ENV_NAME)
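# Wrap the environment with a Monitor so the first five test episodes are recorded to `temp`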
env = Monitor(env, temp, force=True, video_callable=episode5)

# testing
sarsa.test(env, nb_episodes=5, visualize=False, nb_max_episode_steps=2000)

env.close()
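# Close the monitor so all results are flushed to disk before loading them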
results = monitoring.load_results(temp)