コード例 #1
0
def record_expert_demo(env_name, expert_policy, i, outdir, record_video=True):
    """Run the expert policy for one episode and require a positive result.

    The environment id is built as ``'<env_name><i>-v0'`` and created with
    ``gym.make``. The video path handed to ``run_single_episode`` is
    ``<outdir>/expert_demonstration_<ClassName>.mp4``, where ``ClassName`` is
    the class name of the object returned by ``gym.make``.

    Args:
        env_name: Prefix of the environment id.
        expert_policy: Policy passed unchanged to ``run_single_episode``.
        i: Value appended to ``env_name`` to form the environment id.
        outdir: Directory component of the video output path.
        record_video: Forwarded to ``run_single_episode`` as ``record_video``.

    Raises:
        AssertionError: If the value returned by ``run_single_episode`` is
            not greater than 0.
    """
    env = gym.make('{}{}-v0'.format(env_name, i))
    # NOTE(review): if gym.make returns a wrapper object, this is the
    # wrapper's class name rather than the task's -- confirm that is intended.
    outpath = os.path.join(
        outdir, 'expert_demonstration_{}.mp4'.format(env.__class__.__name__))
    total_reward = run_single_episode(env,
                                      expert_policy,
                                      record_video=record_video,
                                      video_out_path=outpath)
    # Raise explicitly instead of using an ``assert`` statement so the check
    # still runs under ``python -O``. ``not (x > 0)`` is used so that NaN is
    # rejected as well.
    if not total_reward > 0:
        raise AssertionError(
            'expert policy result {!r} is not positive on {}{}-v0'.format(
                total_reward, env_name, i))
コード例 #2
0
def test(policy, base_class_name, test_env_nums=range(11, 20), max_num_steps=50,
         record_videos=True, video_format='mp4'):
    """Run ``policy`` for one episode in each test environment.

    Environment ids are built as ``'<base_class_name><n>-v0'`` for every
    ``n`` in ``test_env_nums`` and created with ``gym.make``.

    Args:
        policy: Policy passed unchanged to ``run_single_episode``.
        base_class_name: Prefix of each environment id.
        test_env_nums: Iterable of values appended to the prefix.
        max_num_steps: Forwarded to ``run_single_episode``.
        record_videos: Forwarded to ``run_single_episode`` as ``record_video``.
        video_format: File extension used in the video output path.

    Returns:
        A list with one entry per environment, in ``test_env_nums`` order.
        Each entry is the result of ``run_single_episode(...) > 0``.
    """
    env_ids = ['{}{}-v0'.format(base_class_name, num) for num in test_env_nums]
    # Every environment is constructed before the first episode starts.
    test_envs = [gym.make(env_id) for env_id in env_ids]

    def _episode_succeeded(env):
        # The path depends only on the env object's class name and the
        # format, so two envs with the same class name share one path.
        clip_path = '/tmp/lfd_{}.{}'.format(env.__class__.__name__, video_format)
        episode_result = run_single_episode(
            env, policy, max_num_steps=max_num_steps,
            record_video=record_videos, video_out_path=clip_path)
        return episode_result > 0

    return [_episode_succeeded(env) for env in test_envs]
コード例 #3
0
def test(policy, base_class_name, test_env_nums=range(11, 20), max_num_steps=50,
         record_videos=True, video_format='mp4'):
    """Evaluate ``policy`` on ARC test cases or on gym test environments.

    If ``arc_tasks.prefix.format('')`` is contained in ``base_class_name``,
    the first value returned by ``arc_tasks.tests(base_class_name)`` is
    treated as a sized collection of ``(obs, point)`` pairs. The policy is
    called as ``policy(obs, point[:2])`` and its result compared with
    ``point[2]``. The second value returned by ``arc_tasks.tests`` is ignored.

    Otherwise environment ids ``'<base_class_name><n>-v0'`` are created with
    ``gym.make`` for every ``n`` in ``test_env_nums`` and one episode is run
    in each.

    Args:
        policy: Callable evaluated on the ARC cases, or passed unchanged to
            ``run_single_episode``.
        base_class_name: Task name / prefix of each environment id.
        test_env_nums: Iterable of values appended to the prefix (gym only).
        max_num_steps: Forwarded to ``run_single_episode`` (gym only).
        record_videos: Forwarded as ``record_video`` (gym only).
        video_format: File extension of the video output path (gym only).

    Returns:
        ARC branch: the number of matching cases divided by the number of
        cases. Gym branch: a list with one ``run_single_episode(...) > 0``
        result per environment.

    Raises:
        ZeroDivisionError: In the ARC branch when there are no test cases.
    """
    is_arc_task = arc_tasks.prefix.format('') in base_class_name

    if not is_arc_task:
        env_ids = ['{}{}-v0'.format(base_class_name, num) for num in test_env_nums]
        # Every environment is constructed before the first episode starts.
        test_envs = [gym.make(env_id) for env_id in env_ids]

        outcomes = []
        for env in test_envs:
            clip_path = '/tmp/lfd_{}.{}'.format(env.__class__.__name__, video_format)
            episode_result = run_single_episode(
                env, policy, max_num_steps=max_num_steps,
                record_video=record_videos, video_out_path=clip_path)
            outcomes.append(episode_result > 0)
        return outcomes

    # Named ``cases`` so the local does not shadow this function's own name.
    cases, _shapes = arc_tasks.tests(base_class_name)
    num_correct = sum(policy(obs, point[:2]) == point[2] for obs, point in cases)
    return num_correct / len(cases)