Beispiel #1
0
def record(log_path,
           env,
           horizon,
           kwargs,
           modes=None):
    """Record one rollout video per mode and collect the trajectories.

    For every entry of ``modes`` a video ``<log_path>/<mode>.mp4`` is written.
    Each rollout starts from the same initial observation (a single
    ``env.reset()``), runs for exactly ``horizon`` steps, and picks actions
    with ``_get_action(kwargs, obs)`` clipped to the action-space bounds.
    Transitions come from ``_step(kwargs, env, obs, action, mode)``.

    Args:
        log_path: Directory in which the ``<mode>.mp4`` files are created.
        env: Environment providing ``reset()`` and ``action_space.bounds``;
            its inner environment (via ``_get_inner_env``) is used to render.
        horizon: Number of steps (and encoded frames) per mode.
        kwargs: Opaque settings forwarded to ``_sanity_check``,
            ``_get_action`` and ``_step``.
        modes: Iterable of mode names. Defaults to ``['real']``. In
            ``'model'`` mode the inner environment is reset to each
            observation before rendering.

    Returns:
        None. Videos are written to disk; when exactly two distinct modes
        were recorded, ``_analyze_trajectories`` is called with the
        per-mode observations and cumulative costs.
    """
    # Build a fresh default per call instead of sharing one mutable list
    # object across calls (it is handed to _sanity_check).
    if modes is None:
        modes = ['real']
    _sanity_check(kwargs, modes)
    init = env.reset()
    Os = {}           # mode -> observations, horizon + 1 entries each
    total_costs = {}  # mode -> running cost, horizon + 1 entries each
    for mode in modes:
        Os[mode] = []
        total_costs[mode] = []
        output_path = osp.join(log_path, "%s.mp4" % mode)
        # NOTE: frame_size is a module-level name not defined in this block.
        encoder = ImageEncoder(output_path=output_path,
                               frame_shape=frame_size + (3,),
                               frames_per_sec=60)
        # Always close the encoder, even if rendering/stepping raises, so the
        # encoder is not leaked and the video file is finalized.
        try:
            print("Generating %s" % output_path)
            obs = init
            Os[mode].append(obs)
            inner_env = _get_inner_env(env)
            if mode == 'model':
                # presumably syncs the renderer with the given observation
                # -- TODO confirm against inner_env.reset
                inner_env.reset(obs)
            image = inner_env.render(mode='rgb_array')
            total_cost = 0.0
            total_costs[mode].append(total_cost)
            for t in range(horizon):
                # The frame rendered for the current observation is encoded
                # before the step is taken.
                compressed_image = to_img(image, frame_size=frame_size)
                encoder.capture_frame(compressed_image)
                action = _get_action(kwargs, obs)
                action = np.clip(action, *env.action_space.bounds)
                next_obs, reward, done, info = _step(
                    kwargs, env, obs, action, mode)
                # Cost is the negated reward, accumulated over the rollout.
                total_cost -= reward
                obs = next_obs
                Os[mode].append(obs)
                if mode == 'model':
                    inner_env.reset(next_obs)
                image = inner_env.render(mode='rgb_array')
                # NOTE: `done` is ignored; the rollout always runs for the
                # full horizon.
                total_costs[mode].append(total_cost)
            print("%s cost: %f" % (mode, total_cost))
        finally:
            encoder.close()
    if len(Os) == 2:
        _analyze_trajectories(Os, total_costs, log_path)
Beispiel #2
0
    # Roll out the loaded policy in the loaded environment and encode the
    # rendered frames into an mp4 file under `output_path`.
    # NOTE: `data`, `output_path` and `frame_size` come from the enclosing
    # scope, which is not visible in this fragment.
    policy = data["policy"]

    env = data["env"]
    # env = SwimmerEnv()

    # range(7, 8) yields a single index, so only video number 7 is produced.
    for idx in range(7, 8):

        encoder = ImageEncoder(output_path=osp.join(
            output_path, '%d_goalGAN_maze.mp4' % idx),
                               frame_shape=frame_size + (3, ),
                               frames_per_sec=15)

        # NOTE(review): the env is reset six times and only the last
        # observation is kept -- presumably to reach a particular start/goal
        # configuration; confirm this is intentional.
        for i in range(6):
            obs = env.reset()
        print("Generating %d_goalGAN_maze.mp4" % idx)
        image = env.render(mode='rgb_array')
        policy.reset()
        # At most 500 frames are encoded per video.
        for t in range(500):
            compressed_image = to_img(image, frame_size=frame_size)
            # cv2.imshow('frame{}'.format(t), compressed_image)
            # NOTE(review): waitKey is still active although the imshow above
            # is commented out -- looks like a debugging leftover; confirm.
            cv2.waitKey(10)
            encoder.capture_frame(compressed_image)
            action, _ = policy.get_action(obs)
            next_obs, reward, done, info = env.step(action)
            obs = next_obs
            image = env.render(mode='rgb_array')
            # Stop at episode end; the frame rendered just above is not
            # encoded because capture happens at the top of the loop.
            if done:
                break
        # NOTE(review): not reached if anything above raises, so the encoder
        # would stay open in that case.
        encoder.close()