def record(log_path, env, horizon, kwargs, modes=None):
    """Roll out the policy described by *kwargs* in *env* and record videos.

    For each mode in *modes* (e.g. ``'real'`` and/or ``'model'``) an MP4 named
    ``<mode>.mp4`` is written under *log_path*, observations and running costs
    are collected, and — when exactly two modes were recorded — the pair of
    trajectories is handed to ``_analyze_trajectories`` for comparison.

    Parameters
    ----------
    log_path : str
        Directory that receives the ``<mode>.mp4`` output files.
    env : gym-like environment
        Must expose ``reset()``, ``action_space.bounds``, and be renderable
        through the inner env returned by ``_get_inner_env``.
    horizon : int
        Number of steps to roll out per mode.
    kwargs : dict
        Rollout configuration consumed by ``_sanity_check``, ``_get_action``
        and ``_step``.
    modes : list[str] or None
        Rollout modes; defaults to ``['real']``.
        NOTE: the default is created fresh per call — the original signature
        used the mutable default ``modes=['real']``, which is shared across
        calls and a classic Python pitfall.
    """
    if modes is None:
        modes = ['real']
    _sanity_check(kwargs, modes)
    # All modes start from the same initial observation so trajectories are
    # directly comparable.
    init = env.reset()
    Os = {}           # per-mode list of observations, one per step (plus init)
    total_costs = {}  # per-mode list of cumulative cost, one per step (plus 0)
    for mode in modes:
        Os[mode] = []
        total_costs[mode] = []
        output_path = osp.join(log_path, "%s.mp4" % mode)
        # `frame_size` is a module-level constant — presumably (width, height);
        # TODO confirm against the rest of the file.
        encoder = ImageEncoder(output_path=output_path,
                               frame_shape=frame_size + (3,),
                               frames_per_sec=60)
        print("Generating %s" % output_path)
        obs = init
        Os[mode].append(obs)
        inner_env = _get_inner_env(env)
        if mode == 'model':
            # In model mode the inner env is forced to the current observation
            # before rendering, since stepping happens through the model.
            inner_env.reset(obs)
        image = inner_env.render(mode='rgb_array')
        total_cost = 0.0
        total_costs[mode].append(total_cost)
        for t in range(horizon):
            compressed_image = to_img(image, frame_size=frame_size)
            # cv2.imshow('frame{}'.format(t), compressed_image)
            # cv2.waitKey(10)
            encoder.capture_frame(compressed_image)
            action = _get_action(kwargs, obs)
            # Clip to the env's legal action range before stepping.
            action = np.clip(action, *env.action_space.bounds)
            next_obs, reward, done, info = _step(kwargs, env, obs, action, mode)
            total_cost -= reward  # cost is negated reward
            obs = next_obs
            Os[mode].append(obs)
            if mode == 'model':
                # Keep the rendering env in sync with the model's prediction.
                inner_env.reset(next_obs)
            image = inner_env.render(mode='rgb_array')
            # Deliberately roll out for the full horizon; early termination
            # is disabled so all modes produce equal-length trajectories.
            # if done:
            #     break
            total_costs[mode].append(total_cost)
        print("%s cost: %f" % (mode, total_cost))
        encoder.close()
    if len(Os) == 2:
        _analyze_trajectories(Os, total_costs, log_path)
# NOTE(review): this span is the interior of a definition whose header lies
# outside the visible chunk — `data`, `output_path`, `frame_size`, `to_img`,
# `ImageEncoder` and `cv2` are bound by the surrounding code. It records six
# 500-step rollouts of a saved policy into one MP4 per idx.
policy = data["policy"]
env = data["env"]
# env = SwimmerEnv()
# range(7, 8) yields only idx == 7 — presumably narrowed from a wider sweep
# for debugging; verify against the caller before widening.
for idx in range(7, 8):
    encoder = ImageEncoder(output_path=osp.join(
        output_path, '%d_goalGAN_maze.mp4' % idx),
        frame_shape=frame_size + (3, ),
        frames_per_sec=15)
    # Six independent rollouts are concatenated into the same video file.
    for i in range(6):
        obs = env.reset()
        print("Generating %d_goalGAN_maze.mp4" % idx)
        image = env.render(mode='rgb_array')
        # Reset any recurrent/internal policy state between rollouts.
        policy.reset()
        for t in range(500):
            compressed_image = to_img(image, frame_size=frame_size)
            # cv2.imshow('frame{}'.format(t), compressed_image)
            cv2.waitKey(10)
            encoder.capture_frame(compressed_image)
            action, _ = policy.get_action(obs)
            next_obs, reward, done, info = env.step(action)
            obs = next_obs
            image = env.render(mode='rgb_array')
            if done:
                break
    # Close once per idx, after all six rollouts are captured.
    encoder.close()