Esempio n. 1
0
def create_video(py_env: py_environment.PyEnvironment,
                 tf_env: tf_environment.TFEnvironment,
                 policy: tf_py_policy.TFPyPolicy,
                 num_episodes=10,
                 max_episode_length=60 * 30,
                 video_filename='eval_video.mp4'):
    """Record `policy` acting for `num_episodes` episodes into a video file.

    `tf_env` produces the time steps fed to `policy.action`, while `py_env`
    is stepped with the same actions and rendered to obtain the frames.
    NOTE(review): this presumably relies on both environments evolving
    identically under the same action sequence -- confirm with the caller.

    Args:
        py_env: Environment that is rendered with ``mode='rgb_array'`` once
            after each reset and once after every step. It is closed before
            this function returns.
        tf_env: Environment that is reset and stepped to drive the policy.
        policy: Policy providing ``get_initial_state`` and ``action``.
        num_episodes: Number of episodes appended to the video.
        max_episode_length: Maximum number of steps recorded per episode; an
            episode is cut off at this count even if it has not terminated.
            The default of ``60 * 30`` steps is 30 seconds at the 60 fps the
            writer is opened with.
        video_filename: Output path handed to ``imageio.get_writer``.
    """
    logging.info('Generating video %s', video_filename)
    py_env.reset()
    try:
        with imageio.get_writer(video_filename, fps=60) as vid:
            for episode in range(num_episodes):
                logging.info('\tEpisode %s of %s', episode + 1, num_episodes)

                frames = 0
                time_step = tf_env.reset()
                py_env.reset()
                state = policy.get_initial_state(tf_env.batch_size)

                # First frame shows the freshly reset environment.
                vid.append_data(py_env.render(mode='rgb_array'))
                while not time_step.is_last() and frames < max_episode_length:
                    if frames % 60 == 0:
                        logging.info('Frame %s of %s', frames,
                                     max_episode_length)
                    policy_step = policy.action(time_step, state)
                    state = policy_step.state
                    # Apply the same action to both environments so the
                    # rendered one follows the one driving the policy.
                    time_step = tf_env.step(policy_step.action)
                    py_env.step(policy_step.action)
                    vid.append_data(py_env.render(mode='rgb_array'))
                    frames += 1
    finally:
        # Close exactly once, after the last episode. Closing inside the
        # episode loop would leave later episodes resetting, stepping and
        # rendering an environment that has already been closed.
        py_env.close()
    logging.info('Finished rendering video %s', video_filename)
Esempio n. 2
0
def create_video(py_environment: PyEnvironment,
                 tf_environment: TFPyEnvironment,
                 policy: tf_policy,
                 num_episodes=10,
                 video_filename='imageio.mp4'):
    """Render `num_episodes` policy rollouts into a single video file.

    The policy is driven by `tf_environment` time steps, while the frames
    come from `py_environment.render()`.
    NOTE(review): presumably `tf_environment` wraps `py_environment`, so
    stepping the former advances what the latter renders -- confirm.

    Args:
        py_environment: Environment whose `render()` output supplies frames.
        tf_environment: Environment that is reset and stepped with the
            policy's actions.
        policy: Object whose `action(time_step)` result exposes `.action`.
        num_episodes: Number of episodes recorded back to back.
        video_filename: Output path passed to `imageio.get_writer`; the
            writer is opened at 60 fps.
    """
    print("Generating video %s" % video_filename)

    with imageio.get_writer(video_filename, fps=60) as writer:
        for episode in range(num_episodes):
            print("Generating episode %d of %d" % (episode, num_episodes))

            current = tf_environment.reset()
            # Opening frame, captured before any action is taken.
            writer.append_data(py_environment.render())

            while True:
                if current.is_last():
                    break
                chosen = policy.action(current)
                current = tf_environment.step(chosen.action)
                # One frame per environment step.
                writer.append_data(py_environment.render())
def create_video(py_environment: PyEnvironment,
                 tf_environment: TFEnvironment,
                 policy: tf_policy.Base,
                 num_episodes=10,
                 video_filename='imageio.mp4'):
    """Record `num_episodes` rollouts of a stateful policy into a video file.

    `tf_environment` is reset and stepped with the policy's actions; each
    frame is taken from `py_environment.render()`.
    NOTE(review): presumably `tf_environment` wraps `py_environment`, so
    stepping the former advances what the latter renders -- confirm.

    Args:
        py_environment: Environment whose `render()` output supplies frames.
        tf_environment: Environment that is reset and stepped; its
            `batch_size` sizes the policy's initial state.
        policy: Policy providing `get_initial_state` and
            `action(time_step, state)`.
        num_episodes: Number of episodes recorded back to back.
        video_filename: Output path passed to `imageio.get_writer`; the
            writer is opened at 60 fps.
    """
    # Pass values as logging arguments so formatting is deferred to the
    # logging framework instead of being done eagerly with `%`.
    logging.info("Generating video %s", video_filename)
    with imageio.get_writer(video_filename, fps=60) as video:
        for episode in range(num_episodes):
            logging.info("Generating episode %d of %d", episode, num_episodes)

            time_step = tf_environment.reset()
            # The policy state starts fresh each episode and is carried
            # forward from one action call to the next.
            state = policy.get_initial_state(tf_environment.batch_size)

            video.append_data(py_environment.render())
            while not time_step.is_last():
                policy_step: PolicyStep = policy.action(time_step, state)
                state = policy_step.state
                time_step = tf_environment.step(policy_step.action)
                video.append_data(py_environment.render())

    logging.info("Finished video %s", video_filename)
Esempio n. 4
0
def create_video(py_environment: PyEnvironment,
                 tf_environment: TFPyEnvironment,
                 policy: tf_policy,
                 num_episodes=10,
                 video_filename='imageio.mp4'):
    """Record policy rollouts to a video and print each episode's return.

    `tf_environment` is reset and stepped with the policy's actions; each
    frame is taken from `py_environment.render()`.
    NOTE(review): presumably `tf_environment` wraps `py_environment`, so
    stepping the former advances what the latter renders -- confirm.

    Args:
        py_environment: Environment whose `render()` output supplies frames.
        tf_environment: Environment that is reset and stepped; the `reward`
            of every time step it returns from `step` is accumulated.
        policy: Object whose `action(time_step)` result exposes `.action`.
        num_episodes: Number of episodes recorded back to back.
        video_filename: Output path passed to `imageio.get_writer`; the
            writer is opened at 60 fps.
    """
    print("Generating video %s" % video_filename)

    with imageio.get_writer(video_filename, fps=60) as writer:

        def run_episode():
            # Play one episode to its last step, writing one frame after the
            # reset and one after every step; return the accumulated reward.
            total = 0.0
            step = tf_environment.reset()
            writer.append_data(py_environment.render())
            while not step.is_last():
                decision = policy.action(step)
                step = tf_environment.step(decision.action)
                total += step.reward
                writer.append_data(py_environment.render())
            return total

        for episode in range(num_episodes):
            episode_return = run_episode()
            print(
                f"Generated episode {episode} of {num_episodes}. Return:{episode_return} "
            )