def simulate(step, env_ctor, duration, num_agents, agent_config,
             isolate_envs='none', expensive_summaries=False,
             name='simulate'):
    """Collect rollouts and build the summaries that describe them.

    All arguments except `expensive_summaries` and `name` are forwarded
    unchanged, as keyword arguments, to `collect_rollouts`. `duration` is
    additionally used as the `max_length` of the image strip summary.

    Args:
        step: Forwarded to `collect_rollouts`.
        env_ctor: Forwarded to `collect_rollouts`.
        duration: Forwarded to `collect_rollouts`; also the `max_length`
            of the image strip summary.
        num_agents: Forwarded to `collect_rollouts`.
        agent_config: Forwarded to `collect_rollouts`.
        isolate_envs: Forwarded to `collect_rollouts`.
        expensive_summaries: When true, histograms of the returns, rewards
            and actions plus an image strip are added to the summaries.
        name: Name of the variable scope the ops are created in.

    Returns:
        Tuple `(summary, return_mean)`: the merge of every summary built
        here, and `tf.reduce_mean` of the returns from `collect_rollouts`.
    """
    with tf.variable_scope(name):
        returns, frames, actions, rewards = collect_rollouts(
            step=step,
            env_ctor=env_ctor,
            duration=duration,
            num_agents=num_agents,
            agent_config=agent_config,
            isolate_envs=isolate_envs)
        return_mean = tf.reduce_mean(returns)
        # The scalar return is always reported.
        collected = [tf.summary.scalar('return', return_mean)]
        if expensive_summaries:
            collected += [
                tf.summary.histogram('return_hist', returns),
                tf.summary.histogram('reward_hist', rewards),
                tf.summary.histogram('action_hist', actions),
                tools.image_strip_summary(
                    'image', frames, max_length=duration),
            ]
        # NOTE(review): the animation is treated as unconditional (outside
        # the `expensive_summaries` branch) -- confirm against the intended
        # nesting of the original code.
        collected.append(
            tools.gif_summary('animation', frames, max_outputs=1, fps=20))
    # NOTE(review): the merge is placed outside the variable scope -- confirm.
    merged = tf.summary.merge(collected)
    return merged, return_mean
def simulate(step, env_ctor, duration, num_agents, agent_config,
             env_processes=False, name='simulate'):
    """Collect rollouts and build the summaries that describe them.

    All arguments except `name` are forwarded unchanged, as keyword
    arguments, to `collect_rollouts`. `duration` is additionally used as
    the `max_length` of the image strip summary.

    Args:
        step: Forwarded to `collect_rollouts`.
        env_ctor: Forwarded to `collect_rollouts`.
        duration: Forwarded to `collect_rollouts`; also the `max_length`
            of the image strip summary.
        num_agents: Forwarded to `collect_rollouts`.
        agent_config: Forwarded to `collect_rollouts`.
        env_processes: Forwarded to `collect_rollouts`.
        name: Name of the variable scope the ops are created in.

    Returns:
        Tuple `(summary, return_mean)`: the merge of every summary built
        here, and `tf.reduce_mean` of the returns from `collect_rollouts`.
    """
    with tf.variable_scope(name):
        returns, frames, actions, rewards = collect_rollouts(
            step=step,
            env_ctor=env_ctor,
            duration=duration,  # i.e. max_length
            num_agents=num_agents,
            agent_config=agent_config,
            env_processes=env_processes)
        return_mean = tf.reduce_mean(returns)
        # Every summary is emitted unconditionally; the list order matches
        # the order in which the summary ops are created.
        collected = [
            tf.summary.histogram('return_hist', returns),
            tf.summary.scalar('return', return_mean),
            tf.summary.histogram('reward_hist', rewards),
            tf.summary.histogram('action_hist', actions),
            tools.image_strip_summary('image', frames, max_length=duration),
        ]
    # NOTE(review): the merge is placed outside the variable scope -- confirm.
    merged = tf.summary.merge(collected)
    return merged, return_mean