def simulate_episodes(config, params, graph, cleanups, expensive_summaries,
                      gif_summary, name):
    """Wire an in-graph episode simulation phase into the training graph.

    Builds an environment constructor (optionally recording episodes to
    ``params.save_episode_dir``), assembles the agent configuration from the
    model graph and task parameters, delegates to ``control.simulate``, and
    registers the returned cleanup callable.

    Args:
        config: Experiment configuration providing ``preprocess_fn``,
            ``postprocess_fn`` and ``isolate_envs``.
        params: Phase parameters; copied and merged with the agent config.
        graph: Model graph providing ``cell``, ``encoder`` and ``step``.
        cleanups: Mutable list; the simulation's cleanup callable is appended.
        expensive_summaries: Whether to compute costly summaries.
        gif_summary: Whether to emit GIF summaries.
        name: Name for the simulation scope.

    Returns:
        Tuple ``(summary, return_)`` from ``control.simulate``.
    """
    def _partial_or_none(fn, **kwargs):
        # Leave falsy values (e.g. a missing objective) untouched.
        return fn and functools.partial(fn, **kwargs)

    def env_ctor():
        env = params.task.env_ctor()
        if params.save_episode_dir:
            env = control.wrappers.CollectGymDataset(env, params.save_episode_dir)
        env = control.wrappers.ConcatObservation(env, ['image'])
        return env

    agent_config = tools.AttrDict(
        cell=graph.cell,
        encoder=graph.encoder,
        planner=functools.partial(params.planner, graph=graph),
        objective=_partial_or_none(params.objective, graph=graph),
        exploration=params.exploration,
        preprocess_fn=config.preprocess_fn,
        postprocess_fn=config.postprocess_fn)
    # Merge both ways so each dict sees the union of settings.
    params = params.copy()
    with params.unlocked:
        params.update(agent_config)
    with agent_config.unlocked:
        agent_config.update(params)
    summary, return_, cleanup = control.simulate(
        graph.step, env_ctor, params.task.max_length, params.num_agents,
        agent_config, config.isolate_envs, expensive_summaries, gif_summary,
        name=name)
    # Work around tf.cond() tensor return type.
    cleanups.append(cleanup)
    return summary, return_
def simulate_episodes(config, params, graph, name):
    """Simulate episodes one at a time and merge their summaries.

    Builds an environment constructor (optionally recording episodes to
    ``params.save_episode_dir``), merges the task parameters with the agent
    configuration, and runs ``control.simulate`` once per batch element
    inside its own variable scope.

    Fix: removed a commented-out ``tf.control_dependencies`` line that was
    dead code left inside the loop body.

    Args:
        config: Experiment configuration providing ``preprocess_fn`` and
            ``postprocess_fn``.
        params: Phase parameters; copied and merged with the agent config.
        graph: Model graph providing ``cell``, ``encoder`` and ``step``.
        name: Name passed through to each ``control.simulate`` call.

    Returns:
        Tuple ``(summary, return_)`` where ``summary`` is the merged summary
        of all per-episode simulations and ``return_`` their mean return.
    """
    def env_ctor():
        env = params.task.env_ctor()
        if params.save_episode_dir:
            env = control.wrappers.CollectGymDataset(env, params.save_episode_dir)
        env = control.wrappers.ConcatObservation(env, ['image'])
        return env

    agent_config = tools.AttrDict(
        cell=graph.cell,
        encoder=graph.encoder,
        planner=params.planner,
        objective=functools.partial(params.objective, graph=graph),
        exploration=params.exploration,
        preprocess_fn=config.preprocess_fn,
        postprocess_fn=config.postprocess_fn)
    # Merge both ways so each dict sees the union of settings.
    params = params.copy()
    params.update(agent_config)
    agent_config.update(params)
    # Batch size larger crashes so we simulate the episodes individually.
    summaries, returns = [], []
    for index in range(params.batch_size):
        with tf.variable_scope('simulate-{}'.format(index + 1)):
            summary, return_ = control.simulate(
                graph.step, env_ctor, params.task.max_length, 1, agent_config,
                name=name)
            summaries.append(summary)
            returns.append(return_)
    summary = tf.summary.merge(summaries)
    return_ = tf.reduce_mean(returns)
    return summary, return_
def simulate_episodes(config, params, graph, expensive_summaries, name):
    """Run an in-graph simulation phase for all agents at once.

    Builds an environment constructor (optionally recording episodes to
    ``params.save_episode_dir``), merges the task parameters with the agent
    configuration, and delegates to ``control.simulate``.

    Args:
        config: Experiment configuration providing ``preprocess_fn``,
            ``postprocess_fn`` and ``isolate_envs``.
        params: Phase parameters; copied and merged with the agent config.
        graph: Model graph providing ``cell``, ``encoder`` and ``step``.
        expensive_summaries: Whether to compute costly summaries.
        name: Name for the simulation scope.

    Returns:
        Tuple ``(summary, return_)`` from ``control.simulate``.
    """
    def env_ctor():
        env = params.task.env_ctor()
        if params.save_episode_dir:
            env = control.wrappers.CollectGymDataset(env, params.save_episode_dir)
        env = control.wrappers.ConcatObservation(env, ['image'])
        return env

    agent_settings = dict(
        cell=graph.cell,
        encoder=graph.encoder,
        planner=params.planner,
        objective=functools.partial(params.objective, graph=graph),
        exploration=params.exploration,
        preprocess_fn=config.preprocess_fn,
        postprocess_fn=config.postprocess_fn)
    agent_config = tools.AttrDict(**agent_settings)
    # Merge both ways so each dict sees the union of settings.
    params = params.copy()
    params.update(agent_config)
    agent_config.update(params)
    summary, return_ = control.simulate(
        graph.step, env_ctor, params.task.max_length, params.num_agents,
        agent_config, config.isolate_envs, expensive_summaries, name=name)
    return summary, return_
def simulate_episodes(config, params, graph, cleanups, expensive_summaries,
                      gif_summary, name):
    """Wire an in-graph episode simulation phase (curiosity variant).

    Builds an environment constructor — optionally recording episodes to
    ``params.save_episode_dir`` with the curiosity-specific ``logdir`` and
    ``is_curious`` options — assembles the agent configuration (including
    ``config.num_models`` for the model ensemble), delegates to
    ``control.simulate``, and registers the returned cleanup callable.

    Fixes: removed commented-out debug ``print``/``assert`` lines (dead
    code) and replaced the lambda assignment with a named helper (PEP 8).

    Args:
        config: Experiment configuration providing ``preprocess_fn``,
            ``postprocess_fn``, ``isolate_envs`` and ``num_models``.
        params: Phase parameters; copied and merged with the agent config.
        graph: Model graph providing ``cell``, ``encoder`` and ``step``.
        cleanups: Mutable list; the simulation's cleanup callable is appended.
        expensive_summaries: Whether to compute costly summaries.
        gif_summary: Whether to emit GIF summaries.
        name: Name for the simulation scope.

    Returns:
        Tuple ``(summary, return_)`` from ``control.simulate``.
    """
    def _bind_or_none(fn, **kwargs):
        # Leave falsy values (e.g. a missing objective) untouched.
        return fn and functools.partial(fn, **kwargs)

    def env_ctor():
        env = params.task.env_ctor()
        if params.save_episode_dir:
            env = control.wrappers.CollectGymDataset(
                env, params.save_episode_dir,
                logdir=params.curious_dir, is_curious=params.is_curious)
        env = control.wrappers.ConcatObservation(env, ['image'])
        return env

    agent_config = tools.AttrDict(
        cell=graph.cell,
        encoder=graph.encoder,
        planner=functools.partial(params.planner, graph=graph),
        objective=_bind_or_none(params.objective, graph=graph),
        exploration=params.exploration,
        preprocess_fn=config.preprocess_fn,
        postprocess_fn=config.postprocess_fn,
        num_models=config.num_models)
    # Merge both ways so each dict sees the union of settings.
    params = params.copy()
    with params.unlocked:
        params.update(agent_config)
    with agent_config.unlocked:
        agent_config.update(params)
    summary, return_, cleanup = control.simulate(
        graph.step, env_ctor, params.task.max_length, params.num_agents,
        agent_config, config.isolate_envs, expensive_summaries, gif_summary,
        name=name)
    # Work around tf.cond() tensor return type.
    cleanups.append(cleanup)
    return summary, return_