Example #1
0
def simulate_episodes(
    config, params, graph, cleanups, expensive_summaries, gif_summary, name):
  """Configure an agent and an environment, then call `control.simulate`.

  Args:
    config: Read for `preprocess_fn`, `postprocess_fn`, and `isolate_envs`.
    params: Read for `task`, `save_episode_dir`, `planner`, `objective`,
      `exploration`, and `num_agents`. It is copied before being merged with
      the agent settings.
    graph: Read for `cell`, `encoder`, and `step`; also bound as the `graph`
      keyword of the planner and, when present, the objective.
    cleanups: List to which the cleanup value returned by `control.simulate`
      is appended.
    expensive_summaries: Forwarded to `control.simulate`.
    gif_summary: Forwarded to `control.simulate`.
    name: Forwarded to `control.simulate` as the `name` keyword.

  Returns:
    The `(summary, return_)` pair taken from the result of
    `control.simulate`.
  """
  def env_ctor():
    # `params` is resolved when this runs; it is only handed to
    # `control.simulate`, i.e. after `params` was rebound to the merged copy.
    wrapped = params.task.env_ctor()
    if params.save_episode_dir:
      wrapped = control.wrappers.CollectGymDataset(
          wrapped, params.save_episode_dir)
    return control.wrappers.ConcatObservation(wrapped, ['image'])

  def bind_or_none(fn, **kwargs):
    # A falsy `fn` (such as None) is handed back unchanged instead of bound.
    if not fn:
      return fn
    return functools.partial(fn, **kwargs)

  agent_settings = dict(
      cell=graph.cell,
      encoder=graph.encoder,
      planner=functools.partial(params.planner, graph=graph),
      objective=bind_or_none(params.objective, graph=graph),
      exploration=params.exploration,
      preprocess_fn=config.preprocess_fn,
      postprocess_fn=config.postprocess_fn)
  agent_config = tools.AttrDict(**agent_settings)

  # Update each container from the other: agent settings onto the copy of
  # `params` first, then the merged `params` back onto `agent_config`.
  params = params.copy()
  with params.unlocked:
    params.update(agent_config)
  with agent_config.unlocked:
    agent_config.update(params)

  outputs = control.simulate(
      graph.step, env_ctor, params.task.max_length,
      params.num_agents, agent_config, config.isolate_envs,
      expensive_summaries, gif_summary, name=name)
  summary, return_, cleanup = outputs
  cleanups.append(cleanup)  # Work around tf.cond() tensor return type.
  return summary, return_
Example #2
0
def simulate_episodes(config, params, graph, name):
  """Simulate `params.batch_size` episodes one at a time and combine them.

  Args:
    config: Read for `preprocess_fn` and `postprocess_fn`.
    params: Read for `task`, `save_episode_dir`, `planner`, `objective`,
      `exploration`, and `batch_size`. It is copied before being merged with
      the agent settings.
    graph: Read for `cell`, `encoder`, and `step`; also bound as the `graph`
      keyword of the objective.
    name: Forwarded to every `control.simulate` call as the `name` keyword.

  Returns:
    A pair `(summary, return_)`: the per-episode summaries merged with
    `tf.summary.merge` and the per-episode returns averaged with
    `tf.reduce_mean`.
  """
  def env_ctor():
    # `params` is resolved when this runs; it is only handed to
    # `control.simulate`, i.e. after `params` was rebound to the merged copy.
    wrapped = params.task.env_ctor()
    if params.save_episode_dir:
      wrapped = control.wrappers.CollectGymDataset(
          wrapped, params.save_episode_dir)
    return control.wrappers.ConcatObservation(wrapped, ['image'])

  agent_settings = dict(
      cell=graph.cell,
      encoder=graph.encoder,
      planner=params.planner,
      objective=functools.partial(params.objective, graph=graph),
      exploration=params.exploration,
      preprocess_fn=config.preprocess_fn,
      postprocess_fn=config.postprocess_fn)
  agent_config = tools.AttrDict(**agent_settings)

  # Update each container from the other: agent settings onto the copy of
  # `params` first, then the merged `params` back onto `agent_config`.
  params = params.copy()
  params.update(agent_config)
  agent_config.update(params)

  # Batch size larger crashes so we simulate the episodes individually,
  # each inside its own 1-based 'simulate-<i>' variable scope.
  episode_summaries = []
  episode_returns = []
  for index in range(params.batch_size):
    scope_name = 'simulate-{}'.format(index + 1)
    with tf.variable_scope(scope_name):
      episode_summary, episode_return = control.simulate(
          graph.step, env_ctor, params.task.max_length,
          1, agent_config, name=name)
    episode_summaries.append(episode_summary)
    episode_returns.append(episode_return)
  return tf.summary.merge(episode_summaries), tf.reduce_mean(episode_returns)
Example #3
0
def simulate_episodes(config, params, graph, expensive_summaries, name):
    """Configure an agent and an environment, then call `control.simulate`.

    Args:
        config: Read for `preprocess_fn`, `postprocess_fn`, and
            `isolate_envs`.
        params: Read for `task`, `save_episode_dir`, `planner`, `objective`,
            `exploration`, and `num_agents`. It is copied before being merged
            with the agent settings.
        graph: Read for `cell`, `encoder`, and `step`; also bound as the
            `graph` keyword of the objective.
        expensive_summaries: Forwarded to `control.simulate`.
        name: Forwarded to `control.simulate` as the `name` keyword.

    Returns:
        The `(summary, return_)` pair returned by `control.simulate`.
    """
    def env_ctor():
        # `params` is resolved when this runs; it is only handed to
        # `control.simulate`, i.e. after `params` was rebound to the merged
        # copy.
        wrapped = params.task.env_ctor()
        if params.save_episode_dir:
            wrapped = control.wrappers.CollectGymDataset(
                wrapped, params.save_episode_dir)
        return control.wrappers.ConcatObservation(wrapped, ['image'])

    agent_config = tools.AttrDict(
        cell=graph.cell,
        encoder=graph.encoder,
        planner=params.planner,
        objective=functools.partial(params.objective, graph=graph),
        exploration=params.exploration,
        preprocess_fn=config.preprocess_fn,
        postprocess_fn=config.postprocess_fn)

    # Update each container from the other: agent settings onto the copy of
    # `params` first, then the merged `params` back onto `agent_config`.
    params = params.copy()
    params.update(agent_config)
    agent_config.update(params)

    summary, return_ = control.simulate(
        graph.step, env_ctor, params.task.max_length, params.num_agents,
        agent_config, config.isolate_envs, expensive_summaries, name=name)
    return summary, return_
Example #4
0
def simulate_episodes(config, params, graph, cleanups, expensive_summaries,
                      gif_summary, name):
    """Configure an agent and an environment, then call `control.simulate`.

    Args:
        config: Read for `preprocess_fn`, `postprocess_fn`, `num_models`,
            and `isolate_envs`.
        params: Read for `task`, `save_episode_dir`, `curious_dir`,
            `is_curious`, `planner`, `objective`, `exploration`, and
            `num_agents`. It is copied before being merged with the agent
            settings.
        graph: Read for `cell`, `encoder`, and `step`; also bound as the
            `graph` keyword of the planner and, when present, the objective.
        cleanups: List to which the cleanup value returned by
            `control.simulate` is appended.
        expensive_summaries: Forwarded to `control.simulate`.
        gif_summary: Forwarded to `control.simulate`.
        name: Forwarded to `control.simulate` as the `name` keyword.

    Returns:
        The `(summary, return_)` pair taken from the result of
        `control.simulate`.
    """
    def env_ctor():
        # `params` is resolved when this runs; it is only handed to
        # `control.simulate`, i.e. after `params` was rebound to the merged
        # copy.
        wrapped = params.task.env_ctor()
        if params.save_episode_dir:
            wrapped = control.wrappers.CollectGymDataset(
                wrapped,
                params.save_episode_dir,
                logdir=params.curious_dir,
                is_curious=params.is_curious)
        return control.wrappers.ConcatObservation(wrapped, ['image'])

    def bind_or_none(fn, **kwargs):
        # A falsy `fn` (such as None) is handed back unchanged, not bound.
        if not fn:
            return fn
        return functools.partial(fn, **kwargs)

    agent_settings = dict(
        cell=graph.cell,
        encoder=graph.encoder,
        planner=functools.partial(params.planner, graph=graph),
        objective=bind_or_none(params.objective, graph=graph),
        exploration=params.exploration,
        preprocess_fn=config.preprocess_fn,
        postprocess_fn=config.postprocess_fn,
        num_models=config.num_models)
    agent_config = tools.AttrDict(**agent_settings)

    # Update each container from the other: agent settings onto the copy of
    # `params` first, then the merged `params` back onto `agent_config`.
    params = params.copy()
    with params.unlocked:
        params.update(agent_config)
    with agent_config.unlocked:
        agent_config.update(params)

    outputs = control.simulate(
        graph.step, env_ctor, params.task.max_length, params.num_agents,
        agent_config, config.isolate_envs, expensive_summaries, gif_summary,
        name=name)
    summary, return_, cleanup = outputs
    cleanups.append(cleanup)  # Work around tf.cond() tensor return type.
    return summary, return_