Example #1
def visualize(
    logdir, outdir, num_agents, num_episodes, checkpoint=None,
    env_processes=True):
  """Recover checkpoint and render videos from it.

  Args:
    logdir: Logging directory of the trained algorithm.
    outdir: Directory to store rendered videos in.
    num_agents: Number of environments to simulate in parallel.
    num_episodes: Total number of episodes to simulate.
    checkpoint: Checkpoint name to load; defaults to most recent.
    env_processes: Whether to step environments in separate processes.
  """
  config = utility.load_config(logdir)
  with tf.device('/cpu:0'):
    batch_env = utility.define_batch_env(
        lambda: _create_environment(config, outdir),
        num_agents, env_processes)
    graph = utility.define_simulation_graph(
        batch_env, config.algorithm, config)
    total_steps = num_episodes * config.max_length
    loop = _define_loop(graph, total_steps)
  saver = utility.define_saver(
      exclude=(r'.*_temporary.*', r'global_step'))
  sess_config = tf.ConfigProto(allow_soft_placement=True)
  sess_config.gpu_options.allow_growth = True
  with tf.Session(config=sess_config) as sess:
    utility.initialize_variables(
        sess, saver, config.logdir, checkpoint, resume=True)
    for unused_score in loop.run(sess, saver, total_steps):
      pass
  batch_env.close()
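A minimal sketch of a driver for visualize(), assuming the usual tf.app entry point; the flag names and default values below are illustrative assumptions, not part of the example above:

# Hypothetical driver for visualize(); flag names and defaults are assumptions.
import tensorflow as tf

tf.app.flags.DEFINE_string('logdir', None, 'Directory of the trained algorithm.')
tf.app.flags.DEFINE_string('outdir', None, 'Directory to store rendered videos in.')
FLAGS = tf.app.flags.FLAGS


def main(_):
  visualize(
      FLAGS.logdir, FLAGS.outdir, num_agents=1, num_episodes=5,
      checkpoint=None, env_processes=True)


if __name__ == '__main__':
  tf.app.run()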
Example #2
def main(_):
    """ Create or load configuration and launch the trainer.
    """
    if FLAGS.config == 'offense':
        data = np.load('bball_strategies/pretrain/data/off_obs.npy')
        label = np.load('bball_strategies/pretrain/data/off_actions.npy')
    elif FLAGS.config == 'defense':
        data = np.load('bball_strategies/pretrain/data/def_obs.npy')
        label = np.load('bball_strategies/pretrain/data/def_actions.npy')
    else:
        raise ValueError('{} is not an available config'.format(FLAGS.config))
    utility.set_up_logging()
    if not FLAGS.resume:
        logdir = FLAGS.logdir and os.path.expanduser(
            os.path.join(FLAGS.logdir, '{}-{}'.format(FLAGS.timestamp,
                                                      FLAGS.config)))
    else:
        logdir = FLAGS.logdir
    if FLAGS.vis:
        outdir = os.path.join(logdir, 'train_output')
    else:
        outdir = None
    try:
        config = utility.load_config(logdir)
    except IOError:
        if not FLAGS.config:
            raise KeyError('You must specify a configuration.')
        config = tools.AttrDict(getattr(configs, FLAGS.config)())
        config = utility.save_config(config, logdir)
    train(config, data, label, outdir)
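This main() reads several flags that are defined elsewhere in the script. A sketch of plausible definitions, inferred from the usage above; the defaults are assumptions:

# Assumed flag definitions; inferred from how FLAGS is used, not taken from
# the original source.
import datetime
import tensorflow as tf

tf.app.flags.DEFINE_string('logdir', None, 'Base directory for run logs.')
tf.app.flags.DEFINE_string('config', None, 'Name of the configuration to train.')
tf.app.flags.DEFINE_string(
    'timestamp', datetime.datetime.now().strftime('%Y%m%dT%H%M%S'),
    'Timestamp used to name the run directory.')
tf.app.flags.DEFINE_boolean('resume', False, 'Resume from an existing logdir.')
tf.app.flags.DEFINE_boolean('vis', False, 'Render training output videos.')
FLAGS = tf.app.flags.FLAGS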
Example #3
def main(_):
    """ Create or load configuration and launch the trainer.
    """
    utility.set_up_logging()
    if not FLAGS.resume:
        logdir = FLAGS.logdir and os.path.expanduser(
            os.path.join(FLAGS.logdir, '{}-{}'.format(FLAGS.timestamp,
                                                      FLAGS.config)))
    else:
        logdir = FLAGS.logdir
    if FLAGS.vis:
        outdir = os.path.join(logdir, 'train_output')
    else:
        outdir = None

    try:
        config = utility.load_config(logdir)
    except IOError:
        if not FLAGS.config:
            raise KeyError('You must specify a configuration.')
        config = tools.AttrDict(getattr(configs, FLAGS.config)())
        config = utility.save_config(config, logdir)

    for score in train(config, FLAGS.env_processes, outdir):
        tf.logging.info('Score {}.'.format(score))
Example #4
def visualize(
    logdir, outdir, num_agents, num_episodes, checkpoint=None,
    env_processes=True):
  """Recover checkpoint and render videos from it.

  Args:
    logdir: Logging directory of the trained algorithm.
    outdir: Directory to store rendered videos in.
    num_agents: Number of environments to simulate in parallel.
    num_episodes: Total number of episodes to simulate.
    checkpoint: Checkpoint name to load; defaults to most recent.
    env_processes: Whether to step environments in separate processes.
  """
  config = utility.load_config(logdir)
  with tf.device('/cpu:0'):
    batch_env = utility.define_batch_env(
        lambda: _create_environment(config, outdir),
        num_agents, env_processes)
    graph = utility.define_simulation_graph(
        batch_env, config.algorithm, config)
    total_steps = num_episodes * config.max_length
    loop = _define_loop(graph, total_steps)
  saver = utility.define_saver(
      exclude=(r'.*_temporary/.*', r'global_step'))
  sess_config = tf.ConfigProto(allow_soft_placement=True)
  sess_config.gpu_options.allow_growth = True
  with tf.Session(config=sess_config) as sess:
    utility.initialize_variables(
        sess, saver, config.logdir, checkpoint, resume=True)
    for unused_score in loop.run(sess, saver, total_steps):
      pass
  batch_env.close()
Example #5
def main(argv):
  del argv  # Unused.
  config = utility.load_config(LOG_DIR)
  policy_layers = config.policy_layers
  value_layers = config.value_layers
  env = config.env(render=True)
  network = config.network

  with tf.Session() as sess:
    agent = simple_ppo_agent.SimplePPOPolicy(
        sess,
        env,
        network,
        policy_layers=policy_layers,
        value_layers=value_layers,
        checkpoint=os.path.join(LOG_DIR, CHECKPOINT))

    sum_reward = 0
    observation = env.reset()
    while True:
      action = agent.get_action([observation])
      observation, reward, done, _ = env.step(action[0])
      # This sleep is to prevent serial communication error on the real robot.
      time.sleep(0.002)
      sum_reward += reward
      if done:
        break
    tf.logging.info("reward: %s", sum_reward)
Example #6
def main(_):
  """Create or load configuration and launch the trainer."""
  utility.set_up_logging()
  if not FLAGS.config:
    raise KeyError('You must specify a configuration.')
  logdir = FLAGS.logdir and os.path.expanduser(os.path.join(
      FLAGS.logdir, '{}-{}'.format(FLAGS.timestamp, FLAGS.config)))
  try:
    config = utility.load_config(logdir)
  except IOError:
    config = tools.AttrDict(getattr(configs, FLAGS.config)())
    config = utility.save_config(config, logdir)
  for score in train(config, FLAGS.env_processes):
    tf.logging.info('Score {}.'.format(score))
Example #7
def main(_):
    """Create or load configuration and launch the trainer."""
    FLAGS.logdir = '../../Log'
    FLAGS.config = 'pendulum'
    FLAGS.env_processes = False
    utility.set_up_logging()
    if not FLAGS.config:
        raise KeyError('You must specify a configuration.')
    logdir = FLAGS.logdir and os.path.expanduser(
        os.path.join(FLAGS.logdir, '{}-{}'.format(FLAGS.timestamp,
                                                  FLAGS.config)))
    try:
        config = utility.load_config(logdir)
    except IOError:
        config = tools.AttrDict(getattr(configs, FLAGS.config)())
        config = utility.save_config(config, logdir)
    global globalConfig
    globalConfig = config
    for score in train(config, FLAGS.env_processes):
        tf.logging.info('Score {}.'.format(score))
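Note that this variant hard-codes FLAGS.logdir, FLAGS.config, and FLAGS.env_processes before using them, which is convenient when launching from an IDE but silently overrides any command-line values; the globalConfig global presumably makes the loaded config reachable from other modules.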
Example #8
def main(_):
    """ Create or load configuration and launch the trainer.
    """
    off_data = np.load('bball_strategies/pretrain/data/off_obs.npy')
    off_label = np.load('bball_strategies/pretrain/data/off_actions.npy')
    def_data = np.load('bball_strategies/pretrain/data/def_obs.npy')
    def_label = np.load('bball_strategies/pretrain/data/def_actions.npy')

    utility.set_up_logging()

    logdir = FLAGS.logdir
    try:
        config = utility.load_config(logdir)
    except IOError:
        if not FLAGS.config:
            raise KeyError('You must specify a configuration.')
        config = tools.AttrDict(getattr(configs, FLAGS.config)())
        config = utility.save_config(config, logdir)
    outdir = os.path.expanduser(os.path.join(FLAGS.logdir, 'vis'))

    vis_data(off_data, off_label, def_data, def_label, outdir, start_idx=0)
    testing(config, off_data, off_label, def_data, def_label, outdir)
Example #9
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'exp_path',
        type=str,
        nargs='*',
        help='Full experiment path (to the dir where the config is stored)')
    parser.add_argument(
        '-nep',
        '--no_env_process',
        default=False,
        action='store_true',
        help='Step environments in separate processes to circumvent the GIL')
    parser.add_argument('-r',
                        '--render',
                        default=False,
                        action='store_true',
                        help='Whether to render the run')
    parser.add_argument('-c',
                        '--cpu',
                        default=False,
                        action='store_true',
                        help='Whether to run the training on access1-cp')
    parser.add_argument('-e',
                        '--edgar',
                        default=False,
                        action='store_true',
                        help='Whether to run the training on edgar')
    parser.add_argument('-b',
                        '--besteffort',
                        default=False,
                        action='store_true',
                        help='Whether to run in besteffort mode')
    parser.add_argument('-nc',
                        '--nb_cores',
                        type=int,
                        default=8,
                        help='Number of cores to be used on the cluster')
    parser.add_argument('-w',
                        '--wallclock',
                        type=int,
                        default=72,
                        help='Job wall clock time to be set on the cluster')
    parser.add_argument(
        '-s',
        '--steps',
        type=int,
        default=None,
        help='Number of steps of the experiment (if not None, change the config)')
    args = parser.parse_args()

    sys_path_clean = utils.get_sys_path_clean()
    seed_path, timestamp_dir = os.path.split(os.path.normpath(
        args.exp_path[0]))
    exp_path, _ = os.path.split(os.path.normpath(seed_path))
    exp_name = os.path.basename(exp_path)
    rendered_envs_path = '/home/thoth/apashevi/scratch_remote/Cache/Code/{}/rlgrasp/rendered_envs.py'.format(
        exp_name)
    if not args.cpu and not args.edgar:
        # run the job locally
        utils.change_sys_path(sys_path_clean, exp_path)
        import agents.scripts.train as trainer
        from agents.scripts import utility
        assert len(args.exp_path) == 1
        config = utility.load_config(args.exp_path[0])
        with config.unlocked:
            config.num_agents = 4
            if args.steps is not None:
                config.steps = args.steps

        utils.rewrite_rendered_envs_file(args.render, rendered_envs_path)
        for score in trainer.train(config, not args.no_env_process):
            print('Score {}'.format(score))

        if args.render:
            utils.rewrite_rendered_envs_file(False, rendered_envs_path)
    else:
        if args.edgar:
            cluster = 'edgar'
        else:
            cluster = 'access1-cp'
        utils.rewrite_rendered_envs_file(False, rendered_envs_path)
        job_cluster = utils.get_job(cluster, args.besteffort, args.nb_cores,
                                    args.wallclock)
        timestamp = timestamp_dir.split('-')[0]
        config = timestamp_dir.split('-')[1]
        if len(args.exp_path) == 1:
            send_job(job_cluster, seed_path, timestamp, config, args.steps)
        else:
            for exp_path_complete in args.exp_path:
                seed_path, _ = os.path.split(
                    os.path.normpath(exp_path_complete))
                send_job(job_cluster, seed_path, timestamp, config, args.steps)
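send_job() is called above but defined elsewhere in the script. A hypothetical stub that matches the two call sites, for reference only:

# Hypothetical stub; the real implementation lives elsewhere in this script.
def send_job(job_cluster, seed_path, timestamp, config, steps):
    """Submit one training job for the given seed directory to the cluster."""
    raise NotImplementedError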
Example #10
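    # Fragment: the tail of a run_setting-style evaluation helper; the
    # enclosing function definition is not shown in this example.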
    sess_config.gpu_options.allow_growth = True

    with tf.Session(config=sess_config) as sess:
        utility.initialize_variables(
            sess, saver, config.logdir, checkpoint, resume=True)
        for unused_score in loop.run(sess, saver, total_steps):
            pass

    tf.reset_default_graph()

    return batch_env.rewards_list


import pickle


for i in range(len(checkpoints)):
    model = logdir + runs[i] + checkpoints[i]
    config = utility.load_config(logdir + runs[i])

    for j in range(len(latency_index)):
        sampling_j = sampling_index[j]
        latency_j = latency_index[j]
        res = run_setting(config, model, sampling_j, latency_j, num_of_episodes)
        print(i, j, res)

        file_name = 'data/{}_{}_{}.p'.format(save_names[i], sampling_j, latency_j)
        with open(file_name, 'wb') as f:
            pickle.dump([res, sampling_j, latency_j, model, config], f)
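A minimal sketch of reading one of these result files back; the file name components below are illustrative, not taken from the example above:

# Load one pickled result; the name is illustrative.
import pickle

with open('data/run0_2_5.p', 'rb') as f:
    res, sampling_j, latency_j, model, config = pickle.load(f)
print(res, sampling_j, latency_j)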