def testEvalOnce(self):
        """Run a single PolicyMonitor evaluation episode and check it terminates.

        Builds a PolicyMonitor around the test fixture's env/policy net and
        asserts that one call to eval_once() produces a positive episode length.
        """
        pe = PolicyMonitor(env=self.env,
                           policy_net=self.global_policy_net,
                           summary_writer=self.summary_writer)

        with self.test_session() as sess:
            # FIX: tf.initialize_all_variables() was deprecated/renamed in
            # TF 1.0; use tf.global_variables_initializer() as the rest of
            # this file already does.
            sess.run(tf.global_variables_initializer())
            total_reward, episode_length = pe.eval_once(sess)
            # assertGreater gives a clearer failure message than assertTrue.
            self.assertGreater(episode_length, 0)
  def testEvalOnce(self):
    """Run a single PolicyMonitor evaluation episode and check it terminates.

    Asserts that one call to eval_once() yields a positive episode length.
    """
    pe = PolicyMonitor(
      env=self.env,
      policy_net=self.global_policy_net,
      summary_writer=self.summary_writer)

    with self.test_session() as sess:
      # FIX: tf.initialize_all_variables() was deprecated/renamed in TF 1.0;
      # use tf.global_variables_initializer() for consistency with the
      # other examples in this file.
      sess.run(tf.global_variables_initializer())
      total_reward, episode_length = pe.eval_once(sess)
      # assertGreater gives a clearer failure message than assertTrue.
      self.assertGreater(episode_length, 0)
Exemple #3
0
            # NOTE(review): truncated scrape fragment — this excerpt begins
            # inside a loop/branch of a larger training script. Names such as
            # summary_writer, worker_id, workers, make_env, policy_net,
            # value_net, global_counter, FLAGS and CHECKPOINT_DIR are defined
            # outside the visible text. Documented below as-is; code unchanged.
            worker_summary_writer = summary_writer

        # One Worker per thread; all share the global policy/value nets.
        worker = Worker(name="worker_{}".format(worker_id),
                        env=make_env(),
                        policy_net=policy_net,
                        value_net=value_net,
                        global_counter=global_counter,
                        discount_factor=0.99,
                        summary_writer=worker_summary_writer,
                        max_global_steps=FLAGS.max_global_steps)
        workers.append(worker)

    # Checkpoint saver: keep one checkpoint every 2 hours, at most 10 total.
    saver = tf.train.Saver(keep_checkpoint_every_n_hours=2.0, max_to_keep=10)

    # Periodic policy evaluation / Tensorboard reporting helper.
    pe = PolicyMonitor(env=make_env(wrap=False),
                       policy_net=policy_net,
                       summary_writer=summary_writer,
                       saver=saver)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()

    # Resume from the most recent checkpoint, if any exists.
    latest_checkpoint = tf.train.latest_checkpoint(CHECKPOINT_DIR)

    if latest_checkpoint:
        print("Loading model checkpoint: {}".format(latest_checkpoint))
        saver.restore(sess, latest_checkpoint)

    worker_threads = []

    # NOTE(review): fragment is cut off here — the loop body that starts
    # each worker thread is missing from this excerpt.
    for worker in workers:
# NOTE(review): truncated scrape fragment — policy_net, make_envs, instances,
# val_summary_writer and the separator marking the start of this example are
# missing from the visible text. Documented as-is; code unchanged.
# Session configs
if FLAGS.gpuid is not None:
    os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpuid
config = tf.ConfigProto()
# Grow GPU memory on demand instead of grabbing it all up front.
config.gpu_options.allow_growth = True

with tf.device("/cpu:0"):

    # Keep only the most recent checkpoint, plus one per hour.
    saver = tf.train.Saver(keep_checkpoint_every_n_hours=1.0, max_to_keep=1)

    # Used to occasionally write episode rewards to Tensorboard
    pe = PolicyMonitor(envs=make_envs(),
                       policy_net=policy_net,
                       domain=FLAGS.domain,
                       instances=instances,
                       neighbourhood=FLAGS.neighbourhood,
                       summary_writer=val_summary_writer,
                       saver=saver)

results = {}

with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()

    # Scan the restore directory for checkpoint .meta files.
    for file in os.listdir(FLAGS.restore_dir + "/checkpoints"):
        print(file)
        if file[-4:] == "meta":
            # Presumably parses the step number out of a name like
            # "model-<num>.meta" — TODO confirm the filename scheme.
            # NOTE(review): fragment is cut off after this line.
            file_num = int(file[6:-5])
Exemple #5
0
      # NOTE(review): truncated scrape fragment — begins mid-way through a
      # Worker(...) constructor call; the call's opening line and the
      # surrounding loop are missing from this excerpt. Code unchanged.
      env=make_env(),
      policy_net=policy_net,
      value_net=value_net,
      global_counter=global_counter,
      discount_factor = 0.99,
      summary_writer=worker_summary_writer,
      max_global_steps=FLAGS.max_global_steps)
    workers.append(worker)

  # Checkpoint saver: keep one checkpoint every 2 hours, at most 10 total.
  saver = tf.train.Saver(keep_checkpoint_every_n_hours=2.0, max_to_keep=10)

  # Used to occasionally save videos for our policy net
  # and write episode rewards to Tensorboard
  pe = PolicyMonitor(
    env=make_env(),
    policy_net=policy_net,
    summary_writer=summary_writer,
    saver=saver)

with tf.Session() as sess:
  # NOTE(review): tf.initialize_all_variables() is the pre-TF-1.0 name of
  # tf.global_variables_initializer() — deprecated; left unchanged here.
  sess.run(tf.initialize_all_variables())
  coord = tf.train.Coordinator()

  # Load a previous checkpoint if it exists
  latest_checkpoint = tf.train.latest_checkpoint(CHECKPOINT_DIR)
  if latest_checkpoint:
    print("Loading model checkpoint: {}".format(latest_checkpoint))
    saver.restore(sess, latest_checkpoint)

  # Start worker threads
  # NOTE(review): fragment is cut off here — the thread-start loop is missing.
  worker_threads = []
Exemple #6
0
def main():
    """Build the global A3C graph, spawn worker threads, and train.

    Reads configuration from FLAGS (env name, parallelism, model/checkpoint
    directories, frame stacking, step limits), creates one Worker thread per
    CPU core (or FLAGS.parallelism), plus a PolicyMonitor evaluation thread,
    and blocks until the coordinator reports all workers finished.
    """

    # Depending on the game we may have a limited action space.
    # A throwaway env instance is used only to probe its spaces.
    env_ = gym.make(FLAGS.env)
    num_actions = env_.action_space.n
    dim_obs = list(env_.observation_space.shape)
    # Sanity check: expect an RGB frame (H, W, 3). Note `assert` is
    # stripped under `python -O`; kept for parity with the original.
    assert len(dim_obs) == 3 and dim_obs[2] == 3  #make sure it is a RGB frame
    N_FRAME = FLAGS.n_frame if FLAGS.n_frame else 1
    dim_obs[2] *= N_FRAME  # stacked frames extend the channel axis
    print("Valid number of actions is {}".format(num_actions))
    print("The dimension of the observation space is {}".format(dim_obs))
    env_.close()

    # Set the number of workers (default: one per CPU core)
    NUM_WORKERS = (FLAGS.parallelism
                   if FLAGS.parallelism else multiprocessing.cpu_count())

    MODEL_DIR = FLAGS.model_dir
    CP_H = FLAGS.checkpoint_hour
    CHECKPOINT_DIR = os.path.join(MODEL_DIR, "checkpoints")
    TENSORBOARD_DIR = os.path.join(MODEL_DIR, "tb")

    # Optionally empty model directory
    if FLAGS.reset:
        shutil.rmtree(MODEL_DIR, ignore_errors=True)

    if not os.path.exists(CHECKPOINT_DIR):
        os.makedirs(CHECKPOINT_DIR)

    summary_writer = tf.summary.FileWriter(TENSORBOARD_DIR)

    # The shared (global) graph lives on the CPU; workers are threads that
    # read/update these shared nets.
    with tf.device("/cpu:0"):

        # Keeps track of the number of updates we've performed
        global_step = tf.Variable(0, name="global_step", trainable=False)

        # Global policy and value nets (value net reuses policy-net scope).
        # The unused `as vs` binding from the original was dropped.
        with tf.variable_scope("global"):
            policy_net = PolicyEstimator(num_outputs=num_actions,
                                         dim_inputs=dim_obs)
            value_net = ValueEstimator(reuse=True, dim_inputs=dim_obs)

        # Global step iterator shared by all workers
        global_counter = itertools.count()

        # Create worker graphs
        workers = []
        for worker_id in range(NUM_WORKERS):
            # We only write summaries in one of the workers because they're
            # pretty much identical and writing them on all workers
            # would be a waste of space
            worker_summary_writer = None
            if worker_id == 0:
                worker_summary_writer = summary_writer

            worker = Worker(name="worker_{}".format(worker_id),
                            env=gym.make(FLAGS.env),
                            policy_net=policy_net,
                            value_net=value_net,
                            global_counter=global_counter,
                            discount_factor=0.99,
                            summary_writer=worker_summary_writer,
                            max_global_steps=FLAGS.max_global_steps,
                            n_frame=N_FRAME)
            workers.append(worker)

        saver = tf.train.Saver(keep_checkpoint_every_n_hours=CP_H,
                               max_to_keep=10)

        # Used to occasionally save videos for our policy net
        # and write episode rewards to Tensorboard
        pe = PolicyMonitor(env=gym.make(FLAGS.env),
                           policy_net=policy_net,
                           summary_writer=summary_writer,
                           saver=saver)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()

        # Load a previous checkpoint if it exists
        latest_checkpoint = tf.train.latest_checkpoint(CHECKPOINT_DIR)
        if latest_checkpoint:
            print("Loading model checkpoint: {}".format(latest_checkpoint))
            saver.restore(sess, latest_checkpoint)

        # Start worker threads
        worker_threads = []
        for worker in workers:
            print("starting worker:")
            # BUG FIX: the original used `lambda: worker.run(...)`, which
            # late-binds `worker` — by the time a thread runs its lambda the
            # loop variable may have advanced, so several threads could run
            # the same (last) worker. Passing the bound method and its args
            # directly to Thread pins each worker to its own thread.
            t = threading.Thread(target=worker.run,
                                 args=(sess, coord, FLAGS.t_max))
            t.start()
            worker_threads.append(t)

        # Start a thread for policy eval task (pe never rebinds, so closing
        # over it here is safe, unlike the per-worker case above).
        monitor_thread = threading.Thread(
            target=lambda: pe.continuous_eval(FLAGS.eval_every, sess, coord))
        monitor_thread.start()

        # Wait for all workers to finish
        coord.join(worker_threads)
Exemple #7
0
                        # NOTE(review): truncated scrape fragment — begins
                        # mid-way through a Worker(...) constructor call whose
                        # opening line is missing from this excerpt.
                        domain=FLAGS.domain,
                        instances=instances,
                        N_train=N_train_instances,
                        neighbourhood=FLAGS.neighbourhood,
                        discount_factor=0.99,
                        summary_writer=worker_summary_writer,
                        max_global_steps=FLAGS.max_global_steps)
        workers.append(worker)

    # Checkpoint saver: keep one checkpoint every 15 minutes.
    saver = tf.train.Saver(keep_checkpoint_every_n_hours=0.25)

    # Used to occasionally write episode rewards to Tensorboard
    pe = PolicyMonitor(envs=make_envs(),
                       policy_net=policy_net,
                       domain=FLAGS.domain,
                       instances=instances,
                       neighbourhood=FLAGS.neighbourhood,
                       summary_writer=val_summary_writer,
                       saver=saver)

# print([n.name for n in tf.get_default_graph().as_graph_def().node])

with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    # train_summary_writer.add_graph(sess.graph)
    # val_summary_writer.add_graph(sess.graph)
    coord = tf.train.Coordinator()

    if FLAGS.use_pretrained:
        # Load a previous checkpoint if it exists
        # NOTE(review): scrape artifact — the text jumps here from one example
        # into another mid-statement: `get_checkpoint_state(` is never closed
        # and the keyword args below belong to a different Worker(...) call.
        ckpt = tf.train.get_checkpoint_state(
      env=make_env(),
      policy_net=policy_net,
      value_net=value_net,
      global_counter=global_counter,
      discount_factor = 0.99,
      summary_writer=worker_summary_writer,
      max_global_steps=FLAGS.max_global_steps)
    workers.append(worker)

  # Checkpoint saver: keep one checkpoint every 2 hours, at most 10 total.
  saver = tf.train.Saver(keep_checkpoint_every_n_hours=2.0, max_to_keep=10)

  # Used to occasionally save videos for our policy net
  # and write episode rewards to Tensorboard
  pe = PolicyMonitor(
    env=make_env(wrap=False),
    policy_net=policy_net,
    summary_writer=summary_writer,
    saver=saver)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  coord = tf.train.Coordinator()

  # Load a previous checkpoint if it exists
  latest_checkpoint = tf.train.latest_checkpoint(CHECKPOINT_DIR)
  if latest_checkpoint:
    print("Loading model checkpoint: {}".format(latest_checkpoint))
    saver.restore(sess, latest_checkpoint)

  # Start worker threads
  # NOTE(review): fragment is cut off here — the thread-start loop is missing.
  worker_threads = []
Exemple #9
0
        # NOTE(review): truncated scrape fragment — the enclosing loop over
        # worker_id and the definitions of make_env, policy_net, value_net,
        # global_counter, FLAGS and CHECKPOINT_DIR are outside this excerpt.
        # Each worker gets its own env instance named by its thread id.
        worker = Worker(name="worker_{}".format(worker_id),
                        env=make_env(thread_name=str(worker_id)),
                        policy_net=policy_net,
                        value_net=value_net,
                        global_counter=global_counter,
                        discount_factor=0.99,
                        summary_writer=worker_summary_writer,
                        max_global_steps=FLAGS.max_global_steps)
        workers.append(worker)

    # Checkpoint saver: keep one checkpoint every 2 hours, at most 10 total.
    saver = tf.train.Saver(keep_checkpoint_every_n_hours=2.0, max_to_keep=10)

    # Used to occasionally save videos for our policy net
    # and write episode rewards to Tensorboard
    pe = PolicyMonitor(env=make_env(thread_name=str("p")),
                       policy_net=policy_net,
                       summary_writer=summary_writer,
                       saver=saver)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()

    # Load a previous checkpoint if it exists
    latest_checkpoint = tf.train.latest_checkpoint(CHECKPOINT_DIR)
    if latest_checkpoint:
        print("Loading model checkpoint: {}".format(latest_checkpoint))
        saver.restore(sess, latest_checkpoint)

    # Start worker threads
    worker_threads = []
    # NOTE(review): fragment is cut off here — the loop body that starts
    # each worker thread is missing from this excerpt.
    for worker in workers:
Exemple #10
0
        # NOTE(review): truncated scrape fragment — the enclosing loop over
        # worker_id and the definitions of getEnv, PORT, _env, policy_net,
        # value_net, global_counter, FLAGS and CHECKPOINT_DIR are outside this
        # excerpt. Each worker appears to connect to its own port
        # (PORT + worker_id + 1) — TODO confirm against getEnv's signature.
        worker = Worker(name="worker_{}".format(worker_id),
                        env=getEnv(FLAGS, PORT + worker_id + 1),
                        policy_net=policy_net,
                        value_net=value_net,
                        global_counter=global_counter,
                        discount_factor=0.99,
                        summary_writer=worker_summary_writer,
                        max_global_steps=FLAGS.max_global_steps)
        workers.append(worker)

    # Checkpoint saver: keep one checkpoint every 2 hours, at most 10 total.
    saver = tf.train.Saver(keep_checkpoint_every_n_hours=2.0, max_to_keep=10)

    # Used to occasionally save videos for our policy net
    # and write episode rewards to Tensorboard
    pe = PolicyMonitor(env=_env,
                       policy_net=policy_net,
                       summary_writer=summary_writer,
                       saver=saver)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()

    # Load a previous checkpoint if it exists
    latest_checkpoint = tf.train.latest_checkpoint(CHECKPOINT_DIR)
    if latest_checkpoint:
        print("Loading model checkpoint: {}".format(latest_checkpoint))
        saver.restore(sess, latest_checkpoint)

    # Start worker threads
    worker_threads = []
    # NOTE(review): fragment is cut off here — the loop body that starts
    # each worker thread is missing from this excerpt.
    for worker in workers: