Example #1
0
def run(settings):
    recreate_subdirectory_structure(settings)
    tf.reset_default_graph()

    with tf.device("/cpu:0"):
        global_step = tf.Variable(0,
                                  dtype=tf.int32,
                                  name='global_episodes',
                                  trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=settings["lr"])
        global_network = ACNetwork('global', None)

        num_agents = 1
        agents = []
        envs = []
        for i in range(num_agents):
            if settings["game"] == '11arms':
                this_env = ElevenArms()
            else:
                this_env = TwoArms(settings["game"])
            envs.append(this_env)

        for i in range(num_agents):
            agents.append(Agent(envs[i], i, optimizer, global_step, settings))
        saver = tf.train.Saver(max_to_keep=5)

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        if FLAGS.resume:
            ckpt = tf.train.get_checkpoint_state(settings["checkpoint_dir"])
            # print("Loading Model from {}".format(ckpt.model_checkpoint_path))
            try:
                saver.restore(sess, ckpt.model_checkpoint_path)
            except Exception as e:
                print(sys.exc_info()[0])
                print(e)
        else:
            sess.run(tf.global_variables_initializer())

        agent_threads = []
        for agent in agents:
            agent_play = lambda: agent.play(sess, coord, saver)
            thread = threading.Thread(target=agent_play)
            thread.start()
            agent_threads.append(thread)
        coord.join(agent_threads)
def run_baseline():
    test_envs = TwoArms.get_envs(FLAGS.game, FLAGS.nb_test_episodes)

    model_name = "baseline"

    settings = {
        "model_name": model_name,
        "game": FLAGS.game,
        "envs": test_envs,
        "exp_type": "evaluate_baseline"
    }

    tf.reset_default_graph()

    with tf.device("/cpu:0"):
        num_agents = 1
        agents = []
        envs = []
        for i in range(num_agents):
            if settings["game"] == '11arms':
                this_env = ElevenArms()
            else:
                this_env = TwoArms(settings["game"])
            envs.append(this_env)

        for i in range(num_agents):
            agents.append(RandomAgent(envs[i], i, settings))

    with tf.Session() as sess:
        coord = tf.train.Coordinator()

        agent_threads = []
        for agent in agents:
            agent_play = lambda: agent.play(coord)
            thread = threading.Thread(target=agent_play)
            thread.start()
            agent_threads.append(thread)
        coord.join(agent_threads)