Example 1
    def __init__(self, game, thread_id, optimizer, global_step):
        self.name = "worker_" + str(thread_id)
        self.thread_id = thread_id
        self.model_path = os.path.join(FLAGS.checkpoint_dir, FLAGS.model_name)
        self.optimizer = optimizer
        self.global_episode = global_step
        self.increment_global_episode = self.global_episode.assign_add(1)
        self.episode_rewards = []

        # if not FLAGS.train:
        self.episode_optimal_rewards = []
        self.episodes_suboptimal_arms = []

        self.episode_lengths = []
        self.episode_mean_values = []
        self.summary_writer = tf.summary.FileWriter(
            os.path.join(FLAGS.summaries_dir, FLAGS.model_name) +
            "/worker_" + str(self.thread_id))
        self.summary = tf.Summary()

        if FLAGS.use_conv:
            self.local_AC = ConvNetwork(self.name, optimizer, self.global_episode)
        else:
            self.local_AC = ACNetwork(self.name, optimizer, self.global_episode)

        self.update_local_vars = update_target_graph('global', self.name)
        self.env = game
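All of the worker constructors in these snippets call an update_target_graph helper that is not shown here. A minimal sketch of the usual A3C version of that helper (an assumption; the actual repositories may differ) is:

import tensorflow as tf

def update_target_graph(from_scope, to_scope):
    # Collect the trainable variables of the source ('global') scope and of
    # the worker's scope, and build one assign op per variable pair.
    from_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, from_scope)
    to_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, to_scope)
    return [to_var.assign(from_var)
            for from_var, to_var in zip(from_vars, to_vars)]

Running the returned ops (for example sess.run(self.update_local_vars)) at the start of an episode copies the shared 'global' weights into the worker's local network.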
Example 2
    def __init__(self, game, sess, thread_id, nb_actions, optimizer,
                 global_step):
        self.name = "worker_" + str(thread_id)
        self.thread_id = thread_id
        self.model_path = FLAGS.checkpoint_dir
        self.trainer = optimizer
        self.global_episode = global_step
        self.increment_global_episode = self.global_episode.assign_add(1)
        self.episode_rewards = []
        self.episode_lengths = []
        self.episode_mean_values = []

        self.sess = sess
        self.graph = sess.graph
        # self.summary_writer = tf.summary.FileWriter(FLAGS.summaries_dir + "/worker_" + str(self.thread_id), self.graph)
        self.summary_writer = tf.summary.FileWriter(FLAGS.summaries_dir +
                                                    "/worker_" +
                                                    str(self.thread_id))
        self.summary = tf.Summary()

        if FLAGS.lstm:
            self.local_AC = ACNetworkLSTM(self.name, nb_actions, optimizer)
        else:
            self.local_AC = ACNetwork(self.name, nb_actions, optimizer)

        self.update_local_ops = update_target_graph('global', self.name)

        self.actions = np.zeros([nb_actions])
        self.env = game
def run():
    recreate_directory_structure()
    tf.reset_default_graph()

    sess = tf.Session()
    # sess = tf_debug.LocalCLIDebugWrapperSession(sess)
    # sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
    with sess:
        with tf.device("/cpu:0"):
            global_step = tf.Variable(0,
                                      dtype=tf.int32,
                                      name='global_episodes',
                                      trainable=False)
            optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.lr)
            if FLAGS.use_conv:
                global_network = ConvNetwork('global', None)
            else:
                global_network = ACNetwork('global', None)

            # num_agents = multiprocessing.cpu_count()
            num_agents = FLAGS.nb_concurrent
            agents = []
            envs = []

            for i in range(num_agents):
                gym_env = gym.make(FLAGS.game)
                # if FLAGS.monitor:
                #     gym_env = gym.wrappers.Monitor(gym_env, FLAGS.experiments_dir + '/worker_{}'.format(i), force=True)
                envs.append(gym_env)

            for i in range(num_agents):
                agents.append(Agent(envs[i], i, optimizer, global_step))
            saver = tf.train.Saver(max_to_keep=5)

        coord = tf.train.Coordinator()
        if FLAGS.resume:
            ckpt = tf.train.get_checkpoint_state(
                os.path.join(FLAGS.checkpoint_dir, FLAGS.model_name))
            print("Loading Model from {}".format(ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

        agent_threads = []
        for agent in agents:
            # Pass the bound method and its arguments directly; a lambda here
            # would late-bind `agent` to the last element of the loop.
            thread = threading.Thread(target=agent.play,
                                      args=(sess, coord, saver))
            thread.start()
            agent_threads.append(thread)

        # Render until the coordinator stops the workers; a bare `while True`
        # here would make the join below unreachable.
        while not coord.should_stop():
            if FLAGS.show_training:
                for env in envs:
                    # time.sleep(1)
                    # with main_lock:
                    env.render()

        coord.join(agent_threads)
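The run() above starts with recreate_directory_structure(), whose body is not part of these snippets. A plausible sketch, assuming it only resets the checkpoint and summary directories referenced by the FLAGS used here (hypothetical, not the repository's exact code):

import os
import shutil

def recreate_directory_structure():
    # Reset the output directories so a fresh (non-resumed) run does not mix
    # its checkpoints and summaries with those of a previous run.
    for directory in (FLAGS.checkpoint_dir, FLAGS.summaries_dir):
        if os.path.exists(directory) and not FLAGS.resume:
            shutil.rmtree(directory)
        if not os.path.exists(directory):
            os.makedirs(directory)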
Example 4
    def __init__(self, game, nb_actions, optimizer, global_step):
        self.name = "policy_eval"
        if FLAGS.lstm:
            self.local_AC = ACNetworkLSTM(self.name, nb_actions, optimizer)
        else:
            self.local_AC = ACNetwork(self.name, nb_actions, optimizer)
        self.update_local_ops = update_target_graph('global', self.name)
        self.summary_writer = tf.summary.FileWriter(FLAGS.summaries_dir +
                                                    "/policy_eval")
        self.env = game
        self.actions = np.zeros([nb_actions])
        self.global_episode = global_step
def run():
    tf.reset_default_graph()

    sess = tf.Session()
    with sess:
        with tf.device("/cpu:0"):
            global_step = tf.Variable(0,
                                      dtype=tf.int32,
                                      name='global_episodes',
                                      trainable=False)
            optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.lr)
            if FLAGS.use_conv:
                global_network = ConvNetwork('global', None)
            else:
                global_network = ACNetwork('global', None)
            saver = tf.train.Saver(max_to_keep=5)

            if FLAGS.resume:
                ckpt = tf.train.get_checkpoint_state(
                    os.path.join(FLAGS.checkpoint_dir, FLAGS.model_name))
                print("Loading Model from {}".format(
                    ckpt.model_checkpoint_path))
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                sess.run(tf.global_variables_initializer())

            gym_env_monitor = gym.make(FLAGS.game)
            if FLAGS.monitor:
                gym_env_monitor = gym.wrappers.Monitor(
                    gym_env_monitor,
                    os.path.join(FLAGS.test_experiments_dir, FLAGS.model_name),
                    force=True)

            pe = PolicyMonitor(game=gym_env_monitor,
                               optimizer=optimizer,
                               global_step=global_step)

        coord = tf.train.Coordinator()

        # Start a thread for policy eval task
        monitor_thread = threading.Thread(
            target=lambda: pe.eval_nb_test_episodes(sess))
        monitor_thread.start()
        import time
        # Render until the coordinator stops the evaluation thread; a bare
        # `while True` here would make the join below unreachable.
        while not coord.should_stop():
            if FLAGS.show_training:
                time.sleep(1)
                with main_lock:
                    gym_env_monitor.render()

        coord.join([monitor_thread])
Example 6
def run(settings):
    recreate_subdirectory_structure(settings)
    tf.reset_default_graph()

    with tf.device("/cpu:0"):
        global_step = tf.Variable(0,
                                  dtype=tf.int32,
                                  name='global_episodes',
                                  trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=settings["lr"])
        global_network = ACNetwork('global', None)

        num_agents = 1
        agents = []
        envs = []
        for i in range(num_agents):
            if settings["game"] == '11arms':
                this_env = ElevenArms()
            else:
                this_env = TwoArms(settings["game"])
            envs.append(this_env)

        for i in range(num_agents):
            agents.append(Agent(envs[i], i, optimizer, global_step, settings))
        saver = tf.train.Saver(max_to_keep=5)

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        if FLAGS.resume:
            ckpt = tf.train.get_checkpoint_state(settings["checkpoint_dir"])
            # print("Loading Model from {}".format(ckpt.model_checkpoint_path))
            try:
                saver.restore(sess, ckpt.model_checkpoint_path)
            except Exception as e:
                print(sys.exc_info()[0])
                print(e)
        else:
            sess.run(tf.global_variables_initializer())

        agent_threads = []
        for agent in agents:
            # Pass the bound method and its arguments directly; a lambda here
            # would late-bind `agent` to the last element of the loop.
            thread = threading.Thread(target=agent.play,
                                      args=(sess, coord, saver))
            thread.start()
            agent_threads.append(thread)
        coord.join(agent_threads)
def run():
    tf.reset_default_graph()

    with tf.Session() as sess:
        with tf.device("/cpu:0"):
            global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
            # optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.lr)
            optimizer = tf.train.RMSPropOptimizer(FLAGS.lr, 0.99, 0.0, 1e-6)
            gym_env_monitor = gym.make(FLAGS.game)
            gym_env_monitor.seed(FLAGS.seed)
            gym_env_monitor_wrapper = AtariEnvironment(gym_env=gym_env_monitor, resized_width=FLAGS.resized_width,
                                                       resized_height=FLAGS.resized_height,
                                                       agent_history_length=FLAGS.agent_history_length)
            nb_actions = len(gym_env_monitor_wrapper.gym_actions)

            if FLAGS.lstm:
                global_network = ACNetworkLSTM('global', nb_actions, None)
            else:
                global_network = ACNetwork('global', nb_actions, None)

            saver = tf.train.Saver(max_to_keep=5)

        if FLAGS.resume:
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            print("Loading Model from {}".format(ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

        pe = PolicyMonitor(
            game=gym_env_monitor_wrapper,
            nb_actions=nb_actions,
            optimizer=optimizer,
            global_step=global_step
        )
        pe.eval_1000(sess)
    def __init__(self, game, thread_id, optimizer, global_step, settings):
        self.name = "agent_" + str(thread_id)
        self.thread_id = thread_id
        self.model_path = settings["checkpoint_dir"]
        self.settings = settings
        self.optimizer = optimizer
        self.global_episode = global_step
        self.increment_global_episode = self.global_episode.assign_add(1)
        self.episode_rewards = []

        # if not FLAGS.train:
        self.episode_regrets = []
        self.episodes_suboptimal_arms = []

        self.episode_lengths = []
        self.episode_mean_values = []
        self.summary_writer = tf.summary.FileWriter(settings["summaries_dir"] +
                                                    "/agent_" +
                                                    str(self.thread_id))
        self.summary = tf.Summary()

        self.local_AC = ACNetwork(self.name, optimizer, self.global_episode)
        self.update_local_vars = update_target_graph('global', self.name)
        self.env = game
Example 9
def run():
    recreate_directory_structure()
    tf.reset_default_graph()

    sess = tf.Session()
    # sess = tf_debug.LocalCLIDebugWrapperSession(sess)
    # sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
    with sess:
        with tf.device("/cpu:0"):
            global_step = tf.Variable(0,
                                      dtype=tf.int32,
                                      name='global_episodes',
                                      trainable=False)
            # optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.lr)
            optimizer = tf.train.RMSPropOptimizer(FLAGS.lr, 0.99, 0.0, 1e-6)

            num_workers = FLAGS.nb_concurrent
            #num_workers = multiprocessing.cpu_count() - 1
            workers = []
            envs = []

            for i in range(num_workers):
                gym_env = gym.make(FLAGS.game)
                if FLAGS.seed:
                    gym_env.seed(FLAGS.seed)

                if FLAGS.monitor:
                    gym_env = gym.wrappers.Monitor(
                        gym_env,
                        FLAGS.experiments_dir + '/worker_{}'.format(i))
                this_env = AtariEnvironment(
                    gym_env=gym_env,
                    resized_width=FLAGS.resized_width,
                    resized_height=FLAGS.resized_height,
                    agent_history_length=FLAGS.agent_history_length)

                envs.append(this_env)
            nb_actions = len(envs[0].gym_actions)

            if FLAGS.lstm:
                global_network = ACNetworkLSTM('global', nb_actions, None)
            else:
                global_network = ACNetwork('global', nb_actions, None)

            for i in range(num_workers):
                workers.append(
                    Worker(envs[i], sess, i, nb_actions, optimizer,
                           global_step))
            saver = tf.train.Saver(max_to_keep=5)

            # gym_env_monitor = gym.make(FLAGS.game)
            # gym_env_monitor.seed(FLAGS.seed)
            # gym_env_monitor_wrapper = AtariEnvironment(gym_env=gym_env_monitor, resized_width=FLAGS.resized_width,
            #                                            resized_height=FLAGS.resized_height,
            #                                            agent_history_length=FLAGS.agent_history_length)
            # nb_actions = len(gym_env_monitor_wrapper.gym_actions)
            # pe = PolicyMonitor(
            #     game=gym_env_monitor_wrapper,
            #     nb_actions=nb_actions,
            #     optimizer=optimizer,
            #     global_step=global_step
            # )

        coord = tf.train.Coordinator()
        if FLAGS.resume:
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            print("Loading Model from {}".format(ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

        worker_threads = []
        for worker in workers:
            # Pass the bound method and its arguments directly; a lambda here
            # would late-bind `worker` to the last element of the loop.
            t = threading.Thread(target=worker.play, args=(coord, saver))
            t.start()
            worker_threads.append(t)

        # Start a thread for policy eval task
        # monitor_thread = threading.Thread(target=lambda: pe.continuous_eval(FLAGS.eval_every, sess, coord))
        # monitor_thread.start()
        import time
        # Render until the coordinator stops the workers; a bare `while True`
        # here would make the join below unreachable.
        while not coord.should_stop():
            if FLAGS.show_training:
                for env in envs:
                    # time.sleep(1)
                    # with main_lock:
                    env.env.render()

        coord.join(worker_threads)
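None of the examples show how run() is invoked. For TF1 scripts built around tf.app.flags, the usual entry point (an assumption about these modules, shown only as a sketch) is:

def main(_):
    run()

if __name__ == '__main__':
    # tf.app.run parses the FLAGS used throughout the examples, then calls main.
    tf.app.run(main)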