def hypertune(game):
    """Run a random hyper-parameter search, or re-run every stored model.

    When ``FLAGS.train`` is set and ``FLAGS.resume`` is not, samples
    ``FLAGS.nb_hyperparam_runs`` (lr, gamma) pairs and calls ``run`` once per
    pair. Otherwise every entry of ``FLAGS.checkpoint_dir`` is parsed as
    ``d_<game>__lr_<lr>__gamma_<gamma>`` and ``run`` is called once per entry,
    all entries sharing one set of validation environments.

    Args:
        game: Name of the game used for the sampled runs. In the re-run
            branch it is replaced by the game parsed from the first entry of
            the checkpoint directory.
    """
    recreate_directory_structure()

    if not FLAGS.resume and FLAGS.train:
        for _ in range(FLAGS.nb_hyperparam_runs):
            # Log-uniform learning rate between 1e-4 and 1e-2. The bounds are
            # passed as (low, high): numpy leaves low > high undefined.
            lr = 10**np.random.uniform(-4, -2)
            gamma = np.random.uniform(0.7, 1.0)
            run(_hypertune_settings(game, lr, gamma))
        return

    model_instances = os.listdir(FLAGS.checkpoint_dir)
    if not model_instances:
        # Nothing to re-run; avoids an IndexError on model_instances[0] below.
        print("No model instances found in {}".format(FLAGS.checkpoint_dir))
        return

    # lr and gamma stay the strings found in the directory names so that the
    # rebuilt model name matches the existing directory exactly.
    lrs = [inst.split("__")[1].split("_")[1] for inst in model_instances]
    gammas = [inst.split("__")[2].split("_")[1] for inst in model_instances]
    game = model_instances[0].split("__")[0].split("_")[1]

    val_envs = TwoArms.get_envs(game, FLAGS.nb_test_episodes)

    for lr, gamma in zip(lrs, gammas):
        settings = _hypertune_settings(game, lr, gamma)
        print(settings["model_name"])
        settings["envs"] = val_envs
        run(settings)


def _hypertune_settings(game, lr, gamma):
    """Build the settings dict for one (game, lr, gamma) combination."""
    model_name = "d_{}__lr_{}__gamma_{}".format(game, lr, gamma)
    return {
        "lr": lr,
        "gamma": gamma,
        "game": game,
        "model_name": model_name,
        "checkpoint_dir": os.path.join(FLAGS.checkpoint_dir, model_name),
        "summaries_dir": os.path.join(FLAGS.summaries_dir, model_name),
        "frames_dir": os.path.join(FLAGS.frames_dir, model_name),
    }
Exemple #2
0
def hypertune(game):
    """Sample and train hyper-parameter candidates, or re-run stored ones.

    Training mode (``FLAGS.train`` set, ``FLAGS.resume`` unset) draws
    ``FLAGS.nb_hyperparam_runs`` random (lr, gamma) pairs and calls ``run``
    for each. In any other mode the entries of ``FLAGS.checkpoint_dir`` are
    parsed as ``d_<game>__lr_<lr>__gamma_<gamma>`` and ``run`` is called for
    each with the same validation environments.

    Args:
        game: Game name for the sampled runs; overwritten by the game parsed
            from the first checkpoint directory entry in the re-run mode.
    """
    recreate_directory_structure()

    if not FLAGS.resume and FLAGS.train:
        for _ in range(FLAGS.nb_hyperparam_runs):
            # Exponent drawn uniformly from [-4, -2], i.e. lr is log-uniform
            # in [1e-4, 1e-2]. numpy documents low > high as undefined, so
            # the smaller bound goes first.
            lr = 10 ** np.random.uniform(-4, -2)
            gamma = np.random.uniform(0.7, 1.0)

            model_name = "d_{}__lr_{}__gamma_{}".format(game, lr, gamma)
            settings = {"lr": lr,
                        "gamma": gamma,
                        "game": game,
                        "model_name": model_name,
                        "checkpoint_dir": os.path.join(FLAGS.checkpoint_dir, model_name),
                        "summaries_dir": os.path.join(FLAGS.summaries_dir, model_name),
                        "frames_dir": os.path.join(FLAGS.frames_dir, model_name)}

            run(settings)
    else:
        model_instances = os.listdir(FLAGS.checkpoint_dir)
        if not model_instances:
            # Guard: model_instances[0] below would raise IndexError.
            print("No model instances found in {}".format(FLAGS.checkpoint_dir))
            return

        # The parsed values are kept as strings so the rebuilt model name is
        # identical to the directory name they came from.
        lrs = [inst.split("__")[1].split("_")[1] for inst in model_instances]
        gammas = [inst.split("__")[2].split("_")[1] for inst in model_instances]

        game = model_instances[0].split("__")[0].split("_")[1]

        val_envs = TwoArms.get_envs(game, FLAGS.nb_test_episodes)

        for lr, gamma in zip(lrs, gammas):
            model_name = "d_{}__lr_{}__gamma_{}".format(game, lr, gamma)
            print(model_name)

            settings = {"lr": lr,
                        "gamma": gamma,
                        "game": game,
                        "model_name": model_name,
                        "checkpoint_dir": os.path.join(FLAGS.checkpoint_dir, model_name),
                        "summaries_dir": os.path.join(FLAGS.summaries_dir, model_name),
                        "frames_dir": os.path.join(FLAGS.frames_dir, model_name),
                        "envs": val_envs}

            run(settings)
def run_baseline():
    """Run ``RandomAgent`` on test environments for ``FLAGS.game``.

    Builds the environments with ``TwoArms.get_envs``, creates one
    ``RandomAgent`` per environment instance, starts each agent's ``play``
    method in its own thread and waits for all of them through a
    ``tf.train.Coordinator``.
    """
    test_envs = TwoArms.get_envs(FLAGS.game, FLAGS.nb_test_episodes)

    settings = {
        "model_name": "baseline",
        "game": FLAGS.game,
        "envs": test_envs,
        "exp_type": "evaluate_baseline",
    }

    tf.reset_default_graph()

    with tf.device("/cpu:0"):
        num_agents = 1
        envs = []
        for _ in range(num_agents):
            if settings["game"] == '11arms':
                envs.append(ElevenArms())
            else:
                envs.append(TwoArms(settings["game"]))

        agents = [
            RandomAgent(env, index, settings) for index, env in enumerate(envs)
        ]

    with tf.Session():
        coord = tf.train.Coordinator()

        agent_threads = []
        for agent in agents:
            # Pass the bound method directly: a lambda created in this loop
            # would look up `agent` only when the thread runs and could pick
            # up a later loop value.
            thread = threading.Thread(target=agent.play, args=(coord,))
            thread.start()
            agent_threads.append(thread)
        coord.join(agent_threads)
def validate_hypertune():
    """Validate every stored model of the hyper-parameter search in parallel.

    Each entry of ``FLAGS.checkpoint_dir`` is parsed as
    ``d_<game>__lr_<lr>__gamma_<gamma>``. One ``thread_processing`` task per
    entry is submitted to a thread pool sized to the CPU count; all tasks
    share the same validation environments. A failing task is reported on
    stdout instead of being dropped silently with its future.
    """
    # recreate_directory_structure()

    model_instances = os.listdir(FLAGS.checkpoint_dir)
    if not model_instances:
        # Guard: model_instances[0] below would raise IndexError.
        print("No model instances found in {}".format(FLAGS.checkpoint_dir))
        return

    lrs = [inst.split("__")[1].split("_")[1] for inst in model_instances]
    gammas = [inst.split("__")[2].split("_")[1] for inst in model_instances]

    game = model_instances[0].split("__")[0].split("_")[1]

    val_envs = TwoArms.get_envs(game, FLAGS.nb_test_episodes)

    with concurrent.futures.ThreadPoolExecutor(max_workers=multiprocessing.cpu_count()) as executor:
        futures = {
            executor.submit(thread_processing, lr, gamma, val_envs, game): (lr, gamma)
            for lr, gamma in zip(lrs, gammas)
        }
        for future in concurrent.futures.as_completed(futures):
            error = future.exception()
            if error is not None:
                lr, gamma = futures[future]
                print("Validation failed for lr={} gamma={}: {!r}".format(lr, gamma, error))
def evaluate_one_test():
    """Evaluate one stored model five times and print its average regret.

    The model trained on ``FLAGS.best_model_game`` with ``FLAGS.lr`` and
    ``FLAGS.gamma`` is loaded and run in "eval" mode on new test environments
    for ``FLAGS.game``, five times in a row. Afterwards the first line of
    ``FLAGS.results_eval_file`` is read as whitespace-separated mean regrets
    and their average is printed.
    """
    model_name = "best_{}__lr_{}__gamma_{}".format(FLAGS.game, FLAGS.lr,
                                                   FLAGS.gamma)
    load_from_model_name = "d_{}__lr_{}__gamma_{}".format(
        FLAGS.best_model_game, FLAGS.lr, FLAGS.gamma)
    checkpoint_dir = os.path.join(FLAGS.checkpoint_dir, model_name)
    summaries_dir = os.path.join(FLAGS.summaries_dir, model_name)
    frames_dir = os.path.join(FLAGS.frames_dir, model_name)
    load_from = os.path.join(FLAGS.checkpoint_dir, load_from_model_name)

    for _ in range(5):
        print(load_from_model_name)
        # Environments are requested again for every repetition, as before.
        test_envs = TwoArms.get_envs(FLAGS.game, FLAGS.nb_test_episodes)

        settings = {
            "lr": FLAGS.lr,
            "gamma": FLAGS.gamma,
            "game": FLAGS.game,
            "model_name": model_name,
            "checkpoint_dir": checkpoint_dir,
            "summaries_dir": summaries_dir,
            "frames_dir": frames_dir,
            "load_from": load_from,
            "envs": test_envs,
            "mode": "eval"
        }

        run(settings)

    # Presumably each evaluation run appends its mean regret to the first
    # line of the results file -- TODO confirm against the agent code.
    with open(FLAGS.results_eval_file, "r") as f:
        line = f.readline()
    # split() without an argument ignores the newline and any repeated or
    # trailing spaces, which would otherwise give empty tokens.
    mean_regrets = [float(rg) for rg in line.split()]
    if not mean_regrets:
        print("No regrets found in {}".format(FLAGS.results_eval_file))
        return
    mean_rg_avg = np.mean(mean_regrets)
    print("Avg regret for the model is {}".format(mean_rg_avg))
Exemple #6
0
def test():
    """Re-run the ``FLAGS.top`` best validated models on test environments.

    Reads ``FLAGS.results_val_file``, where each line holds a leading token
    followed by five space-separated ``key=value`` fields (game, lr, gamma,
    mean regret, mean number of sub-optimal arms). The models with the lowest
    mean regret are selected, and ``run`` is called for each with
    ``load_from`` pointing at its ``d_...`` checkpoint directory.
    """
    if tf.gfile.Exists(FLAGS.results_test_file):
        os.remove(FLAGS.results_test_file)

    # Read everything up front so the file is closed before the runs start.
    with open(FLAGS.results_val_file, "r") as f:
        lines = [line.rstrip('\n') for line in f]

    games = []
    lrs = []
    gammas = []
    mean_regrets = []
    for line in lines:
        if not line.strip():
            continue  # tolerate blank lines such as a trailing newline
        fields = line.split(" ")[1:]
        game, lr, gamma, mean_regret, _ = [r.split("=")[1] for r in fields]
        games.append(game)
        lrs.append(lr)
        gammas.append(gamma)
        # Compare numerically: as strings, "10.0" would sort before "9.0".
        mean_regrets.append(float(mean_regret))

    if not mean_regrets:
        print("No validation results found in {}".format(FLAGS.results_val_file))
        return

    # Lower regret is better, so the best models are the first after sorting.
    indices_best_n = np.argsort(mean_regrets)[:FLAGS.top]
    best_lrs = [lrs[i] for i in indices_best_n]
    best_gammas = [gammas[i] for i in indices_best_n]
    best_game = games[0]

    test_envs = TwoArms.get_envs(best_game, FLAGS.nb_test_episodes)

    for lr, gamma in zip(best_lrs, best_gammas):
        model_name = "best_{}__lr_{}__gamma_{}".format(best_game, lr, gamma)
        load_from_model_name = "d_{}__lr_{}__gamma_{}".format(best_game, lr, gamma)
        print(model_name)

        settings = {"lr": lr,
                    "gamma": gamma,
                    "game": best_game,
                    "model_name": model_name,
                    "checkpoint_dir": os.path.join(FLAGS.checkpoint_dir, model_name),
                    "summaries_dir": os.path.join(FLAGS.summaries_dir, model_name),
                    "frames_dir": os.path.join(FLAGS.frames_dir, model_name),
                    "load_from": os.path.join(FLAGS.checkpoint_dir, load_from_model_name),
                    "envs": test_envs}

        run(settings)
Exemple #7
0
def evaluate_one_test():
    """Run five evaluations of one stored model and print the mean regret.

    Loads the checkpoint of the model trained on ``FLAGS.best_model_game``
    (identified by ``FLAGS.lr`` and ``FLAGS.gamma``) and calls ``run`` in
    "eval" mode on test environments for ``FLAGS.game`` five times. The first
    line of ``FLAGS.results_eval_file`` is then parsed as whitespace-separated
    floats and their average is printed.
    """
    model_name = "best_{}__lr_{}__gamma_{}".format(FLAGS.game, FLAGS.lr, FLAGS.gamma)
    load_from_model_name = "d_{}__lr_{}__gamma_{}".format(FLAGS.best_model_game, FLAGS.lr, FLAGS.gamma)

    for _ in range(5):
        print(load_from_model_name)
        # New environments are requested for each repetition, as before.
        test_envs = TwoArms.get_envs(FLAGS.game, FLAGS.nb_test_episodes)

        settings = {"lr": FLAGS.lr,
                    "gamma": FLAGS.gamma,
                    "game": FLAGS.game,
                    "model_name": model_name,
                    "checkpoint_dir": os.path.join(FLAGS.checkpoint_dir, model_name),
                    "summaries_dir": os.path.join(FLAGS.summaries_dir, model_name),
                    "frames_dir": os.path.join(FLAGS.frames_dir, model_name),
                    "load_from": os.path.join(FLAGS.checkpoint_dir, load_from_model_name),
                    "envs": test_envs,
                    "mode": "eval"}

        run(settings)

    with open(FLAGS.results_eval_file, "r") as f:
        line = f.readline()
    # split() with no argument drops the newline and never yields empty
    # tokens, so a trailing or doubled space cannot break float().
    mean_regrets = [float(rg) for rg in line.split()]
    if not mean_regrets:
        # Avoid averaging an empty list, which would give nan and a warning.
        print("No regrets found in {}".format(FLAGS.results_eval_file))
        return
    mean_rg_avg = np.mean(mean_regrets)
    print("Avg regret for the model is {}".format(mean_rg_avg))
Exemple #8
0
def validate_hypertune():
    """Run validation for all stored hyper-parameter models concurrently.

    The entries of ``FLAGS.checkpoint_dir`` are expected to be named
    ``d_<game>__lr_<lr>__gamma_<gamma>``. For each entry a
    ``thread_processing`` task is submitted to a thread pool with one worker
    per CPU; the validation environments are created once and shared. An
    exception raised inside a task is printed rather than being lost with
    the discarded future.
    """
    # recreate_directory_structure()

    model_instances = os.listdir(FLAGS.checkpoint_dir)
    if not model_instances:
        # Without this guard model_instances[0] raises IndexError.
        print("No model instances found in {}".format(FLAGS.checkpoint_dir))
        return

    lrs = [inst.split("__")[1].split("_")[1] for inst in model_instances]
    gammas = [inst.split("__")[2].split("_")[1] for inst in model_instances]

    game = model_instances[0].split("__")[0].split("_")[1]

    val_envs = TwoArms.get_envs(game, FLAGS.nb_test_episodes)

    with concurrent.futures.ThreadPoolExecutor(
            max_workers=multiprocessing.cpu_count()) as executor:
        pending = {}
        for lr, gamma in zip(lrs, gammas):
            future = executor.submit(thread_processing, lr, gamma, val_envs,
                                     game)
            pending[future] = (lr, gamma)

        for future in concurrent.futures.as_completed(pending):
            error = future.exception()
            if error is not None:
                lr, gamma = pending[future]
                print("Validation failed for lr={} gamma={}: {!r}".format(
                    lr, gamma, error))
Exemple #9
0
def run(settings):
    """Build the graph for one experiment and run its agents to completion.

    Resets the default TensorFlow graph, creates the global step, an Adam
    optimizer and the 'global' ``ACNetwork``, then one environment and one
    ``Agent`` per worker. Inside a session the variables are restored from
    ``settings["checkpoint_dir"]`` when ``FLAGS.resume`` is set, or freshly
    initialised otherwise, and every agent's ``play`` method runs in its own
    thread until the coordinator joins them.

    Args:
        settings: Dict describing the experiment. This function reads
            ``"lr"``, ``"game"`` and, when resuming, ``"checkpoint_dir"``;
            the whole dict is also passed to
            ``recreate_subdirectory_structure`` and to every ``Agent``.
    """
    recreate_subdirectory_structure(settings)
    tf.reset_default_graph()

    with tf.device("/cpu:0"):
        global_step = tf.Variable(0,
                                  dtype=tf.int32,
                                  name='global_episodes',
                                  trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=settings["lr"])
        # Not referenced again here; constructed before the agents.
        global_network = ACNetwork('global', None)

        num_agents = 1
        envs = []
        for _ in range(num_agents):
            if settings["game"] == '11arms':
                envs.append(ElevenArms())
            else:
                envs.append(TwoArms(settings["game"]))

        agents = [
            Agent(env, index, optimizer, global_step, settings)
            for index, env in enumerate(envs)
        ]
        saver = tf.train.Saver(max_to_keep=5)

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        if FLAGS.resume:
            ckpt = tf.train.get_checkpoint_state(settings["checkpoint_dir"])
            # print("Loading Model from {}".format(ckpt.model_checkpoint_path))
            try:
                saver.restore(sess, ckpt.model_checkpoint_path)
            except Exception as e:
                # Best effort: report the failure and carry on.
                print(sys.exc_info()[0])
                print(e)
        else:
            sess.run(tf.global_variables_initializer())

        agent_threads = []
        for agent in agents:
            # Hand the bound method to the thread directly: a lambda defined
            # in this loop resolves `agent` only when the thread runs and
            # could see a later loop value.
            thread = threading.Thread(target=agent.play,
                                      args=(sess, coord, saver))
            thread.start()
            agent_threads.append(thread)
        coord.join(agent_threads)
def run_one_test():
    """Train or evaluate the single model named "one_test".

    With ``FLAGS.train`` set the directory structure is recreated and ``run``
    is called with the basic settings. Otherwise test environments are
    created and the settings additionally carry ``"load_from"`` (the model's
    checkpoint directory) and ``"envs"``.
    """
    training = FLAGS.train
    if training:
        recreate_directory_structure()
    else:
        test_envs = TwoArms.get_envs(FLAGS.game, FLAGS.nb_test_episodes)

    model_name = "one_test"
    model_checkpoint_dir = os.path.join(FLAGS.checkpoint_dir, model_name)

    settings = {
        "lr": FLAGS.lr,
        "gamma": FLAGS.gamma,
        "game": FLAGS.game,
        "model_name": model_name,
        "checkpoint_dir": model_checkpoint_dir,
        "summaries_dir": os.path.join(FLAGS.summaries_dir, model_name),
        "frames_dir": os.path.join(FLAGS.frames_dir, model_name),
    }
    if not training:
        # Evaluation loads from the same directory it checkpoints to.
        settings["load_from"] = model_checkpoint_dir
        settings["envs"] = test_envs

    run(settings)
Exemple #11
0
def run_baseline():
    """Evaluate ``RandomAgent`` as the baseline on ``FLAGS.game``.

    Test environments come from ``TwoArms.get_envs``. One ``RandomAgent`` is
    created per environment instance, each agent's ``play`` method is started
    in a separate thread, and a ``tf.train.Coordinator`` waits for all of
    them to finish.
    """
    test_envs = TwoArms.get_envs(FLAGS.game, FLAGS.nb_test_episodes)

    settings = {"model_name": "baseline",
                "game": FLAGS.game,
                "envs": test_envs,
                "exp_type": "evaluate_baseline"}

    tf.reset_default_graph()

    with tf.device("/cpu:0"):
        num_agents = 1
        agents = []
        envs = []
        for _ in range(num_agents):
            if settings["game"] == '11arms':
                this_env = ElevenArms()
            else:
                this_env = TwoArms(settings["game"])
            envs.append(this_env)

        for i, env in enumerate(envs):
            agents.append(RandomAgent(env, i, settings))

    with tf.Session():
        coord = tf.train.Coordinator()

        agent_threads = []
        for agent in agents:
            # target/args binds this iteration's agent; a lambda would read
            # `agent` only when the thread runs and may see a later one.
            thread = threading.Thread(target=agent.play, args=(coord,))
            thread.start()
            agent_threads.append(thread)
        coord.join(agent_threads)
Exemple #12
0
def run_one_test():
    """Run the "one_test" model, for training or for evaluation.

    In training mode (``FLAGS.train``) the directory structure is recreated
    first. In evaluation mode test environments are created and added to the
    settings together with the checkpoint directory to load from.
    """
    evaluating = not FLAGS.train
    if evaluating:
        test_envs = TwoArms.get_envs(FLAGS.game, FLAGS.nb_test_episodes)
    else:
        recreate_directory_structure()

    name = "one_test"
    # Each output directory is the matching FLAGS root joined with the name.
    paths = {key: os.path.join(getattr(FLAGS, key), name)
             for key in ("checkpoint_dir", "summaries_dir", "frames_dir")}

    settings = {"lr": FLAGS.lr,
                "gamma": FLAGS.gamma,
                "game": FLAGS.game,
                "model_name": name}
    settings.update(paths)

    if evaluating:
        settings["load_from"] = paths["checkpoint_dir"]
        settings["envs"] = test_envs

    run(settings)
def test_hypertune():
    """Test the ``FLAGS.top`` lowest-regret models from the validation file.

    Each non-blank line of ``FLAGS.results_val_file`` holds a leading token
    followed by five space-separated ``key=value`` fields (game, lr, gamma,
    mean regret, mean number of sub-optimal arms). The models with the
    smallest mean regret are loaded from their ``d_<best_model_game>...``
    checkpoint directories and run in "test" mode on environments created
    for ``FLAGS.game``.
    """
    if tf.gfile.Exists(FLAGS.results_test_file):
        os.remove(FLAGS.results_test_file)

    # Read the whole file first so it is not held open during the runs.
    with open(FLAGS.results_val_file, "r") as f:
        lines = [line.rstrip('\n') for line in f]

    lrs = []
    gammas = []
    mean_regrets = []
    for line in lines:
        if not line.strip():
            continue  # tolerate blank lines such as a trailing newline
        fields = line.split(" ")[1:]
        _, lr, gamma, mean_regret, _ = [r.split("=")[1] for r in fields]
        lrs.append(lr)
        gammas.append(gamma)
        mean_regrets.append(float(mean_regret))

    # argsort is ascending, so the first FLAGS.top indices are the
    # lowest-regret models.
    indices_best_n = np.asarray(mean_regrets).argsort()[:FLAGS.top]
    best_lrs = [lrs[i] for i in indices_best_n]
    best_gammas = [gammas[i] for i in indices_best_n]
    best_game = FLAGS.best_model_game

    test_envs = TwoArms.get_envs(FLAGS.game, FLAGS.nb_test_episodes)

    for lr, gamma in zip(best_lrs, best_gammas):
        model_name = "best_{}__lr_{}__gamma_{}".format(best_game, lr, gamma)
        load_from_model_name = "d_{}__lr_{}__gamma_{}".format(
            best_game, lr, gamma)
        print(model_name)

        settings = {
            "lr": lr,
            "gamma": gamma,
            "game": FLAGS.game,
            "model_name": model_name,
            "checkpoint_dir": os.path.join(FLAGS.checkpoint_dir, model_name),
            "summaries_dir": os.path.join(FLAGS.summaries_dir, model_name),
            "frames_dir": os.path.join(FLAGS.frames_dir, model_name),
            "load_from": os.path.join(FLAGS.checkpoint_dir,
                                      load_from_model_name),
            "envs": test_envs,
            "mode": "test",
        }

        run(settings)