def simulate_learned_state_action_distribution(r1_best_dists, r2_best_dists):
    """Run one simulation with randomly chosen learned (AL) distributions.

    A start state is drawn at random from task_start_states_list, then one
    of the stored best distributions for that state is picked for each
    agent. Returns the total number of actions taken in the simulation.
    """
    chosen_state = random.choice(task_start_states_list)
    r1_dist = random.choice(r1_best_dists[chosen_state])
    r2_dist = random.choice(r2_best_dists[chosen_state])
    n_actions = sf.run_simulation(r1_dist, r2_dist, chosen_state)
    lgr.debug("Total number of actions by agents using least actions policy is %d" % n_actions)
    return n_actions
def simulate_random_state_action_distribution():
    """Run one simulation with a freshly sampled random action distribution
    for each agent and return the total number of actions taken.
    """
    r1_dist = compute_random_state_action_distribution()
    r2_dist = compute_random_state_action_distribution()
    start_state = random.choice(task_start_states_list)
    n_actions = sf.run_simulation(r1_dist, r2_dist, start_state)
    # BUG FIX: the log message previously said "expert policy" (copy-paste
    # error) although this function simulates the *random* policy.
    lgr.debug("Total number of actions by agents using random policy is %d" % n_actions)
    return n_actions
# Example #3
def getBestDists(n_trials = 100):
    """Select, persist, and benchmark the best learned distributions.

    For every start state, simulate each candidate (r1, r2) distribution
    pair n_trials times, keep the pairs whose modal action count is lowest
    (capped at MAX_BEST_STATE_ACTION_DISTS), and pickle the results. Then
    compare one randomly chosen best pair against the expert and random
    policies, logging min/max/mean/mode of the action counts.

    n_trials -- simulations per distribution per start state (default 100).
    Side effects: reads ../pickles/dists.pickle; writes
    ../pickles/best_dists.pickle and ../pickles/modes.pickle; logs results.
    """
    lgr.info("Loading dists.pickle file")
    # BUG FIX: pickle files must be opened in binary mode ("rb"); text mode
    # ("r") breaks pickle.load on Python 3 (and on Windows under Python 2).
    # The writes below already use "wb" correctly.
    with open("../pickles/dists.pickle", "rb") as dists_file:
        r1_dists = pickle.load(dists_file)
        r2_dists = pickle.load(dists_file)

    n_actions_learned = np.zeros(n_trials)
    r1_best_dists = dict()
    r2_best_dists = dict()
    all_modes = list()
    lgr.info("%s", colored("Running simulation amongst %d state action distribution for getting the top 10 with lowest number of actions for various start states for %d trials" % (len(r1_dists), n_trials), 'white', attrs = ['bold']))
    for start_state in task_start_states_list:
        r1_best_dists[start_state] = list()
        r2_best_dists[start_state] = list()
        modes = np.zeros(len(r1_dists))
        for state_action_dist_idx in range(len(r1_dists)):
            r1_dist = r1_dists[state_action_dist_idx]
            r2_dist = r2_dists[state_action_dist_idx]
            for i in range(n_trials):
                n_actions_learned[i] = sf.run_simulation(r1_dist, r2_dist, start_state)
            # Modal action count over n_trials runs for this distribution.
            # NOTE(review): [0][0] indexing assumes the pre-1.11 scipy
            # ModeResult layout (array-valued mode) -- confirm scipy version.
            modes[state_action_dist_idx] = stats.mode(n_actions_learned)[0][0]

        # All distributions tied for the smallest mode; thin the set at
        # random if there are more than MAX_BEST_STATE_ACTION_DISTS of them.
        best_indices = np.where(modes == modes.min())[0]
        all_modes.append(modes)
        if len(best_indices) > MAX_BEST_STATE_ACTION_DISTS:
            best_indices = np.random.choice(best_indices, MAX_BEST_STATE_ACTION_DISTS, replace = False)

        lgr.info("%s", colored("Start State: %s" % str(start_state), 'yellow', attrs = ['bold']))
        lgr.info("%s", colored("Smallest mode: %d" % modes.min(), 'white', attrs = ['bold']))
        lgr.info("%s", colored("Distribution indices %s" % str(best_indices), 'white', attrs = ['bold']))

        for best_idx in np.nditer(best_indices):
            r1_best_dists[start_state].append(r1_dists[best_idx])
            r2_best_dists[start_state].append(r2_dists[best_idx])

    lgr.info("Writing the start state to best numpy distribution list dictionary to best_dists.pickle")
    with open("../pickles/best_dists.pickle", "wb") as best_dists_file:
        pickle.dump(r1_best_dists, best_dists_file)
        pickle.dump(r2_best_dists, best_dists_file)

    lgr.info("Writing modes.pickle file")
    with open("../pickles/modes.pickle", "wb") as modes_file:
        pickle.dump(all_modes, modes_file)
        pickle.dump(n_trials, modes_file)

    # NOTE(review): from here on n_trials is rebound from sys.argv, silently
    # overriding the parameter -- this looks like a script __main__ section
    # that was merged into the function body. Kept as-is to preserve
    # behavior; confirm against the original repository.
    n_trials = int(sys.argv[1]) if len(sys.argv) > 1 else 100
    n_actions_expert = np.zeros(n_trials)
    n_actions_random = np.zeros(n_trials)
    n_actions_learned = np.zeros(n_trials)
    lgr.info("Loading best_dists.pickle file")
    # BUG FIX: binary mode for pickle reads (see above).
    with open("../pickles/best_dists.pickle", "rb") as best_dists_file:
        r1_best_dists = pickle.load(best_dists_file)
        r2_best_dists = pickle.load(best_dists_file)

    for start_state in task_start_states_list:
        # One randomly chosen best pair represents the learned policy.
        r1_best_dist = random.choice(r1_best_dists[start_state])
        r2_best_dist = random.choice(r2_best_dists[start_state])

        for i in range(n_trials):
            expert_state_action_distribution = ex.compute_expert_state_action_distribution()
            n_actions_expert[i] = sf.run_simulation(expert_state_action_distribution, expert_state_action_distribution, start_state)

            random_state_action_distribution = compute_random_state_action_distribution()
            n_actions_random[i] = sf.run_simulation(random_state_action_distribution, random_state_action_distribution, start_state)

            n_actions_learned[i] = sf.run_simulation(r1_best_dist, r2_best_dist, start_state)

        # Per-start-state summary table: expert vs learned vs random.
        lgr.info("%s", colored("Number of trials = %d" % n_trials, 'white', attrs = ['bold']))
        lgr.info("%s", colored("Metric: Number of actions per trial", 'white', attrs = ['bold']))
        lgr.info("%s", colored("Start State: %s" % str(start_state), 'magenta', attrs = ['bold']))
        lgr.info("%s", colored("************************************************************************************************************", 'white', attrs = ['bold']))
        lgr.info("%s%s%s", colored("                Expert Policy            ", 'red', attrs = ['bold']), colored("Learned Policy        ", 'green', attrs = ['bold']), colored("Random Policy", 'blue', attrs = ['bold']))
        lgr.info("%s", colored("************************************************************************************************************", 'white', attrs = ['bold']))
        lgr.info("%s\t\t%s\t\t\t%s\t\t\t%s", colored("MIN:", 'white', attrs = ['bold']), colored("%s" % format(np.amin(n_actions_expert), '.3f'), 'red', attrs = ['bold']), colored("%s" % format(np.amin(n_actions_learned), '.3f'), 'green', attrs = ['bold']), colored("%s" % format(np.amin(n_actions_random), '.3f'), 'blue', attrs = ['bold']))
        lgr.info("%s\t\t%s\t\t\t%s\t\t\t%s", colored("MAX:", 'white', attrs = ['bold']), colored("%s" % format(np.amax(n_actions_expert), '.3f'), 'red', attrs = ['bold']), colored("%s" % format(np.amax(n_actions_learned), '.3f'), 'green', attrs = ['bold']), colored("%s" % format(np.amax(n_actions_random), '.3f'), 'blue', attrs = ['bold']))
        lgr.info("%s\t\t%s\t\t\t%s\t\t\t%s", colored("MEAN:", 'white', attrs = ['bold']), colored("%s" % format(np.mean(n_actions_expert), '.3f'), 'red', attrs = ['bold']), colored("%s" % format(np.mean(n_actions_learned), '.3f'), 'green', attrs = ['bold']), colored("%s" % format(np.mean(n_actions_random), '.3f'), 'blue', attrs = ['bold']))
        lgr.info("%s\t\t%s\t\t\t%s\t\t\t%s", colored("MODE:", 'white', attrs = ['bold']), colored("%s" % format(stats.mode(n_actions_expert)[0][0], '.3f'), 'red', attrs = ['bold']), colored("%s" % format(stats.mode(n_actions_learned)[0][0], '.3f'), 'green', attrs = ['bold']), colored("%s" % format(stats.mode(n_actions_random)[0][0], '.3f'), 'blue', attrs = ['bold']))