Esempio n. 1
0
def plot_average_actions_over_time(learner):
    """Plot the learner's averaged per-episode action count on a log-x axis.

    Reads ``learner.actions_per_episode``, passes it through
    ``compute_mean_over_time`` (defined elsewhere; presumably a running
    mean -- confirm against that helper) and draws the result through the
    pyplot interface.  The figure is not shown or saved here.
    """
    plt.semilogx(compute_mean_over_time(learner.actions_per_episode))

    plt.xlabel("# of episodes")
    plt.ylabel("Avg. # of actions")
    plt.title("Average # of Actions Over Time")
Esempio n. 2
0
def plot_average_reward_over_time(learner):
    """Plot the learner's averaged per-episode reward on a log-x axis.

    Reads ``learner.rewards_per_episode``, passes it through
    ``compute_mean_over_time`` (defined elsewhere; presumably a running
    mean -- confirm against that helper) and draws the result through the
    pyplot interface.  The figure is not shown or saved here.
    """
    plt.semilogx(compute_mean_over_time(learner.rewards_per_episode))

    plt.xlabel("# of episodes")
    plt.ylabel("Avg. Reward")
    plt.title("Average Reward Over Time")
Esempio n. 3
0
    def get_corr_coefs(models):
        """Correlate averaged reward with averaged food count for each model.

        For every model, ``rewards_per_episode`` and
        ``food_count_per_episode`` are each passed through
        ``compute_mean_over_time`` and the two results are handed to
        ``np.corrcoef``.  Models whose averaged reward is zero everywhere
        are left out (presumably because the correlation would be
        undefined for a constant series -- confirm intent).

        Returns:
            A list with one ``np.corrcoef`` result per retained model, in
            input order.  Each entry is the full correlation matrix (2x2
            when both inputs are 1-D), not a single scalar.
        """
        results = []

        for candidate in models:
            mean_reward = compute_mean_over_time(candidate.rewards_per_episode)
            mean_food = compute_mean_over_time(
                candidate.food_count_per_episode)

            # Only keep models that earned a non-zero averaged reward
            # somewhere along the run.
            if not np.all(mean_reward == 0):
                results.append(np.corrcoef(mean_reward, mean_food))

        return results
Esempio n. 4
0
def diagnostic_plot_exploration_vs_exploitation_over_time(learner):
    """Plot the averaged share of exploratory actions on a log-x axis.

    Divides ``learner.exploratory_actions_per_episode`` by
    ``learner.actions_per_episode``, passes the quotient through
    ``compute_mean_over_time`` (defined elsewhere) and draws the result
    through the pyplot interface.

    NOTE(review): the ``/`` assumes both attributes support element-wise
    division (e.g. numpy arrays) -- confirm.  The plotted value is the raw
    ratio; it is not multiplied by 100 even though the y-label says "(%)".
    """
    exploratory_ratio = (
        learner.exploratory_actions_per_episode / learner.actions_per_episode
    )
    plt.semilogx(compute_mean_over_time(exploratory_ratio))

    plt.xlabel("# of episodes")
    plt.ylabel("Exploration (%)")
    plt.title("Exploration vs. Exploitation")
Esempio n. 5
0
def plot_multi_average_game_score_over_time(x, game_score_per_episode, labels):
    """Overlay averaged game-score curves for several runs on a log-x axis.

    Args:
        x: X values shared by every curve.
        game_score_per_episode: Iterable of per-run game-score series; each
            one is passed through ``compute_mean_over_time`` (defined
            elsewhere).
        labels: Legend labels, paired positionally with the series.  Pairing
            stops at the shorter of the two.
    """
    # All series are averaged before any of them is drawn.
    averaged_series = list(
        map(compute_mean_over_time, game_score_per_episode))

    for series_label, series in zip(labels, averaged_series):
        plt.semilogx(x, series, label=series_label)

    plt.xlabel("# of episodes")
    plt.ylabel("Game Score")
    plt.title("Average Game Score Over Time")
    plt.legend(fontsize='x-small')
Esempio n. 6
0
def plot_multi_average_food_count_over_time(x, food_count_per_episode, labels):
    """Overlay averaged food-count curves for several runs on a log-x axis.

    Args:
        x: X values shared by every curve.
        food_count_per_episode: Iterable of per-run food-count series; each
            one is passed through ``compute_mean_over_time`` (defined
            elsewhere).
        labels: Legend labels, paired positionally with the series.  Pairing
            stops at the shorter of the two.
    """
    # All series are averaged before any of them is drawn.
    averaged_series = list(
        map(compute_mean_over_time, food_count_per_episode))

    for series_label, series in zip(labels, averaged_series):
        plt.semilogx(x, series, label=series_label)

    plt.xlabel("# of episodes")
    plt.ylabel("Food Count")
    plt.title("Average Food Count Over Time")
    plt.legend(fontsize='x-small')
Esempio n. 7
0
def plot_multi_average_actions_over_time(x, actions_per_episode, labels):
    """Overlay averaged action-count curves for several runs on a log-x axis.

    Args:
        x: X values shared by every curve.
        actions_per_episode: Iterable of per-run action-count series; each
            one is passed through ``compute_mean_over_time`` (defined
            elsewhere).
        labels: Legend labels, paired positionally with the series.  Pairing
            stops at the shorter of the two.
    """
    # All series are averaged before any of them is drawn.
    averaged_series = list(map(compute_mean_over_time, actions_per_episode))

    for series_label, series in zip(labels, averaged_series):
        plt.semilogx(x, series, label=series_label)

    plt.xlabel("# of episodes")
    plt.ylabel("# of actions")
    plt.title("Average # of Actions Over Time")
    plt.legend(fontsize='x-small')
Esempio n. 8
0
def plot_multi_average_reward_over_time(x, rewards_per_episode, labels):
    """Overlay averaged reward curves for several runs on a log-x axis.

    Args:
        x: X values shared by every curve.
        rewards_per_episode: Iterable of per-run reward series; each one is
            passed through ``compute_mean_over_time`` (defined elsewhere).
        labels: Legend labels, paired positionally with the series.  Pairing
            stops at the shorter of the two.
    """
    # All series are averaged before any of them is drawn.
    averaged_series = list(map(compute_mean_over_time, rewards_per_episode))

    for series_label, series in zip(labels, averaged_series):
        plt.semilogx(x, series, label=series_label)

    plt.xlabel("# of episodes")
    plt.ylabel("Reward")
    plt.title("Average Reward Over Time")
    plt.legend(fontsize='x-small')
Esempio n. 9
0
def plot_multi_average_self_collision_death_over_time(
        x, self_collision_death_per_episode, labels):
    """Overlay averaged self-collision-death curves for several runs.

    Curves are drawn with a logarithmic x-axis.

    Args:
        x: X values shared by every curve.
        self_collision_death_per_episode: Iterable of per-run
            self-collision-death series; each one is passed through
            ``compute_mean_over_time`` (defined elsewhere).
        labels: Legend labels, paired positionally with the series.  Pairing
            stops at the shorter of the two.
    """
    # All series are averaged before any of them is drawn.
    averaged_series = list(
        map(compute_mean_over_time, self_collision_death_per_episode))

    for series_label, series in zip(labels, averaged_series):
        plt.semilogx(x, series, label=series_label)

    plt.xlabel("# of episodes")
    plt.ylabel("Self-Collision Death")
    plt.title("Average Self-Collision Death Over Time")
    plt.legend(fontsize='x-small')