def analyze_aggregated():
    """Analyze the aggregated expected-SARSA models of the reward experiment.

    The first reward seed is stored on the experiment parameters and used to
    name the image output directory. The full list of reward seeds is handed
    to ``get_aggregated_models``; the resulting mapping's keys (filenames)
    and values (models) are then passed to ``analyze_aggregated_models``.
    """
    exp_params = ExperimentParameters()
    exp_params.seed = get_reward_seeds()[0]
    exp_params.image_output_dir = (
        "../../../images/expected_sarsa/reward/%i" % exp_params.seed
    )

    models_by_filename = get_aggregated_models(
        "expected_sarsa", "reward", exp_params, get_reward_seeds()
    )

    # Keys and values of the same mapping stay aligned position by position.
    analyze_aggregated_models(
        list(models_by_filename.keys()),
        list(models_by_filename.values()),
        exp_params,
    )
def analyze_aggregated_reward_food_count_correlations(algorithm):
    """Print reward / food-count correlation statistics per state type.

    Loads the aggregated models of ``algorithm`` for the "reward" experiment
    and, for each model, correlates its mean-over-time reward series with its
    mean-over-time food-count series. Models are grouped by the part of their
    filename before the first ``"_"``; for the groups whose prefix starts
    with ``"Board"`` and ``"DirectionalDistance"`` the mean correlation and
    the mean absolute correlation are printed.

    Args:
        algorithm: Algorithm identifier forwarded to
            ``get_aggregated_models``.
    """

    def get_corr_coefs(models):
        """Return one reward/food-count correlation coefficient per model."""
        corr_coefs = []
        for model in models:
            average_reward_over_time = compute_mean_over_time(
                model.rewards_per_episode)
            average_food_count_over_time = compute_mean_over_time(
                model.food_count_per_episode)

            # Models whose averaged reward is zero everywhere are left out
            # (presumably because a constant series has no defined
            # correlation -- confirm against the experiment design).
            if np.all(average_reward_over_time == 0):
                continue

            # np.corrcoef returns the whole correlation matrix. Its diagonal
            # is always 1.0, so averaging the full matrix (as the previous
            # code effectively did) reports 0.5 + r/2 instead of r. Only the
            # off-diagonal entry is the reward/food-count correlation.
            # NOTE(review): assumes both series are 1-D, giving a 2x2 matrix.
            corr_matrix = np.corrcoef(average_reward_over_time,
                                      average_food_count_over_time)
            corr_coefs.append(corr_matrix[0, 1])
        return corr_coefs

    def report(title, state_prefix):
        """Print the correlation summary for states starting with a prefix."""
        current_models, _ = filter_models_by_state(
            models, states,
            lambda state: state.startswith(state_prefix))
        corr_coefs = get_corr_coefs(current_models)
        print(title)
        print("Average correlation:", np.mean(corr_coefs))
        print("Average absolute correlation:", np.mean(np.abs(corr_coefs)))

    exp_params = ExperimentParameters()
    exp_params.seed = get_reward_seeds()[0]
    aggregated_models = get_aggregated_models(algorithm, "reward", exp_params,
                                              get_reward_seeds())

    # The state name is the filename prefix up to the first underscore.
    states = [filename.split("_")[0] for filename in aggregated_models.keys()]
    models = list(aggregated_models.values())

    report("Board State", "Board")
    report("Directional Distance State", "DirectionalDistance")
def analyze():
    """Analyze the Q-learning reward-experiment models of one seed.

    Selects the reward seed at index 2, derives the model and image
    directories for that seed, stores them on the experiment parameters and
    passes the parameters to ``analyze_models``.
    """
    exp_params = ExperimentParameters()
    exp_params.seed = get_reward_seeds()[2]

    # Both directories are keyed by the seed stored on the parameters.
    exp_params.model_output_dir = (
        "../../../models/qlearning/reward/%i" % exp_params.seed
    )
    exp_params.image_output_dir = (
        "../../../images/qlearning/reward/%i" % exp_params.seed
    )

    analyze_models(exp_params)
def train():
    """Train SARSA models for every reward seed and reward state.

    Builds the snake parameters (discount factor 0.95, learning rate 0.15),
    creates the environment from them and attaches an epsilon-greedy policy.
    Then, for each seed from ``get_reward_seeds()``, the model output
    directory is set for that seed and ``train_models`` is invoked once per
    state from ``get_reward_states()``.
    """
    params = SnakeParameters()
    params.discount_factor = StaticDiscountFactor(0.95)
    params.learning_rate = StaticLearningRate(0.15)

    # The environment is created after the hyper-parameters are set, and the
    # policy afterwards because it takes the environment as an argument.
    env = SnakeEnvironment(params)
    params.policy = EpsilonGreedyPolicy(env, params.epsilon)

    exp_params = ExperimentParameters()
    exp_params.env = env
    exp_params.model_class = Sarsa
    exp_params.model_params = params

    for current_seed in get_reward_seeds():
        exp_params.seed = current_seed
        exp_params.model_output_dir = (
            "../../../models/sarsa/reward/%i" % exp_params.seed
        )

        for reward_state in get_reward_states():
            exp_params.model_params.state = reward_state
            train_models(exp_params)