def train_sarsa_models():
    """Train a SARSA experiment once for every seed from ``get_seeds()``.

    Each run uses the states from ``get_states()`` and the rewards from
    ``get_rewards()``.
    """
    # One params dict is reused across runs; only its "seed" entry changes.
    shared_params = {"seed": None}
    for current_seed in get_seeds():
        shared_params["seed"] = current_seed
        sarsa_experiment = get_sarsa_experiment(shared_params)
        train_experiment(sarsa_experiment, get_states(), get_rewards())
def aggregate_models_by_avg(experiment_getter, seeds, params_getter, state_getter, reward_getter):
    """Average per-episode statistics of models across several seeds.

    For every seed an experiment is built with
    ``experiment_getter(params_getter(seed))`` and its models are obtained
    through ``get_models_from_experiment`` (with ``train_if_missing=True``).
    Models are grouped by their ``(state, reward)`` pair; within a group the
    ``food_count_per_episode`` and ``rewards_per_episode`` attributes are
    summed and finally divided by ``len(seeds)``.

    Args:
        experiment_getter: Callable taking a params object and returning an
            experiment.
        seeds: Sized iterable of seeds to aggregate over. Only these seeds
            are used.
        params_getter: Callable mapping a seed to the params for that seed.
        state_getter: Zero-argument callable returning the states to load.
        reward_getter: Zero-argument callable returning the rewards to load.

    Returns:
        A ``(models, states, rewards)`` tuple of parallel lists, ordered by
        the first appearance of each ``(state, reward)`` pair.

    Note:
        The model object of the first seed that produces a given key is
        reused as the accumulator and is mutated in place. The divisor is
        always ``len(seeds)``, even if a key was not produced by every seed.
        The accumulated attributes presumably are numeric arrays supporting
        element-wise ``+=`` and ``/`` -- confirm against the model class.
    """
    aggregated_models = {}
    for seed in seeds:
        experiment = experiment_getter(params_getter(seed))
        models, states, rewards, _ = get_models_from_experiment(
            experiment, state_getter(), reward_getter(), train_if_missing=True)
        for model, state, reward in zip(models, states, rewards):
            key = (state, reward)
            accumulator = aggregated_models.get(key)
            if accumulator is None:
                # First model seen for this key becomes the running total.
                aggregated_models[key] = model
            else:
                accumulator.food_count_per_episode += model.food_count_per_episode
                accumulator.rewards_per_episode += model.rewards_per_episode

    # NOTE: a former second pass over ``get_seeds()[1:]`` was removed. It
    # ignored ``seeds``, reloaded every experiment a second time and added
    # the last seed's models to themselves, which corrupted the result
    # whenever those models were the accumulators.
    seed_count = len(seeds)
    for model in aggregated_models.values():
        # Plain division (not ``/=``) so integer-typed totals may become floats.
        model.food_count_per_episode = model.food_count_per_episode / seed_count
        model.rewards_per_episode = model.rewards_per_episode / seed_count

    models, states, rewards = [], [], []
    for (state, reward), model in aggregated_models.items():
        models.append(model)
        states.append(state)
        rewards.append(reward)
    return models, states, rewards
def experiment01_sarsa():
    """Run ``experiment01`` on SARSA models averaged over all seeds.

    Models are aggregated with ``aggregate_models_by_avg`` over the seeds
    from ``get_seeds()``; the experiment and params handed to
    ``experiment01`` are built for the fixed seed 42.
    """
    def get_params(seed):
        """Return the params dict (seed and plot output path) for *seed*."""
        image_output = os.path.join(
            get_project_path(), "images", "sarsa", "state", str(seed),
            "state_sarsa_average_game_score_over_time.png")
        return {"seed": seed, "image_output": image_output}

    params = get_params(42)
    experiment = get_sarsa_experiment(params)
    # aggregate_models_by_avg calls state_getter() / reward_getter() itself,
    # so the getter functions are passed, not their results.
    models, states, rewards = aggregate_models_by_avg(
        get_sarsa_experiment, get_seeds(), get_params, get_states, get_rewards)
    experiment01(experiment, models, states, rewards, params)
def experiment01_sarsa_directional_state():
    """Run ``experiment01`` on seed-averaged SARSA models with directional states.

    Models are aggregated with ``aggregate_models_by_avg`` over the seeds
    from ``get_seeds()`` and then restricted to those whose state name
    starts with ``"Directional"``. The experiment and params handed to
    ``experiment01`` are built for the fixed seed 42.
    """
    def get_params(seed):
        """Return the params dict (seed and plot output path) for *seed*."""
        image_output = os.path.join(
            get_project_path(), "images", "sarsa", "reward", str(seed),
            "reward_sarsa_directional_state_average_game_score_over_time.png")
        return {"seed": seed, "image_output": image_output}

    params = get_params(42)
    # Build the SARSA experiment so it matches the aggregated SARSA models
    # (this previously used get_qlearning_experiment).
    experiment = get_sarsa_experiment(params)
    models, states, rewards = aggregate_models_by_avg(
        get_sarsa_experiment, get_seeds(), get_params, get_states, get_rewards)
    models, states, rewards = filter_models_with_rewards_by_state(
        models, states, rewards, lambda state: state.startswith("Directional"))
    experiment01(experiment, models, states, rewards, params)
def get_experiment():
    """Build the ``"info_augmentation"`` QLearning experiment.

    The experiment is set up with the first seed from ``get_seeds()`` and
    its ``train_episodes`` attribute is set to 1000000 before it is returned.
    """
    q_experiment = setup_experiment(
        QLearning,
        "info_augmentation",
        get_seeds()[0],
    )
    q_experiment.train_episodes = 1000000
    return q_experiment