def validation_experiment(base_folder, env_entry_point):
    """Run the ``setup_validation`` experiment with the module-level defaults.

    Prepares an experiment folder under *base_folder*, writes the experiment
    description file, and launches the Q-learning train/test run using the
    default hyperparameter constants defined at module level.

    :param base_folder: root folder in which the experiment folder is created.
    :param env_entry_point: entry point of the environment under test.
    """
    experiment_name = "setup_validation"
    experiment_folder = prepare_experiment(base_folder, experiment_name)
    # prepare_experiment signals failure (e.g. folder problems) with None.
    if experiment_folder is None:
        return

    description_file = "{}/experiment_description.txt".format(experiment_folder)
    with open(description_file, "w") as f:
        # No parameter-variation table for this experiment, hence None.
        f.write(gen_exp_descr(experiment_name, None))

    def train_test_once(env):
        # One train/test pass with the module-level default hyperparameters.
        return ps_train_test_osp_ql(
            ps_env=env,
            max_number_of_steps=MAX_NUMBER_OF_STEPS,
            n_episodes=N_EPISODES,
            rand_qtab=RAND_QTAB,
            learning_rate=LEARNING_RATE,
            discount_factor=DISCOUNT_FACTOR,
            exploration_rate=EXPLORATION_RATE,
            exploration_decay_rate=EXPLORATION_DECAY_RATE,
            k_s=ACTIONS,
            visualize=VISUALIZE)

    run_ps_agent_experiment_with_result_files(
        agent_train_test_once=train_test_once,
        base_folder=experiment_folder,
        n_repeat=N_REPEAT,
        time_step=TIME_STEP,
        p_reff=P_REF,
        log_level=LOG_LEVEL,
        env_entry_point=env_entry_point,
        compute_reward=None,
    )
def exploration_experiment(
        base_folder,
        explor_params,
        env_entry_point,
        experiment_name="exploration(rate_and_discount)_variation"):
    """Sweep exploration-rate / exploration-decay pairs.

    For every ``(exploration_rate, exploration_decay_rate)`` pair in
    *explor_params* a dedicated subfolder is created and a full train/test
    experiment is run there; the remaining hyperparameters come from the
    module-level constants.

    :param base_folder: root folder in which the experiment folder is created.
    :param explor_params: iterable of (exploration rate, decay rate) pairs.
    :param env_entry_point: entry point of the environment under test.
    :param experiment_name: name used for the experiment folder.
    """
    experiment_folder = prepare_experiment(base_folder, experiment_name)
    # prepare_experiment signals failure with None.
    if experiment_folder is None:
        return

    description_file = "{}/experiment_description.txt".format(experiment_folder)
    with open(description_file, "w") as f:
        # Map each parameter index to its (rate, decay) pair for readers.
        legend = "\n".join(
            "{} - exploration rate = {}, exploration rate decay = {}".format(
                index, pair[0], pair[1])
            for index, pair in enumerate(explor_params))
        f.write(gen_exp_descr(experiment_name, legend))

    for rate, decay in explor_params:
        subfolder = "{}/expl_rate={}_decay={}".format(
            experiment_folder, rate, decay)
        os.mkdir(subfolder)

        def train_test_once(env):
            # One train/test pass with this iteration's exploration settings.
            return ps_train_test_osp_ql(
                ps_env=env,
                max_number_of_steps=MAX_NUMBER_OF_STEPS,
                n_episodes=N_EPISODES,
                rand_qtab=RAND_QTAB,
                learning_rate=LEARNING_RATE,
                discount_factor=DISCOUNT_FACTOR,
                exploration_rate=rate,
                exploration_decay_rate=decay,
                k_s=ACTIONS,
                visualize=VISUALIZE,
                n_test_episodes=N_TEST_EPISODES)

        run_ps_agent_experiment_with_result_files(
            agent_train_test_once=train_test_once,
            base_folder=subfolder,
            n_repeat=N_REPEAT,
            time_step=TIME_STEP,
            p_reff=P_REF,
            log_level=LOG_LEVEL,
            env_entry_point=env_entry_point,
            compute_reward=None,
        )
def reward_experiment(base_folder, env_entry_point, compute_reward_s,
                      experiment_name="reward_variation"):
    """Run one experiment per reward function.

    Each entry of *compute_reward_s* is a ``(name, function)`` pair; a
    subfolder named after the reward is created and the train/test experiment
    is executed there with that reward function, all other hyperparameters
    taken from the module-level constants.

    :param base_folder: root folder in which the experiment folder is created.
    :param env_entry_point: entry point of the environment under test.
    :param compute_reward_s: iterable of (reward name, reward callable) pairs.
    :param experiment_name: name used for the experiment folder.
    """
    experiment_folder = prepare_experiment(base_folder, experiment_name)
    # prepare_experiment signals failure with None.
    if experiment_folder is None:
        return

    description_file = "{}/experiment_description.txt".format(experiment_folder)
    # Map each parameter index to the reward name for readers of the log.
    legend = "\n".join(
        "{} - reward = {}".format(index, entry[0])
        for index, entry in enumerate(compute_reward_s))
    with open(description_file, "w") as f:
        f.write(gen_exp_descr(experiment_name, legend))

    for reward_name, reward_func in compute_reward_s:
        subfolder = "{}/reward-{}".format(experiment_folder, reward_name)
        os.mkdir(subfolder)

        def train_test_once(env):
            # One train/test pass with the default hyperparameters; only the
            # reward function varies between iterations (passed below).
            return ps_train_test_osp_ql(
                ps_env=env,
                max_number_of_steps=MAX_NUMBER_OF_STEPS,
                n_episodes=N_EPISODES,
                rand_qtab=RAND_QTAB,
                learning_rate=LEARNING_RATE,
                discount_factor=DISCOUNT_FACTOR,
                exploration_rate=EXPLORATION_RATE,
                exploration_decay_rate=EXPLORATION_DECAY_RATE,
                k_s=ACTIONS,
                visualize=VISUALIZE,
                n_test_episodes=N_TEST_EPISODES)

        run_ps_agent_experiment_with_result_files(
            agent_train_test_once=train_test_once,
            base_folder=subfolder,
            n_repeat=N_REPEAT,
            time_step=TIME_STEP,
            p_reff=P_REF,
            log_level=LOG_LEVEL,
            env_entry_point=env_entry_point,
            compute_reward=reward_func,
        )
def best_combination_experiment(base_folder, env_entry_point, t_s,
                                experiment_name="best_parameters_combination",
                                **kwargs):
    """Run the experiment once per candidate simulation time step.

    For each time step ``t`` in *t_s* a subfolder is created and the
    train/test experiment is run there; the episode step budget is scaled as
    ``MAX_NUMBER_OF_STEPS // t`` so the simulated horizon stays comparable
    across time steps. Extra keyword arguments are forwarded verbatim to
    ``run_ps_agent_experiment_with_result_files``.

    :param base_folder: root folder in which the experiment folder is created.
    :param env_entry_point: entry point of the environment under test.
    :param t_s: iterable of candidate time steps.
    :param experiment_name: name used for the experiment folder.
    :param kwargs: passed through to the experiment runner.
    """
    experiment_folder = prepare_experiment(base_folder, experiment_name)
    # prepare_experiment signals failure with None.
    if experiment_folder is None:
        return

    description_file = "{}/experiment_description.txt".format(experiment_folder)
    with open(description_file, "w") as f:
        # Map each parameter index to its time step for readers of the log.
        legend = "\n".join(
            "{} - time_step = {}".format(index, step)
            for index, step in enumerate(t_s))
        f.write(gen_exp_descr(experiment_name, legend))

    for step in t_s:
        subfolder = "{}/time_step={}".format(experiment_folder, step)
        os.mkdir(subfolder)
        # Keep the total simulated time roughly constant across time steps.
        step_budget = MAX_NUMBER_OF_STEPS // step

        def train_test_once(env):
            # One train/test pass with this iteration's step budget.
            return ps_train_test_osp_ql(
                ps_env=env,
                max_number_of_steps=step_budget,
                n_episodes=N_EPISODES,
                rand_qtab=RAND_QTAB,
                learning_rate=LEARNING_RATE,
                discount_factor=DISCOUNT_FACTOR,
                exploration_rate=EXPLORATION_RATE,
                exploration_decay_rate=EXPLORATION_DECAY_RATE,
                k_s=ACTIONS,
                visualize=VISUALIZE,
                n_test_episodes=N_TEST_EPISODES,
                n_test_steps=N_TEST_STEPS)

        run_ps_agent_experiment_with_result_files(
            agent_train_test_once=train_test_once,
            base_folder=subfolder,
            n_repeat=N_REPEAT,
            time_step=step,
            p_reff=P_REF,
            log_level=LOG_LEVEL,
            env_entry_point=env_entry_point,
            compute_reward=None,
            **kwargs)