def validation_experiment(base_folder, env_entry_point):
    """Run the baseline "setup_validation" experiment with default parameters.

    Creates the experiment folder under *base_folder*, writes a description
    file, and launches the Q-learning train/test procedure using the
    module-level default hyperparameters.

    :param base_folder: root folder in which the experiment folder is created.
    :param env_entry_point: entry point of the power-system environment,
        forwarded to the experiment runner.
    :return: None. Returns early if the experiment folder could not be
        prepared (``prepare_experiment`` returned None).
    """
    experiment_name = "setup_validation"
    experiment_folder = prepare_experiment(base_folder, experiment_name)

    # prepare_experiment signals failure (e.g. the folder already exists)
    # by returning None — nothing to do in that case.
    if experiment_folder is None:
        return

    descr_path = "{}/experiment_description.txt".format(experiment_folder)
    with open(descr_path, "w") as descr_file:
        descr_file.write(gen_exp_descr(experiment_name, None))

    def train_test_once(env):
        # One train/test cycle with the module-level default settings.
        return ps_train_test_osp_ql(
            ps_env=env,
            max_number_of_steps=MAX_NUMBER_OF_STEPS,
            n_episodes=N_EPISODES,
            rand_qtab=RAND_QTAB,
            learning_rate=LEARNING_RATE,
            discount_factor=DISCOUNT_FACTOR,
            exploration_rate=EXPLORATION_RATE,
            exploration_decay_rate=EXPLORATION_DECAY_RATE,
            k_s=ACTIONS,
            visualize=VISUALIZE)

    run_ps_agent_experiment_with_result_files(
        agent_train_test_once=train_test_once,
        base_folder=experiment_folder,
        n_repeat=N_REPEAT,
        time_step=TIME_STEP,
        p_reff=P_REF,
        log_level=LOG_LEVEL,
        env_entry_point=env_entry_point,
        compute_reward=None,
    )
def exploration_experiment(
        base_folder,
        explor_params,
        env_entry_point,
        experiment_name="exploration(rate_and_discount)_variation"):
    """Run one sub-experiment per (exploration rate, decay) pair.

    For each pair in *explor_params* a subfolder is created and the
    Q-learning train/test procedure is run with that exploration rate and
    exploration decay rate; all other hyperparameters come from the
    module-level defaults.

    :param base_folder: root folder in which the experiment folder is created.
    :param explor_params: iterable of ``(exploration_rate,
        exploration_decay_rate)`` pairs.
    :param env_entry_point: entry point of the power-system environment,
        forwarded to the experiment runner.
    :param experiment_name: name of the experiment folder.
    :return: None. Returns early if the experiment folder could not be
        prepared (``prepare_experiment`` returned None).
    """
    experiment_folder = prepare_experiment(base_folder, experiment_name)

    if experiment_folder is None:
        return

    # Materialize once: explor_params is iterated twice below (description
    # pass + run loop); a generator argument would be exhausted after the
    # first pass, silently skipping every run.
    explor_params = list(explor_params)

    description_file = "{}/experiment_description.txt".format(
        experiment_folder)
    with open(description_file, "w") as f:
        param_i_correspondence = "\n".join([
            "{} - exploration rate = {}, exploration rate decay = {}".format(
                i, params[0], params[1])
            for i, params in enumerate(explor_params)
        ])
        f.write(gen_exp_descr(experiment_name, param_i_correspondence))

    for expl_rate, expl_decay in explor_params:
        subfolder = "{}/expl_rate={}_decay={}".format(experiment_folder,
                                                      expl_rate, expl_decay)
        os.mkdir(subfolder)
        # Bind the loop variables as lambda defaults: a plain closure would
        # late-bind expl_rate/expl_decay, so if the runner ever stored the
        # callable and invoked it after the loop, every run would use the
        # last parameter pair.
        run_ps_agent_experiment_with_result_files(
            agent_train_test_once=lambda env, _rate=expl_rate,
            _decay=expl_decay: ps_train_test_osp_ql(
                ps_env=env,
                max_number_of_steps=MAX_NUMBER_OF_STEPS,
                n_episodes=N_EPISODES,
                rand_qtab=RAND_QTAB,
                learning_rate=LEARNING_RATE,
                discount_factor=DISCOUNT_FACTOR,
                exploration_rate=_rate,
                exploration_decay_rate=_decay,
                k_s=ACTIONS,
                visualize=VISUALIZE,
                n_test_episodes=N_TEST_EPISODES),
            base_folder=subfolder,
            n_repeat=N_REPEAT,
            time_step=TIME_STEP,
            p_reff=P_REF,
            log_level=LOG_LEVEL,
            env_entry_point=env_entry_point,
            compute_reward=None,
        )
def reward_experiment(base_folder,
                      env_entry_point,
                      compute_reward_s,
                      experiment_name="reward_variation"):
    """Run one sub-experiment per reward function.

    For each ``(name, function)`` pair in *compute_reward_s* a subfolder is
    created and the Q-learning train/test procedure is run with that reward
    function passed through as ``compute_reward``; all hyperparameters come
    from the module-level defaults.

    :param base_folder: root folder in which the experiment folder is created.
    :param env_entry_point: entry point of the power-system environment,
        forwarded to the experiment runner.
    :param compute_reward_s: iterable of ``(name, reward_function)`` pairs;
        *name* labels the subfolder.
    :param experiment_name: name of the experiment folder.
    :return: None. Returns early if the experiment folder could not be
        prepared (``prepare_experiment`` returned None).
    """
    experiment_folder = prepare_experiment(base_folder, experiment_name)

    if experiment_folder is None:
        return

    # Materialize once: compute_reward_s is iterated twice below
    # (description pass + run loop); a generator argument would be
    # exhausted after the first pass, silently skipping every run.
    compute_reward_s = list(compute_reward_s)

    description_file = "{}/experiment_description.txt".format(
        experiment_folder)

    param_i_correspondence = "\n".join([
        "{} - reward = {}".format(i, name)
        for i, (name, _) in enumerate(compute_reward_s)
    ])

    with open(description_file, "w") as f:
        f.write(gen_exp_descr(experiment_name, param_i_correspondence))

    for name, func in compute_reward_s:
        subfolder = "{}/reward-{}".format(experiment_folder, name)
        os.mkdir(subfolder)
        # Note: func is passed eagerly via compute_reward=func, and the
        # lambda closes over no loop variables, so no late-binding hazard.
        run_ps_agent_experiment_with_result_files(
            agent_train_test_once=lambda env: ps_train_test_osp_ql(
                ps_env=env,
                max_number_of_steps=MAX_NUMBER_OF_STEPS,
                n_episodes=N_EPISODES,
                rand_qtab=RAND_QTAB,
                learning_rate=LEARNING_RATE,
                discount_factor=DISCOUNT_FACTOR,
                exploration_rate=EXPLORATION_RATE,
                exploration_decay_rate=EXPLORATION_DECAY_RATE,
                k_s=ACTIONS,
                visualize=VISUALIZE,
                n_test_episodes=N_TEST_EPISODES),
            base_folder=subfolder,
            n_repeat=N_REPEAT,
            time_step=TIME_STEP,
            p_reff=P_REF,
            log_level=LOG_LEVEL,
            env_entry_point=env_entry_point,
            compute_reward=func,
        )
def best_combination_experiment(base_folder,
                                env_entry_point,
                                t_s,
                                experiment_name="best_parameters_combination",
                                **kwargs):
    """Run one sub-experiment per environment time step.

    For each time step in *t_s* a subfolder is created and the Q-learning
    train/test procedure is run with ``max_number_of_steps`` scaled down by
    the time step (``MAX_NUMBER_OF_STEPS // t``), so that every run covers
    the same simulated horizon; all other hyperparameters come from the
    module-level defaults.

    :param base_folder: root folder in which the experiment folder is created.
    :param env_entry_point: entry point of the power-system environment,
        forwarded to the experiment runner.
    :param t_s: iterable of time-step values to evaluate.
    :param experiment_name: name of the experiment folder.
    :param kwargs: extra keyword arguments forwarded verbatim to
        ``run_ps_agent_experiment_with_result_files``.
    :return: None. Returns early if the experiment folder could not be
        prepared (``prepare_experiment`` returned None).
    """
    experiment_folder = prepare_experiment(base_folder, experiment_name)

    if experiment_folder is None:
        return

    # Materialize once: t_s is iterated twice below (description pass +
    # run loop); a generator argument would be exhausted after the first
    # pass, silently skipping every run.
    t_s = list(t_s)

    description_file = "{}/experiment_description.txt".format(
        experiment_folder)
    with open(description_file, "w") as f:
        param_i_correspondence = "\n".join(
            ["{} - time_step = {}".format(i, t) for i, t in enumerate(t_s)])
        f.write(gen_exp_descr(experiment_name, param_i_correspondence))

    for t in t_s:
        subfolder = "{}/time_step={}".format(experiment_folder, t)
        os.mkdir(subfolder)
        max_n_steps = MAX_NUMBER_OF_STEPS // t
        # Bind max_n_steps as a lambda default: a plain closure would
        # late-bind it, so if the runner ever stored the callable and
        # invoked it after the loop, every run would use the last value.
        run_ps_agent_experiment_with_result_files(
            agent_train_test_once=lambda env, _steps=max_n_steps:
            ps_train_test_osp_ql(
                ps_env=env,
                max_number_of_steps=_steps,
                n_episodes=N_EPISODES,
                rand_qtab=RAND_QTAB,
                learning_rate=LEARNING_RATE,
                discount_factor=DISCOUNT_FACTOR,
                exploration_rate=EXPLORATION_RATE,
                exploration_decay_rate=EXPLORATION_DECAY_RATE,
                k_s=ACTIONS,
                visualize=VISUALIZE,
                n_test_episodes=N_TEST_EPISODES,
                n_test_steps=N_TEST_STEPS),
            base_folder=subfolder,
            n_repeat=N_REPEAT,
            time_step=t,
            p_reff=P_REF,
            log_level=LOG_LEVEL,
            env_entry_point=env_entry_point,
            compute_reward=None,
            **kwargs)