예제 #1
0
    def __init__(self,
                 policy_func: types.MethodType = None,
                 trained_path: str = "./_trained/Q_Learning/"):
        """Set the action policy and verify the trained-model location.

        Args:
            policy_func: Callable used as the action policy. When it is
                falsy (e.g. ``None``), the ``greedy_maximum`` method of a
                fresh ``policy.ActionPolicies`` instance is used instead.
            trained_path: Path stored as ``self.trained_path``; it must
                already exist on disk.

        Raises:
            RuntimeError: If ``trained_path`` does not exist.
        """
        if policy_func:
            self.policy_func = policy_func
        else:
            self.policy_func = policy.ActionPolicies().greedy_maximum

        # The path is recorded before it is validated, so the attribute is
        # set even when the check below raises.
        self.trained_path = trained_path
        if not os.path.exists(trained_path):
            raise RuntimeError("[Error] Trained Path NOT Found")

        # Model-related state starts empty.
        # NOTE(review): presumably filled in when a trained model is loaded
        # elsewhere in the class -- confirm.
        self.q_table = None
        self.epsilon = None
예제 #2
0
    def __init__(self,
                 train_epoch: int = 10000,
                 policy_func: types.MethodType = None,
                 update_func: types.MethodType = None,
                 output_path: str = "./_trained/Q_Learning/",
                 progress_bar: bool = True):
        """Store the training configuration and create the environment.

        Args:
            train_epoch: Value stored as ``self.train_epoch``.
            policy_func: Callable used as the action policy. When falsy,
                ``policy.ActionPolicies().greedy_epsilon`` is used.
            update_func: Callable used to update the Q-table. When falsy,
                ``update.UpdateQTable().q_function`` is used.
            output_path: Passed to ``check_output_path`` and then stored as
                ``self.output_path``.
            progress_bar: Flag stored as ``self.progress_bar``.
        """
        self.train_epoch = train_epoch
        self.env_obj = environment.Easy21Env()

        if policy_func:
            self.policy_func = policy_func
        else:
            self.policy_func = policy.ActionPolicies().greedy_epsilon

        if update_func:
            self.update_func = update_func
        else:
            # NOTE(review): the original comment states the defaults are
            # learning_rate=0.1 and discount_factor=0.5 -- confirm against
            # update.UpdateQTable.
            self.update_func = update.UpdateQTable().q_function

        self.progress_bar = progress_bar

        # The helper's return value is not used here.
        check_output_path(output_path=output_path)
        self.output_path = output_path
                evaluate_result_path, "evaluation_report_%s" %
                datetime.now().strftime("%Y%m%d%H%M%S")), "w") as f:
        for _model in models_result:
            f.write("Filename:%s\t"
                    "Epoch:%d\tLearning Rate:%f\t"
                    "Discount Factor:%f\tEpsilon:%f\t"
                    "WIN:%d\tTIE:%d\tLOSE:%d\tERR:%d\tALL:%d\n" %
                    (_model["filename"], _model["setting"]["epoch"],
                     _model["setting"]["learning rate"],
                     _model["setting"]["discount factor"],
                     _model["setting"]["epsilon"], _model["result"][1],
                     _model["result"][0], _model["result"][-1],
                     _model["result"]["err"], evaluate_rounds))

            if _model["result"][1] > max_win_cnt:
                max_win_cnt = _model["result"][1]
                max_win_rate_setting = _model["setting"]

    print("\nEvaluation Done\n")
    print("[Max Win Count]", max_win_cnt)
    print("[Max Count Setting]", max_win_rate_setting)


if __name__ == "__main__":
    # Script entry point: build the evaluation and Q-learning environment
    # objects, then run the parameter evaluation over the trained models.
    # NOTE(review): eval_env_obj and ql_env_obj are not passed to eval_param
    # below; presumably it reads them as module-level globals -- confirm.
    eval_env_obj = evaluate.EvaluateEnv(show_details=False)

    policy_func = policy.ActionPolicies().greedy_epsilon
    ql_env_obj = evaluate.QLearningEnv(policy_func=policy_func)

    eval_param(trained_model_path="../../_trained/", evaluate_rounds=1000)