def __init__(self, policy_func: types.MethodType = None,
             trained_path: str = "./_trained/Q_Learning/"):
    # Fall back to the greedy-maximum policy when none is supplied
    self.policy_func = policy_func if policy_func \
        else policy.ActionPolicies().greedy_maximum
    self.trained_path = trained_path
    if not os.path.exists(trained_path):
        raise RuntimeError("[Error] Trained Path NOT Found")

    # Model Related Initialization
    self.q_table = None
    self.epsilon = None
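# --- Illustration (not in the source): the `greedy_maximum` default above is
# not shown here. A minimal sketch of what such a policy can look like,
# assuming the Q-table maps each state to a dict of per-action values (the
# table layout and function name are assumptions for illustration only):
def greedy_maximum_sketch(q_table: dict, state) -> str:
    """Return the action with the highest Q-value for the given state."""
    action_values = q_table[state]  # e.g. {"hit": 0.31, "stick": 0.12}
    return max(action_values, key=action_values.get)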
def __init__(self, train_epoch: int = 10000,
             policy_func: types.MethodType = None,
             update_func: types.MethodType = None,
             output_path: str = "./_trained/Q_Learning/",
             progress_bar: bool = True):
    self.train_epoch = train_epoch
    self.env_obj = environment.Easy21Env()
    # Fall back to the epsilon-greedy policy when none is supplied
    self.policy_func = policy_func if policy_func \
        else policy.ActionPolicies().greedy_epsilon
    # Default Q-table update: learning_rate=0.1, discount_factor=0.5
    self.update_func = update_func if update_func \
        else update.UpdateQTable().q_function
    self.progress_bar = progress_bar
    # Validate the output path before training starts
    _ = check_output_path(output_path=output_path)
    self.output_path = output_path
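# --- Illustration (not in the source): the default update above points at
# `update.UpdateQTable().q_function`, a tabular Q-learning update. A minimal
# sketch of that rule, Q(s,a) += lr * (r + gamma * max_a' Q(s',a') - Q(s,a)),
# using the learning-rate/discount values named in the comment; the table
# layout and signature are assumptions:
def q_update_sketch(q_table: dict, state, action, reward, next_state,
                    learning_rate: float = 0.1,
                    discount_factor: float = 0.5) -> None:
    """Apply one temporal-difference update to q_table in place."""
    # Terminal states (absent from the table) contribute no future value
    best_next = max(q_table[next_state].values()) if next_state in q_table else 0.0
    td_target = reward + discount_factor * best_next
    q_table[state][action] += learning_rate * (td_target - q_table[state][action])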
evaluate_result_path, "evaluation_report_%s" % datetime.now().strftime("%Y%m%d%H%M%S")), "w") as f: for _model in models_result: f.write("Filename:%s\t" "Epoch:%d\tLearning Rate:%f\t" "Discount Factor:%f\tEpsilon:%f\t" "WIN:%d\tTIE:%d\tLOSE:%d\tERR:%d\tALL:%d\n" % (_model["filename"], _model["setting"]["epoch"], _model["setting"]["learning rate"], _model["setting"]["discount factor"], _model["setting"]["epsilon"], _model["result"][1], _model["result"][0], _model["result"][-1], _model["result"]["err"], evaluate_rounds)) if _model["result"][1] > max_win_cnt: max_win_cnt = _model["result"][1] max_win_rate_setting = _model["setting"] print("\nEvaluation Done\n") print("[Max Win Count]", max_win_cnt) print("[Max Count Setting]", max_win_rate_setting) if "__main__" == __name__: eval_env_obj = evaluate.EvaluateEnv(show_details=False) policy_func = policy.ActionPolicies().greedy_epsilon ql_env_obj = evaluate.QLearningEnv(**{"policy_func": policy_func}) eval_param(trained_model_path="../../_trained/", evaluate_rounds=1000)