use_budget_control=use_budget_control, use_prioritized_experience_replay=use_prioritized_replay, max_trajectory_length=user_max_request_time, update_times_per_train=update_times_per_train) if train: run_env(agent=agent, user_num=user_num, training_episode=1000, training_log_interval=1, test_interval_list=[100, 5], test_round=1, seed=seed, init_roi_th=init_cpr_thr, use_prioritized_replay=use_prioritized_replay, budget=budget, use_budget_control=use_budget_control, user_max_request_time=user_max_request_time) else: eval_env( agent, "./result/20200206_learn_multiseed_result/MSBCB/train/1000_user/action_n=2/per=0/seed=1/1580959853/best_model/model-1000", user_num=user_num, seed=seed, test_epoch=1, init_roi_th=init_cpr_thr, print_log=False, use_prioritized_replay=use_prioritized_replay, budget=budget, use_budget_control=use_budget_control, user_max_request_time=user_max_request_time)
update_times_per_train=update_times_per_train) if train: run_env( agent=agent, user_num=user_num, training_episode=200, training_log_interval=1, test_interval_list=[100, 5], # user_num=user_num, training_episode=1000, training_log_interval=1, test_interval_list=[100, 5], test_round=1, seed=seed, init_roi_th=init_cpr_thr, use_prioritized_replay=use_prioritized_replay, budget=budget, use_budget_control=use_budget_control, user_max_request_time=user_max_request_time) else: eval_env( agent, "./exp/learning_result/GreedyDQN/train/1000_user/action_n=11/per=0/seed=1/1581679892/best_model/model-235", user_num=user_num, seed=seed, test_epoch=10, init_roi_th=init_cpr_thr, print_log=False, use_prioritized_replay=use_prioritized_replay, budget=budget, use_budget_control=use_budget_control, user_max_request_time=user_max_request_time)
use_budget_control=use_budget_control, use_prioritized_experience_replay=use_prioritized_replay, max_trajectory_length=7, update_times_per_train=update_times_per_train) if train: run_env(agent=agent, user_num=user_num, training_episode=500 * 2, training_log_interval=1, test_interval_list=[100, 5], test_round=1, seed=seed, init_roi_th=init_cpr_thr, use_prioritized_replay=use_prioritized_replay, budget=budget, use_budget_control=use_budget_control, user_max_request_time=7) else: eval_env( agent, "result/1000_user_no_roi_thr/GreedyDDPG/train/1000_user/action_n=1/per=0/seed=1/1578469558/best_model/model-800", user_num=user_num, seed=seed, test_epoch=1, init_roi_th=init_cpr_thr, print_log=False, use_prioritized_replay=use_prioritized_replay, budget=budget, use_budget_control=use_budget_control, user_max_request_time=7)