예제 #1
0
                  use_budget_control=use_budget_control,
                  use_prioritized_experience_replay=use_prioritized_replay,
                  max_trajectory_length=user_max_request_time,
                  update_times_per_train=update_times_per_train)
    # Keyword arguments passed unchanged to both the training and the
    # evaluation entry point; collected once so the two calls cannot drift.
    shared_env_kwargs = dict(
        user_num=user_num,
        seed=seed,
        init_roi_th=init_cpr_thr,
        use_prioritized_replay=use_prioritized_replay,
        budget=budget,
        use_budget_control=use_budget_control,
        user_max_request_time=user_max_request_time,
    )

    if train:
        # Training run: 1000 episodes, logging every episode.
        run_env(
            agent=agent,
            training_episode=1000,
            training_log_interval=1,
            test_interval_list=[100, 5],
            test_round=1,
            **shared_env_kwargs,
        )
    else:
        # Evaluation run against a hard-coded saved-model path
        # (presumably the checkpoint eval_env restores -- confirm in eval_env).
        saved_model_path = "./result/20200206_learn_multiseed_result/MSBCB/train/1000_user/action_n=2/per=0/seed=1/1580959853/best_model/model-1000"
        eval_env(
            agent,
            saved_model_path,
            test_epoch=1,
            print_log=False,
            **shared_env_kwargs,
        )
예제 #2
0
                      update_times_per_train=update_times_per_train)

    # Keyword arguments passed unchanged to both the training and the
    # evaluation entry point; collected once so the two calls cannot drift.
    shared_env_kwargs = dict(
        user_num=user_num,
        seed=seed,
        init_roi_th=init_cpr_thr,
        use_prioritized_replay=use_prioritized_replay,
        budget=budget,
        use_budget_control=use_budget_control,
        user_max_request_time=user_max_request_time,
    )

    if train:
        # Training run: 200 episodes, logging every episode.
        # NOTE: the original author left a commented-out variant of these
        # arguments that used training_episode=1000 instead.
        run_env(
            agent=agent,
            training_episode=200,
            training_log_interval=1,
            test_interval_list=[100, 5],
            test_round=1,
            **shared_env_kwargs,
        )
    else:
        # Evaluation run (10 test epochs) against a hard-coded saved-model path
        # (presumably the checkpoint eval_env restores -- confirm in eval_env).
        saved_model_path = "./exp/learning_result/GreedyDQN/train/1000_user/action_n=11/per=0/seed=1/1581679892/best_model/model-235"
        eval_env(
            agent,
            saved_model_path,
            test_epoch=10,
            print_log=False,
            **shared_env_kwargs,
        )
예제 #3
0
        use_budget_control=use_budget_control,
        use_prioritized_experience_replay=use_prioritized_replay,
        max_trajectory_length=7,
        update_times_per_train=update_times_per_train)
    # Keyword arguments passed unchanged to both the training and the
    # evaluation entry point; collected once so the two calls cannot drift.
    # user_max_request_time is the literal 7 in both calls.
    shared_env_kwargs = dict(
        user_num=user_num,
        seed=seed,
        init_roi_th=init_cpr_thr,
        use_prioritized_replay=use_prioritized_replay,
        budget=budget,
        use_budget_control=use_budget_control,
        user_max_request_time=7,
    )

    if train:
        # Training run: 500 * 2 episodes, logging every episode.
        run_env(
            agent=agent,
            training_episode=500 * 2,
            training_log_interval=1,
            test_interval_list=[100, 5],
            test_round=1,
            **shared_env_kwargs,
        )
    else:
        # Evaluation run against a hard-coded saved-model path
        # (presumably the checkpoint eval_env restores -- confirm in eval_env).
        saved_model_path = "result/1000_user_no_roi_thr/GreedyDDPG/train/1000_user/action_n=1/per=0/seed=1/1578469558/best_model/model-800"
        eval_env(
            agent,
            saved_model_path,
            test_epoch=1,
            print_log=False,
            **shared_env_kwargs,
        )