# Emulation window: starts `backoff_epochs` time steps before `start_time`
# and ends `total_time` after that starting point.
head_datetime = start_time - time_step * backoff_epochs
tail_datetime = head_datetime + total_time
# Ratio of window length to step length, converted with int().
TOTAL_EPOCHS = int(total_time / time_step)

# Reward table handed to the traffic emulator, keyed by outcome name.
rewarding = dict(serve=Rs, wait=Rw, fail=Rf)

te = TrafficEmulator(
    session_df=session_df,
    time_step=time_step,
    head_datetime=head_datetime,
    tail_datetime=tail_datetime,
    rewarding=rewarding,
    verbose=2,
)

ts = TrafficServer(cost=(Co, Cw), verbose=2)

# The trailing literal 2 is passed positionally, exactly as before.
# NOTE(review): presumably the verbosity level -- confirm against SJTUModel.
env_model = SJTUModel(traffic_params, queue_params, reward_params, 2)

# Construct the learning agent as a Dyna_QAgentNN bound to `env_model`.
# NOTE(review): the closing parenthesis of this call is not visible in this
# part of the file -- the argument list appears to be cut off after
# `rs_period`. Confirm against the complete file before editing.
agent = Dyna_QAgentNN(
    env_model=env_model,
    num_sim=num_sim,
    # Disabled alternative constructor, kept for reference:
    # agent = Phi_QAgentNN(
    #     phi_length=phi_length,
    dim_state=dim_state,
    range_state=range_state,
    f_build_net=None,
    batch_size=batch_size,
    learning_rate=learning_rate,
    momentum=momentum,
    reward_scaling=reward_scaling,
    reward_scaling_update=reward_scaling_update,
    rs_period=rs_period,
# Passed to DynaQAgent below as its `num_sim` argument.
num_sim = 10

# Build entities
# Reward table handed to the traffic emulator, keyed by outcome name.
rewarding = dict(serve=Rs, wait=Rw, fail=Rf)
te = TrafficEmulator(
    session_df=session_df,
    time_step=time_step,
    rewarding=rewarding,
    verbose=1,
)

ts = TrafficServer(verbose=2, cost=(Co, Cs))

# Parameter tuples handed positionally to SJTUModel.
traffic_params = (
    model_type, traffic_window_size, stride, n_iter,
    adjust_offset, eval_period, eval_len, n_belief_bins,
)
queue_params = (max_queue_len,)
reward_params = (Rs, Rw, Rf, Co, Cs, None)
env_model = SJTUModel(traffic_params, queue_params, reward_params, verbose=1)

# Dyna-Q agent using `env_model`, with an epsilon exploration strategy.
agent = DynaQAgent(
    env_model=env_model,
    num_sim=num_sim,
    actions=actions,
    alpha=0.5,
    gamma=0.5,
    explore_strategy='epsilon',
    epsilon=0.1,
    verbose=2,
)
# Disabled alternative agent, kept for reference:
#agent = QAgentNN(dim_state=(1, 1, 3), range_state=((((0, 10), (0, 10), (0, 10),),),),
#                 learning_rate=0.01, reward_scaling=10, batch_size=100, freeze_period=50, memory_size=200, num_buffer=2,
#                 actions=actions, alpha=0.5, gamma=0.5, explore_strategy='epsilon', epsilon=0.1,
#                 verbose=2
#                 )

# Controller wrapping the agent built above.
c = QController(agent=agent)