Exemple #1
0
         if end:
             end_user.add(user)
         env.set_state(user, s_)
         reward += r
         memory.store_transition(s, a, r, s_)
 # Network training and parameter update.
 # Sample BATCH_SIZE stored transitions. The four returned values are bound
 # as (state, action, reward, next state) batches -- presumably mirroring the
 # store_transition(s, a, r, s_) argument order; confirm against memory.sample.
 b_s, b_a, b_r, b_s_ = memory.sample(BATCH_SIZE)
 # Q estimate of the sampled next states, taken from target_AC rather than
 # from the network being trained.
 q_ = target_AC.get_q(b_s_)
 # One-step bootstrapped target: reward plus GAMMA times the next-state Q.
 # NOTE(review): no terminal-state mask is applied here -- confirm that is
 # intended for transitions where the episode ended.
 target_q = b_r + GAMMA * q_
 # NOTE(review): b_a is sampled but not passed to train(); presumably the
 # network recomputes the action from b_s -- verify against train_AC.train.
 train_AC.train(target_q, s=b_s)
 # Hand the trained actor/critic parameters to target_AC. t=0.2 looks like a
 # soft-update mixing coefficient -- TODO confirm against set_params.
 target_AC.set_params(train_AC.a_params, train_AC.c_params, t=0.2)
 # Collect per-user metrics.
 # user_auc accumulates every non-negative auc value returned by env.auc.
 user_auc = 0
 # User ids 1..943 are hard-coded (presumably the dataset's user count --
 # confirm); the same 943 is reused in the exit check after this loop.
 for user in range(1, 944):
     # Users already recorded in end_user are skipped.
     if user not in end_user:
         # NOTE(review): `items` is never read in the visible code; keep the
         # call unless _find_positive_history is confirmed side-effect free.
         items = env._find_positive_history(user)
         s = env.state[user]
         a = train_AC.get_action(s)
         all_label_is, auc = env.auc(user, a)
         if auc >= 0:
             user_auc += auc
         elif all_label_is == 0:
             # Negative auc with all_label_is == 0: presumably a sentinel for
             # "every label is negative" -- confirm against env.auc.
             neg_user.add(user)
         else:
             # Negative auc with any other all_label_is value: presumably
             # "every label is positive" -- confirm against env.auc.
             pos_user.add(user)
 # Leave the enclosing loop once the sizes of end_user, neg_user and pos_user
 # add up to 943 (i.e. every user is accounted for, assuming the three
 # collections do not overlap). Statements following this check are skipped
 # on the iteration that breaks.
 if 943 - len(end_user) - len(neg_user) - len(pos_user) == 0:
     break
 # Record the metrics: append the current sizes of neg_user, pos_user and
 # end_user to their respective history lists.
 all_neg_user_num.append(len(neg_user))
 all_pos_user_num.append(len(pos_user))
 end_user_num.append(len(end_user))