def evaluate(self, X):
    """Evaluate the model one query at a time and average the metrics.

    Rows of ``X`` are grouped by ``X["qid"]``. For every distinct query id
    the loss and score tensors are run through the session, the labels of
    that query are ranked by descending score, and the ranking metrics are
    computed on the ranked labels.

    Args:
        X: mapping-like data set indexed with the ``"qid"`` and ``"label"``
            keys; also handed to ``self._get_feed_dict``.

    Returns:
        A 4-tuple ``(mean loss, mean calc_err, mean ndcg, mean ndcg with
        top_ten=False)``, each averaged over the distinct query ids.
        NOTE(review): ``ndcg`` with its default arguments is presumably
        NDCG@10 -- confirm against the ``ndcg`` helper.
    """
    all_qids = X["qid"]
    unique_qids = np.unique(all_qids)  # deduplicated and sorted
    num_queries = len(unique_qids)

    # One float64 slot per query for every tracked metric.
    per_query_loss = np.zeros(num_queries)
    per_query_err = np.zeros(num_queries)
    per_query_ndcg = np.zeros(num_queries)
    per_query_ndcg_full = np.zeros(num_queries)

    fetches = (self.loss, self.score)
    for slot, qid in enumerate(unique_qids):
        # Row indices belonging to the current query id.
        rows = np.nonzero(all_qids == qid)[0]
        batch_loss, batch_score = self.sess.run(
            fetches,
            feed_dict=self._get_feed_dict(X, rows, training=False),
        )

        # Rank this query's labels from highest to lowest predicted score.
        ranked = pd.DataFrame({
            "label": X["label"][rows].flatten(),
            "score": batch_score.flatten(),
        }).sort_values("score", ascending=False)
        ranked_labels = ranked["label"]

        per_query_loss[slot] = batch_loss
        per_query_ndcg[slot] = ndcg(ranked_labels)
        per_query_ndcg_full[slot] = ndcg(ranked_labels, top_ten=False)
        per_query_err[slot] = calc_err(ranked_labels)

    return (
        np.mean(per_query_loss),
        np.mean(per_query_err),
        np.mean(per_query_ndcg),
        np.mean(per_query_ndcg_full),
    )
def evaluate_tile(valset, probs, tiles_per_pos, threshold):
    """Validate tile-mode predictions against count-derived tile labels.

    Tiles are grouped by ``valset.tileIDX`` and, inside each group, ordered
    by predicted probability. For a group whose count label is ``n``
    (``valset.labels[group]``), the ``n * tiles_per_pos`` highest-probability
    tiles are labelled positive and the rest negative. If a group holds
    fewer tiles than that, only the tiles of that group are marked; the
    positive range never reaches into a neighbouring group.

    Args:
        valset: object exposing ``tileIDX`` (the group id of every tile) and
            ``labels`` (indexable by group id, yielding the count label).
        probs: array-like of per-tile predicted probabilities, aligned with
            ``valset.tileIDX``.
        tiles_per_pos: number of tiles labelled positive per counted unit.
        threshold: a tile is predicted positive when its probability is
            strictly greater than this value.

    Returns:
        The ``(err, fpr, fnr)`` triple produced by
        ``calc_err(predictions, labels)``.
    """
    val_groups = np.asarray(valset.tileIDX)
    probs = np.asarray(probs)

    # lexsort treats the LAST key as primary: tiles are ordered by group and,
    # within a group, by ascending probability (best tiles end each group).
    order = np.lexsort((probs, val_groups))
    val_groups = val_groups[order]
    val_probs = probs[order]
    val_index = val_probs > threshold

    # Build the classification labels: with count label n, the top
    # n * tiles_per_pos tiles of the group are positive.
    n_tiles = len(val_probs)
    labels = np.zeros(n_tiles)
    group_start = 0
    for i in range(1, n_tiles + 1):
        # i is one past the last tile of a group when the id changes or the
        # array ends.
        if i == n_tiles or val_groups[i] != val_groups[i - 1]:
            n_pos = int(valset.labels[val_groups[i - 1]]) * tiles_per_pos
            # Clamp to the group's first tile: an unclamped start would
            # either mark tiles of the previous group or become negative and
            # wrap around to the end of the array.
            first_pos = max(group_start, i - n_pos)
            labels[first_pos:i] = 1
            group_start = i

    # Error rate, false-positive rate and false-negative rate.
    err, fpr, fnr = calc_err(val_index, labels)
    return err, fpr, fnr