def test(net, config, logger, test_loader, test_info, step, model_file=None):
    with torch.no_grad():
        net.eval()

        if model_file is not None:
            net.load_state_dict(torch.load(model_file))

        final_res = {}
        final_res['version'] = 'VERSION 1.3'
        final_res['results'] = {}
        final_res['external_data'] = {
            'used': True,
            'details': 'Features from I3D Network'
        }

        num_correct = 0.
        num_total = 0.

        load_iter = iter(test_loader)

        for _ in range(len(test_loader.dataset)):
            # _data: video features, _label: video-level label,
            # vid_name: video name, vid_num_seg: true length of the feature sequence
            _data, _label, _, vid_name, vid_num_seg = next(load_iter)

            _data = _data.cuda()
            _label = _label.cuda()

            # cas_base: (1, T, 21), score_supp: (1, 21),
            # cas_supp: (1, T, 21), fore_weights: (1, T, 1)
            _, cas_base, score_supp, cas_supp, fore_weights = net(_data)

            label_np = _label.cpu().numpy()
            # Action-class scores, background class excluded: (20,)
            score_np = score_supp[0, :-1].cpu().data.numpy()

            # Binarize the video-level scores (class_thresh = 0.25)
            score_np[np.where(score_np < config.class_thresh)] = 0
            score_np[np.where(score_np >= config.class_thresh)] = 1

            # A video counts as correctly classified only if all
            # num_classes predictions match the label.
            correct_pred = np.sum(label_np == score_np, axis=1)
            num_correct += np.sum(
                (correct_pred == config.num_classes).astype(np.float32))
            num_total += correct_pred.shape[0]

            # Clamp the CAS values into [0, 1] for numerical stability
            cas_base = utils.minmax_norm(cas_base)  # (B, T, C+1)
            cas_supp = utils.minmax_norm(cas_supp)  # (B, T, C+1)

            # Indices of the action classes predicted for this video
            pred = np.where(score_np > config.class_thresh)[0]

            # len(pred) instead of pred.any(): an index array holding only
            # class 0 would otherwise evaluate as empty.
            if len(pred) > 0:
                cas_pred = cas_supp[0].cpu().numpy()[:, pred]  # (T, len(pred))
                cas_pred = np.reshape(
                    cas_pred, (config.num_segments, -1, 1))  # (T, len(pred), 1)
                # Upsample along time, e.g. (750, 1, 1) -> (18000, 1, 1)
                # with scale = 24
                cas_pred = utils.upgrade_resolution(cas_pred, config.scale)

                proposal_dict = {}

                # act_thresh = np.arange(0.0, 0.25, 0.025)
                for t in range(len(config.act_thresh)):
                    cas_temp = cas_pred.copy()  # (18000, len(pred), 1)

                    # Zero out positions below the current activation threshold
                    zero_location = np.where(
                        cas_temp[:, :, 0] < config.act_thresh[t])
                    cas_temp[zero_location] = 0

                    # One sub-list per predicted class, holding the indices
                    # of the positions that survive the threshold.
                    seg_list = []
                    for c in range(len(pred)):
                        pos = np.where(cas_temp[:, c, 0] > 0)
                        seg_list.append(pos)

                    # Each proposal is (class, score, start, end),
                    # e.g. [[[5, 0.0025, 169.42, 169.6]]]
                    proposals = utils.get_proposal_oic(
                        seg_list, cas_temp, score_np, pred, config.scale,
                        vid_num_seg[0].cpu().item(), config.feature_fps,
                        config.num_segments)

                    for j in range(len(proposals)):
                        class_id = proposals[j][0][0]
                        if class_id not in proposal_dict.keys():
                            proposal_dict[class_id] = []
                        proposal_dict[class_id] += proposals[j]

                final_proposals = []
                for class_id in proposal_dict.keys():
                    final_proposals.append(
                        utils.nms(proposal_dict[class_id], 0.7))

                final_res['results'][vid_name[0]] = utils.result2json(
                    final_proposals)

        test_acc = num_correct / num_total

        json_path = os.path.join(config.output_path, 'temp_result.json')
        with open(json_path, 'w') as f:
            json.dump(final_res, f)

        tIoU_thresh = np.linspace(0.1, 0.9, 9)
        anet_detection = ANETdetection(config.gt_path, json_path,
                                       subset='test',
                                       tiou_thresholds=tIoU_thresh,
                                       verbose=False, check_status=False)
        mAP, average_mAP = anet_detection.evaluate()

        logger.log_value('Test accuracy', test_acc, step)
        for i in range(tIoU_thresh.shape[0]):
            logger.log_value('mAP@{:.1f}'.format(tIoU_thresh[i]), mAP[i], step)
        logger.log_value('Average mAP', average_mAP, step)

        test_info["step"].append(step)
        test_info["test_acc"].append(test_acc)
        test_info["average_mAP"].append(average_mAP)
        for i in range(tIoU_thresh.shape[0]):
            test_info["mAP@{:.1f}".format(tIoU_thresh[i])].append(mAP[i])
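# --- Reference sketches (not part of the original repo) ---------------------
# The test loop above leans on utils.minmax_norm and utils.upgrade_resolution.
# Below are minimal sketches of what they plausibly do, assuming activation
# maps shaped (B, T, C) / (T, C, 1) and the module-level numpy/torch imports
# used throughout this file; the repo's actual implementations may differ.

from scipy.interpolate import interp1d


def upgrade_resolution_sketch(arr, scale):
    """Linearly interpolate a (T, C, 1) score array to (T * scale, C, 1)."""
    x = np.arange(arr.shape[0])
    f = interp1d(x, arr, kind='linear', axis=0, fill_value='extrapolate')
    up_x = np.arange(0, arr.shape[0], 1.0 / scale)
    return f(up_x)


def minmax_norm_sketch(act_map, min_val=None, max_val=None):
    """Scale activations into [0, 1] along the temporal dimension (dim=1)."""
    if min_val is None or max_val is None:
        max_val = torch.max(act_map, dim=1, keepdim=True)[0]
        min_val = torch.min(act_map, dim=1, keepdim=True)[0]
    delta = max_val - min_val
    delta[delta <= 0] = 1  # guard flat channels against division by zero
    return ((act_map - min_val) / delta).clamp(0, 1)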
def test(sess, model, init, input_list, test_iter):
    ckpt = input_list['ckpt']
    scale = input_list['scale']
    class_threshold = input_list['class_threshold']

    rgb_saver = tf.train.Saver()
    flow_saver = tf.train.Saver()

    # File for matching 'video number' and 'video name'
    test_vid_list = open('THUMOS14_test_vid_list.txt', 'r')
    lines = test_vid_list.read().splitlines()

    # Define the output json file
    final_result = {}
    final_result['version'] = 'VERSION 1.3'
    final_result['results'] = {}
    final_result['external_data'] = {
        'used': True,
        'details': 'Features from I3D Net'
    }

    for i in range(1, TEST_NUM + 1):
        vid_name = lines[i - 1]

        # Load features for the i-th test video
        rgb_features, flow_features, temp_seg, vid_len = utils.processTestVid(
            i, INPUT_PATHS['test'], NUM_SEGMENTS)
        rgb_features = rgb_features.astype(np.float32)
        flow_features = flow_features.astype(np.float32)

        # RGB stream
        sess.run(init)
        rgb_saver.restore(sess,
                          os.path.join(ckpt['rgb'], 'rgb_' + str(test_iter)))
        rgb_class_w = tf.get_default_graph().get_tensor_by_name(
            'Classification/class_weight/kernel:0').eval()
        rgb_attention, rgb_raw, rgb_class_result = sess.run(
            [model.attention_weights, model.class_weight, model.class_result],
            feed_dict={model.X: rgb_features})

        # Flow stream
        sess.run(init)
        flow_saver.restore(sess,
                           os.path.join(ckpt['flow'], 'flow_' + str(test_iter)))
        flow_class_w = tf.get_default_graph().get_tensor_by_name(
            'Classification/class_weight/kernel:0').eval()
        flow_attention, flow_raw, flow_class_result = sess.run(
            [model.attention_weights, model.class_weight, model.class_result],
            feed_dict={model.X: flow_features})

        # Gather the classification results
        rgb_class_prediction = np.where(rgb_class_result > class_threshold)[1]
        flow_class_prediction = np.where(flow_class_result > class_threshold)[1]

        rgb_tCam = utils.get_tCAM(rgb_features, rgb_class_w)
        flow_tCam = utils.get_tCAM(flow_features, flow_class_w)

        r_check = False
        f_check = False

        # .size instead of .any(): an index array containing only class 0
        # would otherwise evaluate as empty.
        if rgb_class_prediction.size > 0:
            r_check = True
            # Weighted T-CAM
            rgb_wtCam = utils.get_wtCAM(rgb_tCam, flow_tCam, rgb_attention,
                                        ALPHA, rgb_class_prediction)
            # Interpolate W-TCAM
            rgb_int_wtCam = utils.interpolated_wtCAM(rgb_wtCam, scale)
            # Get segment list of rgb_int_wtCam
            rgb_temp_idx = utils.get_tempseg_list(rgb_int_wtCam,
                                                  len(rgb_class_prediction))
            # Temporal proposals
            rgb_temp_prop = utils.get_temp_proposal(rgb_temp_idx, rgb_int_wtCam,
                                                    rgb_class_prediction,
                                                    scale, vid_len)

        if flow_class_prediction.size > 0:
            f_check = True
            # Weighted T-CAM
            flow_wtCam = utils.get_wtCAM(flow_tCam, rgb_tCam, flow_attention,
                                         1 - ALPHA, flow_class_prediction)
            # Interpolate W-TCAM
            flow_int_wtCam = utils.interpolated_wtCAM(flow_wtCam, scale)
            # Get segment list of flow_int_wtCam
            flow_temp_idx = utils.get_tempseg_list(flow_int_wtCam,
                                                   len(flow_class_prediction))
            # Temporal proposals
            flow_temp_prop = utils.get_temp_proposal(flow_temp_idx,
                                                     flow_int_wtCam,
                                                     flow_class_prediction,
                                                     scale, vid_len)

        if r_check and f_check:
            # Fuse the two streams and perform non-maximum suppression
            temp_prop = utils.integrated_prop(rgb_temp_prop, flow_temp_prop,
                                              list(rgb_class_prediction),
                                              list(flow_class_prediction))
            final_result['results'][vid_name] = utils.result2json([temp_prop])
        elif r_check and not f_check:
            final_result['results'][vid_name] = utils.result2json(rgb_temp_prop)
        elif not r_check and f_check:
            final_result['results'][vid_name] = utils.result2json(flow_temp_prop)

        utils.inf_progress(i, TEST_NUM, 'Progress', 'Complete', 1, 50)

    # Save results
    json_path = os.path.join(ckpt['path'], 'results.json')
    with open(json_path, 'w') as fp:
        json.dump(final_result, fp)

    txt_path = os.path.join(ckpt['path'], 'results.txt')
    with open(txt_path, 'w') as tp:
        utils.json2txt(final_result['results'], tp)

    test_vid_list.close()
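# --- Reference sketch (not part of the original repo) -----------------------
# utils.get_tCAM / utils.get_wtCAM above compute the STPN-style temporal class
# activation maps. A rough sketch of the idea, assuming (T, D) features,
# (D, C) classification weights, and (T,) attention weights; the fused score
# uses the mixing coefficient alpha (ALPHA for RGB, 1 - ALPHA for flow). The
# repo's actual shapes and fusion details may differ.


def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))


def get_tCAM_sketch(features, class_w):
    """Temporal CAM: project each snippet feature through the class weights."""
    return np.matmul(features, class_w)  # (T, C)


def get_wtCAM_sketch(main_tCAM, sub_tCAM, attention, alpha, pred):
    """Attention-weighted T-CAM for the predicted classes of one stream."""
    wtCAM = sigmoid(main_tCAM) * attention.reshape(-1, 1)
    signal = wtCAM[:, pred]  # localization signal
    score = alpha * main_tCAM[:, pred] + (1 - alpha) * sub_tCAM[:, pred]
    return np.stack([signal, score], axis=2)  # (T, len(pred), 2)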
def test(net, config, logger, test_loader, test_info, step, model_file=None):
    with torch.no_grad():
        net.eval()

        if model_file is not None:
            net.load_state_dict(torch.load(model_file))

        final_res = {}
        final_res['version'] = 'VERSION 1.3'
        final_res['results'] = {}
        final_res['external_data'] = {
            'used': True,
            'details': 'Features from I3D Network'
        }

        num_correct = 0.
        num_total = 0.

        load_iter = iter(test_loader)

        for _ in range(len(test_loader.dataset)):
            _data, _label, _, vid_name, vid_num_seg = next(load_iter)

            _data = _data.cuda()
            _label = _label.cuda()

            vid_num_seg = vid_num_seg[0].cpu().item()
            num_segments = _data.shape[1]

            score_act, _, feat_act, feat_bkg, features, cas_softmax = net(_data)

            # Mean feature magnitude of the action and background snippets
            feat_magnitudes_act = torch.mean(torch.norm(feat_act, dim=2), dim=1)
            feat_magnitudes_bkg = torch.mean(torch.norm(feat_bkg, dim=2), dim=1)

            label_np = _label.cpu().data.numpy()
            score_np = score_act[0].cpu().data.numpy()

            # Binarize the video-level scores
            pred_np = np.zeros_like(score_np)
            pred_np[np.where(score_np < config.class_thresh)] = 0
            pred_np[np.where(score_np >= config.class_thresh)] = 1

            correct_pred = np.sum(label_np == pred_np, axis=1)
            num_correct += np.sum(
                (correct_pred == config.num_classes).astype(np.float32))
            num_total += correct_pred.shape[0]

            # Normalize the per-snippet feature magnitudes between the
            # background and action statistics, then use them to modulate
            # the class activation sequence.
            feat_magnitudes = torch.norm(features, p=2, dim=2)
            feat_magnitudes = utils.minmax_norm(feat_magnitudes,
                                                max_val=feat_magnitudes_act,
                                                min_val=feat_magnitudes_bkg)
            feat_magnitudes = feat_magnitudes.repeat(
                (config.num_classes, 1, 1)).permute(1, 2, 0)

            cas = utils.minmax_norm(cas_softmax * feat_magnitudes)

            pred = np.where(score_np >= config.class_thresh)[0]
            # Fall back to the top-scoring class if nothing passes the threshold
            if len(pred) == 0:
                pred = np.array([np.argmax(score_np)])

            cas_pred = cas[0].cpu().numpy()[:, pred]
            cas_pred = np.reshape(cas_pred, (num_segments, -1, 1))
            cas_pred = utils.upgrade_resolution(cas_pred, config.scale)

            proposal_dict = {}

            feat_magnitudes_np = feat_magnitudes[0].cpu().data.numpy()[:, pred]
            feat_magnitudes_np = np.reshape(feat_magnitudes_np,
                                            (num_segments, -1, 1))
            feat_magnitudes_np = utils.upgrade_resolution(feat_magnitudes_np,
                                                          config.scale)

            # Proposals from thresholding the CAS
            for t in range(len(config.act_thresh_cas)):
                cas_temp = cas_pred.copy()

                zero_location = np.where(
                    cas_temp[:, :, 0] < config.act_thresh_cas[t])
                cas_temp[zero_location] = 0

                seg_list = []
                for c in range(len(pred)):
                    pos = np.where(cas_temp[:, c, 0] > 0)
                    seg_list.append(pos)

                proposals = utils.get_proposal_oic(seg_list, cas_temp, score_np,
                                                   pred, config.scale,
                                                   vid_num_seg,
                                                   config.feature_fps,
                                                   num_segments)

                for j in range(len(proposals)):
                    class_id = proposals[j][0][0]
                    if class_id not in proposal_dict.keys():
                        proposal_dict[class_id] = []
                    proposal_dict[class_id] += proposals[j]

            # Proposals from thresholding the feature magnitudes
            for t in range(len(config.act_thresh_magnitudes)):
                cas_temp = cas_pred.copy()

                feat_magnitudes_np_temp = feat_magnitudes_np.copy()
                zero_location = np.where(
                    feat_magnitudes_np_temp[:, :, 0] <
                    config.act_thresh_magnitudes[t])
                feat_magnitudes_np_temp[zero_location] = 0

                seg_list = []
                for c in range(len(pred)):
                    pos = np.where(feat_magnitudes_np_temp[:, c, 0] > 0)
                    seg_list.append(pos)

                proposals = utils.get_proposal_oic(seg_list, cas_temp, score_np,
                                                   pred, config.scale,
                                                   vid_num_seg,
                                                   config.feature_fps,
                                                   num_segments)

                for j in range(len(proposals)):
                    class_id = proposals[j][0][0]
                    if class_id not in proposal_dict.keys():
                        proposal_dict[class_id] = []
                    proposal_dict[class_id] += proposals[j]

            final_proposals = []
            for class_id in proposal_dict.keys():
                final_proposals.append(utils.nms(proposal_dict[class_id], 0.6))

            final_res['results'][vid_name[0]] = utils.result2json(
                final_proposals)

        test_acc = num_correct / num_total

        json_path = os.path.join(config.output_path, 'result.json')
        with open(json_path, 'w') as f:
            json.dump(final_res, f)

        tIoU_thresh = np.linspace(0.1, 0.7, 7)
        anet_detection = ANETdetection(config.gt_path, json_path,
                                       subset='test',
                                       tiou_thresholds=tIoU_thresh,
                                       verbose=False, check_status=False)
        mAP, average_mAP = anet_detection.evaluate()

        logger.log_value('Test accuracy', test_acc, step)
        for i in range(tIoU_thresh.shape[0]):
            logger.log_value('mAP@{:.1f}'.format(tIoU_thresh[i]), mAP[i], step)
        logger.log_value('Average mAP', average_mAP, step)

        test_info["step"].append(step)
        test_info["test_acc"].append(test_acc)
        test_info["average_mAP"].append(average_mAP)
        for i in range(tIoU_thresh.shape[0]):
            test_info["mAP@{:.1f}".format(tIoU_thresh[i])].append(mAP[i])
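# --- Reference sketch (not part of the original repo) -----------------------
# utils.nms above keeps the highest-scoring proposal per class and removes
# overlapping ones. A standard greedy temporal NMS over the
# [class, score, start, end] proposal format noted earlier might look like
# this; the repo's own implementation may differ in detail.


def nms_sketch(proposals, iou_thresh):
    """Greedy NMS on a list of [class, score, start, end] proposals."""
    proposals = sorted(proposals, key=lambda p: p[1], reverse=True)
    keep = []
    while proposals:
        best = proposals.pop(0)
        keep.append(best)
        survivors = []
        for p in proposals:
            inter = max(0.0, min(best[3], p[3]) - max(best[2], p[2]))
            union = (best[3] - best[2]) + (p[3] - p[2]) - inter
            tiou = inter / union if union > 0 else 0.0
            if tiou < iou_thresh:
                survivors.append(p)
        proposals = survivors
    return keep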
def eval_one_batch(self, data, net, class_dict):
    features, _label, segm, vid_name, vid_num_seg = data
    if _label.sum() == 0:
        return

    elem, _, _, element_atn = net(features)
    element_logits = elem * element_atn

    label_np = _label.squeeze().cpu().numpy()

    pred_vid_score = get_cls_score(element_logits, rat=self.config.rat)
    score_np = pred_vid_score.copy()

    self.class_true.append(label_np)
    self.class_pred.append(pred_vid_score)

    # Binarize the video-level scores
    score_np[score_np < self.config.class_thresh] = 0
    score_np[score_np >= self.config.class_thresh] = 1

    correct_pred = np.sum(label_np == score_np)
    self.num_correct += np.sum(
        (correct_pred == self.config.num_class).astype(np.float32))
    self.num_total += 1

    cas_supp = element_logits[..., :-1]
    cas_supp_atn = element_atn

    logit_atn = cas_supp_atn.expand_as(cas_supp).squeeze().data.cpu().numpy()

    self.dict_pred[vid_name[0]]["logit"] = logit_atn
    # Duration in seconds: num_segments * 16 frames per segment / 25 fps
    self.dict_pred[vid_name[0]]["duration"] = int(vid_num_seg.item() * 16 / 25)

    pred = np.where(pred_vid_score >= self.config.class_thresh)[0]

    # NOTE: threshold
    act_thresh = self.config.act_thresh

    if len(pred) > 0:
        cas_pred = cas_supp[0].cpu().numpy()[:, pred]
        num_segments = cas_pred.shape[0]
        cas_pred = np.reshape(cas_pred, (num_segments, -1, 1))

        cas_pred_atn = cas_supp_atn[0].cpu().numpy()[:, [0]]
        cas_pred_atn = np.reshape(cas_pred_atn, (num_segments, -1, 1))

        proposal_dict = {}

        for i in range(len(act_thresh)):
            cas_temp = cas_pred.copy()
            cas_temp_atn = cas_pred_atn.copy()

            # The attention is class-agnostic, so the same surviving
            # positions are used for every predicted class.
            seg_list = []
            for c in range(len(pred)):
                pos = np.where(cas_temp_atn[:, 0, 0] > act_thresh[i])
                seg_list.append(pos)

            proposals = utils.get_proposal_oic(seg_list, cas_temp,
                                               pred_vid_score, pred,
                                               self.config.scale,
                                               vid_num_seg[0].cpu().item(),
                                               self.config.feature_fps,
                                               num_segments,
                                               gamma=self.config.gamma_oic)

            for j in range(len(proposals)):
                try:
                    class_id = proposals[j][0][0]
                    if class_id not in proposal_dict.keys():
                        proposal_dict[class_id] = []
                    proposal_dict[class_id] += proposals[j]
                except IndexError:
                    logger.error("Index error")

        final_proposals = []
        for class_id in proposal_dict.keys():
            final_proposals.append(
                utils.soft_nms(proposal_dict[class_id], 0.7, sigma=0.3))

        self.final_res["results"][vid_name[0]] = utils.result2json(
            final_proposals, class_dict)
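# --- Reference sketch (not part of the original repo) -----------------------
# utils.soft_nms above decays the scores of overlapping proposals instead of
# discarding them outright. A Gaussian soft-NMS sketch over the same
# [class, score, start, end] format (sigma as passed above, e.g. 0.3); the
# exact decay rule and thresholds in the repo's version may differ.

import math


def soft_nms_sketch(proposals, iou_thresh=0.7, sigma=0.3, score_thresh=1e-3):
    """Gaussian soft-NMS: rescore overlaps by exp(-tIoU^2 / sigma)."""
    proposals = [list(p) for p in proposals]
    keep = []
    while proposals:
        best = max(proposals, key=lambda p: p[1])
        proposals.remove(best)
        keep.append(best)
        for p in proposals:
            inter = max(0.0, min(best[3], p[3]) - max(best[2], p[2]))
            union = (best[3] - best[2]) + (p[3] - p[2]) - inter
            tiou = inter / union if union > 0 else 0.0
            if tiou > iou_thresh:  # decay only sufficiently overlapping ones
                p[1] *= math.exp(-(tiou ** 2) / sigma)
        proposals = [p for p in proposals if p[1] >= score_thresh]
    return keep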