def do_for_one_vid_seg(self, props, prop_feats, gt_boxs, gt_frms, out_file, save=True): """ props: all the proposal boxes gt_boxs: all the groundtruth_boxes out_props: props with highest IoU with gt_box # nframes x 1, one-to-one correspondence Also, used to calculate recall. """ props = torch.tensor(props).float() gt_boxs = torch.tensor(gt_boxs).float() gt_frms = torch.tensor(gt_frms).float() ngt = len(gt_boxs) prop_frms = props[:, 4] frm_msk = prop_frms[:, None] == gt_frms if len(gt_boxs) > 0 and len(props) > 0: ious = box_iou(props[:, :4], gt_boxs) * frm_msk.float() ious_max, ious_arg_max = ious.max(dim=0) recall = (ious_max > 0.5).sum().float() out_props = props[ious_arg_max] else: self.cfg.no_gt_count += 1 ngt = 1 recall = 0 ious = torch.zeros(props.size(0), 1) out_props = props[0] nprop = ngt if save: prop_dim = prop_feats.size(-1) prop_feats = torch.tensor(prop_feats).float() out_prop_feats = prop_feats[ious_arg_max].view( 1, ngt, prop_dim).detach().cpu().numpy() assert list(out_prop_feats.shape[:2]) == [1, ngt] np.save(out_file, out_prop_feats) return { 'out_props': out_props, 'recall': recall, 'num_prop': nprop, 'num_gt': ngt }
def compute_one_srl(self, pred_cmp, pred_boxes_for_srl, pred_scores_for_srl,
                    targ_cmp, gt_boxes_with_frames, gt_frames_all, cmp_msk):
    """
    Evaluate one SRL argument for the "sep" setting with a single
    chosen video.

    pred_cmp: the chosen video (1)
    targ_cmp: the target video (1)
    pred_boxes_for_srl: predicted boxes for the given srl, indexed as
        [video][frame]; the first four entries of a box are compared
        against the groundtruth.
    pred_scores_for_srl: prediction scores, indexed as [video][frame].
    gt_boxes_with_frames: groundtruth rows; entries 0-3 are the box and
        entry 4 (also the last column) is the frame index.
    gt_frames_all, cmp_msk: not used by this variant.

    Returns a dict. On the first groundtruth frame where the IoU exceeds
    0.5 and the score exceeds ``self.prob_thresh`` it carries the matched
    boxes, frame and IoU; otherwise 'iou' is ``torch.tensor(0)``.
    """
    if not pred_cmp == targ_cmp:
        # A wrong video can never be a hit.
        return {
            'targ_cmp': targ_cmp,
            'pred_cmp': pred_cmp,
            'iou': torch.tensor(0)
        }

    frames_with_gt = gt_boxes_with_frames[:, -1].long().tolist()
    vid_boxes = pred_boxes_for_srl[pred_cmp]
    vid_scores = pred_scores_for_srl[pred_cmp]

    for gt_row, frm_idx in zip(gt_boxes_with_frames, frames_with_gt):
        predicted_box = torch.tensor(vid_boxes[frm_idx][:4])
        pbox = torch.tensor(vid_boxes[frm_idx])
        groundtruth_box = gt_row[:4]
        score = vid_scores[frm_idx]
        assert gt_row[4] == frm_idx

        iou = box_iou(predicted_box.float(), groundtruth_box.float())

        # TODO: Check why prediction scores
        # are ridiculously low!!
        if iou > 0.5 and score > self.prob_thresh:
            return {
                'targ_cmp': targ_cmp,
                'pred_cmp': pred_cmp,
                'predicted_box': predicted_box,
                'pbox': pbox,
                'gt_box': groundtruth_box,
                'frm_idx': frm_idx,
                'iou': iou
            }

    return {
        'targ_cmp': targ_cmp,
        'pred_cmp': pred_cmp,
        'iou': torch.tensor(0)
    }
def compute_one_srl(
        self, pred_cmp_for_srl, pred_boxes_for_srl, pred_scores_for_srl,
        targ_cmp, gt_boxes_with_frames, gt_frames_all, cmp_msk
):
    """
    Evaluate one SRL argument for the spatial setting, frame by frame.

    pred_cmp_for_srl: per frame, the index of the video that was chosen.
    pred_boxes_for_srl: predicted boxes, indexed as [video][frame].
    pred_scores_for_srl: prediction scores, indexed as [video][frame].
    targ_cmp: the target video (1)
    gt_boxes_with_frames: groundtruth rows; entries 0-3 are the box and
        entry 4 (also the last column) is the frame index.
    gt_frames_all: per video, the frames that carry groundtruth; only
        the target video's entry is read (for a sanity assert).
    cmp_msk: per video mask; every chosen video must have mask 1.

    A frame counts as correct when
      * it has groundtruth, the target video was chosen, and some
        groundtruth box has IoU > 0.5 with the prediction while the
        score exceeds ``self.prob_thresh``; or
      * it has no groundtruth and it is not the case that a non-target
        video was chosen with a score above ``self.prob_thresh``.

    If every frame is correct, the last recorded match is returned.
    Otherwise the highest-scoring frame is reported with 'pred_cmp' set
    to the negated frame index and 'iou' set to ``torch.tensor(0)``.

    NOTE(review): the success return reads the ``con_*`` names that are
    only bound when a match was recorded; if all frames are marked
    correct without any match (e.g. none of the evaluated frames has
    groundtruth) they are unbound.
    """
    num_frames = len(pred_boxes_for_srl[0])
    frame_ids = list(range(num_frames))
    frame_ok = dict.fromkeys(frame_ids, False)

    con_vid = -1
    con_vid_score = 0
    score_by_frame = {}
    box_by_frame = {}

    gt_frame_set = set(gt_boxes_with_frames[:, -1].long().tolist())
    assert gt_frame_set <= set(gt_frames_all[targ_cmp].tolist())

    # Entries 0 and 2 of every groundtruth row are shifted by
    # 720 * targ_cmp before comparison.
    shift = torch.zeros(gt_boxes_with_frames.size(1)).long()
    shift[[0, 2]] = 720

    gt_by_frame = {}
    for gt_row in gt_boxes_with_frames:
        frame_key = gt_row[4].item()
        gt_by_frame.setdefault(frame_key, []).append(gt_row + shift * targ_cmp)

    for nf in frame_ids:
        nv = pred_cmp_for_srl[nf]
        assert cmp_msk[nv] == 1
        score = pred_scores_for_srl[nv][nf]
        raw_box = pred_boxes_for_srl[nv][nf]

        if nf not in gt_frame_set:
            # No groundtruth here: only a confident non-target pick is wrong.
            ok = True
            if nv != targ_cmp and score > self.prob_thresh:
                ok = False
            frame_ok[nf] = ok
        elif nv == targ_cmp:
            predicted_box = torch.tensor(raw_box[:4])
            pbox = torch.tensor(raw_box)
            assert nf in gt_by_frame
            for shifted_gt in gt_by_frame[nf]:
                assert shifted_gt[4] == nf
                iou = box_iou(
                    predicted_box.float(),
                    shifted_gt[:4].float()
                )
                # TODO: Check why prediction scores
                # are ridiculously low!!
                if iou > 0.5 and score > self.prob_thresh:
                    con_iou = iou
                    con_box = predicted_box
                    con_box_full = pbox
                    con_gt = shifted_gt
                    con_frm = nf
                    con_vid_score = score
                    frame_ok[nf] = True
                    con_vid = nv
        else:
            # Groundtruth frame, but a non-target video was chosen.
            frame_ok[nf] = False

        score_by_frame[nf] = score
        box_by_frame[nf] = raw_box

    if all(frame_ok.values()):
        return {
            'targ_cmp': targ_cmp,
            'pred_cmp': con_vid,
            'pred_score': con_vid_score,
            'predicted_box': con_box,
            'pbox': con_box_full,
            'gt_box': con_gt,
            'frm_idx': con_frm,
            'iou': con_iou
        }

    ranked = sorted(
        ((frm, sc, box_by_frame[frm]) for frm, sc in score_by_frame.items()),
        key=lambda entry: entry[1],
        reverse=True
    )
    if ranked:
        top_frame, top_score, top_box = ranked[0]
        con_vid = -top_frame
        con_vid_score = top_score
        con_vid_box = torch.tensor(top_box)
    else:
        con_vid = -5
        con_vid_score = 0
        con_vid_box = torch.tensor([0, 0, 0, 0, 0])

    return {
        'targ_cmp': targ_cmp,
        'pred_cmp': con_vid,
        'pred_score': con_vid_score,
        'predicted_box': con_vid_box,
        'gt_box': gt_by_frame,
        'iou': torch.tensor(0)
    }
def compute_one_srl(
        self, pred_cmp, pred_boxes_for_srl, pred_scores_for_srl,
        targ_cmp, gt_boxes_with_frames, gt_frames_all, cmp_msk
):
    """
    Evaluate one SRL argument for the "sep" setting across all videos.

    pred_cmp: the chosen video (1); not read by this variant.
    targ_cmp: the target video (1)
    pred_boxes_for_srl: predicted boxes, indexed as [video][frame].
    pred_scores_for_srl: prediction scores, indexed as [video][frame].
    gt_boxes_with_frames: groundtruth rows for the target video; entries
        0-3 are the box and entry 4 (also the last column) is the frame.
    gt_frames_all: per video, the frames that are checked.
    cmp_msk: per video mask; videos whose mask is not 1 count as correct.

    A video counts as correct when
      * its mask is not 1; or
      * it is the target and, on some groundtruth frame, the IoU exceeds
        0.5 while the score exceeds ``self.prob_thresh``; or
      * it is not the target and none of its checked frames has a score
        above ``self.prob_thresh``.

    If all videos are correct, the last recorded target match is
    returned. Otherwise the offending non-target video with the highest
    score is reported (or -1 / 0 when there is none) together with
    'iou' set to ``torch.tensor(0)``.

    NOTE(review): the success return reads the ``con_*`` names that are
    only bound when a target match was recorded.
    """
    num_vids = len(pred_boxes_for_srl)
    assert len(cmp_msk) == num_vids

    vid_ok = dict.fromkeys(range(num_vids), False)
    con_vid = -1
    con_vid_score = 0
    offending_scores = {}

    for nv in range(num_vids):
        if not cmp_msk[nv] == 1:
            vid_ok[nv] = True
            # NOTE(review): this asserts a list, so it only checks that
            # the list is non-empty; ``all(...)`` was probably intended.
            assert [ps0 == 0. for ps0 in pred_scores_for_srl[nv]]
            continue

        vid_boxes = pred_boxes_for_srl[nv]
        vid_scores = pred_scores_for_srl[nv]

        if not nv == targ_cmp:
            # Non-target video: wrong as soon as one checked frame is
            # confidently predicted.
            for frm_idx in gt_frames_all[nv]:
                frm_score = vid_scores[frm_idx]
                if frm_score > self.prob_thresh:
                    vid_ok[nv] = False
                    con_vid = nv
                    offending_scores[nv] = frm_score
                    break
            else:
                vid_ok[nv] = True
            continue

        frames_with_gt = gt_boxes_with_frames[:, -1].long().tolist()
        assert set(frames_with_gt) <= set(gt_frames_all[nv].tolist())

        for gt_row, frm_idx in zip(gt_boxes_with_frames, frames_with_gt):
            predicted_box = torch.tensor(vid_boxes[frm_idx][:4])
            pbox = torch.tensor(vid_boxes[frm_idx])
            groundtruth_box = gt_row[:4]
            frm_score = vid_scores[frm_idx]
            assert gt_row[4] == frm_idx

            iou = box_iou(predicted_box.float(), groundtruth_box.float())

            # TODO: Check why prediction scores
            # are ridiculously low!!
            if iou > 0.5 and frm_score > self.prob_thresh:
                con_iou = iou
                con_box = predicted_box
                con_box_full = pbox
                con_gt = groundtruth_box
                con_frm = frm_idx
                con_vid_score = frm_score
                vid_ok[nv] = True
                con_vid = nv

    if all(vid_ok.values()):
        return {
            'targ_cmp': targ_cmp,
            'pred_cmp': con_vid,
            'pred_score': con_vid_score,
            'predicted_box': con_box,
            'pbox': con_box_full,
            'gt_box': con_gt,
            'frm_idx': con_frm,
            'iou': con_iou
        }

    ranked = sorted(
        offending_scores.items(),
        key=lambda vid_and_score: vid_and_score[1],
        reverse=True
    )
    if ranked:
        con_vid, con_vid_score = ranked[0]
    else:
        con_vid = -1
        con_vid_score = 0

    return {
        'targ_cmp': targ_cmp,
        'pred_cmp': con_vid,
        'pred_score': con_vid_score,
        'iou': torch.tensor(0)
    }
def eval_one_sent_idx(self, pred_row, gt_rows):
    """
    Count, for a single groundtruth row, how many predicted boxes
    overlap their groundtruth box with IoU > 0.5.

    pred_row: prediction record, forwarded to
        ``self.get_req_pred_from_row``.
    gt_rows: container with exactly one row (accessed through ``.iloc``);
        the row provides ``name``, ``vid_seg`` and ``req_cls_pats_mask``.

    The row's ``vid_seg`` is split on '_segment_' to look up the
    groundtruth boxes ('bbox') and frames ('frm_idx') in
    ``self.anet_annots``.

    Returns -1 when ``self.get_req_pred_from_row`` returns -1, otherwise
    a dict with
        'res_dict': {row index: number of boxes with IoU > 0.5},
        'tot_dict': {row index: number of arguments flagged with a box},
        'considered_boxes': one record per compared box pair.
    """
    assert len(gt_rows) == 1
    gt_row = gt_rows.iloc[0]
    row_key = gt_row.name

    hits = {}
    totals = {}
    compared = []

    vid, seg = gt_row.vid_seg.split('_segment_')
    # Segment ids are keyed without leading zeros.
    seg = str(int(seg))
    segment_ann = self.anet_annots[vid]['segments'][seg]
    all_gt_boxes = torch.tensor(segment_ann['bbox'])
    all_gt_frames = torch.tensor(segment_ann['frm_idx'])
    assert len(all_gt_boxes) == len(all_gt_frames)

    pred_boxes_for_verb = self.get_req_pred_from_row(pred_row, gt_row, row_key)
    if pred_boxes_for_verb == -1:
        return -1

    for srl_ind, arg_info in enumerate(gt_row.req_cls_pats_mask):
        srl_arg, has_box, box_ind_list = arg_info
        if not has_box == 1:
            continue

        hits.setdefault(row_key, 0)
        totals[row_key] = totals.get(row_key, 0) + 1

        # The argument still counts towards the total even when there is
        # no prediction for it.
        if srl_ind >= len(pred_boxes_for_verb):
            continue

        box_inds = torch.tensor(box_ind_list)
        gt_boxes = torch.index_select(all_gt_boxes, 0, box_inds)
        frm_idxs = torch.index_select(all_gt_frames, 0, box_inds)
        pred_boxes = pred_boxes_for_verb[srl_ind]

        for frm_idx, groundtruth_box in zip(frm_idxs, gt_boxes):
            predicted_box = torch.tensor(pred_boxes[frm_idx][:4])
            iou = box_iou(predicted_box.float(), groundtruth_box.float())
            compared.append({
                'predicted_box': predicted_box,
                'gt_box': groundtruth_box,
                'frm_idx': frm_idx,
                'srl_ind': srl_ind,
                'iou': iou
            })
            if iou > 0.5:
                hits[row_key] += 1

    return {
        'res_dict': hits,
        'tot_dict': totals,
        'considered_boxes': compared
    }
def no_gt_prop10_one_vid_seg(self, props, prop_feats, gt_boxs, gt_frms, out_file, save=True):
    """
    Re-order the proposals of every frame by descending value of
    column 6 (presumably the proposal score -- TODO confirm), without
    using the groundtruth for selection, and measure recall against the
    groundtruth boxes.

    props: proposals; reshaped to (10, 100, 7), i.e. 10 frames with 100
        proposals of 7 values each. Columns 0-3 are passed to
        ``box_iou`` and column 4 is compared against ``gt_frms``.
    prop_feats: proposal features; reshaped to (10, 100, feature_dim).
    gt_boxs: all the groundtruth boxes.
    gt_frms: frame index of every groundtruth box.
    out_file: target handed to ``np.save`` when ``save`` is true.
    save: whether to write the re-ordered features.

    Returns a dict with
        'out_props': re-ordered proposals as a (1000, 7) numpy array,
        'recall': number of gt boxes whose best same-frame IoU exceeds
            0.5 (0 when there is no groundtruth),
        'num_prop': always 100,
        'num_gt': number of gt boxes (1 when there is no groundtruth).

    When there is no groundtruth box, ``self.cfg.no_gt_count`` is
    incremented.
    """
    nfrms = 10
    nppf = 100

    props = torch.tensor(props).float()
    prop_feats = torch.tensor(prop_feats).float()
    gt_boxs = torch.tensor(gt_boxs).float()
    gt_frms = torch.tensor(gt_frms).float()
    ngt = len(gt_boxs)

    props_by_frm = props.view(10, 100, 7)
    prop_dim = prop_feats.size(-1)
    feats_by_frm = prop_feats.view(10, 100, prop_dim)

    props_output = torch.zeros(10, nppf, 7)
    prop_feats_output = torch.zeros(10, nppf, prop_dim)
    for frm in range(nfrms):
        ranking = props_by_frm[frm, ..., 6].argsort(descending=True)[:nppf]
        # De-duplicate while keeping the ranking order.
        keep = list(OrderedDict.fromkeys(ranking.tolist()))[:nppf]
        props_output[frm] = props_by_frm[frm][keep]
        prop_feats_output[frm] = feats_by_frm[frm][keep]

    props_output = props_output.view(nfrms * nppf, 7)
    prop_feats_output = prop_feats_output.view(nfrms, nppf, prop_dim)
    prop_feats_output = prop_feats_output.detach().cpu().numpy()

    if len(gt_boxs) > 0 and len(props_output) > 0:
        # Only proposals in the same frame as a gt box may match it.
        same_frame = props_output[:, 4][:, None] == gt_frms
        ious = box_iou(props_output[:, :4], gt_boxs) * same_frame.float()
        best_iou = ious.max(dim=0)[0]
        recall = (best_iou > 0.5).sum()
    else:
        self.cfg.no_gt_count += 1
        ngt = 1
        recall = 0

    props_output = props_output.detach().cpu().numpy()
    if save:
        np.save(out_file, prop_feats_output)

    return {
        'out_props': props_output,
        'recall': recall,
        'num_prop': 100,
        'num_gt': ngt
    }