def validation(epoch, device, model, dataset_folder, sample_duration, spatial_transform, temporal_transform, boxes_file, splt_txt_path, cls2idx, batch_size, n_threads):
    """Measure single-frame recall of proposed action tubes at IoU 0.5.

    Runs the model over the test split; for every sample counts how many
    non-empty ground-truth tubes are matched by at least one proposal with
    per-frame tube IoU above ``iou_thresh``, then prints a summary table.

    Args:
        epoch: current epoch (used only in the report header).
        device: torch.device the ground-truth rois are moved to.
        model: network returning (tubes, ..., sgl_rois_bbox_pred, ...).
        dataset_folder, boxes_file, splt_txt_path: dataset locations.
        sample_duration: frames per clip (each tube has sample_duration*4 coords).
        spatial_transform, temporal_transform: dataset transforms.
        cls2idx: class-name -> index mapping.
        batch_size, n_threads: kept for interface compatibility; the loader
            below uses its own fixed settings, as before.
    """
    iou_thresh = 0.5  # Intersection Over Union thresh

    data = Video_Dataset_small_clip(dataset_folder, frames_dur=sample_duration,
                                    spatial_transform=spatial_transform,
                                    temporal_transform=temporal_transform,
                                    bboxes_file=boxes_file,
                                    split_txt_path=splt_txt_path,
                                    mode='test', classes_idx=cls2idx)
    data_loader = torch.utils.data.DataLoader(data, batch_size=16,
                                              shuffle=True, num_workers=0,
                                              pin_memory=True)

    model.eval()

    sgl_true_pos = 0
    sgl_false_neg = 0
    step = -1  # FIX: keeps the final report valid even for an empty loader

    # FIX: run inference under no_grad so no autograd graph is retained
    # batch after batch during validation.
    with torch.no_grad():
        for step, data in enumerate(data_loader):
            print('step :', step)
            clips, h, w, gt_tubes_r, gt_rois, n_actions, n_frames, im_info = data
            gt_rois_ = gt_rois.to(device)

            tubes, _, _, _, _, _, _, \
                sgl_rois_bbox_pred, _ = model(clips, im_info, None, None, None)

            n_tubes = len(tubes)

            # Decode the per-frame regression deltas and clip to image bounds.
            tubes = tubes.view(-1, sample_duration * 4 + 2)
            tubes[:, 1:-1] = tube_transform_inv(
                tubes[:, 1:-1],
                sgl_rois_bbox_pred.view(-1, sample_duration * 4),
                (1.0, 1.0, 1.0, 1.0))
            tubes = tubes.view(n_tubes, -1, sample_duration * 4 + 2)
            tubes[:, :, 1:-1] = clip_boxes(tubes[:, :, 1:-1], im_info, tubes.size(0))

            for i in range(tubes.size(0)):
                tubes_t = tubes[i, :, 1:-1].contiguous()
                gt_rois_t = gt_rois_[i, :, :, :4].contiguous().view(
                    -1, sample_duration * 4)

                rois_overlaps = tube_overlaps(tubes_t, gt_rois_t)
                # Best proposal overlap for every gt tube.
                gt_max_overlaps_sgl, max_indices = torch.max(rois_overlaps, 0)

                # Zero-padded gt rows are not real tubes; count only non-empty ones.
                non_empty_indices = gt_rois_t.ne(0).any(dim=1).nonzero().view(-1)
                n_elems = non_empty_indices.nelement()

                # Debug trace: a confident match against an all-zero gt row
                # would indicate broken padding handling.
                if gt_max_overlaps_sgl[0] > 0.5 and gt_rois_t[0, -4:].sum() == 0:
                    print('max_indices :', max_indices, max_indices.shape, gt_max_overlaps_sgl)
                    print('tubes_t[max_indices[0]] :', tubes_t[max_indices[0]])
                    print('gt_rois_t[0] :', gt_rois_t[0])

                # Keep overlaps above the threshold, zero the rest.
                gt_max_overlaps_sgl = torch.where(
                    gt_max_overlaps_sgl > iou_thresh, gt_max_overlaps_sgl,
                    torch.zeros_like(gt_max_overlaps_sgl))
                sgl_detected = gt_max_overlaps_sgl[non_empty_indices].ne(0).sum()
                sgl_true_pos += sgl_detected
                sgl_false_neg += n_elems - sgl_detected

    # FIX: guard against division by zero when nothing was evaluated.
    total = float(sgl_true_pos) + float(sgl_false_neg)
    recall = float(sgl_true_pos) / total if total > 0 else 0.0

    print(' -----------------------')
    print('| Validation Epoch: {: >3} | '.format(epoch + 1))
    print('| |')
    print('| Proposed Action Tubes |')
    print('| |')
    print('| Single frame |')
    print('| |')
    print('| In {: >6} steps : |'.format(step))
    print('| |')
    print(
        '| True_pos --> {: >6} |\n| False_neg --> {: >6} | \n| Recall --> {: >6.4f} |'
        .format(sgl_true_pos, sgl_false_neg, recall))
    print(' -----------------------')
def forward(self, input):
    """Generate spatio-temporal tube proposals from multi-scale RPN outputs.

    ``input`` is a 10-element sequence:
      input[0..3] : objectness score maps for the four temporal scales
      input[4..7] : per-frame bbox regression maps for the same scales
      input[8]    : im_info (image size info)
      input[9]    : cfg key ('TRAIN' or 'TEST')

    Returns a (batch, post_nms_topN, sample_duration*4 + 2) tensor holding
    [batch_idx, per-frame boxes ..., score] for every kept proposal.
    """
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs
    scores = input[0][:, self._num_anchors:, :, :]
    scores_3_4 = input[1][:, self._num_anchors:, :, :]
    scores_2 = input[2][:, self._num_anchors:, :, :]
    scores_4 = input[3][:, self._num_anchors:, :, :]
    bbox_frame = input[4]
    bbox_frame_3_4 = input[5]
    bbox_frame_2 = input[6]
    bbox_frame_4 = input[7]
    im_info = input[8]
    cfg_key = input[9]
    batch_size = bbox_frame.size(0)

    pre_nms_topN = conf[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = conf[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = conf[cfg_key].RPN_NMS_THRESH
    min_size = conf[cfg_key].RPN_MIN_SIZE  # NOTE(review): read but never applied below

    ##################
    # Create anchors #
    ##################

    # Temporal extents of each score map (currently unused below).
    feat_time = scores.size(2)
    feat_time_3_4 = scores_3_4.size(2)
    feat_time_2 = scores_2.size(2)
    feat_time_4 = scores_4.size(2)

    feat_height, feat_width = scores.size(3), scores.size(
        4)  # (batch_size, 512/256, 7,7, 16/8)
    # One (x1, y1, x2, y2) shift per spatial cell of the feature map.
    shift_x = np.arange(0, feat_width) * self._feat_stride
    shift_y = np.arange(0, feat_height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = torch.from_numpy(
        np.vstack((
            shift_x.ravel(),
            shift_y.ravel(),
            shift_x.ravel(),
            shift_y.ravel(),
        )).transpose())
    shifts = shifts.contiguous().type_as(scores).float()

    A = self._anchors.size(0)  # anchors per cell
    K = shifts.size(0)  # number of spatial cells

    anchors = self._anchors.view(1, A, 4).type_as(shifts) + shifts.view(
        K, 1, 4)
    anchors = anchors.view(K * A, 4)

    bboxes = [bbox_frame, bbox_frame_3_4, bbox_frame_2, bbox_frame_4]
    anchors_all = []
    bbox_frame_all = []

    # For every temporal scale, slide its window over the clip: the anchor
    # (and its deltas) occupy frames [j, j + time_dim[i]) and stay zero
    # everywhere else.
    for i in range(len(self.time_dim)):
        for j in range(0, self.sample_duration - self.time_dim[i] + 1):
            anc = torch.zeros((self.sample_duration, anchors.size(0), 4))
            bbox = torch.zeros(
                (batch_size, anchors.size(0), self.sample_duration, 4))
            anc[j:j + self.time_dim[i]] = anchors
            anc = anc.permute(1, 0, 2)
            t = bboxes[i][:, :, j].permute(0, 2, 3, 1).contiguous().view(
                batch_size, anchors.size(0), self.time_dim[i], 4)
            bbox[:, :, j:j + self.time_dim[i], :] = t
            anchors_all.append(anc)
            bbox_frame_all.append(bbox)

    anchors_all = torch.stack(anchors_all, 0).type_as(scores)
    bbox_frame_all = torch.stack(bbox_frame_all, 1).type_as(scores)

    # Flatten to (batch, n_windows * K * A, sample_duration * 4).
    anchors_all = anchors_all.view(1, -1, self.sample_duration * 4)
    anchors_all = anchors_all.expand(batch_size, anchors_all.size(1),
                                     self.sample_duration * 4)
    bbox_frame_all = bbox_frame_all.view(batch_size, -1,
                                         self.sample_duration * 4)

    # Same story for the scores:
    scores = scores.permute(0, 2, 3, 4, 1).contiguous()
    scores = scores.view(batch_size, -1)
    scores_3_4 = scores_3_4.permute(0, 2, 3, 4, 1).contiguous()
    scores_3_4 = scores_3_4.view(batch_size, -1)
    scores_2 = scores_2.permute(0, 2, 3, 4, 1).contiguous()
    scores_2 = scores_2.view(batch_size, -1)
    scores_4 = scores_4.permute(0, 2, 3, 4, 1).contiguous()
    scores_4 = scores_4.view(batch_size, -1)
    scores_all = torch.cat([scores, scores_3_4, scores_2, scores_4], 1)

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors_all.contiguous().view(-1, self.sample_duration*4), \
                                   bbox_frame_all.contiguous().view(-1, self.sample_duration*4), \
                                   (1.0, 1.0, 1.0, 1.0))  # proposals have 441 * time_dim shape

    # 2. clip predicted boxes to image
    ## if any dimension exceeds the dims of the original image, clamp_ them
    proposals = proposals.view(batch_size, -1, self.sample_duration * 4)
    proposals = clip_boxes(proposals, im_info, batch_size)

    scores_keep = scores_all
    proposals_keep = proposals

    # Descending score order per batch element.
    _, order = torch.sort(scores_all, 1, True)

    output = scores.new(batch_size, post_nms_topN,
                        self.sample_duration * 4 + 2).zero_()
    for i in range(batch_size):
        if cfg_key == 'TEST':
            # 3. remove predicted boxes with either height or width < threshold
            # (NOTE: convert min_size to input image scale stored in im_info[2])
            proposals_single = proposals_keep[i]
            scores_single = scores_keep[i]
            order_single = order[i]

            # NOTE(review): compares pre_nms_topN against the WHOLE batch's
            # numel(), not this image's score count — confirm intended.
            if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
                order_single = order_single[:pre_nms_topN]

            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1, 1)

            keep_idx_i = nms_gpu(
                torch.cat((proposals_single, scores_single),
                          1), nms_thresh).type_as(scores_single)
            keep_idx_i = keep_idx_i.long().view(-1)

            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]

            # adding score at the end.
            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i
            output[i, :num_proposal, 1:-1] = proposals_single
            output[i, :num_proposal, -1] = scores_single.squeeze()
        else:
            # TRAIN path: keep the post_nms_topN best-scoring proposals, no NMS.
            proposals_single = proposals_keep[i]
            scores_single = scores_keep[i]
            order_single = order[i]
            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1, 1)
            proposals_single = proposals_single[:post_nms_topN, :]
            scores_single = scores_single[:post_nms_topN]

            # adding score at the end.
            num_proposal = proposals_single.size(0)
            output[i, :num_proposal, 0] = i
            output[i, :num_proposal, 1:-1] = proposals_single
            output[i, :num_proposal, -1] = scores_single.squeeze()
    return output
def validation(epoch, device, model, dataset_folder, sample_duration, spatial_transform, temporal_transform, boxes_file, splt_txt_path, cls2idx, batch_size, n_threads):
    """Measure single-frame recall of proposed action tubes at IoU 0.5/0.4/0.3.

    Runs the model over the test split, counts true positives and false
    negatives at three IoU thresholds, appends the numbers to
    ``../images_etc/recall_jhmdb.txt`` and prints a summary table.

    Args:
        epoch: current epoch (used only in the report header).
        device: torch.device the ground-truth rois are moved to.
        model: network returning (tubes, ..., sgl_rois_bbox_pred, ...).
        dataset_folder, boxes_file, splt_txt_path: dataset locations.
        sample_duration: frames per clip (each tube has sample_duration*4 coords).
        spatial_transform, temporal_transform: dataset transforms.
        cls2idx: class-name -> index mapping.
        batch_size, n_threads: kept for interface compatibility; the loader
            below uses its own fixed settings, as before.
    """
    iou_thresh = 0.5  # Intersection Over Union thresh
    iou_thresh_4 = 0.4  # Intersection Over Union thresh
    iou_thresh_3 = 0.3  # Intersection Over Union thresh

    data = Video(dataset_folder, frames_dur=sample_duration,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 json_file=boxes_file,
                 split_txt_path=splt_txt_path, mode='test',
                 classes_idx=cls2idx)
    data_loader = torch.utils.data.DataLoader(data, batch_size=4,
                                              shuffle=True, num_workers=0,
                                              pin_memory=True)
    model.eval()

    sgl_true_pos = 0
    sgl_false_neg = 0
    sgl_true_pos_4 = 0
    sgl_false_neg_4 = 0
    sgl_true_pos_3 = 0
    sgl_false_neg_3 = 0
    step = -1  # FIX: keeps the final report valid even for an empty loader

    # FIX: run inference under no_grad so no autograd graph is retained
    # batch after batch during validation.
    with torch.no_grad():
        for step, data in enumerate(data_loader):
            clips, h, w, gt_tubes_r, gt_rois, n_actions, n_frames, im_info = data
            gt_rois_ = gt_rois.float().to(device)

            tubes, _, _, _, _, _, _, \
                sgl_rois_bbox_pred, _ = model(clips, im_info, None, None, None)

            n_tubes = len(tubes)

            # Decode the per-frame regression deltas and clip to image bounds.
            tubes = tubes.view(-1, sample_duration * 4 + 2)
            tubes[:, 1:-1] = tube_transform_inv(
                tubes[:, 1:-1],
                sgl_rois_bbox_pred.view(-1, sample_duration * 4),
                (1.0, 1.0, 1.0, 1.0))
            tubes = tubes.view(n_tubes, -1, sample_duration * 4 + 2)
            tubes[:, :, 1:-1] = clip_boxes(tubes[:, :, 1:-1], im_info, tubes.size(0))

            for i in range(tubes.size(0)):
                tubes_t = tubes[i, :, 1:-1].contiguous()
                gt_rois_t = gt_rois_[i, :, :, :4].contiguous().view(
                    -1, sample_duration * 4)

                rois_overlaps = tube_overlaps(tubes_t, gt_rois_t)
                # Best proposal overlap for every gt tube.
                gt_max_overlaps_sgl, _ = torch.max(rois_overlaps, 0)

                # Zero-padded gt rows are not real tubes; count only non-empty ones.
                non_empty_indices = gt_rois_t.ne(0).any(dim=1).nonzero().view(-1)
                n_elems = non_empty_indices.nelement()

                def detected_at(thresh):
                    # Number of non-empty gt tubes whose best overlap beats thresh.
                    kept = torch.where(gt_max_overlaps_sgl > thresh,
                                       gt_max_overlaps_sgl,
                                       torch.zeros_like(gt_max_overlaps_sgl))
                    return kept[non_empty_indices].ne(0).sum()

                # 0.5
                sgl_detected = detected_at(iou_thresh)
                sgl_true_pos += sgl_detected
                sgl_false_neg += n_elems - sgl_detected

                # 0.4
                # FIX: restrict to non-empty gt rows, exactly as for 0.5
                # (the original counted over all rows here, inconsistently).
                sgl_detected = detected_at(iou_thresh_4)
                sgl_true_pos_4 += sgl_detected
                sgl_false_neg_4 += n_elems - sgl_detected

                # 0.3
                sgl_detected = detected_at(iou_thresh_3)
                sgl_true_pos_3 += sgl_detected
                sgl_false_neg_3 += n_elems - sgl_detected

    def _recall(tp, fn):
        # FIX: guard against division by zero when nothing was evaluated.
        total = float(tp) + float(fn)
        return float(tp) / total if total > 0 else 0.0

    recall = _recall(sgl_true_pos, sgl_false_neg)
    recall_4 = _recall(sgl_true_pos_4, sgl_false_neg_4)
    recall_3 = _recall(sgl_true_pos_3, sgl_false_neg_3)

    # Append the numbers to the report file (path is hard-coded, as before).
    # FIX: 'with' guarantees the file is closed even if a write fails.
    with open('../images_etc/recall_jhmdb.txt', 'a') as f:
        f.write('| Validation Epoch: {: >3} |\n'.format(epoch + 1))
        f.write('| Threshold : 0.5 |\n')
        f.write(
            '| True_pos --> {: >6} |\n| False_neg --> {: >6} | \n| Recall --> {: >6.4f} |\n'
            .format(sgl_true_pos, sgl_false_neg, recall))
        f.write('| Threshold : 0.4 |\n')
        f.write(
            '| True_pos --> {: >6} |\n| False_neg --> {: >6} | \n| Recall --> {: >6.4f} |\n'
            .format(sgl_true_pos_4, sgl_false_neg_4, recall_4))
        f.write('| Threshold : 0.3 |\n')
        f.write(
            '| True_pos --> {: >6} |\n| False_neg --> {: >6} | \n| Recall --> {: >6.4f} |\n'
            .format(sgl_true_pos_3, sgl_false_neg_3, recall_3))

    print(' -----------------------')
    print('| Validation Epoch: {: >3} | '.format(epoch + 1))
    print('| |')
    print('| Proposed Action Tubes |')
    print('| |')
    print('| Single frame |')
    print('| |')
    print('| In {: >6} steps : |'.format(step))
    print('| |')
    print('| Threshold : 0.5 |')
    print('| |')
    print(
        '| True_pos --> {: >6} |\n| False_neg --> {: >6} | \n| Recall --> {: >6.4f} |'
        .format(sgl_true_pos, sgl_false_neg, recall))
    print('| |')
    print('| Threshold : 0.4 |')
    print('| |')
    print(
        '| True_pos --> {: >6} |\n| False_neg --> {: >6} | \n| Recall --> {: >6.4f} |'
        .format(sgl_true_pos_4, sgl_false_neg_4, recall_4))
    print('| |')
    print('| Threshold : 0.3 |')
    print('| |')
    print(
        '| True_pos --> {: >6} |\n| False_neg --> {: >6} | \n| Recall --> {: >6.4f} |'
        .format(sgl_true_pos_3, sgl_false_neg_3, recall_3))
    print(' -----------------------')
def forward(self, n_devs, dataset_folder, vid_names, clips, vid_id, boxes, mode, cls2idx, num_actions, num_frames, h_, w_):
    '''
    Process one whole video: run the action network on overlapping clips,
    connect per-clip tube proposals into video-long tubes and classify them.

    Returns (by mode):
      training          : (None, prob_out, cls_loss,)
      mode == 'extract' : (f_feats, target_lbl, f_feats_len)
      otherwise (test)  : (ret_tubes, ret_prob_out, n_kept_tubes)
    '''
    clips = clips.squeeze(0)
    ret_n_frames = clips.size(0)
    clips = clips[:num_frames]

    if self.training:
        boxes = boxes.squeeze(0).permute(1, 0, 2).cpu()
        boxes = boxes[:num_frames, :num_actions].clamp_(min=0)

    batch_size = 4  # clips processed per act_net call
    num_images = 1
    rois_per_image = int(conf.TRAIN.BATCH_SIZE / num_images) if self.training else 150

    # Data loader that yields frame indices for each overlapping clip.
    data = single_video(dataset_folder, h_, w_, vid_names, vid_id,
                        frames_dur=self.sample_duration,
                        sample_size=self.sample_size,
                        classes_idx=cls2idx, n_frames=num_frames)
    data_loader = torch.utils.data.DataLoader(data, batch_size=batch_size,
                                              pin_memory=False,
                                              shuffle=False)

    n_clips = data.__len__()

    # Per-clip buffers filled batch by batch below.
    features = torch.zeros(n_clips, rois_per_image, self.p_feat_size,
                           self.sample_duration).type_as(clips)
    p_tubes = torch.zeros(n_clips, rois_per_image,
                          self.sample_duration * 4).type_as(clips)  # all the proposed tube-rois
    actioness_score = torch.zeros(n_clips, rois_per_image).type_as(clips)
    overlaps_scores = torch.zeros(n_clips, rois_per_image,
                                  rois_per_image).type_as(clips)

    f_tubes = []

    if self.training:
        f_gt_tubes = torch.zeros(n_clips, num_actions,
                                 self.sample_duration * 4)  # gt_tubes
        tubes_labels = torch.zeros(n_clips, rois_per_image)  # tubes rois
        loops = int(np.ceil(n_clips / batch_size))
        labels = torch.zeros(num_actions)

        # Label of each gt action = class stored in the first frame that has it.
        for i in range(num_actions):
            idx = boxes[:, i, 4].nonzero().view(-1)
            labels[i] = boxes[idx[0], i, 4]

    ## Init connect thresh
    self.calc.thresh = self.connection_thresh

    for step, dt in enumerate(data_loader):
        frame_indices, im_info, start_fr = dt
        clips_ = clips[frame_indices].cuda()

        if self.training:
            boxes_ = boxes[frame_indices].cuda()
            box_ = boxes_.permute(0, 2, 1, 3).float().contiguous()[:, :, :, :-1]
        else:
            box_ = None

        im_info = im_info.cuda()
        start_fr = start_fr.cuda()

        with torch.no_grad():
            tubes, pooled_feat, \
            rpn_loss_cls, rpn_loss_bbox, \
            _, _, rois_label, \
            sgl_rois_bbox_pred, sgl_rois_bbox_loss = self.act_net(clips_.permute(0, 2, 1, 3, 4),
                                                                  im_info,
                                                                  None,
                                                                  box_,
                                                                  start_fr)

        # Spatially pool the per-roi features.
        pooled_feat = pooled_feat.mean(-1).mean(-1)
        pooled_feat = pooled_feat.view(-1, rois_per_image, self.p_feat_size,
                                       self.sample_duration)

        # regression
        n_tubes = len(tubes)
        if not self.training:
            # At test time decode the regression deltas and clip to the image.
            tubes = tubes.view(-1, self.sample_duration * 4 + 2)
            tubes[:, 1:-1] = tube_transform_inv(tubes[:, 1:-1], \
                                                sgl_rois_bbox_pred.view(-1, self.sample_duration * 4),
                                                (1.0, 1.0, 1.0, 1.0))
            tubes = tubes.view(n_tubes, rois_per_image,
                               self.sample_duration * 4 + 2)
            tubes[:, :, 1:-1] = clip_boxes(tubes[:, :, 1:-1], im_info,
                                           tubes.size(0))

        # First frame index of each clip in the whole video (clips overlap by
        # half of sample_duration).  Currently only computed, not consumed.
        indexes_ = (torch.arange(0, tubes.size(0)) * int(self.sample_duration / 2) +
                    start_fr[0].cpu()).unsqueeze(1)
        indexes_ = indexes_.expand(tubes.size(0), tubes.size(1)).type_as(tubes)

        idx_s = step * batch_size
        idx_e = min(step * batch_size + batch_size, n_clips)

        features[idx_s:idx_e] = pooled_feat
        p_tubes[idx_s:idx_e, ] = tubes[:, :, 1:-1]
        actioness_score[idx_s:idx_e] = tubes[:, :, -1]

        if self.training:
            box = boxes_.permute(0, 2, 1, 3).contiguous()[:, :, :, :-2]
            box = box.contiguous().view(box.size(0), box.size(1), -1)
            f_gt_tubes[idx_s:idx_e] = box

        # connection algo
        for i in range(idx_s, idx_e):
            if i == 0:
                # Init tensors for connecting
                offset = torch.arange(0, rois_per_image).int().cuda()
                ones_t = torch.ones(rois_per_image).int().cuda()
                zeros_t = torch.zeros(rois_per_image, n_clips, 2).int().cuda() - 1

                pos = torch.zeros(rois_per_image, n_clips, 2).int().cuda() - 1  # initial pos
                pos[:, 0, 0] = 0
                pos[:, 0, 1] = offset.contiguous()  # contains the current tubes to be connected
                pos_indices = torch.zeros(rois_per_image).int().cuda()  # contains the pos of the last element of the previous tensor
                actioness_scr = actioness_score[0].float().cuda()  # actioness sum of active tubes
                overlaps_scr = torch.zeros(rois_per_image).float().cuda()  # overlaps sum of active tubes
                final_scores = torch.Tensor().float().cuda()  # final scores
                final_poss = torch.Tensor().int().cuda()  # final tubes
                continue

            # Overlap between the trailing half of the previous clip's tubes
            # and the leading half of the current clip's tubes.
            overlaps_ = tube_overlaps(
                p_tubes[i - 1, :, int(self.sample_duration * 4 / 2):],
                p_tubes[i, :, :int(self.sample_duration * 4 / 2)]).type_as(p_tubes)

            pos, pos_indices, \
            f_scores, actioness_scr, \
            overlaps_scr = self.calc(torch.Tensor([n_clips]), torch.Tensor([rois_per_image]),
                                     torch.Tensor([pos.size(0)]),
                                     pos, pos_indices, actioness_scr,
                                     overlaps_scr, overlaps_,
                                     actioness_score[i], torch.Tensor([i]))

            # Flush low-scoring candidates when the active set grows too large.
            if pos.size(0) > self.update_thresh:
                final_scores, final_poss, pos, pos_indices, \
                actioness_scr, overlaps_scr, f_scores = self.calc.update_scores(final_scores, final_poss, f_scores, pos, pos_indices, actioness_scr, overlaps_scr)

            # Re-add a batch dim lost when only one candidate survived.
            if f_scores.dim() == 0:
                f_scores = f_scores.unsqueeze(0)
                pos = pos.unsqueeze(0)
                pos_indices = pos_indices.unsqueeze(0)
                actioness_scr = actioness_scr.unsqueeze(0)
                overlaps_scr = overlaps_scr.unsqueeze(0)
            if final_scores.dim() == 0:
                final_scores = final_scores.unsqueeze(0)
                final_poss = final_poss.unsqueeze(0)

            try:
                final_scores = torch.cat((final_scores, f_scores))
            except:
                print('final_scores :', final_scores)
                print('final_scores.shape :', final_scores.shape)
                print('final_scores.dim() :', final_scores.dim())
                print('f_scores :', f_scores)
                print('f_scores.shape :', f_scores.shape)
                print('f_scores.dim() :', f_scores.dim())
                exit(-1)
            try:
                final_poss = torch.cat((final_poss, pos))
            except:
                print('final_poss :', final_poss)
                print('final_poss.shape :', final_poss.shape)
                print('final_poss.dim() :', final_poss.dim())
                print('pos :', pos)
                print('pos.shape :', pos.shape)
                print('pos.dim() :', pos.dim())
                exit(-1)

            # add new tubes
            pos = torch.cat((pos, zeros_t))
            pos[-rois_per_image:, 0, 0] = ones_t * i
            pos[-rois_per_image:, 0, 1] = offset
            pos_indices = torch.cat(
                (pos_indices, torch.zeros(
                    (rois_per_image)).type_as(pos_indices)))
            actioness_scr = torch.cat((actioness_scr, actioness_score[i]))
            overlaps_scr = torch.cat(
                (overlaps_scr, torch.zeros(
                    (rois_per_image)).type_as(overlaps_scr)))

    ## add only last layers
    ## TODO check again
    indices = actioness_score[-1].ge(self.calc.thresh).nonzero().view(-1)
    if indices.nelement() > 0:
        zeros_t[:, 0, 0] = idx_e - 1
        zeros_t[:, 0, 1] = offset
        final_poss = torch.cat([final_poss, zeros_t[indices]])

    if pos.size(0) > self.update_thresh:
        print('Updating thresh...', final_scores.shape, final_poss.shape,
              pos.shape, f_scores.shape, pos_indices.shape)
        final_scores, final_poss, pos, pos_indices, \
        actioness_scr, overlaps_scr, f_scores = self.calc.update_scores(final_scores, final_poss, f_scores, pos, pos_indices, actioness_scr, overlaps_scr)
        print('Updating thresh...', final_scores.shape, final_poss.shape,
              pos.shape, f_scores.shape, pos_indices.shape)

    # Materialize connected tubes: per-frame boxes over the whole video,
    # and the list of (clip, roi) pairs composing each tube.
    final_tubes = torch.zeros(final_poss.size(0), num_frames, 4)
    f_tubes = []
    for i in range(final_poss.size(0)):
        tub = []
        for j in range(final_poss.size(1)):
            curr_ = final_poss[i, j]
            start_fr = curr_[0] * int(self.sample_duration / 2)
            end_fr = min((curr_[0] * int(self.sample_duration / 2) +
                          self.sample_duration).type_as(num_frames),
                         num_frames).type_as(start_fr)
            if curr_[0] == -1:
                break
            curr_frames = p_tubes[curr_[0], curr_[1]]
            tub.append((curr_[0].item(), curr_[1].item()))
            ## TODO change with avg
            final_tubes[i, start_fr:end_fr] = torch.max(
                curr_frames.view(-1, 4).contiguous()[:(end_fr - start_fr).long()],
                final_tubes[i, start_fr:end_fr].type_as(curr_frames))
        f_tubes.append(tub)

    ###################################################
    #          Choose gth Tubes for RCNN\TCN          #
    ###################################################
    if self.training:
        # # get gt tubes and feats
        ## calculate overlaps
        boxes_ = boxes.permute(1, 0, 2).contiguous()
        boxes_ = boxes_[:, :, :4].contiguous().view(num_actions, -1)

        if final_tubes.nelement() == 0:
            print('problem final_tubes ...')
            print('boxes :', boxes.cpu().numpy())
            print('boxes_ :', boxes_)
            print('boxes_.shape :', boxes_.shape)
            print('final_tubes :', final_tubes)
            print('self.calc.thresh:', self.calc.thresh)
            print('final_scores :', final_scores.shape)
            print('final_pos.shape :', final_poss.shape)

        # Pick up to 9 background tubes (IoU < 0.3 against every gt tube).
        if final_tubes.nelement() > 0:
            overlaps = tube_overlaps(final_tubes.view(-1, num_frames * 4),
                                     boxes_.type_as(final_tubes))
            max_overlaps, _ = torch.max(overlaps, 1)
            max_overlaps = max_overlaps.clamp_(min=0)

            ## TODO change numbers
            bg_tubes_indices = max_overlaps.lt(0.3).nonzero()
            if bg_tubes_indices.nelement() > 0:
                bg_tubes_indices_picked = (
                    torch.rand(9) * bg_tubes_indices.size(0)).long()
                bg_tubes_list = [
                    f_tubes[i]
                    for i in bg_tubes_indices[bg_tubes_indices_picked]
                ]
                bg_labels = torch.zeros(len(bg_tubes_list))
            else:
                bg_tubes_list = []
                bg_labels = torch.Tensor([])
        else:
            bg_tubes_list = []
            bg_labels = torch.Tensor([])

        # Foreground tubes: for each clip, proposals matching a gt tube exactly.
        gt_tubes_list = [[] for i in range(num_actions)]
        for i in range(n_clips):
            overlaps = tube_overlaps(p_tubes[i], f_gt_tubes[i].type_as(p_tubes))
            max_overlaps, argmax_overlaps = torch.max(overlaps, 0)

            for j in range(num_actions):
                if max_overlaps[j] == 1.0:
                    gt_tubes_list[j].append((i, j))

        gt_tubes_list = [i for i in gt_tubes_list if i != []]
        if len(gt_tubes_list) != num_actions:
            print('len(gt_tubes_list :', len(gt_tubes_list))
            print('num_actions :', num_actions)
            print('boxes.cpu().numpy() :', boxes.cpu().numpy())

        ## concate fb, bg tubes
        if gt_tubes_list == [[]]:
            print('overlaps :', overlaps)
            print('max_overlaps :', max_overlaps)
            print('p_tubes :', p_tubes)
            print('f_gt_tubes :', f_gt_tubes)
            exit(-1)

        if bg_tubes_list != []:
            f_tubes = gt_tubes_list + bg_tubes_list
            target_lbl = torch.cat([labels, bg_labels], dim=0)
        else:
            f_tubes = gt_tubes_list
            target_lbl = labels

    ##############################################
    if len(f_tubes) == 0:
        print('------------------')
        print('    empty tube    ')
        print(' vid_id :', vid_id)
        print('self.calc.thresh :', self.calc.thresh)
        return torch.Tensor([]).cuda(), torch.Tensor([]).cuda(), None

    max_seq = reduce(lambda x, y: y if len(y) > len(x) else x, f_tubes)
    max_length = len(max_seq)

    ## calculate input rois
    ## f_feats.shape : [#f_tubes, max_length, 512]
    prob_out = torch.zeros(len(f_tubes), self.n_classes).cuda()
    f_feats = torch.zeros(len(f_tubes), n_clips, 64,
                          self.sample_duration).type_as(features) - 1
    f_feats_len = torch.zeros(len(f_tubes)).type_as(features) - 1

    for i in range(len(f_tubes)):
        seq = f_tubes[i]
        feats = torch.Tensor(len(seq), self.p_feat_size, self.sample_duration)
        for j in range(len(seq)):
            feats[j] = features[seq[j][0], seq[j][1]]
        f_feats_len[i] = len(seq)
        f_feats[i, :len(seq)] = feats
        # Classify the tube from its clip-averaged feature.
        prob_out[i] = self.act_rnn(
            feats.mean(0).view(1, -1).contiguous().cuda())

    if mode == 'extract':
        # now we use mean so we can have a tensor containing all features
        # NOTE(review): target_lbl is only defined on the training path —
        # 'extract' presumably runs with self.training set; confirm.
        target_lbl = target_lbl.cuda()
        return f_feats, target_lbl, f_feats_len

    # ##########################################
    # #           Time for Linear Loss         #
    # ##########################################
    cls_loss = torch.Tensor([0]).cuda()

    final_tubes = final_tubes.type_as(final_poss)

    # # classification probability
    if self.training:
        cls_loss = F.cross_entropy(prob_out.cpu(), target_lbl.long()).cuda()

    if self.training:
        return None, prob_out, cls_loss,
    else:
        # init padding tubes because of multi-GPU system
        if final_tubes.size(0) > conf.UPDATE_THRESH:
            _, indices = torch.sort(final_scores)
            final_tubes = final_tubes[
                indices[:conf.UPDATE_THRESH]].contiguous()
            prob_out = prob_out[indices[:conf.UPDATE_THRESH]].contiguous()

        # NMS over whole-video tubes scored by their best class probability.
        max_prob_out, _ = torch.max(prob_out, 1)
        f_tubes = torch.cat([
            final_tubes.view(-1, num_frames * 4),
            max_prob_out.view(-1, 1).type_as(final_tubes)
        ], dim=1)
        keep = torch.Tensor(py_cpu_nms_tubes(f_tubes.float(), 0.5)).long()
        final_tubes = final_tubes[keep]
        prob_out = prob_out[keep]

        # Fixed-size, -1-padded outputs so multi-GPU gather has uniform shapes.
        ret_tubes = torch.zeros(1, conf.UPDATE_THRESH, ret_n_frames,
                                4).type_as(final_tubes).float() - 1
        ret_prob_out = torch.zeros(
            1, conf.UPDATE_THRESH,
            self.n_classes).type_as(final_tubes).float() - 1
        ret_tubes[0, :final_tubes.size(0), :num_frames] = final_tubes
        ret_prob_out[0, :final_tubes.size(0)] = prob_out
        return ret_tubes, ret_prob_out, torch.Tensor([final_tubes.size(0)
                                                      ]).cuda()