def training(epoch, device, model, dataset_folder, sample_duration,
             spatial_transform, temporal_transform, boxes_file, splt_txt_path,
             cls2idx, batch_size, n_threads, lr, mode=1):
    """Train ``model`` for one pass over the 'train' split.

    A ``Video_Dataset_small_clip`` is built from the given paths/transforms and
    iterated with a shuffled ``DataLoader``.  For every batch the model is
    called with the clips, ``im_info``, the ground-truth tubes / rois and a
    zero start-frame tensor, and the loss selected by ``mode`` is
    back-propagated.

    Args:
        epoch: Zero-based epoch index; only used for the log line (printed +1).
        device: Device the batch tensors are moved to.
        model: Network to train; must return the 9-tuple unpacked below.
        dataset_folder, sample_duration, spatial_transform,
        temporal_transform, boxes_file, splt_txt_path, cls2idx: Forwarded to
            ``Video_Dataset_small_clip``.
        batch_size: Base batch size; the loader uses ``batch_size * 16``.
        n_threads: Unused.  NOTE(review): the loader hard-codes
            ``num_workers=32`` -- confirm whether this should be honoured.
        lr: Learning rate; only reported in the log line.
        mode: Loss selection.  3 -> single-frame bbox regression loss,
            4 -> RPN classification + RPN bbox loss, 5 -> sum of both.

    Returns:
        Tuple ``(model, loss_temp)`` where ``loss_temp`` is the sum of the
        per-batch loss values over the epoch.

    Raises:
        ValueError: If ``mode`` is not 3, 4 or 5 (this includes the default
            value 1, for which no loss is defined).

    Note:
        ``optimizer`` is neither a parameter nor assigned here, so it is
        resolved from the enclosing (module) scope at call time.
    """
    # Fail fast: previously an unsupported mode only surfaced as an
    # UnboundLocalError on `loss` after the dataset had been built.
    supported_modes = (3, 4, 5)
    if mode not in supported_modes:
        raise ValueError(
            'Unsupported training mode {!r}; expected one of {}'.format(
                mode, supported_modes))

    data = Video_Dataset_small_clip(dataset_folder,
                                    frames_dur=sample_duration,
                                    spatial_transform=spatial_transform,
                                    temporal_transform=temporal_transform,
                                    bboxes_file=boxes_file,
                                    split_txt_path=splt_txt_path,
                                    mode='train',
                                    classes_idx=cls2idx)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=batch_size * 16,
                                              shuffle=True,
                                              num_workers=32,
                                              pin_memory=True)

    model.train()
    loss_temp = 0
    n_steps = 0  # number of batches processed; stays 0 for an empty loader

    for step, batch in enumerate(data_loader):
        n_steps = step + 1

        # Batch layout: clips, h, w, gt_tubes_r, gt_rois, n_actions,
        # n_frames, im_info -- only four of them are needed for training.
        clips, _, _, gt_tubes_r, gt_rois, _, _, im_info = batch
        clips_ = clips.to(device)
        gt_tubes_r_ = gt_tubes_r.to(device)
        gt_rois_ = gt_rois.to(device)
        im_info_ = im_info.to(device)
        # One zero start-frame entry per clip in the batch.
        start_fr = torch.zeros(clips_.size(0)).to(device)

        (_, _,
         rpn_loss_cls, rpn_loss_bbox,
         _, _, _,
         _, sgl_rois_bbox_loss) = model(clips_, im_info_, gt_tubes_r_,
                                        gt_rois_, start_fr)

        if mode == 3:
            loss = sgl_rois_bbox_loss.mean()
        elif mode == 4:
            loss = rpn_loss_cls.mean() + rpn_loss_bbox.mean()
        else:  # mode == 5, guaranteed by the validation above
            loss = (rpn_loss_cls.mean() + rpn_loss_bbox.mean()
                    + sgl_rois_bbox_loss.mean())

        loss_temp += loss.item()

        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Average over the processed batches; guard against an empty loader.
    mean_loss = loss_temp / n_steps if n_steps else 0.0
    print('Train Epoch: {} \tLoss: {:.6f}\t lr : {:.6f}'.format(
        epoch + 1, mean_loss, lr))

    return model, loss_temp
def validation(epoch, device, model, data_loader, n_threads):
    """Report single-frame tube recall on the 'test' split at IoU 0.5/0.4/0.3.

    For every clip the proposed tubes are refined with
    ``tube_transform_inv`` using the model's ``sgl_rois_bbox_pred`` output,
    overlapped with the ground-truth rois via ``tube_overlaps``, and a
    ground-truth entry counts as detected when its best overlap exceeds the
    threshold.  A summary box is printed; nothing is returned.

    Args:
        epoch: Zero-based epoch index; only used for the report (printed +1).
        device: Device the batch tensors are moved to.
        model: Network to evaluate; must return the 9-tuple unpacked below.
        data_loader: NOTE(review): ignored -- a new loader over the 'test'
            split is always built below.  Confirm whether the argument
            should be honoured instead.
        n_threads: Unused.

    Note:
        ``dataset_folder``, ``sample_duration``, ``spatial_transform``,
        ``temporal_transform``, ``boxes_file``, ``splt_txt_path`` and
        ``cls2idx`` are not parameters; they are resolved from the enclosing
        (module) scope at call time.
    """
    iou_thresholds = (0.5, 0.4, 0.3)  # Intersection Over Union thresholds

    data = Video_Dataset_small_clip(dataset_folder,
                                    frames_dur=sample_duration,
                                    spatial_transform=spatial_transform,
                                    temporal_transform=temporal_transform,
                                    bboxes_file=boxes_file,
                                    split_txt_path=splt_txt_path,
                                    mode='test',
                                    classes_idx=cls2idx)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=4,
                                              shuffle=True,
                                              num_workers=0,
                                              pin_memory=True)
    model.eval()

    # One (true_pos, false_neg) counter pair per threshold, kept as ints.
    true_pos = [0] * len(iou_thresholds)
    false_neg = [0] * len(iou_thresholds)
    n_steps = 0  # number of batches processed; stays 0 for an empty loader
    tube_width = sample_duration * 4 + 2  # columns per tube row

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for step, batch in enumerate(data_loader):
            n_steps = step + 1

            clips, _, _, gt_tubes_r, gt_rois, _, _, im_info = batch
            clips_ = clips.to(device)
            gt_rois_ = gt_rois.to(device)
            im_info_ = im_info.to(device)

            # Feed the device copies (the CPU originals were passed before,
            # which fails when the model itself lives on `device`).
            tubes, _, _, _, _, _, _, sgl_rois_bbox_pred, _ = model(
                clips_, im_info_, None, None, None)

            n_clips = len(tubes)
            tubes = tubes.view(-1, tube_width)
            # Columns 1..-2 hold the per-frame box coordinates that the
            # regression deltas are applied to.
            tubes[:, 1:-1] = tube_transform_inv(
                tubes[:, 1:-1],
                sgl_rois_bbox_pred.view(-1, sample_duration * 4),
                (1.0, 1.0, 1.0, 1.0))
            tubes = tubes.view(n_clips, -1, tube_width)

            for i in range(tubes.size(0)):
                tubes_t = tubes[i, :, 1:-1].contiguous()
                gt_rois_t = gt_rois_[i, :, :, :4].contiguous().view(
                    -1, sample_duration * 4)

                rois_overlaps = tube_overlaps(tubes_t, gt_rois_t)
                # Best overlap achieved by any proposal, per ground truth.
                gt_max_overlaps_sgl, _ = torch.max(rois_overlaps, 0)

                # Ground-truth tubes whose last column is non-zero.
                n_elems = gt_tubes_r[i, :, -1].ne(0).sum().item()

                for k, thresh in enumerate(iou_thresholds):
                    detected = int(
                        (gt_max_overlaps_sgl > thresh).sum().item())
                    true_pos[k] += detected
                    false_neg[k] += n_elems - detected

    print(' -----------------------')
    print('| Validation Epoch: {: >3} | '.format(epoch + 1))
    print('| |')
    print('| Proposed Action Tubes |')
    print('| |')
    print('| Single frame |')
    print('| |')
    print('| In {: >6} steps : |'.format(n_steps))

    for thresh, tp, fn in zip(iou_thresholds, true_pos, false_neg):
        total = tp + fn
        # Avoid ZeroDivisionError when no ground truth was counted.
        recall = float(tp) / float(total) if total else 0.0
        print('| |')
        print('| Threshold : {} |'.format(thresh))
        print('| |')
        print(
            '| True_pos --> {: >6} |\n| False_neg --> {: >6} | \n| Recall --> {: >6.4f} |'
            .format(tp, fn, recall))

    print(' -----------------------')
# Number of action labels handed to the network below.
n_classes = len(actions)

# -------------------------------------------------------
#  Part 1-1 - train TPN - without reg
# -------------------------------------------------------
print(' -----------------------------------------------------',
      '| Part 1-1 - train TPN - without reg |',
      ' -----------------------------------------------------',
      sep='\n')

# Training dataset: clips read from `dataset_frames` in 'train' mode.
train_data = Video_Dataset_small_clip(
    video_path=dataset_frames,
    frames_dur=sample_duration,
    spatial_transform=spatial_transform,
    temporal_transform=temporal_transform,
    bboxes_file=boxes_file,
    split_txt_path=split_txt_path,
    mode='train',
    classes_idx=cls2idx,
)

# Shuffled loader over the training dataset.
train_data_loader = torch.utils.data.DataLoader(
    train_data,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
)

# Instantiate the action network, then let it set itself up from
# `model_path` through its own create_architecture() call.
act_model = ACT_net(actions, sample_duration, device=device)
act_model.create_architecture(model_path=model_path)
def validation(epoch, device, model, dataset_folder, sample_duration,
               spatial_transform, temporal_transform, boxes_file,
               splt_txt_path, cls2idx, batch_size, n_threads):
    """Report single-frame tube recall on the 'test' split at IoU 0.5.

    For every clip the proposed tubes are refined with
    ``tube_transform_inv``, clipped with ``clip_boxes`` and overlapped with
    the ground-truth rois via ``tube_overlaps``.  Only ground-truth rows
    that contain at least one non-zero value are counted; such a row is
    detected when its best overlap exceeds the threshold.  A summary box is
    printed; nothing is returned.

    Args:
        epoch: Zero-based epoch index; only used for the report (printed +1).
        device: Device the batch tensors are moved to.
        model: Network to evaluate; must return the 9-tuple unpacked below.
        dataset_folder, sample_duration, spatial_transform,
        temporal_transform, boxes_file, splt_txt_path, cls2idx: Forwarded to
            ``Video_Dataset_small_clip``.
        batch_size: Unused -- the loader hard-codes ``batch_size=16``.
        n_threads: Unused -- the loader hard-codes ``num_workers=0``.
    """
    iou_thresh = 0.5  # Intersection Over Union thresh

    data = Video_Dataset_small_clip(dataset_folder,
                                    frames_dur=sample_duration,
                                    spatial_transform=spatial_transform,
                                    temporal_transform=temporal_transform,
                                    bboxes_file=boxes_file,
                                    split_txt_path=splt_txt_path,
                                    mode='test',
                                    classes_idx=cls2idx)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=16,
                                              shuffle=True,
                                              num_workers=0,
                                              pin_memory=True)
    model.eval()

    sgl_true_pos = 0
    sgl_false_neg = 0
    n_steps = 0  # number of batches processed; stays 0 for an empty loader
    tube_width = sample_duration * 4 + 2  # columns per tube row

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for step, batch in enumerate(data_loader):
            n_steps = step + 1
            print('step :', step)

            clips, _, _, _, gt_rois, _, _, im_info = batch
            clips_ = clips.to(device)
            gt_rois_ = gt_rois.to(device)
            im_info_ = im_info.to(device)

            # Feed the device copies (the CPU originals were passed before,
            # which fails when the model itself lives on `device`).
            tubes, _, _, _, _, _, _, sgl_rois_bbox_pred, _ = model(
                clips_, im_info_, None, None, None)

            n_tubes = len(tubes)
            tubes = tubes.view(-1, tube_width)
            # Columns 1..-2 hold the per-frame box coordinates that the
            # regression deltas are applied to.
            tubes[:, 1:-1] = tube_transform_inv(
                tubes[:, 1:-1],
                sgl_rois_bbox_pred.view(-1, sample_duration * 4),
                (1.0, 1.0, 1.0, 1.0))
            tubes = tubes.view(n_tubes, -1, tube_width)
            tubes[:, :, 1:-1] = clip_boxes(tubes[:, :, 1:-1], im_info_,
                                           tubes.size(0))

            for i in range(tubes.size(0)):
                tubes_t = tubes[i, :, 1:-1].contiguous()
                gt_rois_t = gt_rois_[i, :, :, :4].contiguous().view(
                    -1, sample_duration * 4)

                rois_overlaps = tube_overlaps(tubes_t, gt_rois_t)
                # Best overlap (and the proposal achieving it) per
                # ground-truth row.
                gt_max_overlaps_sgl, max_indices = torch.max(
                    rois_overlaps, 0)

                # Rows of gt_rois_t with at least one non-zero entry.
                non_empty_indices = gt_rois_t.ne(0).any(
                    dim=1).nonzero().view(-1)
                n_elems = non_empty_indices.nelement()

                # Diagnostic dump: first ground truth is matched above 0.5
                # although its last four coordinates are all zero.
                if gt_max_overlaps_sgl[0] > 0.5 and gt_rois_t[
                        0, -4:].sum() == 0:
                    print('max_indices :', max_indices, max_indices.shape,
                          gt_max_overlaps_sgl)
                    print('tubes_t[max_indices[0]] :',
                          tubes_t[max_indices[0]])
                    print('gt_rois_t[0] :', gt_rois_t[0])

                sgl_detected = int(
                    (gt_max_overlaps_sgl[non_empty_indices] >
                     iou_thresh).sum().item())
                sgl_true_pos += sgl_detected
                sgl_false_neg += n_elems - sgl_detected

    total = sgl_true_pos + sgl_false_neg
    # Avoid ZeroDivisionError when no ground truth was counted.
    recall = float(sgl_true_pos) / float(total) if total else 0.0

    print(' -----------------------')
    print('| Validation Epoch: {: >3} | '.format(epoch + 1))
    print('| |')
    print('| Proposed Action Tubes |')
    print('| |')
    print('| Single frame |')
    print('| |')
    print('| In {: >6} steps : |'.format(n_steps))
    print('| |')
    print(
        '| True_pos --> {: >6} |\n| False_neg --> {: >6} | \n| Recall --> {: >6.4f} |'
        .format(sgl_true_pos, sgl_false_neg, recall))
    print(' -----------------------')
def validation(epoch, device, model, dataset_folder, sample_duration,
               spatial_transform, temporal_transform, boxes_file,
               splt_txt_path, cls2idx, batch_size, n_threads):
    """Report whole-tube recall on the 'test' split at IoU 0.5.

    For every clip the model's proposed tubes are compared with the
    non-empty ground-truth tubes using ``bbox_overlaps_batch_3d``; each
    entry of the per-row maximum overlap that exceeds the threshold counts
    as a true positive, every other entry as a false negative.  A summary
    box is printed; nothing is returned.

    Args:
        epoch: Zero-based epoch index; only used for the report (printed +1).
        device: Device the batch tensors are moved to.
        model: Network to evaluate; must return the 12-tuple unpacked below.
        dataset_folder, sample_duration, spatial_transform,
        temporal_transform, boxes_file, splt_txt_path, cls2idx: Forwarded to
            ``Video_Dataset_small_clip``.
        batch_size: Unused -- the loader hard-codes ``batch_size=2``.
        n_threads: Unused -- the loader hard-codes ``num_workers=0``.
    """
    iou_thresh = 0.5  # Intersection Over Union thresh

    data = Video_Dataset_small_clip(dataset_folder,
                                    frames_dur=sample_duration,
                                    spatial_transform=spatial_transform,
                                    temporal_transform=temporal_transform,
                                    bboxes_file=boxes_file,
                                    split_txt_path=splt_txt_path,
                                    mode='test',
                                    classes_idx=cls2idx)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=2,
                                              shuffle=True,
                                              num_workers=0,
                                              pin_memory=True)
    model.eval()

    true_pos = 0
    false_neg = 0
    n_steps = 0  # number of batches processed; stays 0 for an empty loader

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for step, batch in enumerate(data_loader):
            n_steps = step + 1
            print('step :', step)

            clips, _, _, gt_tubes_r, _, _, _, im_info = batch
            clips_ = clips.to(device)
            im_info_ = im_info.to(device)

            # Feed the device copies (the CPU originals were passed before,
            # which fails when the model itself lives on `device`).
            # Only the first of the twelve outputs (the tubes) is used.
            tubes, _, _, _, _, _, _, _, _, _, _, _ = model(
                clips_, im_info_, None, None, None)

            for i in range(tubes.size(0)):
                tubes_t = tubes[i]
                gt_tub = gt_tubes_r[i]

                # Keep only ground-truth rows whose values do not sum to 0.
                non_empty = gt_tub.sum(1).nonzero()
                if non_empty.nelement() == 0:
                    continue
                gt_tub = gt_tub[non_empty.view(-1)]

                # check one video each time
                overlaps, _, _ = bbox_overlaps_batch_3d(
                    tubes_t, gt_tub.unsqueeze(0).type_as(tubes_t))

                # for the whole tube
                gt_max_overlaps, _ = torch.max(overlaps, 1)
                detected = int((gt_max_overlaps > iou_thresh).sum().item())
                n_elements = gt_max_overlaps.nelement()

                true_pos += detected
                false_neg += n_elements - detected

    total = true_pos + false_neg
    # Avoid ZeroDivisionError when no ground truth was counted.
    recall = float(true_pos) / float(total) if total else 0.0

    print(' -----------------------')
    print('| Validation Epoch: {: >3} | '.format(epoch + 1))
    print('| |')
    print('| Proposed Action Tubes |')
    print('| |')
    print(
        '| In {: >6} steps : |\n| True_pos --> {: >6} |\n| False_neg --> {: >6} | \n| Recall --> {: >6.4f} |'
        .format(n_steps, true_pos, false_neg, recall))
    print(' -----------------------')