def generate_candidate(predictions):
    """Build one candidate dictionary per batch element from raw predictions.

    For each sample the box regressions are decoded against the shared priors
    and only the priors whose best confidence over class rows 1.. exceeds
    ``cfg.eval_conf_thresh`` are kept.

    :param predictions: mapping with 'loc', 'conf', 'priors', 'proto',
        'mask_coeff', 'T2S_feat' and 'fpn_feat' entries; 'track' is read when
        ``cfg.train_track`` is set and 'centerness' when
        ``cfg.train_centerness`` is set.
    :return: list holding one candidate dictionary per batch element.
    """
    priors = predictions['priors'].squeeze(0)
    num_samples = predictions['loc'].size(0)

    candidates = []
    for idx in range(num_samples):
        sample_loc = predictions['loc'][idx]
        sample_conf = predictions['conf'][idx]

        # Features keep a leading batch dimension of size one.
        sample = {
            'T2S_feat': predictions['T2S_feat'][idx].unsqueeze(0),
            'fpn_feat': predictions['fpn_feat'][idx].unsqueeze(0),
        }

        with timer.env('Detect'):
            boxes = decode(sample_loc, priors)

            # Transposed layout: one row per class, one column per prior.
            class_major_conf = sample_conf.t().contiguous()
            # Row 0 is excluded from the maximum (presumably the background
            # class -- confirm against the model definition).
            best_conf, _ = torch.max(class_major_conf[1:, :], dim=0)
            keep = best_conf > cfg.eval_conf_thresh

            sample['proto'] = predictions['proto'][idx]
            sample['conf'] = class_major_conf[:, keep].t()
            sample['box'] = boxes[keep, :]
            sample['mask_coeff'] = predictions['mask_coeff'][idx][keep, :]
            if cfg.train_track:
                sample['track'] = predictions['track'][idx][keep, :]
            else:
                sample['track'] = None
            if cfg.train_centerness:
                sample['centerness'] = predictions['centerness'][idx][keep].view(-1)

        candidates.append(sample)

    return candidates
def get_ann2(dets, cut_off=0.5, th=0.33, fpups=False):
    """Turn raw detector output into an ``Ann`` of integer detection rows.

    A prior is kept when its softmax score for class row 0 is below
    ``cut_off``. Each kept prior yields one row
    ``[class, score * 100, box * args.size]`` rounded to ``int32``.

    Args:
        dets: tuple ``(loc, conf, priors)``.
        cut_off: upper bound on the row-0 score for a prior to be kept.
            Previously this argument was ignored and 0.5 was hard-coded; the
            default reproduces that behaviour.
        th: unused; kept for interface compatibility.
        fpups: unused; kept for interface compatibility.

    Returns:
        ``Ann(dets=...)`` or ``None`` when no prior passes the cut-off.
    """
    loc, conf, priors = dets
    conf = softmax(Variable(conf)).data
    decoded_boxes = decode(loc, priors, [0.1, 0.2])

    # Class-major layout: one row per class, one column per prior.
    conf_scores = conf.t().contiguous()
    # Best score and its index among rows 1..; the index is therefore
    # relative to row 1, not to row 0.
    conf_p, conf_cl = conf_scores[1:, :].max(0)

    c_mask = conf_scores[0] < cut_off
    l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
    boxes = decoded_boxes[l_mask].view(-1, 4)
    if boxes.nelement() == 0:
        return None

    boxes_cl = conf_cl.squeeze()[c_mask].unsqueeze(1).float()
    boxes_p = conf_p.squeeze()[c_mask].unsqueeze(1).float()
    dets = torch.cat([boxes_cl, boxes_p * 100, boxes * args.size], 1)
    dets = dets.round().cpu().numpy().astype('int32')
    return Ann(dets=dets)
def CandidateShift(net, ref_candidate, next_candidate, img=None, img_meta=None, display=False):
    """
    Shift the candidates of the reference frame onto the target frame.

    The reference boxes are re-regressed by ``net.TemporalNet`` from features
    built out of the correlation between both frames, and the mask
    coefficients are offset accordingly.

    :param net: network providing ``TemporalNet``
    :param ref_candidate: candidate dictionary of the reference frame; its
                          'box', 'score', 'mask_coeff', 'T2S_feat' and
                          'fpn_feat' items are read
    :param next_candidate: candidate dictionary of the target frame; its
                           'proto', 'T2S_feat' and 'fpn_feat' items are read
    :param img: image forwarded to the display helper when ``display`` is set
    :param img_meta: image meta data forwarded to the display helper
    :param display: when true, draw the shifted boxes and masks
    :return: candidates on the target frame
    """
    copied_keys = {'proto', 'fpn_feat', 'T2S_feat'}
    shifted = {
        key: value.clone()
        for key, value in next_candidate.items()
        if key in copied_keys
    }

    # we only use the features in the P3 layer to perform correlation operation
    t2s_ref = ref_candidate['T2S_feat']
    t2s_next = next_candidate['T2S_feat']
    fpn_ref = ref_candidate['fpn_feat']
    fpn_next = next_candidate['fpn_feat']

    correlation = correlate(fpn_ref, fpn_next,
                            patch_size=cfg.correlation_patch_size)
    stacked = torch.cat([correlation, t2s_ref, t2s_next], dim=1)
    temporal_input_feat = F.relu(stacked)

    ref_boxes = ref_candidate['box'].clone()
    feat_h, feat_w = fpn_ref.size()[2:]
    roi_feat = bbox_feat_extractor(temporal_input_feat, ref_boxes,
                                   feat_h, feat_w, 7)

    loc_shift, coeff_shift = net.TemporalNet(roi_feat)
    shifted_boxes = decode(loc_shift, center_size(ref_boxes))
    shifted_coeff = ref_candidate['mask_coeff'].clone() + coeff_shift
    shifted_masks = generate_mask(next_candidate['proto'], shifted_coeff,
                                  shifted_boxes)

    if display:
        display_box_shift(ref_boxes, shifted_boxes,
                          mask_shift=shifted_masks,
                          img_meta=img_meta, img_gpu=img)

    shifted['box'] = shifted_boxes.clone()
    # Scores of shifted candidates are scaled by 0.95.
    shifted['score'] = ref_candidate['score'].clone() * 0.95
    shifted['mask_coeff'] = shifted_coeff.clone()
    shifted['mask'] = shifted_masks.clone()

    return shifted
def detect_in_thread(class_data_proxy, num_classes, trained_model_path, use_cuda, cfg):
    """Detection worker loop: forward captured batches until capture ends.

    Args:
        class_data_proxy: shared-state proxy; batches are read with
            ``get_batch_rgb`` and results are published with
            ``set_net_result`` / ``set_fps_det``.
        num_classes: forwarded to ``init_ssd``.
        trained_model_path: forwarded to ``init_ssd``.
        use_cuda: forwarded to ``init_ssd``.
        cfg: configuration mapping; ``cfg['variance']`` is used for decoding.
    """
    net = init_ssd(num_classes, trained_model_path, use_cuda)
    fps_det = FPS().start()
    print('class_data.end_of_capture of detect in thread : ',
          class_data_proxy.get_eoc())

    # Becomes true once at least one batch has been received; a missing batch
    # after that point is treated as the end of the capture.
    received_any_batch = False
    while True:
        if class_data_proxy.get_eoc():
            break

        batch_rgb = class_data_proxy.get_batch_rgb()
        if batch_rgb is None:
            print('batch_rgb is None !!!')
            if received_any_batch:
                class_data_proxy.set_eoc()
                print('class_data.end_of_capture of detect in thread is True')
            continue

        received_any_batch = True
        start = time.time()

        # net forwarding
        loc_data, conf_preds, prior_data = net(batch_rgb)
        first_loc = loc_data[0].data
        boxes = decode(first_loc, prior_data.data, cfg['variance']).clone()
        scores = net.softmax(conf_preds[0]).data.clone()
        # Only the first element of the batch is published.
        class_data_proxy.set_net_result((boxes, scores))

        fps_det.update()
        class_data_proxy.set_fps_det(fps_det.fps())

    print("class_data.end_of_capture is True : detect_in_thread")
def get_ann(dets, p=0.33, th=0.33, fpups=False):
    """Turn raw detector output into an ``Ann`` of NMS-filtered boxes.

    A prior is kept when its score in class row 0 is below ``p`` (and, with
    ``fpups``, when its score in the last class row is below ``p`` as well).
    The kept boxes go through ``nms`` with ``1 - score`` as ranking value.

    Args:
        dets: tuple ``(loc, conf, priors)``.
        p: upper bound on the row-0 (and optionally last-row) score.
        th: overlap threshold forwarded to ``nms``.
        fpups: additionally require the last class row to be below ``p``.

    Returns:
        ``Ann(dets=...)`` with boxes scaled by ``args.size`` and rounded, or
        ``None`` when no prior passes the filter.
    """
    loc, conf, priors = dets
    decoded_boxes = decode(loc, priors, [0.1, 0.2])

    # Class-major layout: one row per class, one column per prior.
    conf_scores = conf.t().contiguous()
    c_mask = conf_scores[0].lt(p)
    if fpups:
        c_mask = c_mask & conf_scores[-1].lt(p)

    scores = conf_scores[0][c_mask]
    l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
    boxes = decoded_boxes[l_mask].view(-1, 4)
    if boxes.nelement() == 0:
        return None

    ids, count = nms(boxes, 1 - scores, th, 200)
    ids = ids.cpu()
    # ``.cpu()`` is a no-op for CPU tensors and makes the NumPy conversion
    # work when the detections live on the GPU, as get_ann2 already does.
    ann_dets = (boxes[ids[:count]] * args.size).round().cpu().numpy()
    return Ann(dets=ann_dets)
def forward(self, loc_data, conf_data, prior_data):
    """Decode box predictions and run per-class NMS for every image.

    Args:
        loc_data: (tensor) Loc preds from loc layers; indexed per image along
            dim 0 and decoded against ``prior_data``.
        conf_data: (tensor) Conf preds from conf layers; viewed here as
            [batch, num_priors, num_classes].
        prior_data: (tensor) Prior boxes from priorbox layers; its first
            dimension is taken as ``num_priors``.

    Returns:
        Tensor of shape [batch, num_classes, top_k, 5]. For every class
        index >= 1 the first ``count`` rows hold the score followed by the
        four decoded box values of the boxes kept by NMS; all other rows
        (including every row of class 0) stay zero.
    """
    num = loc_data.size(0)  # batch size
    num_priors = prior_data.size(0)
    output = torch.zeros(num, self.num_classes, self.top_k, 5)
    conf_preds = conf_data.view(num, num_priors,
                                self.num_classes).transpose(2, 1)

    # Decode predictions into bboxes.
    for i in range(num):
        decoded_boxes = decode(loc_data[i], prior_data, self.variance)
        # For each class, perform nms
        conf_scores = conf_preds[i].clone()

        for cl in range(1, self.num_classes):
            c_mask = conf_scores[cl].gt(self.conf_thresh)
            scores = conf_scores[cl][c_mask]
            # An empty selection is a 1-D tensor with zero elements on
            # PyTorch >= 0.4, so ``dim() == 0`` no longer detects it; the
            # element count works on old and new versions alike.
            if scores.numel() == 0:
                continue
            l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
            boxes = decoded_boxes[l_mask].view(-1, 4)
            # idx of highest scoring and non-overlapping boxes per class
            ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
            output[i, cl, :count] = \
                torch.cat((scores[ids[:count]].unsqueeze(1),
                           boxes[ids[:count]]), 1)

    flt = output.contiguous().view(num, -1, 5)
    _, idx = flt[:, :, 0].sort(1, descending=True)
    _, rank = idx.sort(1)
    # NOTE(review): indexing with a boolean mask returns a copy, so this
    # ``fill_`` does not modify ``output``. It is left untouched to keep the
    # returned values unchanged; the intended cross-class top-k filter
    # (and whether ``<`` should be ``>=``) needs confirming before it is
    # turned into a real in-place assignment.
    flt[(rank < self.top_k).unsqueeze(-1).expand_as(flt)].fill_(0)
    return output
def validate(args, net, val_data_loader, val_dataset, iteration_num, iou_thresh=0.5):
    """Test a SSD network on an image database.

    Runs ``net`` over every batch of ``val_data_loader``, collects the
    ground-truth boxes and the per-class detections that survive the
    confidence threshold and NMS, and scores them with
    ``evaluate_detections``.

    Args:
        args: options object; ``num_classes``, ``cuda``, ``cfg['variance']``,
            ``conf_thresh``, ``nms_thresh`` and ``topk`` are read.
        net: network; called on the image batch, indexed as
            (loc, conf, priors), and must expose ``softmax``.
        val_data_loader: loader yielding (images, targets, image indices).
        val_dataset: dataset; only its length is used, for progress output.
        iteration_num: training iteration, only used in log messages.
        iou_thresh: IoU threshold forwarded to ``evaluate_detections``.

    Returns:
        Whatever ``evaluate_detections`` returns.
    """
    print('Validating at ', iteration_num)
    num_images = len(val_dataset)
    num_classes = args.num_classes

    det_boxes = [[] for _ in range(len(CLASSES))]
    gt_boxes = []
    print_time = True
    batch_iterator = None
    val_step = 100
    count = 0
    torch.cuda.synchronize()
    ts = time.perf_counter()

    for val_itr in range(len(val_data_loader)):
        if not batch_iterator:
            batch_iterator = iter(val_data_loader)

        torch.cuda.synchronize()
        t1 = time.perf_counter()

        images, targets, img_indexs = next(batch_iterator)
        batch_size = images.size(0)
        height, width = images.size(2), images.size(3)

        if args.cuda:
            # NOTE(review): ``volatile`` has no effect on PyTorch >= 0.4;
            # wrap the forward pass in ``torch.no_grad()`` when upgrading.
            images = Variable(images.cuda(), volatile=True)
        output = net(images)

        loc_data = output[0]
        conf_preds = output[1]
        prior_data = output[2]

        if print_time and val_itr % val_step == 0:
            torch.cuda.synchronize()
            tf = time.perf_counter()
            print('Forward Time {:0.3f}'.format(tf - t1))

        for b in range(batch_size):
            # Ground truth is scaled in place to the input image size.
            gt = targets[b].numpy()
            gt[:, 0] *= width
            gt[:, 2] *= width
            gt[:, 1] *= height
            gt[:, 3] *= height
            gt_boxes.append(gt)

            decoded_boxes = decode(loc_data[b].data, prior_data.data,
                                   args.cfg['variance']).clone()
            conf_scores = net.softmax(conf_preds[b]).data.clone()

            for cl_ind in range(1, num_classes):
                scores = conf_scores[:, cl_ind].squeeze()
                c_mask = scores.gt(args.conf_thresh)  # greater than minimum threshold
                # Keep the selection 1-D and test its element count. On
                # PyTorch >= 0.4 an empty selection still has dim() == 1 and
                # squeezing a single hit gives a 0-dim tensor, so the former
                # ``squeeze()`` + ``dim() == 0`` test let empty selections
                # reach nms and discarded lone detections.
                scores = scores[c_mask]
                if scores.numel() == 0:
                    det_boxes[cl_ind - 1].append(np.asarray([]))
                    continue

                boxes = decoded_boxes.clone()
                l_mask = c_mask.unsqueeze(1).expand_as(boxes)
                boxes = boxes[l_mask].view(-1, 4)
                # idx of highest scoring and non-overlapping boxes per class
                ids, counts = nms(boxes, scores, args.nms_thresh, args.topk)
                scores = scores[ids[:counts]].cpu().numpy()
                boxes = boxes[ids[:counts]].cpu().numpy()

                # Scale to the input size, then clip to the image.
                boxes[:, 0] *= width
                boxes[:, 2] *= width
                boxes[:, 1] *= height
                boxes[:, 3] *= height

                for ik in range(boxes.shape[0]):
                    boxes[ik, 0] = max(0, boxes[ik, 0])
                    boxes[ik, 2] = min(width, boxes[ik, 2])
                    boxes[ik, 1] = max(0, boxes[ik, 1])
                    boxes[ik, 3] = min(height, boxes[ik, 3])

                cls_dets = np.hstack(
                    (boxes, scores[:, np.newaxis])).astype(np.float32, copy=True)
                det_boxes[cl_ind - 1].append(cls_dets)

            count += 1

        if val_itr % val_step == 0:
            torch.cuda.synchronize()
            te = time.perf_counter()
            print('im_detect: {:d}/{:d} time taken {:0.3f}'.format(
                count, num_images, te - ts))
            torch.cuda.synchronize()
            ts = time.perf_counter()

        if print_time and val_itr % val_step == 0:
            torch.cuda.synchronize()
            te = time.perf_counter()
            print('NMS stuff Time {:0.3f}'.format(te - tf))

    print('Evaluating detections for itration number ', iteration_num)
    return evaluate_detections(gt_boxes, det_boxes, CLASSES,
                               iou_thresh=iou_thresh)
def __call__(self, predictions):
    """Run ``self.detect`` on every batch element of ``predictions``.

    Args:
        predictions: mapping with the entries
            'loc':    (tensor) Loc preds from loc layers
                      Shape: [batch, num_priors, 4]
            'conf':   (tensor) Conf preds from conf layers
                      Shape: [batch, num_priors, num_classes]
            'mask':   (tensor) Mask preds from mask layers
                      Shape: [batch, num_priors, mask_dim]
            'priors': (tensor) Prior boxes and variances from priorbox layers
                      Shape: [num_priors, 4]
            'proto':  (tensor, optional) If using mask_type.lincomb, the
                      prototype masks
                      Shape: [batch, mask_h, mask_w, mask_dim]
            'inst':   (optional) forwarded unchanged to ``self.detect``

    Returns:
        A list with one entry per batch element: the value returned by
        ``self.detect`` (which may be None). When both the result and the
        prototypes are available, the element's prototypes are stored under
        'proto'. A recorded debugging session showed the keys 'box', 'mask',
        'class', 'score' and 'proto' in such a result.
    """
    loc_data = predictions['loc']
    conf_data = predictions['conf']
    mask_data = predictions['mask']
    prior_data = predictions['priors']

    if 'proto' in predictions:
        proto_data = predictions['proto']
    else:
        proto_data = None

    if 'inst' in predictions:
        inst_data = predictions['inst']
    else:
        inst_data = None

    results = []
    with timer.env('Detect'):
        batch_size = loc_data.size(0)
        num_priors = prior_data.size(0)

        # Class-major layout: [batch, num_classes, num_priors].
        conf_preds = conf_data.view(batch_size, num_priors, self.num_classes)
        conf_preds = conf_preds.transpose(2, 1).contiguous()

        for batch_idx in range(batch_size):
            decoded_boxes = decode(loc_data[batch_idx], prior_data)
            detection = self.detect(batch_idx, conf_preds, decoded_boxes,
                                    mask_data, inst_data)

            if detection is not None and proto_data is not None:
                detection['proto'] = proto_data[batch_idx]

            results.append(detection)

    return results
def test_net(net, save_root, exp_name, input_type, dataset, iteration, num_classes, thresh=0.5):
    """Test a SSD network on an Action image database.

    Builds a non-shuffled loader over ``dataset``, runs ``net`` on every
    batch, collects ground-truth boxes and the per-class detections that
    survive the confidence threshold and NMS, and scores them with
    ``evaluate_detections``.

    Args:
        net: network; called on the image batch, indexed as
            (loc, conf, priors), and must expose ``softmax``.
        save_root: root directory used to build the cache / detection paths.
        exp_name: experiment name used in the cache file path.
        input_type: input modality name used in the detection directory name.
        dataset: dataset exposing ``ids`` and ``video_list``.
        iteration: training iteration, used in paths and log messages.
        num_classes: number of classes; class indices 1..num_classes-1 are
            evaluated.
        thresh: IoU threshold forwarded to ``evaluate_detections``.

    Returns:
        Whatever ``evaluate_detections`` returns.
    """
    val_data_loader = data.DataLoader(dataset, args.batch_size,
                                      num_workers=args.num_workers,
                                      shuffle=False,
                                      collate_fn=detection_collate,
                                      pin_memory=True)
    image_ids = dataset.ids
    # save_ids / det_file belong to the optional .mat and pickle dumps, which
    # are currently disabled.
    save_ids = []
    val_step = 250
    num_images = len(dataset)
    video_list = dataset.video_list
    det_boxes = [[] for _ in range(len(CLASSES))]
    gt_boxes = []
    print_time = True
    batch_iterator = None
    count = 0
    torch.cuda.synchronize()
    ts = time.perf_counter()
    num_batches = len(val_data_loader)
    det_file = save_root + 'cache/' + exp_name + '/detection-' + str(
        iteration).zfill(6) + '.pkl'
    print('Number of images ', len(dataset), ' number of batchs', num_batches)
    frame_save_dir = save_root + 'detections/CONV-' + input_type + '-' + args.listid + '-' + str(
        iteration).zfill(6) + '/'
    print('\n\n\nDetections will be store in ', frame_save_dir, '\n\n')

    for val_itr in range(len(val_data_loader)):
        if not batch_iterator:
            batch_iterator = iter(val_data_loader)

        torch.cuda.synchronize()
        t1 = time.perf_counter()

        images, targets, img_indexs = next(batch_iterator)
        batch_size = images.size(0)
        height, width = images.size(2), images.size(3)

        if args.cuda:
            # NOTE(review): ``volatile`` has no effect on PyTorch >= 0.4;
            # wrap the forward pass in ``torch.no_grad()`` when upgrading.
            images = Variable(images.cuda(), volatile=True)
        output = net(images)

        loc_data = output[0]
        conf_preds = output[1]
        prior_data = output[2]

        if print_time and val_itr % val_step == 0:
            torch.cuda.synchronize()
            tf = time.perf_counter()
            print('Forward Time {:0.3f}'.format(tf - t1))

        for b in range(batch_size):
            # Ground truth is scaled in place to the input image size.
            gt = targets[b].numpy()
            gt[:, 0] *= width
            gt[:, 2] *= width
            gt[:, 1] *= height
            gt[:, 3] *= height
            gt_boxes.append(gt)

            decoded_boxes = decode(loc_data[b].data, prior_data.data,
                                   cfg['variance']).clone()
            conf_scores = net.softmax(conf_preds[b]).data.clone()

            # Frame bookkeeping for the (disabled) per-frame .mat dump.
            index = img_indexs[b]
            annot_info = image_ids[index]
            frame_num = annot_info[1]
            video_id = annot_info[0]
            videoname = video_list[video_id]

            for cl_ind in range(1, num_classes):
                scores = conf_scores[:, cl_ind].squeeze()
                c_mask = scores.gt(args.conf_thresh)  # greater than minimum threshold
                # Keep the selection 1-D and test its element count. On
                # PyTorch >= 0.4 an empty selection still has dim() == 1 and
                # squeezing a single hit gives a 0-dim tensor, so the former
                # ``squeeze()`` + ``dim() == 0`` test let empty selections
                # reach nms and discarded lone detections.
                scores = scores[c_mask]
                if scores.numel() == 0:
                    det_boxes[cl_ind - 1].append(np.asarray([]))
                    continue

                boxes = decoded_boxes.clone()
                l_mask = c_mask.unsqueeze(1).expand_as(boxes)
                boxes = boxes[l_mask].view(-1, 4)
                # idx of highest scoring and non-overlapping boxes per class
                ids, counts = nms(boxes, scores, args.nms_thresh, args.topk)
                scores = scores[ids[:counts]].cpu().numpy()
                boxes = boxes[ids[:counts]].cpu().numpy()

                # Scale to the input size, then clip to the image.
                boxes[:, 0] *= width
                boxes[:, 2] *= width
                boxes[:, 1] *= height
                boxes[:, 3] *= height

                for ik in range(boxes.shape[0]):
                    boxes[ik, 0] = max(0, boxes[ik, 0])
                    boxes[ik, 2] = min(width, boxes[ik, 2])
                    boxes[ik, 1] = max(0, boxes[ik, 1])
                    boxes[ik, 3] = min(height, boxes[ik, 3])

                cls_dets = np.hstack(
                    (boxes, scores[:, np.newaxis])).astype(np.float32, copy=True)
                det_boxes[cl_ind - 1].append(cls_dets)

            count += 1

        if val_itr % val_step == 0:
            torch.cuda.synchronize()
            te = time.perf_counter()
            print('im_detect: {:d}/{:d} time taken {:0.3f}'.format(
                count, num_images, te - ts))
            torch.cuda.synchronize()
            ts = time.perf_counter()

        if print_time and val_itr % val_step == 0:
            torch.cuda.synchronize()
            te = time.perf_counter()
            print('NMS stuff Time {:0.3f}'.format(te - tf))

    print('Evaluating detections for itration number ', iteration)
    return evaluate_detections(gt_boxes, det_boxes, CLASSES, iou_thresh=thresh)
def validate(args, net, val_data_loader, val_dataset, epoch, iou_thresh=0.5, num_gpu=1):
    """Test a SSD network on an image database using one loader per GPU.

    One batch is drawn from every loader in ``val_data_loader`` per step.
    The batches are concatenated with a single constant 1x3x300x300 image
    appended after each of them, and the result is passed to ``net`` together
    with the per-loader image indices. Detections are then thresholded,
    passed through NMS and scored with ``evaluate_detections``.

    Args:
        args: options object; ``num_classes``, ``cuda``, ``cfg['variance']``,
            ``conf_thresh``, ``nms_thresh`` and ``topk`` are read.
        net: wrapped network; called as ``net(images, img_indexs)``, indexed
            as (loc, conf, priors), and must expose ``module.softmax``.
        val_data_loader: indexable collection with one loader per GPU, each
            yielding (images, targets, image indices).
        val_dataset: dataset; only its length is used, for progress output.
        epoch: epoch number, only used in log messages.
        iou_thresh: IoU threshold forwarded to ``evaluate_detections``.
        num_gpu: number of loaders / GPUs to draw from.

    Returns:
        Whatever ``evaluate_detections`` returns.
    """
    print('Validating at ', epoch)
    num_images = len(val_dataset)
    num_classes = args.num_classes

    det_boxes = [[] for _ in range(len(CLASSES))]
    gt_boxes = []
    print_time = True
    val_step = 100
    count = 0
    net.eval()  # switch net to evaluation mode
    torch.cuda.synchronize()
    ts = time.perf_counter()

    # create batch iterator
    batch_iterator = [[] for i in range(num_gpu)]
    max_x_y = 0
    min_x_y = []
    for i in range(num_gpu):
        batch_iterator[i] = iter(val_data_loader[i])
        min_x_y.append(len(val_data_loader[i]))
        max_x_y = max(max_x_y, len(val_data_loader[i]))

    for val_itr in range(max_x_y):
        # A loader shorter than the longest one is restarted once it has been
        # consumed. NOTE(review): it is restarted on every later step as
        # well, so it keeps yielding its first batch -- confirm intended.
        for ii in range(num_gpu):
            if val_itr >= min_x_y[ii]:
                batch_iterator[ii] = iter(val_data_loader[ii])

        torch.cuda.synchronize()
        t1 = time.perf_counter()

        img_indexs = []
        images, targets, img_in = next(batch_iterator[0])
        img_indexs.append(img_in)
        # Constant image appended after the first loader's batch.
        img = torch.zeros([1, 3, 300, 300])
        images = torch.cat((images, img.type_as(images)), 0)
        for ii in range(num_gpu - 1):
            img, targ, img_in = next(batch_iterator[ii + 1])
            images = torch.cat((images, img), 0)
            # Constant image of value 1 + ii appended after this batch.
            img = (torch.ones([1, 3, 300, 300]) + ii)
            images = torch.cat((images, img.type_as(images)), 0)
            for iii in range(len(targ)):
                targets.append(targ[iii])
            img_indexs.append(img_in)

        # The appended constant images are not counted as samples.
        # NOTE(review): presumably ``net`` strips them from its outputs so
        # that output row b matches targets[b] -- verify against the model.
        batch_size = images.size(0) - num_gpu
        height, width = images.size(2), images.size(3)

        if args.cuda:
            # NOTE(review): ``volatile`` has no effect on PyTorch >= 0.4;
            # wrap the forward pass in ``torch.no_grad()`` when upgrading.
            images = Variable(images.cuda(), volatile=True)
        output = net(images, img_indexs)

        loc_data = output[0]
        conf_preds = output[1]
        prior_data = output[2]
        prior_data = prior_data[:loc_data.size(1), :]

        if print_time and val_itr % val_step == 0:
            torch.cuda.synchronize()
            tf = time.perf_counter()
            print('Forward Time {:0.3f}'.format(tf - t1))

        for b in range(batch_size):
            # Ground truth is scaled in place to the input image size.
            gt = targets[b].numpy()
            gt[:, 0] *= width
            gt[:, 2] *= width
            gt[:, 1] *= height
            gt[:, 3] *= height
            gt_boxes.append(gt)

            decoded_boxes = decode(loc_data[b].data, prior_data.data,
                                   args.cfg['variance']).clone()
            conf_scores = net.module.softmax(conf_preds[b]).data.clone()

            for cl_ind in range(1, num_classes):
                scores = conf_scores[:, cl_ind].squeeze()
                c_mask = scores.gt(args.conf_thresh)  # greater than minimum threshold
                # Keep the selection 1-D and test its element count. On
                # PyTorch >= 0.4 an empty selection still has dim() == 1 and
                # squeezing a single hit gives a 0-dim tensor, so the former
                # ``squeeze()`` + ``dim() == 0`` test let empty selections
                # reach nms and discarded lone detections.
                scores = scores[c_mask]
                if scores.numel() == 0:
                    det_boxes[cl_ind - 1].append(np.asarray([]))
                    continue

                boxes = decoded_boxes.clone()
                l_mask = c_mask.unsqueeze(1).expand_as(boxes)
                boxes = boxes[l_mask].view(-1, 4)
                # idx of highest scoring and non-overlapping boxes per class
                ids, counts = nms(boxes, scores, args.nms_thresh, args.topk)
                scores = scores[ids[:counts]].cpu().numpy()
                boxes = boxes[ids[:counts]].cpu().numpy()

                # Scale to the input size, then clip to the image.
                boxes[:, 0] *= width
                boxes[:, 2] *= width
                boxes[:, 1] *= height
                boxes[:, 3] *= height

                for ik in range(boxes.shape[0]):
                    boxes[ik, 0] = max(0, boxes[ik, 0])
                    boxes[ik, 2] = min(width, boxes[ik, 2])
                    boxes[ik, 1] = max(0, boxes[ik, 1])
                    boxes[ik, 3] = min(height, boxes[ik, 3])

                cls_dets = np.hstack(
                    (boxes, scores[:, np.newaxis])).astype(np.float32, copy=True)
                det_boxes[cl_ind - 1].append(cls_dets)

            count += 1

        if val_itr % val_step == 0:
            torch.cuda.synchronize()
            te = time.perf_counter()
            print('im_detect: {:d}/{:d} time taken {:0.3f}'.format(
                count, num_images, te - ts))
            torch.cuda.synchronize()
            ts = time.perf_counter()

        if print_time and val_itr % val_step == 0:
            torch.cuda.synchronize()
            te = time.perf_counter()
            print('NMS stuff Time {:0.3f}'.format(te - tf))

    print('Evaluating detections for epoch number ', epoch)
    return evaluate_detections(gt_boxes, det_boxes, CLASSES,
                               iou_thresh=iou_thresh)
def test_net(net, save_root, exp_name, input_type, dataset, iteration, li_color_class, means_bgr, n_record_per_class, th_iou):
    """Test a SSD network on an Action image database, with visualisation.

    For every image the function restores a displayable BGR frame, draws the
    video class and the confidence threshold on it, marks detections (and
    ground truth when not recording), saves scores and decoded boxes to a
    ``.mat`` file, shows the frame in an OpenCV window and waits for a key
    press. Pressing ESC (key code 27) stops after the current batch.

    Args:
        net: network; called on the image batch, indexed as
            (loc, conf, priors), and must expose ``softmax``.
        save_root: root directory for the pickle cache and the .mat files.
        exp_name: experiment name used in the cache file path.
        input_type: input modality name used in the output paths.
        dataset: dataset exposing ``ids``, ``video_list``, ``CLASSES`` and
            ``_imgpath``.
        iteration: training iteration, used in paths and log messages.
        li_color_class: per-class colours, indexed by class id.
        means_bgr: per-channel means in BGR order; flipped and added back to
            the network input before display.
        n_record_per_class: when > 0, frames are written to a video and
            batches are skipped once a class exceeded this count.
        th_iou: IoU threshold forwarded to ``evaluate_detections``.

    Returns:
        Whatever ``evaluate_detections`` returns.
    """
    ''' print('type(means) : ', type(means)) print('means : ', means) '''
    shall_record = n_record_per_class > 0
    th_conf = args.conf_thresh
    th_nms = args.nms_thresh
    top_k = args.topk
    t3 = np.asarray(means_bgr)
    # Reverse the channel order of the means (BGR -> RGB).
    means_rgb = np.flipud(t3)
    # NOTE(review): unlike the non-visual test_net, the loader shuffles.
    val_data_loader = data.DataLoader(dataset, args.batch_size, num_workers=args.num_workers, shuffle=True, collate_fn=detection_collate, pin_memory=True)
    image_ids = dataset.ids
    save_ids = []
    val_step = 250
    num_images = len(dataset)
    video_list = dataset.video_list
    ''' print('type(dataset) : ', type(dataset)); print('num_images : ', num_images); print('len(video_list) : ', len(video_list)); exit() '''
    det_boxes = [[] for _ in range(len(CLASSES))]
    gt_boxes = []
    print_time = True
    batch_iterator = None
    count = 0
    torch.cuda.synchronize()
    ts = time.perf_counter()
    num_batches = len(val_data_loader)
    det_file = save_root + 'cache/' + exp_name + '/detection-' + input_type + '_' + str(
        iteration).zfill(6) + '.pkl'
    print('det_file : ', det_file)
    print('Number of images ', len(dataset), ' number of batchs', num_batches)
    frame_save_dir = save_root + 'detections/CONV-' + input_type + '-' + args.listid + '-' + str(
        iteration).zfill(6) + '/'
    print('\n\n\nDetections will be store in ', frame_save_dir, '\n\n')
    if shall_record:
        # Number of frames processed so far, keyed by class name.
        di_class_num_processed = {}
        fn_record = 'action_recognition_images_conf_thres_{:.2f}_nms_thres_{:.1f}_fpc_{}.avi'.format(
            th_conf, th_nms, n_record_per_class)
        writer = make_video_recorder(fn_record, (300, 300), 20)
    shall_stop = False
    for val_itr in range(len(val_data_loader)):
        print('\nval_itr : {} / {}'.format(val_itr, len(val_data_loader)))
        if not batch_iterator:
            batch_iterator = iter(val_data_loader)
        torch.cuda.synchronize()
        t1 = time.perf_counter()
        images_rgb, targets, img_indexs = next(batch_iterator)
        batch_size = images_rgb.size(0)
        if shall_record:
            # Skip the whole batch as soon as one of its images belongs to a
            # class that already exceeded the per-class quota.
            skip_this_batch = False
            for b in range(batch_size):
                img_idx = img_indexs[b]
                annot_info = dataset.ids[img_idx]
                video_id = annot_info[0]
                # The class name is the first path component of the video.
                video_name = dataset.video_list[video_id].split("/")[0]
                if video_name in di_class_num_processed:
                    if di_class_num_processed[video_name] > n_record_per_class:
                        skip_this_batch = True
                        break
                    di_class_num_processed[video_name] += 1
                else:
                    di_class_num_processed[video_name] = 1
            if skip_this_batch:
                continue
        height, width = images_rgb.size(2), images_rgb.size(3)
        li_margin_ratio_l_r_t_b = [0, 0, 0, 0]
        if args.cuda:
            # NOTE(review): ``volatile`` has no effect on PyTorch >= 0.4.
            images_rgb = Variable(images_rgb.cuda(), volatile=True)
        ######## networking forwarding ######################################################
        output = net(images_rgb)
        ######################################################################################
        loc_data = output[0]
        conf_preds = output[1]
        prior_data = output[2]
        if print_time and val_itr % val_step == 0:
            torch.cuda.synchronize()
            tf = time.perf_counter()
            print('Forward Time {:0.3f}'.format(tf - t1))
        # for each image in this batch
        for b in range(batch_size):
            img_idx = img_indexs[b]
            annot_info = dataset.ids[img_idx]
            video_id = annot_info[0]
            frame_num = annot_info[1]
            video_name = dataset.video_list[video_id]
            video_class = video_name.split("/")[0]
            img_name = dataset._imgpath + '/{:s}/{:05d}.jpg'.format(
                video_name, frame_num)
            print('img_name : ', img_name)
            # Rebuild a displayable frame: CHW -> HWC, add the means back,
            # then convert RGB -> BGR for OpenCV.
            # NOTE(review): when args.cuda is false images_rgb is a plain
            # tensor; confirm ``.data`` is available on it for the targeted
            # PyTorch version.
            t1_rgb = np.transpose(images_rgb[b].cpu().data.numpy(), (1, 2, 0))
            t2_rgb = t1_rgb + means_rgb
            t3_bgr = cv2.cvtColor(t2_rgb.astype(np.uint8), cv2.COLOR_RGB2BGR)
            # Ground truth is scaled in place to the input image size.
            gt = targets[b].numpy()
            gt[:, 0] *= width
            gt[:, 2] *= width
            gt[:, 1] *= height
            gt[:, 3] *= height
            id_vid = dataset.CLASSES.index(video_class)
            cv2.putText(t3_bgr, video_class, (X_OFFSET_GT_VID, Y_OFFSET_GT_VID), cv2.FONT_HERSHEY_DUPLEX, FONT_SCALE_GT_VID, li_color_class[id_vid])
            cv2.putText(t3_bgr, "conf. thres. : {:.2f}".format(th_conf), (int(width * 0.5 - 85), int(height - 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255))
            # Ground-truth boxes are only drawn when no video is recorded.
            if not shall_record:
                t3_bgr = mark_ground_truth(t3_bgr, gt, dataset.CLASSES, li_color_class)
            gt_boxes.append(gt)
            decoded_boxes = decode(loc_data[b].data, prior_data.data, cfg['variance']).clone()
            conf_scores = net.softmax(conf_preds[b]).data.clone()
            # mark_detections draws on the frame and returns the updated
            # per-class detection lists.
            t3_bgr, det_boxes = mark_detections(t3_bgr, conf_scores, dataset.CLASSES, decoded_boxes, (width, height), li_margin_ratio_l_r_t_b, li_color_class, top_k, th_conf, th_nms, det_boxes)
            annot_info = image_ids[img_idx]
            frame_num = annot_info[1]
            video_id = annot_info[0]
            videoname = video_list[video_id]
            # Save raw scores and decoded boxes of this frame as .mat.
            output_dir = frame_save_dir + videoname
            if not os.path.isdir(output_dir):
                os.makedirs(output_dir)
            output_file_name = output_dir + '/{:05d}.mat'.format(
                int(frame_num))
            save_ids.append(output_file_name)
            sio.savemat(output_file_name, mdict={
                'scores': conf_scores.cpu().numpy(),
                'loc': decoded_boxes.cpu().numpy()
            })
            if shall_record:
                writer.write(t3_bgr)
            count += 1
            cv2.imshow('t3_bgr', t3_bgr)
            # Blocks until a key is pressed (no timeout argument).
            k = cv2.waitKey() & 0xFF
            ''' if 255 != k: print('k : ', k) '''
            # 27 is the ESC key: finish this batch, then stop.
            if 27 == k:
                shall_stop = True
        if val_itr % val_step == 0:
            torch.cuda.synchronize()
            te = time.perf_counter()
            print('im_detect: {:d}/{:d} time taken {:0.3f}'.format(
                count, num_images, te - ts))
            torch.cuda.synchronize()
            ts = time.perf_counter()
        if print_time and val_itr % val_step == 0:
            torch.cuda.synchronize()
            te = time.perf_counter()
            print('NMS stuff Time {:0.3f}'.format(te - tf))
        if shall_stop:
            break
    print('Evaluating detections for itration number ', iteration)
    # Save detection after NMS along with GT
    # NOTE(review): the directory of det_file is not created here -- confirm
    # it exists before this function is called.
    with open(det_file, 'wb') as f:
        pickle.dump([gt_boxes, det_boxes, save_ids], f, pickle.HIGHEST_PROTOCOL)
    if shall_record:
        writer.release()
        convert_vid_2_animated_gif(fn_record)
    return evaluate_detections(gt_boxes, det_boxes, CLASSES, iou_thresh=th_iou)
def main():
    """Run the video detection demo on ``video_name``.

    Two networks are built and loaded: ``static_net`` (weights from
    ``static_dir``) and ``net`` (weights from ``trn_dir``). Every
    ``interval`` frames the static network is run and its decoded boxes
    replace the priors used by the detector; ``net`` is run on every frame.
    Detections above ``confidence_threshold`` are drawn, the frame is shown
    and, when ``save_dir`` is set, written to a video. Pressing space pauses;
    while paused, 's' saves the current frame and space resumes.

    All settings (``backbone``, ``ssd_dim``, ``num_classes``, ``deform``,
    ``device``, ...) are read from module-level names.
    """
    mean = (104, 117, 123)
    if 'FPN' in backbone:
        from model.refinedet_vgg import build_net
        static_net = build_net('test', size=ssd_dim, num_classes=num_classes, c7_channel=c7_channel, bn=bn)
        net = build_net('test', size=ssd_dim, num_classes=num_classes, c7_channel=c7_channel, bn=bn)
    else:
        from model.ssd4scale_vgg import build_net
        static_net = build_net('test', size=ssd_dim, num_classes=num_classes, c7_channel=c7_channel, bn=bn)
        # Only this network receives the ``deform`` option.
        net = build_net('test', size=ssd_dim, num_classes=num_classes, c7_channel=c7_channel, bn=bn, deform=deform)
    print('loading model!')
    static_net.load_state_dict(torch.load(static_dir))
    static_net.eval()
    static_net = static_net.to(device)
    net.load_state_dict(torch.load(trn_dir))
    net.eval()
    net = net.to(device)
    print('Finished loading model!', static_dir, trn_dir)
    detector = Detect(num_classes, 0, top_k, confidence_threshold, nms_threshold)
    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    frame_num = 0
    cap = cv2.VideoCapture(video_name)
    w, h = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
    # Size of the displayed / recorded frames.
    size = (640, 480)
    if save_dir:
        fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
        # Output video is named after the input file, with an .avi suffix.
        record = cv2.VideoWriter(
            os.path.join(save_dir, video_name.split('/')[-1].split('.')[0] + '.avi'),
            fourcc, cap.get(cv2.CAP_PROP_FPS), size)
    offset_list = list()
    ref_loc = list()
    while (cap.isOpened()):
        ret, frame = cap.read()
        if not ret:
            break
        h, w, _ = frame.shape
        frame_draw = frame.copy()
        im_trans = base_transform(frame, ssd_dim, mean)
        with torch.no_grad():
            # HWC image -> 1xCxHxW tensor on the target device.
            x = torch.from_numpy(im_trans).unsqueeze(0).permute(0, 3, 1, 2).to(device)
            if frame_num % interval == 0:
                # Key frame: refresh the priors from the static network.
                static_out = static_net(x, ret_loc=deform)
                priors_static = center_size(
                    decode(static_out[0][0], priors, [0.1, 0.2]))
                if deform:
                    ref_loc = static_out[2]
                    offset_list = list()
            # ret_off is True only when deform is set and no offsets are
            # cached yet (a bool is used to index the tuple).
            out = net(x, ref_loc=ref_loc, offset_list=offset_list,
                      ret_off=(False, True)[deform and not offset_list])
            # NOTE(review): torch.cuda.FloatTensor ties this call to CUDA
            # regardless of ``device`` -- confirm CPU runs are not expected.
            detections = detector.forward(out[0], out[1], priors_static,
                                          scale=torch.cuda.FloatTensor(
                                              [w, h, w, h]))
            # NOTE(review): this tests the length of ``detections`` but then
            # reads ``out[2]``; presumably ``len(out) == 3`` was meant --
            # verify against the network's return value.
            if len(detections) == 3:
                offset_list = out[2]
                ref_loc = list()
        # ``out`` is rebound to an empty list here; nothing below appends to
        # it, so the pickle written on 's' contains an empty list.
        out = list()
        for j in range(1, detections.size(1)):
            if detections[0, j, :, :].sum() == 0:
                continue
            for k in range(detections.size(2)):
                dets = detections[0, j, k, :]
                if dets.sum() == 0:
                    continue
                # A row of six values carries a trailing identity entry.
                boxes = dets[1:-1] if dets.size(0) == 6 else dets[1:]
                identity = dets[-1] if dets.size(0) == 6 else -1
                x_min = int(boxes[0] * w)
                x_max = int(boxes[2] * w)
                y_min = int(boxes[1] * h)
                y_max = int(boxes[3] * h)
                score = dets[0]
                if score > confidence_threshold:
                    # Label: class name plus the score cut to four characters.
                    put_str = VID_CLASSES_name[j - 1] + ':' + str(
                        np.around(score, decimals=2)).split('(')[-1].split(',')[0][:4]
                    color = (255, 0, 0)
                    cv2.rectangle(frame_draw, (x_min, y_min), (x_max, y_max), color, thickness=2)
                    cv2.putText(frame_draw, put_str, (x_min + 10, y_min - 10),
                                cv2.FONT_HERSHEY_DUPLEX, 0.8, color=color, thickness=1)
        print(str(frame_num))
        frame_num += 1
        frame_show = cv2.resize(frame_draw, size)
        cv2.imshow('frame', frame_show)
        if save_dir:
            record.write(frame_show)
        ch = cv2.waitKey(1)
        # 32 is the space bar: pause until space is pressed again.
        if ch == 32:
            while 1:
                in_ch = cv2.waitKey(10)
                if in_ch == 115:  # 's'
                    if save_dir:
                        print('save: ', frame_num)
                        torch.save(
                            out,
                            os.path.join(save_dir, '_%s.pkl' % str(frame_num)))
                        cv2.imwrite(
                            os.path.join(save_dir, '%s.jpg' % str(frame_num)),
                            frame)
                elif in_ch == 32:
                    break
    cap.release()
    if save_dir:
        record.release()
    cv2.destroyAllWindows()