class BSUVNet:
    # Model with FPM
    model_path = "./trained_models/BSUVNet-emptyBG-recentBG-FPM.mdl"
    seg_network = seg.segModel(SemanticSegmentation.yaml_path,
                               SemanticSegmentation.encoder_path,
                               SemanticSegmentation.decoder_path)
    # Model without FPM
    # model_path = "./trained_models/BSUVNet-emptyBG-recentBG.mdl"
    # seg_network = None

    emtpy_bg = "automatic"  # Automatically create an empty BG frame as the median of the initial frames
    empty_win_len = 50      # Number of initial frames to be used for the empty BG model
    recent_bg = 50          # Number of most recent frames to be used for the recent BG model

    seg_ch = seg_network is not None  # Segmentation channel is active only when an FPM network is loaded
    mean_rgb = [0.485, 0.456, 0.406]
    std_rgb = [0.229, 0.224, 0.225]
    transforms_pre = []
    transforms_post = [
        aug.ToTensor(),
        aug.NormalizeTensor(mean_rgb=mean_rgb, std_rgb=std_rgb,
                            mean_seg=[0.5], std_seg=[0.5],
                            segmentation_ch=seg_ch)
    ]
class BSUVNet: """ # Model with FPM model_path = "./trained_models/BSUVNet-emptyBG-recentBG-FPM.mdl" seg_network = seg.segModel(SemanticSegmentation.yaml_path, SemanticSegmentation.encoder_path, SemanticSegmentation.decoder_path) """ # model without FPM model_path = "./trained_models/BSUVNet-emptyBG-recentBG.mdl" seg_network = None emtpy_bg = "manual" # Automatically create an empty BG frame as median of initial frames empty_win_len = 30 # Number of initial frames to be used for the empty BG model when empty_bg="automatic" empty_bg_path = "examples/Candela_m1_10_empty_BG.jpg" # Path of the empty background. Only used when empty_bg="manual" recent_bg = 10 # Number of last frames to be used for recent BG model seg_ch = False if seg_network is None else True mean_rgb = [0.485, 0.456, 0.406] std_rgb = [0.229, 0.224, 0.225] transforms_pre = [] transforms_post = [ aug.ToTensor(), aug.NormalizeTensor(mean_rgb=mean_rgb, std_rgb=std_rgb, mean_seg=[0.5], std_seg=[0.5], segmentation_ch=seg_ch) ]
if aug_noise:
    noise = 0.01
    additional_augs.append([aug.AdditiveNoise(noise)])

mean_rgb = [0.485, 0.456, 0.406]
std_rgb = [0.229, 0.224, 0.225]
mean_seg = [0.5]
std_seg = [0.5]

transforms_tr = [
    crop_and_aug,
    *additional_augs,
    [aug.ToTensor()],
    [
        aug.NormalizeTensor(mean_rgb=mean_rgb, std_rgb=std_rgb,
                            mean_seg=mean_seg, std_seg=std_seg,
                            segmentation_ch=seg_ch)
    ]
]
transforms_test = [
    [aug.CenterCrop(inp_size)],
    [aug.ToTensor()],
    [
        aug.NormalizeTensor(mean_rgb=mean_rgb, std_rgb=std_rgb,
                            mean_seg=mean_seg, std_seg=std_seg,
                            segmentation_ch=seg_ch)
    ]
]

dataloader_tr = CDNet2014Loader(
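# Sketch of how a nested transform list like transforms_tr above can be
# consumed. This is an assumption about CDNet2014Loader's convention, stated
# for illustration only: each inner list is a group of candidate
# augmentations, one of which is sampled per training example, and the groups
# run in sequence.
import random

def apply_transform_groups(sample, transform_groups):
    for group in transform_groups:
        transform = random.choice(group)  # pick one augmentation per group
        sample = transform(sample)
    return sample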
def evalVideo(cat, vid, model, empty_bg=False, recent_bg=False,
              segmentation_ch=False, eps=1e-5, save_vid=False,
              save_outputs="", model_name="", debug=False,
              use_selected=False, multiplier=16):
    """ Evaluates the trained model on all ROI frames of cat/vid
    Args:
        :cat (string):              Category
        :vid (string):              Video
        :model (torch model):       Trained PyTorch model
        :empty_bg (boolean):        Use the empty background frame
        :recent_bg (boolean):       Use the recent background frame
        :segmentation_ch (boolean): Use the segmentation maps
        :eps (float):               Small scale factor applied to the TP/FP/FN
                                    accumulators; it cancels in the final ratios
        :save_vid (boolean):        Save the output as a video
        :save_outputs (str):        Folder path to save the outputs; if "" do not save
        :model_name (string):       Name of the model for logging; important when save_vid=True
        :debug (boolean):           Evaluate only the first 100 frames for quick debugging
        :use_selected (boolean):    Use only the selected frames of cat/vid
        :multiplier (int):          Zero-pad the input so its height and width
                                    are multiples of this value
    """
    transforms = [
        [aug.ToTensor()],
        [
            aug.NormalizeTensor(mean_rgb=[0.485, 0.456, 0.406],
                                std_rgb=[0.229, 0.224, 0.225],
                                mean_seg=[0.5], std_seg=[0.5],
                                segmentation_ch=segmentation_ch)
        ]
    ]
    dataloader = CDNet2014Loader({cat: [vid]}, empty_bg=empty_bg,
                                 recent_bg=recent_bg,
                                 segmentation_ch=segmentation_ch,
                                 transforms=transforms,
                                 use_selected=use_selected, multiplier=0)
    tensorloader = torch.utils.data.DataLoader(dataset=dataloader, batch_size=1,
                                               shuffle=False, num_workers=1)

    if save_vid:
        im = next(iter(dataloader))[0][0]
        h, w = im.shape
        if model_name.endswith("_manualBG"):
            model_name = model_name[:-9]
        if model_name.endswith("_autoBG"):
            model_name = model_name[:-7]
        vid_path = os.path.join(data_config.save_dir, model_name, f"{cat}_{vid}.mp4")
        print(vid_path)
        # Use a separate name for the writer; reassigning `vid` would clobber
        # the video name, which the save_outputs branch below still needs.
        vid_writer = cv2.VideoWriter(vid_path, cv2.VideoWriter_fourcc(*'MP4V'),
                                     30, (3 * w + 20, h))

    if save_outputs:
        output_path = os.path.join(data_config.save_dir, "outputs", save_outputs)
        if not os.path.exists(output_path):
            os.makedirs(output_path)
        output_path = os.path.join(output_path, "results")
        if not os.path.exists(output_path):
            os.makedirs(output_path)
        if not os.path.exists(os.path.join(output_path, cat)):
            os.makedirs(os.path.join(output_path, cat))
        if not os.path.exists(os.path.join(output_path, cat, vid)):
            os.makedirs(os.path.join(output_path, cat, vid))

    model.eval()  # Evaluation mode
    tp, fp, fn = 0, 0, 0
    for i, data in enumerate(tensorloader):
        if debug and i >= 100:
            break
        if (i + 1) % 1000 == 0:
            print("%d/%d" % (i + 1, len(tensorloader)))
        input, label = data[0].float(), data[1].float()
        input, label = input.cuda(), label.cuda()

        # Zero-pad the input so H and W are multiples of `multiplier`,
        # then crop the network output back to the original size.
        _, _, h, w = input.shape
        right_pad, bottom_pad = -w % multiplier, -h % multiplier
        zeropad = torch.nn.ZeroPad2d((0, right_pad, 0, bottom_pad))
        input = zeropad(input)
        output = model(input)
        output = output[:, :, :h, :w]

        label_1d, output_1d = getValid(label, output)

        if save_vid:
            input_np = tensor2double(input)
            label_np = label.cpu().detach().numpy()[0, 0, :, :]
            output_np = output.cpu().detach().numpy()[0, 0, :, :]
            # Compose input | label | output side by side, separated by gray bars
            vid_fr = np.ones((h, 3 * w + 20, 3)) * 0.5
            vid_fr[:, :w, :] = input_np[:, :, -3:]
            for k in range(3):
                vid_fr[:, w + 10:2 * w + 10, k] = label_np
                vid_fr[:, 2 * w + 20:, k] = output_np
            vid_writer.write((vid_fr[:, :, ::-1] * 255).astype(np.uint8))

        if save_outputs:
            output_np = output.cpu().detach().numpy()[0, 0, :, :]
            output_np = (output_np > 0.5) * 1
            h, w = output_np.shape
            output_fr = np.ones((h, w, 3))
            for k in range(3):
                output_fr[:, :, k] = output_np
            fname = os.path.join(output_path, cat, vid, f"bin{str(i+1).zfill(6)}.png")
            cv2.imwrite(fname, (output_fr * 255).astype(np.uint8))

        tp += eps * torch.sum(label_1d * output_1d).item()
        fp += eps * torch.sum((1 - label_1d) * output_1d).item()
        fn += eps * torch.sum(label_1d * (1 - output_1d)).item()
        del input, label, output, label_1d, output_1d

    # Calculate the statistics
    prec = tp / (tp + fp) if tp + fp > 0 else float('nan')
    recall = tp / (tp + fn) if tp + fn > 0 else float('nan')
    f_score = 2 * (prec * recall) / (prec + recall) if prec + recall > 0 else float('nan')

    if save_vid:
        vid_writer.release()
    return 1 - recall, prec, f_score
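# Why scaling the tp/fp/fn accumulators by eps is harmless: eps cancels in
# every ratio above, so it only keeps the running sums numerically small.
# Quick self-contained check with made-up counts:
eps = 1e-5
tp_raw, fp_raw, fn_raw = 1200, 300, 150
tp, fp, fn = eps * tp_raw, eps * fp_raw, eps * fn_raw
assert abs(tp / (tp + fp) - tp_raw / (tp_raw + fp_raw)) < 1e-12  # precision = 0.8
assert abs(tp / (tp + fn) - tp_raw / (tp_raw + fn_raw)) < 1e-12  # recall = 0.888...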
def evalVideo(cat, vid, model, empty_bg=False, recent_bg=False,
              segmentation_ch=False, eps=1e-5, adversary="no"):
    """ Evaluates the trained model on all ROI frames of cat/vid
    Args:
        :cat (string):              Category
        :vid (string):              Video
        :model (torch model):       Trained PyTorch model
        :empty_bg (boolean):        Use the empty background frame
        :recent_bg (boolean):       Use the recent background frame
        :segmentation_ch (boolean): Use the segmentation maps
        :eps (float):               Small scale factor applied to the TP/FP/FN
                                    accumulators; it cancels in the final ratios
        :adversary (str):           "no": no adversarial part
                                    "dann": domain adversarial neural network
                                    "agnostic_dann": DANN variant whose forward
                                    takes alpha and returns two extra outputs
    """
    transforms = [
        aug.ToTensor(),
        aug.NormalizeTensor(mean_rgb=[0.485, 0.456, 0.406],
                            std_rgb=[0.229, 0.224, 0.225],
                            mean_seg=[0.5], std_seg=[0.5],
                            segmentation_ch=segmentation_ch)
    ]
    dataloader = CDNet2014Loader({cat: [vid]}, empty_bg=empty_bg,
                                 recent_bg=recent_bg,
                                 segmentation_ch=segmentation_ch,
                                 transforms=transforms,
                                 use_selected=False, empty_bg_radnomize=False)
    tensorloader = torch.utils.data.DataLoader(dataset=dataloader, batch_size=1,
                                               shuffle=False, num_workers=1)

    model.eval()  # Evaluation mode
    tp, fp, fn = 0, 0, 0
    for i, data in enumerate(tensorloader):
        if (i + 1) % 100 == 0:
            print("%d/%d" % (i + 1, len(tensorloader)))
        input, label = data[0].float(), data[1].float()
        # TO-DO: Do not use .cuda() here; it is not modular.
        input, label = input.cuda(), label.cuda()

        if adversary == "no":
            output = model(input)
        elif adversary == "dann":
            output, _ = model(input)
        elif adversary == "agnostic_dann":
            output, _, _ = model(input, alpha=[0, 0])

        label_1d, output_1d = getValid(label, output)
        tp += eps * torch.sum(label_1d * output_1d).item()
        fp += eps * torch.sum((1 - label_1d) * output_1d).item()
        fn += eps * torch.sum(label_1d * (1 - output_1d)).item()

    # Calculate the statistics
    prec = tp / (tp + fp) if tp + fp > 0 else float('nan')
    recall = tp / (tp + fn) if tp + fn > 0 else float('nan')
    f_score = 2 * (prec * recall) / (prec + recall) if prec + recall > 0 else float('nan')

    return 1 - recall, prec, f_score
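# Hypothetical driver (not from the repo) showing how this evalVideo variant
# is called and what it returns: the first value is 1 - recall, i.e. the false
# negative rate. "baseline"/"highway" is a standard CDNet2014 category/video
# pair; `model` is assumed to be a trained network already moved to the GPU.
fnr, precision, f_score = evalVideo("baseline", "highway", model,
                                    empty_bg=True, recent_bg=True,
                                    segmentation_ch=False, adversary="no")
print("FNR: %.4f  Precision: %.4f  F-score: %.4f" % (fnr, precision, f_score))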