Example #1
class BSUVNet:
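    # NOTE: `seg`, `aug`, and `SemanticSegmentation` come from the surrounding
    # project (segmentation helpers, augmentation utilities, and a companion
    # config class); their import lines fall outside this excerpt.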

    # Model with FPM (Foreground Probability Map from the segmentation network)
    model_path = "./trained_models/BSUVNet-emptyBG-recentBG-FPM.mdl"
    seg_network = seg.segModel(SemanticSegmentation.yaml_path,
                               SemanticSegmentation.encoder_path,
                               SemanticSegmentation.decoder_path)

    # model without FPM
    # model_path = "./trained_models/BSUVNet-emptyBG-recentBG.mdl"
    # seg_network = None

    empty_bg = "automatic"  # Automatically create an empty BG frame as median of initial frames
    empty_win_len = 50  # Number of initial frames to be used for the empty BG model
    recent_bg = 50  # Number of last frames to be used for recent BG model

    seg_ch = seg_network is not None
    mean_rgb = [0.485, 0.456, 0.406]
    std_rgb = [0.229, 0.224, 0.225]
    transforms_pre = []
    transforms_post = [
        aug.ToTensor(),
        aug.NormalizeTensor(mean_rgb=mean_rgb,
                            std_rgb=std_rgb,
                            mean_seg=[0.5],
                            std_seg=[0.5],
                            segmentation_ch=seg_ch)
    ]
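
For orientation, a minimal sketch of how a config class like this might be consumed. Loading the `.mdl` file with torch.load and the transform-chaining helper are assumptions of this sketch, not code from the snippet:

import torch

cfg = BSUVNet                        # the config class above
model = torch.load(cfg.model_path)   # assumed: the .mdl file stores a full model
model.eval()

def preprocess(sample):
    # Apply the configured transform chains in order (hypothetical helper).
    for t in cfg.transforms_pre + cfg.transforms_post:
        sample = t(sample)
    return sample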
Example #2
class BSUVNet:
    """
    # Model with FPM
    model_path = "./trained_models/BSUVNet-emptyBG-recentBG-FPM.mdl"
    seg_network = seg.segModel(SemanticSegmentation.yaml_path,
                               SemanticSegmentation.encoder_path,
                               SemanticSegmentation.decoder_path)
    """

    # model without FPM
    model_path = "./trained_models/BSUVNet-emptyBG-recentBG.mdl"
    seg_network = None

    empty_bg = "manual"  # Use the manually supplied empty BG frame at empty_bg_path
    empty_win_len = 30  # Number of initial frames to be used for the empty BG model when empty_bg="automatic"
    empty_bg_path = "examples/Candela_m1_10_empty_BG.jpg"  # Path of the empty background. Only used when empty_bg="manual"
    recent_bg = 10  # Number of last frames to be used for recent BG model

    seg_ch = seg_network is not None
    mean_rgb = [0.485, 0.456, 0.406]
    std_rgb = [0.229, 0.224, 0.225]
    transforms_pre = []
    transforms_post = [
        aug.ToTensor(),
        aug.NormalizeTensor(mean_rgb=mean_rgb,
                            std_rgb=std_rgb,
                            mean_seg=[0.5],
                            std_seg=[0.5],
                            segmentation_ch=seg_ch)
    ]
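
Both configs describe the "automatic" mode as building the empty background from the median of the initial frames. A minimal sketch of that idea, assuming frames arrive as equally shaped NumPy arrays (`median_empty_bg` is a hypothetical helper, not part of the original code):

import numpy as np

def median_empty_bg(frames, win_len=50):
    # Per-pixel median over the first win_len frames; moving objects average
    # out, leaving an approximately empty background.
    stack = np.stack(frames[:win_len], axis=0)  # (win_len, H, W, C)
    return np.median(stack, axis=0).astype(frames[0].dtype)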
Example #3
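# (The excerpt opens mid-statement: the truncated call ending at shuffle=True
# appears to build `dataloader_mask`, the mask loader consumed by
# aug.RandomMask below.)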
                                      shuffle=True)

    additional_augs.append(
        [aug.RandomMask(inp_size, dataloader_mask, mask_prob=aug_ioa)])

if aug_noise:
    noise = 0.01
    additional_augs.append([aug.AdditiveNoise(noise)])

mean_rgb = [0.485, 0.456, 0.406]
std_rgb = [0.229, 0.224, 0.225]
mean_seg = [0.5]
std_seg = [0.5]

transforms_tr = [
    crop_and_aug, *additional_augs, [aug.ToTensor()],
    [
        aug.NormalizeTensor(mean_rgb=mean_rgb,
                            std_rgb=std_rgb,
                            mean_seg=mean_seg,
                            std_seg=std_seg,
                            segmentation_ch=seg_ch)
    ]
]

transforms_test = [[aug.CenterCrop(inp_size)], [aug.ToTensor()],
                   [
                       aug.NormalizeTensor(mean_rgb=mean_rgb,
                                           std_rgb=std_rgb,
                                           mean_seg=mean_seg,
                                           std_seg=std_seg,
                                           segmentation_ch=seg_ch)
                   ]]
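
Each element of `transforms_tr` is itself a list (e.g. `crop_and_aug` holds several crop variants, while `[aug.ToTensor()]` is a singleton group), which suggests one augmentation is sampled per group at train time. A sketch of that interpretation; `apply_transform_groups` is a hypothetical helper, not part of the original code:

import random

def apply_transform_groups(sample, groups):
    # For each group, draw one augmentation at random and apply it in order.
    for group in groups:
        sample = random.choice(group)(sample)
    return sample

# e.g. sample = apply_transform_groups(sample, transforms_tr)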
Example #4
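# Assumed context for this excerpt: standard imports (os, cv2, numpy as np,
# torch) plus project modules providing `aug`, `CDNet2014Loader`, `data_config`,
# `getValid`, and `tensor2double`; their import lines fall outside the snippet.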
def evalVideo(cat,
              vid,
              model,
              empty_bg=False,
              recent_bg=False,
              segmentation_ch=False,
              eps=1e-5,
              save_vid=False,
              save_outputs="",
              model_name="",
              debug=False,
              use_selected=False,
              multiplier=16):
    """ Evalautes the trained model on all ROI frames of cat/vid
    Args:
        :cat (string):                  Category
        :video (string):                Video
        :model (torch model):           Trained PyTorch model
        :empty_bg (boolean):            Boolean for using the empty background frame
        :recent_bg (boolean):           Boolean for using the recent background frame
        :segmentation_ch (boolean):     Boolean for using the segmentation maps
        :eps (float):                   A small multiplier for making the operations easier
        :save_vid (boolean):            Boolean for saving the output as a video
        :save_outputs (str):            Folder path to save the outputs If = "" do not save
        :model_name (string):           Name of the model for logging. Important when save_vid=True
        :debug (boolean):               Use for quick debugging
    """

    transforms = [[aug.ToTensor()],
                  [
                      aug.NormalizeTensor(mean_rgb=[0.485, 0.456, 0.406],
                                          std_rgb=[0.229, 0.224, 0.225],
                                          mean_seg=[0.5],
                                          std_seg=[0.5],
                                          segmentation_ch=segmentation_ch)
                  ]]
    dataloader = CDNet2014Loader({cat: [vid]},
                                 empty_bg=empty_bg,
                                 recent_bg=recent_bg,
                                 segmentation_ch=segmentation_ch,
                                 transforms=transforms,
                                 use_selected=use_selected,
                                 multiplier=0)
    tensorloader = torch.utils.data.DataLoader(dataset=dataloader,
                                               batch_size=1,
                                               shuffle=False,
                                               num_workers=1)

    if save_vid:
        im = next(iter(dataloader))[0][0]
        h, w = im.shape
        if model_name.endswith("_manualBG"):
            model_name = model_name[:-9]
        if model_name.endswith("_autoBG"):
            model_name = model_name[:-7]
        vid_path = os.path.join(data_config.save_dir, model_name,
                                f"{cat}_{vid}.mp4")
        print(vid_path)
        # Name the writer `vid_writer` so the `vid` string is not shadowed;
        # it is still needed for the save_outputs paths below.
        vid_writer = cv2.VideoWriter(vid_path, cv2.VideoWriter_fourcc(*'MP4V'),
                                     30, (3 * w + 20, h))

    if save_outputs:
        # os.makedirs with exist_ok=True creates all intermediate directories.
        output_path = os.path.join(data_config.save_dir, "outputs",
                                   save_outputs, "results")
        os.makedirs(os.path.join(output_path, cat, vid), exist_ok=True)

    model.eval()  # Evaluation mode
    tp, fp, fn = 0, 0, 0

    for i, data in enumerate(tensorloader):

        if debug and i >= 100:
            break
        if (i + 1) % 1000 == 0:
            print("%d/%d" % (i + 1, len(tensorloader)))
        input, label = data[0].float(), data[1].float()

        input, label = input.cuda(), label.cuda()
        _, _, h, w = input.shape
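        # Pad on the right and bottom so H and W become multiples of `multiplier`
        # (-x % m is the smallest non-negative pad up to the next multiple of m),
        # so the network's strided stages divide the input evenly.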
        right_pad, bottom_pad = -w % multiplier, -h % multiplier
        zeropad = torch.nn.ZeroPad2d((0, right_pad, 0, bottom_pad))

        input = zeropad(input)
        output = model(input)

        output = output[:, :, :h, :w]
        label_1d, output_1d = getValid(label, output)

        if save_vid:
            input_np = tensor2double(input)
            label_np = label.cpu().detach().numpy()[0, 0, :, :]
            output_np = output.cpu().detach().numpy()[0, 0, :, :]

            vid_fr = np.ones((h, 3 * w + 20, 3)) * 0.5
            #print(vid_fr.shape, input_np.shape, label_np.shape, output_np.shape)
            vid_fr[:, :w, :] = input_np[:, :, -3:]

            for k in range(3):
                vid_fr[:, w + 10:2 * w + 10, k] = label_np
                vid_fr[:, 2 * w + 20:, k] = output_np

            vid_writer.write((vid_fr[:, :, ::-1] * 255).astype(np.uint8))

        if save_outputs:
            output_np = output.cpu().detach().numpy()[0, 0, :, :]
            output_np = (output_np > 0.5) * 1
            h, w = output_np.shape
            output_fr = np.ones((h, w, 3))
            for k in range(3):
                output_fr[:, :, k] = output_np
            fname = os.path.join(output_path, cat, vid,
                                 f"bin{str(i+1).zfill(6)}.png")
            cv2.imwrite(fname, (output_fr * 255).astype(np.uint8))

        tp += eps * torch.sum(label_1d * output_1d).item()
        fp += eps * torch.sum((1 - label_1d) * output_1d).item()
        fn += eps * torch.sum(label_1d * (1 - output_1d)).item()
        del input, label, output, label_1d, output_1d

    # Calculate the statistics
    prec = tp / (tp + fp) if tp + fp > 0 else float('nan')
    recall = tp / (tp + fn) if tp + fn > 0 else float('nan')
    f_score = 2 * (prec * recall) / (
        prec + recall) if prec + recall > 0 else float('nan')
    if save_vid:
        vid_writer.release()

    return 1 - recall, prec, f_score
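
A hedged usage sketch, assuming the imports noted above; the model path and the category/video pair are placeholders, and evalVideo returns (false-negative rate, precision, F-score):

model = torch.load("./trained_models/BSUVNet-emptyBG-recentBG.mdl").cuda()
fnr, prec, f_score = evalVideo("baseline", "highway", model,
                               empty_bg=True, recent_bg=True)
print("FNR=%.4f  precision=%.4f  F-score=%.4f" % (fnr, prec, f_score))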
Example #5
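# As in Example #4, `torch`, `aug`, `CDNet2014Loader`, and `getValid` are
# assumed to be imported from the surrounding project.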
def evalVideo(cat,
              vid,
              model,
              empty_bg=False,
              recent_bg=False,
              segmentation_ch=False,
              eps=1e-5,
              adversary="no"):
    """ Evalautes the trained model on all ROI frames of cat/vid
    Args:
        :cat (string):                  Category
        :video (string):                Video
        :model (torch model):           Trained PyTorch model
        :empty_bg (boolean):            Boolean for using the empty background frame
        :recent_bg (boolean):           Boolean for using the recent background frame
        :segmentation_ch (boolean):     Boolean for using the segmentation maps
        :eps (float):                   A small multiplier for making the operations easier
        :adversary (str):               "no": No adversarial part
                                        "dann": Domain adversarial neural network
    """

    transforms = [
        aug.ToTensor(),
        aug.NormalizeTensor(mean_rgb=[0.485, 0.456, 0.406],
                            std_rgb=[0.229, 0.224, 0.225],
                            mean_seg=[0.5],
                            std_seg=[0.5],
                            segmentation_ch=segmentation_ch)
    ]
    dataloader = CDNet2014Loader({cat: [vid]},
                                 empty_bg=empty_bg,
                                 recent_bg=recent_bg,
                                 segmentation_ch=segmentation_ch,
                                 transforms=transforms,
                                 use_selected=False,
                                 empty_bg_radnomize=False)
    tensorloader = torch.utils.data.DataLoader(dataset=dataloader,
                                               batch_size=1,
                                               shuffle=False,
                                               num_workers=1)

    model.eval()  # Evaluation mode
    tp, fp, fn = 0, 0, 0
    for i, data in enumerate(tensorloader):
        if (i + 1) % 100 == 0:
            print("%d/%d" % (i + 1, len(tensorloader)))
        input, label = data[0].float(), data[1].float()
        """TO-DO!
        Do not use .cuda() here. it is not modular. Try to make these more modular
        """
        input, label = input.cuda(), label.cuda()
        if adversary == "no":
            output = model(input)
        elif adversary == "dann":
            output, _ = model(input)
        elif adversary == "agnostic_dann":
            output, _, _ = model(input, alpha=[0, 0])
        label_1d, output_1d = getValid(label, output)
        tp += eps * torch.sum(label_1d * output_1d).item()
        fp += eps * torch.sum((1 - label_1d) * output_1d).item()
        fn += eps * torch.sum(label_1d * (1 - output_1d)).item()

    # Calculate the statistics
    prec = tp / (tp + fp) if tp + fp > 0 else float('nan')
    recall = tp / (tp + fn) if tp + fn > 0 else float('nan')
    f_score = 2 * (prec * recall) / (
        prec + recall) if prec + recall > 0 else float('nan')
    return 1 - recall, prec, f_score
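
The same return convention applies here: (false-negative rate, precision, F-score). A hedged usage sketch for the adversarial variant; `dann_model` is a placeholder for a model whose forward pass returns (output, domain_logits):

fnr, prec, f_score = evalVideo("baseline", "pedestrians", dann_model,
                               segmentation_ch=True, adversary="dann")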