Example #1
0
def build_siam_augmentation(cfg, is_train=True, modality='video'):
    """Build the Siamese data-augmentation pipeline from config.

    Training enables color jitter, random horizontal flip and (for the
    'image' modality) synthetic motion/compression augmentation; test
    mode disables all randomness and uses the test resize bounds.
    """
    # Motion/compression augmentation is off unless training on images.
    motion_limit = 0.0
    motion_blur_prob = 0.0
    compression_limit = 0.0

    if is_train:
        min_size = cfg.INPUT.MIN_SIZE_TRAIN
        max_size = cfg.INPUT.MAX_SIZE_TRAIN
        flip_prob = 0.5  # cfg.INPUT.FLIP_PROB_TRAIN
        brightness = cfg.INPUT.BRIGHTNESS
        contrast = cfg.INPUT.CONTRAST
        saturation = cfg.INPUT.SATURATION
        hue = cfg.INPUT.HUE
        if modality == 'image':
            motion_limit = cfg.INPUT.MOTION_LIMIT
            motion_blur_prob = cfg.INPUT.MOTION_BLUR_PROB
            compression_limit = cfg.INPUT.COMPRESSION_LIMIT
    else:
        min_size = cfg.INPUT.MIN_SIZE_TEST
        max_size = cfg.INPUT.MAX_SIZE_TEST
        flip_prob = 0.0
        brightness = 0.0
        contrast = 0.0
        saturation = 0.0
        hue = 0.0

    amodal = cfg.INPUT.AMODAL
    size_divisibility = cfg.DATALOADER.SIZE_DIVISIBILITY
    to_bgr255 = cfg.INPUT.TO_BGR255

    color_jitter = SiamVideoColorJitter(
        brightness=brightness,
        contrast=contrast,
        saturation=saturation,
        hue=hue,
    )
    normalize = T.Normalize(mean=cfg.INPUT.PIXEL_MEAN,
                            std=cfg.INPUT.PIXEL_STD,
                            to_bgr255=to_bgr255)

    # PIL-image stages first; ToTensor yields CHW RGB tensors in [0, 1],
    # which are then normalized.
    return Compose([
        color_jitter,
        SiamVideoMotionBlurAugment(motion_blur_prob),
        SiamVideoCompressionAugment(compression_limit),
        SiamVideoMotionAugment(motion_limit, amodal),
        SiamVideoResize(min_size, max_size, size_divisibility),
        SiamVideoRandomHorizontalFlip(prob=flip_prob),
        VideoTransformer(ToTensor()),
        VideoTransformer(normalize),
    ])
Example #2
0
def im_detect_bbox(model, images, target_scale, target_max_size, device):
    """
    Performs bbox detection on the original image.
    """
    preprocess = TT.Compose([
        T.Resize(target_scale, target_max_size),
        TT.ToTensor(),
        T.Normalize(mean=cfg.INPUT.PIXEL_MEAN,
                    std=cfg.INPUT.PIXEL_STD,
                    to_bgr255=cfg.INPUT.TO_BGR255),
    ])
    # Preprocess each image, then pad into a single batched ImageList.
    batch = to_image_list([preprocess(img) for img in images],
                          cfg.DATALOADER.SIZE_DIVISIBILITY)
    return model(batch.to(device))
Example #3
0
def im_detect_bbox_hflip(model, images, target_scale, target_max_size, device):
    """
    Performs bbox detection on the horizontally flipped image.
    Function signature is the same as for im_detect_bbox.
    """
    preprocess = TT.Compose([
        T.Resize(target_scale, target_max_size),
        TT.RandomHorizontalFlip(1.0),  # prob=1.0: every image is flipped
        TT.ToTensor(),
        T.Normalize(mean=cfg.INPUT.PIXEL_MEAN,
                    std=cfg.INPUT.PIXEL_STD,
                    to_bgr255=cfg.INPUT.TO_BGR255),
    ])
    batch = to_image_list([preprocess(img) for img in images],
                          cfg.DATALOADER.SIZE_DIVISIBILITY)
    boxlists = model(batch.to(device))

    # Mirror the predictions back into the original image frame.
    return [boxlist.transpose(0) for boxlist in boxlists]