Example #1
def make_model(
    model_status: str = "ucf101_trained",
    weights_path: Optional[str] = None
) -> Tuple[torch.nn.DataParallel, optim.SGD]:
    statuses = ("ucf101_trained", "kinetics_pretrained")
    if model_status not in statuses:
        raise ValueError(f"model_status {model_status} not in {statuses}")
    trained = model_status == "ucf101_trained"
    if not trained and weights_path is None:
        raise ValueError(
            "weights_path cannot be None for 'kinetics_pretrained'")

    opt = parse_opts(arguments=[])
    opt.dataset = "UCF101"
    opt.only_RGB = True
    opt.log = 0
    opt.batch_size = 1
    opt.arch = f"{opt.model}-{opt.model_depth}"

    if trained:
        opt.n_classes = 101
    else:
        opt.n_classes = 400
        opt.n_finetune_classes = 101
        opt.batch_size = 32
        opt.ft_begin_index = 4

        opt.pretrain_path = weights_path

    logger.info(f"Loading model... {opt.model} {opt.model_depth}")
    model, parameters = generate_model(opt)

    if trained and weights_path is not None:
        checkpoint = torch.load(weights_path, map_location=DEVICE)
        model.load_state_dict(checkpoint["state_dict"])

    # Initializing the optimizer
    if opt.pretrain_path:
        opt.weight_decay = 1e-5
        opt.learning_rate = 0.001
    if opt.nesterov:
        dampening = 0
    else:
        dampening = opt.dampening

    optimizer = optim.SGD(
        parameters,
        lr=opt.learning_rate,
        momentum=opt.momentum,
        dampening=dampening,
        weight_decay=opt.weight_decay,
        nesterov=opt.nesterov,
    )

    return model, optimizer
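
A minimal usage sketch for the function above, assuming the surrounding module provides parse_opts, generate_model, logger, and DEVICE; the checkpoint path below is hypothetical, not part of the original code.

# Hypothetical usage of make_model for inference with a local UCF101 checkpoint.
model, optimizer = make_model(
    model_status="ucf101_trained",
    weights_path="weights/ucf101_resnext101.pth",  # hypothetical checkpoint path
)
model.eval()  # inference only; the returned SGD optimizer is used when fine-tuning
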
Example #2
def preprocessing_fn_numpy(batch: np.ndarray):
    """
    batch is a batch of videos, (batch, frames, height, width, channels)

    Preprocessing resizes the height and width to 112 x 112 and reshapes
    each video to (n_stack, 3, 16, height, width), where n_stack = int(time/16).

    Outputs a list of videos, each of shape (n_stack, 3, 16, 112, 112)
    """
    sample_duration = 16  # expected number of consecutive frames as input to the model

    outputs = []
    for i, x in enumerate(batch):
        if x.ndim != 4:
            raise ValueError(
                f"sample {i} in batch has {x.ndim} dims, not 4 (FHWC)")
        if x.dtype in (float, np.float32):
            if x.max() > 1.0 or x.min() < 0.0:
                raise ValueError(
                    f"sample {i} is a float but not in [0.0, 1.0] range")
            x = (255 * x).round().astype(np.uint8)
        if x.dtype != np.uint8:
            raise ValueError(f"sample {i} - unrecognized dtype {x.dtype}")

        # select a fixed number of consecutive frames
        total_frames = x.shape[0]
        if total_frames < sample_duration / 2:
            raise ValueError(
                f"video is too short; requires >= {sample_duration / 2} frames"
            )
        if total_frames <= sample_duration:  # cyclic pad if not enough frames
            x = np.vstack([x, x[:sample_duration - total_frames]])

        # apply MARS preprocessing: scaling, cropping, normalizing
        opt = parse_opts(arguments=[])
        opt.modality = "RGB"
        opt.sample_size = 112
        x_Image = []  # convert each frame to PIL Image
        for frame in x:
            x_Image.append(Image.fromarray(frame))
        x_mars_preprocessed = preprocess_data.scale_crop(x_Image, 0, opt)

        # reshape into stacks of sample_duration frames: (n_stack, 3, 16, 112, 112)
        x_reshaped = []
        # use x.shape[0] (the possibly padded length) so short videos still yield one stack
        for ns in range(int(x.shape[0] / sample_duration)):
            np_frames = x_mars_preprocessed[
                :, ns * sample_duration:(ns + 1) * sample_duration, :, :
            ].numpy()
            x_reshaped.append(np_frames)
        outputs.append(np.array(x_reshaped, dtype=np.float32))
    return outputs
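
A small smoke-test sketch for this preprocessing using a synthetic uint8 batch; the frame count and resolution are illustrative assumptions, not values taken from the original code.

# Hypothetical check: 2 videos of 32 frames each, HWC uint8 frames.
batch = np.random.randint(0, 256, size=(2, 32, 240, 320, 3), dtype=np.uint8)
videos = preprocessing_fn_numpy(batch)
for v in videos:
    # 32 frames -> n_stack = 32 // 16 = 2 clips of 16 frames each
    print(v.shape)  # expected: (2, 3, 16, 112, 112)
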
Example #3
def preprocessing_fn(inputs):
    """
    Inputs is comprised of one or more videos, where each video
    is given as an ndarray with shape (1, time, height, width, 3).
    Preprocessing resizes the height and width to 112 x 112 and reshapes
    each video to (n_stack, 3, 16, height, width), where n_stack = int(time/16).

    Outputs is a list of videos, each of shape (n_stack, 3, 16, 112, 112)
    """
    sample_duration = 16  # expected number of consecutive frames as input to the model
    outputs = []
    if inputs.dtype == np.uint8:  # inputs is a single video, i.e., batch size == 1
        inputs = [inputs]
    # else, inputs is an ndarray (of type object) of ndarrays
    # each input is (1, time, height, width, 3) for a single video
    for input in inputs:
        input = np.squeeze(input)

        # select a fixed number of consecutive frames
        total_frames = input.shape[0]
        if total_frames <= sample_duration:  # cyclic pad if not enough frames
            input_fixed = np.vstack(
                (input, input[:sample_duration - total_frames, ...]))
            assert input_fixed.shape[0] == sample_duration
        else:
            input_fixed = input

        # apply MARS preprocessing: scaling, cropping, normalizing
        opt = parse_opts(arguments=[])
        opt.modality = "RGB"
        opt.sample_size = 112
        input_Image = []  # convert each frame to PIL Image
        for f in input_fixed:
            input_Image.append(Image.fromarray(f))
        input_mars_preprocessed = preprocess_data.scale_crop(
            input_Image, 0, opt)

        # reshape into stacks of sample_duration frames: (n_stack, 3, 16, 112, 112)
        input_reshaped = []
        # use the (possibly padded) input_fixed length so short videos still yield one stack
        for ns in range(int(input_fixed.shape[0] / sample_duration)):
            np_frames = input_mars_preprocessed[
                :, ns * sample_duration:(ns + 1) * sample_duration, :, :
            ].numpy()
            input_reshaped.append(np_frames)
        outputs.append(np.array(input_reshaped, dtype=np.float32))
    return outputs
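
This variant accepts either a single uint8 video or an object ndarray of per-video arrays. A sketch with synthetic data; the shapes below are assumptions chosen only for illustration.

# Hypothetical input form 1: a single uint8 video of shape (1, time, height, width, 3).
single = np.random.randint(0, 256, size=(1, 48, 240, 320, 3), dtype=np.uint8)
print(preprocessing_fn(single)[0].shape)  # expected: (3, 3, 16, 112, 112)

# Hypothetical input form 2: an object ndarray holding videos of different lengths.
ragged = np.empty(2, dtype=object)
ragged[0] = np.random.randint(0, 256, size=(1, 32, 240, 320, 3), dtype=np.uint8)
ragged[1] = np.random.randint(0, 256, size=(1, 64, 240, 320, 3), dtype=np.uint8)
for clip in preprocessing_fn(ragged):
    print(clip.shape)  # expected: (2, 3, 16, 112, 112) and (4, 3, 16, 112, 112)
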
Example #4
def make_model(model_status="ucf101_trained", weights_file=None):
    statuses = ("ucf101_trained", "kinetics_pretrained")
    if model_status not in statuses:
        raise ValueError(f"model_status {model_status} not in {statuses}")
    trained = model_status == "ucf101_trained"
    if not trained and weights_file is None:
        raise ValueError(
            "weights_file cannot be None for 'kinetics_pretrained'")

    if weights_file:
        filepath = maybe_download_weights_from_s3(weights_file)

    opt = parse_opts(arguments=[])
    opt.dataset = "UCF101"
    opt.only_RGB = True
    opt.log = 0
    opt.batch_size = 1
    opt.arch = f"{opt.model}-{opt.model_depth}"

    if trained:
        opt.n_classes = 101
    else:
        opt.n_classes = 400
        opt.n_finetune_classes = 101
        opt.batch_size = 32
        opt.ft_begin_index = 4

        opt.pretrain_path = filepath

    logger.info(f"Loading model... {opt.model} {opt.model_depth}")
    model, parameters = generate_model(opt)

    if trained and weights_file is not None:
        checkpoint = torch.load(filepath, map_location=DEVICE)

        # Fit the robustly trained checkpoint onto the original ResNeXt model:
        # strip the DataParallel 'module.' prefix, drop the attacker/normalizer
        # entries, and re-prefix the backbone weights with 'module.'
        state_dict_path = 'model'
        if 'model' not in checkpoint:
            state_dict_path = 'state_dict'
        sd = checkpoint[state_dict_path]
        sd = {k[len('module.'):]: v for k, v in sd.items()}
        items = list(sd.items())
        for key, val in items:
            if key.startswith('attacker.'):
                sd.pop(key)
            if key.startswith('model.'):
                new_key = 'module.' + key[len('model.'):]
                sd[new_key] = val
                sd.pop(key)
            if key == 'normalizer.new_mean' or key == 'normalizer.new_std':
                sd.pop(key)

        model.load_state_dict(sd)

    # Initializing the optimizer
    if opt.pretrain_path:
        opt.weight_decay = 1e-5
        opt.learning_rate = 0.001
    if opt.nesterov:
        dampening = 0
    else:
        dampening = opt.dampening

    optimizer = optim.SGD(
        parameters,
        lr=opt.learning_rate,
        momentum=opt.momentum,
        dampening=dampening,
        weight_decay=opt.weight_decay,
        nesterov=opt.nesterov,
    )

    return model, optimizer
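
A hedged usage sketch for the Kinetics-pretrained path of the function above, assuming maybe_download_weights_from_s3 can resolve the file name below (which is hypothetical) to a local checkpoint.

# Hypothetical fine-tuning setup starting from Kinetics-pretrained weights.
model, optimizer = make_model(
    model_status="kinetics_pretrained",
    weights_file="MARS_Kinetics_16f.pth",  # hypothetical weights file name
)
# generate_model wraps the network in torch.nn.DataParallel, so the raw module
# is available as model.module; the SGD optimizer covers the layers selected
# for fine-tuning via opt.ft_begin_index.
model.train()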