Example #1
def r3d_34(cfg):
    # 3D ResNet-34 (R3D): full 3x3x3 convolutions, BasicBlock stacked [3, 4, 6, 3].
    model = VideoResNet(block=BasicBlock,
                        conv_makers=[Conv3DSimple] * 4,
                        layers=[3, 4, 6, 3],
                        stem=BasicStem)
    # Swap the classifier head for the target dataset.
    model.fc = nn.Linear(model.fc.in_features, cfg.CONFIG.DATA.NUM_CLASSES)
    return model
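All the snippets on this page assume the building blocks of torchvision's video ResNet module are already in scope. A minimal import block that would cover them, assuming an older torchvision release that still exposes model_urls (roughly 0.13 and earlier), might look like:

import torch
import torch.nn as nn
from torch.hub import load_state_dict_from_url
from torchvision.models.video.resnet import (
    VideoResNet, BasicBlock, BasicStem, R2Plus1dStem,
    Conv3DSimple, Conv2Plus1D, model_urls,
)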
Example #2
def r3d_18(cfg):
    # 3D ResNet-18 pretrained on Kinetics-400 via torchvision's model zoo.
    model = VideoResNet(block=BasicBlock,
                        conv_makers=[Conv3DSimple] * 4,
                        layers=[2, 2, 2, 2],
                        stem=BasicStem)
    state_dict = load_state_dict_from_url(model_urls['r3d_18'], progress=True)
    model.load_state_dict(state_dict)
    # Keep the 400-way Kinetics head only when training on Kinetics itself.
    if cfg.CONFIG.DATA.DATASET != 'kinetics':
        model.fc = nn.Linear(model.fc.in_features, cfg.CONFIG.DATA.NUM_CLASSES)
    print('Pretrained Model Weight Loaded')
    return model
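A sketch of how such a cfg-driven constructor can be exercised stand-alone. The SimpleNamespace stand-in below is hypothetical; it only mimics the two attributes r3d_18() actually reads and is not part of the original config system:

from types import SimpleNamespace

# Hypothetical stand-in for the real cfg object.
cfg = SimpleNamespace(CONFIG=SimpleNamespace(
    DATA=SimpleNamespace(DATASET='ucf101', NUM_CLASSES=101)))

model = r3d_18(cfg)  # loads Kinetics-400 weights, then swaps the head for 101 classes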
Example #3
def _generic_resnet(arch, pretrained=False, progress=False, **kwargs):
    # Shared factory: the concrete architecture comes in through **kwargs.
    model = VideoResNet(**kwargs)

    # We need exact Caffe2 momentum for BatchNorm scaling
    for m in model.modules():
        if isinstance(m, nn.BatchNorm3d):
            m.eps = 1e-3
            m.momentum = 0.9

    if pretrained:
        state_dict = torch.hub.load_state_dict_from_url(model_urls[arch],
                                                        progress=progress)
        model.load_state_dict(state_dict)

    return model
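For illustration, the factory could rebuild torchvision's 18-layer R(2+1)D variant. The 'r2plus1d_18' key matches torchvision's own model_urls; whether this file's model_urls carries the same entry is an assumption:

model = _generic_resnet('r2plus1d_18',
                        pretrained=True,
                        progress=True,
                        block=BasicBlock,
                        conv_makers=[Conv2Plus1D] * 4,
                        layers=[2, 2, 2, 2],
                        stem=R2Plus1dStem)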
Example #4
def r2plus1d_34(num_classes, pretrained=False, progress=False, arch=None):
    # R(2+1)D-34: factorized (2+1)D convolutions, BasicBlock stacked [3, 4, 6, 3].
    model = VideoResNet(block=BasicBlock,
                        conv_makers=[Conv2Plus1D] * 4,
                        layers=[3, 4, 6, 3],
                        stem=R2Plus1dStem)

    # The head is resized before loading, so num_classes must match the
    # checkpoint's class count when pretrained=True.
    model.fc = nn.Linear(model.fc.in_features, out_features=num_classes)

    # Fix difference in PyTorch vs Caffe2 architecture
    # https://github.com/facebookresearch/VMZ/issues/89
    # https://github.com/pytorch/vision/issues/1265
    model.layer2[0].conv2[0] = Conv2Plus1D(128, 128, 288)
    model.layer3[0].conv2[0] = Conv2Plus1D(256, 256, 576)
    model.layer4[0].conv2[0] = Conv2Plus1D(512, 512, 1152)

    # We need exact Caffe2 momentum for BatchNorm scaling
    for m in model.modules():
        if isinstance(m, nn.BatchNorm3d):
            m.eps = 1e-3
            m.momentum = 0.9

    if pretrained:
        # arch must name a key in model_urls when pretrained=True.
        state_dict = torch.hub.load_state_dict_from_url(model_urls[arch],
                                                        progress=progress)
        model.load_state_dict(state_dict)

    # Also expose the stem under the conv1 attribute.
    model.conv1 = model.stem

    return model
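The 288/576/1152 values are not arbitrary: they follow the VMZ/Caffe2 rule of sizing the intermediate (2+1)D planes per convolution so that the 2D-spatial plus 1D-temporal pair costs about as many parameters as one full 3x3x3 convolution, whereas torchvision reuses the block-level value for both convolutions in a block. A quick check of the formula (the helper name is mine):

def midplanes(in_planes, out_planes):
    # Parameter-matching width: (2+1)D pair ~ one 3x3x3 convolution.
    return (in_planes * out_planes * 3 * 3 * 3) // (in_planes * 3 * 3 + 3 * out_planes)

assert midplanes(128, 128) == 288
assert midplanes(256, 256) == 576
assert midplanes(512, 512) == 1152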
Example #5
def r2plus1d_34(pretrained=True, progress=False, **kwargs):
    # Extra arguments (e.g. num_classes) are forwarded to VideoResNet; when
    # pretrained=True they must match the checkpoint, since loading is strict.
    model = VideoResNet(
        block=BasicBlock,
        conv_makers=[Conv2Plus1D] * 4,
        layers=[3, 4, 6, 3],
        stem=R2Plus1dStem,
        **kwargs,
    )

    # We need exact Caffe2 momentum for BatchNorm scaling
    for m in model.modules():
        if isinstance(m, nn.BatchNorm3d):
            m.eps = 1e-3
            m.momentum = 0.9

    if pretrained:
        state_dict = torch.hub.load_state_dict_from_url(R2PLUS1D_34_MODEL_URL,
                                                        progress=progress)
        model.load_state_dict(state_dict)

    return model
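Since this variant keeps the checkpoint's original classifier, fine-tuning on a new label set means swapping the head after loading. A minimal sketch, assuming R2PLUS1D_34_MODEL_URL is defined elsewhere in the source file; the 51-class figure is just an illustrative choice (e.g. HMDB51):

model = r2plus1d_34(pretrained=True, progress=True)
model.fc = nn.Linear(model.fc.in_features, 51)  # replace the head after the weights are in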
Example #6
def r2plus1d_34(num_classes):
    model = VideoResNet(block=BasicBlock,
                        conv_makers=[Conv2Plus1D] * 4,
                        layers=[3, 4, 6, 3],
                        stem=R2Plus1dStem)

    model.fc = nn.Linear(model.fc.in_features, out_features=num_classes)

    # Fix difference in PyTorch vs Caffe2 architecture
    # https://github.com/facebookresearch/VMZ/issues/89
    model.layer2[0].conv2[0] = Conv2Plus1D(128, 128, 288)
    model.layer3[0].conv2[0] = Conv2Plus1D(256, 256, 576)
    model.layer4[0].conv2[0] = Conv2Plus1D(512, 512, 1152)

    # We need exact Caffe2 momentum for BatchNorm scaling
    for m in model.modules():
        if isinstance(m, nn.BatchNorm3d):
            m.eps = 1e-3
            m.momentum = 0.9

    return model
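A shape sanity check for this constructor; R(2+1)D models are conventionally trained on 112x112 crops, but the adaptive pooling in VideoResNet accepts other spatial sizes too:

model = r2plus1d_34(num_classes=400)
clip = torch.randn(1, 3, 16, 112, 112)  # (batch, channels, frames, height, width)
logits = model(clip)
print(logits.shape)  # torch.Size([1, 400])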
Example #7
def r2plus1d_34(cfg):
    model = VideoResNet(block=BasicBlock,
                        conv_makers=[Conv2Plus1D] * 4,
                        layers=[3, 4, 6, 3],
                        stem=R2Plus1dStem)

    # Caffe2-compatible midplane widths (see Example #4).
    model.layer2[0].conv2[0] = Conv2Plus1D(128, 128, 288)
    model.layer3[0].conv2[0] = Conv2Plus1D(256, 256, 576)
    model.layer4[0].conv2[0] = Conv2Plus1D(512, 512, 1152)
    # Size the head to the 359-class checkpoint first, load the weights,
    # then swap in a fresh head for the target dataset.
    model.fc = nn.Linear(model.fc.in_features, 359)
    state_dict = torch.load('./logs/r2plus1d/pretrained.pth')
    model.load_state_dict(state_dict)
    model.fc = nn.Linear(model.fc.in_features, cfg.CONFIG.DATA.NUM_CLASSES)
    print('Pretrained Model Weight Loaded')
    return model


if __name__ == '__main__':

    # Smoke test: R3D-18 with torchvision's Kinetics-400 weights,
    # head swapped for 101 classes (e.g. UCF101).
    model = VideoResNet(block=BasicBlock,
                        conv_makers=[Conv3DSimple] * 4,
                        layers=[2, 2, 2, 2],
                        stem=BasicStem)
    state_dict = load_state_dict_from_url(model_urls['r3d_18'], progress=True)
    model.load_state_dict(state_dict)
    model.fc = nn.Linear(model.fc.in_features, 101)
    print('Pretrained Model Weight Loaded')
    clip = torch.randn((2, 3, 4, 224, 224))  # (batch, channels, frames, height, width)
    output = model(clip)
    print(output.shape)  # torch.Size([2, 101])