def r2plus1d_34(num_classes, pretrained=False, progress=False, arch=None):
    """Build an R(2+1)D-34 video classification model.

    Args:
        num_classes: size of the final classification layer.
        pretrained: if True, download weights from ``model_urls[arch]``.
        progress: show a download progress bar when fetching weights.
        arch: key into ``model_urls``; required when ``pretrained`` is True.

    Returns:
        The constructed ``VideoResNet``.

    Raises:
        ValueError: if ``pretrained`` is True but ``arch`` is None.
    """
    if pretrained and arch is None:
        # Fail early with a clear message instead of an opaque
        # KeyError/TypeError from ``model_urls[arch]`` below.
        raise ValueError("'arch' must be provided when 'pretrained' is True")

    model = VideoResNet(block=BasicBlock,
                        conv_makers=[Conv2Plus1D] * 4,
                        layers=[3, 4, 6, 3],
                        stem=R2Plus1dStem)
    model.fc = nn.Linear(model.fc.in_features, out_features=num_classes)

    # Fix difference in PyTorch vs Caffe2 architecture
    # https://github.com/facebookresearch/VMZ/issues/89
    # https://github.com/pytorch/vision/issues/1265
    model.layer2[0].conv2[0] = Conv2Plus1D(128, 128, 288)
    model.layer3[0].conv2[0] = Conv2Plus1D(256, 256, 576)
    model.layer4[0].conv2[0] = Conv2Plus1D(512, 512, 1152)

    # We need exact Caffe2 momentum for BatchNorm scaling
    for m in model.modules():
        if isinstance(m, nn.BatchNorm3d):
            m.eps = 1e-3
            m.momentum = 0.9

    if pretrained:
        state_dict = torch.hub.load_state_dict_from_url(model_urls[arch],
                                                        progress=progress)
        model.load_state_dict(state_dict)

    return model
def __init__(self, inplanes, planes, scale_factor=2):
    """Set up the refinement module's (2+1)D convolution stack.

    Args:
        inplanes: channel count of the incoming feature map.
        planes: channel count produced by every convolution here.
        scale_factor: forwarded unchanged to the parent constructor.
    """
    super(Refine2plus1d, self).__init__(inplanes, planes, scale_factor)
    # All five convolutions share the same intermediate channel width.
    mid_planes = planes * 2 + 32
    self.convFS1 = Conv2Plus1D(inplanes, planes, mid_planes)
    self.convFS2 = Conv2Plus1D(planes, planes, mid_planes)
    self.convFS3 = Conv2Plus1D(planes, planes, mid_planes)
    self.convMM1 = Conv2Plus1D(planes, planes, mid_planes)
    self.convMM2 = Conv2Plus1D(planes, planes, mid_planes)
def __init__(self, num_classes):
    """Set up the merge/upsample/audio sub-modules.

    NOTE(review): ``num_classes`` is accepted but the parent is initialised
    with a hard-coded ``num_classes=2`` — confirm this is intentional.
    """
    super().__init__(num_classes=2)
    # Bug fix: ``merge_conv`` was assigned twice; the first assignment
    # (Conv2Plus1D(128, 64, 144, 1) + BatchNorm3d(64)) was dead code,
    # immediately overwritten by the 256-channel version kept below.
    self.merge_conv: nn.Module = nn.Sequential(
        Conv2Plus1D(256, 128, 230, 1),
        nn.BatchNorm3d(128),
        nn.ReLU(inplace=True))
    self.upsample = nn.Upsample(size=(16, 28, 28))
    # Collapse the temporal dimension of the audio features by a factor of 4.
    self.audio_conv = nn.Conv3d(256, 128, (4, 1, 1), (4, 1, 1), 0)
def r2plus1d_34(cfg):
    """Build an R(2+1)D-34 model and load pretrained weights from disk.

    The checkpoint at ``./logs/r2plus1d/pretrained.pth`` is expected to hold
    weights for a 359-way head; after loading, the head is replaced with a
    fresh ``cfg.CONFIG.DATA.NUM_CLASSES``-way classifier.

    Args:
        cfg: config object providing ``cfg.CONFIG.DATA.NUM_CLASSES``.

    Returns:
        The constructed ``VideoResNet`` with pretrained backbone weights.
    """
    model = VideoResNet(block=BasicBlock,
                        conv_makers=[Conv2Plus1D] * 4,
                        layers=[3, 4, 6, 3],
                        stem=R2Plus1dStem)

    # Fix difference in PyTorch vs Caffe2 architecture
    # (https://github.com/facebookresearch/VMZ/issues/89).
    model.layer2[0].conv2[0] = Conv2Plus1D(128, 128, 288)
    model.layer3[0].conv2[0] = Conv2Plus1D(256, 256, 576)
    model.layer4[0].conv2[0] = Conv2Plus1D(512, 512, 1152)

    # Temporary 359-way head so the fc shape matches the checkpoint.
    model.fc = nn.Linear(model.fc.in_features, 359)
    # map_location='cpu' so loading also works on CPU-only hosts when the
    # checkpoint was saved from a GPU; the caller moves the model to device.
    state_dict = torch.load('./logs/r2plus1d/pretrained.pth',
                            map_location='cpu')
    model.load_state_dict(state_dict)

    # Replace the head with a freshly-initialised task-specific classifier.
    model.fc = nn.Linear(model.fc.in_features, cfg.CONFIG.DATA.NUM_CLASSES)
    print('Pretrained Model Weight Loaded')
    return model
def r2plus1d_34(num_classes):
    """Construct an R(2+1)D-34 network with a ``num_classes``-way head."""
    model = VideoResNet(
        block=BasicBlock,
        conv_makers=[Conv2Plus1D] * 4,
        layers=[3, 4, 6, 3],
        stem=R2Plus1dStem,
    )
    model.fc = nn.Linear(model.fc.in_features, out_features=num_classes)

    # Fix difference in PyTorch vs Caffe2 architecture
    # https://github.com/facebookresearch/VMZ/issues/89
    stage_fixes = (
        (model.layer2, 128, 288),
        (model.layer3, 256, 576),
        (model.layer4, 512, 1152),
    )
    for stage, planes, mid_planes in stage_fixes:
        stage[0].conv2[0] = Conv2Plus1D(planes, planes, mid_planes)

    # We need exact Caffe2 momentum for BatchNorm scaling
    for module in model.modules():
        if isinstance(module, nn.BatchNorm3d):
            module.eps = 1e-3
            module.momentum = 0.9

    return model
def __init__(self, num_classes=5, pretrained=True):
    """Set up the truncated R(2+1)D-18 backbone plus the frozen sync net.

    Args:
        num_classes: number of output classes of the final linear head.
        pretrained: whether to initialise the r2plus1d_18 backbone with
            pretrained weights.
    """
    super().__init__(num_classes=num_classes, sequence_length=8,
                     contains_dropout=False)

    # Bug fix: ``pretrained`` was previously ignored — the backbone was
    # always built with pretrained=True regardless of the argument.
    self.r2plus1 = r2plus1d_18(pretrained=pretrained)
    # Use only the first two residual stages as a feature extractor;
    # the later stages and the classifier head are disabled.
    self.r2plus1.layer3 = nn.Identity()
    self.r2plus1.layer4 = nn.Identity()
    self.r2plus1.fc = nn.Identity()

    # Audio-visual sync network, kept frozen during training.
    self.sync_net = PretrainedSyncNet()
    self._set_requires_grad_for_module(self.sync_net, requires_grad=False)

    self.relu = nn.ReLU()
    self.padding = nn.ReflectionPad2d((0, 1, 0, 0))

    self.upsample = nn.Upsample(size=(8, 56, 56))
    self.merge_conv: nn.Module = nn.Sequential(
        Conv2Plus1D(128, 64, 144, 1),
        nn.BatchNorm3d(64),
        nn.ReLU(inplace=True))

    self.out = nn.Sequential(nn.Linear(128, 50), nn.ReLU(),
                             nn.Linear(50, self.num_classes))