def __init__(self, resnet2d, frame_nb=16, class_nb=1000, conv_class=False):
    """Build a 3D ResNet by inflating the layers of a 2D ResNet.

    Args:
        resnet2d: 2D ResNet whose ``conv1``, ``bn1``, ``maxpool``,
            ``layer1``..``layer4``, ``avgpool`` and ``fc`` are read.
        frame_nb: Number of input frames. Only read when ``conv_class`` is
            False, to size the temporal extent of the final average pool.
        class_nb: Number of output channels of the convolutional
            classifier. Only read when ``conv_class`` is True.
        conv_class: Whether to use convolutional layer as classifier to
            adapt to various number of frames
    """
    super(I3ResNet, self).__init__()
    self.conv_class = conv_class

    # Stem: inflated convolution, batch norm and max pooling.
    self.conv1 = inflate.inflate_conv(
        resnet2d.conv1, time_dim=3, time_padding=1, center=True)
    self.bn1 = inflate.inflate_batch_norm(resnet2d.bn1)
    self.relu = torch.nn.ReLU(inplace=True)
    self.maxpool = inflate.inflate_pool(
        resnet2d.maxpool, time_dim=3, time_padding=1, time_stride=2)

    # Residual stages.
    self.layer1 = inflate_reslayer(resnet2d.layer1)
    self.layer2 = inflate_reslayer(resnet2d.layer2)
    self.layer3 = inflate_reslayer(resnet2d.layer3)
    self.layer4 = inflate_reslayer(resnet2d.layer4)

    if conv_class:
        self.avgpool = inflate.inflate_pool(resnet2d.avgpool, time_dim=1)
        # Take the feature width from the 2D classifier instead of
        # hard-coding 2048, so backbones with a different final width
        # get a classifier that matches their features.
        self.classifier = torch.nn.Conv3d(
            in_channels=resnet2d.fc.in_features,
            out_channels=class_nb,
            kernel_size=(1, 1, 1),
            bias=True)
    else:
        # NOTE(review): 16 is presumably the cumulative temporal stride of
        # the network -- confirm against inflate_reslayer. The float
        # literal keeps this a true division before the ceil.
        final_time_dim = int(math.ceil(frame_nb / 16.0))
        self.avgpool = inflate.inflate_pool(
            resnet2d.avgpool, time_dim=final_time_dim)
        self.fc = inflate.inflate_linear(resnet2d.fc, 1)
def inflate_features(features, inflate_block_convs=False):
    """Build the 3D counterpart of a DenseNet feature extractor.

    Walks the direct children of ``features`` in order and registers, under
    the same name, the inflated version of each one in a new
    ``torch.nn.Sequential``. ReLU modules are reused as-is.

    Args:
        features: Module whose named children are converted one by one.
        inflate_block_convs: Forwarded as ``inflate_convs`` to every
            ``_DenseLayer3d`` built from a dense block.

    Returns:
        Tuple ``(features3d, transition_nb)``: the new sequential module and
        the number of transition layers encountered.

    Raises:
        ValueError: If a child is not one of the handled layer types.
    """
    inflated = torch.nn.Sequential()
    nb_transitions = 0

    for child_name, module in features.named_children():
        if isinstance(module, torch.nn.BatchNorm2d):
            converted = inflate.inflate_batch_norm(module)
        elif isinstance(module, torch.nn.ReLU):
            # The very same ReLU instance is shared with the 2D model.
            converted = module
        elif isinstance(module, torch.nn.Conv2d):
            converted = inflate.inflate_conv(module, 1)
        elif isinstance(module, (torch.nn.MaxPool2d, torch.nn.AvgPool2d)):
            converted = inflate.inflate_pool(module)
        elif isinstance(module, torchvision.models.densenet._DenseBlock):
            # A dense block becomes a Sequential of inflated dense layers.
            converted = torch.nn.Sequential()
            for layer_name, dense_layer in module.named_children():
                assert isinstance(dense_layer,
                                  torchvision.models.densenet._DenseLayer)
                converted.add_module(
                    layer_name,
                    _DenseLayer3d(dense_layer,
                                  inflate_convs=inflate_block_convs))
        elif isinstance(module, torchvision.models.densenet._Transition):
            converted = _Transition3d(module)
            nb_transitions += 1
        else:
            raise ValueError('{} is not among handled layer types'.format(
                type(module)))

        inflated.add_module(child_name, converted)

    return inflated, nb_transitions
def __init__(self, resnet2d, frame_nb=32, class_nb=4, reason_nb=21,
             side_task=False, conv_class=False):
    """Build a hybrid network: inflated 3D stem and stages, 2D tail.

    ``conv1``, ``bn1``, ``maxpool`` and ``layer1``..``layer3`` are inflated
    from ``resnet2d``; ``layer4`` and ``avgpool`` are kept as the original
    2D modules (shared by reference with ``resnet2d``), followed by a new
    linear classifier.

    Args:
        resnet2d: 2D ResNet providing the layers listed above and ``fc``
            (only its ``in_features`` is read).
        frame_nb: Not read by this constructor.
        class_nb: Number of outputs of the final linear classifier.
        reason_nb: Not read by this constructor.
        side_task: Stored on ``self.side_task``.
        conv_class: Whether to use convolutional layer as classifier to
            adapt to various number of frames. Stored on
            ``self.conv_class``; no convolutional classifier is built here.
    """
    super(I3ResNet, self).__init__()
    self.conv_class = conv_class

    # Stem: temporal kernel of 5 with padding 2 and stride 1.
    self.conv1 = inflate.inflate_conv(
        resnet2d.conv1,
        time_dim=5,
        time_stride=1,
        time_padding=2,
        center=True)
    self.bn1 = inflate.inflate_batch_norm(resnet2d.bn1)
    self.relu = torch.nn.ReLU(inplace=True)
    self.maxpool = inflate.inflate_pool(
        resnet2d.maxpool, time_dim=1, time_stride=1)
    # Extra pooling whose kernel, stride and padding only act on the first
    # of the three pooled dimensions.
    self.maxpool2 = torch.nn.MaxPool3d(
        kernel_size=(3, 1, 1), stride=(2, 1, 1), padding=(1, 0, 0))

    self.layer1 = inflate_reslayer(resnet2d.layer1)
    self.layer2 = inflate_reslayer(resnet2d.layer2)
    self.layer3 = inflate_reslayer(resnet2d.layer3)

    # 2D tail, reused without inflation.
    self.layer4_2d = resnet2d.layer4
    self.avgpool_2d = resnet2d.avgpool

    num_ftrs = resnet2d.fc.in_features
    # Honour class_nb instead of a hard-coded 4; the default of 4 keeps
    # existing callers unchanged.
    self.fc_2d = torch.nn.Linear(num_ftrs, class_nb)
    self.side_task = side_task
def __init__(self, transition2d, inflate_conv=False):
    """Inflates transition layer from transition2d

    Each named child of ``transition2d`` is converted and registered on
    this module under the same name, in the same order.

    Args:
        transition2d: 2D transition module whose children are BatchNorm2d,
            ReLU, Conv2d or AvgPool2d.
        inflate_conv: If True, each convolution is inflated with ``3`` as
            the second argument of ``inflate.inflate_conv`` and preceded by
            a replication padding module; otherwise it is inflated with
            ``1`` and no padding is added.

    Raises:
        ValueError: If a child is not one of the handled layer types.
    """
    super(_Transition3d, self).__init__()
    for name, layer in transition2d.named_children():
        if isinstance(layer, torch.nn.BatchNorm2d):
            self.add_module(name, inflate.inflate_batch_norm(layer))
        elif isinstance(layer, torch.nn.ReLU):
            self.add_module(name, layer)
        elif isinstance(layer, torch.nn.Conv2d):
            if inflate_conv:
                # Pads one element on each side of the third-from-last
                # dimension only (presumably time for N,C,T,H,W input --
                # confirm against the callers).
                pad_time = torch.nn.ReplicationPad3d((0, 0, 0, 0, 1, 1))
                # Module names must not contain '.', otherwise add_module
                # raises KeyError; the pad layer has no parameters, so the
                # rename does not affect state-dict keys.
                self.add_module('padding_1', pad_time)
                self.add_module(name, inflate.inflate_conv(layer, 3))
            else:
                self.add_module(name, inflate.inflate_conv(layer, 1))
        elif isinstance(layer, torch.nn.AvgPool2d):
            self.add_module(name, inflate.inflate_pool(layer, 2))
        else:
            raise ValueError('{} is not among handled layer types'.format(
                type(layer)))
def test_input_block():
    """Check the inflated DenseNet-121 input block against the 2D one.

    Every image batch is run through the 2D input block (conv0, norm0,
    relu0, pool0). The same batch is repeated ``frame_nb`` times along a
    new third dimension, replication-padded along that dimension, and run
    through the inflated block. The 3D output must equal the 2D output
    repeated ``frame_nb`` times, up to an absolute tolerance of 1e-4.

    Requires the image folder at the hard-coded dataset path and the
    pretrained DenseNet-121 weights.
    """
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    dataset = datasets.ImageFolder(
        '/sequoia/data1/yhasson/datasets/test-dataset',
        transforms.Compose([
            transforms.RandomSizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    densenet = torchvision.models.densenet121(pretrained=True)
    features = densenet.features
    seq2d = torch.nn.Sequential(features.conv0, features.norm0,
                                features.relu0, features.pool0)
    seq3d = torch.nn.Sequential(inflate.inflate_conv(features.conv0, 3),
                                inflate.inflate_batch_norm(features.norm0),
                                features.relu0,
                                inflate.inflate_pool(features.pool0, 1))

    loader = torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=False)
    frame_nb = 4
    # Loop-invariant: one extra replicated slice on each side of the new
    # third dimension, to feed the convolution inflated with 3 above.
    time_pad = torch.nn.ReplicationPad3d((0, 0, 0, 0, 1, 1))

    # Labels are not needed for this comparison, so they are neither moved
    # to the GPU nor wrapped.
    for input_2d, _ in loader:
        input_2d_var = torch.autograd.Variable(input_2d)
        out2d = seq2d(input_2d_var)

        input_3d = input_2d.unsqueeze(2).repeat(1, 1, frame_nb, 1, 1)
        input_3d_var = time_pad(input_3d)
        out3d = seq3d(input_3d_var)

        expected_out_3d = out2d.data.unsqueeze(2).repeat(1, 1, frame_nb, 1, 1)
        out_diff = expected_out_3d - out3d.data
        # Compare magnitudes: a signed max would let large negative
        # deviations pass unnoticed.
        max_abs_diff = out_diff.abs().max()
        print(max_abs_diff)
        assert (max_abs_diff < 0.0001)
def __init__(self, vgg2d, frame_nb=16, class_nb=1000, conv_class=False):
    """Build a 3D VGG-16 by inflating a 2D VGG without batch norm.

    Convolutions and max pools are taken from ``vgg2d.features`` at fixed
    indices, linear layers from ``vgg2d.classifier`` at indices 0, 3 and 6.

    Args:
        vgg2d: 2D VGG providing ``features`` and ``classifier``.
        frame_nb: Accepted but not read by this constructor.
        class_nb: Accepted but not read by this constructor.
        conv_class: Whether to use convolutional layer as classifier to
            adapt to various number of frames. Accepted but not read by
            this constructor.
    """
    super(I3vgg16, self).__init__()

    def conv3d(index):
        # Every convolution is inflated with the same temporal settings.
        return inflate.inflate_conv(
            vgg2d.features[index], time_dim=3, time_padding=1, center=True)

    def pool3d(index):
        # Every max pool is inflated with the same temporal settings.
        return inflate.inflate_pool(
            vgg2d.features[index], time_dim=3, time_padding=1, time_stride=2)

    # (attribute name, builder, index into vgg2d.features), listed in
    # registration order.
    feature_layout = (
        ('conv1_1', conv3d, 0),
        ('conv1_2', conv3d, 2),
        ('maxpool_1', pool3d, 4),
        ('conv2_1', conv3d, 5),
        ('conv2_2', conv3d, 7),
        ('maxpool_2', pool3d, 9),
        ('conv3_1', conv3d, 10),
        ('conv3_2', conv3d, 12),
        ('conv3_3', conv3d, 14),
        ('maxpool_3', pool3d, 16),
        ('conv4_1', conv3d, 17),
        ('conv4_2', conv3d, 19),
        ('conv4_3', conv3d, 21),
        ('maxpool_4', pool3d, 23),
        ('conv5_1', conv3d, 24),
        ('conv5_2', conv3d, 26),
        ('conv5_3', conv3d, 28),
        ('maxpool_5', pool3d, 30),
    )
    for attr_name, build, index in feature_layout:
        setattr(self, attr_name, build(index))

    # Fully connected head.
    for attr_name, index in (('fc1', 0), ('fc2', 3), ('fc3', 6)):
        setattr(self, attr_name,
                inflate.inflate_linear(vgg2d.classifier[index], 1))

    self.relu = torch.nn.ReLU(inplace=True)
def __init__(self, vgg2d, frame_nb=16, class_nb=1000, conv_class=False):
    """Build a 3D VGG by inflating a 2D VGG that uses batch norm.

    Convolutions, batch norms and max pools are taken from
    ``vgg2d.features`` at fixed indices, linear layers from
    ``vgg2d.classifier`` at indices 0, 3 and 6.

    Args:
        vgg2d: 2D VGG providing ``features`` and ``classifier``.
        frame_nb: Accepted but not read by this constructor.
        class_nb: Accepted but not read by this constructor.
        conv_class: Whether to use convolutional layer as classifier to
            adapt to various number of frames. Accepted but not read by
            this constructor.
    """
    super(I3vgg, self).__init__()
    feats = vgg2d.features

    # One entry per VGG stage: the (conv index, batch-norm index) pairs of
    # its convolutions, then the index of the max pool closing the stage.
    stages = (
        (((0, 1), (3, 4)), 6),
        (((7, 8), (10, 11)), 13),
        (((14, 15), (17, 18), (20, 21)), 23),
        (((24, 25), (27, 28), (30, 31)), 33),
        (((34, 35), (37, 38), (40, 41)), 43),
    )

    bn_count = 0  # batch norms are numbered bn1..bn13 across all stages
    for stage_no, (conv_bn_pairs, pool_idx) in enumerate(stages, 1):
        for conv_no, (conv_idx, bn_idx) in enumerate(conv_bn_pairs, 1):
            bn_count += 1
            setattr(
                self,
                'conv{}_{}'.format(stage_no, conv_no),
                inflate.inflate_conv(
                    feats[conv_idx], time_dim=3, time_padding=1, center=True))
            setattr(
                self,
                'bn{}'.format(bn_count),
                inflate.inflate_batch_norm(feats[bn_idx]))
        setattr(
            self,
            'maxpool_{}'.format(stage_no),
            inflate.inflate_pool(
                feats[pool_idx], time_dim=3, time_padding=1, time_stride=2))

    # Fully connected head.
    head = vgg2d.classifier
    self.fc1 = inflate.inflate_linear(head[0], 1)
    self.fc2 = inflate.inflate_linear(head[3], 1)
    self.fc3 = inflate.inflate_linear(head[6], 1)

    self.relu = torch.nn.ReLU(inplace=True)