def __init__(self, dim_in, temp_pool_size, resolution, scale_factor):
    """Register per-pathway temporal pool, ROI-align and spatial pool modules.

    Args:
        dim_in: stored as ``self.dim_in``; not otherwise used here.
        temp_pool_size: one entry per pathway. A non-None entry is the
            temporal extent of an ``AvgPool3d`` kernel ``[t, 1, 1]``;
            ``None`` skips the temporal pool for that pathway.
        resolution: per-pathway value passed as the first argument to
            ``ROIAlign`` and used as the ``MaxPool2d`` kernel size.
        scale_factor: per-pathway divisor; ``ROIAlign`` receives
            ``spatial_scale = 1.0 / scale_factor[i]``.
    """
    super(Head_featextract_roi, self).__init__()
    self.dim_in = dim_in
    self.num_pathways = len(temp_pool_size)

    for idx, t_kernel in enumerate(temp_pool_size):
        # Modules are registered as s{idx}_tpool / s{idx}_roi / s{idx}_spool.
        if t_kernel is not None:
            self.add_module(
                f's{idx}_tpool',
                nn.AvgPool3d([t_kernel, 1, 1], stride=1))

        self.add_module(
            f's{idx}_roi',
            ROIAlign(
                resolution[idx],
                spatial_scale=1.0 / scale_factor[idx],
                sampling_ratio=0,
                aligned=True))

        self.add_module(
            f's{idx}_spool',
            nn.MaxPool2d(resolution[idx], stride=1))
def __init__(self, in_channels, out_channels, conv_kernel_size=3,
             apply_pooling=True, pool_kernel_size=2, pool_type='max',
             basic_module=DoubleConv, conv_layer_order='gcr', num_groups=8,
             padding=1):
    """Build an encoder stage: optional 3D pooling followed by ``basic_module``.

    Args:
        in_channels: input channels forwarded to ``basic_module``.
        out_channels: output channels forwarded to ``basic_module``.
        conv_kernel_size: forwarded as ``kernel_size``.
        apply_pooling: when False, ``self.pooling`` is ``None``.
        pool_kernel_size: kernel size of the pooling layer.
        pool_type: ``'max'`` or ``'avg'``; checked even when pooling is off.
        basic_module: callable building the convolutional part.
        conv_layer_order: forwarded as ``order``.
        num_groups: forwarded as ``num_groups``.
        padding: forwarded as ``padding``.
    """
    super(Encoder, self).__init__()
    assert pool_type in ['max', 'avg']

    if not apply_pooling:
        self.pooling = None
    else:
        pool_cls = nn.MaxPool3d if pool_type == 'max' else nn.AvgPool3d
        self.pooling = pool_cls(kernel_size=pool_kernel_size)

    self.basic_module = basic_module(
        in_channels,
        out_channels,
        encoder=True,
        kernel_size=conv_kernel_size,
        order=conv_layer_order,
        num_groups=num_groups,
        padding=padding)
def __init__(self):
    """Build the 3D convolutional trunk (``self.convs``) and the MLP (``self.lins``).

    ``self.convs`` is eight Conv3d -> BatchNorm3d -> ReLU triples followed by
    an ``AvgPool3d((5, 1, 1))``; ``self.lins`` is a stack of Linear/ReLU
    pairs narrowing from 64 to 2 features, then ``Linear(2, 1)`` and ``Tanh``.
    """
    super().__init__()

    # (in_channels, out_channels, kernel); every conv pads only the first
    # spatial axis by 2.
    conv_specs = [
        (1, 8, (5, 4, 4)),
        (8, 8, (5, 1, 1)),
        (8, 16, (5, 2, 2)),
        (16, 16, (5, 1, 1)),
        (16, 32, (5, 2, 2)),
        (32, 32, (5, 1, 1)),
        (32, 64, (5, 2, 2)),
        (64, 64, (5, 2, 2)),
    ]
    conv_layers = []
    for c_in, c_out, kernel in conv_specs:
        conv_layers += [
            nn.Conv3d(c_in, c_out, kernel, padding=(2, 0, 0)),
            nn.BatchNorm3d(c_out),
            nn.ReLU(),
        ]
    conv_layers.append(nn.AvgPool3d((5, 1, 1)))
    self.convs = nn.Sequential(*conv_layers)

    # Consecutive widths of the ReLU-activated linear layers.
    widths = [64, 64, 32, 32, 16, 16, 8, 8, 4, 4, 2]
    fc_layers = []
    for w_in, w_out in zip(widths[:-1], widths[1:]):
        fc_layers += [nn.Linear(w_in, w_out), nn.ReLU()]
    fc_layers += [nn.Linear(2, 1), nn.Tanh()]
    self.lins = nn.Sequential(*fc_layers)
def __init__(self, block, layers, sample_size, sample_duration,
             shortcut_type='B', cardinality=32, num_classes=400):
    """Build a 3D ResNeXt classifier.

    Args:
        block: residual block class; ``block.expansion`` sizes the final
            linear layer.
        layers: sequence of four block counts, one per stage.
        sample_size: input spatial size; the final pooling kernel uses
            ``ceil(sample_size / 32)``.
        sample_duration: input temporal length; the final pooling kernel
            uses ``ceil(sample_duration / 16)``.
        shortcut_type: forwarded to ``self._make_layer``.
        cardinality: forwarded to ``self._make_layer``; also scales the
            input width of ``self.fc``.
        num_classes: output size of ``self.fc``.
    """
    # Read by self._make_layer; a plain int can be set before Module.__init__.
    self.inplanes = 64
    super(ResNeXt, self).__init__()

    self.conv1 = nn.Conv3d(
        3, 64, kernel_size=7, stride=(1, 2, 2), padding=(3, 3, 3),
        bias=False)
    self.bn1 = nn.BatchNorm3d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool3d(kernel_size=(3, 3, 3), stride=2, padding=1)

    self.layer1 = self._make_layer(
        block, 128, layers[0], shortcut_type, cardinality)
    self.layer2 = self._make_layer(
        block, 256, layers[1], shortcut_type, cardinality, stride=2)
    self.layer3 = self._make_layer(
        block, 512, layers[2], shortcut_type, cardinality, stride=2)
    self.layer4 = self._make_layer(
        block, 1024, layers[3], shortcut_type, cardinality, stride=2)

    last_duration = int(math.ceil(sample_duration / 16))
    last_size = int(math.ceil(sample_size / 32))
    self.avgpool = nn.AvgPool3d(
        (last_duration, last_size, last_size), stride=1)
    self.dropout = nn.Dropout(0.7)
    self.fc = nn.Linear(cardinality * 32 * block.expansion, num_classes)

    for m in self.modules():
        if isinstance(m, nn.Conv3d):
            # kaiming_normal_ is the supported in-place initializer; the
            # un-suffixed kaiming_normal is deprecated.
            nn.init.kaiming_normal_(m.weight, mode='fan_out')
        elif isinstance(m, nn.BatchNorm3d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
def __init__(self, num_classes):
    """Create the pooling layers, bottleneck classifiers and linear embedders.

    Args:
        num_classes: number of classes passed to each ``BottleClassifier``.
    """
    super(PyramidClassifier, self).__init__()
    self.num_classes = num_classes

    # 2D pooling
    self.maxpool2D = nn.MaxPool2d(
        kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    self.avgpool2D = nn.AvgPool2d(
        kernel_size=3, stride=2, padding=1, ceil_mode=False)

    # 3D pooling over pairs along the last axis.
    # NOTE(review): stride=0 is passed explicitly; recent PyTorch rejects a
    # zero stride when the layer is applied -- confirm whether stride=None
    # (i.e. stride == kernel_size) was intended.
    pair_kernel = (1, 1, 2)
    self.maxpool3D = nn.MaxPool3d(
        kernel_size=pair_kernel, stride=0, ceil_mode=False)
    self.averagepool3D = nn.AvgPool3d(
        kernel_size=pair_kernel, stride=0, ceil_mode=False)

    self.averagepool = nn.AdaptiveAvgPool2d(output_size=(1, 1))

    # One classifier + 256-d embedder per feature source.
    self.bottleneck_res4 = BottleClassifier(
        1024, self.num_classes, relu=True, dropout=False, bottle_dim=256)
    self.linear_embeder_res4 = nn.Linear(1024, 256)

    self.bottleneck_res5 = BottleClassifier(
        2048, self.num_classes, relu=True, dropout=False, bottle_dim=256)
    self.linear_embeder_res5 = nn.Linear(2048, 256)

    self.bottleneck_unite = BottleClassifier(
        3072, self.num_classes, relu=True, dropout=False, bottle_dim=512)
    self.linear_embeder_unite = nn.Linear(3072, 256)
def __init__(self, input_nc, ndf, n_layers=3, norm_layer=nn.BatchNorm3d,
             use_sigmoid=False, num_D=1, get_inter_feat=False,
             has_bias=False, has_sn=True, max_ndf=256, conv_type='deform'):
    """Build ``num_D`` NLayerDiscriminators and expose their layers on self.

    All arguments are stored as attributes of the same name and forwarded
    to ``NLayerDiscriminator``. With ``get_inter_feat`` set, sub-layers
    ``model0 .. model{n_layers + 1}`` of discriminator ``i`` are attached as
    ``scale{i}_layer{j}``; otherwise its ``model`` is attached as
    ``layer{i}``. ``weights_init`` is applied to the whole module at the end.
    """
    super(MultiscaleDiscriminator, self).__init__()
    self.input_nc = input_nc
    self.ndf = ndf
    self.n_layers = n_layers
    self.norm_layer = norm_layer
    self.use_sigmoid = use_sigmoid
    self.num_D = num_D
    self.get_inter_feat = get_inter_feat
    self.has_bias = has_bias
    self.has_sn = has_sn
    self.max_ndf = max_ndf
    self.conv_type = conv_type

    for scale in range(self.num_D):
        disc = NLayerDiscriminator(
            input_nc, ndf, n_layers, norm_layer, use_sigmoid,
            get_inter_feat, has_bias, has_sn, max_ndf, conv_type)
        if not self.get_inter_feat:
            setattr(self, f'layer{scale}', disc.model)
            continue
        for depth in range(n_layers + 2):
            setattr(self, f'scale{scale}_layer{depth}',
                    getattr(disc, f'model{depth}'))

    # Halves the two trailing axes; the leading pooled axis is untouched.
    self.downsample = nn.AvgPool3d(
        kernel_size=[1, 3, 3], stride=[1, 2, 2], padding=[0, 1, 1],
        count_include_pad=False)
    self.apply(weights_init)
def __init__(self, block, layers, sample_size, sample_duration,
             shortcut_type='B', num_classes=400, input_chan=3):
    """Build a 3D ResNet classifier and initialise its weights.

    Args:
        block: residual block class; ``block.expansion`` sizes ``self.fc``.
        layers: sequence of four block counts, one per stage.
        sample_size: input spatial size (pooling kernel uses ``ceil(/32)``).
        sample_duration: input temporal length (pooling kernel uses
            ``ceil(/16)``).
        shortcut_type: forwarded to ``self._make_layer``.
        num_classes: output size of ``self.fc``.
        input_chan: number of input channels of the stem convolution.
    """
    # Read by self._make_layer while the stages are built.
    self.inplanes = 64
    super(ResNet, self).__init__()

    # Stem
    self.conv1 = nn.Conv3d(
        input_chan, 64, kernel_size=7, stride=(1, 2, 2),
        padding=(3, 3, 3), bias=False)
    self.bn1 = nn.BatchNorm3d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool3d(kernel_size=(3, 3, 3), stride=2, padding=1)

    # Residual stages
    self.layer1 = self._make_layer(block, 64, layers[0], shortcut_type)
    self.layer2 = self._make_layer(
        block, 128, layers[1], shortcut_type, stride=2)
    self.layer3 = self._make_layer(
        block, 256, layers[2], shortcut_type, stride=2)
    self.layer4 = self._make_layer(
        block, 512, layers[3], shortcut_type, stride=2)

    # Head
    pool_t = int(math.ceil(sample_duration / 16))
    pool_s = int(math.ceil(sample_size / 32))
    self.avgpool = nn.AvgPool3d((pool_t, pool_s, pool_s), stride=1)
    self.fc = nn.Linear(512 * block.expansion, num_classes)

    for module in self.modules():
        if isinstance(module, nn.Conv3d):
            nn.init.kaiming_normal_(
                module.weight, mode='fan_in', nonlinearity='relu')
            continue
        if isinstance(module, nn.Linear):
            nn.init.xavier_uniform_(module.weight)
            nn.init.constant_(module.bias, 0)
            continue
        if isinstance(module, (nn.BatchNorm3d, nn.GroupNorm)):
            nn.init.constant_(module.weight, 1)
            nn.init.constant_(module.bias, 0)
def compute_O_O_interaction(self, sets_of_objects, t, previous_T, D,
                            sampling=False):
    """Relate the object set at step ``t`` to the sets at earlier steps.

    For each index in ``previous_T`` the pair (earlier set, reference set)
    is turned into an MLP input by ``self.create_input_mlp`` and passed
    through ``self.mlp_inter``.

    Args:
        sets_of_objects: tensor indexed as ``[:, step]`` to select one set.
        t: index of the reference set.
        previous_T: iterable of earlier step indices.
        D: forwarded unchanged to ``self.create_input_mlp``.
        sampling: unused.

    Returns:
        ``(relations, is_object)``. When exactly one earlier step was
        processed and ``self.training`` is true, the single MLP output and
        its mask are returned as-is. Otherwise the MLP outputs are stacked
        on dim 1 and average-pooled over that dim, and the masks are
        summed over the stacking dim and clamped to ``[0, 1]``.
    """
    reference_set = sets_of_objects[:, t]

    relations, masks = [], []
    for prev_t in previous_T:
        mlp_input, mask = self.create_input_mlp(
            sets_of_objects[:, prev_t], reference_set, D)
        relations.append(self.mlp_inter(mlp_input))
        masks.append(mask)

    # Single interaction during training: nothing to aggregate.
    if (len(relations) == 1 and self.training):
        return relations[0], masks[0]

    stacked = torch.stack(relations, 1)
    # Average over the stacking dim (alternatively a max-pool could be used).
    averager = nn.AvgPool3d((stacked.size(1), 1, 1))
    stacked = averager(stacked)
    batch, _, t_prim, feat = stacked.size()
    pooled = stacked.view(batch, t_prim, feat)

    merged_mask = torch.stack(masks, 1)
    merged_mask = torch.clamp(torch.sum(merged_mask, 1), 0, 1)
    return pooled, merged_mask
def __init__(self, block, layers, sample_size, sample_duration,
             shortcut_type='B', cardinality=32, num_classes=400,
             use_depthwise=False, loss_type=None, use_extra_layer=False,
             phase='train', data_type='normal', policy='first'):
    """Build a 3D ResNeXt with either a detector head or a pooled linear head.

    Args:
        block: residual block class; ``block.expansion`` sizes ``self.fc``.
        layers: sequence of four block counts, one per stage.
        sample_size: input spatial size; stored as ``self.sample_size``.
        sample_duration: input temporal length. Used unreduced as the
            temporal extent of the head kernel (every stage here uses a
            temporal stride of 1).
        shortcut_type: forwarded to ``self._make_layer``.
        cardinality: forwarded to ``self._make_layer``; scales head width.
        num_classes: number of output classes.
        use_depthwise: when True, ``self.DS_Conv3d`` holds a
            ``DepthwiseSeparableConv(dimension=3)``; otherwise ``None``.
        loss_type: ``'multiloss'`` selects the ``MultiDetector`` head;
            anything else selects average pooling + ``self.fc``.
        use_extra_layer: forwarded to the detector as ``extra_layers``.
        phase: forwarded to the detector.
        data_type: forwarded to the detector.
        policy: forwarded to the detector.
    """
    # Module.__init__ must run first: assigning an nn.Module attribute
    # before it raises, and DepthwiseSeparableConv may well be a module.
    super(ResNeXt, self).__init__()

    self.inplanes = 64
    self.DS_Conv3d = (
        DepthwiseSeparableConv(dimension=3) if use_depthwise else None)
    detector_cls = MultiDetector if loss_type == 'multiloss' else None
    self.sample_size = sample_size

    self.conv1 = nn.Conv3d(
        3, 64, kernel_size=7, stride=(1, 2, 2), padding=(3, 3, 3),
        bias=False)
    self.bn1 = nn.BatchNorm3d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool3d(
        kernel_size=(3, 3, 3), stride=(1, 2, 2), padding=1)

    self.layer1 = self._make_layer(
        block, 128, layers[0], shortcut_type, cardinality)
    self.layer2 = self._make_layer(
        block, 256, layers[1], shortcut_type, cardinality,
        stride=(1, 2, 2))
    self.layer3 = self._make_layer(
        block, 512, layers[2], shortcut_type, cardinality,
        stride=(1, 2, 2))
    self.layer4 = self._make_layer(
        block, 1024, layers[3], shortcut_type, cardinality,
        stride=(1, 2, 2))

    last_duration = sample_duration
    last_size = math.ceil(sample_size / 32)
    kernel_size = (last_duration, last_size, last_size)

    if detector_cls is not None:
        # data_type was previously hard-coded to 'normal' here, silently
        # ignoring the constructor argument; the default is unchanged.
        self.Detector_layer = detector_cls(
            block, cardinality * 32, kernel_size=kernel_size,
            num_classes=num_classes, extra_layers=use_extra_layer,
            phase=phase, data_type=data_type, policy=policy)
    else:
        self.Detector_layer = None
        self.avgpool = nn.AvgPool3d(kernel_size, stride=1)
        self.fc = nn.Linear(
            cardinality * 32 * block.expansion, num_classes)

    for m in self.modules():
        if isinstance(m, nn.Conv3d):
            # In-place initializer; no need to rebind m.weight.
            nn.init.kaiming_normal_(m.weight, mode='fan_out')
        elif isinstance(m, nn.BatchNorm3d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
def __init__(self, input_dim, output_dim, initializers, depth=3,
             padding=True, pool=True, reversible=False):
    """Assemble an optional 2x average pool followed by convolutions.

    Args:
        input_dim: channels entering the block.
        output_dim: channels leaving the block.
        initializers: accepted but not used in this constructor.
        depth: number of convolutions in the non-reversible branch; must
            be at least 1.
        padding: converted with ``int()`` and used as the conv padding.
        pool: prepend ``AvgPool3d(kernel_size=2, stride=2, ceil_mode=True)``.
        reversible: use a ``ReversibleSequence`` instead of plain convs.

    Raises:
        ValueError: if ``depth < 1``.
    """
    super(DownConvolutionalBlock, self).__init__()

    if depth < 1:
        raise ValueError

    stages = []
    if pool:
        stages.append(
            nn.AvgPool3d(kernel_size=2, stride=2, padding=0,
                         ceil_mode=True))

    if reversible:
        stages.append(
            ReversibleSequence(input_dim, output_dim, reversible_depth=1))
    else:
        conv_pad = int(padding)
        stages.append(
            Conv3D(input_dim, output_dim, kernel_size=3, stride=1,
                   padding=conv_pad))
        # NOTE(review): the additional convs are taken to belong to the
        # non-reversible branch only -- confirm against the original layout.
        stages.extend(
            Conv3D(output_dim, output_dim, kernel_size=3, stride=1,
                   padding=int(padding))
            for _ in range(depth - 1))

    self.layers = nn.Sequential(*stages)
def conv(in_f, out_f, kernel_size, stride=1, bias=True, pad='zero',
         downsample_mode='stride'):
    """Return an ``nn.Sequential`` of [padder, Conv3d, downsampler].

    Entries that are not needed are left out.

    Args:
        in_f: input channels of the convolution.
        out_f: output channels of the convolution.
        kernel_size: integer kernel size; padding is ``(kernel_size - 1) / 2``
            truncated to int.
        stride: convolution stride. When ``downsample_mode`` is not
            ``'stride'`` and ``stride != 1``, the conv runs at stride 1 and
            a separate downsampler with this factor is appended.
        bias: whether the convolution has a bias.
        pad: ``'reflection'`` puts an ``nn.ReflectionPad3d`` in front and
            gives the conv zero padding; any other value pads inside the conv.
        downsample_mode: ``'stride'``, ``'avg'``, ``'max'``, ``'lanczos2'``
            or ``'lanczos3'``.

    The convolution weight is initialised with ``kaiming_normal_``.
    """
    downsampler = None
    if stride != 1 and downsample_mode != 'stride':
        if downsample_mode in ('avg', 'max'):
            pool_cls = (nn.AvgPool3d if downsample_mode == 'avg'
                        else nn.MaxPool3d)
            downsampler = pool_cls(stride, stride)
        elif downsample_mode in ['lanczos2', 'lanczos3']:
            downsampler = Downsampler(
                n_planes=out_f, factor=stride, kernel_type=downsample_mode,
                phase=0.5, preserve_size=True)
        else:
            assert False
        # The downsampler now does the striding.
        stride = 1

    conv_pad = int((kernel_size - 1) / 2)
    padder = None
    if pad == 'reflection':
        padder = nn.ReflectionPad3d(conv_pad)
        conv_pad = 0

    convolver = nn.Conv3d(in_f, out_f, kernel_size, stride,
                          padding=conv_pad, bias=bias)
    torch.nn.init.kaiming_normal_(convolver.weight)  # Kaiming (He) init

    stages = [m for m in (padder, convolver, downsampler) if m is not None]
    return nn.Sequential(*stages)
def __init__(self, in_channels, is_last_layer):
    """Build norm -> activation, plus a compressing 1x1x1 conv and 2x pool.

    Args:
        in_channels: input channel count; must be divisible by 2.
        is_last_layer: when True only the norm and activation are added
            and ``self.compression`` is 1; otherwise a conv halving the
            channels and an ``AvgPool3d(2, 2)`` follow and
            ``self.compression`` is 2.

    The activation and normalisation factories are read from the
    module-level ``config`` under ``"act_fn"`` and ``"norm_fn"``.
    """
    super(TransmitBlock, self).__init__()

    make_act = config["act_fn"]
    make_norm = config["norm_fn"]

    compression = 2
    assert in_channels % compression == 0

    self.in_channels = in_channels
    self.compression = compression

    self.add_module("norm", make_norm(in_channels))
    self.add_module("act", make_act())

    if is_last_layer:
        # No channel reduction on the final block.
        self.compression = 1
        return

    self.add_module(
        "conv",
        nn.Conv3d(in_channels, in_channels // compression, kernel_size=1,
                  stride=1, padding=0, bias=True))
    self.add_module(
        "pool", nn.AvgPool3d(kernel_size=2, stride=2, padding=0))
def __init__(self, in_channel, num_classes, verbose=False):
    """Build a five-stage 3D residual network with two linear heads.

    Args:
        in_channel: channels of the input volume.
        num_classes: output size of ``self.classifier``.
        verbose: stored as ``self.verbose``.

    ``self.classifier2`` is a second head with a fixed output size of 3.
    """
    super(resnet, self).__init__()
    self.verbose = verbose

    self.block1 = nn.Conv3d(in_channel, 64, 5, 2)

    stage2 = [nn.MaxPool3d(3, 2),
              residual_block(64, 64),
              residual_block(64, 64)]
    self.block2 = nn.Sequential(*stage2)

    stage3 = [residual_block(64, 128, False),
              residual_block(128, 128)]
    self.block3 = nn.Sequential(*stage3)

    stage4 = [residual_block(128, 256, False),
              residual_block(256, 256)]
    self.block4 = nn.Sequential(*stage4)

    stage5 = [residual_block(256, 512, False),
              residual_block(512, 512),
              nn.AvgPool3d(3)]
    self.block5 = nn.Sequential(*stage5)

    self.classifier = nn.Linear(512, num_classes)
    self.classifier2 = nn.Linear(512, 3)
def __init__(self, local_size=1, alpha=1.0, beta=0.75, k=1,
             ACROSS_CHANNELS=True):
    """Set up the averaging layer and constants for local response norm.

    Args:
        local_size: size of the averaging window.
        alpha: stored as ``self.alpha``.
        beta: stored as ``self.beta``.
        k: stored as ``self.k``.
        ACROSS_CHANNELS: when True ``self.average`` is an ``AvgPool3d``
            with kernel ``(local_size, 1, 1)``; otherwise an ``AvgPool2d``
            with a square ``local_size`` kernel. Both use stride 1.
    """
    super(SpatialCrossMapLRN, self).__init__()
    self.ACROSS_CHANNELS = ACROSS_CHANNELS

    # Half-window padding on the averaged axis/axes.
    half_window = int((local_size - 1.0) / 2)
    if ACROSS_CHANNELS:
        self.average = nn.AvgPool3d(
            kernel_size=(local_size, 1, 1),
            stride=1,
            padding=(half_window, 0, 0))
    else:
        self.average = nn.AvgPool2d(
            kernel_size=local_size,
            stride=1,
            padding=half_window)

    self.alpha = alpha
    self.beta = beta
    self.k = k
def __init__(self, in_channels=1, base_channels=32, n_layers=3,
             n_discriminators=3):
    """Create several patch discriminators plus a shared downsampler.

    Args:
        in_channels: input channels for every discriminator.
        base_channels: forwarded as ``base_channels``.
        n_layers: forwarded as ``n_layers``.
        n_discriminators: how many discriminators to create.
    """
    super().__init__()

    # One identically-configured discriminator per scale.
    self.discriminators = nn.ModuleList([
        Pix2PixHDPatchDiscriminator(
            in_channels, base_channels=base_channels, n_layers=n_layers)
        for _ in range(n_discriminators)
    ])

    # Used to shrink the input before handing it to the next discriminator.
    self.downsample = nn.AvgPool3d(
        3, stride=2, padding=1, count_include_pad=False)
def __init__(self, local_size=1, alpha=1E-4, beta=0.75,
             ACROSS_CHANNELS=False):
    """Set up the averaging layer and constants for local response norm.

    Args:
        local_size: size of the averaging window.
        alpha: stored as ``self.alpha``.
        beta: stored as ``self.beta``.
        ACROSS_CHANNELS: when True ``self.average`` is an ``AvgPool3d``
            with kernel ``(local_size, 1, 1)``; otherwise an ``AvgPool2d``
            with a square ``local_size`` kernel. Both use stride 1.
    """
    super(LRN, self).__init__()
    self.ACROSS_CHANNELS = ACROSS_CHANNELS
    self.alpha = alpha
    self.beta = beta

    # Half-window padding on the averaged axis/axes.
    half_window = int((local_size - 1.0) / 2)
    if not self.ACROSS_CHANNELS:
        self.average = nn.AvgPool2d(
            kernel_size=local_size, stride=1, padding=half_window)
    else:
        self.average = nn.AvgPool3d(
            kernel_size=(local_size, 1, 1),
            stride=1,
            padding=(half_window, 0, 0))
def __init__(self, inplanes, planes, stride=1, downsample=None, frames = 8):
    """Build a 2D bottleneck block with optional temporal extras.

    Args:
        inplanes: input channels.
        planes: bottleneck width; the block outputs ``planes * 4`` channels.
        stride: stride of the 3x3 convolution.
        downsample: optional module stored as ``self.downsample``. When
            given, a ``MultiConv`` with ``int(log2(frames))`` layers is
            also created as ``self.conv2_t``.
        frames: stored as ``self.frames``.

    When ``stride > 1`` an ``AvgPool3d`` with kernel and stride
    ``(stride, 1, 1)`` is created as ``self.avgpool_s``.
    """
    super(Bottleneck, self).__init__()
    self.frames = frames
    out_planes = planes * 4

    # 1x1 reduce
    self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
    self.bn1 = nn.BatchNorm2d(planes)
    # 3x3 (carries the stride)
    self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                           padding=1, bias=False)
    self.bn2 = nn.BatchNorm2d(planes)
    # 1x1 expand
    self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
    self.bn3 = nn.BatchNorm2d(out_planes)

    self.relu = nn.ReLU(inplace=True)
    self.downsample = downsample
    self.stride = stride

    if self.downsample is not None:
        n_temporal_layers = int(math.log(frames, 2))
        self.conv2_t = MultiConv(planes=planes, stride=stride,
                                 layers=n_temporal_layers)

    if self.stride > 1:
        window = (stride, 1, 1)
        self.avgpool_s = nn.AvgPool3d(kernel_size=window, stride=window,
                                      padding=0)
def __init__(self, rgb_nfilters, audio_nfilters, img_size, temp_size,
             hidden_layers):
    """Create the layers of the audio-visual module.

    Args:
        rgb_nfilters: input features of ``self.affine_rgb``.
        audio_nfilters: input features of ``self.affine_audio``.
        img_size: stored as ``self.img_size``.
        temp_size: leading extent of the ``AvgPool3d`` kernel
            ``(temp_size, 1, 1)``.
        hidden_layers: common output width of both affine layers and
            input width of the bilinear layer.
    """
    super(av_module, self).__init__()

    # Configuration
    self.rgb_nfilters = rgb_nfilters
    self.audio_nfilters = audio_nfilters
    self.hidden_layers = hidden_layers
    self.out_layers = 64
    self.img_size = img_size

    self.avgpool_rgb = nn.AvgPool3d((temp_size, 1, 1), stride=1)
    self.relu = nn.ReLU()

    # Project both inputs to the same width before combining them.
    self.affine_rgb = nn.Linear(rgb_nfilters, hidden_layers)
    self.affine_audio = nn.Linear(audio_nfilters, hidden_layers)
    self.w_a_rgb = nn.Bilinear(
        hidden_layers, hidden_layers, self.out_layers, bias=True)

    self.upscale_ = nn.Upsample(scale_factor=8, mode='bilinear')
def __init__(self, block, layers, spatial_size, sample_duration,
             shortcut_type='B', num_classes=1):
    """Build a 3D ResNet discriminator with spectrally-normalised stem and head.

    Args:
        block: residual block class; ``block.expansion`` sizes ``self.fc``.
        layers: sequence of four block counts, one per stage.
        spatial_size: input spatial size (pooling kernel uses ``ceil(/32)``).
        sample_duration: input temporal length (pooling kernel uses
            ``ceil(/16)``).
        shortcut_type: forwarded to ``self._make_layer``.
        num_classes: output size of ``self.fc``.
    """
    # Read by self._make_layer while the stages are built.
    self.inplanes = 64
    super(SNresDisc_3DCNN, self).__init__()

    # Stem: spectral-normalised conv; no batch norm follows it.
    stem = nn.Conv3d(3, 64, kernel_size=7, stride=(1, 2, 2),
                     padding=(3, 3, 3), bias=False)
    self.conv1 = utils.spectral_norm(stem)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool3d(kernel_size=(3, 3, 3), stride=2, padding=1)

    # Residual stages
    self.layer1 = self._make_layer(block, 64, layers[0], shortcut_type)
    self.layer2 = self._make_layer(
        block, 128, layers[1], shortcut_type, stride=2)
    self.layer3 = self._make_layer(
        block, 256, layers[2], shortcut_type, stride=2)
    self.layer4 = self._make_layer(
        block, 512, layers[3], shortcut_type, stride=2)

    # Head
    pool_t = int(math.ceil(sample_duration / 16))
    pool_s = int(math.ceil(spatial_size / 32))
    self.avgpool = nn.AvgPool3d((pool_t, pool_s, pool_s), stride=1)
    head = nn.Linear(512 * block.expansion, num_classes)
    self.fc = utils.spectral_norm(head)
def __init__(self, band, classes):
    """Create the layers of the SSRN network.

    Args:
        band: sizes the last-axis kernel of ``self.conv2`` as
            ``ceil((band - 6) / 2)``.
        classes: output size of the final linear layer.
    """
    super(SSRN_network, self).__init__()
    self.name = 'SSRN'

    def bn_relu(channels):
        # BatchNorm3d(eps=1e-3, momentum=0.1) followed by in-place ReLU.
        return nn.Sequential(
            nn.BatchNorm3d(channels, eps=0.001, momentum=0.1, affine=True),
            nn.ReLU(inplace=True))

    # Convolution along the last axis only.
    self.conv1 = nn.Conv3d(in_channels=1, out_channels=24,
                           kernel_size=(1, 1, 7), stride=(1, 1, 2))
    self.batch_norm1 = bn_relu(24)

    # Two residual blocks with (1, 1, 7) kernels, then two with (3, 3, 1).
    self.res_net1 = Residual(24, 24, (1, 1, 7), (0, 0, 3))
    self.res_net2 = Residual(24, 24, (1, 1, 7), (0, 0, 3))
    self.res_net3 = Residual(24, 24, (3, 3, 1), (1, 1, 0))
    self.res_net4 = Residual(24, 24, (3, 3, 1), (1, 1, 0))

    kernel_3d = math.ceil((band - 6) / 2)
    self.conv2 = nn.Conv3d(in_channels=24, out_channels=128,
                           padding=(0, 0, 0),
                           kernel_size=(1, 1, kernel_3d),
                           stride=(1, 1, 1))
    self.batch_norm2 = bn_relu(128)

    self.conv3 = nn.Conv3d(in_channels=1, out_channels=24,
                           padding=(0, 0, 0),
                           kernel_size=(3, 3, 128),
                           stride=(1, 1, 1))
    self.batch_norm3 = bn_relu(24)

    self.avg_pooling = nn.AvgPool3d(kernel_size=(5, 5, 1))
    # Outputs raw scores; no softmax is applied here.
    self.full_connection = nn.Sequential(
        nn.Dropout(p=0.5),
        nn.Linear(24, classes))
def __init__(self, training=True, **kwargs):
    """Build a 3D GoogLeNet with two auxiliary heads and a 2-way classifier.

    Args:
        training: assigned to ``self.training``.
        **kwargs: accepted but not used in this constructor.
    """
    super(GoogLeNet3D_new, self).__init__()

    stem = [
        nn.Conv3d(1, 64, kernel_size=7, stride=2, padding=1),
        nn.BatchNorm3d(64),
        nn.ReLU(True),
        nn.MaxPool3d(3, stride=2),
        nn.Conv3d(64, 64, kernel_size=1),
        nn.BatchNorm3d(64),
        nn.ReLU(True),
        nn.Conv3d(64, 192, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm3d(192),
        nn.ReLU(True),
        nn.MaxPool3d(3, stride=2),
    ]
    self.pre_layers = nn.Sequential(*stem)

    # NOTE(review): this writes the same attribute nn.Module uses for its
    # train/eval flag, and only on this module (children are untouched) --
    # confirm that is intended.
    self.training = training

    # Inception stage 3
    self.a3 = Inception(192, 64, 96, 128, 16, 32, 32)
    self.b3 = Inception(256, 128, 128, 192, 32, 96, 64)

    self.maxpool = nn.MaxPool3d(3, stride=2)

    # Inception stage 4
    self.a4 = Inception(480, 192, 96, 208, 16, 48, 64)
    self.b4 = Inception(512, 160, 112, 224, 24, 64, 64)
    self.c4 = Inception(512, 128, 128, 256, 24, 64, 64)
    self.d4 = Inception(512, 112, 144, 288, 32, 64, 64)
    self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)

    # Inception stage 5
    self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)
    self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)

    self.avgpool = nn.AvgPool3d((2, 3, 2), stride=1)
    self.linear = nn.Linear(27648, 2)

    # Auxiliary heads
    self.aux1 = InceptionAux(512, 2)
    self.aux2 = InceptionAux(528, 2)
def __init__(self, with_avg_pool=True, temporal_feature_size=1,
             spatial_feature_size=7, dropout_ratio=0.8, in_channels=2048,
             num_classes=101, init_std=0.01, non_linear = False,
             nonlinear_channels = 2048):
    """Create a simple classification head.

    Every argument is stored as an attribute of the same name.

    Args:
        with_avg_pool: create ``self.avg_pool``, an ``AvgPool3d`` over
            ``(temporal_feature_size, spatial_feature_size,
            spatial_feature_size)``.
        temporal_feature_size: leading extent of the pooling kernel.
        spatial_feature_size: trailing extents of the pooling kernel.
        dropout_ratio: dropout probability; ``0`` sets ``self.dropout``
            to ``None``.
        in_channels: input width of ``self.fc_cls`` when ``non_linear``
            is False.
        num_classes: output width of ``self.fc_cls``.
        init_std: stored only.
        non_linear: create ``self.fc_nl`` and size ``self.fc_cls`` by
            ``nonlinear_channels``.
        nonlinear_channels: input width of ``self.fc_cls`` when
            ``non_linear`` is True.
    """
    super(SimpleClsHead, self).__init__()

    self.with_avg_pool = with_avg_pool
    self.in_channels = in_channels
    self.dropout_ratio = dropout_ratio
    self.temporal_feature_size = temporal_feature_size
    self.spatial_feature_size = spatial_feature_size
    self.init_std = init_std
    self.num_classes = num_classes
    self.non_linear = non_linear
    self.nonlinear_channels = nonlinear_channels

    if self.dropout_ratio == 0:
        self.dropout = None
    else:
        self.dropout = nn.Dropout(p=self.dropout_ratio)

    if self.with_avg_pool:
        pool_window = (temporal_feature_size, spatial_feature_size,
                       spatial_feature_size)
        self.avg_pool = nn.AvgPool3d(pool_window)

    if self.non_linear:
        # The non-linear projection currently consists of an identity only.
        self.fc_nl = nn.Sequential(nn.Identity())

    cls_in_features = nonlinear_channels if self.non_linear else in_channels
    self.fc_cls = nn.Linear(cls_in_features, num_classes)
def inflate_pool(pool2d, time_dim, time_padding, time_stride, time_dilation,
                 center):
    '''
    Inflate a 2D pooling module into its 3D counterpart.

    args:
        - pool2d: MaxPool2d or AvgPool2d module
        - time_dim: size of the pooling kernel along the new time axis
        - time_padding: padding of the kernel along the time axis
        - time_stride: stride of the kernel along the time axis
        - time_dilation: dilation along the time axis (max pooling only;
          average pooling has no dilation)
        - center: not used in this func, maintained for consistent helper
          func args
    returns:
        3D pool layer (max or avg) whose spatial kernel/padding/stride,
        ceil_mode and the type-specific options (dilation and
        return_indices for max pooling, count_include_pad for average
        pooling) are copied from pool2d, with the time args prepended
    raises:
        ValueError: if pool2d is neither MaxPool2d nor AvgPool2d
    '''
    o_kernel_size = get_tuple(pool2d.kernel_size)
    o_padding = get_tuple(pool2d.padding)
    o_stride = get_tuple(pool2d.stride)

    # Time axis goes first, followed by the two original spatial axes.
    kernel_dim = (time_dim, o_kernel_size[0], o_kernel_size[1])
    padding = (time_padding, o_padding[0], o_padding[1])
    stride = (time_stride, o_stride[0], o_stride[1])

    if isinstance(pool2d, torch.nn.MaxPool2d):
        o_dilation = get_tuple(pool2d.dilation)
        dilation = (time_dilation, o_dilation[0], o_dilation[1])
        pool3d = nn.MaxPool3d(kernel_dim,
                              padding=padding,
                              dilation=dilation,
                              stride=stride,
                              # keep the output signature of the 2D layer
                              return_indices=pool2d.return_indices,
                              ceil_mode=pool2d.ceil_mode)
    elif isinstance(pool2d, torch.nn.AvgPool2d):
        pool3d = nn.AvgPool3d(kernel_dim,
                              padding=padding,
                              stride=stride,
                              ceil_mode=pool2d.ceil_mode,
                              # previously dropped, silently reverting a
                              # non-default setting to True
                              count_include_pad=pool2d.count_include_pad)
    else:
        raise ValueError(INVALID_POOL_TYPE_ERR.format(type(pool2d)))
    return pool3d
def __init__(self, growth_rate=32, block_config=(4, 4, 4),
             init_channel_num=64, bn_size=4, drop_rate=0):
    """Build a 3D DenseNet with a single-output classifier.

    Args:
        growth_rate: channels added by each layer of a dense block.
        block_config: number of layers in each dense block.
        init_channel_num: output channels of the initial ``FeatureLayer``.
        bn_size: forwarded to ``DenseBlock``.
        drop_rate: forwarded to ``DenseBlock``.
    """
    super(DenseNet, self).__init__()

    self.feature_layer = FeatureLayer(1, init_channel_num)

    # Add the dense blocks and the transitions between them.
    channel_num = init_channel_num
    for i, layer_num in enumerate(block_config):
        block = DenseBlock(layer_num=layer_num, in_channel=channel_num,
                           bn_size=bn_size, growth_rate=growth_rate,
                           drop_rate=drop_rate)
        self.feature_layer.add_module('denseblock%d' % (i + 1), block)
        channel_num = channel_num + layer_num * growth_rate

        # Every dense block except the last is followed by a transition,
        # after which the running channel count is halved.
        if i != len(block_config) - 1:
            trans = Transition(channel_num, 0.5)
            self.feature_layer.add_module('transition%d' % (i + 1), trans)
            channel_num = int(0.5 * channel_num)

    # Add the classifier.
    self.feature_layer.add_module('norm5', nn.BatchNorm3d(channel_num))
    self.feature_layer.add_module('relu5', nn.ReLU(inplace=True))
    self.feature_layer.add_module(
        'avgpool5', nn.AvgPool3d(kernel_size=3, stride=2))
    # NOTE: the required number of input features of this Linear layer
    # is not yet settled.
    self.classifier = nn.Linear(channel_num, 1)
    self.sigmoid = torch.sigmoid

    # Weight initialisation following the official torch repo.
    for m in self.modules():
        if isinstance(m, nn.Conv3d):
            # kaiming_normal_ replaces the deprecated kaiming_normal.
            nn.init.kaiming_normal_(m.weight)
        elif isinstance(m, nn.BatchNorm3d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
        elif isinstance(m, nn.Linear):
            m.bias.data.zero_()
def __init__(self, num_classes=400, dropout_keep_prob=1, input_channel=3,
             spatial_squeeze=True):
    """Build the I3D feature extractor and 1x1x1 convolutional classifier.

    Args:
        num_classes: output channels of the final convolution.
        dropout_keep_prob: probability of *keeping* a channel in the
            dropout layer before the classifier; ``1`` disables dropout.
        input_channel: channels of the input clip.
        spatial_squeeze: stored as ``self.spatial_squeeze``.
    """
    super(I3D, self).__init__()
    self.features = nn.Sequential(
        BasicConv3d(input_channel, 64, kernel_size=7, stride=2, padding=3),  # (64, 384, 56, 56)
        nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=(0, 1, 1)),  # (64, 384, 28, 28)
        BasicConv3d(64, 64, kernel_size=1, stride=1),  # (64, 384, 28, 28)
        BasicConv3d(64, 192, kernel_size=3, stride=1, padding=1),  # (192, 384, 28, 28)
        nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=(0, 1, 1)),  # (192, 384, 14, 14)
        Mixed_3b(),  # (256, 384, 14, 14)
        Mixed_3c(),  # (256, 384, 14, 14)
        nn.MaxPool3d(kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=(1, 1, 1)),  # (480, 192, 7, 7)
        Mixed_4b(),  # (512, 192, 7, 7)
        Mixed_4c(),  # (512, 192, 7, 7)
        Mixed_4d(),  # (512, 192, 7, 7)
        Mixed_4e(),  # (528, 192, 7, 7)
        Mixed_4f(),  # (832, 192, 7, 7)
        # Temporal-only pooling, so the spatial size stays at 7x7.
        nn.MaxPool3d(kernel_size=(2, 1, 1), stride=(2, 1, 1), padding=(0, 0, 0)),  # (832, 96, 7, 7)
        Mixed_5b(),  # (832, 96, 7, 7)
        Mixed_5c(),  # (1024, 96, 7, 7)
        nn.AvgPool3d(kernel_size=(2, 7, 7), stride=1),  # (1024, 8, 1, 1)
        # Dropout3d takes the probability of zeroing a channel, so the
        # keep probability has to be inverted. Passing it through directly
        # made the default (1) zero every activation in training mode.
        nn.Dropout3d(1 - dropout_keep_prob),
        nn.Conv3d(1024, num_classes, kernel_size=1, stride=1, bias=True),  # (400, 8, 1, 1)
    )
    self.spatial_squeeze = spatial_squeeze
    # NOTE(review): no dim is given, so Softmax falls back to its
    # deprecated implicit-dimension behaviour -- confirm the intended axis.
    self.softmax = nn.Softmax()
def __init__(self, num_classes=400, num_frames=64, num_keyframe=8,
             dropout_keep_prob=0.5):
    """Build the flow branch: FlowNetS, three 3D inception modules and a head.

    Args:
        num_classes: output size of ``self.logits_flow``.
        num_frames: stored as ``self.num_frames``.
        num_keyframe: stored as ``self.num_keyframe``.
        dropout_keep_prob: probability of *keeping* an activation in the
            dropout layer before the classifier.
    """
    super(FGS3DFLOW, self).__init__()
    self.num_frames = num_frames
    self.num_keyframe = num_keyframe
    self.num_classes = num_classes
    self.dropout_keep_prob = dropout_keep_prob

    ##############################################
    # Load flownet
    ##############################################
    self.flownetresize = nn.AvgPool2d(kernel_size=4, stride=4)
    FlowNet_state_dict = torch.load(
        '/home/weik/pretrainedmodels/FlowNetS/flownets_from_caffe.pth.tar.pth'
    )
    self.flownets = flownets(FlowNet_state_dict)
    set_parameter_requires_grad(self.flownets)
    # NOTE(review): the network built from the checkpoint above is replaced
    # here by one built without it, so the loaded weights are discarded --
    # confirm whether this reassignment is intentional.
    self.flownets = flownets()

    self.inception_3D_flow_1 = InceptionModule(
        2, [256, 160, 320, 32, 128, 128], 'mixed_4f')
    self.inception_3D_flow_2 = InceptionModule(
        256 + 320 + 128 + 128, [256, 160, 320, 32, 128, 128], 'mixed_5b')
    self.inception_3D_flow_3 = InceptionModule(
        256 + 320 + 128 + 128, [384, 192, 384, 48, 128, 128], 'mixed_5c')

    self.avg_pool_flow = nn.AvgPool3d(kernel_size=[2, 14, 14],
                                      stride=(2, 1, 1))
    # nn.Dropout takes the drop probability, so invert the keep
    # probability (identical at the default of 0.5).
    self.dropout_flow = nn.Dropout(1 - self.dropout_keep_prob)
    self.logits_flow = nn.Linear((384 + 384 + 128 + 128) * 28,
                                 self.num_classes)
    torch.nn.init.normal_(self.logits_flow.weight, mean=0.0, std=0.01)
    torch.nn.init.constant_(self.logits_flow.bias, 0.0)

    # Applied to the flownet instance that is actually kept.
    set_parameter_requires_grad(self.flownets)
def __init__(
    self,
    unique_id: str,
    num_classes: int,
    in_plane: int,
    pool_size: Optional[List[int]],
    activation_func: str,
    use_dropout: Optional[bool] = None,
    dropout_ratio: float = 0.5,
):
    """
    Constructor for FullyConvolutionalLinearHead.

    Args:
        unique_id: Identifier of this head. Several instances of the same
            head may be attached to one model; unique_id tells them apart.
        num_classes: Number of output classes.
        in_plane: Input size of the fully connected layer.
        pool_size: Kernel size of the 3d average pooling layer (applied
            with stride 1). When None,
            :class:`torch.nn.AdaptiveAvgPool3d` with output size
            (1, 1, 1) is used instead.
        activation_func: Name of the output activation, forwarded to
            ``FullyConvolutionalLinear`` as ``act_func`` ('softmax' or
            'sigmoid').
        use_dropout: When truthy, a dropout layer is created as
            ``self.dropout``; otherwise that attribute is not set.
        dropout_ratio: Dropout probability.
    """
    super().__init__(unique_id, num_classes)

    self.final_avgpool = (
        nn.AdaptiveAvgPool3d((1, 1, 1))
        if pool_size is None
        else nn.AvgPool3d(pool_size, stride=1)
    )

    if use_dropout:
        self.dropout = nn.Dropout(p=dropout_ratio)

    # Pooling is kept separate from the fully-convolutional projection:
    # multi-path models (e.g. SlowFast) feed more than one tensor, and a
    # dedicated head can then merge them by concat or addition, pool, and
    # still reuse FullyConvolutionalLinear.
    self.head_fcl = FullyConvolutionalLinear(
        in_plane, num_classes, act_func=activation_func
    )
def __init__(self, sample_size, sample_duration, num_classes=400,
             last_fc=True):
    """Build a 3D MobileNet made of depth-wise blocks, plus a linear head.

    Args:
        sample_size: input spatial size (pooling kernel uses ``ceil(/32)``).
        sample_duration: input temporal length (pooling kernel uses
            ``ceil(/16)``).
        num_classes: output size of ``self.fc``.
        last_fc: stored as ``self.last_fc``.
    """
    super(MobileNetResidual, self).__init__()
    self.last_fc = last_fc

    self.model = nn.Sequential(
        self.conv_bn(3, 32, 2),
        DepthWiseBlock(32, 64, 1),
        DepthWiseBlock(64, 128, (1, 2, 2)),
        DepthWiseBlock(128, 128, 1),
        DepthWiseBlock(128, 256, 2),
        DepthWiseBlock(256, 256, 1),
        DepthWiseBlock(256, 512, 2),
        DepthWiseBlock(512, 512, 1),
        DepthWiseBlock(512, 512, 1),
        DepthWiseBlock(512, 512, 1),
        DepthWiseBlock(512, 512, 1),
        DepthWiseBlock(512, 512, 1),
        DepthWiseBlock(512, 1024, 2),
        DepthWiseBlock(1024, 1024, 1),
    )

    last_duration = math.ceil(sample_duration / 16)
    last_size = math.ceil(sample_size / 32)
    self.avgpool = nn.AvgPool3d((last_duration, last_size, last_size),
                                stride=1)
    self.dropout = nn.Dropout(p=0.5)
    self.fc = nn.Linear(1024, num_classes)

    for m in self.modules():
        if isinstance(m, nn.Conv3d):
            # He initialisation with fan-out = kernel volume * out_channels.
            # A Conv3d kernel has three dimensions; using only the first
            # two (a leftover from 2D code) overestimates the std.
            k_t, k_h, k_w = m.kernel_size
            n = k_t * k_h * k_w * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(m, nn.BatchNorm3d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
def __init__(self, in_channels=in_channels, out_channels=out_channels,
             num_classes=num_classes, n_conv_blocks=n_conv_blocks,
             n_residual_blocks=n_residual_blocks):
    """Build conv blocks, residual blocks, a 32x average pool and an MLP head.

    Defaults are taken from module-level settings of the same names.

    Args:
        in_channels: channels of the input volume.
        out_channels: channels produced by every initial conv block and
            consumed by the residual blocks.
        num_classes: output size of the classifier.
        n_conv_blocks: number of initial Conv3d -> InstanceNorm3d -> ReLU
            blocks.
        n_residual_blocks: number of ``ResidualBlock`` modules.
    """
    super(resnet_classifier, self).__init__()

    # Initial convolution blocks.
    # NOTE(review): padding=3 keeps the spatial size only for a kernel
    # size of 7 -- confirm conv_block_kernel_size.
    model = []
    for _ in range(n_conv_blocks):
        model += [
            # 'same' is not a valid padding_mode (newer PyTorch rejects it
            # at construction); the explicit padding with the default zero
            # padding mode is used instead.
            nn.Conv3d(in_channels, out_channels,
                      kernel_size=conv_block_kernel_size, padding=3),
            # Normalises the conv output, so it is sized by out_channels.
            nn.InstanceNorm3d(out_channels),
            nn.ReLU(inplace=True),
        ]
        in_channels = out_channels

    # Residual blocks operate on the conv blocks' output channels.
    for _ in range(n_residual_blocks):
        model += [ResidualBlock(in_channels)]

    self.model = nn.Sequential(*model)
    self.avgpool = nn.AvgPool3d(kernel_size=32, stride=32, padding=0)
    self.classifier = nn.Sequential(
        nn.Linear(feature_map_dim, 256),
        nn.ReLU(inplace=True),
        nn.Linear(256, 128),
        nn.ReLU(inplace=True),
        nn.Linear(128, num_classes),
    )
    self.fov = (n_conv_blocks * conv_block_kernel_size
                + n_residual_blocks * (res_block_kernel_size - 1))
def __init__(self, in_planes, out_planes, stride, groups, f =0):
    """Build a grouped 3D bottleneck with an optional pooled shortcut.

    Args:
        in_planes: nominal input channels; decides the grouping of the
            first conv and is replaced by 1920 when ``f`` is truthy.
        out_planes: nominal output channels; only its quarter is used (as
            the bottleneck width), the actual output width is 1920 when
            ``f`` is truthy and 960 otherwise.
        stride: stride of the depth-wise 3x3x3 conv; ``2`` also creates
            ``self.shortcut``.
        groups: groups of the last 1x1x1 conv (and of the first one unless
            ``in_planes == 24``).
        f: flag selecting the 1920-channel configuration.
    """
    super(Layer_3, self).__init__()
    self.stride = stride
    self.groups = groups

    # Both derived from the caller-supplied values, before any override.
    width = out_planes // 4
    if self.stride == 2:
        # NOTE(review): this result is overwritten unconditionally below.
        out_planes = out_planes - in_planes
    first_groups = 1 if in_planes == 24 else groups

    # Hard-coded channel configuration.
    if f:
        in_planes = 1920
        out_planes = 1920
    else:
        out_planes = 960

    # 1x1x1 grouped reduce
    self.conv1 = nn.Conv3d(in_planes, width, kernel_size=1,
                           groups=first_groups, bias=False)
    self.bn1 = nn.BatchNorm3d(width)
    # 3x3x3 depth-wise
    self.conv2 = nn.Conv3d(width, width, kernel_size=3, stride=stride,
                           padding=1, groups=width, bias=False)
    self.bn2 = nn.BatchNorm3d(width)
    # 1x1x1 grouped expand
    self.conv3 = nn.Conv3d(width, out_planes, kernel_size=1,
                           groups=groups, bias=False)
    self.bn3 = nn.BatchNorm3d(out_planes)
    self.relu = nn.ReLU(inplace=True)

    if stride == 2:
        self.shortcut = nn.AvgPool3d(kernel_size=(2, 3, 3), stride=2,
                                     padding=(0, 1, 1))