Ejemplo n.º 1
0
    def __init__(self, dim_in, temp_pool_size, resolution, scale_factor):
        """Register the per-pathway pooling and ROI-align sub-modules.

        For every pathway index ``i`` the following modules are registered:
        ``s{i}_tpool`` (only when ``temp_pool_size[i]`` is not ``None``),
        ``s{i}_roi`` and ``s{i}_spool``.

        Args:
            dim_in: stored unchanged on ``self.dim_in``.
            temp_pool_size: one entry per pathway; the temporal extent of the
                average-pool kernel, or ``None`` to skip temporal pooling.
            resolution: per-pathway ROIAlign output size, also used as the
                kernel size of the spatial max pool.
            scale_factor: per-pathway divisor; ROIAlign receives
                ``1.0 / scale_factor[i]`` as its spatial scale.
        """
        super(Head_featextract_roi, self).__init__()
        self.dim_in = dim_in
        self.num_pathways = len(temp_pool_size)

        for idx, t_kernel in enumerate(temp_pool_size):
            # Temporal pooling is optional per pathway.
            if t_kernel is not None:
                self.add_module(
                    f's{idx}_tpool',
                    nn.AvgPool3d([t_kernel, 1, 1], stride=1))

            self.add_module(
                f's{idx}_roi',
                ROIAlign(resolution[idx],
                         spatial_scale=1.0/scale_factor[idx],
                         sampling_ratio=0,
                         aligned=True))

            # Kernel equals the ROI resolution and no padding is given.
            self.add_module(
                f's{idx}_spool',
                nn.MaxPool2d(resolution[idx], stride=1))
Ejemplo n.º 2
0
    def __init__(self, in_channels, out_channels, conv_kernel_size=3, apply_pooling=True,
                 pool_kernel_size=2, pool_type='max', basic_module=DoubleConv, conv_layer_order='gcr',
                 num_groups=8, padding=1):
        """Set up an optional 3D pooling layer followed by a convolutional module.

        Args:
            in_channels: input channel count forwarded to ``basic_module``.
            out_channels: output channel count forwarded to ``basic_module``.
            conv_kernel_size: forwarded to ``basic_module`` as ``kernel_size``.
            apply_pooling: when false, ``self.pooling`` is ``None``.
            pool_kernel_size: kernel size of the pooling layer.
            pool_type: ``'max'`` or ``'avg'``; anything else fails the assert.
            basic_module: callable building the convolutional module.
            conv_layer_order: forwarded to ``basic_module`` as ``order``.
            num_groups: forwarded to ``basic_module``.
            padding: forwarded to ``basic_module``.
        """
        super(Encoder, self).__init__()
        assert pool_type in ['max', 'avg']

        # The pool type is validated above even when pooling is disabled.
        if not apply_pooling:
            self.pooling = None
        elif pool_type == 'max':
            self.pooling = nn.MaxPool3d(kernel_size=pool_kernel_size)
        else:
            self.pooling = nn.AvgPool3d(kernel_size=pool_kernel_size)

        module_kwargs = dict(encoder=True,
                             kernel_size=conv_kernel_size,
                             order=conv_layer_order,
                             num_groups=num_groups,
                             padding=padding)
        self.basic_module = basic_module(in_channels, out_channels, **module_kwargs)
Ejemplo n.º 3
0
    def __init__(self):
        """Build the 3D convolutional stack (``convs``) and the MLP (``lins``).

        ``convs`` is eight Conv3d -> BatchNorm3d -> ReLU triples followed by
        an average pool over the first pooled dimension. ``lins`` is eleven
        linear layers narrowing from 64 features to 1, with ReLU between
        layers and Tanh after the last one.
        """
        super().__init__()

        # (in_channels, out_channels, kernel) for each convolution, in order.
        conv_specs = [
            (1, 8, (5, 4, 4)),
            (8, 8, (5, 1, 1)),
            (8, 16, (5, 2, 2)),
            (16, 16, (5, 1, 1)),
            (16, 32, (5, 2, 2)),
            (32, 32, (5, 1, 1)),
            (32, 64, (5, 2, 2)),
            (64, 64, (5, 2, 2)),
        ]
        conv_layers = []
        for c_in, c_out, kernel in conv_specs:
            # Only the first kernel dimension is padded.
            conv_layers.append(nn.Conv3d(c_in, c_out, kernel, padding=(2, 0, 0)))
            conv_layers.append(nn.BatchNorm3d(c_out))
            conv_layers.append(nn.ReLU())
        conv_layers.append(nn.AvgPool3d((5, 1, 1)))
        self.convs = nn.Sequential(*conv_layers)

        # Consecutive pairs of widths define the linear layers.
        widths = [64, 64, 32, 32, 16, 16, 8, 8, 4, 4, 2, 1]
        last_index = len(widths) - 2
        lin_layers = []
        for index, (n_in, n_out) in enumerate(zip(widths, widths[1:])):
            lin_layers.append(nn.Linear(n_in, n_out))
            # The final layer is squashed with Tanh instead of ReLU.
            lin_layers.append(nn.Tanh() if index == last_index else nn.ReLU())
        self.lins = nn.Sequential(*lin_layers)
    def __init__(self,
                 block,
                 layers,
                 sample_size,
                 sample_duration,
                 shortcut_type='B',
                 cardinality=32,
                 num_classes=400):
        """Build the 3D ResNeXt stem, four residual stages and classifier head.

        Args:
            block: residual block class; forwarded to ``_make_layer`` and its
                ``expansion`` attribute sizes the final linear layer.
            layers: sequence with the number of blocks for each of the four
                stages.
            sample_size: spatial input size; the final average-pool kernel
                uses ``ceil(sample_size / 32)``.
            sample_duration: temporal input length; the final average-pool
                kernel uses ``ceil(sample_duration / 16)``.
            shortcut_type: forwarded to ``_make_layer``.
            cardinality: forwarded to ``_make_layer``; also scales the input
                width of the final linear layer.
            num_classes: output size of the final linear layer.
        """
        # Plain int attribute, so assigning it before Module.__init__ is safe.
        # Presumably consumed by _make_layer -- TODO confirm.
        self.inplanes = 64
        super(ResNeXt, self).__init__()
        self.conv1 = nn.Conv3d(
            3,
            64,
            kernel_size=7,
            stride=(1, 2, 2),
            padding=(3, 3, 3),
            bias=False)
        self.bn1 = nn.BatchNorm3d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool3d(kernel_size=(3, 3, 3), stride=2, padding=1)
        self.layer1 = self._make_layer(block, 128, layers[0], shortcut_type,
                                       cardinality)
        self.layer2 = self._make_layer(
            block, 256, layers[1], shortcut_type, cardinality, stride=2)
        self.layer3 = self._make_layer(
            block, 512, layers[2], shortcut_type, cardinality, stride=2)
        self.layer4 = self._make_layer(
            block, 1024, layers[3], shortcut_type, cardinality, stride=2)
        last_duration = int(math.ceil(sample_duration / 16))
        last_size = int(math.ceil(sample_size / 32))

        self.avgpool = nn.AvgPool3d(
            (last_duration, last_size, last_size), stride=1)
        self.dropout = nn.Dropout(0.7)
        self.fc = nn.Linear(cardinality * 32 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv3d):
                # In-place initialiser; the non-underscore name is deprecated.
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
            elif isinstance(m, nn.BatchNorm3d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
Ejemplo n.º 5
0
    def __init__(self, num_classes):
        """Create the pooling layers, bottleneck classifiers and embedders.

        Args:
            num_classes: stored on ``self.num_classes`` and passed to every
                ``BottleClassifier`` as its second argument.
        """
        super(PyramidClassifier, self).__init__()
        self.num_classes = num_classes

        self.maxpool2D = nn.MaxPool2d(
            kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
        self.avgpool2D = nn.AvgPool2d(
            kernel_size=3, stride=2, padding=1, ceil_mode=False)

        # NOTE(review): stride=0 is stored as given; PyTorch pooling is
        # expected to reject a zero stride when the layer is called --
        # confirm whether these two layers are ever used in forward().
        pair_kernel = (1, 1, 2)
        self.maxpool3D = nn.MaxPool3d(
            kernel_size=pair_kernel, stride=0, ceil_mode=False)
        self.averagepool3D = nn.AvgPool3d(
            kernel_size=pair_kernel, stride=0, ceil_mode=False)

        self.averagepool = nn.AdaptiveAvgPool2d(output_size=(1, 1))

        def make_bottleneck(in_dim, bottle_dim):
            # All bottleneck classifiers share the relu/dropout settings.
            return BottleClassifier(in_dim,
                                    self.num_classes,
                                    relu=True,
                                    dropout=False,
                                    bottle_dim=bottle_dim)

        self.bottleneck_res4 = make_bottleneck(1024, 256)
        self.linear_embeder_res4 = nn.Linear(1024, 256)

        self.bottleneck_res5 = make_bottleneck(2048, 256)
        self.linear_embeder_res5 = nn.Linear(2048, 256)

        # 3072 = 1024 + 2048, the two input widths used above.
        self.bottleneck_unite = make_bottleneck(3072, 512)
        self.linear_embeder_unite = nn.Linear(3072, 256)
Ejemplo n.º 6
0
    def __init__(self,
                 input_nc,
                 ndf,
                 n_layers=3,
                 norm_layer=nn.BatchNorm3d,
                 use_sigmoid=False,
                 num_D=1,
                 get_inter_feat=False,
                 has_bias=False,
                 has_sn=True,
                 max_ndf=256,
                 conv_type='deform'):
        """Create ``num_D`` discriminators and the downsampling layer.

        Every argument is stored on ``self`` under the same name and, except
        for ``num_D``, forwarded positionally to ``NLayerDiscriminator``.

        When ``get_inter_feat`` is true, sub-modules ``model0`` ..
        ``model{n_layers + 1}`` of each discriminator are re-exposed as
        ``scale{i}_layer{j}``; otherwise the discriminator's ``model`` is
        exposed as ``layer{i}``.
        """
        super(MultiscaleDiscriminator, self).__init__()
        self.input_nc = input_nc
        self.ndf = ndf
        self.n_layers = n_layers
        self.norm_layer = norm_layer
        self.use_sigmoid = use_sigmoid
        self.num_D = num_D
        self.get_inter_feat = get_inter_feat
        self.has_bias = has_bias
        self.has_sn = has_sn
        self.max_ndf = max_ndf
        self.conv_type = conv_type

        for scale in range(self.num_D):
            discriminator = NLayerDiscriminator(
                input_nc, ndf, n_layers, norm_layer, use_sigmoid,
                get_inter_feat, has_bias, has_sn, max_ndf, conv_type)

            if not self.get_inter_feat:
                setattr(self, 'layer{}'.format(scale), discriminator.model)
                continue

            # Expose each intermediate stage so its features can be read.
            for stage in range(n_layers + 2):
                setattr(self,
                        'scale{}_layer{}'.format(scale, stage),
                        getattr(discriminator, 'model{}'.format(stage)))

        # Pools only over the last two dimensions; the first keeps its size.
        self.downsample = nn.AvgPool3d(kernel_size=[1, 3, 3],
                                       stride=[1, 2, 2],
                                       padding=[0, 1, 1],
                                       count_include_pad=False)

        self.apply(weights_init)
Ejemplo n.º 7
0
    def __init__(self,
                 block,
                 layers,
                 sample_size,
                 sample_duration,
                 shortcut_type='B',
                 num_classes=400,
                 input_chan=3):
        """Build the 3D ResNet stem, four residual stages and classifier.

        Args:
            block: residual block class; forwarded to ``_make_layer`` and its
                ``expansion`` attribute sizes the final linear layer.
            layers: sequence with the number of blocks for each stage.
            sample_size: spatial input size; the average-pool kernel uses
                ``ceil(sample_size / 32)``.
            sample_duration: temporal input length; the average-pool kernel
                uses ``ceil(sample_duration / 16)``.
            shortcut_type: forwarded to ``_make_layer``.
            num_classes: output size of the final linear layer.
            input_chan: number of input channels of the first convolution.
        """
        self.inplanes = 64
        super(ResNet, self).__init__()

        self.conv1 = nn.Conv3d(input_chan, 64, kernel_size=7,
                               stride=(1, 2, 2), padding=(3, 3, 3),
                               bias=False)
        self.bn1 = nn.BatchNorm3d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool3d(kernel_size=(3, 3, 3), stride=2, padding=1)

        self.layer1 = self._make_layer(block, 64, layers[0], shortcut_type)
        self.layer2 = self._make_layer(block, 128, layers[1], shortcut_type,
                                       stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], shortcut_type,
                                       stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], shortcut_type,
                                       stride=2)

        pool_kernel = (int(math.ceil(sample_duration / 16)),
                       int(math.ceil(sample_size / 32)),
                       int(math.ceil(sample_size / 32)))
        self.avgpool = nn.AvgPool3d(pool_kernel, stride=1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Weight initialisation, chosen per layer type.
        for module in self.modules():
            if isinstance(module, nn.Conv3d):
                nn.init.kaiming_normal_(module.weight, mode='fan_in', nonlinearity='relu')
                continue
            if isinstance(module, nn.Linear):
                nn.init.xavier_uniform_(module.weight)
                nn.init.constant_(module.bias, 0)
                continue
            if isinstance(module, (nn.BatchNorm3d, nn.GroupNorm)):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
Ejemplo n.º 8
0
    def compute_O_O_interaction(self,
                                sets_of_objects,
                                t,
                                previous_T,
                                D,
                                sampling=False):
        """Relate the object set at index ``t`` to each set in ``previous_T``.

        For every index in ``previous_T`` the pair (previous set, reference
        set) is turned into an MLP input by ``self.create_input_mlp`` and
        passed through ``self.mlp_inter``.

        Args:
            sets_of_objects: indexed as ``sets_of_objects[:, index]``.
            t: index of the reference object set.
            previous_T: iterable of indices of earlier object sets.
            D: forwarded unchanged to ``self.create_input_mlp``.
            sampling: not used by this method.

        Returns:
            A pair ``(relations, is_object)``. In training mode with exactly
            one previous index, the single MLP output and its mask are
            returned as-is. Otherwise the outputs are stacked on dim 1 and
            averaged over that dim, and the masks are summed over dim 1 and
            clamped to ``[0, 1]``.
        """
        reference_set = sets_of_objects[:, t]

        relations, object_masks = [], []
        for prev_t in previous_T:
            mlp_input, mask = self.create_input_mlp(
                sets_of_objects[:, prev_t], reference_set, D)
            relations.append(self.mlp_inter(mlp_input))
            object_masks.append(mask)

        # Single interaction while training: nothing to aggregate.
        if self.training and len(relations) == 1:
            return relations[0], object_masks[0]

        stacked = torch.stack(relations, 1)
        # Average across the stacked interactions
        # (nn.MaxPool3d with the same kernel would take the max instead).
        averaged = nn.AvgPool3d((stacked.size(1), 1, 1))(stacked)
        # The pooled tensor is unpacked as 4-D; its second dim is dropped.
        batch, _, t_prim, feat = averaged.size()
        averaged = averaged.view(batch, t_prim, feat)

        merged_mask = torch.clamp(
            torch.sum(torch.stack(object_masks, 1), 1), 0, 1)
        return averaged, merged_mask
Ejemplo n.º 9
0
    def __init__(self, block, layers, sample_size, sample_duration, shortcut_type='B', cardinality=32, num_classes=400,
                 use_depthwise=False, loss_type=None, use_extra_layer=False, phase='train',
                 data_type='normal', policy='first'):
        """Build a 3D ResNeXt with either a detector head or a pooled classifier.

        Args:
            block: residual block class; forwarded to ``_make_layer`` (and to
                the detector) and its ``expansion`` sizes the linear layer.
            layers: sequence with the number of blocks for each stage.
            sample_size: spatial input size; the head kernel uses
                ``ceil(sample_size / 32)`` spatially.
            sample_duration: used directly as the temporal extent of the head
                kernel.
            shortcut_type: forwarded to ``_make_layer``.
            cardinality: forwarded to ``_make_layer``; also scales the head
                input width.
            num_classes: output size of the head.
            use_depthwise: when true, ``self.DS_Conv3d`` is set to
                ``DepthwiseSeparableConv(dimension=3)``; otherwise ``None``.
            loss_type: ``'multiloss'`` selects the ``MultiDetector`` head; any
                other value selects average pooling plus a linear layer.
            use_extra_layer: forwarded to the detector as ``extra_layers``.
            phase: forwarded to the detector.
            data_type: currently not forwarded (see the note below).
            policy: forwarded to the detector.
        """
        # Module.__init__ must run before any attribute that may hold an
        # nn.Module is assigned, otherwise nn.Module.__setattr__ raises.
        super(ResNeXt, self).__init__()

        self.inplanes = 64
        self.DS_Conv3d = DepthwiseSeparableConv(dimension=3) if use_depthwise else None
        # Holds the detector *class* until it is instantiated further down.
        self.Detector_layer = MultiDetector if loss_type == 'multiloss' else None

        self.sample_size = sample_size
        self.conv1 = nn.Conv3d(3, 64, kernel_size=7, stride=(1, 2, 2), padding=(3, 3, 3), bias=False)
        self.bn1 = nn.BatchNorm3d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool3d(kernel_size=(3, 3, 3), stride=(1, 2, 2), padding=1)
        self.layer1 = self._make_layer(block, 128, layers[0], shortcut_type, cardinality)
        self.layer2 = self._make_layer(block, 256, layers[1], shortcut_type, cardinality, stride=(1, 2, 2))
        self.layer3 = self._make_layer(block, 512, layers[2], shortcut_type, cardinality, stride=(1, 2, 2))
        self.layer4 = self._make_layer(block, 1024, layers[3], shortcut_type, cardinality, stride=(1, 2, 2))

        # Every stride given above is 1 along the first (temporal) axis, so
        # the head kernel spans the full sample_duration rather than
        # ceil(sample_duration / 16).
        last_duration = sample_duration
        last_size = math.ceil(sample_size / 32)
        kernel_size = (last_duration, last_size, last_size)
        if self.Detector_layer is not None:
            # NOTE(review): the `data_type` argument is ignored here and
            # 'normal' is always passed -- confirm whether it should be
            # forwarded instead.
            self.Detector_layer = self.Detector_layer(block, cardinality * 32, kernel_size=kernel_size,
                                                      num_classes=num_classes, extra_layers=use_extra_layer,
                                                      phase=phase, data_type='normal', policy=policy)
        else:
            self.avgpool = nn.AvgPool3d(kernel_size, stride=1)
            self.fc = nn.Linear(cardinality * 32 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv3d):
                # In-place initialiser; no need to rebind the parameter.
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
            elif isinstance(m, nn.BatchNorm3d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
Ejemplo n.º 10
0
    def __init__(self,
                 input_dim,
                 output_dim,
                 initializers,
                 depth=3,
                 padding=True,
                 pool=True,
                 reversible=False):
        """Assemble an optional pooling layer followed by convolutional layers.

        Args:
            input_dim: channel count entering the first layer.
            output_dim: channel count produced by every layer.
            initializers: not used by this constructor.
            depth: number of ``Conv3D`` layers on the non-reversible path;
                must be at least 1.
            padding: converted with ``int()`` and used as the convolution
                padding (``True`` -> 1, ``False`` -> 0).
            pool: when true, a stride-2 average pool is placed first.
            reversible: when true, a single ``ReversibleSequence`` replaces
                the ``Conv3D`` stack.

        Raises:
            ValueError: if ``depth`` is smaller than 1.
        """
        super(DownConvolutionalBlock, self).__init__()

        if depth < 1:
            raise ValueError

        stages = []
        if pool:
            stages.append(
                nn.AvgPool3d(kernel_size=2, stride=2, padding=0,
                             ceil_mode=True))

        if reversible:
            stages.append(
                ReversibleSequence(input_dim, output_dim, reversible_depth=1))
        else:
            conv_pad = int(padding)
            # The first convolution changes the channel count; the remaining
            # depth - 1 convolutions keep it.
            channel_pairs = [(input_dim, output_dim)]
            channel_pairs += [(output_dim, output_dim)] * (depth - 1)
            for c_in, c_out in channel_pairs:
                stages.append(
                    Conv3D(c_in, c_out, kernel_size=3, stride=1,
                           padding=conv_pad))

        self.layers = nn.Sequential(*stages)
Ejemplo n.º 11
0
def conv(in_f,
         out_f,
         kernel_size,
         stride=1,
         bias=True,
         pad='zero',
         downsample_mode='stride'):
    """Build a 3D convolution with optional explicit padding and downsampling.

    Args:
        in_f: number of input channels.
        out_f: number of output channels.
        kernel_size: integer kernel size; padding is ``int((k - 1) / 2)``.
        stride: convolution stride, or the downsampling factor when
            ``downsample_mode`` is not ``'stride'``.
        bias: whether the convolution has a bias term.
        pad: ``'reflection'`` places an ``nn.ReflectionPad3d`` in front of an
            unpadded convolution; any other value pads inside the
            convolution itself.
        downsample_mode: ``'stride'`` (strided convolution), ``'avg'``,
            ``'max'``, ``'lanczos2'`` or ``'lanczos3'``. The non-stride modes
            run the convolution with stride 1 and append a downsampler.

    Returns:
        ``nn.Sequential`` holding, in order, the padder (if any), the
        convolution and the downsampler (if any).

    Raises:
        AssertionError: if ``stride != 1`` and ``downsample_mode`` is not one
            of the supported values.
    """
    downsampler = None
    if stride != 1 and downsample_mode != 'stride':
        if downsample_mode == 'avg':
            downsampler = nn.AvgPool3d(stride, stride)
        elif downsample_mode == 'max':
            downsampler = nn.MaxPool3d(stride, stride)
        elif downsample_mode in ['lanczos2', 'lanczos3']:
            downsampler = Downsampler(n_planes=out_f,
                                      factor=stride,
                                      kernel_type=downsample_mode,
                                      phase=0.5,
                                      preserve_size=True)
        else:
            # Raised explicitly: a bare `assert False` is stripped under -O,
            # which would silently drop the downsampling step.
            raise AssertionError(
                'unsupported downsample_mode: {!r}'.format(downsample_mode))

        # Downsampling is delegated to `downsampler`, so convolve densely.
        stride = 1

    padder = None
    to_pad = int((kernel_size - 1) / 2)
    if pad == 'reflection':
        padder = nn.ReflectionPad3d(to_pad)
        to_pad = 0

    convolver = nn.Conv3d(in_f,
                          out_f,
                          kernel_size,
                          stride,
                          padding=to_pad,
                          bias=bias)
    # Kaiming (He) normal initialisation of the convolution weights.
    torch.nn.init.kaiming_normal_(convolver.weight)

    layers = [
        layer for layer in (padder, convolver, downsampler)
        if layer is not None
    ]
    return nn.Sequential(*layers)
    def __init__(self, in_channels, is_last_layer):
        """Register normalisation, activation and, unless last, conv + pool.

        The normalisation and activation factories are read from the
        module-level ``config`` mapping (keys ``"norm_fn"`` and ``"act_fn"``).

        Args:
            in_channels: input channel count; must be even.
            is_last_layer: when true, no convolution or pooling is added and
                ``self.compression`` is set to 1.
        """
        super(TransmitBlock, self).__init__()
        make_act = config["act_fn"]
        make_norm = config["norm_fn"]
        ratio = 2

        assert in_channels % ratio == 0

        self.in_channels = in_channels
        self.compression = ratio

        self.add_module("norm", make_norm(in_channels))
        self.add_module("act", make_act())

        if is_last_layer:
            # No channel reduction happens in the final block.
            self.compression = 1
            return

        # 1x1x1 convolution halves the channels; the pool has stride 2.
        reduce_conv = nn.Conv3d(in_channels, in_channels // ratio,
                                kernel_size=1, stride=1, padding=0, bias=True)
        self.add_module("conv", reduce_conv)
        self.add_module("pool", nn.AvgPool3d(kernel_size=2, stride=2, padding=0))
Ejemplo n.º 13
0
    def __init__(self, in_channel, num_classes, verbose=False):
        """Build five convolutional blocks and two linear classifier heads.

        Args:
            in_channel: channel count of the input to the first convolution.
            num_classes: output size of ``self.classifier``.
            verbose: stored on ``self.verbose``.
        """
        super(resnet, self).__init__()
        self.verbose = verbose

        # Stem: kernel size 5, stride 2.
        self.block1 = nn.Conv3d(in_channel, 64, 5, 2)

        self.block2 = nn.Sequential(
            nn.MaxPool3d(3, 2),
            residual_block(64, 64),
            residual_block(64, 64),
        )

        # Blocks 3-5 each open with a channel-doubling residual block that is
        # built with `False` as its third argument.
        self.block3 = nn.Sequential(
            residual_block(64, 128, False),
            residual_block(128, 128),
        )
        self.block4 = nn.Sequential(
            residual_block(128, 256, False),
            residual_block(256, 256),
        )
        self.block5 = nn.Sequential(
            residual_block(256, 512, False),
            residual_block(512, 512),
            nn.AvgPool3d(3),
        )

        # Two heads over the same 512-wide feature: one with `num_classes`
        # outputs, one with a fixed 3 outputs.
        self.classifier = nn.Linear(512, num_classes)
        self.classifier2 = nn.Linear(512, 3)
 def __init__(self,
              local_size=1,
              alpha=1.0,
              beta=0.75,
              k=1,
              ACROSS_CHANNELS=True):
     """Create the averaging layer and store the normalisation constants.

     Args:
         local_size: size of the averaging window.
         alpha: stored on ``self.alpha``.
         beta: stored on ``self.beta``.
         k: stored on ``self.k``.
         ACROSS_CHANNELS: when true the window is applied with a 3D pool
             over the first pooled dimension only; otherwise a square 2D
             pool is used.
     """
     super(SpatialCrossMapLRN, self).__init__()
     self.ACROSS_CHANNELS = ACROSS_CHANNELS

     # Padding of (local_size - 1) / 2, truncated, on each side.
     half_window = int((local_size - 1.0) / 2)
     if ACROSS_CHANNELS:
         pool = nn.AvgPool3d(kernel_size=(local_size, 1, 1),
                             stride=1,
                             padding=(half_window, 0, 0))
     else:
         pool = nn.AvgPool2d(kernel_size=local_size,
                             stride=1,
                             padding=half_window)
     self.average = pool

     self.alpha = alpha
     self.beta = beta
     self.k = k
    def __init__(self,
                 in_channels=1,
                 base_channels=32,
                 n_layers=3,
                 n_discriminators=3):
        """Create the list of patch discriminators and the downsampler.

        Args:
            in_channels: first argument of each patch discriminator.
            base_channels: forwarded to each patch discriminator.
            n_layers: forwarded to each patch discriminator.
            n_discriminators: how many discriminators to create.
        """
        super().__init__()

        # One identically configured discriminator per scale.
        self.discriminators = nn.ModuleList(
            Pix2PixHDPatchDiscriminator(in_channels,
                                        base_channels=base_channels,
                                        n_layers=n_layers)
            for _ in range(n_discriminators))

        # Stride-2 average pool; padded positions are excluded from the mean.
        self.downsample = nn.AvgPool3d(
            3, stride=2, padding=1, count_include_pad=False)
Ejemplo n.º 16
0
Archivo: VGG.py Proyecto: Steap/flerken
    def __init__(self,
                 local_size=1,
                 alpha=1E-4,
                 beta=0.75,
                 ACROSS_CHANNELS=False):
        """Create the averaging layer and store the normalisation constants.

        Args:
            local_size: size of the averaging window.
            alpha: stored on ``self.alpha``.
            beta: stored on ``self.beta``.
            ACROSS_CHANNELS: when true the window is applied with a 3D pool
                over the first pooled dimension only; otherwise a square 2D
                pool is used.
        """
        super(LRN, self).__init__()

        self.ACROSS_CHANNELS = ACROSS_CHANNELS
        self.alpha = alpha
        self.beta = beta

        # Padding of (local_size - 1) / 2, truncated, on each side.
        half_window = int((local_size - 1.0) / 2)
        if not self.ACROSS_CHANNELS:
            self.average = nn.AvgPool2d(kernel_size=local_size,
                                        stride=1,
                                        padding=half_window)
        else:
            self.average = nn.AvgPool3d(kernel_size=(local_size, 1, 1),
                                        stride=1,
                                        padding=(half_window, 0, 0))
Ejemplo n.º 17
0
	def __init__(self, inplanes, planes, stride=1, downsample=None, frames = 8):
		"""Build the 2D bottleneck convolutions and optional temporal branch.

		Args:
			inplanes: input channel count of the first 1x1 convolution.
			planes: bottleneck width; the block outputs ``planes * 4`` channels.
			stride: stride of the 3x3 convolution.
			downsample: stored on ``self.downsample``; when not ``None`` the
				temporal ``MultiConv`` branch is created as well.
			frames: stored on ``self.frames``; ``int(log2(frames))`` is passed
				to ``MultiConv`` as ``layers``.
		"""
		super(Bottleneck, self).__init__()

		self.frames = frames

		out_planes = planes * 4
		self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
		self.bn1 = nn.BatchNorm2d(planes)
		self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
		self.bn2 = nn.BatchNorm2d(planes)
		self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
		self.bn3 = nn.BatchNorm2d(out_planes)

		self.relu = nn.ReLU(inplace=True)
		self.downsample = downsample
		self.stride = stride

		# The temporal branch exists only in blocks that have a downsample.
		if self.downsample is None:
			return

		self.conv2_t = MultiConv(planes=planes, stride=stride, layers=int(math.log(frames, 2)))
		if self.stride > 1:
			# Pools only the first pooled dimension, by the block stride.
			self.avgpool_s = nn.AvgPool3d(kernel_size=(stride, 1, 1), stride=(stride, 1, 1), padding=0)
Ejemplo n.º 18
0
    def __init__(self, rgb_nfilters, audio_nfilters, img_size, temp_size,
                 hidden_layers):
        """Create the layers that combine RGB and audio features.

        Args:
            rgb_nfilters: input width of the RGB linear projection.
            audio_nfilters: input width of the audio linear projection.
            img_size: stored on ``self.img_size``.
            temp_size: extent of the RGB average pool along its first pooled
                dimension.
            hidden_layers: shared output width of both projections and input
                width of both sides of the bilinear layer.
        """
        super(av_module, self).__init__()

        self.rgb_nfilters = rgb_nfilters
        self.audio_nfilters = audio_nfilters
        self.hidden_layers = hidden_layers
        self.out_layers = 64
        self.img_size = img_size

        self.avgpool_rgb = nn.AvgPool3d((temp_size, 1, 1), stride=1)
        self.relu = nn.ReLU()

        # Project both modalities to the same width before combining them.
        self.affine_rgb = nn.Linear(rgb_nfilters, hidden_layers)
        self.affine_audio = nn.Linear(audio_nfilters, hidden_layers)
        self.w_a_rgb = nn.Bilinear(
            hidden_layers, hidden_layers, self.out_layers, bias=True)

        self.upscale_ = nn.Upsample(scale_factor=8, mode='bilinear')
Ejemplo n.º 19
0
 def __init__(self,
              block,
              layers,
              spatial_size,
              sample_duration,
              shortcut_type='B',
              num_classes=1):
     """Build a 3D ResNet discriminator with spectral-normalised ends.

     Args:
         block: residual block class; forwarded to ``_make_layer`` and its
             ``expansion`` attribute sizes the final linear layer.
         layers: sequence with the number of blocks for each stage.
         spatial_size: spatial input size; the average-pool kernel uses
             ``ceil(spatial_size / 32)``.
         sample_duration: temporal input length; the average-pool kernel
             uses ``ceil(sample_duration / 16)``.
         shortcut_type: forwarded to ``_make_layer``.
         num_classes: output size of the final linear layer.
     """
     self.inplanes = 64
     super(SNresDisc_3DCNN, self).__init__()

     # No batch norm follows the stem; the convolution is wrapped in
     # spectral normalisation instead.
     stem = nn.Conv3d(3, 64, kernel_size=7, stride=(1, 2, 2),
                      padding=(3, 3, 3), bias=False)
     self.conv1 = utils.spectral_norm(stem)
     self.relu = nn.ReLU(inplace=True)
     self.maxpool = nn.MaxPool3d(kernel_size=(3, 3, 3), stride=2, padding=1)

     self.layer1 = self._make_layer(block, 64, layers[0], shortcut_type)
     self.layer2 = self._make_layer(
         block, 128, layers[1], shortcut_type, stride=2)
     self.layer3 = self._make_layer(
         block, 256, layers[2], shortcut_type, stride=2)
     self.layer4 = self._make_layer(
         block, 512, layers[3], shortcut_type, stride=2)

     pooled_frames = int(math.ceil(sample_duration / 16))
     pooled_side = int(math.ceil(spatial_size / 32))
     self.avgpool = nn.AvgPool3d(
         (pooled_frames, pooled_side, pooled_side), stride=1)

     head = nn.Linear(512 * block.expansion, num_classes)
     self.fc = utils.spectral_norm(head)
Ejemplo n.º 20
0
    def __init__(self, band, classes):
        """Build the convolutions, residual blocks and classifier.

        Args:
            band: used to size the last kernel dimension of ``conv2`` as
                ``ceil((band - 6) / 2)``.
            classes: output size of the final linear layer.
        """
        super(SSRN_network, self).__init__()
        self.name = 'SSRN'

        def bn_relu(channels):
            # Batch norm (eps=0.001, momentum=0.1) followed by in-place ReLU.
            return nn.Sequential(
                nn.BatchNorm3d(channels, eps=0.001, momentum=0.1, affine=True),
                nn.ReLU(inplace=True))

        self.conv1 = nn.Conv3d(in_channels=1, out_channels=24,
                               kernel_size=(1, 1, 7), stride=(1, 1, 2))
        self.batch_norm1 = bn_relu(24)

        # Two residual blocks with kernels along the last dimension only,
        # then two with 3x3 kernels over the first two dimensions.
        self.res_net1 = Residual(24, 24, (1, 1, 7), (0, 0, 3))
        self.res_net2 = Residual(24, 24, (1, 1, 7), (0, 0, 3))
        self.res_net3 = Residual(24, 24, (3, 3, 1), (1, 1, 0))
        self.res_net4 = Residual(24, 24, (3, 3, 1), (1, 1, 0))

        kernel_3d = math.ceil((band - 6) / 2)

        self.conv2 = nn.Conv3d(in_channels=24, out_channels=128,
                               padding=(0, 0, 0),
                               kernel_size=(1, 1, kernel_3d),
                               stride=(1, 1, 1))
        self.batch_norm2 = bn_relu(128)

        # Takes a single input channel and a kernel depth of 128, matching
        # conv2's output channel count -- presumably the tensor is
        # rearranged in forward(); TODO confirm.
        self.conv3 = nn.Conv3d(in_channels=1, out_channels=24,
                               padding=(0, 0, 0),
                               kernel_size=(3, 3, 128),
                               stride=(1, 1, 1))
        self.batch_norm3 = bn_relu(24)

        self.avg_pooling = nn.AvgPool3d(kernel_size=(5, 5, 1))
        # No softmax here: the head outputs raw scores.
        self.full_connection = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(24, classes))
Ejemplo n.º 21
0
    def __init__(self, training=True, **kwargs):
        """Build the 3D GoogLeNet stem, inception stages and classifier heads.

        Args:
            training: assigned to ``self.training`` after the stem is built.
            **kwargs: accepted but not used by this constructor.
        """
        super(GoogLeNet3D_new, self).__init__()

        stem = [
            nn.Conv3d(1, 64, kernel_size=7, stride=2, padding=1),
            nn.BatchNorm3d(64),
            nn.ReLU(True),
            nn.MaxPool3d(3, stride=2),
            nn.Conv3d(64, 64, kernel_size=1),
            nn.BatchNorm3d(64),
            nn.ReLU(True),
            nn.Conv3d(64, 192, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm3d(192),
            nn.ReLU(True),
            nn.MaxPool3d(3, stride=2),
        ]
        self.pre_layers = nn.Sequential(*stem)

        # NOTE(review): nn.Module already maintains a `training` flag that
        # train()/eval() toggle; this assignment overwrites it -- confirm
        # that is intended.
        self.training = training

        # Stage 3
        self.a3 = Inception(192, 64, 96, 128, 16, 32, 32)
        self.b3 = Inception(256, 128, 128, 192, 32, 96, 64)

        self.maxpool = nn.MaxPool3d(3, stride=2)

        # Stage 4
        self.a4 = Inception(480, 192, 96, 208, 16, 48, 64)
        self.b4 = Inception(512, 160, 112, 224, 24, 64, 64)
        self.c4 = Inception(512, 128, 128, 256, 24, 64, 64)
        self.d4 = Inception(512, 112, 144, 288, 32, 64, 64)
        self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)

        # Stage 5
        self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)
        self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)

        # Main head: the linear layer expects 27648 flattened features.
        self.avgpool = nn.AvgPool3d((2, 3, 2), stride=1)
        self.linear = nn.Linear(27648, 2)

        # Auxiliary two-output heads with 512 and 528 input channels.
        self.aux1 = InceptionAux(512, 2)
        self.aux2 = InceptionAux(528, 2)
Ejemplo n.º 22
0
    def __init__(self,
                 with_avg_pool=True,
                 temporal_feature_size=1,
                 spatial_feature_size=7,
                 dropout_ratio=0.8,
                 in_channels=2048,
                 num_classes=101,
                 init_std=0.01,
                 non_linear = False,
                 nonlinear_channels = 2048):
        """Configure a classification head with optional pooling and dropout.

        Every argument is stored on ``self`` under the same name.

        Args:
            with_avg_pool: create ``self.avg_pool`` when true.
            temporal_feature_size: first dimension of the pooling kernel.
            spatial_feature_size: last two dimensions of the pooling kernel.
            dropout_ratio: dropout probability; 0 sets ``self.dropout`` to
                ``None``.
            in_channels: classifier input width when ``non_linear`` is false.
            num_classes: classifier output width.
            init_std: stored only; not used by this constructor.
            non_linear: create ``self.fc_nl`` and size the classifier input
                with ``nonlinear_channels`` when true.
            nonlinear_channels: classifier input width when ``non_linear``.
        """
        super(SimpleClsHead, self).__init__()

        self.with_avg_pool = with_avg_pool
        self.dropout_ratio = dropout_ratio
        self.in_channels = in_channels
        self.temporal_feature_size = temporal_feature_size
        self.spatial_feature_size = spatial_feature_size
        self.init_std = init_std
        self.num_classes = num_classes
        self.non_linear = non_linear
        self.nonlinear_channels = nonlinear_channels

        self.dropout = (nn.Dropout(p=self.dropout_ratio)
                        if self.dropout_ratio != 0 else None)

        if self.with_avg_pool:
            pool_kernel = (temporal_feature_size, spatial_feature_size, spatial_feature_size)
            self.avg_pool = nn.AvgPool3d(pool_kernel)

        if self.non_linear:
            # Currently a pass-through; the previous layer stack is kept
            # below, disabled, for reference.
            self.fc_nl = nn.Sequential(
                nn.Identity()
                #nn.Linear(in_channels, nonlinear_channels),
                #nn.ReLU()
                #nn.Dropout(p=self.dropout_ratio),
                #nn.Linear(nonlinear_channels, nonlinear_channels),
                #nn.ReLU()
            )

        cls_in_features = nonlinear_channels if self.non_linear else in_channels
        self.fc_cls = nn.Linear(cls_in_features, num_classes)
Ejemplo n.º 23
0
def inflate_pool(pool2d, time_dim, time_padding, time_stride, time_dilation,
                 center):
    '''
        Build the 3D counterpart of a 2D pooling layer by prepending a time
        axis to its kernel, padding, stride (and dilation for max pooling).

        args:
        - pool2d: maxpool2d or avgpool2d module
        - time_dim: new time dim for pool kernel. represents volume in time
        - time_padding: padding of kernel in time dim
        - time_stride: stride of pool kernel in time dim
        - time_dilation: dilation in time dim (only used for max pooling)
        - center: not used in this func, maintained for consistent helper
          func args

        returns:
        3dpool layer (max or avg), with the properties of the original pool
        func preserved (ceil_mode, plus return_indices for max pooling and
        count_include_pad for average pooling), and augmented by time args

        raises:
        ValueError if pool2d is neither a MaxPool2d nor an AvgPool2d
    '''
    is_max = isinstance(pool2d, torch.nn.MaxPool2d)
    is_avg = isinstance(pool2d, torch.nn.AvgPool2d)
    # Reject unsupported layers before touching their attributes, so callers
    # always get the documented ValueError rather than an AttributeError.
    if not (is_max or is_avg):
        raise ValueError(INVALID_POOL_TYPE_ERR.format(type(pool2d)))

    o_kernel_size = get_tuple(pool2d.kernel_size)
    o_padding = get_tuple(pool2d.padding)
    o_stride = get_tuple(pool2d.stride)

    kernel_dim = (time_dim, o_kernel_size[0], o_kernel_size[1])
    padding = (time_padding, o_padding[0], o_padding[1])
    stride = (time_stride, o_stride[0], o_stride[1])

    if is_max:
        o_dilation = get_tuple(pool2d.dilation)
        dilation = (time_dilation, o_dilation[0], o_dilation[1])
        return nn.MaxPool3d(kernel_dim,
                            padding=padding,
                            dilation=dilation,
                            stride=stride,
                            return_indices=pool2d.return_indices,
                            ceil_mode=pool2d.ceil_mode)

    # NOTE(review): a divisor_override set on the 2D layer is not carried
    # over, since its meaning for the inflated kernel is ambiguous.
    return nn.AvgPool3d(kernel_dim,
                        padding=padding,
                        stride=stride,
                        ceil_mode=pool2d.ceil_mode,
                        count_include_pad=pool2d.count_include_pad)
Ejemplo n.º 24
0
    def __init__(self,
                 growth_rate=32,
                 block_config=(4, 4, 4),
                 init_channel_num=64,
                 bn_size=4,
                 drop_rate=0):
        """Assemble the 3D DenseNet feature extractor and its classifier.

        Args:
            growth_rate: channels added by every layer of a dense block.
            block_config: number of layers in each dense block, one entry per
                block.
            init_channel_num: output channels of the initial feature layer.
            bn_size: forwarded to every ``DenseBlock``.
            drop_rate: forwarded to every ``DenseBlock``.
        """
        super(DenseNet, self).__init__()
        self.feature_layer = FeatureLayer(1, init_channel_num)

        # Append the dense blocks, with a transition after all but the last.
        channel_num = init_channel_num
        last_block = len(block_config) - 1
        for i, layer_num in enumerate(block_config):
            block = DenseBlock(layer_num=layer_num,
                               in_channel=channel_num,
                               bn_size=bn_size,
                               growth_rate=growth_rate,
                               drop_rate=drop_rate)
            self.feature_layer.add_module('denseblock%d' % (i + 1), block)
            channel_num = channel_num + layer_num * growth_rate
            # A transition layer follows every dense block except the last
            # one; the channel count is halved to match it.
            if i != last_block:
                trans = Transition(channel_num, 0.5)
                self.feature_layer.add_module('transition%d' % (i + 1), trans)
                channel_num = int(0.5 * channel_num)

        # Final norm / activation / pooling, then the classifier.
        self.feature_layer.add_module('norm5', nn.BatchNorm3d(channel_num))
        self.feature_layer.add_module('relu5', nn.ReLU(inplace=True))
        self.feature_layer.add_module('avgpool5',
                                      nn.AvgPool3d(kernel_size=3, stride=2))
        # NOTE(review): the original author was unsure of the correct number
        # of input features for this Linear layer; it is only right if the
        # pooled feature map flattens to channel_num values - confirm.
        self.classifier = nn.Linear(channel_num, 1)
        self.sigmoid = torch.sigmoid

        # Weight initialisation: Kaiming-normal for convolutions, unit scale
        # and zero shift for batch norm, zero bias for linear layers.
        for m in self.modules():
            if isinstance(m, nn.Conv3d):
                # kaiming_normal (no trailing underscore) is deprecated.
                nn.init.kaiming_normal_(m.weight)
            elif isinstance(m, nn.BatchNorm3d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()
Ejemplo n.º 25
0
 def __init__(self,
              num_classes=400,
              dropout_keep_prob=1,
              input_channel=3,
              spatial_squeeze=True):
     """Build the I3D feature/classification stack.

     Args:
         num_classes: output channels of the final 1x1x1 convolution.
         dropout_keep_prob: probability of KEEPING a channel in the dropout
             layer before the classifier (1 disables dropout).
         input_channel: number of channels of the input clip.
         spatial_squeeze: stored on the instance by this constructor.
     """
     super(I3D, self).__init__()
     # The trailing shape comments are the original author's notes.
     self.features = nn.Sequential(
         BasicConv3d(input_channel, 64, kernel_size=7, stride=2,
                     padding=3),  # (64, 384, 56, 56)
         nn.MaxPool3d(kernel_size=(1, 3, 3),
                      stride=(1, 2, 2),
                      padding=(0, 1, 1)),  # (64, 384, 28, 28)
         BasicConv3d(64, 64, kernel_size=1, stride=1),  # (64, 384, 28, 28)
         BasicConv3d(64, 192, kernel_size=3, stride=1,
                     padding=1),  # (192, 384, 28, 28)
         nn.MaxPool3d(kernel_size=(1, 3, 3),
                      stride=(1, 2, 2),
                      padding=(0, 1, 1)),  # (192, 384, 14, 14)
         Mixed_3b(),  # (256, 384, 14, 14)
         Mixed_3c(),  # (256, 384, 14, 14)
         nn.MaxPool3d(kernel_size=(3, 3, 3),
                      stride=(2, 2, 2),
                      padding=(1, 1, 1)),  # (480, 192, 7, 7)
         Mixed_4b(),  # (512, 192, 7, 7)
         Mixed_4c(),  # (512, 192, 7, 7)
         Mixed_4d(),  # (512, 192, 7, 7)
         Mixed_4e(),  # (528, 192, 7, 7)
         Mixed_4f(),  # (832, 192, 7, 7)
         # Alternative considered by the author: a (2, 2, 2) max pool that
         # would also halve the spatial size -> (832, 96, 3, 3).
         nn.MaxPool3d(kernel_size=(2, 1, 1),
                      stride=(2, 1, 1),
                      padding=(0, 0, 0)),  # (832, 96, 7, 7)
         Mixed_5b(),  # (832, 96, 7, 7)
         Mixed_5c(),  # (1024, 96, 7, 7)
         nn.AvgPool3d(kernel_size=(2, 7, 7), stride=1),  # (1024, 8, 1, 1)
         # nn.Dropout3d takes the probability of ZEROING a channel, so the
         # keep probability has to be converted. Passing it through directly
         # (as before) zeroed every activation in training with the default
         # keep probability of 1.
         nn.Dropout3d(1 - dropout_keep_prob),
         nn.Conv3d(1024, num_classes, kernel_size=1, stride=1,
                   bias=True),  # (400, 8, 1, 1)
     )
     self.spatial_squeeze = spatial_squeeze
     # NOTE(review): no `dim` is given, so this relies on PyTorch's legacy
     # implicit-dimension choice; left unchanged because the forward pass
     # is not visible here.
     self.softmax = nn.Softmax()
Ejemplo n.º 26
0
    def __init__(self,
                 num_classes=400,
                 num_frames=64,
                 num_keyframe=8,
                 dropout_keep_prob=0.5):
        """Create the FlowNet front-end and the 3D inception flow branch.

        Args:
            num_classes: output size of the final linear layer.
            num_frames: stored on the instance by this constructor.
            num_keyframe: stored on the instance by this constructor.
            dropout_keep_prob: value handed to ``nn.Dropout`` below.
        """
        super(FGS3DFLOW, self).__init__()

        self.num_frames = num_frames
        self.num_keyframe = num_keyframe
        self.num_classes = num_classes
        self.dropout_keep_prob = dropout_keep_prob

        ##############################################
        # Load flownet
        ##############################################

        # 4x average-pool downsampling layer for the FlowNet path.
        self.flownetresize = nn.AvgPool2d(kernel_size=4, stride=4)
        # NOTE(review): hard-coded absolute checkpoint path; construction
        # fails on any machine where this file does not exist.
        FlowNet_state_dict = torch.load(
            '/home/weik/pretrainedmodels/FlowNetS/flownets_from_caffe.pth.tar.pth'
        )
        self.flownets = flownets(FlowNet_state_dict)
        set_parameter_requires_grad(self.flownets)
        # NOTE(review): this replaces the instance built from the checkpoint
        # above with one created without it, so the loaded state dict appears
        # to be discarded - confirm whether this is intentional.
        self.flownets = flownets()

        # self.inception_3D_1 = InceptionModule(1024, [112, 144, 288, 32, 64, 64], 'mixed_4f', )
        # First module takes 2 input channels; the next two take the
        # 256 + 320 + 128 + 128 channels listed for the previous module.
        self.inception_3D_flow_1 = InceptionModule(
            2, [256, 160, 320, 32, 128, 128], 'mixed_4f')
        self.inception_3D_flow_2 = InceptionModule(
            256 + 320 + 128 + 128, [256, 160, 320, 32, 128, 128], 'mixed_5b')
        self.inception_3D_flow_3 = InceptionModule(
            256 + 320 + 128 + 128, [384, 192, 384, 48, 128, 128], 'mixed_5c')

        self.avg_pool_flow = nn.AvgPool3d(kernel_size=[2, 14, 14],
                                          stride=(2, 1, 1))
        # NOTE(review): nn.Dropout's argument is the probability of zeroing
        # an element, while the value passed is named a keep probability;
        # the two only coincide at 0.5 - confirm the intended meaning.
        self.dropout_flow = nn.Dropout(self.dropout_keep_prob)
        # Input width is the 1024 channels listed for the last inception
        # module times 28; presumably 28 is the number of positions left
        # after pooling - TODO confirm against the forward pass.
        self.logits_flow = nn.Linear((384 + 384 + 128 + 128) * 28,
                                     self.num_classes)
        torch.nn.init.normal_(self.logits_flow.weight, mean=0.0, std=0.01)
        torch.nn.init.constant_(self.logits_flow.bias, 0.0)

        # Applied again because self.flownets was re-created above.
        set_parameter_requires_grad(self.flownets)
    def __init__(
        self,
        unique_id: str,
        num_classes: int,
        in_plane: int,
        pool_size: Optional[List[int]],
        activation_func: str,
        use_dropout: Optional[bool] = None,
        dropout_ratio: float = 0.5,
    ):
        """
        Constructor for FullyConvolutionalLinearHead.

        Args:
            unique_id: Identifier of this head. A model may carry several
                instances of the same head, and unique_id tells them apart.
            num_classes: Number of output classes.
            in_plane: Input size of the fully connected layer.
            pool_size: Kernel size of the 3d average pooling layer. When
                None, :class:`torch.nn.AdaptiveAvgPool3d` with output size
                (1, 1, 1) is used instead.
            activation_func: Activation applied to the output: 'softmax' or
                'sigmoid'.
            use_dropout: If truthy, a dropout layer is created after the
                pooling layer; otherwise no ``dropout`` attribute is set.
            dropout_ratio: Dropout probability.
        """
        super().__init__(unique_id, num_classes)

        # Global pooling when no kernel size is given, fixed-kernel pooling
        # (stride 1) otherwise.
        self.final_avgpool = (
            nn.AdaptiveAvgPool3d((1, 1, 1))
            if pool_size is None
            else nn.AvgPool3d(pool_size, stride=1)
        )

        if use_dropout:
            self.dropout = nn.Dropout(p=dropout_ratio)

        # Pooling is kept separate from the fully-convolutional linear
        # projection: multi-path models such as SlowFast feed more than one
        # tensor into the head. A new head can then combine the tensors
        # (concat or addition), pool them, and still reuse
        # FullyConvolutionalLinear for the projection.
        self.head_fcl = FullyConvolutionalLinear(
            in_plane, num_classes, act_func=activation_func
        )
    def __init__(self,
                 sample_size,
                 sample_duration,
                 num_classes=400,
                 last_fc=True):
        """Build the 3D MobileNet backbone, pooling, dropout and classifier.

        Args:
            sample_size: input size used to derive the last two entries of
                the final pooling kernel (``ceil(sample_size / 32)``).
            sample_duration: input length used to derive the first entry of
                the final pooling kernel (``ceil(sample_duration / 16)``).
            num_classes: output size of the final linear layer.
            last_fc: stored on the instance by this constructor.
        """
        super(MobileNetResidual, self).__init__()

        self.last_fc = last_fc

        # (in_channels, out_channels, stride) of every depth-wise block,
        # in network order.
        block_specs = [
            (32, 64, 1),
            (64, 128, (1, 2, 2)),
            (128, 128, 1),
            (128, 256, 2),
            (256, 256, 1),
            (256, 512, 2),
            (512, 512, 1),
            (512, 512, 1),
            (512, 512, 1),
            (512, 512, 1),
            (512, 512, 1),
            (512, 1024, 2),
            (1024, 1024, 1),
        ]
        stem = self.conv_bn(3, 32, 2)
        blocks = [DepthWiseBlock(c_in, c_out, step)
                  for c_in, c_out, step in block_specs]
        self.model = nn.Sequential(stem, *blocks)

        # Pooling kernel derived from the input extent divided by 16 / 32.
        pool_kernel = (math.ceil(sample_duration / 16),
                       math.ceil(sample_size / 32),
                       math.ceil(sample_size / 32))
        self.avgpool = nn.AvgPool3d(pool_kernel, stride=1)
        self.dropout = nn.Dropout(p=0.5)

        self.fc = nn.Linear(1024, num_classes)

        # Parameter initialisation for conv and batch-norm layers.
        for layer in self.modules():
            if isinstance(layer, nn.Conv3d):
                # NOTE(review): only the first two kernel dimensions enter
                # the fan computation; kept exactly as in the original.
                k0, k1 = layer.kernel_size[0], layer.kernel_size[1]
                fan = k0 * k1 * layer.out_channels
                layer.weight.data.normal_(0, math.sqrt(2. / fan))
            elif isinstance(layer, nn.BatchNorm3d):
                layer.weight.data.fill_(1)
                layer.bias.data.zero_()
    def __init__(self,
                 in_channels=in_channels,
                 out_channels=out_channels,
                 num_classes=num_classes,
                 n_conv_blocks=n_conv_blocks,
                 n_residual_blocks=n_residual_blocks):
        """Build the convolutional trunk, pooling layer and MLP classifier.

        Defaults are taken from same-named variables of the enclosing scope.

        Args:
            in_channels: channels of the input volume.
            out_channels: channels produced by every initial conv block and
                used by the residual blocks.
            num_classes: output size of the classifier.
            n_conv_blocks: number of initial Conv3d/InstanceNorm3d/ReLU blocks.
            n_residual_blocks: number of ``ResidualBlock`` modules that follow.
        """
        super(resnet_classifier, self).__init__()

        # Initial convolution blocks.
        model = []
        block_in = in_channels
        for _ in range(n_conv_blocks):
            model += [
                nn.Conv3d(
                    block_in,
                    out_channels,
                    kernel_size=conv_block_kernel_size,
                    # Fixed padding of 3 keeps the spatial size only for a
                    # kernel size of 7.
                    padding=3,
                    # 'same' is not a valid padding_mode for Conv3d (allowed:
                    # 'zeros', 'reflect', 'replicate', 'circular'); zero
                    # padding is the default behaviour.
                    padding_mode='zeros'
                ),
                # Normalise the conv OUTPUT, which has out_channels channels.
                nn.InstanceNorm3d(out_channels),
                nn.ReLU(inplace=True)
            ]
            # Every block after the first consumes the previous block's output.
            block_in = out_channels

        # Residual blocks operate on out_channels channels.
        for _ in range(n_residual_blocks):
            model += [ResidualBlock(out_channels)]

        self.model = nn.Sequential(*model)
        self.avgpool = nn.AvgPool3d(kernel_size=32, stride=32, padding=0)
        self.classifier = nn.Sequential(
            nn.Linear(feature_map_dim, 256),
            nn.ReLU(inplace=True),
            nn.Linear(256, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, num_classes),
        )

        # Field-of-view figure derived from the block counts and kernel sizes.
        self.fov = n_conv_blocks * conv_block_kernel_size + n_residual_blocks * (
            res_block_kernel_size - 1)
Ejemplo n.º 30
0
    def __init__(self, in_planes, out_planes, stride, groups, f =0):
        super(Layer_3, self).__init__()
        self.stride = stride
        self.groups = groups
        mid_planes = out_planes//4
        if self.stride == 2:
            out_planes = out_planes - in_planes
        g = 1 if in_planes==24 else groups
        #in_planes = in_planes*2 if self.stride == 2 else in_planes
        in_planes =1920 if f else in_planes
        out_planes =1920 if f else 960 
        self.conv1    = nn.Conv3d(in_planes, mid_planes, kernel_size=1, groups=g, bias=False)
        self.bn1      = nn.BatchNorm3d(mid_planes)
        self.conv2    = nn.Conv3d(mid_planes, mid_planes, kernel_size=3, stride=stride, padding=1, groups=mid_planes, bias=False)
        self.bn2      = nn.BatchNorm3d(mid_planes)
        self.conv3    = nn.Conv3d(mid_planes, out_planes, kernel_size=1, groups=groups, bias=False)
        self.bn3      = nn.BatchNorm3d(out_planes)
        self.relu     = nn.ReLU(inplace=True)

        if stride == 2:
            self.shortcut = nn.AvgPool3d(kernel_size=(2,3,3), stride=2, padding=(0,1,1))