def __init__(self, bottleneck2d, time_dim=1, time_padding=0, frame=0, num_segments=4):
    super(BasicBlock3d, self).__init__()
    spatial_stride = bottleneck2d.conv2.stride[0]
    self.frame = frame
    self.num_segments = num_segments

    self.conv1 = inflate.inflate_conv(bottleneck2d.conv1, time_dim=time_dim,
                                      time_padding=time_padding, center=False)
    self.bn1 = inflate.inflate_batch_norm(bottleneck2d.bn1)
    self.conv2 = inflate.inflate_conv(bottleneck2d.conv2, time_dim=time_dim,
                                      time_padding=time_padding, time_stride=1,
                                      center=False)
    self.bn2 = inflate.inflate_batch_norm(bottleneck2d.bn2)
    self.relu = torch.nn.ReLU(inplace=True)

    if bottleneck2d.downsample is not None:
        self.downsample = inflate_downsample(bottleneck2d.downsample,
                                             time_stride=spatial_stride)
    else:
        self.downsample = None

    self.stride = bottleneck2d.stride
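# Usage sketch (assumption, not from the original code): BasicBlock3d wraps a
# single 2D BasicBlock from a torchvision ResNet-18/34 and reuses its inflated
# weights; the resulting block operates on 5D tensors of shape (N, C, T, H, W).
#
#   import torchvision
#   resnet2d = torchvision.models.resnet18(pretrained=True)
#   block2d = resnet2d.layer2[0]   # a 2D BasicBlock with a downsample branch
#   block3d = BasicBlock3d(block2d, time_dim=1, time_padding=0, num_segments=4)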
def build_network(self, inputs):
    global time
    residual = inputs

    # conv1 (the matching 2D batch norm is not inflated here)
    out = inflate.inflate_conv(inputs, self.bottleneck2d.conv1, time_dim=1, times=time)
    time += 1
    # out = inflate.inflate_batch_norm(out, self.bottleneck2d.bn1)
    out = nn.relu(out)

    # conv2: either the configured temporal block or a plain inflated convolution
    if self.inflate_time:
        out = self.block(self.bottleneck2d.conv2,
                         temperature=self.temperature,
                         contrastive_att=self.contrastive_att).build_network(out)
    else:
        out = inflate.inflate_conv(out, self.bottleneck2d.conv2, time_dim=1, times=time)
        time += 1
    # out = inflate.inflate_batch_norm(out, self.bottleneck2d.bn2)
    out = nn.relu(out)

    # conv3
    out = inflate.inflate_conv(out, self.bottleneck2d.conv3, time_dim=1, times=time)
    time += 1
    # out = inflate.inflate_batch_norm(out, self.bottleneck2d.bn3)

    # project the residual when the 2D block has a downsample branch
    if self.bottleneck2d.downsample is not None:
        residual = self._inflate_downsample(inputs, self.bottleneck2d.downsample)

    out += residual
    out = nn.relu(out)
    return out
def inflate_downsample(downsample2d, time_stride=1):
    # pass the requested temporal stride through to the inflated 1x1 convolution
    downsample3d = torch.nn.Sequential(
        inflate.inflate_conv(downsample2d[0], time_dim=1,
                             time_stride=time_stride, center=False),
        inflate.inflate_batch_norm(downsample2d[1]))
    return downsample3d
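# Example (sketch): the downsample branch of a torchvision residual block is a
# Sequential(1x1 conv, BatchNorm2d), which is what inflate_downsample indexes
# as downsample2d[0] and downsample2d[1]. Values below are illustrative only.
#
#   downsample2d = torchvision.models.resnet50().layer2[0].downsample
#   downsample3d = inflate_downsample(downsample2d, time_stride=2)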
def _inflate_downsample(self, inputs, downsample2d, time_stride=1):
    global time
    # inflate only the convolution of the downsample branch; its batch norm
    # is skipped here
    out = inflate.inflate_conv(inputs, downsample2d.conv2d, time_dim=1,
                               time_stride=time_stride, times=time)
    time += 1
    # out = inflate.inflate_batch_norm(out, downsample2d[1])
    return out
def __init__(self, resnet2d, frame_nb=16, class_nb=1000, conv_class=False,
             num_segments=4, test_mode=False, fast_implementation=0):
    """
    Args:
        conv_class: Whether to use a convolutional layer as the classifier
            to adapt to a varying number of frames.
    """
    super(I3ResNet, self).__init__()
    self.conv_class = conv_class
    self.num_segments = num_segments
    self.frame = frame_nb

    self.conv1 = inflate.inflate_conv(resnet2d.conv1, time_dim=1,
                                      time_padding=0, center=False)
    self.bn1 = inflate.inflate_batch_norm(resnet2d.bn1)
    self.relu = torch.nn.ReLU(inplace=True)
    self.maxpool = inflate.inflate_pool(resnet2d.maxpool, time_dim=1,
                                        time_padding=0, time_stride=1)

    self.layer1 = inflate_reslayer(resnet2d.layer1)
    self.layer2 = inflate_reslayer(resnet2d.layer2, num_R4D=2, in_channels=512,
                                   fast_implementation=fast_implementation,
                                   num_segments=num_segments)
    self.layer3 = inflate_reslayer(resnet2d.layer3, time_dim=3, time_padding=1)
    self.layer4 = inflate_reslayer(resnet2d.layer4, time_dim=3, time_padding=1)

    if conv_class:
        self.avgpool = inflate.inflate_pool(resnet2d.avgpool, time_dim=1)
        self.classifier = torch.nn.Conv3d(in_channels=2048,
                                          out_channels=class_nb,
                                          kernel_size=(1, 1, 1),
                                          bias=True)
    else:
        final_time_dim = int(math.ceil(frame_nb))
        if test_mode:
            self.avgpool = nn.AvgPool3d((frame_nb, 8, 8))
        else:
            self.avgpool = nn.AvgPool3d((frame_nb, 7, 7))
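# Usage sketch (assumption, not from the original code): building the inflated
# network from a pretrained torchvision ResNet-50. Shapes and argument values
# are illustrative; the model expects clips laid out as (N, C, T, H, W).
#
#   import torchvision
#   resnet2d = torchvision.models.resnet50(pretrained=True)
#   model = I3ResNet(resnet2d, frame_nb=16, class_nb=1000, conv_class=False)
#   clip = torch.randn(2, 3, 16, 224, 224)
#   logits = model(clip)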
def __init__(self, resnet2d, frame_nb=16, class_nb=1000, conv_class=False,
             num_segments=1, gtsn=False):
    """
    Args:
        conv_class: Whether to use a convolutional layer as the classifier
            to adapt to a varying number of frames.
    """
    super(I3ResNet_18_34, self).__init__()
    self.num_segments = num_segments
    self.conv_class = conv_class
    self.gtsn = gtsn

    self.conv1 = inflate.inflate_conv(resnet2d.conv1, time_dim=1,
                                      time_padding=0, center=False)
    self.bn1 = inflate.inflate_batch_norm(resnet2d.bn1)
    self.relu = torch.nn.ReLU(inplace=True)
    self.maxpool = inflate.inflate_pool(resnet2d.maxpool, time_dim=1,
                                        time_padding=0, time_stride=1)

    self.layer1 = inflate_reslayer_18_34(resnet2d.layer1)
    self.layer2 = inflate_reslayer_18_34(resnet2d.layer2, num_R4D=3, in_channels=128)
    self.layer3 = inflate_reslayer_18_34(resnet2d.layer3, time_dim=3, time_padding=1,
                                         num_R4D=3, in_channels=256)
    self.layer4 = inflate_reslayer_18_34(resnet2d.layer4, time_dim=3, time_padding=1)

    if conv_class:
        self.avgpool = inflate.inflate_pool(resnet2d.avgpool, time_dim=1)
        self.classifier = torch.nn.Conv3d(in_channels=2048,
                                          out_channels=class_nb,
                                          kernel_size=(1, 1, 1),
                                          bias=True)
    else:
        final_time_dim = int(math.ceil(frame_nb))
        self.avgpool = inflate.inflate_pool(resnet2d.avgpool, time_dim=4)
def test_job(inputs: tp.Numpy.Placeholder((64, 256, 26, 28, 28))) -> tp.Numpy:
    resnet2d = getresnet.getResnet()
    layer = resnet2d[1]
    c3d_idx = [[], [0, 2], [0, 2, 4], []]
    nl_idx = [[], [1, 3], [1, 3, 5], []]
    conv2d = Conv2d(3, 64, [7, 7], [2, 2], [3, 3], dilation=[1, 1])

    print(layer[0].conv2.kernel_size)
    print(layer[0].conv2.padding)
    print(layer[0].conv2.stride)

    out = inflate.inflate_conv(inputs, layer[0].conv1, time_dim=1, times=1)
    # Alternative single-layer checks kept for reference:
    # out = _inflate_reslayer(inputs, layer, c3d_idx=c3d_idx[0],
    #                         nonlocal_idx=nl_idx[0], nonlocal_channels=256)
    # out = inflate.inflate_conv(inputs, 1, layer[1].conv2, time_dim=1)
    # out = inflate.inflate_conv(out, 2, layer[1].conv2, time_dim=1)
    # out = AP3D.C2D(layer[1].conv2).build_network(inputs, 1)
    # out = AP3D.C2D(layer[1].conv2).build_network(out, 2)
    return out
def build_network(self, inputs):
    global time
    # resnet2d = torchvision.models.resnet50(pretrained=True)
    resnet2d = getresnet.getResnet()

    # stem: inflated 7x7 convolution + relu + max pooling
    # (the batch norm of the 2D stem is skipped here)
    conv2d = Conv2d(3, 64, [7, 7], [2, 2], [3, 3], dilation=[1, 1])
    out = inflate.inflate_conv(inputs, conv2d, time_dim=1, times=time)
    time += 1
    out = nn.relu(out)
    out = inflate.inflate_pool(out, kernel_size=3, padding=1, stride=2,
                               dilation=1, time_dim=1)

    # four inflated residual stages with optional C3D / non-local blocks
    out = self._inflate_reslayer(out, resnet2d[0], c3d_idx=self.c3d_idx[0],
                                 nonlocal_idx=self.nl_idx[0], nonlocal_channels=256)
    out = self._inflate_reslayer(out, resnet2d[1], c3d_idx=self.c3d_idx[1],
                                 nonlocal_idx=self.nl_idx[1], nonlocal_channels=512)
    out = self._inflate_reslayer(out, resnet2d[2], c3d_idx=self.c3d_idx[2],
                                 nonlocal_idx=self.nl_idx[2], nonlocal_channels=1024)
    out = self._inflate_reslayer(out, resnet2d[3], c3d_idx=self.c3d_idx[3],
                                 nonlocal_idx=self.nl_idx[3], nonlocal_channels=2048)

    # spatial max pooling per frame, then temporal average pooling
    b, c, t, h, w = out.shape
    out = flow.transpose(out, perm=[0, 2, 1, 3, 4])
    out = flow.reshape(out, shape=[b * t, c, h, w])
    out = nn.max_pool2d(input=out, ksize=out.shape[2:], strides=None, padding="VALID")
    out = flow.reshape(out, shape=[b, t, -1])
    out = flow.math.reduce_mean(out, axis=1)

    # feature branch (batch-normalized embedding) and classification branch
    f = flow.layers.batch_normalization(inputs=out, momentum=0.997, epsilon=1.001e-5,
                                        center=True, scale=True, trainable=True,
                                        name="Resnet503D_linear_bn" + str(time))
    time += 1
    kernel_initializer = flow.variance_scaling_initializer(2, 'fan_in', 'random_normal')
    weight_regularizer = flow.regularizers.l2(1.0 / 32768)
    y = flow.layers.dense(out, self.num_classes, use_bias=True,
                          bias_initializer=kernel_initializer,
                          kernel_regularizer=weight_regularizer,
                          bias_regularizer=weight_regularizer,
                          trainable=True)
    return y, f
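# Usage sketch (assumptions throughout): wrapping build_network in a lazy-mode
# OneFlow job, in the style of test_job above. The enclosing class name and its
# constructor are hypothetical (the batch-norm name "Resnet503D_linear_bn"
# suggests something like ResNet503D); flow.global_function and
# tp.Numpy.Placeholder are standard OneFlow 0.x API.
#
#   @flow.global_function(type="predict")
#   def infer_job(clips: tp.Numpy.Placeholder((8, 3, 16, 224, 224))) -> tp.Numpy:
#       net = ResNet503D(num_classes=625)   # hypothetical constructor/arguments
#       y, f = net.build_network(clips)
#       return y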