def __init__(self, class_num=8, dim_in=2048, trans_param_num=3, detach_network=False,
             pretrained=True, temporal_out=4, T=None, hist=1):
    """Build the CycleTime model on top of an inflated ResNet-18 encoder.

    Args:
        class_num: Accepted for interface compatibility; not read by this
            constructor.
        dim_in: Accepted for interface compatibility; not read by this
            constructor.
        trans_param_num: Output size of ``self.linear2`` (number of
            transformation parameters regressed).
        detach_network: Stored unchanged as ``self.detach_network``.
        pretrained: Forwarded to ``resnet_res4s1.resnet18``.
        temporal_out: Stored unchanged as ``self.temporal_out``.
        T: Stored as ``self.T``. When ``None``, ``self.div_num ** -0.5`` is
            used instead (``self.div_num`` is 512).
        hist: Stored unchanged as ``self.hist``.
    """
    super(CycleTime, self).__init__()

    # Video encoder: a 2D ResNet-18 wrapped by InflatedResNet. A deep copy is
    # handed over so the local 2D network is not shared with the encoder.
    resnet = resnet_res4s1.resnet18(pretrained=pretrained)
    self.encoderVideo = inflated_resnet.InflatedResNet(copy.deepcopy(resnet))

    self.detach_network = detach_network
    self.hist = hist

    self.div_num = 512
    self.T = self.div_num**-.5 if T is None else T
    print('self.T:', self.T)

    # 1x1x1 convolution applied to the encoder output (256 -> 18 channels).
    self.afterconv1 = nn.Conv3d(256, 18, kernel_size=1, bias=False)

    self.spatial_out1 = 30
    self.spatial_out2 = 10
    self.temporal_out = temporal_out

    # Transformation regressor. The first conv takes
    # spatial_out1 * spatial_out1 (= 900) input channels.
    # NOTE(review): presumably one channel per cell of a 30x30 correlation
    # map -- confirm against forward().
    self.afterconv3_trans = nn.Conv2d(self.spatial_out1 * self.spatial_out1, 128,
                                      kernel_size=4, padding=0, bias=False)
    self.afterconv4_trans = nn.Conv2d(128, 64, kernel_size=4, padding=0, bias=False)

    corrdim_trans = 64 * 4 * 4
    self.linear2 = nn.Linear(corrdim_trans, trans_param_num)

    self.leakyrelu = nn.LeakyReLU(0.1, inplace=True)
    self.relu = nn.ReLU(inplace=True)

    self.avgpool = nn.AvgPool2d(7, stride=1)
    self.avgpool3d = nn.AvgPool3d((4, 2, 2), stride=(1, 2, 2))
    self.maxpool2d = nn.MaxPool2d(2, stride=2)

    # Initialization. ``kaiming_normal_`` is the in-place replacement for the
    # deprecated ``kaiming_normal``. ``nonlinearity='relu'`` gives the same
    # gain (sqrt(2)) as the default ``leaky_relu`` with slope 0, so the
    # resulting distribution is unchanged.
    for conv in (self.afterconv1, self.afterconv3_trans, self.afterconv4_trans):
        nn.init.kaiming_normal_(conv.weight, mode='fan_out', nonlinearity='relu')

    # Assuming no fc pre-training: every Linear layer in the model, including
    # any inside the encoder, is re-initialized here.
    for m in self.modules():
        if isinstance(m, nn.Linear):
            nn.init.normal_(m.weight, mean=0.0, std=0.01)
            # Linear layers built with bias=False have no bias tensor.
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)

    # Affine transformation module producing spatial_out2 x spatial_out2 output.
    self.geometricTnf = GeometricTnfAffine(geometric_model='affine',
                                           tps_grid_size=3,
                                           tps_reg_factor=0.2,
                                           out_h=self.spatial_out2,
                                           out_w=self.spatial_out2,
                                           offset_factor=227/210)

    # 80x80 coordinate grid over [-1, 1] in both axes, stored as an
    # (80, 80, 2) NumPy array.
    axis = np.linspace(-1, 1, 80)
    self.xs = np.stack(np.meshgrid(axis, axis), 2)

    self.criterion_inlier = WeakInlierCountPool(geometric_model='affine',
                                                tps_grid_size=3,
                                                tps_reg_factor=0.2,
                                                h_matches=30,
                                                w_matches=30,
                                                use_conv_filter=False,
                                                dilation_filter=0,
                                                normalize_inlier_count=True)
    self.criterion_synth = TransformedGridLoss(use_cuda=True, geometric_model='affine')
def __init__(self, class_num=8, dim_in=2048, trans_param_num=3, detach_network=False,
             pretrained=True, temporal_out=4, T=None, hist=1):
    """Assemble the CycleTime network around an inflated ResNet-50 encoder.

    Args:
        class_num: Number of classes (not referenced in this constructor).
        dim_in: Input feature dimension (not referenced in this constructor).
        trans_param_num: Number of parameters describing the transformation;
            three parameters are sufficient here.
        detach_network: Stored unchanged as ``self.detach_network``.
        pretrained: Forwarded to ``resnet_res4s1.resnet50``.
        temporal_out: Temporal length of the video.
        T: Stored as ``self.T``; when ``None`` it falls back to
            ``self.div_num ** -0.5`` (``self.div_num`` is 512).
        hist: Stored unchanged as ``self.hist``.
    """
    super(CycleTime, self).__init__()

    print(pretrained)

    # Backbone: a 2D ResNet-50, deep-copied and wrapped by InflatedResNet.
    backbone_2d = resnet_res4s1.resnet50(pretrained=pretrained)
    self.encoderVideo = inflated_resnet.InflatedResNet(copy.deepcopy(backbone_2d))

    # Plain configuration values.
    self.detach_network = detach_network
    self.hist = hist
    self.div_num = 512
    if T is None:
        self.T = self.div_num ** -.5
    else:
        self.T = T
    print('self.T:', self.T)

    # 1x1x1 projection of the encoder output (1024 -> 512 channels).
    self.afterconv1 = nn.Conv3d(1024, 512, kernel_size=1, bias=False)

    self.spatial_out1 = 30
    self.spatial_out2 = 10
    self.temporal_out = temporal_out

    # Transformation regressor: two 4x4 convolutions followed by a linear
    # layer that outputs ``trans_param_num`` values.
    corr_channels = self.spatial_out1 * self.spatial_out1
    self.afterconv3_trans = nn.Conv2d(corr_channels, 128, kernel_size=4,
                                      padding=0, bias=False)
    self.afterconv4_trans = nn.Conv2d(128, 64, kernel_size=4, padding=0, bias=False)
    regressor_in = 64 * 4 * 4
    self.linear2 = nn.Linear(regressor_in, trans_param_num)

    self.leakyrelu = nn.LeakyReLU(0.1, inplace=True)
    self.relu = nn.ReLU(inplace=True)

    # Kaiming initialization for the newly added convolutions, applied in
    # the same order in which they were created.
    for conv in (self.afterconv1, self.afterconv3_trans, self.afterconv4_trans):
        nn.init.kaiming_normal_(conv.weight, mode='fan_out', nonlinearity='relu')

    # Assuming no fc pre-training: re-initialize every Linear layer found
    # anywhere in the module tree.
    for layer in (m for m in self.modules() if isinstance(m, nn.Linear)):
        layer.weight.data.normal_(0, 0.01)
        layer.bias.data.zero_()

    # Settings shared by the affine transformer and the inlier criterion.
    affine_cfg = dict(geometric_model='affine', tps_grid_size=3, tps_reg_factor=0.2)

    self.geometricTnf = GeometricTnfAffine(out_h=self.spatial_out2,
                                           out_w=self.spatial_out2,
                                           offset_factor=227/210,
                                           **affine_cfg)

    self.criterion_inlier = WeakInlierCountPool(h_matches=30,
                                                w_matches=30,
                                                use_conv_filter=False,
                                                dilation_filter=0,
                                                normalize_inlier_count=True,
                                                **affine_cfg)
    self.criterion_synth = TransformedGridLoss(use_cuda=True, geometric_model='affine')