def __init__(self, pretrained=1):
    """Assemble the OSVOS layers and trigger weight initialisation.

    Five stages are created with ``make_layers_osvos``. Every stage except
    the first additionally receives a 16-channel side convolution, a
    1-channel score convolution and two bias-free transposed convolutions
    whose stride is ``2**i`` and kernel size ``2**(i + 1)`` for stage
    index ``i``.

    :param pretrained: forwarded unchanged to ``self._initialize_weights``.
    """
    super(OSVOS, self).__init__()

    stage_cfgs = [
        [64, 64],
        ['M', 128, 128],
        ['M', 256, 256, 256],
        ['M', 512, 512, 512],
        ['M', 512, 512, 512],
    ]
    stage_inputs = [3, 64, 128, 256, 512]

    print("Constructing OSVOS architecture..")
    stage_modules = modules.ModuleList()
    side_modules = modules.ModuleList()
    score_modules = modules.ModuleList()
    side_upsamplers = modules.ModuleList()
    score_upsamplers = modules.ModuleList()

    # Modules are created in the same order as before so that random
    # parameter initialisation consumes the RNG identically.
    for depth, (cfg, n_in) in enumerate(zip(stage_cfgs, stage_inputs)):
        stage_modules.append(make_layers_osvos(cfg, n_in))

        # Side branches only exist from the second stage onwards.
        if depth == 0:
            continue

        factor = 2 ** depth
        side_modules.append(
            nn.Conv2d(cfg[-1], 16, kernel_size=3, padding=1))
        score_modules.append(
            nn.Conv2d(16, 1, kernel_size=1, padding=0))
        score_upsamplers.append(
            nn.ConvTranspose2d(1, 1, kernel_size=2 * factor,
                               stride=factor, bias=False))
        side_upsamplers.append(
            nn.ConvTranspose2d(16, 16, kernel_size=2 * factor,
                               stride=factor, bias=False))

    # Registration order determines parameter / state-dict ordering.
    self.upscale = side_upsamplers
    self.upscale_ = score_upsamplers
    self.stages = stage_modules
    self.side_prep = side_modules
    self.score_dsn = score_modules

    # 64 input channels == 4 side branches x 16 channels
    # (presumably concatenated in forward -- confirm there).
    self.fuse = nn.Conv2d(64, 1, kernel_size=1, padding=0)

    print("Initializing weights..")
    self._initialize_weights(pretrained)
def __init__(self):
    """Assemble the OSVOS layers and call ``self._initialize_weights()``.

    Five stages are created with ``make_layers_osvos``; each stage after
    the first also gets a side convolution (to 16 channels), a score
    convolution (16 -> 1) and two bias-free transposed convolutions with
    stride ``2**i`` and kernel size ``2**(i + 1)`` for stage index ``i``.
    """
    super(OSVOS, self).__init__()

    configs = [
        [64, 64],
        ['M', 128, 128],
        ['M', 256, 256, 256],
        ['M', 512, 512, 512],
        ['M', 512, 512, 512],
    ]
    input_widths = [3, 64, 128, 256, 512]

    print("Constructing OSVOS architecture..")
    backbone = modules.ModuleList()
    side_convs = modules.ModuleList()
    score_convs = modules.ModuleList()
    up16 = modules.ModuleList()
    up1 = modules.ModuleList()

    # Creation order is kept identical to preserve RNG consumption.
    for level, (config, width) in enumerate(zip(configs, input_widths)):
        backbone.append(make_layers_osvos(config, width))
        if level > 0:
            step = 2 ** level
            kernel = 2 ** (level + 1)
            side_convs.append(
                nn.Conv2d(config[-1], 16, kernel_size=3, padding=1))
            score_convs.append(
                nn.Conv2d(16, 1, kernel_size=1, padding=0))
            up1.append(
                nn.ConvTranspose2d(1, 1, kernel_size=kernel, stride=step,
                                   bias=False))
            up16.append(
                nn.ConvTranspose2d(16, 16, kernel_size=kernel, stride=step,
                                   bias=False))

    # Attribute order below fixes the parameter / state-dict ordering.
    self.upscale = up16
    self.upscale_ = up1
    self.stages = backbone
    self.side_prep = side_convs
    self.score_dsn = score_convs
    self.fuse = nn.Conv2d(64, 1, kernel_size=1, padding=0)

    self._initialize_weights()
def __init__(self, pretrained=True):
    """Build the appearance network: five blocks plus side branches.

    Blocks are produced by ``make_layers``. Every block after the first
    gets a 16-channel side convolution and a bias-free transposed
    convolution with stride ``2**i`` / kernel ``2**(i + 1)``.

    :param pretrained: when truthy, ``self._initialize_weights`` is called
        with this value.
    """
    super(AppearanceModel, self).__init__()
    print("Setting up Appearance model...", end="")
    sys.stdout.flush()

    self.n_blocks = 5
    block_cfgs = [
        [64, 64],
        ["M", 128, 128],
        ["M", 256, 256, 256],
        ["M", 512, 512, 512],
        ["M", 512, 512, 512],
    ]
    block_inputs = [3, 64, 128, 256, 512]

    conv_blocks = modules.ModuleList()
    side_convs = modules.ModuleList()
    deconvs = modules.ModuleList()

    # Both lists hold exactly ``n_blocks`` entries, so zipping them visits
    # the same indices as ``range(self.n_blocks)``.
    for level, (cfg, n_in) in enumerate(zip(block_cfgs, block_inputs)):
        conv_blocks.append(make_layers(cfg, n_in))
        if level == 0:
            continue
        side_convs.append(
            nn.Conv2d(cfg[-1], 16, kernel_size=3, padding=1))
        deconvs.append(
            nn.ConvTranspose2d(16, 16, kernel_size=2 ** (level + 1),
                               stride=2 ** level, bias=False))

    # Registration order determines parameter / state-dict ordering.
    self.blocks = conv_blocks
    self.sides = side_convs
    self.upsamples = deconvs
    # NOTE(review): ``test`` is not referenced anywhere in this
    # constructor; it is kept because it contributes a state-dict entry.
    self.test = nn.ConvTranspose2d(3, 3, kernel_size=4, stride=2,
                                   bias=False)
    self.fuse = nn.Conv2d(64, 1, kernel_size=1)

    # NOTE(review): the extent of this branch was reconstructed from a
    # whitespace-flattened source -- confirm that the initialisation call
    # belongs inside it.
    if pretrained:
        print("Initializing weights...", end="")
        sys.stdout.flush()
        self._initialize_weights(pretrained)
    print("Done!")
def __init__(self, in_channels, out_channels, stride=1, M=2, r=16, L=32,
             G=32):
    """Create the layers of a 1-D selective-kernel convolution.

    :param in_channels: number of input channels.
    :param out_channels: number of output channels (the original paper
        uses the same value for input and output).
    :param stride: stride of every branch convolution, default 1.
    :param M: number of branches.
    :param r: reduction ratio used to derive the length ``d`` of the
        compact feature Z (the S -> Z step in the paper is a dimensionality
        reduction, hence a lower bound is needed).
    :param L: lower bound for ``d``, default 32.
    :param G: number of groups of each branch convolution, default 32 (the
        value suggested by the paper and previously hard-coded).
        ``nn.Conv1d`` requires both ``in_channels`` and ``out_channels``
        to be divisible by it.
    """
    super(SK_Conv1d, self).__init__()

    # Length of the compact feature vector Z.
    d = max(in_channels // r, L)

    self.M = M
    self.out_channels = out_channels

    # One branch per receptive field. Instead of growing the kernel, each
    # branch keeps a size-3 kernel and increases the dilation (the paper
    # replaces its 5x5 kernel by a 3x3 one with dilation 2 for
    # efficiency); ``padding == dilation`` keeps the branch output lengths
    # equal to each other.
    self.conv = nn.ModuleList()
    for i in range(M):
        self.conv.append(
            nn.Sequential(
                nn.Conv1d(in_channels,
                          out_channels,
                          3,
                          stride,
                          padding=1 + i,
                          dilation=1 + i,
                          groups=G,
                          bias=False),
                nn.BatchNorm1d(out_channels),
                nn.ReLU(inplace=True)))

    # Adaptive pooling to length 1, i.e. global average pooling.
    self.global_pool = nn.AdaptiveAvgPool1d(1)

    # Reduce to ``d`` channels ...
    self.fc1 = nn.Sequential(nn.Conv1d(out_channels, d, 1, bias=False),
                             nn.BatchNorm1d(d),
                             nn.ReLU(inplace=True))
    # ... then expand to one ``out_channels``-sized vector per branch.
    self.fc2 = nn.Conv1d(d, out_channels * M, 1, 1, bias=False)

    # Softmax over dim 1 so that the weights of corresponding positions
    # sum to 1 across branches (presumably forward reshapes the fc2
    # output to put the branch axis at dim 1 -- confirm there).
    self.softmax = nn.Softmax(dim=1)
def __init__(self, pretrained=True, mask=False):
    """Build the two-stream (appearance + flow) network.

    Each stream has five blocks produced by ``make_layers`` and a
    16-channel side convolution for every block after the first. A single
    set of bias-free transposed convolutions (stride ``2**i``, kernel
    ``2**(i + 1)``) is created alongside the appearance stream and has its
    parameters frozen.

    :param pretrained: when truthy, ``self._initialize_weights`` is called
        with this value.
    :param mask: when truthy, the first block of both streams takes 4
        input channels instead of 3.
    """
    super(AppearanceFlowModel, self).__init__()
    print("Setting up Appearance+Flow model...", end="")
    sys.stdout.flush()

    self.mask = mask
    self.n_blocks = 5

    block_cfgs = [
        [64, 64],
        ["M", 128, 128],
        ["M", 256, 256, 256],
        ["M", 512, 512, 512],
        ["M", 512, 512, 512],
    ]
    first_in = 4 if mask else 3
    app_inputs = [first_in, 64, 128, 256, 512]
    flow_inputs = [first_in, 64, 128, 256, 512]

    app_stream = modules.ModuleList()
    app_side_convs = modules.ModuleList()
    flow_stream = modules.ModuleList()
    flow_side_convs = modules.ModuleList()
    deconvs = modules.ModuleList()

    # Appearance stream (also creates the shared upsampling layers).
    # Creation order is unchanged so RNG consumption stays identical.
    for level, (cfg, n_in) in enumerate(zip(block_cfgs, app_inputs)):
        app_stream.append(make_layers(cfg, n_in))
        if level == 0:
            continue
        app_side_convs.append(
            nn.Conv2d(cfg[-1], 16, kernel_size=3, padding=1))
        deconvs.append(
            nn.ConvTranspose2d(16, 16, kernel_size=2 ** (level + 1),
                               stride=2 ** level, bias=False))
    self.app_blocks = app_stream
    self.app_sides = app_side_convs

    # Flow stream.
    for level, (cfg, n_in) in enumerate(zip(block_cfgs, flow_inputs)):
        flow_stream.append(make_layers(cfg, n_in))
        if level == 0:
            continue
        flow_side_convs.append(
            nn.Conv2d(cfg[-1], 16, kernel_size=3, padding=1))
    self.flow_blocks = flow_stream
    self.flow_sides = flow_side_convs

    self.upsamples = deconvs
    self.fuse = nn.Conv2d(128, 1, kernel_size=1)

    # NOTE(review): the extent of this branch was reconstructed from a
    # whitespace-flattened source -- confirm that the initialisation call
    # belongs inside it and the freezing loop outside.
    if pretrained:
        print("Initializing weights...", end="")
        sys.stdout.flush()
        self._initialize_weights(pretrained)

    # Upsampling layers are excluded from training.
    for weight in self.upsamples.parameters():
        weight.requires_grad = False
    print("Done!")
def __init__(self, pretrained=1):
    """Assemble the SalAR layers and trigger weight initialisation.

    Five stages are created with ``make_layers_salar``; each stage after
    the first also gets a 16-channel side convolution, a 1-channel score
    convolution and two bias-free transposed convolutions (stride
    ``2**i``, kernel ``2**(i + 1)``). One Gaussian prior, attention block
    and fuse module is created for every side branch but the last; the
    last one gets ``gaussian_prior_last`` and ``fuse_last``.

    :param pretrained: forwarded unchanged to ``self._initialize_weights``.
    """
    super(SalAR, self).__init__()

    # The fractional entries are interpreted by ``make_layers_salar``
    # (presumably dropout rates -- confirm in that helper).
    stage_cfgs = [
        [64, 0.3, 64],
        ['M', 128, 0.4, 128],
        ['M', 256, 0.4, 256, 0.4, 256],
        ['M', 512, 0.4, 512, 0.4, 512],
        ['M', 512, 0.4, 512, 0.4, 512],
    ]
    stage_inputs = [3, 64, 128, 256, 512]

    stage_modules = modules.ModuleList()
    side_modules = modules.ModuleList()
    score_modules = modules.ModuleList()
    side_upsamplers = modules.ModuleList()
    score_upsamplers = modules.ModuleList()
    attention_modules = modules.ModuleList()
    prior_modules = modules.ModuleList()
    fuse_modules = modules.ModuleList()

    # Backbone and side branches; creation order is unchanged so RNG
    # consumption stays identical.
    for depth, (cfg, n_in) in enumerate(zip(stage_cfgs, stage_inputs)):
        stage_modules.append(make_layers_salar(cfg, n_in))

        # Attention, side_prep and score_dsn start from the second stage.
        if depth == 0:
            continue

        factor = 2 ** depth
        side_modules.append(
            nn.Conv2d(cfg[-1], 16, kernel_size=3, padding=1))
        score_modules.append(
            nn.Conv2d(16, 1, kernel_size=1, padding=0))
        score_upsamplers.append(
            nn.ConvTranspose2d(1, 1, kernel_size=2 * factor,
                               stride=factor, bias=False))
        side_upsamplers.append(
            nn.ConvTranspose2d(16, 16, kernel_size=2 * factor,
                               stride=factor, bias=False))

    # Attention: one triple per side branch except the last.
    for _ in range(len(side_modules) - 1):
        prior_modules.append(
            GaussianPrior(input_channels=16, nb_gaussian=16))
        attention_modules.append(
            AttentionBlock(gating_channels=16, in_channels=16,
                           inter_channels=16))
        fuse_modules.append(Fuse())

    # Registration order determines parameter / state-dict ordering.
    self.upscale = side_upsamplers
    self.upscale_ = score_upsamplers
    self.stages = stage_modules
    self.side_prep = side_modules
    self.score_dsn = score_modules
    self.attention_blocks = attention_modules
    self.gaussian_priors = prior_modules
    self.gaussian_prior_last = GaussianPrior(input_channels=16,
                                             nb_gaussian=16)
    self.fuse = nn.Conv2d(64, 1, kernel_size=1, padding=0)
    self.fuse_all = fuse_modules
    self.fuse_last = Fuse()

    self._initialize_weights(pretrained)
def __init__(self, pretrained=True):
    """Build the appearance + flow streams and the localization head.

    Each stream has five blocks produced by ``make_layers`` and a
    16-channel side convolution for every block after the first. A single
    set of bias-free transposed convolutions (stride ``2**i``, kernel
    ``2**(i + 1)``) is created alongside the appearance stream and has its
    parameters frozen. The localization head reuses the classifier of a
    torchvision VGG-16 without its final layer, plus a 4-output linear
    layer and RoI pooling / align / crop modules.

    :param pretrained: when truthy, ``self._initialize_weights`` is called
        with this value.
    """
    super(AppearanceFlowLocalizedModel, self).__init__()
    print("Setting up Appearance+Flow+Localized model...")
    sys.stdout.flush()

    self.use_cuda = False
    self.n_blocks = 5

    layers_list = [
        [64, 64],
        ["M", 128, 128],
        ["M", 256, 256, 256],
        ["M", 512, 512, 512],
        ["M", 512, 512, 512],
    ]
    app_in_channels = [3, 64, 128, 256, 512]
    flow_in_channels = [3, 64, 128, 256, 512]

    # Make 2 separate streams.
    app_blocks = modules.ModuleList()
    app_sides = modules.ModuleList()
    flow_blocks = modules.ModuleList()
    flow_sides = modules.ModuleList()
    upsamples = modules.ModuleList()

    # Make appearance stream (also creates the shared upsampling layers).
    # Creation order is unchanged so RNG consumption stays identical.
    for i, (cfg, n_in) in enumerate(zip(layers_list, app_in_channels)):
        app_blocks.append(make_layers(cfg, n_in))
        if i > 0:
            app_sides.append(
                nn.Conv2d(cfg[-1], 16, kernel_size=3, padding=1))
            upsamples.append(
                nn.ConvTranspose2d(16, 16, kernel_size=2 ** (i + 1),
                                   stride=2 ** i, bias=False))
    self.app_blocks = app_blocks
    self.app_sides = app_sides

    # Make flow stream.
    for i, (cfg, n_in) in enumerate(zip(layers_list, flow_in_channels)):
        flow_blocks.append(make_layers(cfg, n_in))
        if i > 0:
            flow_sides.append(
                nn.Conv2d(cfg[-1], 16, kernel_size=3, padding=1))
    self.flow_blocks = flow_blocks
    self.flow_sides = flow_sides

    self.upsamples = upsamples
    self.fuse = nn.Conv2d(128, 1, kernel_size=1)

    # NOTE(review): the extent of this branch was reconstructed from a
    # whitespace-flattened source -- confirm that the initialisation call
    # belongs inside it and everything below outside.
    if pretrained:
        print("Initializing weights...")
        sys.stdout.flush()
        self._initialize_weights(pretrained)

    # Make upsample layers not trainable.
    for param in self.upsamples.parameters():
        param.requires_grad = False

    # Setup localization stream.
    print("Setting up localization stream...", end="")
    sys.stdout.flush()

    # NOTE(review): ImageNet weights are requested here regardless of the
    # ``pretrained`` argument -- confirm this is intended.
    vgg = torchvision.models.vgg.vgg16(pretrained=True)
    # Drop the last classifier layer; ``children()`` is the public
    # iterator over the sub-modules of the Sequential.
    vgg.classifier = nn.Sequential(*list(vgg.classifier.children())[:-1])
    self.RCNN_top = vgg.classifier

    pooling_size = 7
    spatial_scale = 1.0 / 16.0
    self.RCNN_bbox_pred = nn.Linear(4096, 4)
    self.RCNN_roi_pool = _RoIPooling(pooling_size, pooling_size,
                                     spatial_scale)
    self.RCNN_roi_align = RoIAlignAvg(pooling_size, pooling_size,
                                      spatial_scale)
    self.RCNN_roi_crop = _RoICrop()
    print("Done!")