def __init__(self, motion='GRU', se_layer=False, dilation=True, basic_model='resnext50'):
    """Build the R3Net saliency network on a ResNe(X)t backbone.

    :param motion: motion-modelling variant tag (stored; not used in this ctor).
    :param se_layer: if True, attach SE attention after the reduce stages.
    :param dilation: if True, convert layer3/layer4 strides into dilations.
    :param basic_model: backbone name; 'resnext50', 'resnext101', 'resnet50',
        any other value falls back to ResNet101.
    """
    super(R3Net, self).__init__()
    self.motion = motion
    self.se_layer = se_layer
    self.dilation = dilation

    # Backbone selection; unknown names fall back to ResNet101.
    backbone_ctor = {
        'resnext50': ResNeXt50,
        'resnext101': ResNeXt101,
        'resnet50': ResNet50,
    }.get(basic_model, ResNet101)
    backbone = backbone_ctor()

    self.layer0 = backbone.layer0
    self.layer1 = backbone.layer1
    self.layer2 = backbone.layer2
    self.layer3 = backbone.layer3
    self.layer4 = backbone.layer4

    # Fuse low-level features (layer0 + layer1 + layer2 channels) into 256 maps.
    self.reduce_low = nn.Sequential(
        nn.Conv2d(64 + 256 + 512, 256, kernel_size=3, padding=1),
        nn.BatchNorm2d(256), nn.PReLU(),
        nn.Conv2d(256, 256, kernel_size=3, padding=1),
        nn.BatchNorm2d(256), nn.PReLU(),
        nn.Conv2d(256, 256, kernel_size=1),
        nn.BatchNorm2d(256), nn.PReLU())

    # Fuse high-level features (layer3 + layer4 channels), finished by ASPP.
    self.reduce_high = nn.Sequential(
        nn.Conv2d(1024 + 2048, 256, kernel_size=3, padding=1),
        nn.BatchNorm2d(256), nn.PReLU(),
        nn.Conv2d(256, 256, kernel_size=3, padding=1),
        nn.BatchNorm2d(256), nn.PReLU(),
        _ASPP(256))
    # self.motion_predict = nn.Conv2d(256, 1, kernel_size=1)

    if self.se_layer:
        self.reduce_high_se = SELayer(256)
        self.reduce_low_se = SELayer(256)
        # self.motion_se = SELayer(32)

    if dilation:
        # Replace strides with dilation so layer3/layer4 keep spatial resolution.
        backbone.layer3.apply(partial(self._nostride_dilate, dilate=2))
        backbone.layer4.apply(partial(self._nostride_dilate, dilate=4))

    for module in self.modules():
        if isinstance(module, (nn.ReLU, nn.Dropout)):
            module.inplace = True
def __init__(self, input_size, input_dim, hidden_dim, kernel_size, bias=True, normalize=False, selayer=False):
    """
    Initialize the ConvGRU cell.

    :param input_size: (int, int)
        Height and width of input tensor as (height, width).
    :param input_dim: int
        Number of channels of input tensor.
    :param hidden_dim: int
        Number of channels of hidden state.
    :param kernel_size: (int, int)
        Size of the convolutional kernel.
    :param bias: bool
        Whether or not to add the bias.
    :param normalize: bool
        Whether to apply LayerNorm to the gate/candidate activations.
    :param selayer: bool
        Whether to apply a squeeze-and-excitation layer to the gate features.
    """
    super(ConvGRUCell, self).__init__()
    self.height, self.width = input_size
    # "Same" padding so the hidden state keeps the input's spatial size.
    self.padding = kernel_size[0] // 2, kernel_size[1] // 2
    self.hidden_dim = hidden_dim
    self.bias = bias
    self.normalize = normalize
    # Single conv producing both gates; split into update/reset downstream.
    self.conv_gates = nn.Conv2d(
        in_channels=input_dim + hidden_dim,
        out_channels=2 * self.hidden_dim,  # for update_gate,reset_gate respectively
        kernel_size=kernel_size,
        padding=self.padding,
        bias=self.bias)
    self.conv_can = nn.Conv2d(
        in_channels=input_dim + hidden_dim,
        out_channels=self.hidden_dim,  # for candidate neural memory
        kernel_size=kernel_size,
        padding=self.padding,
        bias=self.bias)
    self.se_layer = selayer
    if self.se_layer:
        self.se = SELayer(2 * self.hidden_dim, reduction=16)
    if self.normalize:
        # LayerNorm over the full (C, H, W) hidden-state volume; requires the
        # fixed spatial size given by input_size.
        self.gamma_norm = nn.LayerNorm(
            [self.hidden_dim, self.height, self.width])
        self.beta_norm = nn.LayerNorm(
            [self.hidden_dim, self.height, self.width])
        self.cc_norm = nn.LayerNorm(
            [self.hidden_dim, self.height, self.width])
def __init__(self, motion='GRU', se_layer=False, attention=False, pre_attention=True):
    """Build the DSS saliency network on a ResNet101 backbone.

    :param motion: temporal module for the high-level features:
        'GRU', 'LSTM', or 'no' (plain conv stack).
    :param se_layer: stored flag; no SE module is built in this constructor.
    :param attention: stored flag; no attention module is built here.
    :param pre_attention: if True, build SE layers over stacks of prior
        saliency maps (2, 3 and 4 channels).
    """
    super(DSS, self).__init__()
    self.motion = motion
    self.se_layer = se_layer
    self.attention = attention
    self.pre_attention = pre_attention

    backbone = ResNet101()
    self.layer0 = backbone.layer0
    self.layer1 = backbone.layer1
    self.layer2 = backbone.layer2
    self.layer3 = backbone.layer3
    self.layer4 = backbone.layer4

    # Fuse layer3 + layer4 features into 256 maps, finished by ASPP.
    self.reduce_high = nn.Sequential(
        nn.Conv2d(1024 + 2048, 256, kernel_size=3, padding=1),
        nn.BatchNorm2d(256), nn.PReLU(),
        nn.Conv2d(256, 256, kernel_size=3, padding=1),
        nn.BatchNorm2d(256), nn.PReLU(),
        _ASPP(256))

    if self.motion == 'GRU':
        self.reduce_high_motion = ConvGRU(input_size=(119, 119), input_dim=256,
                                          hidden_dim=128,
                                          kernel_size=(3, 3),
                                          num_layers=1,
                                          batch_first=True,
                                          bias=True,
                                          return_all_layers=False)
        # self.motion_predict = nn.Conv2d(256, 1, kernel_size=1)
    elif self.motion == 'LSTM':
        self.reduce_high_motion = ConvLSTM(input_size=(119, 119), input_dim=256,
                                           hidden_dim=32,
                                           kernel_size=(3, 3),
                                           num_layers=1,
                                           padding=1,
                                           dilation=1,
                                           batch_first=True,
                                           bias=True,
                                           return_all_layers=False)
    elif self.motion == 'no':
        self.reduce_high_motion = nn.Sequential(
            nn.Conv2d(256, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128), nn.PReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128), nn.PReLU(),
            nn.Conv2d(128, 32, kernel_size=1))

    # Four identical refinement heads over 129 input channels
    # (128 motion features + 1 prior saliency map, presumably — verify in forward).
    def motion_head():
        return nn.Sequential(
            nn.Conv2d(129, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64), nn.PReLU(),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64), nn.PReLU(),
            nn.Conv2d(64, 1, kernel_size=1))

    for step in range(1, 5):
        setattr(self, 'predict%d_motion' % step, motion_head())

    if self.pre_attention:
        # SE attention over growing stacks of previous saliency predictions.
        self.pre_sals_attention2 = SELayer(2, 1)
        self.pre_sals_attention3 = SELayer(3, 1)
        self.pre_sals_attention4 = SELayer(4, 1)

    # DSS-style deep-supervision side branches (conv-ReLU-conv-ReLU-1x1).
    def side_branch(in_ch, mid_ch, k):
        return nn.Sequential(
            nn.Conv2d(in_ch, mid_ch, kernel_size=k, padding=k // 2), nn.ReLU(),
            nn.Conv2d(mid_ch, mid_ch, kernel_size=k, padding=k // 2), nn.ReLU(),
            nn.Conv2d(mid_ch, 1, kernel_size=1))

    self.dsn6 = side_branch(2048, 512, 7)
    self.dsn5 = side_branch(1024, 512, 5)
    self.dsn4 = side_branch(512, 256, 5)
    self.dsn4_fuse = nn.Conv2d(3, 1, kernel_size=1)
    self.dsn3 = side_branch(256, 256, 5)
    self.dsn3_fuse = nn.Conv2d(3, 1, kernel_size=1)
    self.dsn2 = side_branch(64, 128, 3)
    self.dsn2_fuse = nn.Conv2d(5, 1, kernel_size=1)
    self.dsn_all_fuse = nn.Conv2d(5, 1, kernel_size=1)
def __init__(self, motion='GRU', se_layer=False, dilation=True, basic_model='resnext50'):
    """Build the R3Net variant with recurrent (GRU/LSTM) feature reduction.

    :param motion: 'GRU' builds ConvGRUs for both reduce paths; 'LSTM' builds
        a ConvLSTM for the high path only; other values build no recurrence.
    :param se_layer: if True, attach SE attention after the reduce stages.
    :param dilation: if True, convert layer3/layer4 strides into dilations.
    :param basic_model: backbone name; 'resnext50', 'resnext101', 'resnet50',
        any other value falls back to ResNet101.
    """
    super(R3Net, self).__init__()
    self.motion = motion
    self.se_layer = se_layer
    self.dilation = dilation

    # Backbone selection; unknown names fall back to ResNet101.
    backbone = {
        'resnext50': ResNeXt50,
        'resnext101': ResNeXt101,
        'resnet50': ResNet50,
    }.get(basic_model, ResNet101)()

    self.layer0 = backbone.layer0
    self.layer1 = backbone.layer1
    self.layer2 = backbone.layer2
    self.layer3 = backbone.layer3
    self.layer4 = backbone.layer4

    # Fuse low-level features (layer0 + layer1 + layer2 channels) into 256 maps.
    self.reduce_low = nn.Sequential(
        nn.Conv2d(64 + 256 + 512, 256, kernel_size=3, padding=1),
        nn.BatchNorm2d(256), nn.PReLU(),
        nn.Conv2d(256, 256, kernel_size=3, padding=1),
        nn.BatchNorm2d(256), nn.PReLU(),
        nn.Conv2d(256, 256, kernel_size=1),
        nn.BatchNorm2d(256), nn.PReLU())

    # Fuse high-level features (layer3 + layer4 channels), finished by ASPP.
    self.reduce_high = nn.Sequential(
        nn.Conv2d(1024 + 2048, 256, kernel_size=3, padding=1),
        nn.BatchNorm2d(256), nn.PReLU(),
        nn.Conv2d(256, 256, kernel_size=3, padding=1),
        nn.BatchNorm2d(256), nn.PReLU(),
        _ASPP(256))

    if self.motion == 'GRU':
        self.reduce_low_GRU = ConvGRU(input_size=(119, 119), input_dim=256,
                                      hidden_dim=256,
                                      kernel_size=(3, 3),
                                      num_layers=1,
                                      batch_first=True,
                                      bias=True,
                                      return_all_layers=False)
        self.reduce_high_GRU = ConvGRU(input_size=(119, 119), input_dim=256,
                                       hidden_dim=256,
                                       kernel_size=(3, 3),
                                       num_layers=1,
                                       batch_first=True,
                                       bias=True,
                                       return_all_layers=False)
        # self.motion_predict = nn.Conv2d(256, 1, kernel_size=1)
    elif self.motion == 'LSTM':
        # NOTE: the LSTM path only wraps the high-level features; the low
        # path stays non-recurrent (the low ConvLSTM was commented out).
        # self.reduce_low_GRU = ConvLSTM(input_size=(119, 119), input_dim=256,
        #                                hidden_dim=256,
        #                                kernel_size=(3, 3),
        #                                num_layers=1,
        #                                padding=1,
        #                                dilation=1,
        #                                batch_first=True,
        #                                bias=True,
        #                                return_all_layers=False)
        self.reduce_high_GRU = ConvLSTM(input_size=(119, 119), input_dim=256,
                                        hidden_dim=256,
                                        kernel_size=(3, 3),
                                        num_layers=1,
                                        padding=1,
                                        dilation=1,
                                        batch_first=True,
                                        bias=True,
                                        return_all_layers=False)
        # self.motion_predict = nn.Conv2d(256, 1, kernel_size=1)

    if self.se_layer:
        self.reduce_high_se = SELayer(256)
        self.reduce_low_se = SELayer(256)
        # self.motion_se = SELayer(32)

    if dilation:
        # Replace strides with dilation so layer3/layer4 keep spatial resolution.
        backbone.layer3.apply(partial(self._nostride_dilate, dilate=2))
        backbone.layer4.apply(partial(self._nostride_dilate, dilate=4))

    for module in self.modules():
        if isinstance(module, (nn.ReLU, nn.Dropout)):
            module.inplace = True
def __init__(self, motion='GRU', se_layer=False, attention=False, basic_model='resnext50'):
    """Build the R3Net_prior network (recurrent refinement with prior maps).

    :param motion: temporal module over high-level features:
        'GRU', 'LSTM', or 'no' (plain conv stack); all emit 32 channels.
    :param se_layer: if True, attach SE attention to the reduced (256-ch)
        and motion (32-ch) features.
    :param attention: if True, add an OC-context attention module.
    :param basic_model: 'resnext50' for ResNeXt50, anything else ResNeXt101.
    """
    super(R3Net_prior, self).__init__()
    self.motion = motion
    self.se_layer = se_layer
    self.attention = attention

    backbone = ResNeXt50() if basic_model == 'resnext50' else ResNeXt101()
    self.layer0 = backbone.layer0
    self.layer1 = backbone.layer1
    self.layer2 = backbone.layer2
    self.layer3 = backbone.layer3
    self.layer4 = backbone.layer4

    # Fuse low-level features (layer0 + layer1 + layer2 channels) into 256 maps.
    self.reduce_low = nn.Sequential(
        nn.Conv2d(64 + 256 + 512, 256, kernel_size=3, padding=1),
        nn.BatchNorm2d(256), nn.PReLU(),
        nn.Conv2d(256, 256, kernel_size=3, padding=1),
        nn.BatchNorm2d(256), nn.PReLU(),
        nn.Conv2d(256, 256, kernel_size=1),
        nn.BatchNorm2d(256), nn.PReLU())

    # Fuse high-level features (layer3 + layer4 channels), finished by ASPP.
    self.reduce_high = nn.Sequential(
        nn.Conv2d(1024 + 2048, 256, kernel_size=3, padding=1),
        nn.BatchNorm2d(256), nn.PReLU(),
        nn.Conv2d(256, 256, kernel_size=3, padding=1),
        nn.BatchNorm2d(256), nn.PReLU(),
        _ASPP(256))

    if self.motion == 'GRU':
        # self.reduce_low_GRU = ConvGRU(input_size=(119, 119), input_dim=256,
        #                               hidden_dim=256,
        #                               kernel_size=(3, 3),
        #                               num_layers=1,
        #                               batch_first=True,
        #                               bias=True,
        #                               return_all_layers=False)
        self.reduce_high_motion = ConvGRU(input_size=(119, 119), input_dim=256,
                                          hidden_dim=32,
                                          kernel_size=(3, 3),
                                          num_layers=1,
                                          batch_first=True,
                                          bias=True,
                                          return_all_layers=False)
        # self.motion_predict = nn.Conv2d(256, 1, kernel_size=1)
    elif self.motion == 'LSTM':
        # self.reduce_low_GRU = ConvLSTM(input_size=(119, 119), input_dim=256,
        #                                hidden_dim=256,
        #                                kernel_size=(3, 3),
        #                                num_layers=1,
        #                                padding=1,
        #                                dilation=1,
        #                                batch_first=True,
        #                                bias=True,
        #                                return_all_layers=False)
        self.reduce_high_motion = ConvLSTM(input_size=(119, 119), input_dim=256,
                                           hidden_dim=32,
                                           kernel_size=(3, 3),
                                           num_layers=1,
                                           padding=1,
                                           dilation=1,
                                           batch_first=True,
                                           bias=True,
                                           return_all_layers=False)
        # self.motion_predict = nn.Conv2d(256, 1, kernel_size=1)
    elif self.motion == 'no':
        self.reduce_high_motion = nn.Sequential(
            nn.Conv2d(256, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128), nn.PReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128), nn.PReLU(),
            nn.Conv2d(128, 32, kernel_size=1))

    if self.se_layer:
        self.reduce_high_se = SELayer(256)
        # self.reduce_low_se = SELayer(256)
        self.motion_se = SELayer(32)

    if self.attention:
        self.reduce_atte = BaseOC_Context_Module(256, 256, 128, 128, 0.05, sizes=([2]))

    # Initial prediction plus six identical refinement heads over
    # [256 reduced features + 1 prior map] = 257 input channels.
    self.predict0 = nn.Conv2d(256, 1, kernel_size=1)

    def refine_head():
        return nn.Sequential(
            nn.Conv2d(257, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128), nn.PReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128), nn.PReLU(),
            nn.Conv2d(128, 1, kernel_size=1))

    for step in range(1, 7):
        setattr(self, 'predict%d' % step, refine_head())

    # Four identical motion refinement heads over
    # [32 motion features + 1 prior map] = 33 input channels.
    def motion_head():
        return nn.Sequential(
            nn.Conv2d(33, 16, kernel_size=3, padding=1),
            nn.BatchNorm2d(16), nn.PReLU(),
            nn.Conv2d(16, 16, kernel_size=3, padding=1),
            nn.BatchNorm2d(16), nn.PReLU(),
            nn.Conv2d(16, 1, kernel_size=1))

    for step in range(1, 5):
        setattr(self, 'predict%d_motion' % step, motion_head())

    for module in self.modules():
        if isinstance(module, (nn.ReLU, nn.Dropout)):
            module.inplace = True
def __init__(self, motion='GRU', se_layer=False, attention=False, pre_attention=False, isTriplet=False, basic_model='resnext50', sta=False, naive_fuse=False):
    """Build the extended R3Net_prior network (STA / naive-fuse variants).

    :param motion: temporal module over high-level features:
        'GRU'/'LSTM' (64 hidden channels) or 'no' (conv stack, 32 channels).
    :param se_layer: if True, attach SE attention to reduced and motion features.
    :param attention: if True, add an OC-context attention module.
    :param pre_attention: if True, build SE layers over stacks of prior maps.
    :param isTriplet: stored flag; nothing is built from it here.
    :param basic_model: 'resnext50', 'resnext101', 'resnet50', else ResNet101.
    :param sta: if True, build the spatio-temporal attention module + sp_down.
    :param naive_fuse: if True, build sp_down for naive fusion.
        NOTE(review): when both sta and naive_fuse are set, the second
        assignment overwrites sp_down — preserved from the original.
    """
    super(R3Net_prior, self).__init__()
    self.motion = motion
    self.se_layer = se_layer
    self.attention = attention
    self.pre_attention = pre_attention
    self.isTriplet = isTriplet
    self.sta = sta
    self.naive_fuse = naive_fuse

    # Backbone selection; unknown names fall back to ResNet101.
    backbone = {
        'resnext50': ResNeXt50,
        'resnext101': ResNeXt101,
        'resnet50': ResNet50,
    }.get(basic_model, ResNet101)()

    self.layer0 = backbone.layer0
    self.layer1 = backbone.layer1
    self.layer2 = backbone.layer2
    self.layer3 = backbone.layer3
    self.layer4 = backbone.layer4

    # Fuse low-level features (layer0 + layer1 + layer2 channels) into 256 maps.
    self.reduce_low = nn.Sequential(
        nn.Conv2d(64 + 256 + 512, 256, kernel_size=3, padding=1),
        nn.BatchNorm2d(256), nn.PReLU(),
        nn.Conv2d(256, 256, kernel_size=3, padding=1),
        nn.BatchNorm2d(256), nn.PReLU(),
        nn.Conv2d(256, 256, kernel_size=1),
        nn.BatchNorm2d(256), nn.PReLU())

    # Fuse high-level features (layer3 + layer4 channels), finished by ASPP.
    self.reduce_high = nn.Sequential(
        nn.Conv2d(1024 + 2048, 256, kernel_size=3, padding=1),
        nn.BatchNorm2d(256), nn.PReLU(),
        nn.Conv2d(256, 256, kernel_size=3, padding=1),
        nn.BatchNorm2d(256), nn.PReLU(),
        _ASPP(256))

    if self.motion == 'GRU':
        self.reduce_high_motion = ConvGRU(input_size=(119, 119), input_dim=256,
                                          hidden_dim=64,
                                          kernel_size=(3, 3),
                                          num_layers=1,
                                          batch_first=True,
                                          bias=True,
                                          return_all_layers=False)
        # self.motion_predict = nn.Conv2d(256, 1, kernel_size=1)
    elif self.motion == 'LSTM':
        self.reduce_high_motion = ConvLSTM(input_size=(119, 119), input_dim=256,
                                           hidden_dim=64,
                                           kernel_size=(3, 3),
                                           num_layers=1,
                                           padding=1,
                                           dilation=1,
                                           batch_first=True,
                                           bias=True,
                                           return_all_layers=False)
    elif self.motion == 'no':
        # NOTE(review): this branch emits 32 channels while GRU/LSTM emit 64;
        # preserved from the original — confirm downstream usage.
        self.reduce_high_motion = nn.Sequential(
            nn.Conv2d(256, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128), nn.PReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128), nn.PReLU(),
            nn.Conv2d(128, 32, kernel_size=1))
        # self.motion_predict = nn.Conv2d(256, 1, kernel_size=1)

    if self.se_layer:
        self.reduce_high_se = SELayer(256)
        # self.reduce_low_se = SELayer(256)
        self.motion_se = SELayer(32)

    if self.attention:
        self.reduce_atte = BaseOC_Context_Module(256, 256, 128, 128, 0.05, sizes=([2]))

    if self.pre_attention:
        # SE attention over growing stacks of previous saliency predictions.
        self.pre_sals_attention2 = SELayer(2, 1)
        self.pre_sals_attention3 = SELayer(3, 1)
        self.pre_sals_attention4 = SELayer(4, 1)

    if self.sta:
        self.sta_module = STA_Module(64)
        self.sp_down = nn.Sequential(
            nn.Conv2d(256, 64, kernel_size=1), nn.PReLU())

    if self.naive_fuse:
        self.sp_down = nn.Sequential(
            nn.Conv2d(256, 64, kernel_size=1), nn.PReLU())
        # self.sp_down2 = nn.Sequential(
        #     nn.Conv2d(128, 64, kernel_size=1), nn.PReLU()
        # )

    # Initial prediction plus six identical refinement heads over
    # [256 reduced features + 1 prior map] = 257 input channels.
    self.predict0 = nn.Conv2d(256, 1, kernel_size=1)

    def refine_head():
        return nn.Sequential(
            nn.Conv2d(257, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128), nn.PReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128), nn.PReLU(),
            nn.Conv2d(128, 1, kernel_size=1))

    for step in range(1, 7):
        setattr(self, 'predict%d' % step, refine_head())

    # Four identical motion refinement heads over
    # [64 motion features + 1 prior map] = 65 input channels.
    def motion_head():
        return nn.Sequential(
            nn.Conv2d(65, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32), nn.PReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32), nn.PReLU(),
            nn.Conv2d(32, 1, kernel_size=1))

    for step in range(1, 5):
        setattr(self, 'predict%d_motion' % step, motion_head())

    for module in self.modules():
        if isinstance(module, (nn.ReLU, nn.Dropout)):
            module.inplace = True