def __init__(self, in_channels, out_channels, kernel_size, stride=1,
             padding=0, dilation=1, groups=1, bias=True,
             mask_mode="channel_to_channel"):
    """
    Store the convolution hyperparameters and allocate the weight, the
    weight mask buffer and the optional bias. Every tensor created here is
    allocated uninitialized.

    @param mask_mode (string)
    Determines how large the weight mask tensor needs to be.
    "channel_to_channel" allocates a mask of shape
    (out_channels, in_channels, 1, 1); "weight_to_weight" allocates a mask
    with the same shape as the weight. Any other value raises ValueError.
    """
    super().__init__()

    self.in_channels, self.out_channels = in_channels, out_channels
    self.groups = groups
    self.kernel_size = pair(kernel_size)
    self.stride = pair(stride)
    self.padding = pair(padding)
    self.dilation = pair(dilation)

    # NOTE(review): the second dimension is in_channels, not
    # in_channels // groups -- confirm this is intended when groups > 1.
    weight_shape = (out_channels, in_channels, *self.kernel_size)
    self.weight = Parameter(torch.Tensor(*weight_shape))

    if mask_mode == "channel_to_channel":
        mask_shape = (out_channels, in_channels, 1, 1)
    elif mask_mode == "weight_to_weight":
        mask_shape = weight_shape
    else:
        raise ValueError(f"Unrecognized mask_mode: {mask_mode}")
    # A buffer rather than a Parameter: it is part of the module state but
    # is not returned by parameters().
    self.register_buffer("weight_mask", torch.Tensor(*mask_shape))

    self.bias = Parameter(torch.Tensor(out_channels)) if bias else None
def __init__(self, in_channels, out_channels, kernel_size, stride=1,
             padding=0, dilation=1, groups=1, bias=True, weight_decay=1.,
             **kwargs):
    """
    Store the convolution hyperparameters, allocate the weight and optional
    bias, then call ``reset_parameters()`` and print the module.

    :param in_channels: stored as ``self.in_channels``
    :param out_channels: stored as ``self.out_channels``
    :param kernel_size: passed through ``pair`` and stored
    :param stride: passed through ``pair`` and stored
    :param padding: passed through ``pair`` and stored
    :param dilation: passed through ``pair`` and stored
    :param groups: the weight's second dimension is ``in_channels // groups``
    :param bias: when falsy, ``bias`` is registered as a ``None`` parameter
    :param weight_decay: stored as ``self.weight_decay``
    :param kwargs: accepted and ignored
    """
    super(MAPConv2d, self).__init__()

    self.weight_decay = weight_decay
    # Tensor constructor matching the device that is available at
    # construction time.
    if torch.cuda.is_available():
        self.floatTensor = torch.cuda.FloatTensor
    else:
        self.floatTensor = torch.FloatTensor

    self.in_channels, self.out_channels = in_channels, out_channels
    self.groups = groups
    self.kernel_size = pair(kernel_size)
    self.stride = pair(stride)
    self.padding = pair(padding)
    self.dilation = pair(dilation)
    self.output_padding = pair(0)

    weight_shape = (out_channels, in_channels // groups, *self.kernel_size)
    self.weight = Parameter(torch.Tensor(*weight_shape))
    if not bias:
        self.register_parameter('bias', None)
    else:
        self.bias = Parameter(torch.Tensor(out_channels))

    self.reset_parameters()
    self.input_shape = None
    print(self)
def __init__(self, in_channels, out_channels, kernel_size, stride=1,
             padding=0, dilation=1, groups=1, bias=True):
    """
    Store the convolution hyperparameters and create the ``weight`` and
    ``w_logvar`` parameters plus the optional bias.

    ``weight`` is initialized with ``kaiming_normal_(mode="fan_out")``,
    ``w_logvar`` is filled with -10 and the bias (if any) with 0.
    """
    super().__init__()

    self.in_channels, self.out_channels = in_channels, out_channels
    self.groups = groups
    self.kernel_size = pair(kernel_size)
    self.stride = pair(stride)
    self.padding = pair(padding)
    self.dilation = pair(dilation)

    weight_shape = (out_channels, in_channels // groups, *self.kernel_size)

    self.weight = Parameter(torch.Tensor(*weight_shape))
    init.kaiming_normal_(self.weight, mode="fan_out")

    self.w_logvar = Parameter(torch.Tensor(*weight_shape))
    self.w_logvar.data.fill_(-10)

    if not bias:
        self.bias = None
    else:
        self.bias = Parameter(torch.Tensor(out_channels))
        self.bias.data.fill_(0)

    self.input_shape = None
    self.threshold = 3
    self.epsilon = 1e-8

    # Tensor constructor matching the device that is available at
    # construction time.
    if torch.cuda.is_available():
        self.tensor_constructor = torch.cuda.FloatTensor
    else:
        self.tensor_constructor = torch.FloatTensor
def __init__(self, in_channels, out_channels, kernel_size, stride=1,
             padding=0, dilation=1, groups=1, bias=True, droprate=0.5,
             weight_decay=1., share_mask=False, **kwargs):
    """
    Build the layer through the parent constructor, then store the dropout
    settings, call ``reset_parameters()`` and print the module.

    :param droprate: stored as ``self.droprate``
    :param weight_decay: stored as ``self.weight_decay``
    :param share_mask: stored as ``self.share_mask``
    :param kwargs: accepted and ignored

    The remaining arguments are forwarded to the parent constructor
    (kernel_size, stride, padding and dilation after ``pair``).
    """
    kernel_hw = pair(kernel_size)
    stride_hw = pair(stride)
    padding_hw = pair(padding)
    dilation_hw = pair(dilation)

    # Assigned before the parent constructor runs, as in the original
    # statement order.
    if torch.cuda.is_available():
        self.floatTensor = torch.cuda.FloatTensor
    else:
        self.floatTensor = torch.FloatTensor

    # NOTE(review): the positional ``False`` and ``pair(0)`` presumably map
    # to ``transposed`` and ``output_padding`` of the parent class --
    # confirm against the parent's signature.
    super(DropoutConv2d, self).__init__(
        in_channels, out_channels, kernel_hw, stride_hw, padding_hw,
        dilation_hw, False, pair(0), groups, bias)

    self.droprate = droprate
    self.dim_z = self.weight.size(0)
    self.weight_decay = weight_decay
    self.share_mask = share_mask

    self.reset_parameters()
    print(self)
def __init__(self, in_channels, out_channels, kernel_size, stride=1,
             padding=0, dilation=1, groups=1, bias=True, lamba=1., alpha=1.,
             beta=4., weight_decay=1., **kwargs):
    """
    :param in_channels: Number of input channels
    :param out_channels: Number of output channels
    :param kernel_size: size of the kernel
    :param stride: stride for the convolution
    :param padding: padding for the convolution
    :param dilation: dilation factor for the convolution
    :param groups: how many groups we will assume in the convolution
    :param bias: whether we will use a bias
    :param lamba: strength of the TFL regularization
    :param alpha: stored as ``self.alpha``
    :param beta: stored as ``self.beta``; also used to derive
        ``self.lamba1 = lamba / beta``
    :param weight_decay: stored as ``self.weight_decay``
    :param kwargs: accepted and ignored
    """
    super(group_relaxed_L1L2Conv2d, self).__init__()

    cuda_available = torch.cuda.is_available()
    self.floatTensor = (torch.cuda.FloatTensor if cuda_available
                        else torch.FloatTensor)

    self.in_channels = in_channels
    self.out_channels = out_channels
    self.kernel_size = pair(kernel_size)
    self.stride = pair(stride)
    self.padding = pair(padding)
    self.dilation = pair(dilation)
    self.output_padding = pair(0)
    self.groups = groups

    self.lamba = lamba
    self.alpha = alpha
    self.beta = beta
    self.lamba1 = self.lamba / self.beta
    self.weight_decay = weight_decay

    weight_shape = (out_channels, in_channels // groups, *self.kernel_size)
    self.weight = Parameter(torch.Tensor(*weight_shape))

    # ``u`` is a plain tensor attribute (neither a Parameter nor a buffer),
    # so it is placed on the GPU explicitly. Only do so when CUDA is
    # actually available: an unconditional ``.to('cuda')`` raises on
    # CPU-only hosts.
    self.u = torch.rand(*weight_shape)
    if cuda_available:
        self.u = self.u.to('cuda')

    if bias:
        self.bias = Parameter(torch.Tensor(out_channels))
    else:
        self.register_parameter('bias', None)

    self.reset_parameters()
    self.input_shape = None
    print(self)
def __init__(self, in_channels, out_channels, kernel_size, stride=1,
             padding=0, dilation=1, groups=1, use_bias=True, simple=True,
             add_diagonal=True, weight_decay=1., **kwargs):
    """
    Build the layer through the parent constructor and allocate the extra
    parameters ``r`` (always), ``t`` (only when ``simple`` is falsy) and
    ``d`` (only when ``add_diagonal`` is truthy); then call
    ``reset_parameters()`` and print the module.

    :param simple: selects the shape of ``r`` and whether ``t`` is created
    :param add_diagonal: whether the ``d`` parameter is created
    :param weight_decay: stored as ``self.weight_decay``
    :param kwargs: accepted and ignored

    After the parent constructor returns, ``self.kernel_size`` is replaced
    by the first element of ``pair(kernel_size)``.
    """
    kernel_hw = pair(kernel_size)
    stride_hw = pair(stride)
    padding_hw = pair(padding)
    dilation_hw = pair(dilation)

    # These attributes are assigned before the parent constructor runs, as
    # in the original statement order.
    self.weight_decay = weight_decay
    use_cuda = torch.cuda.is_available()
    self.floatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
    self.device = torch.device('cuda') if use_cuda else torch.device('cpu')
    self.simple = simple
    self.add_diagonal = add_diagonal

    # NOTE(review): the positional ``False`` and ``pair(0)`` presumably map
    # to ``transposed`` and ``output_padding`` of the parent class --
    # confirm against the parent's signature.
    super(OrthogonalConv2d, self).__init__(
        in_channels, out_channels, kernel_hw, stride_hw, padding_hw,
        dilation_hw, False, pair(0), groups, use_bias)

    self.in_channels = in_channels
    self.out_channels = out_channels
    self.kernel_size = kernel_hw[0]

    new_tensor = self.floatTensor
    k = self.kernel_size
    if not simple:
        self.r = Parameter(new_tensor(2, self.out_channels))
        self.t = Parameter(new_tensor(2 * (k - 1), self.out_channels))
    else:
        self.r = Parameter(new_tensor(k * k, self.out_channels))

    if self.add_diagonal:
        self.d = Parameter(
            new_tensor(k, k, min(self.in_channels, self.out_channels)))

    self.reset_parameters()
    print(self)
def __init__(self, in_channels, out_channels, kernel_size, stride=1,
             padding=0, dilation=1, groups=1, bias=True, droprate_init=0.5,
             temperature=2./3., weight_decay=1., lamba=1., local_rep=False,
             **kwargs):
    """
    :param in_channels: Number of input channels
    :param out_channels: Number of output channels
    :param kernel_size: Size of the kernel
    :param stride: Stride for the convolution
    :param padding: Padding for the convolution
    :param dilation: Dilation factor for the convolution
    :param groups: How many groups we will assume in the convolution
    :param bias: Whether we will use a bias
    :param droprate_init: Dropout rate that the L0 gates will be initialized
        to (a value of 0 is replaced by 0.5)
    :param temperature: Temperature of the concrete distribution
    :param weight_decay: Strength of the L2 penalty
    :param lamba: Strength of the L0 penalty
    :param local_rep: Whether we will use a separate gate sample per element
        in the minibatch
    :raises ValueError: if in_channels or out_channels is not divisible by
        groups
    """
    super(L0Conv2d, self).__init__()

    for label, count in (('in_channels', in_channels),
                         ('out_channels', out_channels)):
        if count % groups != 0:
            raise ValueError(label + ' must be divisible by groups')

    self.in_channels, self.out_channels = in_channels, out_channels
    self.groups = groups
    self.kernel_size = pair(kernel_size)
    self.stride = pair(stride)
    self.padding = pair(padding)
    self.dilation = pair(dilation)
    self.output_padding = pair(0)

    self.prior_prec = weight_decay
    self.lamba = lamba
    self.temperature = temperature
    if droprate_init != 0.:
        self.droprate_init = droprate_init
    else:
        self.droprate_init = 0.5

    if torch.cuda.is_available():
        self.floatTensor = torch.cuda.FloatTensor
    else:
        self.floatTensor = torch.FloatTensor

    self.weights = Parameter(
        torch.Tensor(out_channels, in_channels // groups, *self.kernel_size))
    # One gate parameter per output channel.
    self.qz_loga = Parameter(torch.Tensor(out_channels))
    self.dim_z = out_channels
    self.input_shape = None
    self.local_rep = local_rep
    self.ppos = 0

    self.use_bias = bool(bias)
    if self.use_bias:
        self.bias = Parameter(torch.Tensor(out_channels))

    self.reset_parameters()
    print(self)
def __init__(self, in_channels, out_channels, kernel_size, stride=1,
             padding=0, dilation=1, groups=1, bias=True, prior_std=1.,
             prior_std_z=1., dof=1., **kwargs):
    """
    Store the convolution hyperparameters and allocate mean / log-variance
    parameters for the weights, for ``z`` and, optionally, for the bias;
    then call ``reset_parameters()`` and print the module.

    :param prior_std: stored as ``self.prior_std``
    :param prior_std_z: stored as ``self.prior_std_z``
    :param dof: stored as ``self.dof``
    :param bias: when truthy, ``mean_bias`` and ``logvar_bias`` are created
    :param kwargs: accepted and ignored
    :raises ValueError: if in_channels or out_channels is not divisible by
        groups
    """
    super(HSConv2d, self).__init__()

    for label, count in (('in_channels', in_channels),
                         ('out_channels', out_channels)):
        if count % groups != 0:
            raise ValueError(label + ' must be divisible by groups')

    self.in_channels, self.out_channels = in_channels, out_channels
    self.groups = groups
    self.kernel_size = pair(kernel_size)
    self.stride = pair(stride)
    self.padding = pair(padding)
    self.dilation = pair(dilation)
    self.output_padding = pair(0)

    self.prior_std = prior_std
    self.prior_std_z = prior_std_z
    self.dof = dof

    in_per_group = in_channels // groups
    weight_shape = (out_channels, in_per_group, *self.kernel_size)
    self.mean_w = Parameter(torch.Tensor(*weight_shape))
    self.logvar_w = Parameter(torch.Tensor(*weight_shape))

    # ``z`` has one entry per input channel of a group.
    self.qz_mean = Parameter(torch.Tensor(in_per_group))
    self.qz_logvar = Parameter(torch.Tensor(in_per_group))
    self.dim_z = in_per_group

    self.use_bias = bool(bias)
    if self.use_bias:
        self.mean_bias = Parameter(torch.Tensor(out_channels))
        self.logvar_bias = Parameter(torch.Tensor(out_channels))

    if torch.cuda.is_available():
        self.floatTensor = torch.cuda.FloatTensor
    else:
        self.floatTensor = torch.FloatTensor

    self.reset_parameters()
    print(self)
def __init__(self, in_channels, out_channels, kernel_size, central_data,
             stride=1, padding=0, dilation=1, groups=1, bias=True):
    """
    Store the convolution hyperparameters, create the weight mean and
    log-variance tensors and hand them to ``central_data.register``.

    :param central_data: object providing ``z_logvar_init`` (fill value for
        the log-variance tensor) and ``register(module, w_mu, w_logvar)``,
        whose return value is stored as ``self.data_index``
    :param bias: when truthy, a bias Parameter is created on this module
    """
    super().__init__()

    self.in_channels, self.out_channels = in_channels, out_channels
    self.groups = groups
    self.kernel_size = pair(kernel_size)
    self.stride = pair(stride)
    self.padding = pair(padding)
    self.dilation = pair(dilation)

    # Store in a list to avoid having it registered as a module, otherwise
    # it will appear multiple times in the state dict.
    self.central_data = [central_data]

    # Plain tensors, not Parameters of this module: they are passed to
    # central_data.register below.
    weight_shape = (out_channels, in_channels // groups, *self.kernel_size)
    w_mu = torch.Tensor(*weight_shape)
    w_logvar = torch.Tensor(*weight_shape)

    self.bias = Parameter(torch.Tensor(out_channels)) if bias else None

    w_logvar.data.fill_(central_data.z_logvar_init)

    # Standard nn.Conv2d initialization.
    init.kaiming_uniform_(w_mu, a=math.sqrt(5))
    if self.bias is not None:
        fan_in, _ = init._calculate_fan_in_and_fan_out(w_mu)
        limit = 1 / math.sqrt(fan_in)
        init.uniform_(self.bias, -limit, limit)

    self.data_index = central_data.register(self, w_mu, w_logvar)

    if torch.cuda.is_available():
        self.tensor_constructor = torch.cuda.FloatTensor
    else:
        self.tensor_constructor = torch.FloatTensor
def __init__(self, in_channels, out_channels, kernel_size, stride=1,
             padding=0, dilation=1, bias=True, name='', weight_decay=0,
             lamba=0.1 / 6e5, droprate_init=0.01, k=7, local_rep=True,
             init_size=-1, device='cpu'):
    """
    Store the convolution hyperparameters, allocate the bias (optional),
    weights and ``z_phi`` parameters, create the BatchNorm2d sub-module,
    then call ``reset_parameters()`` and print the module.

    :param out_channels: converted with ``int()`` before being stored
    :param name: stored as ``self.layer_name``
    :param weight_decay: stored as ``self.weight_decay``
    :param lamba: stored as ``self.lamba``
    :param droprate_init: stored as ``self.droprate_init``
    :param k: stored as ``self.k``
    :param local_rep: stored as ``self.local_rep``
    :param init_size: stored as ``self.activated_neuron_size``
    :param device: stored as ``self.device``; ``self.dimz_tensor`` is moved
        to it
    """
    super(ARMConv2dBn, self).__init__()

    n_out = int(out_channels)
    self.in_channels = in_channels
    self.out_channels = n_out
    self.kernel_size = pair(kernel_size)
    self.stride = pair(stride)
    self.padding = pair(padding)
    self.dilation = pair(dilation)
    self.output_padding = pair(0)

    self.weight_decay = weight_decay
    self.lamba = lamba
    self.k = k

    # The bias (when requested) is registered before the weights.
    self.use_bias = bias
    if bias:
        self.bias = Parameter(torch.Tensor(n_out))
    self.weights = Parameter(
        torch.Tensor(n_out, in_channels, *self.kernel_size))
    self.z_phi = Parameter(torch.Tensor(n_out))
    self.dim_z = n_out
    self.input_shape = None

    # Uniform(0, 1) sample with one entry per output channel.
    self.u = torch.Tensor(self.dim_z).uniform_(0, 1)
    self.droprate_init = droprate_init
    self.forward_mode = True
    self.local_rep = local_rep
    self.activated_neuron_size = init_size
    self.device = device

    self.bn = nn.BatchNorm2d(n_out)
    self.reset_parameters()

    self.layer_name = name
    self.dimz_tensor = torch.FloatTensor(self.dim_z).zero_().to(device)
    print(self)
def __init__(self, in_channels: int, out_channels: int, kernel_size: int,
             dim: int = 2, stride: int = 1, padding: int = 0,
             dilation: int = 1, groups: int = 1, use_bias: bool = True,
             weight_decay: float = 1., **kwargs):
    """
    Build the layer through the parent constructor and allocate the
    ``columns``, ``rows`` and ``alpha`` parameters; then call
    ``reset_parameters()`` and print the module.

    :param dim: size of the second dimension of ``columns`` and ``rows``
    :param use_bias: stored as ``self.use_bias`` and forwarded to the parent
        constructor
    :param weight_decay: stored as ``self.weight_decay``
    :param kwargs: accepted and ignored
    """
    stride_hw = pair(stride)
    padding_hw = pair(padding)
    dilation_hw = pair(dilation)

    # These attributes are assigned before the parent constructor runs, as
    # in the original statement order.
    self.in_channels, self.out_channels = in_channels, out_channels
    self.groups = groups
    self.dim = dim
    self.kernel_size = pair(kernel_size)
    self.use_bias = use_bias
    if torch.cuda.is_available():
        self.floatTensor = torch.cuda.FloatTensor
    else:
        self.floatTensor = torch.FloatTensor

    # NOTE(review): the positional ``False`` and ``pair(0)`` presumably map
    # to ``transposed`` and ``output_padding`` of the parent class --
    # confirm against the parent's signature.
    super(KernelConv2, self).__init__(
        in_channels, out_channels, self.kernel_size, stride_hw, padding_hw,
        dilation_hw, False, pair(0), groups, use_bias)

    new_tensor = self.floatTensor
    kernel_elems = int(np.prod(self.kernel_size))
    self.columns = Parameter(
        new_tensor(self.in_channels * kernel_elems, self.dim))
    self.rows = Parameter(
        new_tensor(self.out_channels * kernel_elems // groups, self.dim))
    self.alpha = Parameter(
        new_tensor(self.out_channels // self.groups, self.in_channels))

    self.weight_decay = weight_decay
    self.reset_parameters()
    print(self)
def __init__(self, in_channels, out_channels, kernel_size, stride=1,
             padding=0, dilation=1, bias=True, weight_decay=1.e-4,
             lamba=0.1 / 6e5, droprate_init=.5, local_rep=True, **kwargs):
    """
    Store the convolution hyperparameters, allocate the bias (optional),
    weights and ``z_phi`` parameters, then call ``reset_parameters()`` and
    print the module.

    :param weight_decay: stored as ``self.weight_decay``
    :param lamba: stored as ``self.lamba``
    :param droprate_init: stored as ``self.droprate_init``
    :param local_rep: stored as ``self.local_rep``
    :param kwargs: accepted and ignored

    The tensor constructor is chosen from ``opt.use_gpu``, where ``opt`` is
    defined outside this method.
    """
    super(ArmConv2d, self).__init__()

    self.in_channels, self.out_channels = in_channels, out_channels
    self.kernel_size = pair(kernel_size)
    self.stride = pair(stride)
    self.padding = pair(padding)
    self.dilation = pair(dilation)
    self.output_padding = pair(0)

    self.weight_decay = weight_decay
    self.lamba = lamba
    if opt.use_gpu:
        self.floatTensor = torch.cuda.FloatTensor
    else:
        self.floatTensor = torch.FloatTensor

    # The bias (when requested) is registered before the weights.
    self.use_bias = bias
    if bias:
        self.bias = Parameter(torch.Tensor(out_channels))
    self.weights = Parameter(
        torch.Tensor(out_channels, in_channels, *self.kernel_size))
    self.z_phi = Parameter(torch.Tensor(out_channels))
    self.dim_z = out_channels
    self.input_shape = None

    # Uniform(0, 1) sample with one entry per output channel.
    self.u = torch.Tensor(self.dim_z).uniform_(0, 1)
    self.droprate_init = droprate_init
    self.forward_mode = True
    self.local_rep = local_rep

    self.reset_parameters()
    print(self)
def __init__(self, in_channels, out_channels, kernel_size, stride=1,
             padding=0, dilation=1, groups=1, bias=True, mask=None,
             w_logvar_init=-10):
    """
    Store the convolution hyperparameters and create the weight mean
    (``w_mu``), weight log-variance (``w_logvar``), half-precision
    ``w_mask`` buffer and optional bias.

    :param mask: optional initial mask. When given it is copied into
        ``w_mask``, ``w_mu`` is multiplied by it and ``w_logvar`` is set to
        ``pruned_logvar_sentinel`` wherever the mask is 0. When ``None`` the
        mask is filled with ones.
    :param w_logvar_init: fill value for ``w_logvar``; ``w_logvar_min`` is
        ``min(w_logvar_init, -10)``
    """
    super().__init__()

    self.in_channels, self.out_channels = in_channels, out_channels
    self.groups = groups
    self.kernel_size = pair(kernel_size)
    self.stride = pair(stride)
    self.padding = pair(padding)
    self.dilation = pair(dilation)

    self.w_logvar_min = min(w_logvar_init, -10)
    self.w_logvar_max = 10.
    self.pruned_logvar_sentinel = self.w_logvar_max - 0.00058
    self.epsilon = 1e-8

    weight_shape = (out_channels, in_channels // groups, *self.kernel_size)
    self.w_mu = Parameter(torch.Tensor(*weight_shape))
    self.w_logvar = Parameter(torch.Tensor(*weight_shape))
    self.bias = Parameter(torch.Tensor(out_channels)) if bias else None

    self.w_logvar.data.fill_(w_logvar_init)
    self.register_buffer("w_mask", torch.HalfTensor(*weight_shape))

    # Standard nn.Conv2d initialization.
    init.kaiming_uniform_(self.w_mu, a=math.sqrt(5))

    if mask is None:
        self.w_mask.fill_(1.0)
    else:
        self.w_mask[:] = mask
        self.w_mu.data *= self.w_mask
        pruned = self.w_mask == 0.0
        self.w_logvar.data[pruned] = self.pruned_logvar_sentinel

    # Standard nn.Conv2d initialization.
    if self.bias is not None:
        fan_in, _ = init._calculate_fan_in_and_fan_out(self.w_mu)
        limit = 1 / math.sqrt(fan_in)
        init.uniform_(self.bias, -limit, limit)

    if torch.cuda.is_available():
        self.tensor_constructor = torch.cuda.FloatTensor
    else:
        self.tensor_constructor = torch.FloatTensor
def __init__(self, in_channels, out_channels, kernel_size, stride=1,
             padding=0, dilation=1, groups=1, bias=True, dropout=0.5,
             dropout_botk=0.5, dropout_type="weight", temperature=2.0 / 3.0,
             weight_decay=1.0, lamba=1.0, local_rep=False, **kwargs):
    """
    :param in_channels: Number of input channels
    :param out_channels: Number of output channels
    :param kernel_size: Size of the kernel
    :param stride: Stride for the convolution
    :param padding: Padding for the convolution
    :param dilation: Dilation factor for the convolution
    :param groups: How many groups we will assume in the convolution
    :param bias: Whether we will use a bias
    :param dropout: stored as ``self.dropout``
    :param dropout_botk: stored as ``self.dropout_botk``
    :param dropout_type: stored as ``self.dropout_type``
    :param weight_decay: Strength of the L2 penalty
    :param temperature: accepted but not used by this constructor
    :param lamba: accepted but not used by this constructor
    :param local_rep: accepted but not used by this constructor
    :param kwargs: accepted and ignored
    :raises ValueError: if in_channels or out_channels is not divisible by
        groups
    """
    super(TDConv2d, self).__init__()

    if in_channels % groups != 0:
        raise ValueError("in_channels must be divisible by groups")
    if out_channels % groups != 0:
        raise ValueError("out_channels must be divisible by groups")

    self.weight_decay = weight_decay
    self.floatTensor = (torch.FloatTensor if not torch.cuda.is_available()
                        else torch.cuda.FloatTensor)
    self.prune_rate = 0

    self.in_channels = in_channels
    self.out_channels = out_channels
    self.kernel_size = pair(kernel_size)
    self.stride = pair(stride)
    self.padding = pair(padding)
    self.dilation = pair(dilation)
    self.output_padding = pair(0)
    self.groups = groups

    # NOTE(review): the weight is allocated with self.floatTensor (CUDA
    # when available) while the bias uses torch.Tensor -- confirm both end
    # up on the same device before the first forward pass.
    self.weight = Parameter(
        self.floatTensor(out_channels, in_channels // groups,
                         *self.kernel_size))
    if bias:
        self.bias = Parameter(torch.Tensor(out_channels))
    else:
        self.register_parameter("bias", None)

    self.dropout = dropout
    self.dropout_type = dropout_type
    self.dropout_botk = dropout_botk

    self.reset_parameters()
    self.input_shape = None
    # Print the module summary once; the duplicated second print was
    # removed.
    print(self)
def __init__(self, in_channels: int, out_channels: int, kernel_size: int,
             dim: int = 2, stride: int = 1, padding: int = 0,
             dilation: int = 1, groups: int = 1, use_bias: bool = True,
             prior_std: float = 1., bias_std: float = 1e-3, **kwargs):
    """
    Build the layer through the parent constructor and allocate mean /
    log-variance parameter pairs for ``columns``, ``rows``, ``alpha`` and,
    when ``use_bias`` is truthy, the bias; then call ``reset_parameters()``
    and print the module.

    :param dim: size of the second dimension of the column / row parameters
    :param use_bias: stored as ``self.use_bias`` and forwarded to the parent
        constructor
    :param prior_std: stored as ``self.prior_std``
    :param bias_std: stored as ``self.bias_std``
    :param kwargs: accepted and ignored
    """
    stride_hw = pair(stride)
    padding_hw = pair(padding)
    dilation_hw = pair(dilation)

    # These attributes are assigned before the parent constructor runs, as
    # in the original statement order.
    if torch.cuda.is_available():
        self.floatTensor = torch.cuda.FloatTensor
    else:
        self.floatTensor = torch.FloatTensor
    self.in_channels, self.out_channels = in_channels, out_channels
    self.groups = groups
    self.dim = dim
    self.kernel_size = pair(kernel_size)
    self.use_bias = use_bias
    self.prior_std = prior_std
    self.bias_std = bias_std

    # NOTE(review): the positional ``False`` and ``pair(0)`` presumably map
    # to ``transposed`` and ``output_padding`` of the parent class --
    # confirm against the parent's signature.
    super(KernelBayesianConv2, self).__init__(
        in_channels, out_channels, self.kernel_size, stride_hw, padding_hw,
        dilation_hw, False, pair(0), groups, use_bias)

    new_tensor = self.floatTensor
    kernel_elems = int(np.prod(self.kernel_size))

    self.columns_mean = Parameter(
        new_tensor(self.in_channels * kernel_elems, self.dim))
    self.columns_logvar = Parameter(
        new_tensor(self.in_channels * kernel_elems, self.dim))

    self.rows_mean = Parameter(
        new_tensor(self.out_channels * kernel_elems // groups, self.dim))
    self.rows_logvar = Parameter(
        new_tensor(self.out_channels * kernel_elems // groups, self.dim))

    self.alpha_mean = Parameter(
        new_tensor(self.out_channels // groups, self.in_channels))
    self.alpha_logvar = Parameter(
        new_tensor(self.out_channels // groups, self.in_channels))

    # Re-assigned after the parent constructor, as in the original.
    self.use_bias = use_bias
    if self.use_bias:
        self.bias_mean = Parameter(
            new_tensor(self.out_channels // self.groups))
        self.bias_logvar = Parameter(
            new_tensor(self.out_channels // self.groups))

    self.reset_parameters()
    print(self)
def __init__(self, in_channels, out_channels, kernel_size, stride=1,
             padding=0, dilation=1, groups=1, learn_weight=True, bias=True,
             droprate_init=0.5, l2_strength=1., l0_strength=1.,
             random_weight=True, deterministic=False,
             use_baseline_bias=False, optimize_inference=True,
             one_sample_per_item=False, **kwargs):
    """
    :param in_channels: Number of input channels
    :param out_channels: Number of output channels
    :param kernel_size: Size of the kernel
    :param stride: Stride for the convolution
    :param padding: Padding for the convolution
    :param dilation: Dilation factor for the convolution
    :param groups: How many groups we will assume in the convolution
    :param learn_weight: When truthy ``exc_weight`` / ``inh_weight`` are
        Parameters, otherwise they are registered as buffers
    :param bias: Whether we will use a bias
    :param droprate_init: Dropout rate that the gates will be initialized to
        (a value of 0 is replaced by 0.5)
    :param l2_strength: Strength of the L2 penalty
    :param l0_strength: Strength of the L0 penalty
    :param random_weight: When truthy the two weight tensors are allocated
        uninitialized, otherwise they are filled with ones
    :param deterministic: stored as ``self.deterministic``
    :param use_baseline_bias: stored as ``self.use_baseline_bias``
    :param optimize_inference: stored as ``self.optimize_inference``
    :param one_sample_per_item: stored as ``self.one_sample_per_item``
    :param kwargs: accepted and ignored
    :raises ValueError: if in_channels or out_channels is not divisible by
        groups
    """
    super(BinaryGatedConv2d, self).__init__()

    for label, count in (("in_channels", in_channels),
                         ("out_channels", out_channels)):
        if count % groups != 0:
            raise ValueError(label + " must be divisible by groups")

    self.in_channels, self.out_channels = in_channels, out_channels
    self.groups = groups
    self.kernel_size = pair(kernel_size)
    self.stride = pair(stride)
    self.padding = pair(padding)
    self.dilation = pair(dilation)
    self.output_padding = pair(0)

    self.l2_strength = l2_strength
    self.l0_strength = l0_strength
    if droprate_init != 0.:
        self.droprate_init = droprate_init
    else:
        self.droprate_init = 0.5
    self.deterministic = deterministic
    self.use_baseline_bias = use_baseline_bias
    self.optimize_inference = optimize_inference
    self.one_sample_per_item = one_sample_per_item
    self.random_weight = random_weight

    weight_shape = (out_channels, in_channels // groups, *self.kernel_size)

    # torch.Tensor leaves the memory uninitialized; torch.ones gives a
    # fixed all-ones weight.
    make_weight = torch.Tensor if random_weight else torch.ones
    exc_weight = make_weight(*weight_shape)
    inh_weight = make_weight(*weight_shape)

    if not learn_weight:
        self.register_buffer("exc_weight", exc_weight)
        self.register_buffer("inh_weight", inh_weight)
    else:
        self.exc_weight = Parameter(exc_weight)
        self.inh_weight = Parameter(inh_weight)

    # Gate parameters, one per weight.
    self.exc_p1 = Parameter(torch.Tensor(*weight_shape))
    self.inh_p1 = Parameter(torch.Tensor(*weight_shape))

    self.dim_z = out_channels
    self.input_shape = None

    self.use_bias = bias
    if bias:
        self.bias = Parameter(torch.Tensor(out_channels))

    self.reset_parameters()
def __init__(self, in_channels, out_channels, kernel_size, stride=1,
             padding=0, dilation=1, groups=1, learn_weight=True, bias=True,
             droprate_init=0.5, l2_strength=1., l0_strength=1., **kwargs):
    """
    :param in_channels: Number of input channels
    :param out_channels: Number of output channels
    :param kernel_size: Size of the kernel
    :param stride: Stride for the convolution
    :param padding: Padding for the convolution
    :param dilation: Dilation factor for the convolution
    :param groups: How many groups we will assume in the convolution
    :param learn_weight: When truthy the weight (and bias) are Parameters,
        otherwise they are registered as buffers
    :param bias: Whether we will use a bias
    :param droprate_init: Dropout rate that the gates will be initialized to
        (a value of 0 is replaced by 0.5)
    :param l2_strength: Strength of the L2 penalty
    :param l0_strength: Strength of the L0 penalty
    :param kwargs: accepted and ignored
    :raises ValueError: if in_channels or out_channels is not divisible by
        groups
    """
    super(BinaryGatedConv2d, self).__init__()

    for label, count in (("in_channels", in_channels),
                         ("out_channels", out_channels)):
        if count % groups != 0:
            raise ValueError(label + " must be divisible by groups")

    self.in_channels, self.out_channels = in_channels, out_channels
    self.groups = groups
    self.kernel_size = pair(kernel_size)
    self.stride = pair(stride)
    self.padding = pair(padding)
    self.dilation = pair(dilation)
    self.output_padding = pair(0)

    self.l2_strength = l2_strength
    self.l0_strength = l0_strength
    if droprate_init != 0.:
        self.droprate_init = droprate_init
    else:
        self.droprate_init = 0.5

    if torch.cuda.is_available():
        self.floatTensor = torch.cuda.FloatTensor
    else:
        self.floatTensor = torch.FloatTensor
    self.use_bias = False

    weight_shape = (out_channels, in_channels // groups, *self.kernel_size)
    weight = torch.Tensor(*weight_shape)
    if not learn_weight:
        self.register_buffer("weight", weight)
    else:
        self.weight = Parameter(weight)

    # Gate parameters, one per weight.
    self.logit_p1 = Parameter(torch.Tensor(*weight_shape))
    self.dim_z = out_channels
    self.input_shape = None

    if bias:
        bias_tensor = torch.Tensor(out_channels)
        if not learn_weight:
            self.register_buffer("bias", bias_tensor)
        else:
            self.bias = Parameter(bias_tensor)
        self.use_bias = True

    self.reset_parameters()
def __init__(self, in_channels, channels, kernel_size, stride=(1, 1),
             padding=(0, 0), dilation=(1, 1), groups=1, bias=True, radix=2,
             reduction_factor=4, rectify=False, rectify_avg=False,
             norm_layer=None, dropblock_prob=0.0, **kwargs):
    """
    Assemble the sub-modules of the split-attention block: the main
    convolution (``channels * radix`` outputs, ``groups * radix`` groups),
    optional norm layers, two 1x1 convolutions (``fc1``, ``fc2``), an
    optional DropBlock and the radix softmax.

    :param channels: output channels per radix split
    :param groups: stored as ``self.cardinality``
    :param radix: number of splits; multiplies the conv output channels and
        groups
    :param reduction_factor: divisor used for the hidden width
        ``max(in_channels * radix // reduction_factor, 32)``
    :param rectify: use ``RFConv2d`` (imported lazily from ``rfconv``)
        instead of ``nn.Conv2d``; only takes effect when some padding is > 0
    :param rectify_avg: passed to ``RFConv2d`` as ``average_mode``
    :param norm_layer: callable building a norm layer from a channel count;
        ``None`` disables normalization
    :param dropblock_prob: a ``DropBlock2D`` is created only when this is
        > 0
    :param kwargs: forwarded to the main convolution
    """
    super(SplitAttentionConv2d, self).__init__()

    padding = pair(padding)
    self.rectify = rectify and (padding[0] > 0 or padding[1] > 0)
    self.rectify_avg = rectify_avg
    hidden_channels = max(in_channels * radix // reduction_factor, 32)
    self.radix = radix
    self.cardinality = groups
    self.channels = channels
    self.dropblock_prob = dropblock_prob

    conv_args = dict(
        in_channels=in_channels,
        out_channels=channels * radix,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
        groups=groups * radix,
        bias=bias,
    )
    if self.rectify:
        # Imported lazily so the dependency is only needed when rectified
        # convolution is actually requested.
        from rfconv import RFConv2d
        self.conv = RFConv2d(average_mode=rectify_avg, **conv_args, **kwargs)
    else:
        self.conv = nn.Conv2d(**conv_args, **kwargs)

    self.use_bn = norm_layer is not None
    if self.use_bn:
        self.bn0 = norm_layer(channels * radix)
    self.relu = nn.ReLU(inplace=True)

    self.fc1 = nn.Conv2d(in_channels=channels,
                         out_channels=hidden_channels,
                         kernel_size=1,
                         groups=self.cardinality)
    if self.use_bn:
        self.bn1 = norm_layer(hidden_channels)
    self.fc2 = nn.Conv2d(in_channels=hidden_channels,
                         out_channels=channels * radix,
                         kernel_size=1,
                         groups=self.cardinality)

    if dropblock_prob > 0.0:
        self.dropblock = DropBlock2D(p=dropblock_prob)
    self.rsoftmax = rSoftMax(radix=radix, cardinality=groups)