Ejemplo n.º 1
0
    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True,
                 mask_mode="channel_to_channel"):
        """
        Allocate the weight, the weight mask buffer and the optional bias.

        Nothing is initialised here: every tensor is created through
        ``torch.Tensor(...)`` and therefore holds uninitialised memory.

        @param mask_mode (string)
        Determines how large the weight mask tensor needs to be.
        "channel_to_channel" allocates a mask of shape
        (out_channels, in_channels, 1, 1); "weight_to_weight" allocates a
        mask with the same shape as the weight. Any other value raises
        ValueError.

        NOTE(review): the weight is allocated with the full ``in_channels``
        rather than ``in_channels // groups`` -- confirm this is intended
        when ``groups > 1``.
        """
        super().__init__()
        self.in_channels, self.out_channels = in_channels, out_channels
        self.kernel_size = pair(kernel_size)
        self.stride = pair(stride)
        self.padding = pair(padding)
        self.dilation = pair(dilation)
        self.groups = groups

        full_shape = (out_channels, in_channels, *self.kernel_size)
        self.weight = Parameter(torch.Tensor(*full_shape))

        # Pick the mask shape first, then allocate and register it once.
        if mask_mode == "channel_to_channel":
            mask_shape = (out_channels, in_channels, 1, 1)
        elif mask_mode == "weight_to_weight":
            mask_shape = full_shape
        else:
            raise ValueError(f"Unrecognized mask_mode: {mask_mode}")
        self.register_buffer("weight_mask", torch.Tensor(*mask_shape))

        self.bias = Parameter(torch.Tensor(out_channels)) if bias else None
Ejemplo n.º 2
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 weight_decay=1.,
                 **kwargs):
        """
        Store the convolution hyper-parameters and allocate weight and bias.

        :param in_channels: Number of input channels
        :param out_channels: Number of output channels
        :param kernel_size: Size of the kernel (passed through ``pair``)
        :param stride: Stride for the convolution
        :param padding: Padding for the convolution
        :param dilation: Dilation factor for the convolution
        :param groups: Number of groups; the weight's second dimension is
            ``in_channels // groups``
        :param bias: Whether a bias parameter is created
        :param weight_decay: Stored as ``self.weight_decay``
        :param kwargs: Accepted and ignored
        """
        super(MAPConv2d, self).__init__()
        self.weight_decay = weight_decay

        # Tensor constructor matching the device that is available.
        if torch.cuda.is_available():
            self.floatTensor = torch.cuda.FloatTensor
        else:
            self.floatTensor = torch.FloatTensor

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = pair(kernel_size)
        self.stride = pair(stride)
        self.padding = pair(padding)
        self.dilation = pair(dilation)
        self.output_padding = pair(0)
        self.groups = groups

        weight_storage = torch.Tensor(out_channels, in_channels // groups,
                                      *self.kernel_size)
        self.weight = Parameter(weight_storage)

        if not bias:
            self.register_parameter('bias', None)
        else:
            self.bias = Parameter(torch.Tensor(out_channels))

        # The tensors above are uninitialised until reset_parameters() runs.
        self.reset_parameters()
        self.input_shape = None
        print(self)
Ejemplo n.º 3
0
    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True):
        """
        Create a convolution whose weights carry a per-weight log-variance.

        ``weight`` is initialised with Kaiming-normal (``mode="fan_out"``),
        ``w_logvar`` is filled with -10 and the bias, when requested, with 0.
        """
        super().__init__()
        self.in_channels, self.out_channels = in_channels, out_channels
        self.kernel_size = pair(kernel_size)
        self.stride = pair(stride)
        self.padding = pair(padding)
        self.dilation = pair(dilation)
        self.groups = groups

        # weight and w_logvar share one shape.
        shape = (out_channels, in_channels // groups, *self.kernel_size)

        self.weight = Parameter(torch.Tensor(*shape))
        init.kaiming_normal_(self.weight, mode="fan_out")

        self.w_logvar = Parameter(torch.Tensor(*shape))
        self.w_logvar.data.fill_(-10)

        if not bias:
            self.bias = None
        else:
            self.bias = Parameter(torch.Tensor(out_channels))
            self.bias.data.fill_(0)

        self.input_shape = None

        self.threshold = 3
        self.epsilon = 1e-8
        # Tensor constructor matching the device that is available.
        if torch.cuda.is_available():
            self.tensor_constructor = torch.cuda.FloatTensor
        else:
            self.tensor_constructor = torch.FloatTensor
 def __init__(self,
              in_channels,
              out_channels,
              kernel_size,
              stride=1,
              padding=0,
              dilation=1,
              groups=1,
              bias=True,
              droprate=0.5,
              weight_decay=1.,
              share_mask=False,
              **kwargs):
     """
     Initialise the parent convolution and store the dropout settings.

     :param in_channels: forwarded to the parent constructor
     :param out_channels: forwarded to the parent constructor
     :param kernel_size: passed through ``pair`` and forwarded to the parent
     :param stride: passed through ``pair`` and forwarded to the parent
     :param padding: passed through ``pair`` and forwarded to the parent
     :param dilation: passed through ``pair`` and forwarded to the parent
     :param groups: forwarded to the parent constructor
     :param bias: forwarded to the parent constructor
     :param droprate: stored as ``self.droprate``
     :param weight_decay: stored as ``self.weight_decay``
     :param share_mask: stored as ``self.share_mask``
     :param kwargs: accepted and ignored
     """
     kernel_size = pair(kernel_size)
     stride = pair(stride)
     padding = pair(padding)
     dilation = pair(dilation)
     # Set before the parent constructor runs; this is a plain (non-tensor)
     # attribute holding the tensor constructor for the available device.
     self.floatTensor = torch.FloatTensor if not torch.cuda.is_available(
     ) else torch.cuda.FloatTensor
     # NOTE(review): the positional arguments look like the legacy
     # (..., transposed=False, output_padding=pair(0), groups, bias)
     # convolution-base signature -- confirm against the actual parent class
     # and the installed PyTorch version.
     super(DropoutConv2d,
           self).__init__(in_channels, out_channels, kernel_size, stride,
                          padding, dilation, False, pair(0), groups, bias)
     self.droprate = droprate
     # self.weight is not created in this method; presumably the parent
     # constructor allocates it.
     self.dim_z = self.weight.size(0)
     self.weight_decay = weight_decay
     self.share_mask = share_mask
     self.reset_parameters()
     print(self)
Ejemplo n.º 5
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 lamba=1.,
                 alpha=1.,
                 beta=4.,
                 weight_decay=1.,
                 **kwargs):
        """
        Store the hyper-parameters and allocate weight, ``u`` and bias.

        :param in_channels: Number of input channels
        :param out_channels: Number of output channels
        :param kernel_size: size of the kernel
        :param stride: stride for the convolution
        :param padding: padding for the convolution
        :param dilation: dilation factor for the convolution
        :param groups: how many groups we will assume in the convolution
        :param bias: whether we will use a bias
        :param lamba: strength of the TFL regularization
        :param alpha: stored as ``self.alpha``
        :param beta: stored as ``self.beta``; ``self.lamba1`` is
            ``lamba / beta``
        :param weight_decay: stored as ``self.weight_decay``
        :param kwargs: accepted and ignored
        """
        super(group_relaxed_L1L2Conv2d, self).__init__()
        cuda_available = torch.cuda.is_available()
        self.floatTensor = (torch.cuda.FloatTensor
                            if cuda_available else torch.FloatTensor)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = pair(kernel_size)
        self.stride = pair(stride)
        self.padding = pair(padding)
        self.dilation = pair(dilation)
        self.output_padding = pair(0)
        self.groups = groups
        self.lamba = lamba
        self.alpha = alpha
        self.beta = beta
        self.lamba1 = self.lamba / self.beta
        self.weight_decay = weight_decay
        self.weight = Parameter(
            torch.Tensor(out_channels, in_channels // groups,
                         *self.kernel_size))
        # ``u`` is a plain attribute (neither parameter nor buffer) with the
        # same shape as the weight, drawn uniformly from [0, 1).
        self.u = torch.rand(out_channels, in_channels // groups,
                            *self.kernel_size)
        # Only move to the GPU when one exists; an unconditional
        # ``.to('cuda')`` raises on CPU-only machines.
        if cuda_available:
            self.u = self.u.to('cuda')
        if bias:
            self.bias = Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()
        self.input_shape = None
        print(self)
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 use_bias=True,
                 simple=True,
                 add_diagonal=True,
                 weight_decay=1.,
                 **kwargs):
        """
        Initialise the parent convolution and allocate r / t / d parameters.

        :param in_channels: forwarded to the parent constructor
        :param out_channels: forwarded to the parent constructor
        :param kernel_size: passed through ``pair``; only its first element
            is kept in ``self.kernel_size`` afterwards
        :param stride: passed through ``pair`` and forwarded to the parent
        :param padding: passed through ``pair`` and forwarded to the parent
        :param dilation: passed through ``pair`` and forwarded to the parent
        :param groups: forwarded to the parent constructor
        :param use_bias: forwarded to the parent constructor as its last
            positional argument
        :param simple: selects which parameters are allocated: ``r`` alone
            when true, ``r`` and ``t`` otherwise
        :param add_diagonal: whether the ``d`` parameter is allocated
        :param weight_decay: stored as ``self.weight_decay``
        :param kwargs: accepted and ignored
        """
        kernel_size = pair(kernel_size)
        stride = pair(stride)
        padding = pair(padding)
        dilation = pair(dilation)
        # Plain (non-tensor) attributes are set before the parent constructor
        # runs.
        self.weight_decay = weight_decay
        self.floatTensor = torch.FloatTensor if not torch.cuda.is_available(
        ) else torch.cuda.FloatTensor
        self.device = torch.device(
            'cpu') if not torch.cuda.is_available() else torch.device('cuda')
        self.simple = simple
        self.add_diagonal = add_diagonal
        # NOTE(review): the positional arguments look like the legacy
        # (..., transposed=False, output_padding=pair(0), groups, bias)
        # convolution-base signature -- confirm against the actual parent
        # class and the installed PyTorch version.
        super(OrthogonalConv2d,
              self).__init__(in_channels, out_channels,
                             kernel_size, stride, padding, dilation, False,
                             pair(0), groups, use_bias)
        self.in_channels = in_channels
        self.out_channels = out_channels
        # Keeps only the first kernel dimension as a scalar; the shapes below
        # use it for both spatial axes.
        # NOTE(review): this presumably assumes a square kernel and replaces
        # whatever the parent stored in self.kernel_size -- confirm.
        self.kernel_size = kernel_size[0]

        if simple:
            self.r = Parameter(
                self.floatTensor(self.kernel_size * self.kernel_size,
                                 self.out_channels))
        else:
            self.r = Parameter(self.floatTensor(2, self.out_channels))
            self.t = Parameter(
                self.floatTensor(2 * (self.kernel_size - 1),
                                 self.out_channels))

        if self.add_diagonal:
            self.d = Parameter(
                self.floatTensor(self.kernel_size, self.kernel_size,
                                 min(self.in_channels, self.out_channels)))

        self.reset_parameters()
        print(self)
Ejemplo n.º 7
0
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True,
                 droprate_init=0.5, temperature=2./3., weight_decay=1., lamba=1., local_rep=False, **kwargs):
        """
        Set up the convolution weights, the gate parameters and the bias.

        :param in_channels: Number of input channels
        :param out_channels: Number of output channels
        :param kernel_size: Size of the kernel
        :param stride: Stride for the convolution
        :param padding: Padding for the convolution
        :param dilation: Dilation factor for the convolution
        :param groups: How many groups we will assume in the convolution
        :param bias: Whether we will use a bias
        :param droprate_init: Dropout rate that the L0 gates will be initialized to
            (a value of 0 is replaced by 0.5)
        :param temperature: Temperature of the concrete distribution
        :param weight_decay: Strength of the L2 penalty (stored as ``prior_prec``)
        :param lamba: Strength of the L0 penalty
        :param local_rep: Whether we will use a separate gate sample per element in the minibatch
        :raises ValueError: if either channel count is not divisible by ``groups``
        """
        super(L0Conv2d, self).__init__()
        if in_channels % groups != 0:
            raise ValueError('in_channels must be divisible by groups')
        if out_channels % groups != 0:
            raise ValueError('out_channels must be divisible by groups')

        # Geometry of the convolution.
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = pair(kernel_size)
        self.stride = pair(stride)
        self.padding = pair(padding)
        self.dilation = pair(dilation)
        self.output_padding = pair(0)
        self.groups = groups

        # Regularisation and gate hyper-parameters.
        self.prior_prec = weight_decay
        self.lamba = lamba
        self.droprate_init = 0.5 if droprate_init == 0. else droprate_init
        self.temperature = temperature
        if torch.cuda.is_available():
            self.floatTensor = torch.cuda.FloatTensor
        else:
            self.floatTensor = torch.FloatTensor
        self.use_bias = bool(bias)

        # Learnable tensors; one gate parameter per output channel.
        weight_storage = torch.Tensor(out_channels, in_channels // groups, *self.kernel_size)
        self.weights = Parameter(weight_storage)
        self.qz_loga = Parameter(torch.Tensor(out_channels))
        self.dim_z = out_channels
        self.input_shape = None
        self.local_rep = local_rep
        self.ppos = 0

        if self.use_bias:
            self.bias = Parameter(torch.Tensor(out_channels))

        self.reset_parameters()
        print(self)
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 prior_std=1.,
                 prior_std_z=1.,
                 dof=1.,
                 **kwargs):
        """
        Allocate mean / log-variance parameters for weights, z and bias.

        :param in_channels: Number of input channels
        :param out_channels: Number of output channels
        :param kernel_size: Size of the kernel (passed through ``pair``)
        :param stride: Stride for the convolution
        :param padding: Padding for the convolution
        :param dilation: Dilation factor for the convolution
        :param groups: Number of groups; both channel counts must be
            divisible by it
        :param bias: Whether bias mean / log-variance parameters are created
        :param prior_std: Stored as ``self.prior_std``
        :param prior_std_z: Stored as ``self.prior_std_z``
        :param dof: Stored as ``self.dof``
        :param kwargs: Accepted and ignored
        :raises ValueError: if either channel count is not divisible by
            ``groups``
        """
        super(HSConv2d, self).__init__()
        if in_channels % groups != 0:
            raise ValueError('in_channels must be divisible by groups')
        if out_channels % groups != 0:
            raise ValueError('out_channels must be divisible by groups')

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = pair(kernel_size)
        self.stride = pair(stride)
        self.padding = pair(padding)
        self.dilation = pair(dilation)
        self.output_padding = pair(0)
        self.groups = groups
        self.prior_std = prior_std
        self.prior_std_z = prior_std_z
        self.use_bias = bool(bias)
        self.dof = dof

        channels_per_group = in_channels // groups
        weight_shape = (out_channels, channels_per_group, *self.kernel_size)

        # Weight mean and log-variance share one shape.
        self.mean_w = Parameter(torch.Tensor(*weight_shape))
        self.logvar_w = Parameter(torch.Tensor(*weight_shape))
        # One z entry per (grouped) input channel.
        self.qz_mean = Parameter(torch.Tensor(channels_per_group))
        self.qz_logvar = Parameter(torch.Tensor(channels_per_group))
        self.dim_z = channels_per_group

        if self.use_bias:
            self.mean_bias = Parameter(torch.Tensor(out_channels))
            self.logvar_bias = Parameter(torch.Tensor(out_channels))

        if torch.cuda.is_available():
            self.floatTensor = torch.cuda.FloatTensor
        else:
            self.floatTensor = torch.FloatTensor
        self.reset_parameters()
        print(self)
Ejemplo n.º 9
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 central_data,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True):
        """
        Create the layer and hand its weight tensors to ``central_data``.

        ``w_mu`` and ``w_logvar`` are not stored on this module; they are
        passed to ``central_data.register`` and the returned value is kept
        as ``self.data_index``. ``w_logvar`` is filled with
        ``central_data.z_logvar_init``.
        """
        super().__init__()
        self.in_channels, self.out_channels = in_channels, out_channels
        self.kernel_size = pair(kernel_size)
        self.stride = pair(stride)
        self.padding = pair(padding)
        self.dilation = pair(dilation)
        self.groups = groups

        # Store in a list to avoid having it registered as a module, otherwise
        # it will appear multiple times in the state dict.
        self.central_data = [central_data]

        shape = (out_channels, in_channels // groups, *self.kernel_size)
        w_mu = torch.Tensor(*shape)
        w_logvar = torch.Tensor(*shape)
        self.bias = Parameter(torch.Tensor(out_channels)) if bias else None

        w_logvar.data.fill_(central_data.z_logvar_init)

        # Standard nn.Conv2d initialization.
        init.kaiming_uniform_(w_mu, a=math.sqrt(5))
        if self.bias is not None:
            fan_in = init._calculate_fan_in_and_fan_out(w_mu)[0]
            limit = 1 / math.sqrt(fan_in)
            init.uniform_(self.bias, -limit, limit)

        self.data_index = central_data.register(self, w_mu, w_logvar)

        if torch.cuda.is_available():
            self.tensor_constructor = torch.cuda.FloatTensor
        else:
            self.tensor_constructor = torch.FloatTensor
Ejemplo n.º 10
0
 def __init__(self,
              in_channels,
              out_channels,
              kernel_size,
              stride=1,
              padding=0,
              dilation=1,
              bias=True,
              name='',
              weight_decay=0,
              lamba=0.1 / 6e5,
              droprate_init=0.01,
              k=7,
              local_rep=True,
              init_size=-1,
              device='cpu'):
     """
     Allocate the convolution weights, gate parameters and a BatchNorm2d.

     ``out_channels`` is converted with ``int()`` before use. ``name`` is
     stored as ``self.layer_name`` and ``init_size`` as
     ``self.activated_neuron_size``. ``self.dimz_tensor`` is a zero-filled
     float tensor of length ``dim_z`` moved to ``device``.
     """
     super(ARMConv2dBn, self).__init__()
     n_out = int(out_channels)
     self.in_channels = in_channels
     self.out_channels = n_out
     self.kernel_size = pair(kernel_size)
     self.stride = pair(stride)
     self.padding = pair(padding)
     self.dilation = pair(dilation)
     self.output_padding = pair(0)
     self.weight_decay = weight_decay
     self.lamba = lamba
     self.k = k
     self.use_bias = bias
     if bias:
         self.bias = Parameter(torch.Tensor(n_out))
     weight_storage = torch.Tensor(n_out, in_channels, *self.kernel_size)
     self.weights = Parameter(weight_storage)
     # One gate parameter per output channel.
     self.z_phi = Parameter(torch.Tensor(n_out))
     self.dim_z = n_out
     self.input_shape = None
     self.u = torch.Tensor(n_out).uniform_(0, 1)
     self.droprate_init = droprate_init
     self.forward_mode = True
     self.local_rep = local_rep
     self.activated_neuron_size = init_size
     self.device = device
     self.bn = nn.BatchNorm2d(n_out)
     self.reset_parameters()
     self.layer_name = name
     zeros = torch.FloatTensor(n_out).zero_()
     self.dimz_tensor = zeros.to(device)
     print(self)
    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 kernel_size: int,
                 dim: int = 2,
                 stride: int = 1,
                 padding: int = 0,
                 dilation: int = 1,
                 groups: int = 1,
                 use_bias: bool = True,
                 weight_decay: float = 1.,
                 **kwargs):
        """
        Initialise the parent convolution and allocate columns, rows, alpha.

        ``columns`` has ``in_channels * prod(kernel_size)`` rows, ``rows``
        has ``out_channels * prod(kernel_size) // groups`` rows, and both
        have ``dim`` columns. ``alpha`` has shape
        ``(out_channels // groups, in_channels)``. ``kwargs`` is accepted
        and ignored.
        """
        stride, padding, dilation = pair(stride), pair(padding), pair(dilation)

        # Plain attributes assigned before the parent constructor runs.
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.groups = groups
        self.dim = dim
        self.kernel_size = pair(kernel_size)
        self.use_bias = use_bias
        if torch.cuda.is_available():
            self.floatTensor = torch.cuda.FloatTensor
        else:
            self.floatTensor = torch.FloatTensor

        # NOTE(review): positional layout looks like the legacy
        # (..., transposed, output_padding, groups, bias) base signature --
        # confirm against the actual parent class.
        super(KernelConv2, self).__init__(
            in_channels, out_channels, self.kernel_size, stride, padding,
            dilation, False, pair(0), groups, use_bias)

        kernel_area = int(np.prod(self.kernel_size))
        n_columns = self.in_channels * kernel_area
        n_rows = self.out_channels * kernel_area // groups

        self.columns = Parameter(self.floatTensor(n_columns, self.dim))
        self.rows = Parameter(self.floatTensor(n_rows, self.dim))
        self.alpha = Parameter(
            self.floatTensor(self.out_channels // self.groups,
                             self.in_channels))

        self.weight_decay = weight_decay

        self.reset_parameters()
        print(self)
Ejemplo n.º 12
0
 def __init__(self,
              in_channels,
              out_channels,
              kernel_size,
              stride=1,
              padding=0,
              dilation=1,
              bias=True,
              weight_decay=1.e-4,
              lamba=0.1 / 6e5,
              droprate_init=.5,
              local_rep=True,
              **kwargs):
     """
     Allocate the convolution weights, the gate parameters and the bias.

     The tensor constructor is chosen from the module-level ``opt.use_gpu``
     flag. ``kwargs`` is accepted and ignored.
     """
     super(ArmConv2d, self).__init__()
     self.in_channels, self.out_channels = in_channels, out_channels
     self.kernel_size = pair(kernel_size)
     self.stride = pair(stride)
     self.padding = pair(padding)
     self.dilation = pair(dilation)
     self.output_padding = pair(0)
     self.weight_decay = weight_decay
     self.lamba = lamba
     if opt.use_gpu:
         self.floatTensor = torch.cuda.FloatTensor
     else:
         self.floatTensor = torch.FloatTensor
     self.use_bias = bias
     if bias:
         self.bias = Parameter(torch.Tensor(out_channels))
     weight_storage = torch.Tensor(out_channels, in_channels,
                                   *self.kernel_size)
     self.weights = Parameter(weight_storage)
     # One gate parameter per output channel.
     self.z_phi = Parameter(torch.Tensor(out_channels))
     self.dim_z = out_channels
     self.input_shape = None
     self.u = torch.Tensor(out_channels).uniform_(0, 1)
     self.droprate_init = droprate_init
     self.forward_mode = True
     self.local_rep = local_rep
     self.reset_parameters()
     print(self)
Ejemplo n.º 13
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 mask=None,
                 w_logvar_init=-10):
        """
        Create weight mean / log-variance parameters and a weight mask.

        ``w_mask`` is a half-precision buffer with the weight's shape. When
        ``mask`` is given it is copied into the buffer, ``w_mu`` is
        multiplied by it, and ``w_logvar`` is set to
        ``pruned_logvar_sentinel`` wherever the mask is zero; otherwise the
        buffer is filled with ones.
        """
        super().__init__()
        self.in_channels, self.out_channels = in_channels, out_channels
        self.kernel_size = pair(kernel_size)
        self.stride = pair(stride)
        self.padding = pair(padding)
        self.dilation = pair(dilation)
        self.groups = groups

        self.w_logvar_min = min(w_logvar_init, -10)
        self.w_logvar_max = 10.
        self.pruned_logvar_sentinel = self.w_logvar_max - 0.00058
        self.epsilon = 1e-8

        shape = (out_channels, in_channels // groups, *self.kernel_size)
        self.w_mu = Parameter(torch.Tensor(*shape))
        self.w_logvar = Parameter(torch.Tensor(*shape))
        self.bias = Parameter(torch.Tensor(out_channels)) if bias else None

        self.w_logvar.data.fill_(w_logvar_init)

        self.register_buffer("w_mask", torch.HalfTensor(*shape))

        # Standard nn.Conv2d initialization.
        init.kaiming_uniform_(self.w_mu, a=math.sqrt(5))

        if mask is None:
            self.w_mask.fill_(1.0)
        else:
            self.w_mask[:] = mask
            self.w_mu.data *= self.w_mask
            pruned = self.w_mask == 0.0
            self.w_logvar.data[pruned] = self.pruned_logvar_sentinel

        # Standard nn.Conv2d initialization.
        if self.bias is not None:
            fan_in = init._calculate_fan_in_and_fan_out(self.w_mu)[0]
            limit = 1 / math.sqrt(fan_in)
            init.uniform_(self.bias, -limit, limit)

        if torch.cuda.is_available():
            self.tensor_constructor = torch.cuda.FloatTensor
        else:
            self.tensor_constructor = torch.FloatTensor
Ejemplo n.º 14
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 dropout=0.5,
                 dropout_botk=0.5,
                 dropout_type="weight",
                 temperature=2.0 / 3.0,
                 weight_decay=1.0,
                 lamba=1.0,
                 local_rep=False,
                 **kwargs):
        """
        Store the hyper-parameters and allocate the weight and bias.

        :param in_channels: Number of input channels
        :param out_channels: Number of output channels
        :param kernel_size: Size of the kernel
        :param stride: Stride for the convolution
        :param padding: Padding for the convolution
        :param dilation: Dilation factor for the convolution
        :param groups: How many groups we will assume in the convolution
        :param bias: Whether we will use a bias
        :param dropout: Stored as ``self.dropout``
        :param dropout_botk: Stored as ``self.dropout_botk``
        :param dropout_type: Stored as ``self.dropout_type``
        :param temperature: Accepted but not used by this constructor
        :param weight_decay: Strength of the L2 penalty
        :param lamba: Accepted but not used by this constructor
        :param local_rep: Accepted but not used by this constructor
        :param kwargs: Accepted and ignored
        :raises ValueError: if either channel count is not divisible by
            ``groups``
        """
        super(TDConv2d, self).__init__()
        if in_channels % groups != 0:
            raise ValueError("in_channels must be divisible by groups")
        if out_channels % groups != 0:
            raise ValueError("out_channels must be divisible by groups")
        self.weight_decay = weight_decay
        self.floatTensor = (torch.FloatTensor if not torch.cuda.is_available()
                            else torch.cuda.FloatTensor)
        self.prune_rate = 0
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = pair(kernel_size)
        self.stride = pair(stride)
        self.padding = pair(padding)
        self.dilation = pair(dilation)
        self.output_padding = pair(0)
        self.groups = groups
        # The weight is allocated with the device-dependent constructor,
        # while the bias below uses the default torch.Tensor.
        self.weight = Parameter(
            self.floatTensor(out_channels, in_channels // groups,
                             *self.kernel_size))
        if bias:
            self.bias = Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter("bias", None)

        self.dropout = dropout
        self.dropout_type = dropout_type
        self.dropout_botk = dropout_botk

        self.reset_parameters()
        self.input_shape = None
        # Print the module summary once (a second, duplicate print was
        # removed).
        print(self)
    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 kernel_size: int,
                 dim: int = 2,
                 stride: int = 1,
                 padding: int = 0,
                 dilation: int = 1,
                 groups: int = 1,
                 use_bias: bool = True,
                 prior_std: float = 1.,
                 bias_std: float = 1e-3,
                 **kwargs):
        """
        Initialise the parent convolution and allocate mean/logvar pairs.

        A ``*_mean`` and ``*_logvar`` parameter is created for ``columns``,
        ``rows`` and ``alpha``, and for the bias when ``use_bias`` is true.
        ``prior_std`` and ``bias_std`` are stored on the instance.
        ``kwargs`` is accepted and ignored.
        """
        stride, padding, dilation = pair(stride), pair(padding), pair(dilation)

        # Plain attributes assigned before the parent constructor runs.
        if torch.cuda.is_available():
            self.floatTensor = torch.cuda.FloatTensor
        else:
            self.floatTensor = torch.FloatTensor

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.groups = groups
        self.dim = dim
        self.kernel_size = pair(kernel_size)
        self.use_bias = use_bias
        self.prior_std = prior_std
        self.bias_std = bias_std

        # NOTE(review): positional layout looks like the legacy
        # (..., transposed, output_padding, groups, bias) base signature --
        # confirm against the actual parent class.
        super(KernelBayesianConv2, self).__init__(
            in_channels, out_channels, self.kernel_size, stride, padding,
            dilation, False, pair(0), groups, use_bias)

        new_tensor = self.floatTensor
        kernel_area = int(np.prod(self.kernel_size))
        n_columns = self.in_channels * kernel_area
        n_rows = self.out_channels * kernel_area // groups
        alpha_rows = self.out_channels // groups

        self.columns_mean = Parameter(new_tensor(n_columns, self.dim))
        self.columns_logvar = Parameter(new_tensor(n_columns, self.dim))

        self.rows_mean = Parameter(new_tensor(n_rows, self.dim))
        self.rows_logvar = Parameter(new_tensor(n_rows, self.dim))

        self.alpha_mean = Parameter(new_tensor(alpha_rows, self.in_channels))
        self.alpha_logvar = Parameter(new_tensor(alpha_rows, self.in_channels))

        self.use_bias = use_bias
        if self.use_bias:
            bias_len = self.out_channels // self.groups
            self.bias_mean = Parameter(new_tensor(bias_len))
            self.bias_logvar = Parameter(new_tensor(bias_len))

        self.reset_parameters()
        print(self)
Ejemplo n.º 16
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 learn_weight=True,
                 bias=True,
                 droprate_init=0.5,
                 l2_strength=1.,
                 l0_strength=1.,
                 random_weight=True,
                 deterministic=False,
                 use_baseline_bias=False,
                 optimize_inference=True,
                 one_sample_per_item=False,
                 **kwargs):
        """
        Allocate the exc / inh weights, their gate parameters and the bias.

        :param in_channels: Number of input channels
        :param out_channels: Number of output channels
        :param kernel_size: Size of the kernel
        :param stride: Stride for the convolution
        :param padding: Padding for the convolution
        :param dilation: Dilation factor for the convolution
        :param groups: How many groups we will assume in the convolution
        :param learn_weight: If true the exc / inh weights are Parameters,
            otherwise they are registered as buffers
        :param bias: Whether we will use a bias
        :param droprate_init: Dropout rate that the gates will be initialized to
            (a value of 0 is replaced by 0.5)
        :param l2_strength: Strength of the L2 penalty
        :param l0_strength: Strength of the L0 penalty
        :param random_weight: If true the weights are allocated
            uninitialised, otherwise they are allocated as all ones
        :param deterministic: Stored as ``self.deterministic``
        :param use_baseline_bias: Stored as ``self.use_baseline_bias``
        :param optimize_inference: Stored as ``self.optimize_inference``
        :param one_sample_per_item: Stored as ``self.one_sample_per_item``
        :raises ValueError: if either channel count is not divisible by
            ``groups``
        """
        super(BinaryGatedConv2d, self).__init__()
        if in_channels % groups != 0:
            raise ValueError("in_channels must be divisible by groups")
        if out_channels % groups != 0:
            raise ValueError("out_channels must be divisible by groups")

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = pair(kernel_size)
        self.stride = pair(stride)
        self.padding = pair(padding)
        self.dilation = pair(dilation)
        self.output_padding = pair(0)
        self.groups = groups
        self.l2_strength = l2_strength
        self.l0_strength = l0_strength
        self.droprate_init = 0.5 if droprate_init == 0. else droprate_init
        self.deterministic = deterministic
        self.use_baseline_bias = use_baseline_bias
        self.optimize_inference = optimize_inference
        self.one_sample_per_item = one_sample_per_item

        shape = (out_channels, in_channels // groups, *self.kernel_size)

        self.random_weight = random_weight
        # Uninitialised storage when random, all ones otherwise.
        allocate = torch.Tensor if random_weight else torch.ones
        exc_weight = allocate(*shape)
        inh_weight = allocate(*shape)

        if not learn_weight:
            self.register_buffer("exc_weight", exc_weight)
            self.register_buffer("inh_weight", inh_weight)
        else:
            self.exc_weight = Parameter(exc_weight)
            self.inh_weight = Parameter(inh_weight)

        # One gate parameter per weight, for each of the two weight tensors.
        self.exc_p1 = Parameter(torch.Tensor(*shape))
        self.inh_p1 = Parameter(torch.Tensor(*shape))
        self.dim_z = out_channels
        self.input_shape = None

        self.use_bias = bias
        if bias:
            self.bias = Parameter(torch.Tensor(out_channels))

        self.reset_parameters()
Ejemplo n.º 17
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 learn_weight=True,
                 bias=True,
                 droprate_init=0.5,
                 l2_strength=1.,
                 l0_strength=1.,
                 **kwargs):
        """
        Allocate the weight, its per-weight gate parameters and the bias.

        :param in_channels: Number of input channels
        :param out_channels: Number of output channels
        :param kernel_size: Size of the kernel
        :param stride: Stride for the convolution
        :param padding: Padding for the convolution
        :param dilation: Dilation factor for the convolution
        :param groups: How many groups we will assume in the convolution
        :param learn_weight: If true weight and bias are Parameters,
            otherwise they are registered as buffers
        :param bias: Whether we will use a bias
        :param droprate_init: Dropout rate that the gates will be initialized to
            (a value of 0 is replaced by 0.5)
        :param l2_strength: Strength of the L2 penalty
        :param l0_strength: Strength of the L0 penalty
        :raises ValueError: if either channel count is not divisible by
            ``groups``
        """
        super(BinaryGatedConv2d, self).__init__()
        if in_channels % groups != 0:
            raise ValueError("in_channels must be divisible by groups")
        if out_channels % groups != 0:
            raise ValueError("out_channels must be divisible by groups")

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = pair(kernel_size)
        self.stride = pair(stride)
        self.padding = pair(padding)
        self.dilation = pair(dilation)
        self.output_padding = pair(0)
        self.groups = groups
        self.l2_strength = l2_strength
        self.l0_strength = l0_strength
        self.droprate_init = 0.5 if droprate_init == 0. else droprate_init
        if torch.cuda.is_available():
            self.floatTensor = torch.cuda.FloatTensor
        else:
            self.floatTensor = torch.FloatTensor
        self.use_bias = bool(bias)

        shape = (out_channels, in_channels // groups, *self.kernel_size)

        weight_storage = torch.Tensor(*shape)
        if not learn_weight:
            self.register_buffer("weight", weight_storage)
        else:
            self.weight = Parameter(weight_storage)

        # One gate parameter per weight.
        self.logit_p1 = Parameter(torch.Tensor(*shape))
        self.dim_z = out_channels
        self.input_shape = None

        if self.use_bias:
            bias_storage = torch.Tensor(out_channels)
            if not learn_weight:
                self.register_buffer("bias", bias_storage)
            else:
                self.bias = Parameter(bias_storage)

        self.reset_parameters()
Ejemplo n.º 18
0
    def __init__(self,
                 in_channels,
                 channels,
                 kernel_size,
                 stride=(1, 1),
                 padding=(0, 0),
                 dilation=(1, 1),
                 groups=1,
                 bias=True,
                 radix=2,
                 reduction_factor=4,
                 rectify=False,
                 rectify_avg=False,
                 norm_layer=None,
                 dropblock_prob=0.0,
                 **kwargs):
        """
        Build the main convolution, the two 1x1 "fc" convolutions and the
        optional normalisation / DropBlock sub-modules.

        The main convolution produces ``channels * radix`` outputs with
        ``groups * radix`` groups. ``RFConv2d`` (imported lazily from
        ``rfconv``) is used instead of ``nn.Conv2d`` only when ``rectify``
        is true and at least one padding component is positive. Extra
        ``kwargs`` are forwarded to the main convolution. Normalisation
        layers are created only when ``norm_layer`` is given, and DropBlock
        only when ``dropblock_prob`` is positive.
        """
        super(SplitAttentionConv2d, self).__init__()

        padding = pair(padding)
        has_padding = padding[0] > 0 or padding[1] > 0
        self.rectify = rectify and has_padding
        self.rectify_avg = rectify_avg
        inter_channels = max(in_channels * radix // reduction_factor, 32)
        self.radix = radix
        self.cardinality = groups
        self.channels = channels
        self.dropblock_prob = dropblock_prob

        # Select the convolution class; only RFConv2d takes average_mode.
        if self.rectify:
            from rfconv import RFConv2d
            conv_cls, conv_extra = RFConv2d, {"average_mode": rectify_avg}
        else:
            conv_cls, conv_extra = nn.Conv2d, {}
        self.conv = conv_cls(in_channels=in_channels,
                             out_channels=channels * radix,
                             kernel_size=kernel_size,
                             stride=stride,
                             padding=padding,
                             dilation=dilation,
                             groups=groups * radix,
                             bias=bias,
                             **conv_extra,
                             **kwargs)

        self.use_bn = norm_layer is not None
        if self.use_bn:
            self.bn0 = norm_layer(channels * radix)
        self.relu = nn.ReLU(inplace=True)
        self.fc1 = nn.Conv2d(in_channels=channels,
                             out_channels=inter_channels,
                             kernel_size=1,
                             groups=self.cardinality)
        if self.use_bn:
            self.bn1 = norm_layer(inter_channels)
        self.fc2 = nn.Conv2d(in_channels=inter_channels,
                             out_channels=channels * radix,
                             kernel_size=1,
                             groups=self.cardinality)
        if dropblock_prob > 0.0:
            self.dropblock = DropBlock2D(p=dropblock_prob)
        self.rsoftmax = rSoftMax(radix=radix, cardinality=groups)