class cnn_THS(clstm):
    def __init__(self, vocab_size, max_num_hidden_layers, embedding_dim,
                 n_classes, n_filters, filter_size, dropout, batch_size,
                 b=0.99, n=0.01, s=0.2, e=[0.5, 0.35, 0.2, 0.1, 0.05],
                 use_cuda=False):
        super().__init__(vocab_size, max_num_hidden_layers, embedding_dim,
                         n_classes, n_filters, filter_size, dropout,
                         batch_size, b=b, n=n, s=s, use_cuda=use_cuda)
        # e: exploration probabilities (same defaults as ONN_THS below)
        self.e = Parameter(torch.tensor(e), requires_grad=False)
        self.arms_values = Parameter(torch.arange(n_classes),
                                     requires_grad=False)
        self.explorations_mab = []
        for i in range(n_classes):
            self.explorations_mab.append(algs.ThompsomSampling(len(e)))

    def partial_fit(self, X_data, Y_data, exp_factor, show_loss=True):
        self.partial_fit_(X_data, Y_data, show_loss)
        self.explorations_mab[Y_data[0]].reward(exp_factor)

    def predict(self, X_data):
        pred = self.predict_(X_data)[0]
        exp_factor = self.explorations_mab[pred].select()[0]
        if np.random.uniform() < self.e[exp_factor]:
            removed_arms = self.arms_values.clone().numpy().tolist()
            removed_arms.remove(pred)
            return random.choice(removed_arms), exp_factor
        return pred, exp_factor

class ONN_THS(ONN):
    def __init__(self, features_size, max_num_hidden_layers,
                 qtd_neuron_per_hidden_layer, n_classes, b=0.99, n=0.01,
                 s=0.2, e=[0.5, 0.35, 0.2, 0.1, 0.05], use_cuda=False):
        super().__init__(features_size, max_num_hidden_layers,
                         qtd_neuron_per_hidden_layer, n_classes, b=b, n=n,
                         s=s, use_cuda=use_cuda)
        self.e = Parameter(torch.tensor(e), requires_grad=False)
        self.arms_values = Parameter(torch.arange(n_classes),
                                     requires_grad=False)
        self.explorations_mab = []
        for i in range(n_classes):
            self.explorations_mab.append(algs.ThompsomSampling(len(e)))

    def partial_fit(self, X_data, Y_data, exp_factor, show_loss=True):
        self.partial_fit_(X_data, Y_data, show_loss)
        self.explorations_mab[Y_data[0]].reward(exp_factor)

    def predict(self, X_data):
        pred = self.predict_(X_data)[0]
        exp_factor = self.explorations_mab[pred].select()[0]
        if np.random.uniform() < self.e[exp_factor]:
            removed_arms = self.arms_values.clone().numpy().tolist()
            removed_arms.remove(pred)
            return random.choice(removed_arms), exp_factor
        return pred, exp_factor

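# Usage sketch (illustrative, not part of the classes above): ONN_THS.predict
# couples a Thompson-sampled exploration level with an epsilon-style arm
# switch. The helper name explore_or_exploit is hypothetical and shows only
# that switching rule in isolation.
import random
import numpy as np

def explore_or_exploit(pred, n_classes, eps):
    # With probability eps, return a random class other than pred;
    # otherwise keep the model's prediction.
    if np.random.uniform() < eps:
        other_arms = [c for c in range(n_classes) if c != pred]
        return random.choice(other_arms)
    return pred

# e.g. predicted class 3 of 5, exploring 20% of the time
print(explore_or_exploit(pred=3, n_classes=5, eps=0.2))
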
class BinaryGatedLinear(Module):
    """Linear layer with stochastic binary gates."""

    def __init__(self, in_features, out_features, l0_strength=1.,
                 l2_strength=1., learn_weight=True, bias=True,
                 droprate_init=0.5, random_weight=True, deterministic=False,
                 use_baseline_bias=False, optimize_inference=False,
                 one_sample_per_item=False, **kwargs):
        """
        :param in_features: Input dimensionality
        :param out_features: Output dimensionality
        :param bias: Whether we use a bias
        :param l2_strength: Strength of the L2 penalty
        :param droprate_init: Dropout rate that the gates will be initialized to
        :param l0_strength: Strength of the L0 penalty
        """
        super(BinaryGatedLinear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.l0_strength = l0_strength
        self.l2_strength = l2_strength
        self.deterministic = deterministic
        self.use_baseline_bias = use_baseline_bias
        self.optimize_inference = optimize_inference
        self.one_sample_per_item = one_sample_per_item
        self.random_weight = random_weight

        if random_weight:
            exc_weight = torch.Tensor(out_features, in_features)
            inh_weight = torch.Tensor(out_features, in_features)
        else:
            exc_weight = torch.ones(out_features, in_features)
            inh_weight = torch.ones(out_features, in_features)

        if learn_weight:
            self.exc_weight = Parameter(exc_weight)
            self.inh_weight = Parameter(inh_weight)
        else:
            self.register_buffer("exc_weight", exc_weight)
            self.register_buffer("inh_weight", inh_weight)

        self.exc_p1 = Parameter(torch.Tensor(out_features, in_features))
        self.inh_p1 = Parameter(torch.Tensor(out_features, in_features))
        self.droprate_init = droprate_init if droprate_init != 0. else 0.5
        self.use_bias = bias
        if bias:
            self.bias = Parameter(torch.Tensor(out_features))
        self.reset_parameters()

    def reset_parameters(self):
        if self.random_weight:
            init.kaiming_normal_(self.exc_weight, mode="fan_out")
            init.kaiming_normal_(self.inh_weight, mode="fan_out")
            self.exc_weight.data.abs_()
            self.inh_weight.data.abs_()
        self.exc_p1.data.normal_(1 - self.droprate_init, 1e-2)
        self.inh_p1.data.normal_(1 - self.droprate_init, 1e-2)
        if self.use_bias:
            self.bias.data.fill_(0)

    def constrain_parameters(self, **kwargs):
        self.exc_weight.data.clamp_(min=0.)
        self.inh_weight.data.clamp_(min=0.)

    def get_gate_probabilities(self):
        exc_p1 = torch.clamp(self.exc_p1.data, min=0., max=1.)
        inh_p1 = torch.clamp(self.inh_p1.data, min=0., max=1.)
        return exc_p1, inh_p1

    def weight_size(self):
        return self.exc_weight.size()

    def regularization(self):
        """Expected L0 norm under the stochastic gates; also takes into
        account and re-weights a potential L2 penalty."""
        if self.l0_strength > 0 or self.l2_strength > 0:
            # Clamp these, but do it in a way that still always propagates
            # the gradient.
            exc_p1 = self.exc_p1.clone()
            torch.clamp(exc_p1.data, min=0, max=1, out=exc_p1.data)
            inh_p1 = self.inh_p1.clone()
            torch.clamp(inh_p1.data, min=0, max=1, out=inh_p1.data)

            if self.l2_strength == 0:
                return self.l0_strength * (exc_p1 + inh_p1).sum()
            else:
                exc_weight_decay_ungated = (.5 * self.l2_strength
                                            * self.exc_weight.pow(2))
                inh_weight_decay_ungated = (.5 * self.l2_strength
                                            * self.inh_weight.pow(2))
                exc_weight_l2_l0 = torch.sum(
                    (exc_weight_decay_ungated + self.l0_strength) * exc_p1)
                inh_weight_l2_l0 = torch.sum(
                    (inh_weight_decay_ungated + self.l0_strength) * inh_p1)
                bias_l2 = (0 if not self.use_bias
                           else torch.sum(.5 * self.l2_strength
                                          * self.bias.pow(2)))
                return exc_weight_l2_l0 + inh_weight_l2_l0 + bias_l2
        else:
            return 0

    def get_inference_mask(self):
        exc_p1, inh_p1 = self.get_gate_probabilities()
        if self.deterministic:
            exc_mask = (exc_p1 >= 0.5).float()
            inh_mask = (inh_p1 >= 0.5).float()
            return exc_mask, inh_mask
        else:
            exc_count1 = exc_p1.sum(dim=1).round().int()
            inh_count1 = inh_p1.sum(dim=1).round().int()

            # pytorch doesn't offer topk with varying k values.
            exc_mask = torch.zeros_like(exc_p1)
            inh_mask = torch.zeros_like(inh_p1)
            for i in range(exc_count1.size()[0]):
                _, exc_indices = torch.topk(exc_p1[i], exc_count1[i].item())
                _, inh_indices = torch.topk(inh_p1[i], inh_count1[i].item())
                exc_mask[i].scatter_(-1, exc_indices, 1)
                inh_mask[i].scatter_(-1, inh_indices, 1)
            return exc_mask, inh_mask

    def sample_weight_and_bias(self):
        if self.training or not self.optimize_inference:
            w = (sample_weight(self.exc_p1, self.exc_weight,
                               self.deterministic)
                 - sample_weight(self.inh_p1, self.inh_weight,
                                 self.deterministic))
        else:
            exc_mask, inh_mask = self.get_inference_mask()
            w = exc_mask * self.exc_weight - inh_mask * self.inh_weight

        b = None
        if self.use_baseline_bias:
            b = -w.sum(dim=-1) / 2
        if self.use_bias:
            b = (b + self.bias if b is not None else self.bias)
        return w, b

    def forward(self, x):
        if self.one_sample_per_item and self.training and len(x.size()) > 1:
            results = []
            for i in range(x.size(0)):
                w, b = self.sample_weight_and_bias()
                results.append(F.linear(x[i:i + 1], w, b))
            return torch.cat(results)
        else:
            w, b = self.sample_weight_and_bias()
            return F.linear(x, w, b)

    def get_expected_nonzeros(self):
        exc_p1, inh_p1 = self.get_gate_probabilities()
        # Flip two coins with probabilities pi_1 and pi_2. What is the
        # probability at least one of them is 1?
        #
        #   1 - (1 - pi_1)*(1 - pi_2)
        #   = 1 - 1 + pi_1 + pi_2 - pi_1*pi_2
        #   = pi_1 + pi_2 - pi_1*pi_2
        p1 = exc_p1 + inh_p1 - (exc_p1 * inh_p1)
        return p1.sum(dim=1).detach()

    def get_inference_nonzeros(self):
        exc_mask, inh_mask = self.get_inference_mask()
        return torch.sum(exc_mask.int() | inh_mask.int(), dim=1)

    def count_inference_flops(self):
        # For each unit, multiply with its n inputs then do n - 1 additions.
        # To capture the -1, subtract it, but only in cases where there is at
        # least one weight.
        nz_by_unit = self.get_inference_nonzeros()
        multiplies = torch.sum(nz_by_unit)
        adds = multiplies - torch.sum(nz_by_unit > 0)
        return multiplies.item(), adds.item()

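# Illustrative check (not part of the layer above): get_expected_nonzeros
# relies on the identity P(at least one gate on) = pi_1 + pi_2 - pi_1*pi_2.
# A quick Monte Carlo estimate agrees with the closed form.
import torch

torch.manual_seed(0)
exc_p1, inh_p1 = 0.3, 0.6
expected = exc_p1 + inh_p1 - exc_p1 * inh_p1          # 0.72

n = 100000
exc = torch.bernoulli(torch.full((n,), exc_p1))
inh = torch.bernoulli(torch.full((n,), inh_p1))
print(expected, ((exc + inh) > 0).float().mean().item())  # both close to 0.72
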
class BinaryGatedConv2d(Module):
    """Convolutional layer with binary stochastic gates."""

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, learn_weight=True,
                 bias=True, droprate_init=0.5, l2_strength=1.,
                 l0_strength=1., random_weight=True, deterministic=False,
                 use_baseline_bias=False, optimize_inference=True,
                 one_sample_per_item=False, **kwargs):
        """
        :param in_channels: Number of input channels
        :param out_channels: Number of output channels
        :param kernel_size: Size of the kernel
        :param stride: Stride for the convolution
        :param padding: Padding for the convolution
        :param dilation: Dilation factor for the convolution
        :param groups: How many groups we will assume in the convolution
        :param bias: Whether we will use a bias
        :param droprate_init: Dropout rate that the gates will be initialized to
        :param l2_strength: Strength of the L2 penalty
        :param l0_strength: Strength of the L0 penalty
        """
        super(BinaryGatedConv2d, self).__init__()
        if in_channels % groups != 0:
            raise ValueError("in_channels must be divisible by groups")
        if out_channels % groups != 0:
            raise ValueError("out_channels must be divisible by groups")
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = pair(kernel_size)
        self.stride = pair(stride)
        self.padding = pair(padding)
        self.dilation = pair(dilation)
        self.output_padding = pair(0)
        self.groups = groups
        self.l2_strength = l2_strength
        self.l0_strength = l0_strength
        self.droprate_init = droprate_init if droprate_init != 0. else 0.5
        self.deterministic = deterministic
        self.use_baseline_bias = use_baseline_bias
        self.optimize_inference = optimize_inference
        self.one_sample_per_item = one_sample_per_item
        self.random_weight = random_weight

        if random_weight:
            exc_weight = torch.Tensor(out_channels, in_channels // groups,
                                      *self.kernel_size)
            inh_weight = torch.Tensor(out_channels, in_channels // groups,
                                      *self.kernel_size)
        else:
            exc_weight = torch.ones(out_channels, in_channels // groups,
                                    *self.kernel_size)
            inh_weight = torch.ones(out_channels, in_channels // groups,
                                    *self.kernel_size)

        if learn_weight:
            self.exc_weight = Parameter(exc_weight)
            self.inh_weight = Parameter(inh_weight)
        else:
            self.register_buffer("exc_weight", exc_weight)
            self.register_buffer("inh_weight", inh_weight)

        self.exc_p1 = Parameter(
            torch.Tensor(out_channels, in_channels // groups,
                         *self.kernel_size))
        self.inh_p1 = Parameter(
            torch.Tensor(out_channels, in_channels // groups,
                         *self.kernel_size))
        self.dim_z = out_channels
        self.input_shape = None
        self.use_bias = bias
        if bias:
            self.bias = Parameter(torch.Tensor(out_channels))
        self.reset_parameters()

    def reset_parameters(self):
        if self.random_weight:
            init.kaiming_normal_(self.exc_weight, mode="fan_out")
            init.kaiming_normal_(self.inh_weight, mode="fan_out")
            self.exc_weight.data.abs_()
            self.inh_weight.data.abs_()
        self.exc_p1.data.normal_(1 - self.droprate_init, 1e-2)
        self.inh_p1.data.normal_(1 - self.droprate_init, 1e-2)
        if self.use_bias:
            self.bias.data.fill_(0)

    def constrain_parameters(self, **kwargs):
        self.exc_weight.data.clamp_(min=0.)
        self.inh_weight.data.clamp_(min=0.)

    def weight_size(self):
        return self.exc_weight.size()

    def regularization(self):
        """Expected L0 norm under the stochastic gates; also takes into
        account and re-weights a potential L2 penalty."""
        if self.l0_strength > 0 or self.l2_strength > 0:
            # Clamp these, but do it in a way that still always propagates
            # the gradient.
            exc_p1 = self.exc_p1.clone()
            torch.clamp(exc_p1.data, min=0, max=1, out=exc_p1.data)
            inh_p1 = self.inh_p1.clone()
            torch.clamp(inh_p1.data, min=0, max=1, out=inh_p1.data)

            if self.l2_strength == 0:
                return self.l0_strength * (exc_p1 + inh_p1).sum()
            else:
                exc_weight_decay_ungated = (.5 * self.l2_strength
                                            * self.exc_weight.pow(2))
                inh_weight_decay_ungated = (.5 * self.l2_strength
                                            * self.inh_weight.pow(2))
                exc_weight_l2_l0 = torch.sum(
                    (exc_weight_decay_ungated + self.l0_strength) * exc_p1)
                inh_weight_l2_l0 = torch.sum(
                    (inh_weight_decay_ungated + self.l0_strength) * inh_p1)
                bias_l2 = (0 if not self.use_bias
                           else torch.sum(.5 * self.l2_strength
                                          * self.bias.pow(2)))
                return exc_weight_l2_l0 + inh_weight_l2_l0 + bias_l2
        else:
            return 0

    def get_gate_probabilities(self):
        exc_p1 = torch.clamp(self.exc_p1.data, min=0., max=1.)
        inh_p1 = torch.clamp(self.inh_p1.data, min=0., max=1.)
        return exc_p1, inh_p1

    def get_inference_mask(self):
        exc_p1, inh_p1 = self.get_gate_probabilities()
        if self.deterministic:
            exc_mask = (exc_p1 >= 0.5).float()
            inh_mask = (inh_p1 >= 0.5).float()
            return exc_mask, inh_mask
        else:
            exc_count1 = exc_p1.sum(
                dim=tuple(range(1, len(exc_p1.shape)))).round().int()
            inh_count1 = inh_p1.sum(
                dim=tuple(range(1, len(inh_p1.shape)))).round().int()

            # pytorch doesn't offer topk with varying k values.
            exc_mask = torch.zeros_like(exc_p1)
            inh_mask = torch.zeros_like(inh_p1)
            for i in range(exc_count1.size()[0]):
                _, exc_indices = torch.topk(exc_p1[i].flatten(),
                                            exc_count1[i].item())
                _, inh_indices = torch.topk(inh_p1[i].flatten(),
                                            inh_count1[i].item())
                exc_mask[i].flatten().scatter_(-1, exc_indices, 1)
                inh_mask[i].flatten().scatter_(-1, inh_indices, 1)
            return exc_mask, inh_mask

    def sample_weight_and_bias(self, samples=1):
        if self.training or not self.optimize_inference:
            w = (sample_weight(self.exc_p1, self.exc_weight,
                               self.deterministic, samples)
                 - sample_weight(self.inh_p1, self.inh_weight,
                                 self.deterministic, samples))
        else:
            exc_mask, inh_mask = self.get_inference_mask()
            w = exc_mask * self.exc_weight - inh_mask * self.inh_weight

        b = None
        if self.use_baseline_bias:
            b = -w.sum(dim=(-3, -2, -1)) / 2
        if self.use_bias:
            b = (b + self.bias if b is not None else self.bias)
        return w, b

    def forward(self, x):
        if self.input_shape is None:
            self.input_shape = x.size()

        if self.one_sample_per_item and self.training and len(x.size()) > 3:
            w, b = self.sample_weight_and_bias(x.size(0))
            if self.use_baseline_bias:
                b = b.view(x.size(0) * self.out_channels)
            else:
                b = b.repeat(x.size(0))
            x_ = x.view(1, x.size(0) * x.size(1), *x.size()[2:])
            w_ = w.view(w.size(0) * w.size(1), *w.size()[2:])
            result = F.conv2d(x_, w_, b, self.stride, self.padding,
                              self.dilation, x.size(0) * self.groups)
            return result.view(x.size(0), self.out_channels,
                               *result.size()[2:])
        else:
            w, b = self.sample_weight_and_bias()
            return F.conv2d(x, w, b, self.stride, self.padding,
                            self.dilation, self.groups)

    def get_expected_nonzeros(self):
        exc_p1, inh_p1 = self.get_gate_probabilities()
        # Flip two coins with probabilities pi_1 and pi_2. What is the
        # probability at least one of them is 1?
        #
        #   1 - (1 - pi_1)*(1 - pi_2)
        #   = 1 - 1 + pi_1 + pi_2 - pi_1*pi_2
        #   = pi_1 + pi_2 - pi_1*pi_2
        p1 = exc_p1 + inh_p1 - (exc_p1 * inh_p1)
        return p1.sum(dim=tuple(range(1, len(p1.shape)))).detach()

    def get_inference_nonzeros(self):
        exc_mask, inh_mask = self.get_inference_mask()
        return torch.sum(exc_mask.int() | inh_mask.int(),
                         dim=tuple(range(1, len(exc_mask.shape))))

    def count_inference_flops(self):
        # For each unit, multiply with its n inputs then do n - 1 additions.
        # Only subtract the 1 in cases where there is at least one weight.
        nz_by_unit = self.get_inference_nonzeros()
        multiplies_per_instance = torch.sum(nz_by_unit)
        adds_per_instance = multiplies_per_instance - torch.sum(nz_by_unit > 0)

        # number of output rows
        instances = ((self.input_shape[-2] - self.kernel_size[0]
                      + 2 * self.padding[0]) / self.stride[0]) + 1
        # multiplied by the number of output columns
        instances *= ((self.input_shape[-1] - self.kernel_size[1]
                       + 2 * self.padding[1]) / self.stride[1]) + 1

        multiplies = multiplies_per_instance * instances
        adds = adds_per_instance * instances
        return multiplies.item(), adds.item()

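# Worked example (assumed numbers, not taken from the classes above): the
# `instances` term in count_inference_flops is the number of spatial positions
# one filter visits, rows = (H - kH + 2*padH)/strideH + 1, likewise columns.
H = W = 32
kernel, pad, stride = 3, 1, 1

rows = (H - kernel + 2 * pad) / stride + 1    # 32.0
cols = (W - kernel + 2 * pad) / stride + 1    # 32.0
instances = rows * cols                       # 1024 positions per filter

# With, say, 20 surviving weights in a filter, each position costs
# 20 multiplies and 19 adds:
print(20 * instances, 19 * instances)         # 20480.0 19456.0
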
class group_relaxed_L1L2Conv2d(Module):
    """Implementation of the group relaxed L1-L2 regularization for the
    feature maps of a convolutional layer."""

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True, lamba=1.,
                 alpha=1., beta=4., weight_decay=1., **kwargs):
        """
        :param in_channels: Number of input channels
        :param out_channels: Number of output channels
        :param kernel_size: size of the kernel
        :param stride: stride for the convolution
        :param padding: padding for the convolution
        :param dilation: dilation factor for the convolution
        :param groups: how many groups we will assume in the convolution
        :param bias: whether we will use a bias
        :param lamba: strength of the regularization
        """
        super(group_relaxed_L1L2Conv2d, self).__init__()
        self.floatTensor = (torch.FloatTensor if not torch.cuda.is_available()
                            else torch.cuda.FloatTensor)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = pair(kernel_size)
        self.stride = pair(stride)
        self.padding = pair(padding)
        self.dilation = pair(dilation)
        self.output_padding = pair(0)
        self.groups = groups
        self.lamba = lamba
        self.alpha = alpha
        self.beta = beta
        self.lamba1 = self.lamba / self.beta
        self.weight_decay = weight_decay
        self.weight = Parameter(
            torch.Tensor(out_channels, in_channels // groups,
                         *self.kernel_size))
        self.u = torch.rand(out_channels, in_channels // groups,
                            *self.kernel_size)
        if torch.cuda.is_available():
            self.u = self.u.to('cuda')
        if bias:
            self.bias = Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()
        self.input_shape = None
        print(self)

    def reset_parameters(self):
        init.kaiming_normal_(self.weight, mode='fan_in')
        if self.bias is not None:
            self.bias.data.normal_(0, 1e-2)

    def constrain_parameters(self, **kwargs):
        norm_w = self.weight.data.norm(p=float('inf'))
        if norm_w > self.lamba1:
            m = Softshrink(self.lamba1)
            z = m(self.weight.data)
            self.u.data = z * (z.data.norm(p=2)
                               + self.alpha * self.lamba1) / z.data.norm(p=2)
        elif norm_w == self.lamba1:
            self.u = self.weight.clone()
            self.u[self.u.abs() < self.lamba1] = 0
            n = torch.sum(self.u != 0)
            self.u[self.u != 0] = (self.weight[self.u != 0].sign()
                                   * self.alpha * self.lamba1 / (n ** (1 / 2)))
        elif (1 - self.alpha) * self.lamba1 < norm_w and norm_w < self.lamba1:
            self.u = self.weight.clone()
            max_idx = np.unravel_index(torch.argmax(self.u.cpu(), None),
                                       self.u.shape)
            max_value_sign = self.u[max_idx].sign()
            self.u[:] = 0
            self.u[max_idx] = (norm_w + (self.alpha - 1)
                               * self.lamba1) * max_value_sign
        else:
            self.u = self.weight.clone()
            self.u[:] = 0

    def grow_beta(self, growth_factor):
        self.beta = self.beta * growth_factor
        self.lamba1 = self.lamba / self.beta

    def _reg_w(self, **kwargs):
        logpw = (-self.beta * torch.sum(0.5 * self.weight.add(-self.u).pow(2))
                 - self.lamba * np.sqrt(self.in_channels * self.kernel_size[0]
                                        * self.kernel_size[1])
                 * torch.sum(torch.pow(
                     torch.sum(self.weight.pow(2), 3).sum(2).sum(1), 0.5)))
        logpb = 0
        if self.bias is not None:
            logpb = -torch.sum(self.weight_decay * .5 * (self.bias.pow(2)))
        return logpw + logpb

    def regularization(self):
        return self._reg_w()

    def count_zero_u(self):
        total = np.prod(self.u.size())
        zero = total - self.u.nonzero().size(0)
        return zero

    def count_zero_w(self):
        return torch.sum((self.weight.abs() < 1e-5).int()).item()

    def count_active_neuron(self):
        return torch.sum((torch.sum(self.weight.abs(), 3).sum(2).sum(1)
                          / (self.in_channels * self.kernel_size[0]
                             * self.kernel_size[1])) > 1e-5).item()

    def count_total_neuron(self):
        return self.out_channels

    def count_weight(self):
        return np.prod(self.u.size())

    def count_expected_flops_and_l0(self):
        # ppos = self.out_channels
        ppos = torch.sum(
            torch.sum(self.weight.abs(), 3).sum(2).sum(1) > 0.001).item()
        n = self.kernel_size[0] * self.kernel_size[1] * self.in_channels
        flops_per_instance = n + (n - 1)

        num_instances_per_filter = (
            (self.input_shape[1] - self.kernel_size[0]
             + 2 * self.padding[0]) / self.stride[0]) + 1
        num_instances_per_filter *= (
            (self.input_shape[2] - self.kernel_size[1]
             + 2 * self.padding[1]) / self.stride[1]) + 1

        flops_per_filter = num_instances_per_filter * flops_per_instance
        expected_flops = flops_per_filter * ppos
        expected_l0 = n * ppos
        if self.bias is not None:
            expected_flops += num_instances_per_filter * ppos
            expected_l0 += ppos
        return expected_flops, expected_l0

    def forward(self, input_):
        if self.input_shape is None:
            self.input_shape = input_.size()
        output = F.conv2d(input_, self.weight, self.bias, self.stride,
                          self.padding, self.dilation, self.groups)
        return output

    def __repr__(self):
        s = ('{name}({in_channels}, {out_channels}, '
             'kernel_size={kernel_size}, stride={stride}')
        if self.padding != (0,) * len(self.padding):
            s += ', padding={padding}'
        if self.dilation != (1,) * len(self.dilation):
            s += ', dilation={dilation}'
        if self.output_padding != (0,) * len(self.output_padding):
            s += ', output_padding={output_padding}'
        if self.groups != 1:
            s += ', groups={groups}'
        if self.bias is None:
            s += ', bias=False'
        s += ')'
        return s.format(name=self.__class__.__name__, **self.__dict__)

class group_relaxed_L1L2Dense(Module):
    """Implementation of the group relaxed L1-L2 regularization for the
    input units of a fully connected layer."""

    def __init__(self, in_features, out_features, bias=True, lamba=1.,
                 alpha=1., beta=4., weight_decay=1., **kwargs):
        """
        :param in_features: input dimensionality
        :param out_features: output dimensionality
        :param bias: whether we use bias
        :param lamba: strength of the regularization
        """
        super(group_relaxed_L1L2Dense, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.Tensor(in_features, out_features))
        self.u = torch.rand(in_features, out_features)
        if torch.cuda.is_available():
            self.u = self.u.to('cuda')
        if bias:
            self.bias = Parameter(torch.Tensor(out_features))
        else:
            self.register_parameter('bias', None)
        self.lamba = lamba
        self.alpha = alpha
        self.beta = beta
        self.lamba1 = self.lamba / self.beta
        self.weight_decay = weight_decay
        self.floatTensor = (torch.FloatTensor if not torch.cuda.is_available()
                            else torch.cuda.FloatTensor)
        self.reset_parameters()
        print(self)

    def reset_parameters(self):
        init.kaiming_normal_(self.weight, mode='fan_out')
        if self.bias is not None:
            self.bias.data.normal_(0, 1e-2)

    def constrain_parameters(self, **kwargs):
        norm_w = self.weight.data.norm(p=float('inf'))
        if norm_w > self.lamba1:
            m = Softshrink(self.lamba1)
            z = m(self.weight.data)
            self.u.data = z * (z.data.norm(p=2)
                               + self.alpha * self.lamba1) / z.data.norm(p=2)
        elif norm_w == self.lamba1:
            self.u = self.weight.clone()
            self.u[self.u.abs() < self.lamba1] = 0
            n = torch.sum(self.u != 0)
            self.u[self.u != 0] = (self.weight[self.u != 0].sign()
                                   * self.alpha * self.lamba1 / (n ** (1 / 2)))
        elif (1 - self.alpha) * self.lamba1 < norm_w and norm_w < self.lamba1:
            self.u = self.weight.clone()
            max_idx = np.unravel_index(torch.argmax(self.u.cpu(), None),
                                       self.u.shape)
            max_value_sign = self.u[max_idx].sign()
            self.u[:] = 0
            self.u[max_idx] = (norm_w + (self.alpha - 1)
                               * self.lamba1) * max_value_sign
        else:
            self.u = self.weight.clone()
            self.u[:] = 0

    def grow_beta(self, growth_factor):
        self.beta = self.beta * growth_factor
        self.lamba1 = self.lamba / self.beta

    def _reg_w(self, **kwargs):
        logpw = (-self.beta * torch.sum(0.5 * self.weight.add(-self.u).pow(2))
                 - self.lamba * np.sqrt(self.out_features)
                 * torch.sum(torch.pow(torch.sum(self.weight.pow(2), 1), 0.5)))
        logpb = 0
        if self.bias is not None:
            logpb = -torch.sum(self.weight_decay * .5 * (self.bias.pow(2)))
        return logpw + logpb

    def regularization(self):
        return self._reg_w()

    def count_zero_u(self):
        total = np.prod(self.u.size())
        zero = total - self.u.nonzero().size(0)
        return zero

    def count_zero_w(self):
        return torch.sum((self.weight.abs() < 1e-5).int()).item()

    def count_weight(self):
        return np.prod(self.u.size())

    def count_active_neuron(self):
        return torch.sum(
            torch.sum(self.weight.abs() / self.out_features, 1) > 1e-5).item()

    def count_total_neuron(self):
        return self.in_features

    def count_expected_flops_and_l0(self):
        ppos = torch.sum(self.weight.abs() > 0.000001).item()
        expected_flops = (2 * ppos - 1) * self.out_features
        expected_l0 = ppos * self.out_features
        if self.bias is not None:
            expected_flops += self.out_features
            expected_l0 += self.out_features
        return expected_flops, expected_l0

    def forward(self, input):
        output = input.mm(self.weight)
        if self.bias is not None:
            output.add_(self.bias.view(1, self.out_features).expand_as(output))
        return output

    def __repr__(self):
        return (self.__class__.__name__ + ' ('
                + str(self.in_features) + ' -> '
                + str(self.out_features) + ', lambda: '
                + str(self.lamba) + ')')

class MFLinearLayer(nn.Module):
    def __init__(self, dim_in, dim_out, prior_var=1, init_var=-7):
        super().__init__()
        self.init_var = init_var
        self.dim_in = dim_in
        self.dim_out = dim_out
        self.W_mean = Parameter(torch.Tensor(dim_out, dim_in))
        self.b_mean = Parameter(torch.Tensor(dim_out))
        self.W_var = Parameter(torch.Tensor(dim_out, dim_in))
        self.b_var = Parameter(torch.Tensor(dim_out))
        self.W_prior_mean = torch.zeros([dim_out, dim_in], device=device)
        self.b_prior_mean = torch.zeros([dim_out], device=device)
        self.prior_var = prior_var
        self.W_prior_var = torch.ones([dim_out, dim_in], device=device).mul(
            np.log(self.prior_var))
        self.b_prior_var = torch.ones([dim_out], device=device).mul(
            np.log(self.prior_var))
        self.reset_parameters()

    def reset_parameters(self):
        init.kaiming_uniform_(self.W_mean, a=math.sqrt(5))
        fan_in, _ = init._calculate_fan_in_and_fan_out(self.W_mean)
        bound = 1 / math.sqrt(fan_in)
        init.uniform_(self.b_mean, -bound, bound)
        init.constant_(self.W_var, self.init_var)
        init.constant_(self.b_var, self.init_var)

    def add_new_task(self, reset_variance=True):
        self.W_prior_mean = self.W_mean.clone().detach().requires_grad_(False)
        self.b_prior_mean = self.b_mean.clone().detach().requires_grad_(False)
        self.W_prior_var = self.W_var.clone().detach().requires_grad_(False)
        self.b_prior_var = self.b_var.clone().detach().requires_grad_(False)
        if reset_variance:
            self.W_var.data = torch.min(
                self.W_var, self.init_var * torch.ones_like(self.W_var).data)
            self.b_var.data = torch.min(
                self.b_var, self.init_var * torch.ones_like(self.b_var).data)

        fan_in, _ = init._calculate_fan_in_and_fan_out(self.W_mean)
        bound = 1 / math.sqrt(fan_in)
        initialization_noise = torch.empty_like(self.W_mean)
        init.kaiming_uniform_(initialization_noise, a=math.sqrt(5))
        # self.W_mean.data = self.W_mean.data + (self.W_var > -2).float() * initialization_noise
        # self.b_mean.data = self.b_mean.data + (self.b_var > -2).float() * torch.empty_like(self.b_mean).uniform_(-bound, bound)
        self.W_mean.data = initialization_noise.data
        self.b_mean.data = torch.empty_like(self.b_mean).uniform_(
            -bound, bound).data

    def get_kl(self, lamb):
        W_kl = compute_kl(self.W_mean, self.W_var, self.W_prior_mean,
                          self.W_prior_var, lamb=lamb,
                          initial_prior_var=self.prior_var)
        b_kl = compute_kl(self.b_mean, self.b_var, self.b_prior_mean,
                          self.b_prior_var, lamb=lamb,
                          initial_prior_var=self.prior_var)
        return W_kl + b_kl

    def forward(self, x):
        output_mean = x.matmul(
            self.W_mean.t()) + self.b_mean.unsqueeze(0).unsqueeze(0)
        output_std = torch.sqrt(
            (x ** 2).matmul(torch.exp(self.W_var.t()))
            + torch.exp(self.b_var).unsqueeze(0).unsqueeze(0))
        eps = torch.empty(output_mean.shape,
                          device=device).normal_(mean=0, std=1)
        output = output_mean + (eps * output_std)
        return output

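# Standalone sketch (hypothetical shapes, separate from the class above):
# MFLinearLayer.forward uses the local reparameterization trick, sampling the
# pre-activations from their induced Gaussian rather than sampling weights.
import torch

torch.manual_seed(0)
dim_in, dim_out, batch = 4, 3, 2
W_mean = torch.randn(dim_out, dim_in)
W_logvar = torch.full((dim_out, dim_in), -7.0)   # log-variance, as init_var
b_mean = torch.zeros(dim_out)
b_logvar = torch.full((dim_out,), -7.0)

x = torch.randn(batch, dim_in)
out_mean = x @ W_mean.t() + b_mean
out_std = torch.sqrt((x ** 2) @ torch.exp(W_logvar).t() + torch.exp(b_logvar))
out = out_mean + torch.randn_like(out_mean) * out_std
print(out.shape)  # torch.Size([2, 3])
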
class MFConvLayer(torch.nn.modules.conv._ConvNd):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True,
                 padding_mode='zeros', prior_var=1, init_var=-7):
        kernel_size = _pair(kernel_size)
        stride = _pair(stride)
        padding = _pair(padding)
        dilation = _pair(dilation)
        super().__init__(in_channels, out_channels, kernel_size, stride,
                         padding, dilation, False, _pair(0), groups, bias,
                         padding_mode)
        self.init_var = init_var
        self.W_prior_mean = torch.zeros(self.weight.shape, device=device)
        self.b_prior_mean = torch.zeros(self.bias.shape, device=device)
        self.prior_var = prior_var
        self.W_prior_var = torch.ones(self.weight.shape, device=device).mul(
            np.log(self.prior_var))
        self.b_prior_var = torch.ones(self.bias.shape, device=device).mul(
            np.log(self.prior_var))
        self.weight_var = Parameter(torch.Tensor(self.weight.shape))
        self.bias_var = Parameter(torch.Tensor(self.bias.shape))
        self.reset_parameters()

    def conv2d_forward(self, input, weight, bias):
        if self.padding_mode == 'circular':
            expanded_padding = ((self.padding[1] + 1) // 2,
                                self.padding[1] // 2,
                                (self.padding[0] + 1) // 2,
                                self.padding[0] // 2)
            return F.conv2d(F.pad(input, expanded_padding, mode='circular'),
                            weight, bias, self.stride, _pair(0),
                            self.dilation, self.groups)
        return F.conv2d(input, weight, bias, self.stride, self.padding,
                        self.dilation, self.groups)

    def reset_parameters(self):
        super().reset_parameters()
        if hasattr(self, 'weight_var'):
            init.constant_(self.weight_var, self.init_var)
            init.constant_(self.bias_var, self.init_var)

    def add_new_task(self):
        self.W_prior_mean = self.weight.clone().detach().requires_grad_(False)
        self.b_prior_mean = self.bias.clone().detach().requires_grad_(False)
        self.W_prior_var = self.weight_var.clone().detach().requires_grad_(
            False)
        self.b_prior_var = self.bias_var.clone().detach().requires_grad_(False)
        self.weight_var.data = torch.min(
            self.weight_var,
            self.init_var * torch.ones_like(self.weight_var).data)
        self.bias_var.data = torch.min(
            self.bias_var,
            self.init_var * torch.ones_like(self.bias_var).data)

        fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
        bound = 1 / math.sqrt(fan_in)
        initialization_noise = torch.empty_like(self.weight)
        init.kaiming_uniform_(initialization_noise, a=math.sqrt(5))
        # self.weight.data = self.weight.data + (self.weight_var > -2).float() * initialization_noise
        # self.bias.data = self.bias.data + (self.bias_var > -2).float() * torch.empty_like(self.bias).uniform_(-bound, bound)
        self.weight.data = initialization_noise.data
        self.bias.data = torch.empty_like(self.bias).uniform_(-bound,
                                                              bound).data

    def get_kl(self, lamb):
        W_kl = compute_kl(self.weight, self.weight_var, self.W_prior_mean,
                          self.W_prior_var, lamb=lamb,
                          initial_prior_var=self.prior_var)
        b_kl = compute_kl(self.bias, self.bias_var, self.b_prior_mean,
                          self.b_prior_var, lamb=lamb,
                          initial_prior_var=self.prior_var)
        return W_kl + b_kl

    def forward(self, input):
        output_mean = self.conv2d_forward(input, self.weight, self.bias)
        output_var = self.conv2d_forward(input ** 2,
                                         torch.exp(self.weight_var),
                                         torch.exp(self.bias_var))
        eps = torch.empty(output_mean.shape,
                          device=device).normal_(mean=0, std=1)
        output = output_mean + torch.sqrt(output_var + 1e-9) * eps
        return output

class my_Linear(nn.Module):
    def __init__(self, in_features, out_features, bias=True):
        super(my_Linear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.Tensor(out_features, in_features))
        if bias:
            self.bias = Parameter(torch.Tensor(out_features))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()
        self._mode = 0
        self._verbose = False
        self._bverbose = False
        self._value = None   ## save max value
        self._index = None   ## save max position

    def setMode(self, m):
        self._mode = m

    def reset_parameters(self):
        init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        if self.bias is not None:
            fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in)
            init.uniform_(self.bias, -bound, bound)

    def forward(self, input):
        tweight = self.weight.clone()
        if self._mode == 2:  ## find path
            if input.shape[0] > 1:  ## max & min input
                max_input = input[0].clone().unsqueeze(0)  ## max
                min_input = input[1].clone().unsqueeze(0)  ## min
            else:  ## max only
                max_input = input.clone()
                min_input = input.clone() * 0
            maxpos = None
            if self._verbose:
                print('== input ==')
                print(input.shape)
                print(input)
                print('== weight ==')
                print(self.weight.shape)
                print(self.weight)
                print('== bias ==')
                print(self.bias)
            tx = []
            tx_min = []
            ws = self.weight.shape
            bias = self.bias.clone()
            bias *= 0
            print('linear max node : ', ws[1], file=sys.stderr)
            for py in range(ws[1]):  ## iterate over input-feature columns
                tweight *= 0
                tweight[:, py] = self.weight[:, py].data
                tx.append(F.linear(max_input, tweight, bias))
                tx_min.append(F.linear(min_input, tweight, bias))
                if py % 100 == 0:
                    print('processed node : %d \r' % py, file=sys.stderr,
                          end='')
                if self._verbose:
                    print('===iter ', py, ' ===')
                    print(tweight)
                    print(tx[py])
                    print(tx_min[py])
            ## make maximum result
            maxv = torch.max(torch.stack(tx + tx_min), axis=0)
            minv = torch.min(torch.stack(tx + tx_min), axis=0)
            self._value = maxv[0].data
            self._value_min = minv[0].data
            maxi = maxv[1].data
            maxi[maxi >= ws[1]] *= -1
            maxi[maxi < 0] += (ws[1] - 1)  ## so indices start from -1
            self._index = maxi.data
            mini = minv[1].data
            mini[mini >= ws[1]] *= -1
            mini[mini < 0] += (ws[1] - 1)
            self._index_min = mini.data
            if self._verbose:
                # print(torch.stack(tx + tx_min))
                print(self._value)
                print(self._index)
                print(self._value_min)
                print(self._index_min)
            return torch.cat([self._value, self._value_min])
        elif self._mode == 1:  ## normal mode
            return F.linear(input, self.weight, self.bias)
        else:
            return F.linear(input, self.weight, self.bias)

    def getValue(self, pos):
        if pos >= 0:
            v = self._value.flatten()[pos]  ## value at position
        else:
            npos = -1 * (pos + 1)  ## negative indices begin from -1
            v = self._value_min.flatten()[npos]
        return v

    def getIndex(self, pos):
        if pos >= 0:
            tpos = self._index.flatten()[pos].item()
        else:
            npos = -1 * (pos + 1)  ## negative indices begin from -1
            tpos = self._index_min.flatten()[npos].item()
        return tpos

    def getOutShape(self):
        if self._value is None:
            return None
        return self._value.shape

    def getWeight(self, cpos, upos):  ## cpos: current pos, upos: under pos
        if cpos < 0:
            cpos = -1 * (cpos + 1)
        if upos < 0:
            upos = -1 * (upos + 1)
        return self.weight[cpos, upos]

    def backward(self, input):
        ## 1. use the last tensor (upper-layer result)
        current_pos = int(input[-1, 0].item())  ## current position
        current_val = self.getValue(current_pos)
        input[-1, 1] = current_val  ## set current val
        ## 2. make under-layer information
        under_pos = self.getIndex(current_pos)
        under_out = torch.tensor([[under_pos, current_val, 0.0, 0.0]])
        # for saving weight
        weight = self.getWeight(current_pos, under_pos)
        input[-1, 2] = weight.data
        out = torch.cat([input, under_out], dim=0)
        if self._bverbose:
            print('=== linear backward ===')
            print('selected class = ', current_pos)
            print('max value = ', current_val)
            print('position in under layer = ', under_pos)
            print('used weight = ', weight)
            print('-- input')
            print(input)
            print('-- output')
            print(out)
            print('======')
        return out

    def back_candidate(self, path, underpath, not_input):
        p = []
        cp = int(path[0].item())
        up = int(underpath[0].item())
        for px in range(self.weight.shape[1]):
            if px == up:
                continue  ## skip the position already on the path
            tweight = self.weight[cp, px]
            p.append(torch.tensor([px, tweight, 0.0]))
            if not_input:
                p.append(torch.tensor([-1 * (px + 1), tweight, 0.0]))
        return p

    def path_forward(self, input_val, path):
        cpos = int(path[0].item())  # path = [cpos, value, weight]
        if input_val is None:
            return self.getValue(cpos)
        cweight = path[2]
        return input_val * cweight

    def extra_repr(self):
        return 'in_features={}, out_features={}, bias={}'.format(
            self.in_features, self.out_features, self.bias is not None)

class DenseFCLayer(torch.nn.Module):
    def __init__(self, n_inputs=None, n_outputs=None,
                 weights: torch.Tensor = None, use_biases=True,
                 activation=None):
        super(DenseFCLayer, self).__init__()
        if n_inputs is not None and n_outputs is not None:
            self.n_inputs = n_inputs
            self.n_outputs = n_outputs
            self._activation = activation
            self._initial_weights = None
            self._weights = Parameter(torch.Tensor(n_inputs, n_outputs))
            self._init_weights()
            self._mask = torch.ones_like(self._weights)
            self._initial_weights = self._weights.clone()
            self.use_biases = use_biases
            if self.use_biases:
                self._biases = Parameter(torch.Tensor(n_outputs))
                self._init_biases()
        elif weights is not None:
            self.n_inputs = weights.size(0)
            self.n_outputs = weights.size(1)
            self._activation = activation
            self._initial_weights = weights
            self._weights = Parameter(weights)
            self._mask = torch.ones_like(self._weights)
            self.use_biases = use_biases
            self._biases = Parameter(torch.Tensor(self.n_outputs))
            self._init_biases()
        else:
            raise ValueError(
                "DenseFCLayer accepts either n_inputs/n_outputs or weights")

    def _init_weights(self):
        # Note the difference between init functions
        # torch.nn.init.xavier_normal_(self._weights)
        # torch.nn.init.xavier_uniform_(self._weights)
        # torch.nn.init.kaiming_normal_(self._weights)
        torch.nn.init.kaiming_uniform_(self._weights)

    def _init_biases(self):
        torch.nn.init.zeros_(self._biases)

    def prune_by_threshold(self, thr):
        self._mask *= (torch.abs(self._weights) >= thr).float()

    def prune_by_rank(self, rank):
        weights_val = self._weights[self._mask == 1]
        sorted_abs_weights = torch.sort(torch.abs(weights_val))[0]
        thr = sorted_abs_weights[rank]
        self.prune_by_threshold(thr)

    def prune_by_pct(self, pct):
        prune_idx = int(self.n_weights * pct)
        self.prune_by_rank(prune_idx)

    def prune_by_pct_taylor(self, pct):
        prune_idx = int(self.n_weights * pct)
        # by abs val
        wg = torch.abs(self._weights[self._mask == 1]
                       * self._weights.grad[self._mask == 1])
        sorted_wg = torch.sort(wg)[0]
        thr = sorted_wg[prune_idx]
        print(thr)
        self._mask *= (torch.abs(self._weights * self._weights.grad)
                       > thr).float()
        # by val
        # wg = self._weights[self._mask == 1] * self._weights.grad[self._mask == 1]
        # sorted_wg = torch.sort(wg)[0]
        # thr = sorted_wg[prune_idx]
        # self._mask *= (self._weights * self._weights.grad >= thr).float()

    def random_prune_by_pct(self, pct):
        prune_idx = int(self.n_weights * pct)
        rand = torch.rand(size=self._mask.size(), device=self._mask.device)
        rand_val = rand[self._mask == 1]
        sorted_abs_rand = torch.sort(rand_val)[0]
        thr = sorted_abs_rand[prune_idx]
        self._mask *= (rand >= thr).float()

    def reinitialize(self):
        self._weights = Parameter(self._initial_weights)
        self._init_biases()  # biases are reinitialized

    def to_sparse(self) -> SparseFCLayer:
        return SparseFCLayer((self._weights * self._mask).t().to_sparse(),
                             self._biases.reshape((-1, 1)),
                             self._activation)

    @classmethod
    def from_sparse(cls, s_layer: SparseFCLayer):
        return cls(weights=s_layer.weights.t().to_dense(),
                   activation=s_layer.activation)

    def to_device(self, device: torch.device):
        self._initial_weights = self._initial_weights.to(device)
        self._mask = self._mask.to(device)

    def forward(self, inputs: torch.Tensor, use_mask=True):
        masked_weights = self._weights
        if use_mask:
            masked_weights = self._weights * self._mask
        if self.use_biases:
            ret = torch.addmm(self._biases, inputs, masked_weights)
        else:
            ret = torch.mm(inputs, masked_weights)
        return ret if self._activation is None else self._activation(ret)

    @property
    def mask(self):
        return self._mask

    @property
    def weights(self):
        return self._weights

    @property
    def activation(self):
        return self._activation

    @property
    def n_weights(self):
        return torch.nonzero(self._mask).size(0)

    @property
    def biases(self):
        if self.use_biases:
            return self._biases
        else:
            return None

    def __str__(self):
        return "DenseFCLayer with size {} and activation {}".format(
            (self.n_inputs, self.n_outputs), self._activation)

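# Minimal sketch (illustrative tensors only, not the class above):
# prune_by_pct combines prune_by_rank and prune_by_threshold to zero out the
# smallest fraction of the still-active weights, keeping a binary mask that
# the masked forward pass then multiplies in.
import torch

torch.manual_seed(0)
weights = torch.randn(4, 4)
mask = torch.ones_like(weights)

pct = 0.5
remaining = weights[mask == 1].abs()
threshold = torch.sort(remaining)[0][int(remaining.numel() * pct)]
mask *= (weights.abs() >= threshold).float()

print(int(mask.sum().item()), "weights survive out of", weights.numel())
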
class my_Linear(nn.Module):
    def __init__(self, in_features, out_features, bias=True):
        super(my_Linear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.Tensor(out_features, in_features))
        if bias:
            self.bias = Parameter(torch.Tensor(out_features))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()
        self._mode = 0
        self._verbose = False
        self._bverbose = True
        self._value = None   ## save max value
        self._index = None   ## save max position

    def setMode(self, m):
        self._mode = m

    def reset_parameters(self):
        init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        if self.bias is not None:
            fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in)
            init.uniform_(self.bias, -bound, bound)

    def forward(self, input):
        tweight = self.weight.clone()
        if self._mode == 2:  ## find path
            maxpos = None
            if self._verbose:
                print('== input ==')
                print(input.shape)
                print(input)
                print('== weight ==')
                print(self.weight.shape)
                print(self.weight)
                print('== bias ==')
                print(self.bias)
            tx = []
            ws = self.weight.shape
            bias = self.bias.clone()
            bias *= 0
            print('linear max node : ', ws[1], file=sys.stderr)
            for py in range(ws[1]):  ## iterate over input-feature columns
                tweight *= 0
                tweight[:, py] = self.weight[:, py].data
                tx.append(F.linear(input, tweight, bias))
                if py % 1000 == 0:
                    print('processed node : %d \r' % py, file=sys.stderr,
                          end='')
                if self._verbose:
                    print('===iter ', py, ' ===')
                    print(tweight)
                    print(tx[py])
            ## make maximum result
            ts = torch.stack(tx)
            maxv = torch.max(ts, axis=0)
            self._value = maxv[0].data
            self._index = maxv[1].data
            if self._verbose:
                print(self._value)
                print(self._index)
            return self._value
        elif self._mode == 1:  ## normal mode
            return F.linear(input, self.weight, self.bias)
        else:
            return F.linear(input, self.weight, self.bias)

    def getValue(self, pos):
        return self._value.flatten()[pos]  ## value at position

    def getIndex(self, pos):
        tpos = self._index.flatten()[pos].item()
        return tpos

    def getOutShape(self):
        return self._value.shape

    def backward(self, input):
        ## use the last tensor (upper-layer result)
        current_pos = int(input[-1, 0].item())  ## current position
        current_val = self.getValue(current_pos)
        under_pos = self.getIndex(current_pos)
        under_out = torch.tensor([[under_pos, current_val, 0]])
        out = torch.cat([input, under_out], dim=0)
        if self._bverbose:
            print('=== linear backward ===')
            print('selected class = ', current_pos)
            print('max value = ', current_val)
            print('position in under layer = ', under_pos)
            print('-- input')
            print(input)
            print('-- output')
            print(out)
            print('======')
        return out

    def extra_repr(self):
        return 'in_features={}, out_features={}, bias={}'.format(
            self.in_features, self.out_features, self.bias is not None)

class group_relaxed_SCAD_Dense(Module):
    """Implementation of the group relaxed SCAD regularization for the
    input units of a fully connected layer."""

    def __init__(self, in_features, out_features, bias=True, lamba=1.,
                 alpha=3.7, beta=4.0, weight_decay=1., **kwargs):
        """
        :param in_features: input dimensionality
        :param out_features: output dimensionality
        :param bias: whether we use bias
        :param lamba: strength of the SCAD regularization
        """
        super(group_relaxed_SCAD_Dense, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.Tensor(in_features, out_features))
        self.u = torch.rand(in_features, out_features)
        if torch.cuda.is_available():
            self.u = self.u.to('cuda')
        if bias:
            self.bias = Parameter(torch.Tensor(out_features))
        else:
            self.register_parameter('bias', None)
        self.lamba = lamba
        self.alpha = alpha
        self.beta = beta
        self.lamba1 = self.lamba / self.beta
        self.weight_decay = weight_decay
        self.floatTensor = (torch.FloatTensor if not torch.cuda.is_available()
                            else torch.cuda.FloatTensor)
        self.reset_parameters()
        print(self)

    def reset_parameters(self):
        init.kaiming_normal_(self.weight, mode='fan_out')
        if self.bias is not None:
            self.bias.data.normal_(0, 1e-2)

    def constrain_parameters(self, **kwargs):
        self.u = self.weight.clone()
        s = Softshrink(self.lamba1)
        # soft-shrink values whose absolute value is at most 2*lamba1
        shrink_value = s(self.weight.data)
        small = self.weight.abs() <= 2 * self.lamba1
        self.u[small] = shrink_value[small]
        # modify values whose absolute values are between 2*lamba1 and
        # alpha*lamba1
        modify_weight = self.weight.data
        modify_weight = ((self.alpha - 1) * modify_weight
                         - modify_weight.sign() * (self.alpha * self.lamba1)
                         ) / (self.alpha - 2)
        mid = ((self.weight.abs() > 2 * self.lamba1)
               & (self.weight.abs() <= self.alpha * self.lamba1))
        self.u[mid] = modify_weight[mid]

    def grow_beta(self, growth_factor):
        self.beta = self.beta * growth_factor
        self.lamba1 = self.lamba / self.beta

    def _reg_w(self, **kwargs):
        logpw = (-self.beta * torch.sum(0.5 * self.weight.add(-self.u).pow(2))
                 - self.lamba * np.sqrt(self.out_features)
                 * torch.sum(torch.pow(torch.sum(self.weight.pow(2), 1), 0.5)))
        logpb = 0
        if self.bias is not None:
            logpb = -torch.sum(self.weight_decay * .5 * (self.bias.pow(2)))
        return logpw + logpb

    def regularization(self):
        return self._reg_w()

    def count_zero_u(self):
        total = np.prod(self.u.size())
        zero = total - self.u.nonzero().size(0)
        return zero

    def count_zero_w(self):
        return torch.sum((self.weight.abs() < 1e-5).int()).item()

    def count_weight(self):
        return np.prod(self.u.size())

    def count_active_neuron(self):
        return torch.sum(
            torch.sum(self.weight.abs() / self.out_features, 1) > 1e-5).item()

    def count_total_neuron(self):
        return self.in_features

    def count_expected_flops_and_l0(self):
        ppos = torch.sum(self.weight.abs() > 0.000001).item()
        expected_flops = (2 * ppos - 1) * self.out_features
        expected_l0 = ppos * self.out_features
        if self.bias is not None:
            expected_flops += self.out_features
            expected_l0 += self.out_features
        return expected_flops, expected_l0

    def forward(self, input):
        output = input.mm(self.weight)
        if self.bias is not None:
            output.add_(self.bias.view(1, self.out_features).expand_as(output))
        return output

    def __repr__(self):
        return (self.__class__.__name__ + ' ('
                + str(self.in_features) + ' -> '
                + str(self.out_features) + ', lambda: '
                + str(self.lamba) + ')')

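# Standalone sketch (function name is illustrative): the element-wise SCAD
# thresholding used in constrain_parameters, written as a free function:
# soft-threshold below 2*lam, interpolate between 2*lam and alpha*lam, and
# leave larger entries untouched.
import torch

def scad_threshold(w, lam, alpha=3.7):
    u = w.clone()
    soft = torch.sign(w) * torch.clamp(w.abs() - lam, min=0.0)
    mid = ((alpha - 1) * w - torch.sign(w) * alpha * lam) / (alpha - 2)
    u = torch.where(w.abs() <= 2 * lam, soft, u)
    u = torch.where((w.abs() > 2 * lam) & (w.abs() <= alpha * lam), mid, u)
    return u

w = torch.tensor([-3.0, -1.5, -0.2, 0.1, 0.8, 2.5, 5.0])
print(scad_threshold(w, lam=0.5))
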
class ElementWiseConv2d(nn.Module):
    """Modified conv with masks for weights."""

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=False,
                 mask_init='uniform', mask_scale=1e-2,
                 threshold_fn='binarizer', threshold=0.0):
        super(ElementWiseConv2d, self).__init__()
        kernel_size = _pair(kernel_size)
        stride = _pair(stride)
        padding = _pair(padding)
        dilation = _pair(dilation)
        self.mask_scale = mask_scale
        self.mask_init = mask_init

        if in_channels % groups != 0:
            raise ValueError('in_channels must be divisible by groups')
        if out_channels % groups != 0:
            raise ValueError('out_channels must be divisible by groups')
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.transposed = False
        self.output_padding = _pair(0)
        self.groups = groups
        # no bias is used for now (see the place365 weight below)
        self.register_parameter('bias', None)

        # imagenet pretrained weight
        self.imagenet_weight = Parameter(
            torch.Tensor(out_channels, in_channels // groups, *kernel_size),
            requires_grad=True)
        # place365 weight, no bias for now
        self.place365_weight = Parameter(
            torch.Tensor(out_channels, in_channels // groups, *kernel_size),
            requires_grad=True)

        # Initialize real-valued mask weights.
        self.mask_real = self.imagenet_weight.data.new(
            self.imagenet_weight.size())
        if mask_init == '1s':
            self.mask_real.fill_(mask_scale)
        elif mask_init == 'uniform':
            self.mask_real.uniform_(-1 * mask_scale, mask_scale)
        # mask_real is now a trainable parameter.
        self.mask_real = Parameter(self.mask_real)

        '''
        # changed for auto threshold
        self.threshold = nn.Parameter(torch.Tensor([threshold]),
                                      requires_grad=False)
        '''

        # Initialize the thresholder.
        if threshold_fn == 'binarizer':
            print('Calling binarizer with threshold:', threshold)
            self.threshold_fn = Binarizer(threshold=threshold)
        elif threshold_fn == 'ternarizer':
            print('Calling ternarizer with threshold:', threshold)
            self.threshold_fn = Ternarizer(threshold=threshold)

    def forward(self, input):
        # Get binarized/ternarized mask from real-valued mask.
        # mask_thresholded = self.threshold_fn(self.mask_real)
        # mask_thresholded = torch.sigmoid(self.mask_real)
        prob_data = self.mask_real.clone()
        prob_data[self.mask_real.le(0.5)] = 0
        prob_data[self.mask_real.gt(0.5)] = 1
        # straight-through estimator: the hard mask is used in the forward
        # pass while gradients flow to mask_real
        mask_thresholded = ((prob_data - self.mask_real).detach()
                            + self.mask_real)

        # changed for auto threshold
        # mask_thresholded = Binarizer_auto()(self.mask_real + self.threshold)

        # Mask weights with above mask.
        weight_combined = (mask_thresholded * self.place365_weight
                           + (1 - mask_thresholded) * self.imagenet_weight)
        # weight_combined = self.place365_weight

        # Perform conv using modified weight.
        return F.conv2d(input, weight_combined, None, self.stride,
                        self.padding, self.dilation, self.groups)

    def __repr__(self):
        s = ('{name} ({in_channels}, {out_channels}, '
             'kernel_size={kernel_size}, stride={stride}')
        if self.padding != (0,) * len(self.padding):
            s += ', padding={padding}'
        if self.dilation != (1,) * len(self.dilation):
            s += ', dilation={dilation}'
        if self.output_padding != (0,) * len(self.output_padding):
            s += ', output_padding={output_padding}'
        if self.groups != 1:
            s += ', groups={groups}'
        if self.bias is None:
            s += ', bias=False'
        s += ')'
        return s.format(name=self.__class__.__name__, **self.__dict__)

    def _apply(self, fn):
        for module in self.children():
            module._apply(fn)
        for param in self._parameters.values():
            if param is not None:
                # Variables stored in modules are graph leaves, and we don't
                # want to create copy nodes, so we have to unpack the data.
                param.data = fn(param.data)
                if param._grad is not None:
                    param._grad.data = fn(param._grad.data)
        for key, buf in self._buffers.items():
            if buf is not None:
                self._buffers[key] = fn(buf)
        self.imagenet_weight.data = fn(self.imagenet_weight.data)
        return self