def __init__(
    self,
    in_planes,
    planes,
    stride=1,
    sparse_weights_class=SparseWeights2d,
    sparsity=None,
    percent_on=None,
):
    super(SparsePreActBottleneckNoBN, self).__init__(
        in_planes, planes, stride=stride
    )
    if sparsity is None:
        sparsity = {"conv1": 0.01, "conv2": 0.01, "conv3": 0.01, "shortcut": 0.01}
    if percent_on is None:
        percent_on = {
            "nonlinearity1": 0.9,
            "nonlinearity2": 0.9,
            "nonlinearity3": 0.9,
        }

    # weight sparsity
    if sparsity["conv1"] > 0.3:
        self.conv1 = sparse_weights_class(
            self.conv1, sparsity=sparsity["conv1"], allow_extremes=True
        )
    if sparsity["conv2"] > 0.3:
        self.conv2 = sparse_weights_class(
            self.conv2, sparsity=sparsity["conv2"], allow_extremes=True
        )
    if sparsity["conv3"] > 0.3:
        self.conv3 = sparse_weights_class(
            self.conv3, sparsity=sparsity["conv3"], allow_extremes=True
        )
    if hasattr(self, "shortcut") and sparsity["shortcut"] > 0.3:
        self.shortcut = nn.Sequential(
            sparse_weights_class(
                self.shortcut._modules["0"],
                sparsity=sparsity["shortcut"],
                allow_extremes=True,
            )
        )

    # activation sparsity
    if percent_on["nonlinearity1"] >= 0.5:
        self.nonlinearity1 = F.relu
    else:
        self.nonlinearity1 = KWinners2d(
            in_planes, percent_on=percent_on["nonlinearity1"]
        )
    if percent_on["nonlinearity2"] >= 0.5:
        self.nonlinearity2 = F.relu
    else:
        self.nonlinearity2 = KWinners2d(
            in_planes, percent_on=percent_on["nonlinearity2"]
        )
    if percent_on["nonlinearity3"] >= 0.5:
        self.nonlinearity3 = F.relu
    else:
        self.nonlinearity3 = KWinners2d(
            in_planes, percent_on=percent_on["nonlinearity3"]
        )
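# Hedged usage sketch for the block above. SparsePreActBottleneckNoBN and its
# base class come from the surrounding codebase; all numbers here are
# illustrative. Note that sparse weights are only applied when the requested
# sparsity exceeds 0.3, and KWinners2d only when percent_on < 0.5.
block = SparsePreActBottleneckNoBN(
    in_planes=64,
    planes=64,
    stride=1,
    sparsity={"conv1": 0.5, "conv2": 0.5, "conv3": 0.5, "shortcut": 0.5},
    percent_on={"nonlinearity1": 0.3, "nonlinearity2": 0.3,
                "nonlinearity3": 0.3},
)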
def __init__(self, in_planes, planes, stride=1):
    super().__init__()
    self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride,
                           padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(planes)
    self.kw1 = KWinners2d(planes, percent_on=0.1, local=False)
    self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1,
                           padding=1, bias=False)
    self.bn2 = nn.BatchNorm2d(planes)
    self.kw2 = KWinners2d(planes, percent_on=0.1, local=False)

    self.shortcut = nn.Sequential()
    if stride != 1 or in_planes != self.expansion * planes:
        self.shortcut = nn.Sequential(
            nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1,
                      stride=stride, bias=False),
            nn.BatchNorm2d(self.expansion * planes),
            KWinners2d(self.expansion * planes, percent_on=0.1, local=False),
        )
    self.stride = stride
def __init__(self, cnn_out_channels=(64, 64), cnn_percent_on=(0.095, 0.125),
             linear_units=1000, linear_percent_on=0.1,
             linear_weight_sparsity=0.4, boost_strength=1.5,
             boost_strength_factor=0.9, k_inference_factor=1.5,
             duty_cycle_period=1000):
    super(GSCSparseCNN, self).__init__(OrderedDict([
        # First Sparse CNN layer
        ("cnn1", nn.Conv2d(1, cnn_out_channels[0], 5)),
        ("cnn1_batchnorm", nn.BatchNorm2d(cnn_out_channels[0], affine=False)),
        ("cnn1_maxpool", nn.MaxPool2d(2)),
        ("cnn1_kwinner", KWinners2d(channels=cnn_out_channels[0],
                                    percent_on=cnn_percent_on[0],
                                    k_inference_factor=k_inference_factor,
                                    boost_strength=boost_strength,
                                    boost_strength_factor=boost_strength_factor,
                                    duty_cycle_period=duty_cycle_period)),

        # Second Sparse CNN layer
        ("cnn2", nn.Conv2d(cnn_out_channels[0], cnn_out_channels[1], 5)),
        ("cnn2_batchnorm", nn.BatchNorm2d(cnn_out_channels[1], affine=False)),
        ("cnn2_maxpool", nn.MaxPool2d(2)),
        ("cnn2_kwinner", KWinners2d(channels=cnn_out_channels[1],
                                    percent_on=cnn_percent_on[1],
                                    k_inference_factor=k_inference_factor,
                                    boost_strength=boost_strength,
                                    boost_strength_factor=boost_strength_factor,
                                    duty_cycle_period=duty_cycle_period)),

        ("flatten", Flatten()),

        # Sparse Linear layer
        ("linear", SparseWeights(
            nn.Linear(25 * cnn_out_channels[1], linear_units),
            weight_sparsity=linear_weight_sparsity)),
        ("linear_bn", nn.BatchNorm1d(linear_units, affine=False)),
        ("linear_kwinner", KWinners(n=linear_units,
                                    percent_on=linear_percent_on,
                                    k_inference_factor=k_inference_factor,
                                    boost_strength=boost_strength,
                                    boost_strength_factor=boost_strength_factor,
                                    duty_cycle_period=duty_cycle_period)),

        # Classifier
        ("output", nn.Linear(linear_units, 12)),
        ("softmax", nn.LogSoftmax(dim=1)),
    ]))
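# Hedged usage sketch: GSCSparseCNN subclasses nn.Sequential, so a forward pass
# is a direct call. The 25 * cnn_out_channels[1] linear input implies 32x32
# single-channel inputs (32 -> conv5 -> 28 -> pool2 -> 14 -> conv5 -> 10
# -> pool2 -> 5, and 5 * 5 = 25); the batch size below is illustrative.
import torch

model = GSCSparseCNN()
model.eval()  # inference mode: k_inference_factor scales percent_on
log_probs = model(torch.rand(8, 1, 32, 32))
assert log_probs.shape == (8, 12)  # log-probabilities over the 12 GSC classes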
def relu_maybe_kwinners2d(channels, density=1.0, k_inference_factor=1.0,
                          boost_strength=1.0, boost_strength_factor=0.9,
                          duty_cycle_period=1000, local=True, inplace=True,
                          break_ties=False, compatibility_mode=True,
                          explicit_relu=False):
    """
    Get an nn.ReLU, possibly followed by a KWinners2d.

    :param density: Either a density or a function that returns a density.
    :type density: float or function(channels)

    :param compatibility_mode: Insert an nn.Sequential and nn.Identity to
           maintain compatibility with old checkpoints.
    :type compatibility_mode: bool

    :param explicit_relu: Slower, but useful if you need to fuse the relu
           for quantization.
    :type explicit_relu: bool
    """
    if callable(density):
        density = density(channels)

    if density == 1.0:
        return nn.ReLU(inplace=inplace)

    if explicit_relu:
        return nn.Sequential(
            nn.ReLU(inplace=inplace),
            KWinners2d(channels, percent_on=density,
                       k_inference_factor=k_inference_factor,
                       boost_strength=boost_strength,
                       boost_strength_factor=boost_strength_factor,
                       local=local, break_ties=break_ties, inplace=False))

    layer = KWinners2d(channels, percent_on=density,
                       k_inference_factor=k_inference_factor,
                       boost_strength=boost_strength,
                       boost_strength_factor=boost_strength_factor,
                       local=local, break_ties=break_ties, inplace=inplace,
                       relu=True)
    if compatibility_mode:
        # Preserve compatibility with old checkpoints that used an explicit
        # nn.ReLU before the KWinners.
        return nn.Sequential(nn.Identity(), layer)
    else:
        return layer
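# Hedged sketch of the three return shapes above; the channel count is
# arbitrary.
dense_act = relu_maybe_kwinners2d(64)                    # plain nn.ReLU
sparse_act = relu_maybe_kwinners2d(64, density=0.25)     # Sequential(Identity, KWinners2d(relu=True))
fusable_act = relu_maybe_kwinners2d(64, density=0.25,
                                    explicit_relu=True)  # Sequential(ReLU, KWinners2d)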
def __init__(self, cnn_out_channels=(32, 64), cnn_percent_on=(0.087, 0.293),
             linear_units=700, linear_percent_on=0.143,
             linear_weight_sparsity=0.3, boost_strength=1.5,
             boost_strength_factor=0.85, k_inference_factor=1.5,
             duty_cycle_period=1000):
    super(MNISTSparseCNN, self).__init__(OrderedDict([
        # First Sparse CNN layer
        ("cnn1", nn.Conv2d(1, cnn_out_channels[0], 5)),
        ("cnn1_maxpool", nn.MaxPool2d(2)),
        ("cnn1_kwinner", KWinners2d(channels=cnn_out_channels[0],
                                    percent_on=cnn_percent_on[0],
                                    k_inference_factor=k_inference_factor,
                                    boost_strength=boost_strength,
                                    boost_strength_factor=boost_strength_factor,
                                    duty_cycle_period=duty_cycle_period)),

        # Second Sparse CNN layer
        ("cnn2", nn.Conv2d(cnn_out_channels[0], cnn_out_channels[1], 5)),
        ("cnn2_maxpool", nn.MaxPool2d(2)),
        ("cnn2_kwinner", KWinners2d(channels=cnn_out_channels[1],
                                    percent_on=cnn_percent_on[1],
                                    k_inference_factor=k_inference_factor,
                                    boost_strength=boost_strength,
                                    boost_strength_factor=boost_strength_factor,
                                    duty_cycle_period=duty_cycle_period)),

        ("flatten", Flatten()),

        # Sparse Linear layer
        ("linear", SparseWeights(
            nn.Linear(16 * cnn_out_channels[1], linear_units),
            weight_sparsity=linear_weight_sparsity)),
        ("linear_kwinner", KWinners(n=linear_units,
                                    percent_on=linear_percent_on,
                                    k_inference_factor=k_inference_factor,
                                    boost_strength=boost_strength,
                                    boost_strength_factor=boost_strength_factor,
                                    duty_cycle_period=duty_cycle_period)),

        # Classifier
        ("output", nn.Linear(linear_units, 10)),
        ("softmax", nn.LogSoftmax(dim=1)),
    ]))
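# Hedged usage sketch: the 16 * cnn_out_channels[1] linear input implies 28x28
# MNIST inputs (28 -> conv5 -> 24 -> pool2 -> 12 -> conv5 -> 8 -> pool2 -> 4,
# and 4 * 4 = 16). Batch size is illustrative.
import torch

model = MNISTSparseCNN()
model.eval()
log_probs = model(torch.rand(8, 1, 28, 28))
assert log_probs.shape == (8, 10)  # log-probabilities over the 10 digits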
def __init__(self, in_planes, planes, stride=1, percent_on=None,
             central_data=None):
    super(VDropSparsePreActBottleneckNoBN, self).__init__()
    if percent_on is None:
        percent_on = {
            "nonlinearity1": 0.9,
            "nonlinearity2": 0.9,
            "nonlinearity3": 0.9,
        }
    self.conv1 = VDropConv2d(
        in_planes, planes, kernel_size=1, central_data=central_data
    )
    self.conv2 = VDropConv2d(
        planes, planes, kernel_size=3, central_data=central_data,
        stride=stride, padding=1,
    )
    self.conv3 = VDropConv2d(
        in_planes, self.expansion * planes, kernel_size=1,
        central_data=central_data
    ) if False else VDropConv2d(
        planes, self.expansion * planes, kernel_size=1,
        central_data=central_data
    )
    if stride != 1 or in_planes != self.expansion * planes:
        self.shortcut = nn.Sequential(
            VDropConv2d(
                in_planes, self.expansion * planes, kernel_size=1,
                central_data=central_data, stride=stride,
            )
        )

    # activation sparsity
    if percent_on["nonlinearity1"] >= 0.5:
        self.nonlinearity1 = F.relu
    else:
        self.nonlinearity1 = KWinners2d(
            in_planes, percent_on=percent_on["nonlinearity1"]
        )
    if percent_on["nonlinearity2"] >= 0.5:
        self.nonlinearity2 = F.relu
    else:
        self.nonlinearity2 = KWinners2d(
            in_planes, percent_on=percent_on["nonlinearity2"]
        )
    if percent_on["nonlinearity3"] >= 0.5:
        self.nonlinearity3 = F.relu
    else:
        self.nonlinearity3 = KWinners2d(
            in_planes, percent_on=percent_on["nonlinearity3"]
        )
def _kwinners(self, fout):
    return KWinners2d(
        channels=fout,
        percent_on=self.percent_on,
        boost_strength=self.boost_strength,
        boost_strength_factor=self.boost_strength_factor,
    )
def vgg19_bn_kw(config):
    model = models.vgg19_bn()
    # Remove all fc layers and replace them with a single fc layer,
    # going from ~143M to ~20M parameters.
    model.classifier = nn.Linear(7 * 7 * 512, config["num_classes"])

    new_features = []
    for layer in model.features:
        # replace max pooling with average pooling
        if isinstance(layer, nn.MaxPool2d):
            new_features.append(nn.AvgPool2d(kernel_size=2, stride=2))
        # store the number of out channels from conv layers
        elif isinstance(layer, nn.Conv2d):
            new_features.append(layer)
            last_conv_out_channels = layer.out_channels
        # switch ReLU to KWinners2d
        elif isinstance(layer, nn.ReLU):
            new_features.append(
                KWinners2d(
                    channels=last_conv_out_channels,
                    percent_on=config["percent_on"],
                    boost_strength=config["boost_strength"],
                    boost_strength_factor=config["boost_strength_factor"],
                ))
        # otherwise add it as-is
        else:
            new_features.append(layer)

    model.features = nn.Sequential(*new_features)
    return model
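# Hypothetical config for the factory above; the keys are exactly the ones it
# reads, the values are illustrative.
config = {
    "num_classes": 10,
    "percent_on": 0.3,
    "boost_strength": 1.5,
    "boost_strength_factor": 0.9,
}
model = vgg19_bn_kw(config)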
def test_k_winners2d_two(self):
    """
    Equal duty cycle, boost_strength=0, percent_on=0.5, batch size=2
    """
    x = self.x[0:2]
    n, c, h, w = x.shape

    kw = KWinners2d(
        percent_on=0.5,  # k=2
        channels=c,
        k_inference_factor=1.0,
        boost_strength=0.0,
        duty_cycle_period=1000,
        local=True,
    )
    kw.train(mode=False)

    expected = torch.zeros_like(x)
    expected[0, [2, 3], 0, 0] = x[0, [2, 3], 0, 0]
    expected[0, [1, 3], 0, 1] = x[0, [1, 3], 0, 1]
    expected[0, [0, 1], 1, 0] = x[0, [0, 1], 1, 0]
    expected[0, [2, 3], 1, 1] = x[0, [2, 3], 1, 1]
    expected[1, [2, 3], 0, 0] = x[1, [2, 3], 0, 0]
    expected[1, [1, 3], 0, 1] = x[1, [1, 3], 0, 1]
    expected[1, [0, 3], 1, 0] = x[1, [0, 3], 1, 0]
    expected[1, [0, 2], 1, 1] = x[1, [0, 2], 1, 1]

    result = kw(x)
    self.assertEqual(result.shape, expected.shape)

    num_correct = (result == expected).sum()
    self.assertEqual(num_correct, result.reshape(-1).size()[0])
def activation_layer(
    out, layer_params, kernel_size=0
):
    """Basic activation layer. Defaults to ReLU; if KWinners
    `activation_params` can be evaluated from `layer_params`, a KWinners2d
    module followed by ReLU is used instead."""

    # Compute activation params for the kwinners module.
    if layer_params is not None:
        activation_params = layer_params.get_activation_params(0, out, kernel_size)
    else:
        activation_params = None

    # Initialize kwinners module as specified.
    if activation_params is not None:
        return nn.Sequential(
            KWinners2d(out, **activation_params),
            nn.ReLU(inplace=True)
        )
    else:
        return nn.ReLU(inplace=True)
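# Hedged usage: with layer_params=None the helper falls back to a plain ReLU;
# `layer_params` is assumed to expose get_activation_params(index, out,
# kernel_size) returning KWinners2d kwargs (e.g. percent_on) or None.
relu_only = activation_layer(64, None)  # nn.ReLU(inplace=True)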
def test_k_winners2d_module(self):
    x = self.x2

    kw = KWinners2d(
        percent_on=0.333,
        channels=3,
        k_inference_factor=0.5,
        boost_strength=1.0,
        boost_strength_factor=0.5,
        duty_cycle_period=1000,
    )

    expected = torch.zeros_like(x)
    expected[0, 0, 1, 0] = 1.1
    expected[0, 0, 1, 1] = 1.2
    expected[0, 1, 0, 1] = 1.2
    expected[0, 2, 1, 0] = 1.3
    expected[1, 0, 0, 0] = 1.4
    expected[1, 1, 0, 0] = 1.5
    expected[1, 1, 0, 1] = 1.6
    expected[1, 2, 1, 1] = 1.7

    result = kw(x)
    self.assertEqual(result.shape, expected.shape)

    num_correct = (result == expected).sum()
    self.assertEqual(num_correct, result.reshape(-1).size()[0])

    new_duty = torch.tensor([1.5000, 1.5000, 1.0000]) / 4.0
    diff = (kw.duty_cycle.reshape(-1) - new_duty).abs().sum()
    self.assertLessEqual(diff, 0.001)
def test_k_winners2d_module_one(self):
    x = self.x2

    expected = torch.zeros_like(x)
    expected[0, 0, 1, 0] = x[0, 0, 1, 0]
    expected[0, 0, 1, 1] = x[0, 0, 1, 1]
    expected[0, 1, 0, 1] = x[0, 1, 0, 1]
    expected[0, 2, 1, 0] = x[0, 2, 1, 0]
    expected[1, 0, 0, 0] = x[1, 0, 0, 0]
    expected[1, 1, 0, 0] = x[1, 1, 0, 0]
    expected[1, 1, 0, 1] = x[1, 1, 0, 1]
    expected[1, 2, 1, 1] = x[1, 2, 1, 1]

    for break_ties in [True, False]:
        with self.subTest(break_ties=break_ties):
            kw = KWinners2d(
                percent_on=0.333,
                channels=3,
                k_inference_factor=0.5,
                boost_strength=1.0,
                boost_strength_factor=0.5,
                duty_cycle_period=1000,
                local=False,
                break_ties=break_ties,
            )

            result = kw(x)
            self.assertEqual(result.shape, expected.shape)

            num_correct = (result == expected).sum()
            self.assertEqual(num_correct, result.reshape(-1).size()[0])

            new_duty = torch.tensor([1.5000, 1.5000, 1.0000]) / 4.0
            diff = (kw.duty_cycle.reshape(-1) - new_duty).abs().sum()
            self.assertLessEqual(diff, 0.001)
def testKWinners2dModule(self):
    x = self.x2

    # Legacy API: this test targets the old camelCase KWinners2d signature.
    kw = KWinners2d(n=12, k=4, channels=3,
                    kInferenceFactor=0.5,
                    boostStrength=1.0,
                    boostStrengthFactor=0.5,
                    dutyCyclePeriod=1000)

    expected = torch.zeros_like(x)
    expected[0, 0, 1, 0] = 1.1
    expected[0, 0, 1, 1] = 1.2
    expected[0, 1, 0, 1] = 1.2
    expected[0, 2, 1, 0] = 1.3
    expected[1, 0, 0, 0] = 1.4
    expected[1, 1, 0, 0] = 1.5
    expected[1, 1, 0, 1] = 1.6
    expected[1, 2, 1, 1] = 1.7

    result = kw(x)
    self.assertEqual(result.shape, expected.shape)

    numCorrect = (result == expected).sum()
    self.assertEqual(numCorrect, result.reshape(-1).size()[0])

    newDuty = torch.tensor([1.5000, 1.5000, 1.0000]) / 4.0
    diff = (kw.dutyCycle.reshape(-1) - newDuty).abs().sum()
    self.assertLessEqual(diff, 0.001)
def __init__(self, percent_on, boost_strength):
    super(MLPAutoEncoder, self).__init__()
    self.dense1_encode = nn.Linear(in_features=7 * 7, out_features=25)
    self.dense2_encode = nn.Linear(in_features=25, out_features=128)
    self.k_winner = KWinners2d(channels=128,
                               percent_on=percent_on,
                               boost_strength=boost_strength,
                               local=True)
    self.dense1_decode = nn.Linear(in_features=128, out_features=7 * 7)
def relu_maybe_kwinners2d(channels, density=1.0, k_inference_factor=1.0,
                          boost_strength=1.0, boost_strength_factor=0.9,
                          duty_cycle_period=1000, local=True):
    """
    Get an nn.ReLU, possibly followed by a KWinners2d.

    :param density: Either a density or a function that returns a density.
    :type density: float or function(channels)
    """
    layer = nn.ReLU(inplace=True)

    if callable(density):
        density = density(channels)

    if density < 1.0:
        layer = nn.Sequential(
            layer,
            KWinners2d(channels, percent_on=density,
                       boost_strength=boost_strength,
                       boost_strength_factor=boost_strength_factor,
                       local=local,
                       k_inference_factor=k_inference_factor))
    return layer
def mobile_net_v1_sparse_depth(
    num_classes=1001,
    width_mult=1.0,
    percent_on=0.1,
    k_inference_factor=1.0,
    boost_strength=1.0,
    boost_strength_factor=1.0,
    duty_cycle_period=1000,
):
    """Create a MobileNetV1 network with sparse depthwise layers by replacing
    the activation function of each depthwise (3x3) convolution from ReLU
    with k-winners.

    :param num_classes: Number of output classes (10 for CIFAR10)
    :type num_classes: int

    :param width_mult: Width multiplier, used to thin the network
    :type width_mult: float

    :param percent_on: The activity of the top k = percent_on * number of
           input units will be allowed to remain, the rest are set to zero.
    :type percent_on: float

    :param k_inference_factor: During inference (training=False) we increase
           percent_on by this factor. percent_on * k_inference_factor must be
           strictly less than 1.0, ideally much lower than 1.0
    :type k_inference_factor: float

    :param boost_strength: boost strength (0.0 implies no boosting)
    :type boost_strength: float

    :param boost_strength_factor: Boost strength factor to use [0..1]
    :type boost_strength_factor: float

    :param duty_cycle_period: The period used to calculate duty cycles
    :type duty_cycle_period: int

    :return: Depthwise-sparse MobileNetV1 model
    """
    model = MobileNetV1(num_classes=num_classes, width_mult=width_mult)

    # Replace the depthwise ReLU (3rd layer in each block) with k-winners
    for block in model.deepwise:
        # Get the number of features from the preceding BatchNorm2d layer
        channels = block[1].num_features
        block[2] = KWinners2d(
            channels,
            percent_on=percent_on,
            k_inference_factor=k_inference_factor,
            boost_strength=boost_strength,
            boost_strength_factor=boost_strength_factor,
            duty_cycle_period=duty_cycle_period,
        )
    return model
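# Hedged usage sketch: build the depthwise-sparse MobileNetV1 with
# illustrative hyperparameters; MobileNetV1 itself comes from the surrounding
# codebase.
model = mobile_net_v1_sparse_depth(
    num_classes=10,
    percent_on=0.15,
    boost_strength=1.5,
    boost_strength_factor=0.9,
)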
def test_k_winners2d_relu(self):
    x = torch.zeros(2, 4, 2, 2)
    x[0, :, 0, 0] = torch.FloatTensor([-6, -7, -8, -9])
    x[0, :, 0, 1] = torch.FloatTensor([0, 3, -42, -19])
    x[0, :, 1, 0] = torch.FloatTensor([-1, -2, 3, -4])
    x[0, :, 1, 1] = torch.FloatTensor([-10, -11, -12, -13])
    x[1, :, 0, 0] = torch.FloatTensor([-10, -12, -31, -42])
    x[1, :, 0, 1] = torch.FloatTensor([0, -1, 0, -6])
    x[1, :, 1, 0] = torch.FloatTensor([-2, -10, -11, -4])
    x[1, :, 1, 1] = torch.FloatTensor([-7, -1, -10, -3])

    expected = torch.zeros(2, 4, 2, 2)
    expected[0, 1, 0, 1] = 3
    expected[0, 2, 1, 0] = 3

    for break_ties in [True, False]:
        with self.subTest(break_ties=break_ties):
            kw = KWinners2d(
                percent_on=0.25,
                channels=4,
                k_inference_factor=0.5,
                boost_strength=1.0,
                boost_strength_factor=0.5,
                duty_cycle_period=1000,
                local=False,
                break_ties=break_ties,
                relu=True,
            )
            result = kw(x)
            self.assertTrue(result.eq(expected).all())
def _sparsify_relu(parent, relu_names, channels, percent_on,
                   k_inference_factor, boost_strength, boost_strength_factor,
                   duty_cycle_period):
    """Replace ReLU modules with k-winners wherever percent_on < 1.0.

    :param parent: Parent layer containing the ReLU modules to be replaced
    :param relu_names: List of ReLU module names to be replaced
    :param channels: List of input channels for each k-winner
    :param percent_on: List of `percent_on` parameters for each ReLU
    :param k_inference_factor: During inference (training=False) we increase
           `percent_on` in all sparse layers by this factor
    :param boost_strength: boost strength (0.0 implies no boosting)
    :param boost_strength_factor: Boost strength factor to use [0..1]
    :param duty_cycle_period: The period used to calculate duty cycles
    """
    for i, name in enumerate(relu_names):
        if percent_on[i] >= 1.0:
            continue

        assert isinstance(parent.__getattr__(name), nn.ReLU)
        parent.__setattr__(name, KWinners2d(
            channels=channels[i],
            percent_on=percent_on[i],
            k_inference_factor=k_inference_factor,
            boost_strength=boost_strength,
            boost_strength_factor=boost_strength_factor,
            duty_cycle_period=duty_cycle_period,
        ))
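# Hedged example: the toy module and all numbers below are illustrative.
# relu1 has percent_on=1.0 and is left untouched; relu2 is swapped for a
# KWinners2d in place.
from collections import OrderedDict

import torch.nn as nn

net = nn.Sequential(OrderedDict([
    ("conv1", nn.Conv2d(3, 16, 3, padding=1)), ("relu1", nn.ReLU()),
    ("conv2", nn.Conv2d(16, 32, 3, padding=1)), ("relu2", nn.ReLU()),
]))
_sparsify_relu(net, ["relu1", "relu2"], channels=[16, 32],
               percent_on=[1.0, 0.2], k_inference_factor=1.0,
               boost_strength=1.5, boost_strength_factor=0.9,
               duty_cycle_period=1000)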
def test_k_winners2d_one_relu(self):
    """
    Equal duty cycle, boost_strength=0, percent_on=0.5, batch size=1, relu
    """
    x = self.x[0:1]
    n, c, h, w = x.shape

    expected = torch.zeros_like(x)
    expected[0, [2, 3], 0, 0] = x[0, [2, 3], 0, 0]
    expected[0, [1, 3], 0, 1] = x[0, [1, 3], 0, 1]
    expected[0, [2, 3], 1, 1] = x[0, [2, 3], 1, 1]

    for break_ties in [True, False]:
        with self.subTest(break_ties=break_ties):
            kw = KWinners2d(
                percent_on=0.5,  # k=2
                channels=c,
                k_inference_factor=1.0,
                boost_strength=0.0,
                duty_cycle_period=1000,
                local=True,
                break_ties=break_ties,
                relu=True,
            )
            kw.train(mode=False)

            result = kw(x)
            self.assertEqual(result.shape, expected.shape)

            num_correct = (result == expected).sum()
            self.assertEqual(num_correct, result.reshape(-1).size()[0])
def test_k_winners2d_module_two(self):
    """
    Test a series of calls on the module in training mode.
    """
    x = self.x2

    expected = torch.zeros_like(x)
    expected[0, 0, 1, 0] = 1.1
    expected[0, 0, 1, 1] = 1.2
    expected[0, 2, 1, 0] = 1.3
    expected[1, 0, 0, 0] = 1.4
    expected[1, 1, 0, 1] = 1.6
    expected[1, 2, 1, 1] = 1.7

    kw = KWinners2d(
        percent_on=0.25,
        channels=3,
        k_inference_factor=0.5,
        boost_strength=1.0,
        boost_strength_factor=0.5,
        duty_cycle_period=1000,
    )
    kw.train(mode=True)

    result = kw(x)
    result = kw(x)
    result = kw(x)
    result = kw(x)
    result = kw(x)
    result = kw(x)
    self.assertTrue(result.eq(expected).all())
def _kwinners(self, out):
    return KWinners2d(
        out,
        percent_on=self.percent_on_k_winner,
        boost_strength=self.boost_strength,
        boost_strength_factor=self.boost_strength_factor,
        k_inference_factor=self.k_inference_factor,
    )
def add_sparse_cnn_layer(
    network,
    suffix,
    in_channels,
    out_channels,
    use_batch_norm,
    weight_sparsity,
    percent_on,
    k_inference_factor,
    boost_strength,
    boost_strength_factor,
):
    """Add a sparse cnn layer to the network.

    :param network: The network to add the sparse layer to
    :param suffix: Layer suffix. Used to name its components
    :param in_channels: input channels
    :param out_channels: output channels
    :param use_batch_norm: whether or not to use batch norm
    :param weight_sparsity: Pct of weights that are allowed to be non-zero
    :param percent_on: Pct of ON (non-zero) units
    :param k_inference_factor: During inference we increase percent_on by
           this factor
    :param boost_strength: boost strength (0.0 implies no boosting)
    :param boost_strength_factor: boost strength is multiplied by this factor
           after each epoch
    """
    cnn = nn.Conv2d(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=5,
        padding=0,
        stride=1,
    )
    if 0 < weight_sparsity < 1.0:
        sparse_cnn = SparseWeights2d(cnn, weight_sparsity)
        network.add_module("cnnSdr{}_cnn".format(suffix), sparse_cnn)
    else:
        network.add_module("cnnSdr{}_cnn".format(suffix), cnn)

    if use_batch_norm:
        bn = nn.BatchNorm2d(out_channels, affine=False)
        network.add_module("cnnSdr{}_bn".format(suffix), bn)

    # Max pool
    maxpool = nn.MaxPool2d(kernel_size=2)
    network.add_module("cnnSdr{}_maxpool".format(suffix), maxpool)

    if 0 < percent_on < 1.0:
        kwinner = KWinners2d(
            channels=out_channels,
            percent_on=percent_on,
            k_inference_factor=k_inference_factor,
            boost_strength=boost_strength,
            boost_strength_factor=boost_strength_factor,
        )
        network.add_module("cnnSdr{}_kwinner".format(suffix), kwinner)
    else:
        network.add_module("cnnSdr{}_relu".format(suffix), nn.ReLU())
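# Hedged usage: grow a network one sparse conv block at a time on an empty
# nn.Sequential; all hyperparameters below are illustrative.
import torch.nn as nn

network = nn.Sequential()
add_sparse_cnn_layer(network, suffix=1, in_channels=1, out_channels=32,
                     use_batch_norm=True, weight_sparsity=0.5, percent_on=0.1,
                     k_inference_factor=1.5, boost_strength=1.5,
                     boost_strength_factor=0.9)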
def _add_cnn_layer(
    self,
    index_str,
    in_channels,
    out_channels,
    kernel_size,
    percent_on,
    weight_sparsity,
    k_inference_factor,
    boost_strength,
    boost_strength_factor,
    add_pooling,
    use_max_pooling,
):
    """Add a single CNN layer to our modules."""
    # Add CNN layer
    if kernel_size == 3:
        padding = 1
    else:
        padding = 2

    conv2d = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size,
                       padding=padding)
    if weight_sparsity < 1.0:
        conv2d = SparseWeights2d(conv2d, weight_sparsity=weight_sparsity)
    self.add_module("cnn_" + index_str, conv2d)

    self.add_module("bn_" + index_str, nn.BatchNorm2d(out_channels))

    if add_pooling:
        if use_max_pooling:
            self.add_module("maxpool_" + index_str,
                            nn.MaxPool2d(kernel_size=2, stride=2))
        else:
            self.add_module("avgpool_" + index_str,
                            nn.AvgPool2d(kernel_size=2, stride=2))

    if percent_on < 1.0:
        self.add_module(
            "kwinners_2d_" + index_str,
            KWinners2d(
                percent_on=percent_on,
                channels=out_channels,
                k_inference_factor=k_inference_factor,
                boost_strength=boost_strength,
                boost_strength_factor=boost_strength_factor,
            ),
        )
    else:
        self.add_module("ReLU_" + index_str, nn.ReLU(inplace=True))
def __init__(self, percent_on, boost_strength):
    super(SDRCNNBase, self).__init__()
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=5,
                           padding=2)
    self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
    self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5,
                           padding=0)
    self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
    self.k_winner = KWinners2d(channels=128,
                               percent_on=percent_on,
                               boost_strength=boost_strength,
                               local=True)
    # With 28x28 inputs (e.g. MNIST): 28 -> conv1(k5, pad2) -> 28 -> pool1
    # -> 14 -> conv2(k5, pad0) -> 10 -> pool2 -> 5, hence 128 * 5 * 5 below.
    self.dense1 = nn.Linear(in_features=128 * 5 * 5, out_features=256)
    self.dense2 = nn.Linear(in_features=256, out_features=128)
    self.output = nn.Linear(in_features=128, out_features=10)
    self.softmax = nn.LogSoftmax(dim=1)
def test_k_winners2d_grad(self):
    """
    Test gradient
    """
    x = torch.randn(self.x.size(), dtype=torch.double, requires_grad=True)
    n, c, h, w = x.shape

    kw = KWinners2d(
        percent_on=0.5,
        channels=c,
        k_inference_factor=1.0,
        boost_strength=0.0,
        boost_strength_factor=1.0,
        duty_cycle_period=1000,
        local=True,
    )
    self.assertTrue(gradcheck(kw, x, raise_exception=True))
def test_k_winners2d_train(self):
    """
    Test training.

    Changing duty cycle, boost_strength=1, percent_on=0.5, batch size=2
    """
    x = self.x[0:2]
    n, c, h, w = x.shape

    # Expectation due to boosting after the second training step
    expected = torch.zeros_like(x)
    expected[0, [2, 3], 0, 0] = x[0, [2, 3], 0, 0]
    expected[0, [1, 3], 0, 1] = x[0, [1, 3], 0, 1]
    expected[0, [0, 1], 1, 0] = x[0, [0, 1], 1, 0]
    expected[0, [0, 1], 1, 1] = x[0, [0, 1], 1, 1]
    expected[1, [2, 3], 0, 0] = x[1, [2, 3], 0, 0]
    expected[1, [1, 3], 0, 1] = x[1, [1, 3], 0, 1]
    expected[1, [0, 3], 1, 0] = x[1, [0, 3], 1, 0]
    expected[1, [0, 2], 1, 1] = x[1, [0, 2], 1, 1]

    for break_ties in [True, False]:
        with self.subTest(break_ties=break_ties):
            kw = KWinners2d(
                percent_on=0.5,
                channels=c,
                boost_strength=1.0,
                duty_cycle_period=10,
                local=True,
                break_ties=break_ties,
            )
            kw.train(mode=True)

            result = kw(x)
            result = kw(x)
            self.assertTrue(result.eq(expected).all())

            # Expectation due to boosting after the fourth training step
            expected_boosted = expected.clone()
            expected_boosted[0, [0, 1], 1, 1] = 0
            expected_boosted[0, [0, 2], 1, 1] = x[0, [0, 2], 1, 1]

            result = kw(x)
            result = kw(x)
            self.assertTrue(result.eq(expected_boosted).all())
def __init__(
    self,
    dpc=3,
    cnn_w_sparsity=0.05,
    linear_w_sparsity=0.5,
    cat_w_sparsity=0.01,
    n_classes=4,
):
    super(ToyNetwork, self).__init__()
    conv_channels = 128
    self.n_classes = n_classes
    self.conv1 = SparseWeights2d(
        nn.Conv2d(
            in_channels=1,
            out_channels=conv_channels,
            kernel_size=10,
            padding=0,
            stride=1,
        ),
        cnn_w_sparsity,
    )
    self.kwin1 = KWinners2d(conv_channels, percent_on=0.1)
    self.bn = nn.BatchNorm2d(conv_channels, affine=False)
    self.mp1 = nn.MaxPool2d(kernel_size=2)
    self.flatten = Flatten()
    self.d1 = DendriteLayer(
        in_dim=int(conv_channels / 64) * 7744,
        out_dim=1000,
        dendrites_per_neuron=dpc,
    )
    self.linear = SparseWeights(nn.Linear(1000, n_classes + 1),
                                linear_w_sparsity)
    self.cat = SparseWeights(nn.Linear(n_classes + 1, 1000 * dpc),
                             cat_w_sparsity)
def test_k_winners2d_module_two(self):
    """
    Test a series of calls on the module in training mode.
    """
    x = self.x2

    expected = torch.zeros_like(x)
    expected[0, 0, 1, 0] = x[0, 0, 1, 0]
    expected[0, 0, 1, 1] = x[0, 0, 1, 1]
    expected[0, 2, 1, 0] = x[0, 2, 1, 0]
    expected[1, 0, 0, 0] = x[1, 0, 0, 0]
    expected[1, 1, 0, 1] = x[1, 1, 0, 1]
    expected[1, 2, 1, 1] = x[1, 2, 1, 1]

    for break_ties in [True, False]:
        with self.subTest(break_ties=break_ties):
            kw = KWinners2d(
                percent_on=0.25,
                channels=3,
                k_inference_factor=0.5,
                boost_strength=1.0,
                boost_strength_factor=0.5,
                duty_cycle_period=1000,
                local=False,
                break_ties=break_ties,
            )
            kw.train(mode=True)

            result = kw(x)
            result = kw(x)
            result = kw(x)
            result = kw(x)
            result = kw(x)
            result = kw(x)
            self.assertTrue(result.eq(expected).all())
def test_k_winners2d_local_grad(self):
    """
    Test gradient
    """
    x = self.x[0:2].clone().detach().requires_grad_(True)
    n, c, h, w = x.shape

    grad = torch.rand_like(x, requires_grad=True)

    expected = torch.zeros_like(grad, requires_grad=False)
    expected[0, [2, 3], 0, 0] = grad[0, [2, 3], 0, 0]
    expected[0, [1, 3], 0, 1] = grad[0, [1, 3], 0, 1]
    expected[0, [0, 1], 1, 0] = grad[0, [0, 1], 1, 0]
    expected[0, [2, 3], 1, 1] = grad[0, [2, 3], 1, 1]
    expected[1, [2, 3], 0, 0] = grad[1, [2, 3], 0, 0]
    expected[1, [1, 3], 0, 1] = grad[1, [1, 3], 0, 1]
    expected[1, [0, 3], 1, 0] = grad[1, [0, 3], 1, 0]
    expected[1, [0, 2], 1, 1] = grad[1, [0, 2], 1, 1]

    for break_ties in [True, False]:
        with self.subTest(break_ties=break_ties):
            kw = KWinners2d(
                percent_on=0.5,  # k=2
                channels=c,
                k_inference_factor=1.0,
                boost_strength=0.0,
                duty_cycle_period=1000,
                local=True,
                break_ties=break_ties,
            )
            kw.train(mode=True)

            y = kw(x)
            y.backward(grad)
            assert_allclose(x.grad, expected)
            x.grad.zero_()
def _add_cnn_layer(self, index_str, in_channels, out_channels, kernel_size,
                   percent_on, weight_sparsity, add_pooling):
    """
    Add a single CNN layer to our modules
    """
    # Add CNN layer
    if kernel_size == 3:
        padding = 1
    else:
        padding = 2

    conv2d = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size,
                       padding=padding)
    if weight_sparsity < 1.0:
        # Legacy API: this codebase used the camelCase kwargs below.
        conv2d = SparseWeights2d(conv2d, weightSparsity=weight_sparsity)
    self.model.add_module("cnn_" + index_str, conv2d)

    self.model.add_module("bn_" + index_str, nn.BatchNorm2d(out_channels))

    if add_pooling:
        self.model.add_module("avgpool_" + index_str,
                              nn.AvgPool2d(kernel_size=2))

    if percent_on < 1.0:
        self.model.add_module(
            "kwinners_2d_" + index_str,
            KWinners2d(percent_on=percent_on,
                       channels=out_channels,
                       kInferenceFactor=self.k_inference_factor,
                       boostStrength=self.boost_strength,
                       boostStrengthFactor=self.boost_strength_factor))
    else:
        self.model.add_module("ReLU_" + index_str, nn.ReLU())