Example #1
    def test_tie_breaking_off(self):
        """
        Test k-winners with ties. Tie-breaking disabled.
        """
        x = self.x2
        # Force a tie between units 1 and 5
        x[0, 5] = x[0, 1]

        expected = torch.zeros_like(x)
        expected[0, 0] = x[0, 0]
        expected[0, 1] = x[0, 1]
        expected[0, 3] = x[0, 3]
        expected[0, 5] = x[0, 5]
        expected[1, 1] = x[1, 1]
        expected[1, 3] = x[1, 3]
        expected[1, 5] = x[1, 5]

        n = 6
        kw = KWinners(n,
                      percent_on=0.5,
                      k_inference_factor=1.0,
                      boost_strength=1.0,
                      boost_strength_factor=0.5,
                      duty_cycle_period=1000,
                      break_ties=False)
        kw.duty_cycle[:] = self.duty_cycle2

        result = kw(x)
        self.assertTrue(result.eq(expected).all())
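With break_ties=False, every unit tied with the k-th largest value stays active, so row 0 above ends up with four winners even though k is 3 (percent_on=0.5 of 6); with tie-breaking enabled the module keeps exactly k. A minimal plain-PyTorch sketch of the keep-ties behaviour (an illustration, not the library's implementation):

import torch

def k_winners_keep_ties(x, k):
    # Keep everything greater than or equal to the k-th largest value per row,
    # so ties at the threshold all survive and more than k units can stay on.
    threshold = x.topk(k, dim=1).values[:, -1:]
    return torch.where(x >= threshold, x, torch.zeros_like(x))

x = torch.tensor([[1.5, 0.9, 0.1, 1.3, 0.2, 0.9]])
print(k_winners_keep_ties(x, k=3))  # four non-zero entries: units 1 and 5 are tied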
Example #2
    def test_k_winners_module(self):
        x = self.x2

        n = 6

        kw = KWinners(
            n,
            percent_on=0.333,
            boost_strength=1.0,
            boost_strength_factor=0.5,
            duty_cycle_period=1000,
        )

        kw.train()  # Testing with mod.training = True

        # Expect 2 winners per batch (33% of 6)
        expected = torch.zeros_like(x)
        expected[0, 0] = 1.5
        expected[0, 3] = 1.3
        expected[1, 2] = 1.2
        expected[1, 3] = 1.6
        result = kw(x)
        self.assertEqual(result.shape, expected.shape)

        num_correct = (result == expected).sum()
        self.assertEqual(num_correct, result.reshape(-1).size()[0])

        new_duty = torch.tensor([1.0, 0, 1.0, 2.000, 0, 0]) / 2.0

        diff = (kw.duty_cycle - new_duty).abs().sum()
        self.assertLessEqual(diff, 0.001)
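The duty-cycle assertion follows from counting how often each unit was among the winners: units 0 and 3 won in row 0, units 2 and 3 in row 1, giving [1, 0, 1, 2, 0, 0] wins over 2 samples. A rough sketch of that bookkeeping (the actual module also tracks learning iterations and caps them at duty_cycle_period):

import torch

def update_duty_cycle(duty_cycle, output, seen, batch_size, period=1000):
    # Running average of how often each unit is among the k winners.
    seen = min(seen + batch_size, period)
    wins = (output != 0).float().sum(dim=0)
    return (duty_cycle * (seen - batch_size) + wins) / seen, seen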
Example #3
    def __init__(self,
                 input_size,
                 output_size,
                 hidden_size,
                 num_segments,
                 dim_context,
                 sparsity,
                 kw=False,
                 relu=False,
                 dendritic_layer_class=AbsoluteMaxGatingDendriticLayer):

        # The nonlinearity can either be k-Winners or ReLU, but not both
        assert not (kw and relu)

        super().__init__()

        self.num_segments = num_segments
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dim_context = dim_context
        self.kw = kw
        self.relu = relu

        # Forward layers & k-winners
        self.dend1 = dendritic_layer_class(module=nn.Linear(
            input_size, hidden_size),
                                           num_segments=num_segments,
                                           dim_context=dim_context,
                                           module_sparsity=sparsity,
                                           dendrite_sparsity=sparsity)
        self.dend2 = dendritic_layer_class(module=nn.Linear(
            hidden_size, hidden_size),
                                           num_segments=num_segments,
                                           dim_context=dim_context,
                                           module_sparsity=sparsity,
                                           dendrite_sparsity=sparsity)

        if kw:
            self.kw1 = KWinners(n=hidden_size,
                                percent_on=0.05,
                                k_inference_factor=1.0,
                                boost_strength=0.0,
                                boost_strength_factor=0.0)
            self.kw2 = KWinners(n=hidden_size,
                                percent_on=0.05,
                                k_inference_factor=1.0,
                                boost_strength=0.0,
                                boost_strength_factor=0.0)
        if relu:
            self.relu1 = nn.ReLU()
            self.relu2 = nn.ReLU()

        # Final classifier layer
        self.classifier = SparseWeights(nn.Linear(hidden_size, output_size),
                                        sparsity=sparsity)
Example #4
    def __init__(self,
                 input_size=28 * 28,
                 n_hidden_units=1000,
                 n_classes=10,
                 is_sparse=False,
                 sparsity=(0.75, 0.85),
                 percent_on=0.1):
        """
        Initialize a 2-layer MLP
        :param input_size: number of input features to the MLP
        :type input_size: int
        :param n_hidden_units: number of units in each of the two hidden layers
        :type n_hidden_units: int
        :param n_classes: number of output units
        :type n_classes: int
        :param is_sparse: whether or not to initialize the sparse network instead of a
        dense one
        :type is_sparse: bool
        :param sparsity: a 2-element list/tuple specifying the sparsity in each of the
        hidden layers
        :type sparsity: list/tuple of float
        :param percent_on: fraction of active units in each K-Winners layer (only applies
        to sparse networks)
        :type percent_on: float
        """
        super().__init__()

        self.is_sparse = is_sparse
        self.flatten = Flatten()
        self.n_classes = n_classes

        self.fc1 = torch.nn.Linear(input_size, n_hidden_units)
        self.fc2 = torch.nn.Linear(n_hidden_units, n_hidden_units)
        self.fc3 = torch.nn.Linear(n_hidden_units, n_classes)

        if is_sparse:
            self.fc1_sparsity, self.fc2_sparsity = sparsity
            self.percent_on = percent_on

            self.fc1 = SparseWeights(self.fc1, sparsity=self.fc1_sparsity)
            self.kw1 = KWinners(n=n_hidden_units,
                                percent_on=percent_on,
                                boost_strength=0.0)

            self.fc2 = SparseWeights(self.fc2, sparsity=self.fc2_sparsity)
            self.kw2 = KWinners(n=n_hidden_units,
                                percent_on=percent_on,
                                boost_strength=0.0)
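Only __init__ is quoted here; a plausible forward pass for this MLP (an assumption, the real class may differ) would route each hidden layer through its KWinners module on the sparse path and through ReLU otherwise:

    def forward(self, x):
        # Hypothetical companion to the __init__ above.
        x = self.flatten(x)
        x = self.kw1(self.fc1(x)) if self.is_sparse else torch.relu(self.fc1(x))
        x = self.kw2(self.fc2(x)) if self.is_sparse else torch.relu(self.fc2(x))
        return self.fc3(x)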
Example #5
    def _create_preprocess_module(self, module_type, preprocess_output_dim,
                                  kw_percent_on):
        if module_type is None:
            return None

        preprocess_module = nn.Sequential()
        linear_layer = SparseWeights(torch.nn.Linear(
            self.context_representation_dim + self.representation_dim,
            preprocess_output_dim,
            bias=True),
                                     sparsity=self.weight_sparsity,
                                     allow_extremes=True)
        DendriticMLP._init_sparse_weights(linear_layer, 0.0)

        if module_type == "relu":
            nonlinearity = nn.ReLU()
        else:
            nonlinearity = KWinners(n=preprocess_output_dim,
                                    percent_on=kw_percent_on,
                                    k_inference_factor=1.0,
                                    boost_strength=0.0,
                                    boost_strength_factor=0.0)
        preprocess_module.add_module("linear_layer", linear_layer)
        preprocess_module.add_module("nonlinearity", nonlinearity)

        self.context_representation_dim = preprocess_output_dim
        return preprocess_module
Example #6
 def _kwinners(self, fout):
     return KWinners(
         n=fout,
         percent_on=self.percent_on,
         boost_strength=self.boost_strength,
         boost_strength_factor=self.boost_strength_factor,
     )
Example #7
    def test_permuted_model_loading(self):

        model = torch.nn.Sequential(
            KWinners(8, percent_on=0.1),
            torch.nn.Linear(8, 8),
        )

        param_map = {
            "0.weight": "1.weight",
            "0.bias": "1.bias",
            "1.boost_strength": "0.boost_strength",
            "1.duty_cycle": "0.duty_cycle",
        }

        model = load_multi_state(
            model,
            restore_linear=self.checkpoint_path,
            param_map=param_map,
        )

        model = load_multi_state(
            model,
            restore_full_model=self.checkpoint_path,
            param_map=param_map,
        )
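The param_map above renames checkpoint keys so they line up with the permuted module order (the Linear saved as module 0 in the checkpoint sits at index 1 here, and the KWinners state moves the other way). Conceptually the remapping is a key rename over the state dict; the real load_multi_state helper does more (e.g. selecting which parameter groups to restore), so this is only a sketch:

def remap_state_dict(state_dict, param_map):
    # Rename checkpoint keys to match the current model's module order.
    return {param_map.get(key, key): value for key, value in state_dict.items()}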
Example #8
    def _create_preprocess_module(self, module_type, preprocess_output_dim, kw_percent_on):
        preprocess_module = nn.Sequential()

        if module_type is None:
            return preprocess_module, self.context_dim

        linear_layer = torch.nn.Linear(
            self.context_dim,
            preprocess_output_dim,
            bias=True
        )

        if module_type == "relu":
            nonlinearity = nn.ReLU()
        elif module_type == "kw":
            nonlinearity = KWinners(
                n=preprocess_output_dim,
                percent_on=kw_percent_on,
                k_inference_factor=1.0,
                boost_strength=0.0,
                boost_strength_factor=0.0
            )
        else:
            nonlinearity = nn.Identity()

        preprocess_module.add_module("linear_layer", linear_layer)
        preprocess_module.add_module("nonlinearity", nonlinearity)

        return preprocess_module, preprocess_output_dim
Example #9
 def __init__(self, input_size, hidden, sparsity, percent_on,
              boost_strength):
     super().__init__()
     self.sparse_linear = SparseWeights(nn.Linear(input_size, hidden),
                                        sparsity=sparsity)
     self.kw = KWinners(n=hidden,
                        percent_on=percent_on,
                        boost_strength=boost_strength)
Example #10
 def __init__(self, num_classes, input_shape):
     super().__init__()
     in_features = np.prod(input_shape)
     self.flatten = torch.nn.Flatten()
     self.kwinners = KWinners(in_features, percent_on=0.1)
     self.classifier = SparseWeights(
         nn.Linear(in_features, num_classes, bias=False), sparsity=0.9
     )
Example #11
    def __init__(self,
                 cnn_out_channels=(64, 64),
                 cnn_percent_on=(0.095, 0.125),
                 linear_units=1000,
                 linear_percent_on=0.1,
                 linear_weight_sparsity=0.4,
                 boost_strength=1.5,
                 boost_strength_factor=0.9,
                 k_inference_factor=1.5,
                 duty_cycle_period=1000):
        super(GSCSparseCNN, self).__init__(
            OrderedDict([
                # First Sparse CNN layer
                ("cnn1", nn.Conv2d(1, cnn_out_channels[0], 5)),
                ("cnn1_batchnorm",
                 nn.BatchNorm2d(cnn_out_channels[0], affine=False)),
                ("cnn1_maxpool", nn.MaxPool2d(2)),
                ("cnn1_kwinner",
                 KWinners2d(channels=cnn_out_channels[0],
                            percent_on=cnn_percent_on[0],
                            k_inference_factor=k_inference_factor,
                            boost_strength=boost_strength,
                            boost_strength_factor=boost_strength_factor,
                            duty_cycle_period=duty_cycle_period)),

                # Second Sparse CNN layer
                ("cnn2", nn.Conv2d(cnn_out_channels[0], cnn_out_channels[1],
                                   5)),
                ("cnn2_batchnorm",
                 nn.BatchNorm2d(cnn_out_channels[1], affine=False)),
                ("cnn2_maxpool", nn.MaxPool2d(2)),
                ("cnn2_kwinner",
                 KWinners2d(channels=cnn_out_channels[1],
                            percent_on=cnn_percent_on[1],
                            k_inference_factor=k_inference_factor,
                            boost_strength=boost_strength,
                            boost_strength_factor=boost_strength_factor,
                            duty_cycle_period=duty_cycle_period)),
                ("flatten", Flatten()),

                # Sparse Linear layer
                ("linear",
                 SparseWeights(nn.Linear(25 * cnn_out_channels[1],
                                         linear_units),
                               weight_sparsity=linear_weight_sparsity)),
                ("linear_bn", nn.BatchNorm1d(linear_units, affine=False)),
                ("linear_kwinner",
                 KWinners(n=linear_units,
                          percent_on=linear_percent_on,
                          k_inference_factor=k_inference_factor,
                          boost_strength=boost_strength,
                          boost_strength_factor=boost_strength_factor,
                          duty_cycle_period=duty_cycle_period)),

                # Classifier
                ("output", nn.Linear(linear_units, 12)),
                ("softmax", nn.LogSoftmax(dim=1))
            ]))
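The linear layer's input size, 25 * cnn_out_channels[1], comes from the 32x32 GSC input: each (5x5 valid conv, 2x2 maxpool) stage maps a side length s to (s - 4) // 2, so 32 -> 14 -> 5 and the flattened map is 5 * 5 = 25 per channel. A quick check:

def conv_pool_side(s, kernel=5, pool=2):
    # Output side length after a 5x5 valid conv followed by 2x2 max pooling.
    return (s - kernel + 1) // pool

assert conv_pool_side(conv_pool_side(32)) ** 2 == 25  # 32 -> 14 -> 5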
Example #12
def add_sparse_linear_layer(
    network,
    suffix,
    input_size,
    linear_n,
    dropout,
    use_batch_norm,
    weight_sparsity,
    percent_on,
    k_inference_factor,
    boost_strength,
    boost_strength_factor,
):
    """Add sparse linear layer to network.

    :param network: The network to add the sparse layer to
    :param suffix: Layer suffix. Used to name its components
    :param input_size: Input size
    :param linear_n: Number of units
    :param dropout: dropout value
    :param use_batch_norm: whether or not to use batch norm
    :param weight_sparsity: Pct of weights that are allowed to be non-zero
    :param percent_on: Pct of ON (non-zero) units
    :param k_inference_factor: During inference we increase percent_on by this factor
    :param boost_strength: boost strength (0.0 implies no boosting)
    :param boost_strength_factor:
        boost strength is multiplied by this factor after each epoch
    """
    linear = nn.Linear(input_size, linear_n)
    if 0 < weight_sparsity < 1.0:
        network.add_module(
            "linear{}".format(suffix), SparseWeights(linear, weight_sparsity)
        )
    else:
        network.add_module("linear{}".format(suffix), linear)

    if use_batch_norm:
        network.add_module("linear_bn", nn.BatchNorm1d(linear_n, affine=False))

    if dropout > 0.0:
        network.add_module("linear{}_dropout".format(suffix), nn.Dropout(dropout))

    if 0 < percent_on < 1.0:
        network.add_module(
            "linear{}_kwinners".format(suffix),
            KWinners(
                n=linear_n,
                percent_on=percent_on,
                k_inference_factor=k_inference_factor,
                boost_strength=boost_strength,
                boost_strength_factor=boost_strength_factor,
            ),
        )

    else:
        network.add_module("linear{}_relu".format(suffix), nn.ReLU())
Example #13
    def __init__(self, num_classes, input_shape):
        super().__init__()

        in_features = np.prod(input_shape)
        self.dendritic_gate = DendriticAbsoluteMaxGate1d()
        self.flatten = torch.nn.Flatten()
        self.kwinners = KWinners(n=16, percent_on=0.75, k_inference_factor=1)
        self.classifier = SparseWeights(
            torch.nn.Linear(in_features, num_classes, bias=False),
            sparsity=0.5,
        )
Example #14
    def __init__(self,
                 cnn_out_channels=(32, 64),
                 cnn_percent_on=(0.087, 0.293),
                 linear_units=700,
                 linear_percent_on=0.143,
                 linear_weight_sparsity=0.3,
                 boost_strength=1.5,
                 boost_strength_factor=0.85,
                 k_inference_factor=1.5,
                 duty_cycle_period=1000):
        super(MNISTSparseCNN, self).__init__(
            OrderedDict([
                # First Sparse CNN layer
                ("cnn1", nn.Conv2d(1, cnn_out_channels[0], 5)),
                ("cnn1_maxpool", nn.MaxPool2d(2)),
                ("cnn1_kwinner",
                 KWinners2d(channels=cnn_out_channels[0],
                            percent_on=cnn_percent_on[0],
                            k_inference_factor=k_inference_factor,
                            boost_strength=boost_strength,
                            boost_strength_factor=boost_strength_factor,
                            duty_cycle_period=duty_cycle_period)),

                # Second Sparse CNN layer
                ("cnn2", nn.Conv2d(cnn_out_channels[0], cnn_out_channels[1],
                                   5)),
                ("cnn2_maxpool", nn.MaxPool2d(2)),
                ("cnn2_kwinner",
                 KWinners2d(channels=cnn_out_channels[1],
                            percent_on=cnn_percent_on[1],
                            k_inference_factor=k_inference_factor,
                            boost_strength=boost_strength,
                            boost_strength_factor=boost_strength_factor,
                            duty_cycle_period=duty_cycle_period)),
                ("flatten", Flatten()),

                # Sparse Linear layer
                ("linear",
                 SparseWeights(nn.Linear(16 * cnn_out_channels[1],
                                         linear_units),
                               weight_sparsity=linear_weight_sparsity)),
                ("linear_kwinner",
                 KWinners(n=linear_units,
                          percent_on=linear_percent_on,
                          k_inference_factor=k_inference_factor,
                          boost_strength=boost_strength,
                          boost_strength_factor=boost_strength_factor,
                          duty_cycle_period=duty_cycle_period)),

                # Classifier
                ("output", nn.Linear(linear_units, 10)),
                ("softmax", nn.LogSoftmax(dim=1))
            ]))
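Here 16 * cnn_out_channels[1] follows from the 28x28 MNIST input under the same (5x5 conv, 2x2 maxpool) arithmetic as the GSC model above: 28 -> 12 -> 4, and 4 * 4 = 16. A quick check:

def conv_pool_side(s, kernel=5, pool=2):
    return (s - kernel + 1) // pool

assert conv_pool_side(conv_pool_side(28)) ** 2 == 16  # 28 -> 12 -> 4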
Example #15
    def test_kwinners_grad(self):
        n = 3
        x = torch.tensor([[0, 2, 1], [14, 13, 12]],
                         dtype=torch.float,
                         requires_grad=True)
        grad = torch.tensor([[5, 6, 7], [45, 46, 47]], dtype=torch.float)
        expected = torch.tensor([[0, 6, 0], [45, 0, 0]], dtype=torch.float)

        for break_ties in [True, False]:
            with self.subTest(break_ties=break_ties):
                kw = KWinners(n,
                              percent_on=(1 / 3),
                              k_inference_factor=1.0,
                              boost_strength=0.0,
                              break_ties=break_ties)
                kw(x).backward(grad)
                torch.testing.assert_allclose(x.grad, expected)
                x.grad.zero_()
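The gradient check shows that backpropagation only passes gradients through the winning units; everything zeroed in the forward pass receives a zero gradient. A self-contained reproduction with plain autograd and an explicit mask (not the library's custom autograd Function):

import torch

x = torch.tensor([[0., 2., 1.], [14., 13., 12.]], requires_grad=True)
threshold = x.detach().topk(1, dim=1).values        # k = 1 winner per row
mask = (x.detach() >= threshold).float()
out = x * mask                                      # losers are zeroed
out.backward(torch.tensor([[5., 6., 7.], [45., 46., 47.]]))
print(x.grad)                                       # [[0, 6, 0], [45, 0, 0]]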
Example #16
    def __init__(
        self,
        input_size,
        output_size,
        kw_percent_on=0.05,
        boost_strength=0.0,
        weight_sparsity=0.95,
        duty_cycle_period=1000,
    ):
        super().__init__()

        self.linear = SparseWeights(nn.Linear(input_size, output_size),
                                    sparsity=weight_sparsity,
                                    allow_extremes=True)
        self.kw = KWinners(n=output_size,
                           percent_on=kw_percent_on,
                           boost_strength=boost_strength,
                           duty_cycle_period=duty_cycle_period)
Example #17
    def test_kwinners_relu(self):
        n = 4
        x = torch.tensor([[-5, -2, -1, 2], [-2, -1, 1, 2], [-4, -3, -2, -1]],
                         dtype=torch.float)
        expected = torch.tensor([[0, 0, 0, 2], [0, 0, 1, 2], [0, 0, 0, 0]],
                                dtype=torch.float)

        for break_ties in [True, False]:
            with self.subTest(break_ties=break_ties):

                kw = KWinners(n,
                              percent_on=0.5,
                              k_inference_factor=1.0,
                              boost_strength=1.0,
                              break_ties=break_ties,
                              relu=True)

                result = kw(x)
                self.assertTrue(result.eq(expected).all())
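With relu=True the winners are additionally clamped at zero, which is why the last row above (all negative values) produces no active units at all. The combined effect can be reproduced in plain PyTorch (an illustration, not the library's code):

import torch

def k_winners_relu(x, k):
    # Keep the k largest values per row, then clamp negatives to zero.
    threshold = x.topk(k, dim=1).values[:, -1:]
    return torch.relu(torch.where(x >= threshold, x, torch.zeros_like(x)))

x = torch.tensor([[-5., -2., -1., 2.], [-2., -1., 1., 2.], [-4., -3., -2., -1.]])
print(k_winners_relu(x, k=2))  # matches the expected tensor in the test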
Example #18
    def test_k_winners_module_two(self):
        """
        Test a series of calls on the layer in training mode.
        """

        # Set up test input and module.
        x = self.x2
        n = 6

        for break_ties in [True, False]:
            with self.subTest(break_ties=break_ties):
                expected = torch.zeros_like(x)
                expected[0, 0] = x[0, 0]
                expected[0, 5] = x[0, 5]
                expected[1, 2] = x[1, 2]
                expected[1, 3] = x[1, 3]

                kw = KWinners(
                    n,
                    percent_on=0.333,
                    k_inference_factor=1.5,
                    boost_strength=1.0,
                    boost_strength_factor=0.5,
                    duty_cycle_period=1000,
                    break_ties=break_ties,
                )

                kw.train(mode=True)
                result = kw(x)
                result = kw(x)
                result = kw(x)
                result = kw(x)
                result = kw(x)
                result = kw(x)
                result = kw(x)

                self.assertTrue(result.eq(expected).all())

                # Test with mod.training = False.
                kw.train(mode=False)
                result = kw(x)
                expected = torch.zeros_like(x)
                expected[0, 0] = x[0, 0]
                expected[0, 1] = x[0, 1]
                expected[0, 5] = x[0, 5]
                expected[1, 2] = x[1, 2]
                expected[1, 3] = x[1, 3]
                expected[1, 4] = x[1, 4]
                self.assertTrue(result.eq(expected).all())
Example #19
def add_sparse_dendrite_layer(
    network,
    suffix,
    in_dim,
    out_dim,
    dendrites_per_neuron,
    use_batch_norm=False,
    weight_sparsity=0.2,
    percent_on=0.1,
    k_inference_factor=1,
    boost_strength=1.5,
    boost_strength_factor=0.9,
    duty_cycle_period=1000,
):

    dendrite_layer = DendriteLayer(
        in_dim=in_dim,
        out_dim=out_dim,
        dendrites_per_neuron=dendrites_per_neuron,
        weight_sparsity=weight_sparsity,
    )

    network.add_module("dendrites{}".format(suffix), dendrite_layer)

    if use_batch_norm:
        network.add_module("dendrites{}_bn".format(suffix),
                           nn.BatchNorm1d(out_dim, affine=False))

    network.add_module(
        "linear{}_kwinners".format(suffix),
        KWinners(
            n=out_dim,
            percent_on=percent_on,
            k_inference_factor=k_inference_factor,
            boost_strength=boost_strength,
            boost_strength_factor=boost_strength_factor,
            duty_cycle_period=duty_cycle_period,
        ),
    )
Example #20
    def setUp(self):

        set_random_seed(20)
        self.model = torch.nn.Sequential(
            torch.nn.Linear(8, 8),
            KWinners(8, percent_on=0.1),
        )

        # Create temporary results directory.
        self.tempdir = tempfile.TemporaryDirectory()
        self.results_dir = Path(self.tempdir.name) / Path("results")
        self.results_dir.mkdir()

        # Save model state.
        state = {}
        with io.BytesIO() as buffer:
            serialize_state_dict(buffer, self.model.state_dict(), compresslevel=-1)
            state["model"] = buffer.getvalue()

        self.checkpoint_path = self.results_dir / Path("mymodel")
        with open(self.checkpoint_path, "wb") as f:
            pickle.dump(state, f)
Example #21
    def __init__(
        self,
        input_size,
        output_size,
        hidden_sizes,
        num_segments,
        dim_context,
        kw,
        kw_percent_on=0.05,
        context_percent_on=1.0,
        dendrite_weight_sparsity=0.95,
        weight_sparsity=0.95,
        weight_init="modified",
        dendrite_init="modified",
        freeze_dendrites=False,
        output_nonlinearity=None,
        dendritic_layer_class=AbsoluteMaxGatingDendriticLayer,
    ):

        # Forward & dendritic weight initialization must be either "kaiming" or
        # "modified"
        assert weight_init in ("kaiming", "modified")
        assert dendrite_init in ("kaiming", "modified")
        assert kw_percent_on is None or (kw_percent_on >= 0.0
                                         and kw_percent_on < 1.0)
        assert context_percent_on >= 0.0

        if kw_percent_on == 0.0:
            kw = False

        super().__init__()

        if num_segments == 1:
            # use optimized 1 segment class
            dendritic_layer_class = OneSegmentDendriticLayer

        self.num_segments = num_segments
        self.input_size = input_size
        self.hidden_sizes = hidden_sizes
        self.output_size = output_size
        self.dim_context = dim_context
        self.kw = kw
        self.kw_percent_on = kw_percent_on
        self.weight_sparsity = weight_sparsity
        self.dendrite_weight_sparsity = dendrite_weight_sparsity
        self.output_nonlinearity = output_nonlinearity
        self.hardcode_dendrites = (dendrite_init == "hardcoded")

        self._layers = nn.ModuleList()
        self._activations = nn.ModuleList()

        if self.hardcode_dendrites:
            dendrite_sparsity = 0.0
        else:
            dendrite_sparsity = self.dendrite_weight_sparsity
        for i in range(len(self.hidden_sizes)):
            curr_dend = dendritic_layer_class(
                module=nn.Linear(input_size, self.hidden_sizes[i], bias=True),
                num_segments=num_segments,
                dim_context=dim_context,
                module_sparsity=self.weight_sparsity,
                dendrite_sparsity=dendrite_sparsity,
            )

            if weight_init == "modified":
                # Scale weights to be sampled from the new initialization U(-h, h) where
                # h = sqrt(1 / (weight_density * previous_layer_percent_on))
                if i == 0:
                    # first hidden layer can't have kw input
                    self._init_sparse_weights(curr_dend, 0.0)
                else:
                    self._init_sparse_weights(curr_dend,
                                              1 - kw_percent_on if kw else 0.0)

            if dendrite_init == "modified":
                self._init_sparse_dendrites(curr_dend, 1 - context_percent_on)

            if freeze_dendrites:
                # Dendritic weights will not be updated during backward pass
                for name, param in curr_dend.named_parameters():
                    if "segments" in name:
                        param.requires_grad = False

            if self.kw:
                curr_activation = KWinners(n=hidden_sizes[i],
                                           percent_on=kw_percent_on,
                                           k_inference_factor=1.0,
                                           boost_strength=0.0,
                                           boost_strength_factor=0.0)
            else:
                curr_activation = nn.ReLU()

            self._layers.append(curr_dend)
            self._activations.append(curr_activation)

            input_size = self.hidden_sizes[i]

        self._single_output_head = not isinstance(output_size, Iterable)
        if self._single_output_head:
            output_size = (output_size, )

        self._output_layers = nn.ModuleList()
        for out_size in output_size:
            output_layer = nn.Sequential()
            output_linear = SparseWeights(module=nn.Linear(
                input_size, out_size),
                                          sparsity=weight_sparsity,
                                          allow_extremes=True)
            if weight_init == "modified":
                self._init_sparse_weights(output_linear,
                                          1 - kw_percent_on if kw else 0.0)
            output_layer.add_module("output_linear", output_linear)

            if self.output_nonlinearity is not None:
                output_layer.add_module("non_linearity", output_nonlinearity)
            self._output_layers.append(output_layer)
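The "modified" initialization referenced in the comments samples weights from U(-h, h) with h = sqrt(1 / (weight_density * previous_layer_percent_on)), so layers whose inputs are mostly zero (sparse weights feeding k-winner activations) start with correspondingly larger weights. A hypothetical stand-in for such a helper (the real _init_sparse_weights may differ, and the fan-in term in this sketch is an assumption):

import math
import torch.nn as nn

def init_sparse_weights_sketch(linear, input_sparsity, weight_density=0.05):
    # Kaiming-style uniform init whose bound accounts for how many of the
    # layer's inputs and weights are actually non-zero.
    percent_on_prev = 1.0 - input_sparsity
    fan_in = linear.weight.shape[1]
    h = math.sqrt(1.0 / (weight_density * percent_on_prev * fan_in))
    nn.init.uniform_(linear.weight, -h, h)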
Example #22
 def _kwinners(self, num_units):
     return KWinners(n=num_units,
                     percent_on=0.25,
                     boost_strength=1.4,
                     boost_strength_factor=0.7)
Example #23
    def __init__(self,
                 sparsify=False,
                 percent_on=0.3,
                 k_inference_factor=1.5,
                 boost_strength=1.0,
                 boost_strength_factor=0.9,
                 duty_cycle_period=1000,
                 num_classes=10,
                 hidden_units=2048,
                 hidden_layers=1,
                 dropout=0.5,
                 weight_sparsity=0.5,
                 input_size=28 * 28,
                 stats=False):
        """
        Constructor for the SimpleMLP object
            Args:
                num_classes (int): total number of classes of the benchmark,
                                   i.e. maximum output neurons of the model.
                sparsify (bool): if we want to introduce the Kwinners and
                                 SparseWeights layers in the model.
                percent_on (float): Percentage of active units in fc layers.
                k_inference_factor (float): boosting parameter. Check the
                                            official Kwinners docs for further
                                            details.
                boost_strength (float): boosting parameter.
                boost_strength_factor (float): boosting parameter.
                hidden_units (int): number of units for the hidden layer.
                hidden_layers (int): number of hidden layers.
                dropout (float): dropout probability for each dropout layer.
                weight_sparsity (float): percentage of active weights for
                                         each fc layer.
                input_size (int): input size (assumed a linearized input).
                stats (bool): if we want to record sparsity statistics.
        """

        super(SimpleMLP, self).__init__()

        self.active_perc_list = []
        self.on_idxs = [0] * hidden_units
        self.hidden_units = hidden_units
        self.num_classes = num_classes
        self.stats = stats

        ft_modules = []

        if sparsify:
            for i in range(hidden_layers):
                if i == 0:
                    ft_modules.append(
                        SparseWeights(nn.Linear(input_size, hidden_units),
                                      weight_sparsity=weight_sparsity))
                else:
                    ft_modules.append(
                        SparseWeights(nn.Linear(hidden_units, hidden_units),
                                      weight_sparsity=weight_sparsity))
                ft_modules.append(
                    KWinners(hidden_units, percent_on, k_inference_factor,
                             boost_strength, boost_strength_factor,
                             duty_cycle_period))
                ft_modules.append(nn.Dropout(dropout))

        else:
            for i in range(hidden_layers):
                if i == 0:
                    ft_modules.append(nn.Linear(input_size, hidden_units))
                else:
                    ft_modules.append(nn.Linear(hidden_units, hidden_units))
                ft_modules.append(nn.ReLU(inplace=True))
                ft_modules.append(nn.Dropout(dropout))

        self.features = nn.Sequential(*ft_modules)
        self.classifier = nn.Linear(hidden_units, num_classes)
Example #24
    def __init__(
        self,
        input_size,
        context_size,
        output_size,
        hidden_sizes,
        layers_modulated,
        num_segments,
        kw_percent_on,
        context_percent_on,
        weight_sparsity,
        weight_init,
        dendrite_weight_sparsity,
        dendrite_init,
        dendritic_layer_class,
        output_nonlinearity,
        freeze_dendrites=False,
    ):
        super().__init__()

        self.input_size = input_size
        self.context_size = context_size
        self.output_size = output_size
        self.hidden_sizes = hidden_sizes
        self.layers_modulated = layers_modulated
        self.num_segments = num_segments
        self.kw_percent_on = kw_percent_on
        self.context_percent_on = context_percent_on
        self.weight_sparsity = weight_sparsity
        self.weight_init = weight_init
        self.dendrite_weight_sparsity = dendrite_weight_sparsity
        self.dendrite_init = dendrite_init
        self.output_nonlinearity = output_nonlinearity

        self.layers = nn.ModuleList()

        for i in range(len(self.hidden_sizes)):
            block_name = ""

            if i not in self.layers_modulated:
                linear = FFLayer(
                    module=nn.Linear(input_size, self.hidden_sizes[i], bias=True),
                    module_sparsity=self.weight_sparsity,
                )
                block_name = "ff"
            else:
                linear = dendritic_layer_class(
                    module=nn.Linear(input_size, self.hidden_sizes[i], bias=True),
                    num_segments=self.num_segments,
                    dim_context=self.context_size,
                    module_sparsity=self.weight_sparsity,
                    dendrite_sparsity=self.dendrite_weight_sparsity,
                )
                block_name = "dendrite"

                if self.dendrite_init == "modified":
                    self._init_sparse_dendrites(linear, 1 - self.context_percent_on)

                if freeze_dendrites:
                    # Dendritic weights will not be updated during backward pass
                    for name, param in linear.named_parameters():
                        if "segments" in name:
                            param.requires_grad = False

            if self.weight_init == "modified":
                # Scale weights to be sampled from the new initialization U(-h, h) where
                # h = sqrt(1 / (weight_density * previous_layer_percent_on))

                # first hidden layer can't have kw input
                if i == 0:
                    self._init_sparse_weights(linear, 0.0)
                else:
                    self._init_sparse_weights(
                        linear,
                        1 - self.kw_percent_on if self.kw_percent_on else 0.0
                    )

            if self.kw_percent_on:
                activation = KWinners(n=hidden_sizes[i],
                                      percent_on=kw_percent_on,
                                      k_inference_factor=1.0,
                                      boost_strength=0.0,
                                      boost_strength_factor=0.0)
            else:
                activation = nn.ReLU()

            block = SequentialBlock()
            block.add_module(block_name, SequentialBlock(linear, activation))
            self.layers.append(block)

            input_size = self.hidden_sizes[i]


        if not isinstance(output_size, Iterable):
            output_size = (output_size,)

        self._output_layers = nn.ModuleList()

        for out_size in output_size:
            output_layer = nn.Sequential()

            output_linear = SparseWeights(module=nn.Linear(input_size, out_size),
                                          sparsity=self.weight_sparsity, allow_extremes=True)

            if self.weight_init == "modified":
                self._init_sparse_weights(
                    output_linear,
                    1 - self.kw_percent_on if self.kw_percent_on else 0.0
                )

            output_layer.add_module("output_linear", output_linear)

            if self.output_nonlinearity is not None:
                output_layer.add_module("non_linearity", self.output_nonlinearity)

            self._output_layers.append(output_layer)
Example #25
    def test_k_winners_module_one(self):

        # Set up test input and module.
        x = self.x2
        n = 6

        for break_ties in [True, False]:
            with self.subTest(break_ties=break_ties):
                kw = KWinners(
                    n,
                    percent_on=0.333,
                    k_inference_factor=1.5,
                    boost_strength=1.0,
                    boost_strength_factor=0.5,
                    duty_cycle_period=1000,
                    break_ties=break_ties,
                )

                # Test with mod.training = False.
                kw.train(mode=False)

                # Expect 3 winners per batch (k_inference_factor 1.5 * 33% of 6 = 3)
                expected = torch.zeros_like(x)
                expected[0, 0] = x[0, 0]
                expected[0, 2] = x[0, 2]
                expected[0, 3] = x[0, 3]
                expected[1, 0] = x[1, 0]
                expected[1, 2] = x[1, 2]
                expected[1, 3] = x[1, 3]
                result = kw(x)

                self.assertEqual(result.shape, expected.shape)
                self.assertTrue(result.eq(expected).all())

                # Run forward pass again while still not in training mode.
                # Should give the same result as the duty cycles are not updated.
                result = kw(x)

                self.assertEqual(result.shape, expected.shape)
                self.assertTrue(result.eq(expected).all())

                # Test with mod.training = True
                kw.train(mode=True)

                # Expect 2 winners per batch (33% of 6)
                expected = torch.zeros_like(x)
                expected[0, 0] = x[0, 0]
                expected[0, 3] = x[0, 3]
                expected[1, 2] = x[1, 2]
                expected[1, 3] = x[1, 3]
                result = kw(x)

                self.assertEqual(result.shape, expected.shape)
                self.assertTrue(result.eq(expected).all())

                # Test values of updated duty cycle.
                new_duty = torch.tensor([1.0, 0, 1.0, 2.0, 0, 0]) / 2.0

                self.assertTrue(kw.duty_cycle.eq(new_duty).all())

                # Test forward with updated duty cycle.
                result = kw(x)

                expected = torch.zeros_like(x)
                expected[0, 1] = x[0, 1]
                expected[0, 5] = x[0, 5]
                expected[1, 1] = x[1, 1]
                expected[1, 5] = x[1, 5]

                self.assertEqual(result.shape, expected.shape)
                self.assertTrue(result.eq(expected).all())
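The final forward pass above selects the previously quiet units (1 and 5) because of boosting: during training, each unit's activation is scaled up or down depending on whether its duty cycle is below or above the target density, before the top-k selection is made. A sketch of the commonly used boost term (hedged; the exact expression lives inside the library's functional code):

import torch

def boosted(x, duty_cycle, percent_on, boost_strength):
    # Units that have been winning less often than percent_on are boosted,
    # frequent winners are suppressed, prior to picking the top k.
    return x * torch.exp((percent_on - duty_cycle) * boost_strength)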
Example #26
    def _setup(self, config):

        # Get trial parameters
        seed = config["seed"]
        datadir = config["datadir"]
        batch_size = config["batch_size"]
        test_batch_size = config["test_batch_size"]
        first_epoch_batch_size = config["first_epoch_batch_size"]
        in_channels, h, w = config["c1_input_shape"]
        learning_rate = config["learning_rate"]
        momentum = config["momentum"]
        weight_sparsity = config["weight_sparsity"]
        boost_strength = config["boost_strength"]
        boost_strength_factor = config["boost_strength_factor"]
        n = config["n"]
        percent_on = config["percent_on"]
        cnn_percent_on = config["cnn_percent_on"]
        k_inference_factor = config["k_inference_factor"]
        kernel_size = config["kernel_size"]
        out_channels = config["out_channels"]
        output_size = config["output_size"]
        cnn_output_len = out_channels * ((w - kernel_size + 1) // 2)**2

        torch.manual_seed(seed)
        if torch.cuda.is_available():
            self.device = torch.device("cuda")
            torch.cuda.manual_seed(seed)
        else:
            self.device = torch.device("cpu")

        xforms = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])
        train_dataset = datasets.MNIST(datadir, train=True, transform=xforms)
        test_dataset = datasets.MNIST(datadir, train=False, transform=xforms)

        self.train_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_size=batch_size,
                                                        shuffle=True)
        self.test_loader = torch.utils.data.DataLoader(
            test_dataset, batch_size=test_batch_size, shuffle=True)
        self.first_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=first_epoch_batch_size, shuffle=True)

        # Create simple sparse model
        self.model = nn.Sequential()

        # CNN layer
        self.model.add_module(
            "cnn",
            nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
            ),
        )

        if cnn_percent_on < 1.0:
            self.model.add_module(
                "kwinners_cnn",
                KWinners2d(
                    percent_on=cnn_percent_on,
                    channels=out_channels,
                    k_inference_factor=k_inference_factor,
                    boost_strength=boost_strength,
                    boost_strength_factor=boost_strength_factor,
                ),
            )
        else:
            self.model.add_module("ReLU_cnn", nn.ReLU())

        self.model.add_module("maxpool", nn.MaxPool2d(kernel_size=2))

        # Flatten max pool output before passing to linear layer
        self.model.add_module("flatten", Flatten())

        # Linear layer
        linear = nn.Linear(cnn_output_len, n)
        if weight_sparsity < 1.0:
            self.model.add_module("sparse_linear",
                                  SparseWeights(linear, weight_sparsity))
        else:
            self.model.add_module("linear", linear)

        if percent_on < 1.0:
            self.model.add_module(
                "kwinners_kinear",
                KWinners(
                    n=n,
                    percent_on=percent_on,
                    k_inference_factor=k_inference_factor,
                    boost_strength=boost_strength,
                    boost_strength_factor=boost_strength_factor,
                ),
            )
        else:
            self.model.add_module("Linear_ReLU", nn.ReLU())

        # Output layer
        self.model.add_module("fc", nn.Linear(n, output_size))
        self.model.add_module("softmax", nn.LogSoftmax(dim=1))

        self.model.to(self.device)
        self.optimizer = optim.SGD(self.model.parameters(),
                                   lr=learning_rate,
                                   momentum=momentum)
Example #27
    def __init__(self, config=None):
        super().__init__()

        defaults = dict(
            device="cpu",
            input_size=784,
            num_classes=10,
            hidden_sizes=[100, 100, 100],
            percent_on_k_winner=[1.0, 1.0, 1.0],
            boost_strength=[1.4, 1.4, 1.4],
            boost_strength_factor=[0.7, 0.7, 0.7],
            batch_norm=False,
            dropout=False,
            bias=True,
            k_inference_factor=1.0,
        )
        assert (
            config is None or "use_kwinners" not in config
        ), "use_kwinners is deprecated"

        defaults.update(config or {})
        self.__dict__.update(defaults)
        self.device = torch.device(self.device)

        # Decide which activation function to use for each hidden layer
        self.activation_funcs = []
        for layer, hidden_size in enumerate(self.hidden_sizes):
            if self.percent_on_k_winner[layer] < 0.5:
                self.activation_funcs.append(
                    KWinners(
                        hidden_size,
                        percent_on=self.percent_on_k_winner[layer],
                        boost_strength=self.boost_strength[layer],
                        boost_strength_factor=self.boost_strength_factor[layer],
                        k_inference_factor=self.k_inference_factor,
                    )
                )
            else:
                self.activation_funcs.append(nn.ReLU())

        # Construct layers.
        layers = []
        kwargs = dict(bias=self.bias, batch_norm=self.batch_norm, dropout=self.dropout)
        # Flatten image.
        layers = [nn.Flatten()]
        # Add the first layer
        layers.append(
            DSLinearBlock(
                self.input_size,
                self.hidden_sizes[0],
                activation_func=self.activation_funcs[0],
                config=config,
                **kwargs,
            )
        )
        # Add hidden layers.
        for i in range(1, len(self.hidden_sizes)):
            layers.append(
                DSLinearBlock(
                    self.hidden_sizes[i - 1],
                    self.hidden_sizes[i],
                    activation_func=self.activation_funcs[i],
                    config=config,
                    **kwargs,
                )
            )
        # Add last layer.
        layers.append(
            DSLinearBlock(
                self.hidden_sizes[-1], self.num_classes, bias=self.bias, config=config
            )
        )

        # Create the classifier.
        self.dynamic_sparse_modules = [layer[0] for layer in layers[1:]]
        self.classifier = nn.Sequential(*layers)

        # Initialize attr to decide whether to update coactivations during learning.
        self._track_coactivations = False  # Off by default.
Example #28
    def __init__(self,
                 cnn_out_channels=(64, 64),
                 cnn_percent_on=(0.095, 0.125),
                 cnn_weight_sparsity=(0.5, 0.2),
                 linear_units=1000,
                 linear_percent_on=0.1,
                 linear_weight_sparsity=0.1,
                 boost_strength=1.5,
                 boost_strength_factor=0.9,
                 k_inference_factor=1.0,
                 duty_cycle_period=1000,
                 kwinner_local=False):
        super(GSCSparseCNN, self).__init__()
        # input_shape = (1, 32, 32)
        # First Sparse CNN layer
        if cnn_weight_sparsity[0] < 1.0:
            self.add_module(
                "cnn1",
                SparseWeights2d(nn.Conv2d(1, cnn_out_channels[0], 5),
                                weight_sparsity=cnn_weight_sparsity[0]))
        else:
            self.add_module("cnn1", nn.Conv2d(1, cnn_out_channels[0], 5))
        self.add_module("cnn1_batchnorm",
                        nn.BatchNorm2d(cnn_out_channels[0], affine=False))
        self.add_module(
            "cnn1_kwinner",
            KWinners2d(
                channels=cnn_out_channels[0],
                percent_on=cnn_percent_on[0],
                k_inference_factor=k_inference_factor,
                boost_strength=boost_strength,
                boost_strength_factor=boost_strength_factor,
                duty_cycle_period=duty_cycle_period,
                local=kwinner_local,
            ))
        self.add_module("cnn1_maxpool", nn.MaxPool2d(2))

        # Second Sparse CNN layer
        if cnn_weight_sparsity[1] < 1.0:
            self.add_module(
                "cnn2",
                SparseWeights2d(nn.Conv2d(cnn_out_channels[0],
                                          cnn_out_channels[1], 5),
                                weight_sparsity=cnn_weight_sparsity[1]))
        else:
            self.add_module(
                "cnn2", nn.Conv2d(cnn_out_channels[0], cnn_out_channels[1], 5))
        self.add_module("cnn2_batchnorm",
                        nn.BatchNorm2d(cnn_out_channels[1], affine=False))
        self.add_module(
            "cnn2_kwinner",
            KWinners2d(
                channels=cnn_out_channels[1],
                percent_on=cnn_percent_on[1],
                k_inference_factor=k_inference_factor,
                boost_strength=boost_strength,
                boost_strength_factor=boost_strength_factor,
                duty_cycle_period=duty_cycle_period,
                local=kwinner_local,
            ))
        self.add_module("cnn2_maxpool", nn.MaxPool2d(2))

        self.add_module("flatten", Flatten())

        # Sparse Linear layer
        self.add_module(
            "linear",
            SparseWeights(nn.Linear(25 * cnn_out_channels[1], linear_units),
                          weight_sparsity=linear_weight_sparsity))
        self.add_module("linear_bn", nn.BatchNorm1d(linear_units,
                                                    affine=False))
        self.add_module(
            "linear_kwinner",
            KWinners(n=linear_units,
                     percent_on=linear_percent_on,
                     k_inference_factor=k_inference_factor,
                     boost_strength=boost_strength,
                     boost_strength_factor=boost_strength_factor,
                     duty_cycle_period=duty_cycle_period))

        # Classifier
        self.add_module("output", nn.Linear(linear_units, 12))
        self.add_module("softmax", nn.LogSoftmax(dim=1))
Example #29
    def _create_vgg_model(self):
        """
        block_sizes = [1,1,1] - number of CNN layers in each block
        cnn_out_channels = [c1, c2, c3] - # out_channels in each layer of this block
        cnn_kernel_size = [k1, k2, k3] - kernel_size in each layer of this block
        cnn_weight_sparsity = [w1, w2, w3] - weight sparsity of each layer of this block
        cnn_percent_on = [p1, p2, p3] - percent_on in each layer of this block
        """
        # Here we require exactly 3 blocks
        # assert(len(self.block_sizes) == 3)

        # Create simple CNN model, with options for sparsity
        self.model = nn.Sequential()

        in_channels = 3
        output_size = 32 * 32
        output_units = output_size * in_channels
        for ly, block_size in enumerate(self.block_sizes):
            for b in range(block_size):
                self._add_cnn_layer(
                    index_str=str(ly) + "_" + str(b),
                    in_channels=in_channels,
                    out_channels=self.cnn_out_channels[ly],
                    kernel_size=self.cnn_kernel_sizes[ly],
                    percent_on=self.cnn_percent_on[ly],
                    weight_sparsity=self.cnn_weight_sparsity[ly],
                    add_pooling=b == block_size - 1,
                )
                in_channels = self.cnn_out_channels[ly]
            output_size = int(output_size / 4)
            output_units = output_size * in_channels

        # Flatten CNN output before passing to linear layer
        self.model.add_module("flatten", Flatten())

        # Linear layer
        input_size = output_units
        for ly, linear_n in enumerate(self.linear_n):
            linear = nn.Linear(input_size, linear_n)
            if self.linear_weight_sparsity[ly] < 1.0:
                self.model.add_module(
                    "linear_" + str(ly),
                    SparseWeights(linear, self.linear_weight_sparsity[ly]),
                )
            else:
                self.model.add_module("linear_" + str(ly), linear)

            if self.linear_percent_on[ly] < 1.0:
                self.model.add_module(
                    "kwinners_linear_" + str(ly),
                    KWinners(
                        n=linear_n,
                        percent_on=self.linear_percent_on[ly],
                        k_inference_factor=self.k_inference_factor,
                        boost_strength=self.boost_strength,
                        boost_strength_factor=self.boost_strength_factor,
                    ),
                )
            else:
                self.model.add_module("Linear_ReLU_" + str(ly), nn.ReLU())

            input_size = self.linear_n[ly]

        # Output layer
        self.model.add_module("output", nn.Linear(input_size,
                                                  self.output_size))

        print(self.model)

        self.model.to(self.device)

        self._initialize_weights()
Example #30
    def __init__(self, config=None):
        super().__init__()

        defaults = dict(
            device="cpu",
            input_size=1024,
            num_classes=12,
            boost_strength=[1.5, 1.5, 1.5],
            boost_strength_factor=[0.9, 0.9, 0.9],
            duty_cycle_period=1000,
            k_inference_factor=1.5,
            percent_on_k_winner=[0.095, 0.125, 0.1],
            hidden_neurons_conv=[64, 64],
            hidden_neurons_fc=1000,
            batch_norm=True,
            dropout=False,
            bias=True,
        )
        defaults.update(config or {})
        self.__dict__.update(defaults)
        self.device = torch.device(self.device)

        kwargs = dict(bias=self.bias, batch_norm=self.batch_norm, dropout=self.dropout)

        # Decide which activation function to use for the conv layers
        self.activation_funcs = []
        for layer, hidden_size in enumerate(self.hidden_neurons_conv):
            if self.percent_on_k_winner[layer] < 0.5:
                self.activation_funcs.append(
                    KWinners2d(
                        hidden_size,
                        percent_on=self.percent_on_k_winner[layer],
                        boost_strength=self.boost_strength[layer],
                        boost_strength_factor=self.boost_strength_factor[layer],
                        k_inference_factor=self.k_inference_factor,
                    )
                )
            else:
                self.activation_funcs.append(nn.ReLU())

        # Decide which activation to use for the linear layer
        if self.percent_on_k_winner[-1] < 0.5:
            linear_activation = KWinners(
                self.hidden_neurons_fc,
                percent_on=self.percent_on_k_winner[-1],
                boost_strength=self.boost_strength[-1],
                boost_strength_factor=self.boost_strength_factor[-1],
                k_inference_factor=self.k_inference_factor,
            )
        else:
            linear_activation = nn.ReLU()

        # Convolutional feature layers
        conv_layers = [
            # 32x32 -> conv 5x5 -> 28x28 -> maxpool -> 14x14
            *self._conv_block(1, self.hidden_neurons_conv[0], self.activation_funcs[0]),
            # 14x14 -> conv 5x5 -> 10x10 -> maxpool -> 5x5
            *self._conv_block(
                self.hidden_neurons_conv[0],
                self.hidden_neurons_conv[1],
                self.activation_funcs[1],
            ),
            Flatten(),
        ]
        linear_layers = [
            DSLinearBlock(
                self.hidden_neurons_conv[1] * 25,
                self.hidden_neurons_fc,
                activation_func=linear_activation,
                batch_norm_affine=False,
                config=config,
                **kwargs,
            ),
            DSLinearBlock(self.hidden_neurons_fc, self.num_classes, config=config),
        ]

        self.features = nn.Sequential(*conv_layers)
        self.classifier = nn.Sequential(*linear_layers)