def __init__(self, config=None): super().__init__() defaults = dict( device="cpu", input_size=1024, num_classes=12, boost_strength=1.5, boost_strength_factor=0.9, k_inference_factor=1.5, duty_cycle_period=1000, use_kwinners=True, hidden_neurons_fc=207, ) defaults.update(config or {}) self.__dict__.update(defaults) self.device = torch.device(self.device) # hidden layers conv_layers = [ *self._conv_block(1, 12, percent_on=0.095), # 28x28 -> 14x14 *self._conv_block(12, 12, percent_on=0.125), # 10x10 -> 5x5 Flatten(), ] linear_layers = [ # *self._linear_block(1600, 1500, percent_on= 0.067), *self._linear_block(300, self.hidden_neurons_fc, percent_on=0.1), nn.Linear(self.hidden_neurons_fc, self.num_classes), ] self.features = nn.Sequential(*conv_layers) self.classifier = nn.Sequential(*linear_layers)
def __init__(self, block_config=None, depth=100, growth_rate=12,
             reduction=0.5, num_classes=10, bottleneck_size=4,
             avg_pool_size=8):
    super(DenseNetCIFAR, self).__init__()

    # Compute blocks from depth
    if block_config is None:
        layers = (depth - 4) // 6
        block_config = (layers,) * 3

    # First convolution
    num_features = growth_rate * 2
    self.add_module("conv", nn.Conv2d(in_channels=3,
                                      out_channels=num_features,
                                      kernel_size=3, padding=1, bias=False))

    for i, num_layers in enumerate(block_config):
        block = _DenseBlock(num_layers=num_layers,
                            num_input_features=num_features,
                            bn_size=bottleneck_size,
                            growth_rate=growth_rate,
                            drop_rate=0)
        self.add_module("block{0}".format(i + 1), block)
        num_features = num_features + num_layers * growth_rate
        if i != len(block_config) - 1:
            out_features = math.floor(num_features * reduction)
            trans = _Transition(num_input_features=num_features,
                                num_output_features=out_features)
            self.add_module("transition{0}".format(i + 1), trans)
            num_features = out_features

    # Final batch norm
    self.add_module("norm", nn.BatchNorm2d(num_features))
    self.add_module("relu", nn.ReLU(inplace=True))
    self.add_module("avg_pool", nn.AvgPool2d(kernel_size=avg_pool_size))

    # classifier layer
    outputs = int(num_features * 16 / (avg_pool_size * avg_pool_size))
    self.add_module("flatten", Flatten())
    self.add_module("classifier", nn.Linear(outputs, num_classes))

    # Official init from torch repo.
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight.data)
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
        elif isinstance(m, nn.Linear):
            m.bias.data.zero_()
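# A quick trace of the feature bookkeeping above for the defaults
# (depth=100, growth_rate=12, reduction=0.5); a sketch for illustration only,
# with an illustrative helper name.
def _densenet_feature_trace():
    import math
    layers = (100 - 4) // 6  # 16 bottleneck layers per dense block
    num_features = 12 * 2    # 24 channels after the stem conv
    for i in range(3):
        num_features += layers * 12  # each layer adds growth_rate channels
        if i != 2:
            # transitions halve the channel count
            num_features = math.floor(num_features * 0.5)
    return num_features

assert _densenet_feature_trace() == 342  # channels entering the final norm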
def __init__(self,
             cnn_out_channels=(64, 64),
             cnn_percent_on=(0.095, 0.125),
             linear_units=1000,
             linear_percent_on=0.1,
             linear_weight_sparsity=0.4,
             boost_strength=1.5,
             boost_strength_factor=0.9,
             k_inference_factor=1.5,
             duty_cycle_period=1000):
    super(GSCSparseCNN, self).__init__(OrderedDict([
        # First Sparse CNN layer
        ("cnn1", nn.Conv2d(1, cnn_out_channels[0], 5)),
        ("cnn1_batchnorm", nn.BatchNorm2d(cnn_out_channels[0],
                                          affine=False)),
        ("cnn1_maxpool", nn.MaxPool2d(2)),
        ("cnn1_kwinner", KWinners2d(
            channels=cnn_out_channels[0],
            percent_on=cnn_percent_on[0],
            k_inference_factor=k_inference_factor,
            boost_strength=boost_strength,
            boost_strength_factor=boost_strength_factor,
            duty_cycle_period=duty_cycle_period)),
        # Second Sparse CNN layer
        ("cnn2", nn.Conv2d(cnn_out_channels[0], cnn_out_channels[1], 5)),
        ("cnn2_batchnorm", nn.BatchNorm2d(cnn_out_channels[1],
                                          affine=False)),
        ("cnn2_maxpool", nn.MaxPool2d(2)),
        ("cnn2_kwinner", KWinners2d(
            channels=cnn_out_channels[1],
            percent_on=cnn_percent_on[1],
            k_inference_factor=k_inference_factor,
            boost_strength=boost_strength,
            boost_strength_factor=boost_strength_factor,
            duty_cycle_period=duty_cycle_period)),
        ("flatten", Flatten()),
        # Sparse Linear layer
        ("linear", SparseWeights(
            nn.Linear(25 * cnn_out_channels[1], linear_units),
            weight_sparsity=linear_weight_sparsity)),
        ("linear_bn", nn.BatchNorm1d(linear_units, affine=False)),
        ("linear_kwinner", KWinners(
            n=linear_units,
            percent_on=linear_percent_on,
            k_inference_factor=k_inference_factor,
            boost_strength=boost_strength,
            boost_strength_factor=boost_strength_factor,
            duty_cycle_period=duty_cycle_period)),
        # Classifier
        ("output", nn.Linear(linear_units, 12)),
        ("softmax", nn.LogSoftmax(dim=1)),
    ]))
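# Usage sketch (not part of the original source; the demo name is
# illustrative). The 25 in the linear layer follows from 32x32 GSC inputs:
# conv5 32->28, pool ->14, conv5 ->10, pool ->5, i.e. a 5x5 map per channel.
def _demo_gsc_sparse_cnn():
    import torch
    model = GSCSparseCNN()
    model.eval()  # use inference-time k-winners and batch norm statistics
    log_probs = model(torch.randn(8, 1, 32, 32))
    assert log_probs.shape == (8, 12)  # LogSoftmax over 12 GSC classes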
def simple_conv_net():
    return torch.nn.Sequential(
        torch.nn.Conv2d(1, 3, 5),
        torch.nn.MaxPool2d(2),
        torch.nn.ReLU(),
        Flatten(),
        torch.nn.Linear(111, 3),
        torch.nn.ReLU(),
        torch.nn.Linear(3, 2),
    )
def __init__(self,
             cnn_out_channels=(32, 64),
             cnn_percent_on=(0.087, 0.293),
             linear_units=700,
             linear_percent_on=0.143,
             linear_weight_sparsity=0.3,
             boost_strength=1.5,
             boost_strength_factor=0.85,
             k_inference_factor=1.5,
             duty_cycle_period=1000):
    super(MNISTSparseCNN, self).__init__(OrderedDict([
        # First Sparse CNN layer
        ("cnn1", nn.Conv2d(1, cnn_out_channels[0], 5)),
        ("cnn1_maxpool", nn.MaxPool2d(2)),
        ("cnn1_kwinner", KWinners2d(
            channels=cnn_out_channels[0],
            percent_on=cnn_percent_on[0],
            k_inference_factor=k_inference_factor,
            boost_strength=boost_strength,
            boost_strength_factor=boost_strength_factor,
            duty_cycle_period=duty_cycle_period)),
        # Second Sparse CNN layer
        ("cnn2", nn.Conv2d(cnn_out_channels[0], cnn_out_channels[1], 5)),
        ("cnn2_maxpool", nn.MaxPool2d(2)),
        ("cnn2_kwinner", KWinners2d(
            channels=cnn_out_channels[1],
            percent_on=cnn_percent_on[1],
            k_inference_factor=k_inference_factor,
            boost_strength=boost_strength,
            boost_strength_factor=boost_strength_factor,
            duty_cycle_period=duty_cycle_period)),
        ("flatten", Flatten()),
        # Sparse Linear layer
        ("linear", SparseWeights(
            nn.Linear(16 * cnn_out_channels[1], linear_units),
            weight_sparsity=linear_weight_sparsity)),
        ("linear_kwinner", KWinners(
            n=linear_units,
            percent_on=linear_percent_on,
            k_inference_factor=k_inference_factor,
            boost_strength=boost_strength,
            boost_strength_factor=boost_strength_factor,
            duty_cycle_period=duty_cycle_period)),
        # Classifier
        ("output", nn.Linear(linear_units, 10)),
        ("softmax", nn.LogSoftmax(dim=1)),
    ]))
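# Usage sketch (not part of the original source; the demo name is
# illustrative). The 16 in the linear layer follows from 28x28 MNIST inputs:
# conv5 28->24, pool ->12, conv5 ->8, pool ->4, i.e. a 4x4 map per channel.
def _demo_mnist_sparse_cnn():
    import torch
    model = MNISTSparseCNN()
    model.eval()
    log_probs = model(torch.randn(4, 1, 28, 28))
    assert log_probs.shape == (4, 10)  # LogSoftmax over 10 digit classes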
def __init__(self, config=None):
    super(VGG19, self).__init__()
    defaults = dict(
        device="cuda",
        input_size=784,
        num_classes=10,
        hidden_sizes=[4000, 1000, 4000],
        batch_norm=False,
        dropout=0.3,
        bias=False,
        init_weights=True,
        kwinners=False,
        percent_on=0.3,
    )
    defaults.update(config or {})
    self.__dict__.update(defaults)
    self.device = torch.device(self.device)

    # choose k-winners or a regular (ReLU) network
    if self.kwinners:
        self.pool_func = lambda: nn.AvgPool2d(kernel_size=2, stride=2)
        self.nonlinear_func = self._kwinners
    else:
        self.pool_func = lambda: nn.MaxPool2d(kernel_size=2, stride=2)
        self.nonlinear_func = lambda fout: nn.ReLU()

    # initialize network
    layers = [
        *self._conv_block(3, 64),
        *self._conv_block(64, 64, pool=True),     # 16x16
        *self._conv_block(64, 128),
        *self._conv_block(128, 128, pool=True),   # 8x8
        *self._conv_block(128, 256),
        *self._conv_block(256, 256),
        *self._conv_block(256, 256),
        *self._conv_block(256, 256, pool=True),   # 4x4
        *self._conv_block(256, 512),
        *self._conv_block(512, 512),
        *self._conv_block(512, 512),
        *self._conv_block(512, 512, pool=True),   # 2x2
        *self._conv_block(512, 512),
        *self._conv_block(512, 512),
        *self._conv_block(512, 512),
        *self._conv_block(512, 512, pool=True),   # 1x1
    ]
    layers.append(Flatten())
    layers.append(nn.Linear(512, self.num_classes))
    self.classifier = nn.Sequential(*layers)

    if self.init_weights:
        self._initialize_weights()
def __init__(self, config=None):
    super(ResNet, self).__init__()

    # update config
    defaults = dict(
        depth=50,
        num_classes=10,
        percent_on_k_winner=1.0,
        boost_strength=1.4,
        boost_strength_factor=0.7,
        k_inference_factor=1.0,
    )
    defaults.update(config or {})
    self.__dict__.update(defaults)

    # adds kwinners
    for attr in [
        "percent_on_k_winner",
        "boost_strength",
        "boost_strength_factor",
        "k_inference_factor",
    ]:
        if type(self.__dict__[attr]) == list:
            raise ValueError(
                "ResNet currently supports only a single percentage of "
                "activations for KWinners layers")

    if self.percent_on_k_winner < 0.5:
        self.activation_func = lambda out: self._kwinners(out)
    else:
        self.activation_func = lambda _: nn.ReLU()

    self.in_planes = 64

    # TODO: analyze what these attributes are used for in torchvision:
    # self.groups, self.base_width
    block, num_blocks = self._config_layers()

    self.features = nn.Sequential(
        conv7x7(3, 64, stride=2),
        nn.BatchNorm2d(64),
        self.activation_func(64),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        self._make_layer(block, 64, num_blocks[0], stride=1),
        self._make_layer(block, 128, num_blocks[1], stride=2),
        self._make_layer(block, 256, num_blocks[2], stride=2),
        self._make_layer(block, 512, num_blocks[3], stride=2),
        nn.AdaptiveAvgPool2d(1),
        Flatten(),  # TODO: see if this is still needed
    )
    self.classifier = nn.Linear(512 * block.expansion, self.num_classes)
def __init__(self, input_size=28 * 28, n_hidden_units=1000, n_classes=10,
             is_sparse=False, sparsity=(0.75, 0.85), percent_on=0.1):
    """
    Initialize a 2-layer MLP.

    :param input_size: number of input features to the MLP
    :type input_size: int
    :param n_hidden_units: number of units in each of the two hidden layers
    :type n_hidden_units: int
    :param n_classes: number of output units
    :type n_classes: int
    :param is_sparse: whether to initialize the sparse network instead of a
                      dense one
    :type is_sparse: bool
    :param sparsity: a 2-element list/tuple specifying the sparsity in each
                     of the hidden layers
    :type sparsity: list/tuple of float
    :param percent_on: fraction of units allowed to be active in the
                       K-Winners layer (only applies to sparse networks)
    :type percent_on: float
    """
    super().__init__()
    self.is_sparse = is_sparse
    self.flatten = Flatten()
    self.n_classes = n_classes

    self.fc1 = torch.nn.Linear(input_size, n_hidden_units)
    self.fc2 = torch.nn.Linear(n_hidden_units, n_hidden_units)
    self.fc3 = torch.nn.Linear(n_hidden_units, n_classes)

    if is_sparse:
        self.fc1_sparsity, self.fc2_sparsity = sparsity
        self.percent_on = percent_on

        self.fc1 = SparseWeights(self.fc1, sparsity=self.fc1_sparsity)
        self.kw1 = KWinners(n=n_hidden_units, percent_on=percent_on,
                            boost_strength=0.0)
        self.fc2 = SparseWeights(self.fc2, sparsity=self.fc2_sparsity)
        self.kw2 = KWinners(n=n_hidden_units, percent_on=percent_on,
                            boost_strength=0.0)
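# Usage sketch (illustrative; "MLP" stands in for this class's actual name,
# which is not shown in this excerpt). The sparse variant wraps fc1/fc2 in
# SparseWeights and adds fixed k-winners (boost_strength=0.0, so no
# boosting) after each hidden layer:
#
#     dense = MLP()
#     sparse = MLP(is_sparse=True, sparsity=(0.75, 0.85), percent_on=0.1)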
def __init__(self, config=None):
    super(VGG19Heb, self).__init__()
    defaults = dict(
        device="cuda",
        input_size=784,
        num_classes=10,
        hidden_sizes=[4000, 1000, 4000],
        batch_norm=False,
        dropout=0.3,
        bias=False,
        init_weights=True,
        kwinners=False,
        percent_on=0.3,
        boost_strength=1.4,
        boost_strength_factor=0.7,
        hebbian_learning=True,
    )
    defaults.update(config or {})
    self.__dict__.update(defaults)
    self.device = torch.device(self.device)

    # choose k-winners or a regular (ReLU) network
    if self.kwinners:
        self.pool_func = lambda: nn.AvgPool2d(kernel_size=2, stride=2)
        self.nonlinear_func = self._kwinners
    else:
        self.pool_func = lambda: nn.MaxPool2d(kernel_size=2, stride=2)
        self.nonlinear_func = lambda fout: nn.ReLU()

    # initialize network
    layers = [
        *self._conv_block(3, 64, pool=True),      # 16x16
        *self._conv_block(64, 64, pool=True),     # 8x8
        *self._conv_block(64, 128, pool=True),    # 4x4
        *self._conv_block(128, 256, pool=True),   # 2x2
        *self._conv_block(256, 512, pool=True),   # 1x1
    ]
    layers.append(Flatten())
    layers.append(nn.Linear(512, self.num_classes))
    self.classifier = nn.Sequential(*layers)

    # track the activations
    # should be reset at the end of each round; done in the model
    self.correlations = []

    if self.init_weights:
        self._initialize_weights()
def __init__(self, config=None):
    super(GSCHebDepreciated, self).__init__()

    defaults = dict(
        input_size=1024,
        num_classes=12,
        boost_strength=1.5,
        boost_strength_factor=0.9,
        k_inference_factor=1.5,
        duty_cycle_period=1000,
        use_kwinners=True,
        hidden_neurons_fc=1000,
    )
    defaults.update(config or {})
    self.__dict__.update(defaults)
    # NOTE: "device" and "model" have no defaults here; both must be
    # supplied through `config` or the lines below raise AttributeError.
    self.device = torch.device(self.device)

    if self.model == "DSNNMixedHeb":
        self.hebbian_learning = True
    else:
        self.hebbian_learning = False

    # hidden layers
    conv_layers = [
        *self._conv_block(1, 64, percent_on=0.095),   # 28x28 -> 14x14
        *self._conv_block(64, 64, percent_on=0.125),  # 10x10 -> 5x5
    ]
    linear_layers = [
        Flatten(),
        # *self._linear_block(1600, 1500, percent_on=0.067),
        *self._linear_block(1600, self.hidden_neurons_fc, percent_on=0.1),
        nn.Linear(self.hidden_neurons_fc, self.num_classes),
    ]

    # keep a flat list of the layers as well, to make traversal easier
    self.layers = conv_layers + linear_layers
    self.features = nn.Sequential(*conv_layers)
    self.classifier = nn.Sequential(*linear_layers)

    # track correlations
    self.correlations = []
def __init__(self, config=None): super().__init__() defaults = dict( device="cpu", input_size=784, num_classes=10, hidden_sizes=[100, 100, 100], batch_norm=False, dropout=False, use_kwinners=False, hebbian_learning=False, bias=True, ) defaults.update(config or {}) self.__dict__.update(defaults) self.device = torch.device(self.device) # decide which actiovation function to use if self.use_kwinners: self.activation_func = self._kwinners else: self.activation_func = lambda _: nn.ReLU() layers = [Flatten()] # add the first layer layers.extend(self._linear_block(self.input_size, self.hidden_sizes[0])) # all hidden layers for i in range(1, len(self.hidden_sizes)): layers.extend( self._linear_block(self.hidden_sizes[i - 1], self.hidden_sizes[i])) # last layer layers.append( nn.Linear(self.hidden_sizes[-1], self.num_classes, bias=self.bias)) # create the layers self.classifier = nn.Sequential(*layers)
def __init__(
    self,
    dpc=3,
    cnn_w_sparsity=0.05,
    linear_w_sparsity=0.5,
    cat_w_sparsity=0.01,
    n_classes=4,
):
    super(ToyNetwork, self).__init__()
    conv_channels = 128
    self.n_classes = n_classes
    self.conv1 = SparseWeights2d(
        nn.Conv2d(
            in_channels=1,
            out_channels=conv_channels,
            kernel_size=10,
            padding=0,
            stride=1,
        ),
        cnn_w_sparsity,
    )
    self.kwin1 = KWinners2d(conv_channels, percent_on=0.1)
    self.bn = nn.BatchNorm2d(conv_channels, affine=False)
    self.mp1 = nn.MaxPool2d(kernel_size=2)
    self.flatten = Flatten()
    self.d1 = DendriteLayer(
        in_dim=int(conv_channels / 64) * 7744,
        out_dim=1000,
        dendrites_per_neuron=dpc,
    )
    self.linear = SparseWeights(nn.Linear(1000, n_classes + 1),
                                linear_w_sparsity)
    self.cat = SparseWeights(nn.Linear(n_classes + 1, 1000 * dpc),
                             cat_w_sparsity)
def __init__(
    self,
    in_channels=1,
    cnn_out_channels=2,
    linear_units=3,
    sparse_weights=False,
):
    super(SimpleCNN, self).__init__()
    if sparse_weights:
        self.add_module(
            "cnn1_sparse",
            SparseWeights2d(nn.Conv2d(in_channels, cnn_out_channels, 5),
                            0.5))
    else:
        self.add_module("cnn1", nn.Conv2d(in_channels, cnn_out_channels, 5))
    self.add_module("cnn1_batchnorm",
                    nn.BatchNorm2d(cnn_out_channels, affine=False))
    self.add_module("cnn1_maxpool", nn.MaxPool2d(2))
    self.add_module("cnn1_relu", nn.ReLU())

    # Linear layer
    self.add_module("flatten", Flatten())
    if sparse_weights:
        self.add_module(
            "linear_sparse",
            SparseWeights(nn.Linear(196 * cnn_out_channels, linear_units),
                          0.5))
    else:
        self.add_module("linear",
                        nn.Linear(196 * cnn_out_channels, linear_units))
    self.add_module("linear_bn", nn.BatchNorm1d(linear_units, affine=False))
    self.add_module("linear_relu", nn.ReLU())

    # Classifier layer with 12 classes
    self.add_module("output", nn.Linear(linear_units, 12))
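# Shape note (a sketch, assuming the 32x32 inputs this model appears to
# target): conv5 32->28, maxpool ->14, so Flatten yields 14 * 14 = 196 units
# per channel, matching the linear layer input above.
assert ((32 - 5 + 1) // 2) ** 2 == 196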
def __init__(self, config=None): super().__init__() defaults = dict( device="cpu", input_size=1024, num_classes=12, boost_strength=[1.5, 1.5, 1.5], boost_strength_factor=[0.9, 0.9, 0.9], duty_cycle_period=1000, k_inference_factor=1.5, percent_on_k_winner=[0.095, 0.125, 0.1], hidden_neurons_conv=[64, 64], hidden_neurons_fc=1000, batch_norm=True, dropout=False, bias=True, ) defaults.update(config or {}) self.__dict__.update(defaults) self.device = torch.device(self.device) kwargs = dict(bias=self.bias, batch_norm=self.batch_norm, dropout=self.dropout) # decide which actiovation function to use for conv self.activation_funcs = [] for layer, hidden_size in enumerate(self.hidden_neurons_conv): if self.percent_on_k_winner[layer] < 0.5: self.activation_funcs.append( KWinners2d( hidden_size, percent_on=self.percent_on_k_winner[layer], boost_strength=self.boost_strength[layer], boost_strength_factor=self.boost_strength_factor[layer], k_inference_factor=self.k_inference_factor, ) ) else: self.activation_funcs.append(nn.ReLU()) # decide which activvation to use for linear if self.percent_on_k_winner[-1] < 0.5: linear_activation = KWinners( self.hidden_neurons_fc, percent_on=self.percent_on_k_winner[-1], boost_strength=self.boost_strength[-1], boost_strength_factor=self.boost_strength_factor[-1], k_inference_factor=self.k_inference_factor, ) else: linear_activation = nn.ReLU() # linear layers conv_layers = [ # 28x28 -> 14x14 *self._conv_block(1, self.hidden_neurons_conv[0], self.activation_funcs[0]), # 10x10 -> 5x5 *self._conv_block( self.hidden_neurons_conv[0], self.hidden_neurons_conv[1], self.activation_funcs[1], ), Flatten(), ] linear_layers = [ DSLinearBlock( self.hidden_neurons_conv[1] * 25, self.hidden_neurons_fc, activation_func=linear_activation, batch_norm_affine=False, config=config, **kwargs, ), DSLinearBlock(self.hidden_neurons_fc, self.num_classes, config=config), ] self.features = nn.Sequential(*conv_layers) self.classifier = nn.Sequential(*linear_layers)
def __init__(
    self,
    input_shape=(1, 32, 32),
    cnn_out_channels=(64, 64),
    cnn_activity_percent_on=(0.1, 0.1),
    cnn_weight_percent_on=(1.0, 1.0),
    linear_n=(1000,),
    linear_activity_percent_on=(0.1,),
    linear_weight_percent_on=(0.4,),
    use_dendrites=False,
    dendrites_per_cell=5,
    num_classes=10,
    boost_strength=1.67,
    boost_strength_factor=0.9,
    duty_cycle_period=1000,
    k_inference_factor=1.5,
    use_batch_norm=True,
    dropout=0.0,
    activation_fct_before_max_pool=False,
    consolidated_sparse_weights=False,
    use_kwinners_local=False,
    use_softmax=True,
):
    super(LeSparseNet, self).__init__()

    # Add CNN Layers
    current_input_shape = input_shape
    cnn_layers = len(cnn_out_channels)
    self.dpc = dendrites_per_cell
    for i in range(cnn_layers):
        in_channels, height, width = current_input_shape
        # We only do consolidated weights for the second CNN layer
        csw = (i == 1) and consolidated_sparse_weights
        add_sparse_cnn_layer(
            network=self,
            suffix=i + 1,
            in_channels=in_channels,
            out_channels=cnn_out_channels[i],
            use_batch_norm=use_batch_norm,
            weight_sparsity=cnn_weight_percent_on[i],
            percent_on=cnn_activity_percent_on[i],
            k_inference_factor=k_inference_factor,
            boost_strength=boost_strength,
            boost_strength_factor=boost_strength_factor,
            duty_cycle_period=duty_cycle_period,
            activation_fct_before_max_pool=activation_fct_before_max_pool,
            use_kwinners_local=use_kwinners_local,
            consolidated_sparse_weights=csw,
        )

        # Compute next layer input shape
        wout = (width - 5) + 1
        maxpool_width = wout // 2
        current_input_shape = (cnn_out_channels[i], maxpool_width,
                               maxpool_width)

    # Flatten CNN output before passing to linear layer
    self.add_module("flatten", Flatten())

    # Add Linear layers
    input_size = np.prod(current_input_shape)
    for i in range(len(linear_n)):
        if use_dendrites and i == 0:
            add_sparse_dendrite_layer(
                network=self,
                suffix=i + 1,
                in_dim=input_size,
                out_dim=linear_n[i],
                dendrites_per_neuron=self.dpc,
                use_batch_norm=use_batch_norm,
                weight_sparsity=linear_weight_percent_on[i],
                percent_on=linear_activity_percent_on[i],
                k_inference_factor=k_inference_factor,
                boost_strength=boost_strength,
                boost_strength_factor=boost_strength_factor,
                duty_cycle_period=duty_cycle_period,
            )
        else:
            add_sparse_linear_layer(
                network=self,
                suffix=i + 1,
                input_size=input_size,
                linear_n=linear_n[i],
                dropout=dropout,
                use_batch_norm=use_batch_norm,
                weight_sparsity=linear_weight_percent_on[i],
                percent_on=linear_activity_percent_on[i],
                k_inference_factor=k_inference_factor,
                boost_strength=boost_strength,
                boost_strength_factor=boost_strength_factor,
                duty_cycle_period=duty_cycle_period,
                consolidated_sparse_weights=consolidated_sparse_weights,
            )
        input_size = linear_n[i]

    if use_softmax:
        self.add_module("softmax", nn.LogSoftmax(dim=1))
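# Sketch of the shape propagation used in the CNN loop above (the helper
# name is illustrative): each valid 5x5 conv shrinks the side length by 4
# and the subsequent 2x2 max pool halves it.
def _next_side(width, kernel=5, pool=2):
    wout = (width - kernel) + 1
    return wout // pool

assert _next_side(32) == 14 and _next_side(14) == 5  # (1, 32, 32) default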
def __init__(self, depth=50, num_classes=1000,
             conv_layer=nn.Conv2d, conv_args=None,
             linear_layer=nn.Linear, linear_args=None,
             act_layer=default_activation_layer, act_args=None,
             norm_layer=nn.BatchNorm2d, norm_args=None,
             deprecated_compatibility_mode=False):
    """
    :param conv_layer: A conv2d layer that receives the arguments of a
           nn.Conv2d and custom conv_args
    :type conv_layer: callable

    :param conv_args: A dictionary specifying extra kwargs for the
           conv_layer, possibly assigning different args to each layer.
    :type conv_args: dict or None

    :param linear_layer: A linear layer that receives the arguments of a
           nn.Linear and custom linear_args
    :type linear_layer: callable

    :param linear_args: A dictionary specifying extra kwargs for the
           linear_layer, possibly assigning different args to each layer.
    :type linear_args: dict or None

    :param act_layer: An activation layer that receives the number of input
           channels and custom act_args
    :type act_layer: callable

    :param act_args: A dictionary specifying extra kwargs for the act_layer,
           possibly assigning different args to each layer.
    :type act_args: dict or None

    :param norm_layer: A normalization layer that receives the arguments of
           nn.BatchNorm2d and custom norm_args
    :type norm_layer: callable

    :param norm_args: A dictionary specifying extra kwargs for the
           norm_layer, possibly assigning different args to each layer.
    :type norm_args: dict or None

    :param deprecated_compatibility_mode: Enables behavior required by
           SparseResNet
    :type deprecated_compatibility_mode: bool
    """
    super().__init__()
    assert str(depth) in cf_dict, "Resnet depth should be in {}".format(
        ",".join(cf_dict.keys()))
    block, num_blocks = cf_dict[str(depth)]

    conv_args = expand_args(conv_args, num_blocks, block.conv_keys)
    norm_args = expand_args(norm_args, num_blocks, block.norm_keys)
    act_args = expand_args(act_args, num_blocks, block.act_keys)
    linear_args = linear_args or {}

    if not deprecated_compatibility_mode:
        # Previous models expect to receive the kernel size in the
        # activation layer. Do this in the Bottleneck code, but discard it
        # by default.
        act_layer = discard_kernel_size(act_layer)

    self.quant = QuantStub()

    features = [
        # stem
        ("stem", conv_layer(3, 64, kernel_size=7, stride=2, padding=3,
                            bias=False, **conv_args["stem"])),
        ("bn_stem", norm_layer(64, **norm_args["stem"])),
        ("act_stem", act_layer(64, **act_args["stem"])),
        ("pool_stem", nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
    ]

    # Track the previous out_channels during initialization.
    self.in_planes = 64

    features += [
        # groups 1 to 4
        ("group1", self._make_group(
            block, 64, num_blocks[0], stride=1,
            conv_layer=conv_layer, conv_args=conv_args["filters64"],
            act_layer=act_layer, act_args=act_args["filters64"],
            norm_layer=norm_layer, norm_args=norm_args["filters64"])),
        ("group2", self._make_group(
            block, 128, num_blocks[1], stride=2,
            conv_layer=conv_layer, conv_args=conv_args["filters128"],
            act_layer=act_layer, act_args=act_args["filters128"],
            norm_layer=norm_layer, norm_args=norm_args["filters128"])),
        ("group3", self._make_group(
            block, 256, num_blocks[2], stride=2,
            conv_layer=conv_layer, conv_args=conv_args["filters256"],
            act_layer=act_layer, act_args=act_args["filters256"],
            norm_layer=norm_layer, norm_args=norm_args["filters256"])),
        ("group4", self._make_group(
            block, 512, num_blocks[3], stride=2,
            conv_layer=conv_layer, conv_args=conv_args["filters512"],
            act_layer=act_layer, act_args=act_args["filters512"],
            norm_layer=norm_layer, norm_args=norm_args["filters512"])),
        ("avg_pool", nn.AdaptiveAvgPool2d(1)),
        ("flatten", Flatten()),
    ]
    self.features = nn.Sequential(OrderedDict(features))
    del self.in_planes

    # last output layer
    self.classifier = linear_layer(
        512 * block.expansion, num_classes, **linear_args)

    self.dequant = DeQuantStub()
def __init__(
    self,
    cnn_out_channels=(32, 64, 32),
    cnn_percent_on=(0.095, 0.125, 0.0925),
    linear_units=1600,
    linear_percent_on=0.1,
    linear_weight_sparsity=0.4,
    boost_strength=1.5,
    boost_strength_factor=0.9,
    k_inference_factor=1.5,
    duty_cycle_period=1000,
):
    super(GSCSparseFullCNN, self).__init__()
    # input_shape = (1, 32, 32)

    # First Sparse CNN layer
    self.add_module("cnn1", nn.Conv2d(1, cnn_out_channels[0], 5))
    self.add_module("cnn1_batchnorm",
                    nn.BatchNorm2d(cnn_out_channels[0], affine=False))
    self.add_module("cnn1_maxpool", nn.MaxPool2d(2))
    self.add_module(
        "cnn1_kwinner",
        KWinners2d(
            channels=cnn_out_channels[0],
            percent_on=cnn_percent_on[0],
            k_inference_factor=k_inference_factor,
            boost_strength=boost_strength,
            boost_strength_factor=boost_strength_factor,
            duty_cycle_period=duty_cycle_period,
        ),
    )

    # Second Sparse CNN layer
    self.add_module("cnn2",
                    nn.Conv2d(cnn_out_channels[0], cnn_out_channels[1], 5))
    self.add_module("cnn2_batchnorm",
                    nn.BatchNorm2d(cnn_out_channels[1], affine=False))
    self.add_module("cnn2_maxpool", nn.MaxPool2d(2))
    self.add_module(
        "cnn2_kwinner",
        KWinners2d(
            channels=cnn_out_channels[1],
            percent_on=cnn_percent_on[1],
            k_inference_factor=k_inference_factor,
            boost_strength=boost_strength,
            boost_strength_factor=boost_strength_factor,
            duty_cycle_period=duty_cycle_period,
        ),
    )

    # # Third Sparse CNN layer
    # self.add_module("cnn3",
    #                 nn.Conv2d(cnn_out_channels[1], cnn_out_channels[2], 5))
    # self.add_module("cnn3_batchnorm",
    #                 nn.BatchNorm2d(cnn_out_channels[2], affine=False))
    # self.add_module("cnn3_maxpool", nn.MaxPool2d(2))
    # self.add_module("cnn3_kwinner", KWinners2d(
    #     channels=cnn_out_channels[2],
    #     percent_on=cnn_percent_on[2],
    #     k_inference_factor=k_inference_factor,
    #     boost_strength=boost_strength,
    #     boost_strength_factor=boost_strength_factor,
    #     duty_cycle_period=duty_cycle_period))

    self.add_module("flatten", Flatten())

    # # Sparse Linear layer
    # self.add_module("linear", SparseWeights(
    #     nn.Linear(25 * cnn_out_channels[1], linear_units),
    #     weight_sparsity=linear_weight_sparsity))
    # self.add_module("linear_bn",
    #                 nn.BatchNorm1d(linear_units, affine=False))
    # self.add_module("linear_kwinner", KWinners(
    #     n=linear_units,
    #     percent_on=linear_percent_on,
    #     k_inference_factor=k_inference_factor,
    #     boost_strength=boost_strength,
    #     boost_strength_factor=boost_strength_factor,
    #     duty_cycle_period=duty_cycle_period))

    # Classifier
    self.add_module("output", nn.Linear(1600, 12))
    self.add_module("softmax", nn.LogSoftmax(dim=1))
def setup_model(in_features, out_features):
    return torch.nn.Sequential(
        Flatten(),
        torch.nn.Linear(in_features, out_features),
    )
def __init__(
    self,
    input_shape=(1, 32, 32),
    cnn_out_channels=(64, 64),
    cnn_activity_percent_on=(0.1, 0.1),
    cnn_weight_percent_on=(1.0, 1.0),
    linear_n=(1000,),
    linear_activity_percent_on=(0.1,),
    linear_weight_percent_on=(0.4,),
    num_classes=10,
    temperature=10.0,
    eval_temperature=1.0,
    temperature_decay_rate=0.99,
    k_inference_factor=1.5,
    use_batch_norm=True,
    dropout=0.0,
    activation_fct_before_max_pool=False,
    consolidated_sparse_weights=False,
    use_softmax=True,
):
    super(SampledKWinnerLeSparseNet, self).__init__()

    # Add CNN Layers
    current_input_shape = input_shape
    cnn_layers = len(cnn_out_channels)
    for i in range(cnn_layers):
        in_channels, height, width = current_input_shape
        # We only do consolidated weights for the second CNN layer
        csw = (i == 1) and consolidated_sparse_weights
        add_sparse_cnn_layer(
            network=self,
            suffix=i + 1,
            in_channels=in_channels,
            out_channels=cnn_out_channels[i],
            use_batch_norm=use_batch_norm,
            weight_sparsity=cnn_weight_percent_on[i],
            percent_on=cnn_activity_percent_on[i],
            k_inference_factor=k_inference_factor,
            temperature=temperature,
            eval_temperature=eval_temperature,
            temperature_decay_rate=temperature_decay_rate,
            activation_fct_before_max_pool=activation_fct_before_max_pool,
            consolidated_sparse_weights=csw)

        # Compute next layer input shape
        wout = (width - 5) + 1
        maxpool_width = wout // 2
        current_input_shape = (cnn_out_channels[i], maxpool_width,
                               maxpool_width)

    # Flatten CNN output before passing to linear layer
    self.add_module("flatten", Flatten())

    # Add Linear layers
    input_size = np.prod(current_input_shape)
    for i in range(len(linear_n)):
        add_sparse_linear_layer(
            network=self,
            suffix=i + 1,
            input_size=input_size,
            linear_n=linear_n[i],
            dropout=dropout,
            use_batch_norm=use_batch_norm,
            weight_sparsity=linear_weight_percent_on[i],
            percent_on=linear_activity_percent_on[i],
            k_inference_factor=k_inference_factor,
            temperature=temperature,
            eval_temperature=eval_temperature,
            temperature_decay_rate=temperature_decay_rate,
            consolidated_sparse_weights=consolidated_sparse_weights,
        )
        input_size = linear_n[i]

    # Classifier
    self.add_module("output", nn.Linear(input_size, num_classes))
    if use_softmax:
        self.add_module("softmax", nn.LogSoftmax(dim=1))
def __init__(self, config): """Called once at the beginning of each experiment.""" super(MNISTSparseExperiment, self).__init__() self.start_time = time.time() self.logger = get_logger(config["name"], config.get("verbose", 2)) self.logger.debug("Config: %s", config) # Setup random seed seed = config["seed"] set_random_seed(seed) self.data_dir = config["data_dir"] self.batch_size = config["batch_size"] self.test_batch_size = config["test_batch_size"] self.first_epoch_batch_size = config["first_epoch_batch_size"] self.validation = config.get("validation", 50000.0 / 60000.0) self.learning_rate_factor = config["learning_rate_factor"] self.lr_scheduler_params = config.get("lr_scheduler_params", None) self._configure_dataloaders() # Configure Model cnn_input_shape = config.get("cnn_input_shape", (1, 28, 28)) linear_n = config["linear_n"] linear_percent_on = config["linear_percent_on"] cnn_out_channels = config["cnn_out_channels"] cnn_percent_on = config["cnn_percent_on"] boost_strength = config["boost_strength"] weight_sparsity = config["weight_sparsity"] cnn_weight_sparsity = config["cnn_weight_sparsity"] boost_strength_factor = config["boost_strength_factor"] k_inference_factor = config["k_inference_factor"] use_batch_norm = config["use_batch_norm"] dropout = config.get("dropout", 0.0) model = nn.Sequential() # Add CNN Layers input_shape = cnn_input_shape cnn_layers = len(cnn_out_channels) if cnn_layers > 0: for i in range(cnn_layers): in_channels, height, width = input_shape add_sparse_cnn_layer( network=model, suffix=i + 1, in_channels=in_channels, out_channels=cnn_out_channels[i], use_batch_norm=use_batch_norm, weight_sparsity=cnn_weight_sparsity, percent_on=cnn_percent_on[i], k_inference_factor=k_inference_factor, boost_strength=boost_strength, boost_strength_factor=boost_strength_factor, ) # Feed this layer output into next layer input in_channels = cnn_out_channels[i] # Compute next layer input shape wout = (width - 5) + 1 maxpool_width = wout // 2 input_shape = (in_channels, maxpool_width, maxpool_width) # Flatten CNN output before passing to linear layer model.add_module("flatten", Flatten()) # Add Linear layers input_size = np.prod(input_shape) for i in range(len(linear_n)): add_sparse_linear_layer( network=model, suffix=i + 1, input_size=input_size, linear_n=linear_n[i], dropout=dropout, use_batch_norm=False, weight_sparsity=weight_sparsity, percent_on=linear_percent_on[i], k_inference_factor=k_inference_factor, boost_strength=boost_strength, boost_strength_factor=boost_strength_factor, ) input_size = linear_n[i] # Output layer model.add_module("output", nn.Linear(input_size, 10)) model.add_module("softmax", nn.LogSoftmax(dim=1)) if torch.cuda.is_available(): self.device = torch.device("cuda") model = model.cuda() else: self.device = torch.device("cpu") if torch.cuda.device_count() > 1: self.logger.debug("Using", torch.cuda.device_count(), "GPUs") model = torch.nn.DataParallel(model) self.model = model self.logger.debug("Model: %s", self.model) self.learning_rate = config["learning_rate"] self.momentum = config["momentum"] self.batches_in_epoch = config["batches_in_epoch"] self.batches_in_first_epoch = config["batches_in_first_epoch"] self.config = config self.optimizer = self._create_optimizer(name=config["optimizer"], model=self.model) self.lr_scheduler = self._create_learning_rate_scheduler( name=config.get("lr_scheduler", None), optimizer=self.optimizer)
def __init__(
    self,
    input_shape,
    block_sizes,
    cnn_out_channels,
    cnn_kernel_sizes,
    cnn_weight_sparsity,
    cnn_percent_on,
    linear_units,
    linear_weight_sparsity,
    linear_percent_on,
    k_inference_factor,
    boost_strength,
    boost_strength_factor,
    use_max_pooling,
    num_classes,
):
    super(VGGSparseNet, self).__init__()
    in_channels, h, w = input_shape
    output_size = h * w
    output_units = output_size * in_channels
    for l, block_size in enumerate(block_sizes):
        for b in range(block_size):
            self._add_cnn_layer(
                index_str=str(l) + "_" + str(b),
                in_channels=in_channels,
                out_channels=cnn_out_channels[l],
                kernel_size=cnn_kernel_sizes[l],
                percent_on=cnn_percent_on[l],
                weight_sparsity=cnn_weight_sparsity[l],
                k_inference_factor=k_inference_factor,
                boost_strength=boost_strength,
                boost_strength_factor=boost_strength_factor,
                add_pooling=b == block_size - 1,
                use_max_pooling=use_max_pooling,
            )
            in_channels = cnn_out_channels[l]
        # each block ends with a pooling layer, quartering the spatial area
        output_size = int(output_size / 4)
        output_units = output_size * in_channels

    # Flatten CNN output before passing to linear layer
    self.add_module("flatten", Flatten())

    # Linear layer
    input_size = output_units
    for l, linear_n in enumerate(linear_units):
        linear = nn.Linear(input_size, linear_n)
        if linear_weight_sparsity[l] < 1.0:
            self.add_module(
                "linear_" + str(l),
                SparseWeights(linear, linear_weight_sparsity[l]),
            )
        else:
            self.add_module("linear_" + str(l), linear)

        if linear_percent_on[l] < 1.0:
            self.add_module(
                "kwinners_linear_" + str(l),
                KWinners(
                    n=linear_n,
                    percent_on=linear_percent_on[l],
                    k_inference_factor=k_inference_factor,
                    boost_strength=boost_strength,
                    boost_strength_factor=boost_strength_factor,
                ),
            )
        else:
            self.add_module("Linear_ReLU_" + str(l), nn.ReLU())

        input_size = linear_n

    # Output layer
    self.add_module("output", nn.Linear(input_size, num_classes))

    self._initialize_weights()
def __init__(self, config=None):
    super(ResNet, self).__init__()

    # update config
    defaults = dict(
        depth=50,
        num_classes=1000,
        linear_sparse_weights_type="SparseWeights",
        conv_sparse_weights_type="SparseWeights2d",
        defaults_sparse=False,
        layer_params_type=None,  # Sub-classed from `LayerParams`.
        # To be passed to layer_params_type:
        layer_params_kwargs=None,
        linear_params_func=None,
        conv_params_func=None,
        activation_params_func=None,
    )
    defaults.update(config or {})
    self.__dict__.update(defaults)

    if isinstance(self.linear_sparse_weights_type, str):
        self.linear_sparse_weights_type = getattr(
            nupic_modules, self.linear_sparse_weights_type)
    if isinstance(self.conv_sparse_weights_type, str):
        self.conv_sparse_weights_type = getattr(
            nupic_modules, self.conv_sparse_weights_type)

    if self.defaults_sparse:
        if self.conv_params_func is None:
            self.conv_params_func = auto_sparse_conv_params
        if self.activation_params_func is None:
            self.activation_params_func = auto_sparse_activation_params

    if not hasattr(self, "sparse_params"):
        self.sparse_params = default_resnet_params(
            *cf_dict[str(self.depth)],
            layer_params_type=self.layer_params_type,
            layer_params_kwargs=self.layer_params_kwargs,
            linear_params_func=self.linear_params_func,
            conv_params_func=self.conv_params_func,
            activation_params_func=self.activation_params_func,
        )

    self.in_planes = 64

    block, num_blocks = self._config_layers()

    self.features = nn.Sequential(
        # stem
        conv_layer(
            "7x7",
            3,
            64,
            self.sparse_params["stem"],
            sparse_weights_type=self.conv_sparse_weights_type,
            stride=2,
        ),
        nn.BatchNorm2d(64),
        activation_layer(64, self.sparse_params["stem"], kernel_size=7),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        # groups 1 to 4
        self._make_group(
            block, 64, num_blocks[0], self.sparse_params["filters64"],
            stride=1
        ),
        self._make_group(
            block, 128, num_blocks[1], self.sparse_params["filters128"],
            stride=2
        ),
        self._make_group(
            block, 256, num_blocks[2], self.sparse_params["filters256"],
            stride=2
        ),
        self._make_group(
            block, 512, num_blocks[3], self.sparse_params["filters512"],
            stride=2
        ),
        nn.AdaptiveAvgPool2d(1),
        Flatten(),
    )

    # last output layer
    self.classifier = linear_layer(
        512 * block.expansion,
        self.num_classes,
        self.sparse_params["linear"],
        self.linear_sparse_weights_type,
    )
def __init__(self, config=None):
    config = config or {}

    defaults = dict(
        input_size=(1, 32, 32),
        l0_strength=7e-4,
        l2_strength=0,
        droprate_init=0.5,
        temperature=2 / 3,
        learn_weight=True,
        num_classes=12,
        cnn_out_channels=(64, 64),
        kernel_size=5,
        linear_units=1000,
        maxpool_stride=2,
    )
    # NOTE: falsy config values (0, False, None) fall back to the defaults
    # here, because of the `or`.
    new_defaults = {k: (config.get(k, None) or v)
                    for k, v in defaults.items()}
    self.__dict__.update(new_defaults)

    feature_map_sidelength = (
        (((self.input_size[1] - self.kernel_size + 1) / self.maxpool_stride)
         - self.kernel_size + 1) / self.maxpool_stride)
    assert feature_map_sidelength == int(feature_map_sidelength)
    feature_map_sidelength = int(feature_map_sidelength)

    l0_strengths = [self.l0_strength] * 4

    super().__init__(OrderedDict([
        # -------------
        # Conv Block
        # -------------
        ("cnn1", HardConcreteGatedConv2d(
            self.input_size[0],
            self.cnn_out_channels[0],
            self.kernel_size,
            droprate_init=self.droprate_init,
            temperature=self.temperature,
            l2_strength=self.l2_strength,
            l0_strength=l0_strengths[0],
            learn_weight=self.learn_weight)),
        ("cnn1_bn", nn.BatchNorm2d(self.cnn_out_channels[0], affine=False)),
        ("cnn1_maxpool", nn.MaxPool2d(self.maxpool_stride)),
        ("cnn1_relu", nn.ReLU()),

        # -------------
        # Conv Block
        # -------------
        ("cnn2", HardConcreteGatedConv2d(
            self.cnn_out_channels[0],
            self.cnn_out_channels[1],
            self.kernel_size,
            droprate_init=self.droprate_init,
            temperature=self.temperature,
            l2_strength=self.l2_strength,
            l0_strength=l0_strengths[1],
            learn_weight=self.learn_weight)),
        ("cnn2_bn", nn.BatchNorm2d(self.cnn_out_channels[1], affine=False)),
        ("cnn2_maxpool", nn.MaxPool2d(self.maxpool_stride)),
        ("cnn2_relu", nn.ReLU()),
        ("flatten", Flatten()),

        # -------------
        # Linear Block
        # -------------
        ("fc1", HardConcreteGatedLinear(
            (feature_map_sidelength**2) * self.cnn_out_channels[1],
            self.linear_units,
            droprate_init=self.droprate_init,
            l2_strength=self.l2_strength,
            l0_strength=l0_strengths[2],
            temperature=self.temperature,
            learn_weight=self.learn_weight)),
        ("fc1_bn", nn.BatchNorm1d(self.linear_units, affine=False)),
        ("fc1_relu", nn.ReLU()),

        # -------------
        # Output Layer
        # -------------
        ("fc2", HardConcreteGatedLinear(
            self.linear_units,
            self.num_classes,
            droprate_init=self.droprate_init,
            l2_strength=self.l2_strength,
            l0_strength=l0_strengths[3],
            temperature=self.temperature,
            learn_weight=self.learn_weight)),
    ]))
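# Worked example of feature_map_sidelength above for the defaults
# (32x32 input, kernel_size=5, maxpool_stride=2); illustrative only:
# ((32 - 5 + 1)/2 - 5 + 1)/2 = (14 - 4)/2 = 5, so fc1 receives
# 5 * 5 * cnn_out_channels[1] = 25 * 64 = 1600 inputs.
assert ((((32 - 5 + 1) / 2) - 5 + 1) / 2) == 5.0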
def __init__(self, num_classes=1001, width_mult=1.0):
    """Inspired by
    https://github.com/kuangliu/pytorch-cifar/blob/master/models/mobilenet.py.

    :param num_classes: Number of output classes (10 for CIFAR10)
    :param width_mult: Width multiplier, used to thin the network
    """
    super(MobileNetV1, self).__init__()

    # Check for CIFAR10
    if num_classes == 10:
        first_stride = 1
        avgpool_size = 2
    else:
        first_stride = 2
        avgpool_size = 7

    # First 3x3 convolution layer
    self.conv = nn.Sequential(
        nn.Conv2d(
            in_channels=3,
            out_channels=int(32 * width_mult),
            kernel_size=3,
            stride=first_stride,
            padding=1,
            bias=False,
        ),
        nn.BatchNorm2d(int(32 * width_mult)),
        nn.ReLU(True),
    )

    # Depthwise Separable Convolution layers
    self.deepwise = nn.Sequential(
        separable_convolution2d(in_channels=32, out_channels=64,
                                stride=1, width_mult=width_mult),
        separable_convolution2d(in_channels=64, out_channels=128,
                                stride=2, width_mult=width_mult),
        separable_convolution2d(in_channels=128, out_channels=128,
                                stride=1, width_mult=width_mult),
        separable_convolution2d(in_channels=128, out_channels=256,
                                stride=2, width_mult=width_mult),
        separable_convolution2d(in_channels=256, out_channels=256,
                                stride=1, width_mult=width_mult),
        separable_convolution2d(in_channels=256, out_channels=512,
                                stride=2, width_mult=width_mult),
        separable_convolution2d(in_channels=512, out_channels=512,
                                stride=1, width_mult=width_mult),
        separable_convolution2d(in_channels=512, out_channels=512,
                                stride=1, width_mult=width_mult),
        separable_convolution2d(in_channels=512, out_channels=512,
                                stride=1, width_mult=width_mult),
        separable_convolution2d(in_channels=512, out_channels=512,
                                stride=1, width_mult=width_mult),
        separable_convolution2d(in_channels=512, out_channels=512,
                                stride=1, width_mult=width_mult),
        separable_convolution2d(in_channels=512, out_channels=1024,
                                stride=2, width_mult=width_mult),
        separable_convolution2d(in_channels=1024, out_channels=1024,
                                stride=1, width_mult=width_mult),
    )

    # Classifier
    self.classifier = nn.Sequential(
        nn.AvgPool2d(avgpool_size),
        Flatten(),
        nn.Linear(in_features=int(1024 * width_mult),
                  out_features=num_classes),
    )
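# Usage sketch (not part of the original source; the demo name is
# illustrative, separable_convolution2d and the forward pass are defined
# elsewhere and assumed to chain conv -> deepwise -> classifier). With
# num_classes=10 the stem uses stride 1 and a 2x2 average pool, sized for
# 32x32 CIFAR10 images: 32 -> 16 -> 8 -> 4 -> 2 across the four strided
# separable blocks, then the 2x2 average pool gives a 1x1 map.
def _demo_mobilenet_cifar():
    import torch
    model = MobileNetV1(num_classes=10)
    model.eval()
    logits = model(torch.randn(2, 3, 32, 32))
    assert logits.shape == (2, 10)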
def _setup(self, config):
    l0_strength = config["l0_strength"]
    l2_strength = config["l2_strength"]

    data_path = os.path.expanduser("~/nta/datasets")
    batch_size = 100

    transform = transforms.Compose([transforms.ToTensor()])
    self.train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(data_path, train=True, download=True,
                       transform=transform),
        batch_size=batch_size, shuffle=True, num_workers=4,
        pin_memory=torch.cuda.is_available())
    self.val_loader = torch.utils.data.DataLoader(
        datasets.MNIST(data_path, train=False, transform=transform),
        batch_size=batch_size, num_workers=4,
        pin_memory=torch.cuda.is_available())

    num_classes = 10
    input_size = (1, 28, 28)
    conv_dims = (20, 50)
    fc_dims = 500
    l0_strengths = (l0_strength, l0_strength, l0_strength, l0_strength)

    kernel_sidelength = 5
    maxpool_stride = 2
    feature_map_sidelength = (
        (((input_size[1] - kernel_sidelength + 1) / maxpool_stride)
         - kernel_sidelength + 1) / maxpool_stride)
    assert feature_map_sidelength == int(feature_map_sidelength)
    feature_map_sidelength = int(feature_map_sidelength)

    model_type = config["model_type"]
    learn_weight = config["learn_weight"]
    if model_type == "HardConcrete":
        temperature = 2 / 3
        self.model = nn.Sequential(OrderedDict([
            ("cnn1", HardConcreteGatedConv2d(
                input_size[0], conv_dims[0], kernel_sidelength,
                droprate_init=0.5, temperature=temperature,
                l2_strength=l2_strength, l0_strength=l0_strengths[0],
                learn_weight=learn_weight)),
            ("cnn1_relu", nn.ReLU()),
            ("cnn1_maxpool", nn.MaxPool2d(maxpool_stride)),
            ("cnn2", HardConcreteGatedConv2d(
                conv_dims[0], conv_dims[1], kernel_sidelength,
                droprate_init=0.5, temperature=temperature,
                l2_strength=l2_strength, l0_strength=l0_strengths[1],
                learn_weight=learn_weight)),
            ("cnn2_relu", nn.ReLU()),
            ("cnn2_maxpool", nn.MaxPool2d(maxpool_stride)),
            ("flatten", Flatten()),
            ("fc1", HardConcreteGatedLinear(
                (feature_map_sidelength**2) * conv_dims[1], fc_dims,
                droprate_init=0.5, l2_strength=l2_strength,
                l0_strength=l0_strengths[2], temperature=temperature,
                learn_weight=learn_weight)),
            ("fc1_relu", nn.ReLU()),
            ("fc2", HardConcreteGatedLinear(
                fc_dims, num_classes, droprate_init=0.5,
                l2_strength=l2_strength, l0_strength=l0_strengths[3],
                temperature=temperature, learn_weight=learn_weight)),
        ]))
    elif model_type == "Binary":
        self.model = nn.Sequential(OrderedDict([
            ("cnn1", BinaryGatedConv2d(
                input_size[0], conv_dims[0], kernel_sidelength,
                droprate_init=0.5, l2_strength=l2_strength,
                l0_strength=l0_strengths[0], learn_weight=learn_weight)),
            ("cnn1_relu", nn.ReLU()),
            ("cnn1_maxpool", nn.MaxPool2d(maxpool_stride)),
            ("cnn2", BinaryGatedConv2d(
                conv_dims[0], conv_dims[1], kernel_sidelength,
                droprate_init=0.5, l2_strength=l2_strength,
                l0_strength=l0_strengths[1], learn_weight=learn_weight)),
            ("cnn2_relu", nn.ReLU()),
            ("cnn2_maxpool", nn.MaxPool2d(maxpool_stride)),
            ("flatten", Flatten()),
            ("fc1", BinaryGatedLinear(
                (feature_map_sidelength**2) * conv_dims[1], fc_dims,
                droprate_init=0.5, l2_strength=l2_strength,
                l0_strength=l0_strengths[2], learn_weight=learn_weight)),
            ("fc1_relu", nn.ReLU()),
            ("fc2", BinaryGatedLinear(
                fc_dims, num_classes, droprate_init=0.5,
                l2_strength=l2_strength, l0_strength=l0_strengths[3],
                learn_weight=learn_weight)),
        ]))
    else:
        raise ValueError("Unrecognized model type: {}".format(model_type))

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.model.to(device)
    self.device = device
    self.loglike = nn.CrossEntropyLoss().to(self.device)

    self.optimizer = torch.optim.Adam(self.model.parameters(), config["lr"])
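# Worked example of feature_map_sidelength above for MNIST inputs
# (28x28, kernel 5, maxpool stride 2); illustrative only:
# ((28 - 5 + 1)/2 - 5 + 1)/2 = (12 - 4)/2 = 4, so fc1 receives
# 4 * 4 * conv_dims[1] = 16 * 50 = 800 inputs.
assert ((((28 - 5 + 1) / 2) - 5 + 1) / 2) == 4.0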
def __init__(
    self,
    cnn_out_channels=(64, 64),
    cnn_percent_on=(0.095, 0.125),
    cnn_weight_sparsity=None,
    linear_units=1000,
    linear_percent_on=0.1,
    linear_weight_sparsity=None,
    temperature=10.0,
    eval_temperature=1.0,
    temperature_decay_rate=0.99,
    k_inference_factor=1.0,
    cnn_sparsity=(0.5, 0.8),
    linear_sparsity=0.9,
):
    super(SampledKWinnerGSCSparseCNN, self).__init__()

    if cnn_weight_sparsity is not None:
        warnings.warn(
            "Parameter `cnn_weight_sparsity` is deprecated. Use "
            "`cnn_sparsity` instead.",
            DeprecationWarning,
        )
        cnn_sparsity = (1.0 - cnn_weight_sparsity[0],
                        1.0 - cnn_weight_sparsity[1])

    if linear_weight_sparsity is not None:
        warnings.warn(
            "Parameter `linear_weight_sparsity` is deprecated. Use "
            "`linear_sparsity` instead.",
            DeprecationWarning,
        )
        linear_sparsity = 1.0 - linear_weight_sparsity

    # input_shape = (1, 32, 32)
    # First Sparse CNN layer
    if cnn_sparsity[0] > 0:
        self.add_module(
            "cnn1",
            SparseWeights2d(
                nn.Conv2d(1, cnn_out_channels[0], 5),
                sparsity=cnn_sparsity[0]
            ),
        )
    else:
        self.add_module("cnn1", nn.Conv2d(1, cnn_out_channels[0], 5))
    self.add_module(
        "cnn1_batchnorm", nn.BatchNorm2d(cnn_out_channels[0], affine=False)
    )
    self.add_module(
        "cnn1_kwinner",
        SampledKWinners2d(
            percent_on=cnn_percent_on[0],
            k_inference_factor=k_inference_factor,
            temperature=temperature,
            eval_temperature=eval_temperature,
            temperature_decay_rate=temperature_decay_rate,
            relu=False,
        ),
    )
    self.add_module("cnn1_maxpool", nn.MaxPool2d(2))

    # Second Sparse CNN layer
    if cnn_sparsity[1] > 0:
        self.add_module(
            "cnn2",
            SparseWeights2d(
                nn.Conv2d(cnn_out_channels[0], cnn_out_channels[1], 5),
                sparsity=cnn_sparsity[1],
            ),
        )
    else:
        self.add_module(
            "cnn2", nn.Conv2d(cnn_out_channels[0], cnn_out_channels[1], 5)
        )
    self.add_module(
        "cnn2_batchnorm", nn.BatchNorm2d(cnn_out_channels[1], affine=False)
    )
    self.add_module(
        "cnn2_kwinner",
        SampledKWinners2d(
            percent_on=cnn_percent_on[1],
            k_inference_factor=k_inference_factor,
            temperature=temperature,
            eval_temperature=eval_temperature,
            temperature_decay_rate=temperature_decay_rate,
            relu=False,
        ),
    )
    self.add_module("cnn2_maxpool", nn.MaxPool2d(2))

    self.add_module("flatten", Flatten())

    # Sparse Linear layer
    self.add_module(
        "linear",
        SparseWeights(
            nn.Linear(25 * cnn_out_channels[1], linear_units),
            sparsity=linear_sparsity,
        ),
    )
    self.add_module("linear_bn", nn.BatchNorm1d(linear_units, affine=False))
    self.add_module(
        "linear_kwinner",
        SampledKWinners(
            percent_on=linear_percent_on,
            k_inference_factor=k_inference_factor,
            temperature=temperature,
            eval_temperature=eval_temperature,
            temperature_decay_rate=temperature_decay_rate,
            relu=False,
        ),
    )

    # Classifier
    self.add_module("output", nn.Linear(linear_units, 12))
    self.add_module("softmax", nn.LogSoftmax(dim=1))
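# Sketch of the deprecated-parameter mapping above (illustrative): the old
# *_weight_sparsity arguments expressed the fraction of weights kept, while
# the newer *_sparsity arguments express the fraction zeroed out, hence the
# 1.0 - x conversions.
#
#     model = SampledKWinnerGSCSparseCNN(cnn_weight_sparsity=(0.5, 0.2))
#     # warns, then behaves like cnn_sparsity=(0.5, 0.8)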
def __init__(self,
             cnn_out_channels=(64, 64),
             cnn_percent_on=(0.095, 0.125),
             cnn_weight_sparsity=(0.5, 0.2),
             linear_units=1000,
             linear_percent_on=0.1,
             linear_weight_sparsity=0.1,
             boost_strength=1.5,
             boost_strength_factor=0.9,
             k_inference_factor=1.0,
             duty_cycle_period=1000,
             kwinner_local=False):
    super(GSCSparseCNN, self).__init__()
    # input_shape = (1, 32, 32)
    # First Sparse CNN layer
    if cnn_weight_sparsity[0] < 1.0:
        self.add_module(
            "cnn1",
            SparseWeights2d(nn.Conv2d(1, cnn_out_channels[0], 5),
                            weight_sparsity=cnn_weight_sparsity[0]))
    else:
        self.add_module("cnn1", nn.Conv2d(1, cnn_out_channels[0], 5))
    self.add_module("cnn1_batchnorm",
                    nn.BatchNorm2d(cnn_out_channels[0], affine=False))
    self.add_module(
        "cnn1_kwinner",
        KWinners2d(
            channels=cnn_out_channels[0],
            percent_on=cnn_percent_on[0],
            k_inference_factor=k_inference_factor,
            boost_strength=boost_strength,
            boost_strength_factor=boost_strength_factor,
            duty_cycle_period=duty_cycle_period,
            local=kwinner_local,
        ))
    self.add_module("cnn1_maxpool", nn.MaxPool2d(2))

    # Second Sparse CNN layer
    if cnn_weight_sparsity[1] < 1.0:
        self.add_module(
            "cnn2",
            SparseWeights2d(nn.Conv2d(cnn_out_channels[0],
                                      cnn_out_channels[1], 5),
                            weight_sparsity=cnn_weight_sparsity[1]))
    else:
        self.add_module(
            "cnn2", nn.Conv2d(cnn_out_channels[0], cnn_out_channels[1], 5))
    self.add_module("cnn2_batchnorm",
                    nn.BatchNorm2d(cnn_out_channels[1], affine=False))
    self.add_module(
        "cnn2_kwinner",
        KWinners2d(
            channels=cnn_out_channels[1],
            percent_on=cnn_percent_on[1],
            k_inference_factor=k_inference_factor,
            boost_strength=boost_strength,
            boost_strength_factor=boost_strength_factor,
            duty_cycle_period=duty_cycle_period,
            local=kwinner_local,
        ))
    self.add_module("cnn2_maxpool", nn.MaxPool2d(2))

    self.add_module("flatten", Flatten())

    # Sparse Linear layer
    self.add_module(
        "linear",
        SparseWeights(nn.Linear(25 * cnn_out_channels[1], linear_units),
                      weight_sparsity=linear_weight_sparsity))
    self.add_module("linear_bn", nn.BatchNorm1d(linear_units, affine=False))
    self.add_module(
        "linear_kwinner",
        KWinners(n=linear_units,
                 percent_on=linear_percent_on,
                 k_inference_factor=k_inference_factor,
                 boost_strength=boost_strength,
                 boost_strength_factor=boost_strength_factor,
                 duty_cycle_period=duty_cycle_period))

    # Classifier
    self.add_module("output", nn.Linear(linear_units, 12))
    self.add_module("softmax", nn.LogSoftmax(dim=1))
def __init__(self, config): """Called once at the beginning of each experiment.""" self.start_time = time.time() self.logger = get_logger(config["name"], config.get("verbose", 2)) self.logger.debug("Config: %s", config) # Setup random seed seed = config["seed"] set_random_seed(seed) # Get our directories correct self.data_dir = config["data_dir"] # Configure Model self.model_type = config["model_type"] self.num_classes = 12 self.log_interval = config["log_interval"] self.batches_in_epoch = config["batches_in_epoch"] self.batch_size = config["batch_size"] self.background_noise_dir = config["background_noise_dir"] self.noise_values = [0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5] cnn_input_shape = config.get("cnn_input_shape", (1, 32, 32)) linear_n = config["linear_n"] linear_percent_on = config["linear_percent_on"] cnn_out_channels = config["cnn_out_channels"] cnn_percent_on = config["cnn_percent_on"] boost_strength = config["boost_strength"] weight_sparsity = config["weight_sparsity"] cnn_weight_sparsity = config["cnn_weight_sparsity"] boost_strength_factor = config["boost_strength_factor"] k_inference_factor = config["k_inference_factor"] use_batch_norm = config["use_batch_norm"] dropout = config.get("dropout", 0.0) self.load_datasets() model = nn.Sequential() if self.model_type == "cnn": # Add CNN Layers input_shape = cnn_input_shape cnn_layers = len(cnn_out_channels) if cnn_layers > 0: for i in range(cnn_layers): in_channels, height, width = input_shape add_sparse_cnn_layer( network=model, suffix=i + 1, in_channels=in_channels, out_channels=cnn_out_channels[i], use_batch_norm=use_batch_norm, weight_sparsity=cnn_weight_sparsity, percent_on=cnn_percent_on[i], k_inference_factor=k_inference_factor, boost_strength=boost_strength, boost_strength_factor=boost_strength_factor, ) # Feed this layer output into next layer input in_channels = cnn_out_channels[i] # Compute next layer input shape wout = (width - 5) + 1 maxpool_width = wout // 2 input_shape = (in_channels, maxpool_width, maxpool_width) # Flatten CNN output before passing to linear layer model.add_module("flatten", Flatten()) # Add Linear layers input_size = np.prod(input_shape) for i in range(len(linear_n)): add_sparse_linear_layer( network=model, suffix=i + 1, input_size=input_size, linear_n=linear_n[i], dropout=dropout, use_batch_norm=use_batch_norm, weight_sparsity=weight_sparsity, percent_on=linear_percent_on[i], k_inference_factor=k_inference_factor, boost_strength=boost_strength, boost_strength_factor=boost_strength_factor, ) input_size = linear_n[i] # Output layer model.add_module( "output", nn.Linear(input_size, self.num_classes) ) model.add_module("softmax", nn.LogSoftmax(dim=1)) elif self.model_type == "resnet9": model = resnet9( num_classes=self.num_classes, in_channels=1 ) elif self.model_type == "gsc_sparse_cnn": model = GSCSparseCNN() elif self.model_type == "gsc_super_sparse_cnn": model = GSCSuperSparseCNN() else: raise RuntimeError("Unknown model type") self.use_cuda = torch.cuda.is_available() self.logger.debug("use_cuda %s", self.use_cuda) if self.use_cuda: self.device = torch.device("cuda") model = model.cuda() else: self.device = torch.device("cpu") self.logger.debug("device %s", self.device) if torch.cuda.device_count() > 1: self.logger.debug("Using %s GPUs", torch.cuda.device_count()) model = torch.nn.DataParallel(model) self.model = model self.logger.debug("Model: %s", self.model) self.learning_rate = config["learning_rate"] self.optimizer = self.create_optimizer(config, self.model) 
self.lr_scheduler = self.create_learning_rate_scheduler(config, self.optimizer)
def _create_vgg_model(self):
    """
    block_sizes = [1, 1, 1] - number of CNN layers in each block
    cnn_out_channels = [c1, c2, c3] - out_channels in each layer of this block
    cnn_kernel_size = [k1, k2, k3] - kernel_size in each layer of this block
    cnn_weight_sparsity = [w1, w2, w3] - weight sparsity of each layer of
                          this block
    cnn_percent_on = [p1, p2, p3] - percent_on in each layer of this block
    """
    # Here we require exactly 3 blocks
    # assert(len(self.block_sizes) == 3)

    # Create simple CNN model, with options for sparsity
    self.model = nn.Sequential()

    in_channels = 3
    output_size = 32 * 32
    output_units = output_size * in_channels
    for ly, block_size in enumerate(self.block_sizes):
        for b in range(block_size):
            self._add_cnn_layer(
                index_str=str(ly) + "_" + str(b),
                in_channels=in_channels,
                out_channels=self.cnn_out_channels[ly],
                kernel_size=self.cnn_kernel_sizes[ly],
                percent_on=self.cnn_percent_on[ly],
                weight_sparsity=self.cnn_weight_sparsity[ly],
                add_pooling=b == block_size - 1,
            )
            in_channels = self.cnn_out_channels[ly]
        output_size = int(output_size / 4)
        output_units = output_size * in_channels

    # Flatten CNN output before passing to linear layer
    self.model.add_module("flatten", Flatten())

    # Linear layer
    input_size = output_units
    for ly, linear_n in enumerate(self.linear_n):
        linear = nn.Linear(input_size, linear_n)
        if self.linear_weight_sparsity[ly] < 1.0:
            self.model.add_module(
                "linear_" + str(ly),
                SparseWeights(linear, self.linear_weight_sparsity[ly]),
            )
        else:
            self.model.add_module("linear_" + str(ly), linear)

        if self.linear_percent_on[ly] < 1.0:
            self.model.add_module(
                "kwinners_linear_" + str(ly),
                KWinners(
                    n=linear_n,
                    percent_on=self.linear_percent_on[ly],
                    k_inference_factor=self.k_inference_factor,
                    boost_strength=self.boost_strength,
                    boost_strength_factor=self.boost_strength_factor,
                ),
            )
        else:
            self.model.add_module("Linear_ReLU_" + str(ly), nn.ReLU())

        input_size = self.linear_n[ly]

    # Output layer
    self.model.add_module("output", nn.Linear(input_size, self.output_size))

    print(self.model)
    self.model.to(self.device)
    self._initialize_weights()
def _setup(self, config):
    # Get trial parameters
    seed = config["seed"]
    datadir = config["datadir"]
    batch_size = config["batch_size"]
    test_batch_size = config["test_batch_size"]
    first_epoch_batch_size = config["first_epoch_batch_size"]
    in_channels, h, w = config["c1_input_shape"]
    learning_rate = config["learning_rate"]
    momentum = config["momentum"]
    weight_sparsity = config["weight_sparsity"]
    boost_strength = config["boost_strength"]
    boost_strength_factor = config["boost_strength_factor"]
    n = config["n"]
    percent_on = config["percent_on"]
    cnn_percent_on = config["cnn_percent_on"]
    k_inference_factor = config["k_inference_factor"]
    kernel_size = config["kernel_size"]
    out_channels = config["out_channels"]
    output_size = config["output_size"]
    cnn_output_len = out_channels * ((w - kernel_size + 1) // 2) ** 2

    torch.manual_seed(seed)
    if torch.cuda.is_available():
        self.device = torch.device("cuda")
        torch.cuda.manual_seed(seed)
    else:
        self.device = torch.device("cpu")

    xforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])
    train_dataset = datasets.MNIST(datadir, train=True, transform=xforms)
    test_dataset = datasets.MNIST(datadir, train=False, transform=xforms)

    self.train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True)
    self.test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=test_batch_size, shuffle=True)
    self.first_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=first_epoch_batch_size, shuffle=True)

    # Create simple sparse model
    self.model = nn.Sequential()

    # CNN layer
    self.model.add_module(
        "cnn",
        nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
        ),
    )
    if cnn_percent_on < 1.0:
        self.model.add_module(
            "kwinners_cnn",
            KWinners2d(
                percent_on=cnn_percent_on,
                channels=out_channels,
                k_inference_factor=k_inference_factor,
                boost_strength=boost_strength,
                boost_strength_factor=boost_strength_factor,
            ),
        )
    else:
        self.model.add_module("ReLU_cnn", nn.ReLU())

    self.model.add_module("maxpool", nn.MaxPool2d(kernel_size=2))

    # Flatten max pool output before passing to linear layer
    self.model.add_module("flatten", Flatten())

    # Linear layer
    linear = nn.Linear(cnn_output_len, n)
    if weight_sparsity < 1.0:
        self.model.add_module("sparse_linear",
                              SparseWeights(linear, weight_sparsity))
    else:
        self.model.add_module("linear", linear)

    if percent_on < 1.0:
        self.model.add_module(
            "kwinners_linear",
            KWinners(
                n=n,
                percent_on=percent_on,
                k_inference_factor=k_inference_factor,
                boost_strength=boost_strength,
                boost_strength_factor=boost_strength_factor,
            ),
        )
    else:
        self.model.add_module("Linear_ReLU", nn.ReLU())

    # Output layer
    self.model.add_module("fc", nn.Linear(n, output_size))
    self.model.add_module("softmax", nn.LogSoftmax(dim=1))

    self.model.to(self.device)
    self.optimizer = optim.SGD(self.model.parameters(), lr=learning_rate,
                               momentum=momentum)
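# Worked example of cnn_output_len above (illustrative): for 28x28 MNIST
# inputs with kernel_size=5, the conv gives 24x24 and the 2x2 max pool
# 12x12, so cnn_output_len = out_channels * ((28 - 5 + 1) // 2) ** 2
#                          = out_channels * 144.
assert ((28 - 5 + 1) // 2) ** 2 == 144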