def add_layer(self, layer):
    from snntoolbox.parsing.utils import get_type, \
        get_inbound_layers_with_params

    spike_layer_name = getattr(self.sim, 'Spike' + get_type(layer))
    # noinspection PyProtectedMember
    inbound = layer._inbound_nodes[0].inbound_layers
    if not isinstance(inbound, (list, tuple)):
        inbound = [inbound]
    inbound = [self._spiking_layers[inb.name] for inb in inbound]
    if len(inbound) == 1:
        inbound = inbound[0]
    layer_kwargs = layer.get_config()
    layer_kwargs['config'] = self.config

    # Check if the layer uses binary activations. In that case, tell the
    # following MaxPool layer, because it can then use a cheaper operation.
    if 'Conv' in layer.name and 'binary' in layer.activation.__name__:
        self._binary_activation = layer.activation.__name__

    if 'MaxPool' in layer.name and self._binary_activation is not None:
        layer_kwargs['activation'] = self._binary_activation
        self._binary_activation = None

    # Remove the activation from the kwargs (i.e. replace it by 'linear')
    # before initializing the superclass, because the ReLU activation is
    # applied automatically by the spike-generation mechanism. In some cases
    # (quantized activation), the activation needs to be applied manually;
    # this information is taken from the 'activation' key during conversion.
    activation_str = str(layer_kwargs.pop('activation', None))

    spike_layer = spike_layer_name(**layer_kwargs)
    spike_layer.activation_str = activation_str
    spike_layer.is_first_spiking = \
        len(get_inbound_layers_with_params(layer)) == 0
    self._spiking_layers[layer.name] = spike_layer(inbound)
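
# ---------------------------------------------------------------------------
# Illustrative usage sketch (an assumption for documentation purposes, not
# part of the toolbox code): ``add_layer`` is expected to be called once per
# parsed Keras layer, in topological order, so that ``self._spiking_layers``
# already contains every inbound layer when the current one is wired up. The
# names ``snn_builder`` and ``parsed_model`` below are hypothetical.
#
#     snn_builder = TargetSimSNN(config)            # subclass with add_layer
#     for parsed_layer in parsed_model.layers[1:]:  # skip the input layer
#         snn_builder.add_layer(parsed_layer)       # builds SpikeConv2D, etc.
# ---------------------------------------------------------------------------
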
def normalize_parameters(model, config, **kwargs):
    """Normalize the parameters of a network.

    The parameters of each layer are normalized with respect to the maximum
    activation, or the ``n``-th percentile of activations.

    Generates plots of the activity- and weight-distribution before and after
    normalization. Note that plotting the activity-distribution can be very
    time- and memory-consuming for larger networks.
    """

    import json
    from collections import OrderedDict
    from snntoolbox.parsing.utils import get_inbound_layers_with_params

    print("Normalizing parameters...")

    norm_dir = kwargs['path'] if 'path' in kwargs else \
        os.path.join(config.get('paths', 'log_dir_of_current_run'),
                     'normalization')

    activ_dir = os.path.join(norm_dir, 'activations')
    if not os.path.exists(activ_dir):
        os.makedirs(activ_dir)

    # Store original weights for later plotting.
    if not os.path.isfile(os.path.join(activ_dir, 'weights.npz')):
        weights = {}
        for layer in model.layers:
            w = layer.get_weights()
            if len(w) > 0:
                weights[layer.name] = w[0]
        np.savez_compressed(os.path.join(activ_dir, 'weights.npz'), **weights)

    # Either load scale factors from disk, or get the normalization data set
    # to calculate them.
    x_norm = None
    if 'scale_facs' in kwargs:
        scale_facs = kwargs['scale_facs']
    elif 'x_norm' in kwargs or 'dataflow' in kwargs:
        if 'x_norm' in kwargs:
            x_norm = kwargs['x_norm']
        elif 'dataflow' in kwargs:
            x_norm, y = kwargs['dataflow'].next()
        print("Using {} samples for normalization.".format(len(x_norm)))
        sizes = [
            len(x_norm) * np.array(layer.output_shape[1:]).prod() * 32 /
            (8 * 1e9) for layer in model.layers if len(layer.weights) > 0]
        size_str = ['{:.2f}'.format(s) for s in sizes]
        print("INFO: Need {} GB for layer activations.\n".format(size_str) +
              "May have to reduce size of data set used for normalization.")
        scale_facs = OrderedDict({model.layers[0].name: 1})
    else:
        import warnings
        warnings.warn("Scale factors or normalization data set could not be "
                      "loaded. Proceeding without normalization.",
                      RuntimeWarning)
        return

    batch_size = config.getint('simulation', 'batch_size')

    # If scale factors have not been computed in a previous run, do so now.
    if len(scale_facs) == 1:
        i = 0
        sparsity = []
        for layer in model.layers:
            # Skip if layer has no parameters.
            if len(layer.weights) == 0:
                continue

            activations = try_reload_activations(layer, model, x_norm,
                                                 batch_size, activ_dir)
            nonzero_activations = activations[np.nonzero(activations)]
            sparsity.append(1 - nonzero_activations.size / activations.size)
            del activations
            perc = get_percentile(config, i)
            scale_facs[layer.name] = get_scale_fac(nonzero_activations, perc)
            print("Scale factor: {:.2f}.".format(scale_facs[layer.name]))
            # Since we have calculated output activations here, we could check
            # at this point whether the output is mostly negative, in which
            # case we should stick to softmax. Otherwise ReLU is preferred.
            # Todo: Determine the input to the activation by replacing the
            # combined output layer by two distinct layers ``Dense`` and
            # ``Activation``!
            # if layer.activation == 'softmax' and settings['softmax_to_relu']:
            #     softmax_inputs = ...
            #     if np.median(softmax_inputs) < 0:
            #         print("WARNING: You allowed the toolbox to replace "
            #               "softmax by ReLU activations. However, more than "
            #               "half of the activations are negative, which "
            #               "could reduce accuracy. Consider setting "
            #               "settings['softmax_to_relu'] = False.")
            #         settings['softmax_to_relu'] = False
            i += 1

        # Write scale factors to disk.
        filepath = os.path.join(norm_dir, config.get('normalization',
                                                     'percentile') + '.json')
        from snntoolbox.utils.utils import confirm_overwrite
        if config.get('output', 'overwrite') or confirm_overwrite(filepath):
            with open(filepath, 'w') as f:
                json.dump(scale_facs, f)
        np.savez_compressed(os.path.join(norm_dir, 'activations', 'sparsity'),
                            sparsity=sparsity)

    # Apply scale factors to normalize the parameters.
    for layer in model.layers:
        # Skip if layer has no parameters.
        if len(layer.weights) == 0:
            continue

        # Scale parameters.
        parameters = layer.get_weights()
        if layer.activation.__name__ == 'softmax':
            # When using a certain percentile or even the max, the scale
            # factor can be extremely low in case of many output classes
            # (e.g. 0.01 for ImageNet), which amplifies weights and biases
            # greatly. Large biases cause large offsets at the beginning of
            # the simulation (while spike input is still absent).
            scale_fac = 1.0
            print("Using scale factor {:.2f} for softmax layer.".format(
                scale_fac))
        else:
            scale_fac = scale_facs[layer.name]

        inbound = get_inbound_layers_with_params(layer)
        if len(inbound) == 0:  # Input layer
            parameters_norm = [
                parameters[0] * scale_facs[model.layers[0].name] / scale_fac,
                parameters[1] / scale_fac]
        elif len(inbound) == 1:
            parameters_norm = [
                parameters[0] * scale_facs[inbound[0].name] / scale_fac,
                parameters[1] / scale_fac]
        else:
            # If this layer receives input from several layers, the scale
            # factor can be applied to the bias as usual, but the weights need
            # to be rescaled according to their respective inputs.
            parameters_norm = [parameters[0], parameters[1] / scale_fac]
            if parameters[0].ndim == 4:
                # In conv layers, we just need to split up along the channel
                # dimension.
                offset = 0  # Index offset at input filter dimension
                for inb in inbound:
                    f_out = inb.filters  # Num output features of inbound layer
                    f_in = range(offset, offset + f_out)
                    parameters_norm[0][:, :, f_in, :] *= \
                        scale_facs[inb.name] / scale_fac
                    offset += f_out
            else:
                # Fully-connected layers need more consideration, because they
                # could receive input from several conv layers that are
                # concatenated and then flattened. The neuron positions in the
                # flattened layer depend on the image_data_format.
                raise NotImplementedError

        # If the layer happens to be sparse, add the mask to the list of
        # parameters.
        if len(parameters) == 3:
            parameters_norm.append(parameters[-1])

        # Update model with modified parameters.
        layer.set_weights(parameters_norm)

    # Plot distributions of weights and activations before and after
    # normalization.
    if 'normalization_activations' in eval(config.get('output', 'plot_vars')):
        from snntoolbox.simulation.plotting import plot_hist
        from snntoolbox.simulation.plotting import plot_max_activ_hist

        print("Plotting distributions of weights and activations before and "
              "after normalizing...")

        # Load original parsed model to get parameters before normalization.
        weights = np.load(os.path.join(activ_dir, 'weights.npz'))
        for idx, layer in enumerate(model.layers):
            # Skip if layer has no parameters.
            if len(layer.weights) == 0:
                continue

            label = str(idx) + layer.__class__.__name__ \
                if config.getboolean('output', 'use_simple_labels') \
                else layer.name
            parameters = weights[layer.name]
            parameters_norm = layer.get_weights()[0]
            weight_dict = {'weights': parameters.flatten(),
                           'weights_norm': parameters_norm.flatten()}
            plot_hist(weight_dict, 'Weight', label, norm_dir)

            # Load activations of model before normalization.
            activations = try_reload_activations(layer, model, x_norm,
                                                 batch_size, activ_dir)

            if activations is None or x_norm is None:
                continue

            # Compute activations with modified parameters.
            nonzero_activations = activations[np.nonzero(activations)]
            activations_norm = get_activations_layer(model.input, layer.output,
                                                     x_norm, batch_size)
            activation_dict = {
                'Activations': nonzero_activations,
                'Activations_norm':
                    activations_norm[np.nonzero(activations_norm)]}
            scale_fac = scale_facs[layer.name]
            plot_hist(activation_dict, 'Activation', label, norm_dir,
                      scale_fac)
            ax = tuple(np.arange(len(layer.output_shape))[1:])
            plot_max_activ_hist(
                {'Activations_max': np.max(activations, axis=ax)},
                'Maximum Activation', label, norm_dir, scale_fac)
    print('')
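
# The helper ``get_scale_fac`` used above reduces the nonzero activations of a
# layer to a single scale factor. The sketch below is an illustrative
# assumption, not the toolbox implementation: it simply returns the
# ``percentile``-th percentile of the activations (100 corresponding to the
# maximum), which matches the behavior described in the docstring of
# ``normalize_parameters``.

def _example_get_scale_fac(nonzero_activations, percentile):
    """Return the ``percentile``-th percentile of ``nonzero_activations``,
    falling back to 1 if no activations were recorded (sketch only)."""
    import numpy as np  # local import so the sketch is self-contained

    if nonzero_activations is None or len(nonzero_activations) == 0:
        return 1.0
    return np.percentile(nonzero_activations, percentile)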