def add_layer(self, layer):
        from snntoolbox.parsing.utils import get_type, \
            get_inbound_layers_with_params
        spike_layer_name = getattr(self.sim, 'Spike' + get_type(layer))
        # noinspection PyProtectedMember
        inbound = layer._inbound_nodes[0].inbound_layers
        if not isinstance(inbound, (list, tuple)):
            inbound = [inbound]
        inbound = [self._spiking_layers[inb.name] for inb in inbound]
        if len(inbound) == 1:
            inbound = inbound[0]
        layer_kwargs = layer.get_config()
        layer_kwargs['config'] = self.config

        # Check if the layer uses binary activations. In that case, we want to
        # pass this information on to the subsequent MaxPool layer, which can
        # then use a cheaper operation.
        if 'Conv' in layer.name and 'binary' in layer.activation.__name__:
            self._binary_activation = layer.activation.__name__

        if 'MaxPool' in layer.name and self._binary_activation is not None:
            layer_kwargs['activation'] = self._binary_activation
            self._binary_activation = None

        # Remove the activation from the kwargs before initializing the
        # superclass, because the ReLU activation is applied automatically by
        # the spike-generation mechanism. In some cases (quantized activation),
        # the activation needs to be applied manually; for that purpose it is
        # stored in ``activation_str`` and read out during conversion.
        activation_str = str(layer_kwargs.pop('activation', None))

        spike_layer = spike_layer_name(**layer_kwargs)
        spike_layer.activation_str = activation_str
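        # ``is_first_spiking`` presumably marks a layer with no parameterized
        # layers upstream, i.e. the first spiking layer, which may receive the
        # analog network input directly.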
        spike_layer.is_first_spiking = \
            len(get_inbound_layers_with_params(layer)) == 0
        self._spiking_layers[layer.name] = spike_layer(inbound)
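
The ``getattr`` dispatch at the top of ``add_layer`` resolves the spiking counterpart of each parsed layer by name. The helper below is a minimal sketch of that lookup, with ``sim`` standing in for a simulator backend module that is assumed to expose one ``Spike<LayerType>`` class per layer type (the helper name is illustrative, not toolbox API):

def resolve_spike_class(sim, layer):
    """Sketch: map a parsed Keras layer to its spiking counterpart by name,
    e.g. a 'Dense' layer to ``sim.SpikeDense``."""
    from snntoolbox.parsing.utils import get_type
    return getattr(sim, 'Spike' + get_type(layer))
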
Example #2
def normalize_parameters(model, config, **kwargs):
    """Normalize the parameters of a network.

    The parameters of each layer are normalized with respect to the maximum
    activation, or the ``n``-th percentile of activations.

    Generates plots of the activity- and weight-distribution before and after
    normalization. Note that plotting the activity-distribution can be very
    time- and memory-consuming for larger networks.
    """

    import json
    from collections import OrderedDict
    from snntoolbox.parsing.utils import get_inbound_layers_with_params

    print("Normalizing parameters...")

    norm_dir = kwargs['path'] if 'path' in kwargs else \
        os.path.join(config.get('paths', 'log_dir_of_current_run'),
                     'normalization')

    activ_dir = os.path.join(norm_dir, 'activations')
    if not os.path.exists(activ_dir):
        os.makedirs(activ_dir)
    # Store original weights for later plotting
    if not os.path.isfile(os.path.join(activ_dir, 'weights.npz')):
        weights = {}
        for layer in model.layers:
            w = layer.get_weights()
            if len(w) > 0:
                weights[layer.name] = w[0]
        np.savez_compressed(os.path.join(activ_dir, 'weights.npz'), **weights)

    # Either load scale factors from disk, or get normalization data set to
    # calculate them.
    x_norm = None
    if 'scale_facs' in kwargs:
        scale_facs = kwargs['scale_facs']
    elif 'x_norm' in kwargs or 'dataflow' in kwargs:
        if 'x_norm' in kwargs:
            x_norm = kwargs['x_norm']
        elif 'dataflow' in kwargs:
            x_norm, y = next(kwargs['dataflow'])
        print("Using {} samples for normalization.".format(len(x_norm)))
        sizes = [
            len(x_norm) * np.array(layer.output_shape[1:]).prod() * 32 /
            (8 * 1e9) for layer in model.layers if len(layer.weights) > 0
        ]
        size_str = ['{:.2f}'.format(s) for s in sizes]
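        # The estimate below assumes 32-bit activations: number of samples
        # times the number of neurons per layer times 32 bits, divided by
        # 8 bits per byte and 1e9 bytes per GB.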
        print("INFO: Need {} GB for layer activations.\n".format(size_str) +
              "May have to reduce size of data set used for normalization.")
        scale_facs = OrderedDict({model.layers[0].name: 1})
    else:
        import warnings
        warnings.warn(
            "Scale factors or normalization data set could not be "
            "loaded. Proceeding without normalization.", RuntimeWarning)
        return

    batch_size = config.getint('simulation', 'batch_size')

    # If scale factors have not been computed in a previous run, do so now.
    if len(scale_facs) == 1:
        i = 0
        sparsity = []
        for layer in model.layers:
            # Skip if layer has no parameters
            if len(layer.weights) == 0:
                continue

            activations = try_reload_activations(layer, model, x_norm,
                                                 batch_size, activ_dir)
            nonzero_activations = activations[np.nonzero(activations)]
            sparsity.append(1 - nonzero_activations.size / activations.size)
            del activations
            perc = get_percentile(config, i)
            scale_facs[layer.name] = get_scale_fac(nonzero_activations, perc)
            print("Scale factor: {:.2f}.".format(scale_facs[layer.name]))
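            # ``get_scale_fac`` presumably returns the ``perc``-th percentile
            # of the nonzero activations (their maximum for perc == 100), so
            # the scale factor tracks the largest activations of the layer.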
            # Since we have calculated output activations here, check at this
            # point if the output is mostly negative, in which case we should
            # stick to softmax. Otherwise ReLU is preferred.
            # Todo: Determine the input to the activation by replacing the
            # combined output layer by two distinct layers ``Dense`` and
            # ``Activation``!
            # if layer.activation == 'softmax' and settings['softmax_to_relu']:
            #     softmax_inputs = ...
            #     if np.median(softmax_inputs) < 0:
            #         print("WARNING: You allowed the toolbox to replace "
            #               "softmax by ReLU activations. However, more than "
            #               "half of the activations are negative, which "
            #               "could reduce accuracy. Consider setting "
            #               "settings['softmax_to_relu'] = False.")
            #         settings['softmax_to_relu'] = False
            i += 1
        # Write scale factors to disk
        filepath = os.path.join(
            norm_dir,
            config.get('normalization', 'percentile') + '.json')
        from snntoolbox.utils.utils import confirm_overwrite
        if config.getboolean('output', 'overwrite') or \
                confirm_overwrite(filepath):
            with open(filepath, 'w') as f:
                json.dump(scale_facs, f)
        np.savez_compressed(os.path.join(norm_dir, 'activations', 'sparsity'),
                            sparsity=sparsity)

    # Apply scale factors to normalize the parameters.
    for layer in model.layers:
        # Skip if layer has no parameters
        if len(layer.weights) == 0:
            continue

        # Scale parameters
        parameters = layer.get_weights()
        if layer.activation.__name__ == 'softmax':
            # When using a certain percentile or even the max, the scaling
            # factor can be extremely low in case of many output classes
            # (e.g. 0.01 for ImageNet). This amplifies weights and biases
            # greatly. But large biases cause large offsets in the beginning
            # of the simulation (spike input absent).
            scale_fac = 1.0
            print("Using scale factor {:.2f} for softmax layer.".format(
                scale_fac))
        else:
            scale_fac = scale_facs[layer.name]
        inbound = get_inbound_layers_with_params(layer)
        if len(inbound) == 0:  # Input layer
            parameters_norm = [
                parameters[0] * scale_facs[model.layers[0].name] / scale_fac,
                parameters[1] / scale_fac
            ]
        elif len(inbound) == 1:
            parameters_norm = [
                parameters[0] * scale_facs[inbound[0].name] / scale_fac,
                parameters[1] / scale_fac
            ]
        else:
            # If this layer receives input from several layers, the scale
            # factor can be applied to the bias as usual, but the weights need
            # to be rescaled according to their respective inputs.
            parameters_norm = [parameters[0], parameters[1] / scale_fac]
            if parameters[0].ndim == 4:
                # In conv layers, we just need to split the weights along the
                # input-channel dimension.
                offset = 0  # Index offset at input filter dimension
                for inb in inbound:
                    f_out = inb.filters  # Num output features of inbound layer
                    f_in = range(offset, offset + f_out)
                    parameters_norm[0][:, :, f_in, :] *= \
                        scale_facs[inb.name] / scale_fac
                    offset += f_out
            else:
                # Fully-connected layers need more consideration, because they
                # could receive input from several conv layers that are
                # concatenated and then flattened. The neuron positions in the
                # flattened layer depend on the image_data_format.
                raise NotImplementedError

        # If the layer is a Sparse layer, its weights include a third entry,
        # the connection mask, which is appended to the normalized parameters
        # unchanged.
        if len(parameters) == 3:
            parameters_norm.append(parameters[-1])
        # Update model with modified parameters
        layer.set_weights(parameters_norm)
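        # Worked example for a single inbound layer: with an incoming scale
        # factor of 2.0 and an outgoing scale factor of 4.0, the weights are
        # multiplied by 2.0 / 4.0 = 0.5 and the biases by 1 / 4.0 = 0.25.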

    # Plot distributions of weights and activations before and after norm.
    if 'normalization_activations' in eval(config.get('output', 'plot_vars')):
        from snntoolbox.simulation.plotting import plot_hist
        from snntoolbox.simulation.plotting import plot_max_activ_hist

        print("Plotting distributions of weights and activations before and "
              "after normalizing...")

        # Load original parsed model to get parameters before normalization
        weights = np.load(os.path.join(activ_dir, 'weights.npz'))
        for idx, layer in enumerate(model.layers):
            # Skip if layer has no parameters
            if len(layer.weights) == 0:
                continue

            label = str(idx) + layer.__class__.__name__ \
                if config.getboolean('output', 'use_simple_labels') \
                else layer.name
            parameters = weights[layer.name]
            parameters_norm = layer.get_weights()[0]
            weight_dict = {
                'weights': parameters.flatten(),
                'weights_norm': parameters_norm.flatten()
            }
            plot_hist(weight_dict, 'Weight', label, norm_dir)

            # Load activations of model before normalization
            activations = try_reload_activations(layer, model, x_norm,
                                                 batch_size, activ_dir)

            if activations is None or x_norm is None:
                continue

            # Compute activations with modified parameters
            nonzero_activations = activations[np.nonzero(activations)]
            activations_norm = get_activations_layer(model.input, layer.output,
                                                     x_norm, batch_size)
            activation_dict = {
                'Activations': nonzero_activations,
                'Activations_norm':
                activations_norm[np.nonzero(activations_norm)]
            }
            scale_fac = scale_facs[layer.name]
            plot_hist(activation_dict, 'Activation', label, norm_dir,
                      scale_fac)
            ax = tuple(np.arange(len(layer.output_shape))[1:])
            plot_max_activ_hist(
                {'Activations_max': np.max(activations, axis=ax)},
                'Maximum Activation', label, norm_dir, scale_fac)
    print('')
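
For reference, the per-layer rescaling applied in the loop above can be written down in isolation. The helper below is a simplified sketch (the function name and signature are illustrative, not part of snntoolbox): weights are scaled by the ratio of the incoming to the outgoing scale factor, biases by the inverse of the outgoing scale factor.

import numpy as np


def rescale_layer(weights, biases, scale_fac_in, scale_fac_out):
    """Sketch of the normalization rule used above for a layer with a single
    parameterized inbound layer."""
    weights_norm = weights * scale_fac_in / scale_fac_out
    biases_norm = biases / scale_fac_out
    return weights_norm, biases_norm


# Example: with scale_fac_in = 1.0 (input layer) and scale_fac_out = 5.0,
# both weights and biases shrink by a factor of 5.
w_norm, b_norm = rescale_layer(np.ones((3, 3)), np.ones(3), 1.0, 5.0)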