def count_derivatives(self, layer_output, batches):
    """
    Returns estimated impact of the input of this layer on the output
    of the network.

    :param Numlike layer_output: impact of this layer's output on the
        output of the network
    :param int batches: number of batches
    :return Numlike: derivatives
    """
    layer_output = self.normalize_derivatives(layer_output)
    # Prepend the batch count to the (reordered) layer shapes.
    input_shape = add_tuples(batches, change_order(self.layer.input_shape))
    output_shape = add_tuples(batches, change_order(self.layer.output_shape))
    layer_output = layer_output.reshape(output_shape)

    if self.need_derivatives:
        # Accumulate derivatives (summed over the batch axis) across
        # successive calls, persisting the running total.
        new_derivatives = layer_output.sum((0, ))
        derivatives = self.load_derivatives()
        if derivatives is None:
            derivatives = new_derivatives
        else:
            derivatives = derivatives + new_derivatives
        self.save_derivatives(derivatives)

    return self._count_derivatives(layer_output, input_shape)
def _count_activation(self, layer_input):
    """
    Return estimated activations.

    :param Numlike layer_input: input for layer
    :return Numlike:
    """
    layer = self.layer
    # The bias is wrapped in a theano shared variable; W is passed as-is.
    bias = theano.shared(layer.b)
    return a_conv(
        layer_input,
        change_order(layer.input_shape),
        layer.W,
        change_order(layer.filter_shape),
        bias,
        layer.stride,
        layer.padding,
        layer.n_groups,
    )
def _count_activation(self, layer_input):
    """
    Returns estimated activations.

    :param Numlike layer_input:
    :return Numlike:
    """
    layer = self.layer
    shape = change_order(layer.input_shape)
    return a_pool(
        layer_input,
        shape,
        layer.poolsize,
        layer.stride,
        layer.padding,
        layer.mode,
    )
def _count_activation(self, layer_input):
    """
    Returns estimated activations.

    :param Numlike layer_input:
    :return Numlike:
    """
    layer = self.layer
    shape = change_order(layer.input_shape)
    return a_norm(
        layer_input,
        shape,
        layer.local_range,
        layer.k,
        layer.alpha,
        layer.beta,
    )
def count_activation(self, layer_input):
    """
    Returns estimated activations.

    :param Numlike layer_input:
    :return Numlike: activations
    """
    normalized = self.normalize_activation(layer_input)
    # Reshape the input to the layer's (reordered) expected shape.
    shape = change_order(make_iterable(self.layer.input_shape))
    reshaped = normalized.reshape(shape)
    if self.need_activation:
        self.save_activations(reshaped)
    return self._count_activation(reshaped)
def _count_derivatives(self, layer_output, input_shape):
    """
    Returns estimated impact of input of layer on output of network.

    :param Numlike layer_output: impact of input of next layer on
        output of network
    :param tuple input_shape:
    :return Numlike:
    """
    layer = self.layer
    filter_shape = change_order(layer.filter_shape)
    return d_conv(
        layer_output,
        input_shape,
        filter_shape,
        layer.W,
        layer.stride,
        layer.padding,
        layer.n_groups,
        self.theano_ops,
    )
def get_derest_indicators(network, input_=None, count_function=length, max_batch_size=None, normalize_activations=lambda x: x, normalize_derivatives=divide_by_max): """ Returns indicators of importance using derest algorithm :param Network network: network to work with :param input_: possible input for network :type input_: Numlike or None :param function count_function: function to use :param batch_size: size of batch in computing derivatives :type batch_size: int or None :param function normalize_activations: function to normalize activations between layers :param function normalize_derivatives: function to normalize derivatives between layers :return array of numpy.ndarrays: """ if input_ is None: input_ = NpInterval.from_shape( change_order(network.layers[0].input_shape), neutral=False ) random_id = randint(0, 10**6) network_folder = TMP_DIR + str(random_id) derest_network = DerestNetwork( network, network_folder, normalize_activations, normalize_derivatives) derest_network.count_activations(input_) output_nr = network.layers[-1].output_shape if max_batch_size is None: max_batch_size = output_nr output = input_.derest_output(output_nr) for i in xrange(0, output_nr, max_batch_size): print "BATCH:", i derest_network.count_derivatives(output[i:(i+max_batch_size)]) results = derest_network.count_derest(count_function) derest_network.delete_folder() return to_indicators(results)
def count_derest(self, count_function):
    """
    Returns indicators of each weight importance

    :param function count_function: function to count indicators,
        takes Numlike and returns float
    :return list of numpy arrays:
    """
    # Indicator array mirrors the weight tensor W; one value per weight.
    # NOTE(review): assumes self.layer.W is a numpy array — confirm.
    indicators = numpy.zeros_like(self.layer.W)
    W = self.layer.W
    derivatives = self.load_derivatives()
    # Prepend a singleton batch dimension to the (reordered) input shape.
    input_shape = (1, ) + change_order(self.layer.input_shape)
    activation = self.load_activations().reshape(input_shape)
    activation = activation.\
        reshape_for_padding(input_shape, self.layer.padding)
    # Drop the singleton batch dimension again after padding.
    activation = activation.reshape(activation.shape[1:])
    # Split activations, derivatives and weights along axis 0 into
    # n_groups equal chunks (grouped convolution).
    # NOTE(review): relies on Python 2 integer division here — the
    # sizes must divide evenly by n_groups; not checked.
    act_group_size = activation.shape[0] / self.layer.n_groups
    der_group_size = derivatives.shape[0] / self.layer.n_groups
    w_group_size = W.shape[0] / self.layer.n_groups
    for n_group in xrange(self.layer.n_groups):
        act_first = n_group * act_group_size
        act = \
            activation[act_first:(act_first + act_group_size), :, :]
        der_first = n_group * der_group_size
        der = \
            derivatives[der_first:(der_first + der_group_size), :, :]
        w_first = n_group * w_group_size
        weights = W[w_first:(w_first + w_group_size), :, :, :]
        # One indicator per spatial filter position (j2, j3), filled in
        # for the whole group's slice of weights at once.
        for j2, j3 in product(xrange(W.shape[2]), xrange(W.shape[3])):
            ind = count_function(
                self._count_derest_for_weight(act, der, weights, j2, j3))
            indicators[w_first:(w_first + w_group_size), :, j2, j3] = ind
    return [indicators]