def backward(self, delta, prev_delta, copy=False):
  '''
  Backward function of the Shortcut layer.

  Parameters
  ----------
  delta : array of shape (batch, w, h, c)
    First delta to be backpropagated.

  prev_delta : array of shape (batch, w, h, c)
    Second delta to be backpropagated.

  copy : bool (default=False)
    States if the activation function has to return a copy of the input or not.

  Returns
  -------
  Shortcut layer object
  '''
  check_is_fitted(self, 'delta')

  # derivative of the activation function w.r.t. the input
  self.delta *= self.gradient(self.output, copy=copy)
  delta[:] += self.delta * self.alpha

  if (self.ix, self.iy, self.kx) == (None, None, None):  # same shapes
    prev_delta[:] += self.delta[:] * self.beta

  else:  # different shapes
    prev_delta[:, self.ix, self.jx, self.kx] += self.beta * self.delta[:, self.iy, self.jy, self.ky]

  return self
def backward(self, delta=None):
  '''
  Backward function of the Softmax layer.

  Parameters
  ----------
  delta : array of shape (batch, w, h, c), default is None.
    If an array is passed, it is the global delta to be backpropagated.

  Returns
  -------
  Softmax layer object
  '''
  check_is_fitted(self, 'output')
  self._check_dims(shape=self.out_shape, arr=delta, func='Backward')

  # This is an approximation of the real softmax gradient
  if delta is not None:
    delta[:] += self.delta

    ## darknet issue version (maybe with normalized output)
    # dot = (self.output * self.delta).sum(axis=(1, 2, 3), keepdims=True)
    # delta[:] += self.temperature * self.output * (self.delta - dot)

    ## exact softmax gradient formula
    # s = self.output.reshape(-1, 1)
    # delta[:] += np.diagflat(s) - np.dot(s, s.T)

  return self
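# Sketch (not library code, hypothetical 1-d input): the exact softmax
# Jacobian-vector product that the commented-out formula above refers to.
# For softmax output s, J_ij = s_i * (delta_ij - s_j), so J @ g reduces to
# s * (g - sum_j s_j * g_j) without ever building the full Jacobian.
import numpy as np

def _softmax_jvp(s, g):
  # s : softmax output, g : incoming gradient
  return s * (g - np.sum(s * g))

if __name__ == '__main__':
  x = np.random.rand(5)
  s = np.exp(x) / np.exp(x).sum()
  g = np.random.rand(5)
  J = np.diagflat(s) - np.outer(s, s)  # full Jacobian, as in the comment above
  assert np.allclose(J @ g, _softmax_jvp(s, g))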
def backward(self, delta):
  '''
  Backward function of the Shuffler layer: it reorganizes the delta to match the
  input shape; the operation is the exact inverse of the forward pass.

  Parameters
  ----------
  delta : array-like
    Global delta to be backpropagated with shape (batch, out_w, out_h, out_c).

  Returns
  -------
  Shuffler_layer object
  '''
  check_is_fitted(self, 'delta')
  self._check_dims(shape=self.input_shape, arr=delta, func='Backward')

  c = self.input_shape[-1]
  channel_out = c // self.scale_step  # out_c

  # Apply the reverse function one channel at a time
  X = np.concatenate([self._reverse(self.delta[..., i], self.scale)
                      for i in range(channel_out)], axis=3)

  # The 'reverse' concatenation puts the correct channels together, but in a
  # scrambled order, so this part sorts the channels correctly
  idx = sum([list(range(i, c, channel_out)) for i in range(channel_out)], [])
  idx = np.argsort(idx)

  delta[:] = X[..., idx]

  return self
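# Sketch (hypothetical values, not library code): how the index trick above
# restores the channel order. With c = 8 input channels and channel_out = 2,
# the concatenation produces channels in the order [0, 2, 4, 6, 1, 3, 5, 7];
# np.argsort inverts that permutation, so X[..., idx] is back in order 0..7.
import numpy as np

c, channel_out = 8, 2
scrambled = sum([list(range(i, c, channel_out)) for i in range(channel_out)], [])
print(scrambled)              # [0, 2, 4, 6, 1, 3, 5, 7]
print(np.argsort(scrambled))  # [0, 4, 1, 5, 2, 6, 3, 7] -> inverse permutation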
def backward(self, delta, network):
  '''
  Sum self.delta to the correct layer delta on the network.

  Parameters
  ----------
  delta : 4-d numpy array
    Network delta to be backpropagated.

  network : Network object type.
    The network model to which this layer belongs.

  Returns
  -------
  Route layer object
  '''
  check_is_fitted(self, 'delta')

  # NumPyNet implementation
  if self.axis == 3:  # concatenation along the channels axis
    channels_sum = 0
    for idx in self.input_layers:
      channels = network[idx].out_shape[3]
      network[idx].delta += self.delta[..., channels_sum:channels_sum + channels]
      channels_sum += channels

  elif self.axis == 0:  # concatenation along the batch axis
    batch_sum = 0
    for idx in self.input_layers:
      batches = network[idx].out_shape[0]
      network[idx].delta += self.delta[batch_sum:batch_sum + batches, ...]
      batch_sum += batches

  return self
def backward(self, delta):
  '''
  Backward function: sum self.delta to the global delta to be backpropagated.

  Parameters
  ----------
  delta : array-like
    Global delta to be backpropagated.

  Returns
  -------
  self
  '''
  check_is_fitted(self, 'delta')

  delta[:] += self.delta

  return self
def backward(self, delta):
  '''
  Backward function of the Avgpool layer: the function modifies the net delta
  to be backpropagated.

  Parameters
  ----------
  delta : array-like
    Global delta to be backpropagated with shape (batch, out_w, out_h, out_c).

  Returns
  -------
  Avgpool_layer object
  '''
  check_is_fitted(self, 'delta')
  self._check_dims(shape=self.input_shape, arr=delta, func='Backward')

  delta[:] = delta.astype('float64')

  # Padding delta for a coherent _asStrided dimension
  if self.pad:
    mat_pad = self._pad(delta)
  else:
    mat_pad = delta

  # _asStride of the padded delta gives access to every pixel of the memory in
  # the order we want, creating a 1-1 correspondence between output and input pixels
  net_delta_view = self._asStride(mat_pad)

  _, w, h, c = self.output.shape

  # The normalization has to count only the non-NaN values inside every window
  # (for compatibility with keras); for a full window it reduces to 1 / (kx * ky)
  norm = self.delta * (1. / np.count_nonzero(~np.isnan(net_delta_view), axis=(4, 5)))

  net_delta_review = np.moveaxis(net_delta_view, source=[1, 2, 3], destination=[0, 1, 2])

  # The indexes are necessary to access every pixel value one at a time, since
  # modifying the same memory address more than once at a time doesn't produce
  # the correct result
  for (i, j, k), n in zip(np.ndindex(w, h, c), np.nditer(norm)):
    net_delta_review[i, j, k, ...] += n

  # Here delta is updated correctly
  if self.pad:
    _, w_pad, h_pad, _ = mat_pad.shape
    # Excluding the padded part of the image
    delta[:] = mat_pad[:, self.pad_top:w_pad - self.pad_bottom, self.pad_left:h_pad - self.pad_right, :]
  else:
    delta[:] = mat_pad

  return self
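# Sketch (non-overlapping 2x2 windows, not library code): average pooling
# spreads each output delta uniformly over its window, i.e. delta / (kx * ky)
# per input pixel, which is what the normalization above computes when no NaN
# padding is involved.
import numpy as np

g = np.random.rand(2, 2)              # delta w.r.t. the pooled 2x2 output
dx = np.kron(g, np.ones((2, 2))) / 4. # each input pixel gets delta / (2 * 2)
print(dx)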
def backward(self, inpt, delta=None, copy=False):
  '''
  Backward function of the Connected layer: it updates the global delta of the
  network to be backpropagated, the weights updates and the bias updates.

  Parameters
  ----------
  inpt : array-like
    Original input of the layer, of shape (batch, w, h, c).

  delta : array-like
    delta array of shape (batch, w, h, c). Global delta to be backpropagated.

  copy : bool (default=False)
    States if the activation function has to return a copy of the input or not.

  Returns
  -------
  self
  '''
  check_is_fitted(self, 'delta')

  # reshape to (batch, w * h * c)
  inpt = inpt.reshape(inpt.shape[0], -1)
  self._check_dims(shape=(self.input_shape[0], self.inputs), arr=inpt, func='Backward')

  self.delta *= self.gradient(self.output, copy=copy)
  self.delta = self.delta.reshape(-1, self.outputs)

  self.bias_update = self.delta.sum(axis=0)  # shape : (outputs,)

  # equivalent to self.weights_update = inpt.transpose() @ self.delta
  # shape : (w * h * c, outputs)
  self.weights_update = np.einsum('ji, jk -> ik', inpt, self.delta, optimize=True)

  if delta is not None:
    delta_shaped = delta.reshape(inpt.shape[0], -1)  # a reshaped VIEW of delta
    self._check_dims(shape=(self.input_shape[0], self.inputs), arr=delta_shaped, func='Backward')

    # shapes : (batch, w * h * c) = (batch, w * h * c) + (batch, outputs) @ (outputs, w * h * c)
    # equivalent to delta_shaped[:] += self.delta @ self.weights.transpose();
    # delta can be modified through its view
    delta_shaped[:] += np.einsum('ij, kj -> ik', self.delta, self.weights, optimize=True)

  return self
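# Sketch (hypothetical shapes, not library code): the two einsum calls above are
# just transposed matrix products; for a flattened input of shape (batch, inputs)
# and a delta of shape (batch, outputs) they can be checked directly.
import numpy as np

_inpt = np.random.rand(8, 32)  # hypothetical flattened input batch
_d = np.random.rand(8, 16)     # hypothetical layer delta
_w = np.random.rand(32, 16)    # hypothetical weights of shape (inputs, outputs)

assert np.allclose(np.einsum('ji, jk -> ik', _inpt, _d), _inpt.T @ _d)  # weights_update
assert np.allclose(np.einsum('ij, kj -> ik', _d, _w), _d @ _w.T)        # delta backprop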
def update(self):
  '''
  Update function for the RNN layer. The optimizer must be assigned
  externally as an optimizer object.

  Returns
  -------
  self
  '''
  check_is_fitted(self, 'delta')

  self.bias, self.weights, self.recurrent_weights = \
    self.optimizer.update(params=[self.bias, self.weights, self.recurrent_weights],
                          gradients=[self.bias_update, self.weights_update, self.recurrent_weights_update]
                          )
  return self
def update(self):
  '''
  Update function for the RNN_layer object. The optimizer must be assigned
  externally as an optimizer object.

  Returns
  -------
  self
  '''
  check_is_fitted(self, 'delta')

  self.input_layer.update()
  self.self_layer.update()
  self.output_layer.update()

  return self
def backward(self, delta):
  '''
  Backward function of the Cost layer: it updates the delta variable to be
  backpropagated. self.delta is updated inside the cost function.

  Parameters
  ----------
  delta : array-like
    Error of the network, to be backpropagated.

  Returns
  -------
  self
  '''
  check_is_fitted(self, 'delta')
  self._check_dims(shape=self.input_shape, arr=delta, func='Backward')

  delta[:] += self.scale * self.delta

  return self
def backward(self, delta):
  '''
  Backward function of the Maxpool layer: it accesses every position of the
  input image where a maximum was chosen and adds the corresponding self.delta
  value. Since we work with a 'view' of delta, the same pixel may appear more
  than once, and an atomic access to its value is needed to modify it correctly.

  Parameters
  ----------
  delta : array-like
    Global delta to be backpropagated with shape (batch, out_w, out_h, out_c).

  Returns
  -------
  self
  '''
  check_is_fitted(self, 'delta')
  self._check_dims(shape=self.input_shape, arr=delta, func='Backward')

  delta[:] = delta.astype('float64')

  # Padding delta in order to create another view
  if self.pad:
    mat_pad = self._pad(delta)
  else:
    mat_pad = delta

  # Create a view of mat_pad, following the padding (true or false)
  net_delta_view = self._asStride(mat_pad)

  b, w, h, c = self.output.shape

  # These indexes are useful to access 'atomically' (one at a time) every
  # element in net_delta_view
  for (i, j, k, l), m, o, D in zip(np.ndindex(b, w, h, c),
                                   self.indexes[0], self.indexes[1],
                                   np.nditer(self.delta)):
    net_delta_view[i, j, k, l, m, o] += D

  # Here delta is correctly modified
  if self.pad:
    _, w_pad, h_pad, _ = mat_pad.shape
    delta[:] = mat_pad[:, self.pad_top:w_pad - self.pad_bottom, self.pad_left:h_pad - self.pad_right, :]
  else:
    delta[:] = mat_pad

  return self
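# Sketch (hypothetical arrays, not library code): np.add.at performs the same
# unbuffered, 'atomic' accumulation as the explicit loop above, and handles
# repeated indices correctly where a plain fancy-indexed += would not.
import numpy as np

a = np.zeros(4)
idx = np.array([0, 0, 1])
a[idx] += 1.           # repeated index 0 counted only once -> [1, 1, 0, 0]

b = np.zeros(4)
np.add.at(b, idx, 1.)  # repeated index 0 counted twice     -> [2, 1, 0, 0]
print(a, b)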
def update(self):
  '''
  Update function for the Convolutional layer. The optimizer must be assigned
  externally as an optimizer object.

  Returns
  -------
  self
  '''
  check_is_fitted(self, 'delta')

  self.bias, self.weights = self.optimizer.update(params=[self.bias, self.weights],
                                                  gradients=[self.bias_update, self.weights_update]
                                                  )
  return self
def update(self):
  '''
  Update function for the LSTM_layer object. The optimizer must be assigned
  externally as an optimizer object.

  Returns
  -------
  self
  '''
  check_is_fitted(self, 'delta')

  self.uf.update()
  self.ui.update()
  self.ug.update()
  self.uo.update()
  self.wf.update()
  self.wi.update()
  self.wg.update()
  self.wo.update()

  return self
def backward(self, delta):
  '''
  Simply pass the gradient.

  Parameters
  ----------
  delta : numpy array
    Global error to be backpropagated.

  Returns
  -------
  Input layer object
  '''
  check_is_fitted(self, 'delta')
  self._check_dims(shape=self.out_shape, arr=delta, func='Backward')

  delta[:] = self.delta

  return self
def backward(self, delta=None):
  '''
  BackPropagation function of the BatchNormalization layer. Every formula is a
  derivative computed by chain rule: dbeta is the derivative of the output
  w.r.t. the bias, dgamma is the derivative of the output w.r.t. the scales,
  etc.

  Parameters
  ----------
  delta : array-like
    delta array of shape (batch, w, h, c). Global delta to be backpropagated.

  Returns
  -------
  self
  '''
  check_is_fitted(self, 'delta')
  self._check_dims(shape=self.input_shape, arr=delta, func='Backward')

  invN = 1. / np.prod(self.mean.shape)

  # These are the explicit computations of every derivative involved in the
  # BackPropagation of the BatchNorm layer, where dbeta = dout / dbeta,
  # dgamma = dout / dgamma, etc.
  # N.B. the formulas below are consistent with self.var holding the inverse
  # standard deviation 1 / sqrt(var + eps)

  self.bias_update = self.delta.sum(axis=0)                    # dbeta
  self.scales_update = (self.delta * self.x_norm).sum(axis=0)  # dgamma

  self.delta *= self.scales  # self.delta is dx_norm from now on

  self.mean_delta = (self.delta * (-self.var)).mean(axis=0)    # dmu
  self.var_delta = ((self.delta * (self.x - self.mean)).sum(axis=0) *
                    (-.5 * self.var * self.var * self.var))    # dvar

  # Here, delta is the derivative of the output w.r.t. the input
  self.delta = (self.delta * self.var +
                self.var_delta * 2 * (self.x - self.mean) * invN +
                self.mean_delta * invN)

  if delta is not None:
    delta[:] += self.delta

  return self
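# Sketch (toy 1-d data, not library code): a numerical check of the standard
# batchnorm chain-rule derivatives that the function above implements, with an
# explicit inverse standard deviation `ivar` playing the role of self.var and a
# random incoming delta `g`. Note: this sketch sums dmu where the layer code
# averages mean_delta, so the normalization conventions may differ.
import numpy as np

eps = 1e-5
x = np.random.rand(16)
g = np.random.rand(16)  # incoming delta
gamma = 1.5             # scale

def forward_loss(x):
  xn = (x - x.mean()) / np.sqrt(x.var() + eps)
  return np.sum(g * gamma * xn)

N = x.size
mu, ivar = x.mean(), 1. / np.sqrt(x.var() + eps)
dxn = g * gamma                                       # dL/dx_norm
dvar = np.sum(dxn * (x - mu)) * (-.5 * ivar ** 3)     # dL/dvar
dmu = np.sum(dxn) * (-ivar)                           # dL/dmu
dx = dxn * ivar + dvar * 2. * (x - mu) / N + dmu / N  # dL/dx

# central finite differences on every input pixel
h = 1e-5
num = np.zeros(N)
for k in range(N):
  e = np.zeros(N)
  e[k] = h
  num[k] = (forward_loss(x + e) - forward_loss(x - e)) / (2. * h)

assert np.allclose(dx, num, atol=1e-6)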
def backward(self, delta):
  '''
  Simply pass the gradient.

  Parameters
  ----------
  delta : array-like
    delta array of shape (batch, w, h, c). Global delta to be backpropagated.

  Returns
  -------
  self
  '''
  check_is_fitted(self, 'delta')
  self._check_dims(shape=self.out_shape, arr=delta, func='Backward')

  delta[:] = self.delta

  return self
def backward(self, delta):
  '''
  Compute the backward of the l2norm layer.

  Parameters
  ----------
  delta : numpy array
    Global error to be backpropagated.

  Returns
  -------
  L2norm_layer object
  '''
  check_is_fitted(self, 'delta')
  self._check_dims(shape=self.input_shape, arr=delta, func='Backward')

  self.delta += self.scales
  delta[:] += self.delta

  return self
def backward(self, delta, copy=False):
  '''
  Compute the backward of the Activation layer.

  Parameters
  ----------
  delta : array-like
    Global error to be backpropagated.

  copy : bool (default=False)
    States if the activation function has to return a copy of the input or not.

  Returns
  -------
  Activation layer object
  '''
  check_is_fitted(self, 'delta')
  self._check_dims(shape=self.out_shape, arr=delta, func='Backward')

  self.delta *= self.gradient(self.output, copy=copy)
  delta[:] = self.delta

  return self
def backward(self, delta):
  '''
  Backward function of the cost_layer: it updates the delta variable to be
  backpropagated. `self.delta` is updated inside the cost function.

  Parameters
  ----------
  delta : array-like
    delta array of shape (batch, w, h, c). Global delta to be backpropagated.

  Returns
  -------
  self
  '''
  check_is_fitted(self, 'delta')
  self._check_dims(shape=self.input_shape, arr=delta, func='Backward')

  delta[:] += self.scale * self.delta

  return self
def backward(self, delta):
  '''
  Backward function of the l2norm layer.

  Parameters
  ----------
  delta : array-like
    delta array of shape (batch, w, h, c). Global delta to be backpropagated.

  Returns
  -------
  self
  '''
  check_is_fitted(self, 'delta')
  self._check_dims(shape=self.input_shape, arr=delta, func='Backward')

  self.delta += self.scales
  delta[:] += self.delta

  return self
def backward(self, delta=None):
  '''
  Backward function of the Logistic layer.

  Parameters
  ----------
  delta : array-like (default=None)
    delta array of shape (batch, w, h, c). Global delta to be backpropagated.

  Returns
  -------
  self
  '''
  check_is_fitted(self, 'delta')
  self._check_dims(shape=self.out_shape, arr=delta, func='Backward')

  if delta is not None:
    delta[:] += self.delta  # as for darknet, probably an approximation

  return self
def backward(self, delta, network):
  '''
  Sum self.delta to the correct layer delta on the network.

  Parameters
  ----------
  delta : array-like
    delta array of shape (batch, w, h, c). Global delta to be backpropagated.

  network : Network object type.
    The network model to which this layer belongs.

  Returns
  -------
  self
  '''
  check_is_fitted(self, 'delta')

  # NumPyNet implementation
  if self.axis == 3:  # concatenation along the channels axis
    channels_sum = 0
    for idx in self.input_layers:
      channels = network[idx].out_shape[3]
      network[idx].delta += self.delta[..., channels_sum:channels_sum + channels]
      channels_sum += channels

  elif self.axis == 0:  # concatenation along the batch axis
    batch_sum = 0
    for idx in self.input_layers:
      batches = network[idx].out_shape[0]
      network[idx].delta += self.delta[batch_sum:batch_sum + batches, ...]
      batch_sum += batches

  return self
def backward(self, delta, copy=False):
  '''
  Backward function of the Convolutional layer.
  Source: https://arxiv.org/abs/1603.07285

  Parameters
  ----------
  delta : array-like
    delta array of shape (batch, w, h, c). Global delta to be backpropagated.

  copy : bool (default=False)
    States if the activation function has to return a copy of the input or not.

  Returns
  -------
  self
  '''
  check_is_fitted(self, 'delta')
  self._check_dims(shape=self.input_shape, arr=delta, func='Backward')

  delta[:] = delta.astype('float64')

  self.delta *= self.gradient(self.output, copy=copy)

  self.weights_update = np.einsum('ijklmn, ijko -> lmno', self.view, self.delta, optimize=True)
  self.bias_update = self.delta.sum(axis=(0, 1, 2))  # shape = (channels_out,)

  # Rotated weights, as the theory suggests
  w_rot = np.rot90(self.weights, 2, axes=(0, 1))

  # Pad and dilate the delta array, then stride it and convolve
  self.delta = self._dilate_pad(self.delta)
  delta_view = self._asStride(self.delta, back=True)

  delta[:] = np.einsum('ijklmn, lmon -> ijko', delta_view, w_rot, optimize=True)

  return self
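# Sketch (toy single-channel arrays, not library code): the input gradient of a
# valid cross-correlation is the *full* cross-correlation of delta with the
# kernel rotated by 180 degrees, which is what np.rot90(w, 2) implements above.
import numpy as np

def xcorr2d_valid(x, w):
  # naive valid cross-correlation, for demonstration only
  kx, ky = w.shape
  out = np.empty((x.shape[0] - kx + 1, x.shape[1] - ky + 1))
  for i in range(out.shape[0]):
    for j in range(out.shape[1]):
      out[i, j] = np.sum(x[i:i + kx, j:j + ky] * w)
  return out

x = np.random.rand(5, 5)
w = np.random.rand(3, 3)
g = np.random.rand(3, 3)  # delta w.r.t. the 3x3 output

# analytic input gradient: full correlation of g with rot180(w)
g_pad = np.pad(g, 2)      # pad by kx - 1 on every side
dx = xcorr2d_valid(g_pad, np.rot90(w, 2))

# numerical check on one input pixel
h = 1e-6
e = np.zeros_like(x)
e[2, 3] = h
num = np.sum(g * (xcorr2d_valid(x + e, w) - xcorr2d_valid(x - e, w))) / (2. * h)
assert np.allclose(dx[2, 3], num)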
def backward(self, delta):
  '''
  Compute the inverse transformation of the forward function on the gradient.

  Parameters
  ----------
  delta : numpy array
    Global error to be backpropagated.

  Returns
  -------
  UpSample_layer object
  '''
  check_is_fitted(self, 'delta')
  self._check_dims(shape=self.input_shape, arr=delta, func='Backward')

  if self.reverse:  # Upsample
    delta[:] = self._upsample(self.delta) * (1. / self.scale)

  else:  # Downsample
    delta[:] = self._downsample(self.delta) * (1. / self.scale)

  return self
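# Sketch (not the layer's internals, which may differ): a nearest-neighbour
# upsample and the block-sum downsample that acts as its adjoint, i.e. the kind
# of transformation pair an upsample backward has to invert.
import numpy as np

def nn_upsample(x, stride):
  # repeat every pixel stride times along both spatial axes
  return x.repeat(stride, axis=0).repeat(stride, axis=1)

def block_downsample(x, stride):
  # sum every (stride x stride) block: the adjoint of nn_upsample
  w, h = x.shape
  return x.reshape(w // stride, stride, h // stride, stride).sum(axis=(1, 3))

x = np.random.rand(2, 2)
g = np.random.rand(4, 4)
# adjoint identity: <upsample(x), g> == <x, downsample(g)>
assert np.allclose(np.sum(nn_upsample(x, 2) * g), np.sum(x * block_downsample(g, 2)))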
def backward(self, delta=None):
  '''
  Backward function of the Dropout layer: given the same mask as the forward,
  it backpropagates delta only to those pixels whose values have not been set
  to zero in the forward pass.

  Parameters
  ----------
  delta : numpy array of shape (batch, w, h, c), default value is None.
    If given, it is the global delta to be backpropagated.

  Returns
  -------
  Dropout layer object
  '''
  check_is_fitted(self, 'delta')
  self._check_dims(shape=self.out_shape, arr=delta, func='Backward')

  if delta is not None:
    self.delta = self.rnd * delta[:] * self.scale
    delta[:] = self.delta.copy()

  return self
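# Sketch (hypothetical shapes): the mask / scale mechanics of inverted dropout,
# assuming self.scale holds the usual 1 / (1 - prob). The same boolean mask used
# in the forward gates the gradient in the backward.
import numpy as np

prob = 0.25
x = np.random.rand(4, 4)
rnd = np.random.uniform(size=x.shape) >= prob  # True where the unit survives
scale = 1. / (1. - prob)

out = x * rnd * scale  # forward: dropped units zeroed, survivors rescaled
g = np.ones_like(x)    # incoming delta
dx = g * rnd * scale   # backward: zeroed exactly where the forward zeroed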
def backward(self, delta, prev_delta, copy=False):
  '''
  Backward function of the Shortcut layer.

  Parameters
  ----------
  delta : array-like
    delta array of shape (batch, w, h, c). Global delta to be backpropagated.

  prev_delta : array-like
    Second delta to be backpropagated.

  copy : bool (default=False)
    States if the activation function has to return a copy of the input or not.

  Returns
  -------
  self
  '''
  check_is_fitted(self, 'delta')

  # derivative of the activation function w.r.t. the input
  self.delta *= self.gradient(self.output, copy=copy)
  delta[:] += self.delta * self.alpha

  if (self.ix, self.iy, self.kx) == (None, None, None):  # same shapes
    prev_delta[:] += self.delta[:] * self.beta

  else:  # different shapes
    prev_delta[:, self.ix, self.jx, self.kx] += self.beta * self.delta[:, self.iy, self.jy, self.ky]

  return self
def backward(self, inpt, delta=None, copy=True):
  '''
  Backward function of the RNN layer: it updates the global delta of the
  network to be backpropagated, the weights updates and the bias updates.

  Parameters
  ----------
  inpt : array-like
    Original input of the layer.

  delta : array-like (default=None)
    Global delta, to be backpropagated.

  copy : bool (default=True)
    States if the activation function has to return a copy of the input or not.

  Returns
  -------
  RNN_layer object
  '''
  check_is_fitted(self, 'delta')

  last_input = self.input_layer.output[self.batches[-1]]
  last_self = self.self_layer.output[self.batches[-1]]

  for i, idx in reversed(list(enumerate(self.batches))):

    self.state = self.input_layer.output[idx, ...] + self.self_layer.output[idx, ...]

    _state = self.state.reshape(self.state.shape[0], -1)
    _input = inpt[idx, ...].reshape(len(idx), -1)

    # output_layer backward
    _delta = self.output_layer.delta[idx, ...]
    _delta[:] *= self.output_layer.gradient(self.output_layer.output[idx, ...], copy=copy)
    _delta_r = _delta.reshape(-1, self.output_layer.outputs)

    self.output_layer.bias_update = _delta_r.sum(axis=0)
    self.output_layer.weights_update = np.einsum('ji, jk -> ik', _state, _delta_r, optimize=True)

    delta_view = self.self_layer.delta[idx, ...]
    delta_shaped = delta_view.reshape(len(idx), -1)
    delta_shaped[:] += np.einsum('ij, kj -> ik', _delta_r, self.output_layer.weights, optimize=True)
    self.self_layer.delta[idx, ...] = delta_shaped.reshape(delta_view.shape)
    # end 1st backward

    if i != 0:
      idx2 = self.batches[i - 1]
      self.state = self.input_layer.output[idx2, ...] + self.self_layer.output[idx2, ...]
    else:
      self.state = self.prev_state.copy()

    self.input_layer.delta[idx, ...] = self.self_layer.delta[idx, ...]
    _state = self.state.reshape(self.state.shape[0], -1)

    # self_layer backward
    _delta = self.self_layer.delta[idx, ...]
    _delta[:] *= self.self_layer.gradient(self.self_layer.output[idx, ...], copy=copy)
    _delta_r = _delta.reshape(-1, self.self_layer.outputs)

    self.self_layer.bias_update = _delta_r.sum(axis=0)
    self.self_layer.weights_update = np.einsum('ji, jk -> ik', _state, _delta_r, optimize=True)

    if i > 0:
      idx2 = self.batches[i - 1]
      delta_view = self.self_layer.delta[idx2, ...]
      delta_shaped = delta_view.reshape(len(idx2), -1)
      delta_shaped[:] += np.einsum('ij, kj -> ik', _delta_r, self.self_layer.weights, optimize=True)
      self.self_layer.delta[idx2, ...] = delta_shaped.reshape(delta_view.shape)
    # end 2nd backward

    # input_layer backward
    _delta = self.input_layer.delta[idx, ...]
    _delta[:] *= self.input_layer.gradient(self.input_layer.output[idx, ...], copy=copy)
    _delta_r = _delta.reshape(-1, self.input_layer.outputs)

    self.input_layer.bias_update = _delta_r.sum(axis=0)
    self.input_layer.weights_update = np.einsum('ji, jk -> ik', _input, _delta_r, optimize=True)

    if delta is not None:
      delta_view = delta[idx, ...]
      delta_shaped = delta_view.reshape(len(idx), -1)
      delta_shaped[:] += np.einsum('ij, kj -> ik', _delta_r, self.input_layer.weights, optimize=True)
      delta[idx, ...] = delta_shaped.reshape(delta_view.shape)
    # end 3rd backward

  self.state = last_input[idx, ...] + last_self[idx, ...]

  return self
def backward(self, delta, copy=False):
  '''
  Backward function of the Convolutional layer.

  Parameters
  ----------
  delta : array of shape (batch, w, h, c).
    Global delta to be backpropagated.

  copy : bool, default False.
    States if the activation function has to return a copy of the input or not.

  Returns
  -------
  Convolutional_layer object.
  '''
  check_is_fitted(self, 'delta')
  self._check_dims(shape=self.input_shape, arr=delta, func='Backward')

  delta[:] = delta.astype('float64')

  # delta padding to match dimensions with the padded input when computing the view
  if self.pad:
    mat_pad = self._pad(delta)  # padded with the same values as the input
  else:
    mat_pad = delta

  # View on delta, used to modify it
  delta_view = self._asStride(mat_pad)

  self.delta *= self.gradient(self.output, copy=copy)

  # this operation should be +=, as darknet suggests (?)
  self.weights_update = np.einsum('ijklmn, ijko -> lmno', self.view, self.delta)

  # out_c bias updates
  self.bias_update = self.delta.sum(axis=(0, 1, 2))  # shape = (channels_out,)

  # Actual operation to be performed: it is basically the convolution of
  # self.delta with the transposed weights
  operator = np.einsum('ijkl, mnol -> ijkmno', self.delta, self.weights)

  delta_review = np.moveaxis(delta_view, source=[1, 2], destination=[0, 1])
  operator = np.moveaxis(operator, source=[1, 2], destination=[0, 1])

  # Atomic modification, really slow as for maxpool and avgpool.
  # Better performance could be obtained by reshaping the delta_review tensor,
  # but that would lose the view
  for d, o in zip(delta_review, operator):
    for di, oi in zip(d, o):
      di += oi

  # Here delta is updated correctly
  if self.pad:
    _, w_pad, h_pad, _ = mat_pad.shape
    delta[:] = mat_pad[:, self.pad_top:w_pad - self.pad_bottom, self.pad_left:h_pad - self.pad_right, :]
  else:
    delta[:] = mat_pad

  return self
def backward(self, inpt, delta=None, copy=False):
  '''
  Backward function of the LSTM layer: it updates the global delta of the
  network to be backpropagated, the weights updates and the bias updates.

  Parameters
  ----------
  inpt : array-like
    Original input of the layer.

  delta : array-like (default=None)
    Global delta, to be backpropagated.

  copy : bool (default=False)
    States if the activation function has to return a copy of the input or not.

  Returns
  -------
  LSTM_layer object
  '''
  check_is_fitted(self, 'delta')

  dh = np.zeros(shape=self.out_shape, dtype=float)

  prev_cell = None
  prev_state = None

  for _i, idx in reversed(list(enumerate(self.batches))):

    prev_cell = self.cell[idx, ...] if _i != 0 else prev_cell
    c = self.cell[idx, ...]
    prev_state = self.output[idx, ...] if _i != 0 else prev_state
    h = self.output[idx, ...]

    # gate activations, recomputed from the stored sub-layer outputs
    f = Logistic.activate(self.wf.output[idx, ...] + self.uf.output[idx, ...])
    i = Logistic.activate(self.wi.output[idx, ...] + self.ui.output[idx, ...])
    g = Tanh.activate(self.wg.output[idx, ...] + self.ug.output[idx, ...])
    o = Logistic.activate(self.wo.output[idx, ...] + self.uo.output[idx, ...])

    temp1 = Tanh.activate(c)
    temp2 = self.delta[idx, ...] * o * Tanh.gradient(temp1)

    temp1 *= self.delta[idx, ...] * Logistic.gradient(o)
    self.wo.delta[idx, ...] = temp1
    self.uo.delta[idx, ...] = temp1

    temp1 = temp2 * i * Tanh.gradient(g)
    self.wg.delta[idx, ...] = temp1
    self.ug.delta[idx, ...] = temp1

    temp1 = temp2 * g * Logistic.gradient(i)
    self.wi.delta[idx, ...] = temp1
    self.ui.delta[idx, ...] = temp1

    temp1 = temp2 * prev_cell * Logistic.gradient(f)
    self.wf.delta[idx, ...] = temp1
    self.uf.delta[idx, ...] = temp1

    dc = temp2 * f

    dh[idx, ...] = self._internal_backward(self.wo, inpt=prev_state, indices=range(prev_state.shape[0]), delta=dh[idx, ...], copy=copy)
    delta[idx, ...] = self._internal_backward(self.uo, inpt=inpt, indices=idx, delta=delta, copy=copy)

    dh[idx, ...] = self._internal_backward(self.wg, inpt=prev_state, indices=range(prev_state.shape[0]), delta=dh[idx, ...], copy=copy)
    delta[idx, ...] = self._internal_backward(self.ug, inpt=inpt, indices=idx, delta=delta, copy=copy)

    dh[idx, ...] = self._internal_backward(self.wi, inpt=prev_state, indices=range(prev_state.shape[0]), delta=dh[idx, ...], copy=copy)
    delta[idx, ...] = self._internal_backward(self.ui, inpt=inpt, indices=idx, delta=delta, copy=copy)

    dh[idx, ...] = self._internal_backward(self.wf, inpt=prev_state, indices=range(prev_state.shape[0]), delta=dh[idx, ...], copy=copy)
    delta[idx, ...] = self._internal_backward(self.uf, inpt=inpt, indices=idx, delta=delta, copy=copy)

  return self
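# Reference sketch (standard LSTM equations, for orientation only): with
#   c_t = f * c_{t-1} + i * g   and   h_t = o * tanh(c_t),
# the temporaries in the backward above correspond to the usual gate gradients:
#   dc = dh * o * tanh'(tanh(c_t))      -> temp2
#   do = dh * tanh(c_t) * sigmoid'(o)   -> temp1 assigned to wo / uo
#   dg = dc * i * tanh'(g)              -> temp1 assigned to wg / ug
#   di = dc * g * sigmoid'(i)           -> temp1 assigned to wi / ui
#   df = dc * c_{t-1} * sigmoid'(f)     -> temp1 assigned to wf / uf
#   dc_{t-1} = dc * f                   -> dc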