def __backward(self, dy):
    dy = cp.array(dy)
    n_batches, _, _, _ = self.fire.shape
    n_channels, n_rows, n_cols = self.input_shape
    n_filters, n_rows_filter, n_cols_filter = self.filter_shape

    # Rearrange the incoming gradient so each row corresponds to one
    # output position and each column to one filter.
    dy = dy.transpose(0, 2, 3, 1).reshape(-1, n_filters)

    input_shape = (n_batches, n_channels, n_rows, n_cols)
    # Propagate the gradient through the weights (bias row excluded)
    # and scatter it back to image shape with col2im.
    backfire = cp.dot(dy, cp.array(self.w[1:, :]).T)
    backfire = col2im(
        backfire, input_shape, self.output_shape,
        self.filter_shape, self.pad, self.strides, aggregate=True)

    # Trim the zero padding added in the forward pass.
    if self.pad[0] > 0:
        backfire = backfire[:, :, self.pad[0]:-self.pad[0], :]
    if self.pad[1] > 0:
        backfire = backfire[:, :, :, self.pad[1]:-self.pad[1]]
    self.backfire = asnumpy(backfire)

    # Weight gradient, averaged over the batch.
    self.dw = asnumpy(
        self.dtype(1.) / n_batches * cp.dot(cp.array(self.x).T, dy))
def optimize(self, w, dw):
    w_ = cp.array(w)
    dw_ = cp.array(dw)
    # Apply weight decay to everything except the bias row.
    w_[1:, :] *= self.regularization(self.learning_rate, self.weight_decay)
    # Plain gradient-descent step.
    w_ -= self.learning_rate * dw_
    w[::] = asnumpy(w_)
def __predict(self, x):
    x = x if self.force_cpu else cp.array(x)
    gamma = self.gamma if self.force_cpu else cp.array(self.gamma)
    beta = self.beta if self.force_cpu else cp.array(self.beta)
    miu = self.miu if self.force_cpu else cp.array(self.miu)
    var = self.var if self.force_cpu else cp.array(self.var)

    # Normalize with the running statistics, then scale and shift.
    fire = gamma * (x - miu) / ncp.sqrt(var + self.ep) + beta
    self.fire = asnumpy(fire)
def __forward(self, x):
    mask = self.mask if self.force_cpu else cp.array(self.mask)
    x = x if self.force_cpu else cp.array(x)

    # Shuffle the mask in place, then drop units whose mask value
    # falls below the threshold.
    ncp.random_shuffle(mask.reshape(mask.size))
    mask_dropped = mask >= self.thresh
    fire = mask_dropped * x

    self.fire = asnumpy(fire)
    self.mask = asnumpy(mask)
    self.mask_dropped = asnumpy(mask_dropped)
def optimize(self, w, dw):
    w_ = cp.array(w)
    dw_ = cp.array(dw)
    w_[1:, :] *= self.regularization(self.learning_rate, self.weight_decay)

    # RMSProp: exponential moving average of squared gradients.
    h = cp.zeros_like(w_) if self.h is None else cp.array(self.h)
    h = self.gamma * h + (1. - self.gamma) * cp.power(dw_, 2)
    w_ -= self.learning_rate * dw_ / (cp.sqrt(h) + self.ep)

    w[::] = asnumpy(w_)
    self.h = asnumpy(h)
def optimize(self, w, dw):
    w_ = cp.array(w)
    dw_ = cp.array(dw)
    w_[1:, :] *= self.regularization(self.learning_rate, self.weight_decay)

    # AdaGrad: accumulate squared gradients and scale the step by the
    # inverse square root of the accumulator.
    h = cp.zeros_like(w_) if self.h is None else cp.array(self.h)
    h += cp.power(dw_, 2)
    w_ -= self.learning_rate * (self.dtype(1.) / cp.sqrt(h + self.ep)) * dw_

    w[::] = asnumpy(w_)
    self.h = asnumpy(h)
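# A minimal standalone NumPy sketch (illustration only, not part of this
# class) of the AdaGrad rule applied above: squared gradients accumulate
# in h, so the effective per-weight step size shrinks over time. All
# values below are arbitrary.
import numpy as np

def adagrad_step(w, dw, h, lr=0.01, eps=1e-8):
    h = h + dw ** 2                     # accumulate squared gradients
    w = w - lr * dw / np.sqrt(h + eps)  # per-weight scaled step
    return w, h

w = np.array([0.5, -0.3])
h = np.zeros_like(w)
for _ in range(3):
    dw = 2. * w                         # gradient of a toy quadratic loss
    w, h = adagrad_step(w, dw, h)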
def __backward(self, dy):
    dy = cp.array(dy)
    if is_multi_channels_image(self.output_shape):
        dy = flatten(dy, self.output_shape)

    # Weight gradient, averaged over the batch.
    batch_size = self.x.shape[0]
    self.dw = asnumpy(
        self.dtype(1.) / batch_size * cp.dot(cp.array(self.x).T, dy))

    # Gradient w.r.t. the input; the bias row of w is excluded.
    backfire = cp.dot(dy, cp.array(self.w[1:, :]).T)
    if is_multi_channels_image(self.input_shape):
        backfire = unflatten(backfire, self.input_shape)
    self.backfire = asnumpy(backfire)
def __forward(self, x):
    x = cp.array(x)
    if is_multi_channels_image(self.input_shape):
        x = flatten(x, self.input_shape)

    # Add bias terms.
    x = cp.c_[cp.ones((x.shape[0], 1), dtype=self.dtype), x]

    fire = cp.dot(x, cp.array(self.w))
    if is_multi_channels_image(self.output_shape):
        fire = unflatten(fire, self.output_shape)

    self.x = asnumpy(x)
    self.fire = asnumpy(fire)
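# A standalone NumPy sketch (illustration only, not this layer's API) of
# the bias trick used above: prepending a column of ones to x makes the
# first row of w act as the bias, so x_aug @ w == x @ w[1:] + w[0].
import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal((4, 3))       # 4 samples, 3 features
w = rng.standard_normal((1 + 3, 2))   # bias row + weights, 2 outputs

x_aug = np.c_[np.ones((x.shape[0], 1)), x]
assert np.allclose(x_aug @ w, x @ w[1:] + w[0])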
def optimize(self, w, dw):
    w_ = cp.array(w)
    dw_ = cp.array(dw)
    w_[1:, :] *= self.regularization(self.learning_rate, self.weight_decay)

    # Classical momentum: blend the current gradient step with the
    # previous update.
    pre_dw = cp.zeros_like(dw_) if self.pre_dw is None else cp.array(
        self.pre_dw)
    pre_dw = self.learning_rate * dw_ + self.momentum_rate * pre_dw
    w_ -= pre_dw

    w[::] = asnumpy(w_)
    self.pre_dw = asnumpy(pre_dw)
def optimize(self, w, dw):
    w_ = cp.array(w)
    dw_ = cp.array(dw)
    w_[1:, :] *= self.regularization(self.learning_rate, self.weight_decay)

    v = cp.zeros_like(w_) if self.v is None else cp.array(self.v)
    r = cp.zeros_like(w_) if self.r is None else cp.array(self.r)

    # Exponential moving averages of the gradient (v) and of the squared
    # gradient (r); equivalent to v = beta*v + (1-beta)*dw and
    # r = gamma*r + (1-gamma)*dw**2.
    dw_square = cp.power(dw_, 2)
    v = self.beta * (v - dw_) + dw_
    r = self.gamma * (r - dw_square) + dw_square
    w_ -= self.learning_rate / cp.sqrt(r + self.ep) * v

    w[::] = asnumpy(w_)
    self.v = asnumpy(v)
    self.r = asnumpy(r)
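# A standalone NumPy check (illustration only) that the rearranged update
# used above is an ordinary exponential moving average:
# beta * (v - dw) + dw == beta * v + (1 - beta) * dw.
import numpy as np

beta = 0.9
v = np.array([0.2, -0.1])
dw = np.array([0.05, 0.4])
assert np.allclose(beta * (v - dw) + dw, beta * v + (1. - beta) * dw)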
def __forward(self, x):
    x = x if self.force_cpu else cp.array(x)

    # Batch statistics.
    miu = ncp.mean(x, axis=0)
    xmiu = x - miu
    var = ncp.mean(xmiu**2, axis=0)
    std_inv = 1. / (ncp.sqrt(var + self.ep))

    # Lazily initialize the scale (gamma) and shift (beta) parameters.
    gamma, beta = None, None
    shape = self.input_shape
    if self.gamma is None:
        gamma = ncp.ones(shape, dtype=self.dtype, arr_type=type(x))
    else:
        gamma = self.gamma if self.force_cpu else cp.array(self.gamma)
    if self.beta is None:
        beta = ncp.zeros(shape, dtype=self.dtype, arr_type=type(x))
    else:
        beta = self.beta if self.force_cpu else cp.array(self.beta)

    # Normalize, then scale and shift.
    xhat = xmiu * std_inv
    fire = gamma * xhat + beta

    # Update the running statistics used at prediction time.
    pre_miu, pre_var = None, None
    if self.miu is None:
        pre_miu = miu
    else:
        pre_miu = self.miu if self.force_cpu else cp.array(self.miu)
    if self.var is None:
        pre_var = var
    else:
        pre_var = self.var if self.force_cpu else cp.array(self.var)
    miu = pre_miu * self.momentum + (1. - self.momentum) * miu
    var = pre_var * self.momentum + (1. - self.momentum) * var

    self.xmiu = asnumpy(xmiu)
    self.std_inv = asnumpy(std_inv)
    self.gamma = asnumpy(gamma)
    self.beta = asnumpy(beta)
    self.xhat = asnumpy(xhat)
    self.fire = asnumpy(fire)
    self.miu = asnumpy(miu)
    self.var = asnumpy(var)
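# A standalone NumPy sketch (illustration only) of the normalization the
# forward pass above performs: per-feature zero mean and unit variance
# before the gamma/beta affine transform. eps here plays the role of
# self.ep.
import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal((64, 5)) * 3. + 2.
eps = 1e-5

miu = x.mean(axis=0)
var = ((x - miu) ** 2).mean(axis=0)
xhat = (x - miu) / np.sqrt(var + eps)

assert np.allclose(xhat.mean(axis=0), 0., atol=1e-6)
assert np.allclose(xhat.var(axis=0), 1., atol=1e-3)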
def optimize(self, w, dw):
    w_ = cp.array(w)
    dw_ = cp.array(dw)
    w_[1:, :] *= self.regularization(self.learning_rate, self.weight_decay)

    r = cp.zeros_like(w_) if self.r is None else cp.array(self.r)
    s = cp.zeros_like(w_) if self.s is None else cp.array(self.s)
    v = cp.zeros_like(w_) if self.v is None else cp.array(self.v)

    # AdaDelta: r accumulates squared gradients, s accumulates squared
    # updates, and v is the rescaled update.
    r = self.gamma * r + (1. - self.gamma) * cp.power(dw_, 2)
    v = cp.sqrt(s + self.ep) / (cp.sqrt(r + self.ep)) * dw_
    w_ -= self.learning_rate * v
    s = self.gamma * s + (1. - self.gamma) * cp.power(v, 2)

    w[::] = asnumpy(w_)
    self.r = asnumpy(r)
    self.s = asnumpy(s)
    self.v = asnumpy(v)
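# A standalone NumPy sketch (illustration only) of the standard AdaDelta
# recurrences followed above: r tracks the decaying average of squared
# gradients, s tracks the decaying average of squared updates, and the
# step rescales the gradient by sqrt(s)/sqrt(r). Values are arbitrary.
import numpy as np

def adadelta_step(w, dw, r, s, gamma=0.95, eps=1e-6, lr=1.0):
    r = gamma * r + (1. - gamma) * dw ** 2
    v = np.sqrt(s + eps) / np.sqrt(r + eps) * dw
    w = w - lr * v
    s = gamma * s + (1. - gamma) * v ** 2
    return w, r, s

w = np.array([0.5, -0.3])
r = np.zeros_like(w)
s = np.zeros_like(w)
for _ in range(3):
    dw = 2. * w                        # gradient of a toy quadratic loss
    w, r, s = adadelta_step(w, dw, r, s)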
def __forward(self, x):
    x = cp.array(x)
    if len(x.shape) != 4:
        msg = 'Convolution layer assumes that input is 4-d array.\n'\
            + ' shape : %s' % str(x.shape)
        raise DNNetRuntimeError(msg)

    n_batches, _, _, _ = x.shape
    n_channels, n_rows, n_cols = self.output_shape

    # Pad the input, unfold it into column form, and prepend a bias
    # column so the convolution becomes a single matrix product.
    x_pad = pad_img(x, self.pad[0], self.pad[1])
    x = im2col(x_pad, self.filter_shape, self.strides)
    x = cp.c_[cp.ones((x.shape[0], 1), dtype=self.dtype), x]

    fire = cp.dot(x, cp.array(self.w))
    fire = fire.reshape(n_batches, n_rows, n_cols, n_channels)
    fire = fire.transpose(0, 3, 1, 2)

    self.x = asnumpy(x)
    self.fire = asnumpy(fire)
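# A standalone helper (illustration only; pad_img/im2col are this
# library's own utilities) showing the output-size arithmetic that makes
# the reshape above valid: for each spatial axis,
# out = (in + 2 * pad - filter) // stride + 1.
def conv_output_size(in_size, filter_size, pad, stride):
    return (in_size + 2 * pad - filter_size) // stride + 1

# e.g. a 28x28 input with a 5x5 filter, pad 2 and stride 1 stays 28x28.
assert conv_output_size(28, 5, 2, 1) == 28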
def __backward(self, dy):
    dy = dy if self.force_cpu else cp.array(dy)
    xhat = self.xhat if self.force_cpu else cp.array(self.xhat)
    xmiu = self.xmiu if self.force_cpu else cp.array(self.xmiu)
    std_inv = self.std_inv if self.force_cpu else cp.array(self.std_inv)
    beta = self.beta if self.force_cpu else cp.array(self.beta)
    gamma = self.gamma if self.force_cpu else cp.array(self.gamma)

    batch_size = dy.shape[0]

    # Gradients of the shift and scale parameters.
    dbeta = dy.sum(axis=0)
    dgamma = (xhat * dy).sum(axis=0)

    # Gradient w.r.t. the input, folding in the dependence of the batch
    # mean and variance on x.
    tmp1 = (gamma * xmiu * dy).sum(axis=0)
    tmp2 = -ncp.power(std_inv, 3) * tmp1 / batch_size
    tmp3 = xmiu * tmp2 + gamma * std_inv * dy
    tmp4 = tmp3.sum(axis=0)
    backfire = tmp3 - tmp4 / batch_size

    # gamma and beta are updated here directly rather than by an optimizer.
    beta = beta - dbeta / batch_size
    gamma = gamma - dgamma / batch_size

    self.backfire = asnumpy(backfire)
    self.beta = asnumpy(beta)
    self.gamma = asnumpy(gamma)
def __predict(self, x):
    x = x if self.force_cpu else cp.array(x)
    # At prediction time, scale activations by the keep probability
    # instead of dropping units.
    self.fire = asnumpy((1. - self.drop_ratio) * x)
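# A standalone NumPy sketch (illustration only) of why prediction scales
# by (1 - drop_ratio): with this non-inverted dropout scheme, the expected
# activation during training is (1 - drop_ratio) * x, which the scaling
# reproduces deterministically at test time.
import numpy as np

rng = np.random.default_rng(0)
x = np.ones(100000)
drop_ratio = 0.3
mask = rng.random(x.shape) >= drop_ratio    # keep with probability 0.7
train_mean = (mask * x).mean()
predict_value = (1. - drop_ratio) * x.mean()
assert abs(train_mean - predict_value) < 1e-2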
def __backward(self, dy):
    mask_dropped = self.mask_dropped if self.force_cpu else cp.array(
        self.mask_dropped)
    dy = dy if self.force_cpu else cp.array(dy)
    # Gradients flow only through the units that were kept.
    self.backfire = asnumpy(mask_dropped * dy)