def bprop(self):
    grad = ca.reshape(self.grad_array, self.bcast_shape)
    # gradient wrt mu: mu * incoming gradient
    ca.multiply(self.mu.array, grad, self.mu.grad_array)
    # gradient wrt logvar: 0.5 * (exp(logvar) - 1) * incoming gradient
    ca.exp(self.logvar.array, out=self.logvar.grad_array)
    self.logvar.grad_array -= 1
    self.logvar.grad_array *= 0.5
    self.logvar.grad_array *= grad
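# For reference: the gradients above match the Gaussian-VAE KL term
# KL(N(mu, exp(logvar)) || N(0, 1)) = 0.5 * sum(mu**2 + exp(logvar) - logvar - 1),
# whose partials are mu and 0.5 * (exp(logvar) - 1). A minimal NumPy sketch
# (the kl_gauss helper and its inputs are illustrative, not library code):
import numpy as np

def kl_gauss(mu, logvar):
    # KL divergence of N(mu, exp(logvar)) from a standard normal, summed
    return 0.5 * np.sum(mu**2 + np.exp(logvar) - logvar - 1)

mu = np.array([0.3, -1.2])
logvar = np.array([0.1, -0.5])
grad_mu = mu                              # matches ca.multiply(mu, grad, ...)
grad_logvar = 0.5 * (np.exp(logvar) - 1)  # matches the exp / -1 / *0.5 sequence

# Finite-difference check of the mu gradient
eps = 1e-6
mu_p = mu.copy()
mu_p[0] += eps
fd = (kl_gauss(mu_p, logvar) - kl_gauss(mu, logvar)) / eps
assert np.isclose(fd, grad_mu[0], atol=1e-4)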
def bprop(self):
    # -(target/pred - (1 - target)/(1 - pred))
    tmp1 = 1 - self.target.array
    tmp2 = 1 - self.pred.array
    tmp2 += self.eps
    ca.divide(tmp1, tmp2, tmp1)
    ca.add(self.pred.array, self.eps, tmp2)
    ca.divide(self.target.array, tmp2, out=tmp2)
    ca.subtract(tmp1, tmp2, self.pred.grad_array)
    self.pred.grad_array *= ca.reshape(self.grad_array, self.bcast_shape)
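# For reference: this is the gradient of binary cross-entropy,
# L = -sum(t*log(p + eps) + (1 - t)*log(1 - p + eps)), with eps added for
# numerical stability. A minimal NumPy sketch (names are illustrative):
import numpy as np

def bce(pred, target, eps=1e-8):
    return -np.sum(target * np.log(pred + eps)
                   + (1 - target) * np.log(1 - pred + eps))

def bce_grad(pred, target, eps=1e-8):
    # Same expression as the comment in bprop() above
    return -(target / (pred + eps) - (1 - target) / (1 - pred + eps))

pred = np.array([0.9, 0.2])
target = np.array([1.0, 0.0])
g = bce_grad(pred, target)

fd_eps = 1e-6
pred_p = pred.copy()
pred_p[0] += fd_eps
fd = (bce(pred_p, target) - bce(pred, target)) / fd_eps
assert np.isclose(fd, g[0], atol=1e-3)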
def _update(self):
    # Forward propagation
    next_x = self.x.array
    x_feats = [None] * len(self.layers)
    x_grams = [None] * len(self.layers)
    for l, layer in enumerate(self.layers):
        next_x = layer.fprop(next_x)
        if self.subject_weights[l] > 0:
            x_feats[l] = next_x
        if self.style_weights[l] > 0:
            x_feats[l] = next_x
            x_grams[l] = gram_matrix(next_x)
    # Backward propagation
    grad = ca.zeros_like(next_x)
    loss = ca.zeros(1)
    for l, layer in reversed(list(enumerate(self.layers))):
        if self.subject_weights[l] > 0:
            diff = x_feats[l] - self.subject_feats[l]
            norm = ca.sum(ca.fabs(diff)) + 1e-8
            weight = float(self.subject_weights[l]) / norm
            grad += diff * weight
            loss += 0.5 * weight * ca.sum(diff**2)
        if self.style_weights[l] > 0:
            diff = x_grams[l] - self.style_grams[l]
            n_channels = diff.shape[0]
            x_feat = ca.reshape(x_feats[l], (n_channels, -1))
            style_grad = ca.reshape(ca.dot(diff, x_feat), x_feats[l].shape)
            norm = ca.sum(ca.fabs(style_grad))
            weight = float(self.style_weights[l]) / norm
            style_grad *= weight
            grad += style_grad
            loss += 0.25 * weight * ca.sum(diff**2)
        grad = layer.bprop(grad)
    if self.tv_weight > 0:
        x = ca.reshape(self.x.array, (3, 1) + grad.shape[2:])
        tv = self.tv_conv.fprop(x, self.tv_kernel)
        tv *= self.tv_weight
        grad -= ca.reshape(tv, grad.shape)
    ca.copyto(self.x.grad_array, grad)
    return loss
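# The style term above follows the Gatys et al. formulation: with a layer's
# features F of shape (channels, spatial) and target Gram matrix A, the loss
# 0.25 * sum((F F^T - A)**2) has gradient exactly (F F^T - A) @ F, which is
# the ca.dot(diff, x_feat) computed in _update(). A NumPy sketch (the helper
# name is illustrative):
import numpy as np

def style_loss_and_grad(F, A):
    D = F @ F.T - A           # Gram difference (A assumed symmetric)
    return 0.25 * np.sum(D**2), D @ F

rng = np.random.default_rng(0)
F = rng.normal(size=(4, 10))
A = rng.normal(size=(4, 4))
A = A + A.T                   # Gram targets are symmetric

loss, grad = style_loss_and_grad(F, A)
eps = 1e-6
F_p = F.copy()
F_p[0, 0] += eps
loss_p, _ = style_loss_and_grad(F_p, A)
assert np.isclose((loss_p - loss) / eps, grad[0, 0], atol=1e-3)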
def grad(self, target, x1, x2):
    dists = self.fprop(x1, x2)
    target = ca.reshape(target, target.shape + (1,))
    grad_dists1 = 2 * (x1 - x2)
    genuine = target * grad_dists1
    imposter = (1 - target) * (-grad_dists1)
    non_saturated_imposters = self.margin - dists > 0.0
    imposter *= non_saturated_imposters
    grad_x1 = genuine + imposter
    return grad_x1, -grad_x1
def input_grad(self, y, dists):
    x1 = self.last_x1
    x2 = self.last_x2
    y = ca.reshape(y, y.shape + (1,))
    grad_dists1 = 2 * (x1 - x2)
    genuine = y * grad_dists1
    imposter = (1 - y) * (-grad_dists1)
    non_saturated_imposters = self.margin - dists > 0.0
    imposter *= non_saturated_imposters
    grad_x1 = genuine + imposter
    return grad_x1, -grad_x1
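# Assuming dists holds squared Euclidean distances ||x1 - x2||**2, the
# grad()/input_grad() functions above implement the gradient of a
# contrastive loss in the style of Hadsell et al.:
# L = y*d + (1 - y)*max(0, margin - d). Genuine pairs (y=1) are pulled
# together; imposters are pushed apart until the margin saturates.
# A minimal NumPy sketch under that assumption:
import numpy as np

def contrastive_loss(x1, x2, y, margin=1.0):
    d = np.sum((x1 - x2)**2, axis=1)
    return np.sum(y * d + (1 - y) * np.maximum(0.0, margin - d))

def contrastive_grad_x1(x1, x2, y, margin=1.0):
    # Mirrors the functions above; the gradient wrt x2 is the negation.
    d = np.sum((x1 - x2)**2, axis=1, keepdims=True)
    g = 2 * (x1 - x2)
    y = y.reshape(-1, 1)
    return y * g + (1 - y) * (-g) * (margin - d > 0.0)

x1 = np.array([[0.1, 0.4], [0.9, -0.2]])
x2 = np.array([[0.0, 0.5], [0.8, -0.1]])
y = np.array([1.0, 0.0])
g = contrastive_grad_x1(x1, x2, y)

eps = 1e-6
x1_p = x1.copy()
x1_p[0, 0] += eps
fd = (contrastive_loss(x1_p, x2, y) - contrastive_loss(x1, x2, y)) / eps
assert np.isclose(fd, g[0, 0], atol=1e-4)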
def _update(self):
    # Forward propagation
    next_x = self.x.array
    x_feats = [None] * len(self.layers)
    for l, layer in enumerate(self.layers):
        next_x = layer.fprop(next_x)
        if self.subject_weights[l] > 0 or self.style_weights[l] > 0:
            x_feats[l] = next_x
    # Backward propagation
    grad = ca.zeros_like(next_x)
    loss = ca.zeros(1)
    for l, layer in reversed(list(enumerate(self.layers))):
        if self.subject_weights[l] > 0:
            diff = x_feats[l] - self.subject_feats[l]
            norm = ca.sum(ca.fabs(diff)) + 1e-8
            weight = float(self.subject_weights[l]) / norm
            grad += diff * weight
            loss += 0.5 * weight * ca.sum(diff**2)
        if self.style_weights[l] > 0:
            diff = gram_matrix(x_feats[l]) - self.style_grams[l]
            n_channels = diff.shape[0]
            x_feat = ca.reshape(x_feats[l], (n_channels, -1))
            style_grad = ca.reshape(ca.dot(diff, x_feat), x_feats[l].shape)
            norm = ca.sum(ca.fabs(style_grad))
            weight = float(self.style_weights[l]) / norm
            style_grad *= weight
            grad += style_grad
            loss += 0.25 * weight * ca.sum(diff**2)
        grad = layer.bprop(grad)
    if self.tv_weight > 0:
        x = ca.reshape(self.x.array, (3, 1) + grad.shape[2:])
        tv = self.tv_conv.fprop(x, self.tv_kernel)
        tv *= self.tv_weight
        grad -= ca.reshape(tv, grad.shape)
    ca.copyto(self.x.grad_array, grad)
    return loss
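# The tv_weight block at the end of both _update() variants subtracts a
# total-variation smoothing gradient obtained by convolving each image
# channel with self.tv_kernel. The kernel itself is not shown in this
# listing; a Laplacian-like stencil is a common choice, so treat the one
# below as an assumption. Illustrative NumPy/SciPy sketch:
import numpy as np
from scipy.ndimage import convolve

# Hypothetical stand-in for self.tv_kernel: a discrete Laplacian, whose
# response is the gradient of a local-smoothness penalty.
tv_kernel = np.array([[0., -1., 0.],
                      [-1., 4., -1.],
                      [0., -1., 0.]])

img = np.random.default_rng(0).normal(size=(3, 32, 32))  # c01 image
tv = np.stack([convolve(c, tv_kernel, mode='nearest') for c in img])

tv_weight = 1e-3
grad = np.zeros_like(img)
grad -= tv_weight * tv   # as in _update(): grad -= reshaped tv term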
def fprop(self):
    self.out = ca.reshape(self.x.out, self.out_shape)
def bprop(self, y_grad, to_x=True):
    return ca.reshape(y_grad, self.x_shape)
def bprop(self):
    ca.subtract(self.pred.array, self.target.array, self.pred.grad_array)
    if self.sigma != 1.0:
        self.pred.grad_array *= 2 * self.multiplier
    self.pred.grad_array *= ca.reshape(self.grad_array, self.bcast_shape)
def bprop(self, y_grad):
    return ca.reshape(y_grad, self.x_shape)
def fprop(self, x):
    self.x_shape = x.shape
    return ca.reshape(x, self.y_shape(x.shape))
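# The fprop/bprop pairs above all follow the same reshape-layer pattern:
# fprop records the incoming shape, bprop restores it so gradients flow
# back with the original dimensions. A self-contained NumPy sketch (the
# Flatten name is illustrative, not the class these methods belong to):
import numpy as np

class Flatten:
    def fprop(self, x):
        self.x_shape = x.shape
        return x.reshape(x.shape[0], -1)

    def bprop(self, y_grad):
        return y_grad.reshape(self.x_shape)

layer = Flatten()
x = np.zeros((8, 3, 4, 4))
y = layer.fprop(x)                 # -> shape (8, 48)
g = layer.bprop(np.ones_like(y))
assert g.shape == x.shape          # gradient restored to (8, 3, 4, 4)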
def bprop(self):
    self.x.out_grad = ca.reshape(self.out_grad, self.x.out_shape)
def bprop(self):
    ca.subtract(self.pred.array, self.target.array, self.pred.grad_array)
    self.pred.grad_array *= ca.reshape(self.grad_array, self.bcast_shape)
def bprop(self, y_grad):
    return ca.reshape(y_grad, self.last_x_shape)
def gram_matrix(img_bc01):
    n_channels = img_bc01.shape[1]
    feats = ca.reshape(img_bc01, (n_channels, -1))
    gram = ca.dot(feats, feats.T)
    return gram
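# NumPy equivalent of gram_matrix() above. Note that reshaping (b, c, h, w)
# to (c, -1) folds the batch dimension into the spatial one, so the function
# implicitly assumes b == 1, as in single-image style transfer.
import numpy as np

def gram_matrix_np(img_bc01):
    n_channels = img_bc01.shape[1]
    feats = img_bc01.reshape(n_channels, -1)
    return feats @ feats.T         # (c, c) channel co-activation statistics

img = np.random.default_rng(0).normal(size=(1, 3, 8, 8))
G = gram_matrix_np(img)
assert G.shape == (3, 3)
assert np.allclose(G, G.T)         # symmetric by construction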
def fprop(self, x, phase):
    self.name = 'flatten'
    self.last_x_shape = x.shape
    return ca.reshape(x, self.output_shape(x.shape))
def fprop(self):
    self.tmp.fill(0.0)
    feats = ca.reshape(self.feats.out, self.feats.out.shape + (1, 1))
    ca.add(feats, self.tmp, out=self.tmp)
    ca.extra.concatenate(self.imgs.out, self.tmp, axis=1, out=self.out)
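# What this fprop does, expressed in NumPy: broadcast a per-sample feature
# vector (b, c) to constant spatial maps (b, c, h, w) and concatenate them
# with the images along the channel axis. Shapes here are illustrative:
import numpy as np

imgs = np.zeros((2, 3, 8, 8))
feats = np.ones((2, 5))

feat_maps = np.broadcast_to(feats[:, :, None, None], (2, 5, 8, 8))
out = np.concatenate([imgs, feat_maps], axis=1)
assert out.shape == (2, 8, 8, 8)   # 3 image channels + 5 feature channels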
def fprop(self, x, phase):
    self.last_x_shape = x.shape
    return ca.reshape(x, self.y_shape(x.shape))
def fprop(self):
    self.array = ca.reshape(self.x.array, self.shape)
def bprop(self):
    self.x.grad_array = ca.reshape(self.grad_array, self.x.shape)
def bprop(self):
    ca.subtract(self.pred.array, self.target.array, self.pred.grad_array)
    self.pred.grad_array *= 2
    self.pred.grad_array *= ca.reshape(self.grad_array, self.bcast_shape)