# Shared imports assumed by the standalone snippets collected below. Each
# snippet originally comes from a different project, so this header is a
# convenience for reading/running them, not part of any single source file.
import numpy as np
import six

import chainer
import chainer.functions as F
from chainer import cuda, functions, grad, testing, using_config, Variable
from chainer.functions import (batch_l2_norm_squared, matmul, mean,
                               softmax_cross_entropy, sqrt, stack, transpose)
from chainer.utils import type_check


def overlap(u, v):  # u, v: (1 * -) Variable -> (1 * 1) Variable
    denominator = F.sqrt(
        F.batch_l2_norm_squared(u) * F.batch_l2_norm_squared(v))
    if np.array_equal(cuda.to_cpu(denominator.data), np.array([0])):
        return F.matmul(u, F.transpose(v))
    return F.matmul(u, F.transpose(v)) / F.reshape(denominator, (1, 1))

def n_pair_mc_loss(f, f_p, l2_reg):
    """Multi-class N-pair loss (N-pair-mc loss) function.

    Args:
        f (~chainer.Variable): Feature vectors.
            All examples must belong to different classes.
        f_p (~chainer.Variable): Positive examples corresponding to f.
            Each example must belong to the same class as the corresponding
            example in f.
        l2_reg (~float): Weight of the L2 regularization term on the feature
            vectors.

    Returns:
        ~chainer.Variable: Loss value.

    See: `Improved Deep Metric Learning with Multi-class N-pair Loss \
        Objective <https://papers.nips.cc/paper/6200-improved-deep-metric-\
        learning-with-multi-class-n-pair-loss-objective>`_
    """
    logit = matmul(f, transpose(f_p))
    N = len(logit.data)
    xp = cuda.get_array_module(logit.data)
    loss_sce = softmax_cross_entropy(logit, xp.arange(N))
    l2_loss = sum(batch_l2_norm_squared(f) +
                  batch_l2_norm_squared(f_p)) / (2.0 * N)
    loss = loss_sce + l2_reg * l2_loss
    return loss

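# Illustrative call of n_pair_mc_loss above: N feature vectors from N distinct
# classes (f) and one positive example per class (f_p). Shapes and l2_reg are
# placeholders; relies on the shared imports at the top of this file.
f_example = Variable(np.random.randn(8, 64).astype(np.float32))
f_p_example = Variable(np.random.randn(8, 64).astype(np.float32))
loss_example = n_pair_mc_loss(f_example, f_p_example, l2_reg=1e-3)  # scalar Variable
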
def euclidean_pairwise_distances(u, v):
    """
    This is not needed when applying l2 normalization.

    Args:
        u (chainer.Variable or xp.ndarray:(b, emb))
        v (chainer.Variable or xp.ndarray:(b, emb))
    """
    B = u.shape[0]
    u2 = F.batch_l2_norm_squared(u).reshape(-1, 1)  # u2: (b, 1)
    u2 = F.broadcast_to(u2, (B, B))                 # u2: (b, b)
    v2 = F.batch_l2_norm_squared(v).reshape(1, -1)  # v2: (1, b)
    v2 = F.broadcast_to(v2, (B, B))                 # v2: (b, b)
    uv = F.matmul(u, v, transb=True)                # uv: (b, b)
    distance = u2 - 2.0 * uv + v2                   # distance: (b, b)
    return distance

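# Quick sanity check of euclidean_pairwise_distances (illustrative sizes):
# entry (i, j) should equal ||u_i - v_j||^2 up to floating-point error.
u_chk = np.random.randn(5, 16).astype(np.float32)
v_chk = np.random.randn(5, 16).astype(np.float32)
d_chk = euclidean_pairwise_distances(u_chk, v_chk).data
naive = ((u_chk[:, None, :] - v_chk[None, :, :]) ** 2).sum(axis=-1)
assert np.allclose(d_chk, naive, atol=1e-3)
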
def n_pair_mc_loss(f, f_p, l2_reg):
    logit = matmul(f, transpose(f_p))
    N = len(logit.data)
    xp = cuda.get_array_module(logit.data)
    loss_sce = softmax_cross_entropy(logit, xp.arange(N))
    l2_loss = sum(batch_l2_norm_squared(f) + batch_l2_norm_squared(f_p))
    loss = loss_sce + l2_reg * l2_loss
    return loss

def reconstruction_loss(dis, recon, gt):
    with chainer.using_config('train', False):
        v1 = dis.feature_vector(recon)
        v2 = dis.feature_vector(gt)
        denom = F.sqrt(
            F.batch_l2_norm_squared(v1) * F.batch_l2_norm_squared(v2))
        return -F.sum(
            F.reshape(F.batch_matmul(v1, v2, transa=True),
                      (v1.shape[0],)) / denom)

def squared_distance(self, anc, pos, neg):
    """
    Compute anchor-positive distance and anchor-negative distance on batches
    of anchors, positive, and negative samples.
    """
    dist_pos = F.expand_dims(F.batch_l2_norm_squared(anc - pos), 1)
    dist_neg = F.expand_dims(F.batch_l2_norm_squared(anc - neg), 1)
    return dist_pos, dist_neg

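# The two squared distances above typically feed a triplet hinge loss; a
# minimal sketch (the margin value is illustrative, not taken from the
# original class):
#     dist_pos, dist_neg = self.squared_distance(anc, pos, neg)
#     loss = F.mean(F.relu(dist_pos - dist_neg + 0.2))
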
def loss_func_dsgan(x, z, theta, tau=10):
    if x.shape[1] == 4:
        x = x[:, :3]
    loss_ds_1 = F.batch_l2_norm_squared(x[::2] - x[1::2]) / (
        F.batch_l2_norm_squared(z[::2] - z[1::2]) + 1e-8)
    loss_ds_2 = F.batch_l2_norm_squared(x[::2] - x[1::2]) / (
        F.absolute(theta[::2] - theta[1::2]) + 1e-8) / 1000
    xp = chainer.cuda.get_array_module(x.array)
    loss_ds_1 = F.minimum(F.sqrt(loss_ds_1),
                          xp.full_like(loss_ds_1.array, tau))
    loss_ds_2 = F.minimum(F.sqrt(loss_ds_2),
                          xp.full_like(loss_ds_2.array, tau))
    print(loss_ds_1.array.mean(), loss_ds_2.array.mean())
    return -F.mean(loss_ds_1) - F.mean(loss_ds_2)

def train(self, positive, negative, links, relations, edges, xp, RC, RCT, EC,
          ECT, relationsT, relationsEdges):
    self.cleargrads()
    entities = set()
    Rs = set()
    for h, r, t in positive:
        entities.add(h)
        entities.add(t)
        Rs.add(r)
    for h, r, t in negative:
        entities.add(h)
        entities.add(t)
        Rs.add(r)
    entities = list(entities)
    Rs = list(Rs)

    x = self.get_context(entities, links, relations, edges, 0, xp, RC, EC)
    x = F.split_axis(x, len(entities), axis=0)
    edict = dict()
    for e, x in zip(entities, x):
        edict[e] = x

    RsE = self.get_context_relation(Rs, links, relationsT, edges, 0, xp,
                                    RC, EC, relationsEdges)
    RsE = F.split_axis(RsE, len(Rs), axis=0)
    rdict = dict()
    for r, x in zip(Rs, RsE):
        rdict[r] = x

    pos, rels = [], []
    for h, r, t in positive:
        rels.append(rdict[r])
        pos.append(edict[h] - edict[t])
    pos = F.concat(pos, axis=0)
    xr = F.concat(rels, axis=0)
    # xr = self.embedR(xp.array(rels, 'i'))
    if self.is_bound_wr:
        xr = F.tanh(xr)
    # pos = F.sum(F.absolute(pos + xr), axis=1)
    pos = F.batch_l2_norm_squared(pos + xr)

    neg, rels = [], []
    for h, r, t in negative:
        rels.append(rdict[r])
        neg.append(edict[h] - edict[t])
    neg = F.concat(neg, axis=0)
    xr = F.concat(rels, axis=0)
    # xr = self.embedR(xp.array(rels, 'i'))
    if self.is_bound_wr:
        xr = F.tanh(xr)
    # neg = F.sum(F.absolute(neg + xr), axis=1)
    neg = F.batch_l2_norm_squared(neg + xr)

    if self.objective_function == 'relative':
        return sum(F.relu(self.threshold + pos - neg)), pos
    if self.objective_function == 'absolute':
        return sum(pos + F.relu(self.threshold - neg)), pos

def reconstruction_loss(dis, recon, gt):
    with chainer.using_config('train', False):
        v1 = dis.feature_vector(recon)
        v2 = dis.feature_vector(gt)
        denom = F.sqrt(
            F.batch_l2_norm_squared(v1) * F.batch_l2_norm_squared(v2))
        xp = dis.xp
        sum = Variable(xp.array(0.0, dtype=xp.float32))
        for i in range(gt.shape[0]):
            sum += F.matmul(v1[i], v2[i], transb=True) / denom[i]
        cos_dist2 = -sum
        return cos_dist2

def constrain_kernel(network):
    n_kernels = 0
    norm = None
    for node in network.updatable_node:
        for lname, layer in node.model.layers:
            if isinstance(layer, (chainer.links.ConvolutionND,
                                  chainer.links.DeconvolutionND)):
                n_kernels += 1
                if norm is None:
                    norm = F.batch_l2_norm_squared(layer.W)
                else:
                    norm += F.batch_l2_norm_squared(layer.W)
    return norm / n_kernels

def get_scores3(self, candidates, links, relations, edges, xp, mode, RC, EC):
    h, r, t, l = candidates
    xe1 = self.embedE2(xp.array(range(self.e_size), 'i'))
    xe2 = self.embedE2(xp.array([t], 'i'))
    xe2 = F.broadcast_to(xe2, (self.e_size, 200))
    xe3 = self.embedE2(xp.array([h], 'i'))
    xe3 = F.broadcast_to(xe3, (self.e_size, 200))
    xr = self.embedR(xp.array([r], 'i'))
    xr = F.broadcast_to(xr, (self.e_size, 200))
    if self.is_bound_wr:
        xr = F.tanh(xr)
    scores1 = F.batch_l2_norm_squared(xe1 - xe2 + xr)
    scores2 = F.batch_l2_norm_squared(xe3 - xe1 + xr)
    return scores1, scores2

def train(self, positive, negative, links, relations, edges, xp):
    # Gradients must be cleared before every call.
    self.cleargrads()
    entities = set()
    erels = set()
    for h, r, t in positive:
        entities.add(h)
        entities.add(t)
        erels.add(r)
    for h, r, t in negative:
        entities.add(h)
        entities.add(t)
        erels.add(r)
    # Collect all entities and relations of the positive and negative triples.
    entities = list(entities)
    erels = list(erels)

    # x holds the embeddings of `entities` produced by the propagation model.
    x = self.get_context(entities, erels, links, relations, edges, 0, xp)
    x = F.split_axis(x, len(entities), axis=0)  # x: (len(entities), 1, dim)
    edict = dict()
    for e, x in zip(entities, x):
        edict[e] = x

    pos, rels = [], []
    for h, r, t in positive:
        rels.append(r)
        pos.append(edict[h] - edict[t])
    pos = F.concat(pos, axis=0)
    xr = self.embedR(xp.array(rels, 'i'))  # relation embeddings for r
    if self.is_bound_wr:
        xr = F.tanh(xr)  # embedR is updated through this as well
    pos = F.batch_l2_norm_squared(pos + xr)  # squared L2 norm of (h + r - t)

    neg, rels = [], []
    for h, r, t in negative:
        rels.append(r)
        neg.append(edict[h] - edict[t])
    neg = F.concat(neg, axis=0)
    xr = self.embedR(xp.array(rels, 'i'))
    if self.is_bound_wr:
        xr = F.tanh(xr)
    neg = F.batch_l2_norm_squared(neg + xr)

    # Objective
    if self.objective_function == 'relative':
        return sum(F.relu(self.threshold + pos - neg))  # loss value
    if self.objective_function == 'absolute':
        return sum(pos + F.relu(self.threshold - neg))

def get_margins(self, candidates, links, relations, edges, xp, mode):
    if mode == 'dev':
        is_train = True
    if mode == 'test':
        is_train = False
    entities = set()
    for h, r, t, l in candidates:
        entities.add(h)
        entities.add(t)
    entities = list(entities)
    xe = self.get_context(entities, links, relations, edges, 0, xp, is_train)
    xe = F.split_axis(xe, len(entities), axis=0)
    edict = dict()
    for e, x in zip(entities, xe):
        edict[e] = x
    diffs, rels = [], []
    for h, r, t, l in candidates:
        rels.append(r)
        diffs.append(edict[h] - edict[t])
    diffs = F.concat(diffs, axis=0)
    xr = self.embedR(xp.array_int(rels, is_train))
    if self.is_bound_wr:
        xr = F.tanh(xr)
    margins = F.batch_l2_norm_squared(diffs + xr)
    return margins

def update(gen, dis, optimizer_gen, optimizer_dis, x_batch, margin):
    xp = gen.xp
    batch_size = len(x_batch)

    # from generated image
    z = xp.random.normal(0, 1, (batch_size, latent_size)).astype(np.float32)
    z = z / (xp.linalg.norm(z, axis=1, keepdims=True) + 1e-12)
    x_gen = gen(z)
    total_size = np.prod(x_gen.shape)
    y_gen, h_gen = dis(x_gen)
    h_gen = F.normalize(F.reshape(h_gen, (batch_size, -1)))
    similarity = F.sum(
        F.matmul(h_gen, h_gen, transb=True)) / (batch_size * batch_size)
    loss_gen = F.mean_squared_error(x_gen, y_gen) + 0.1 * similarity
    loss_dis = F.sum(
        F.relu(margin * margin -
               F.batch_l2_norm_squared(x_gen - y_gen))) / total_size

    # from real image
    x = xp.asarray(x_batch)
    y, h = dis(x)
    loss_dis += F.mean_squared_error(x, y)

    gen.cleargrads()
    loss_gen.backward()
    optimizer_gen.update()

    dis.cleargrads()
    loss_dis.backward()
    optimizer_dis.update()

    return float(loss_gen.data), float(loss_dis.data)

def forward(self, x):
    x = x.reshape((x.shape[0], -1))
    x /= F.sqrt(F.batch_l2_norm_squared(x)).reshape((-1, 1))
    h = self.fc(x)
    h /= F.sqrt(F.sum(F.square(self.fc.W), axis=1))
    return h

def loss_gen(self, gen, imgs_masked, imgs_completed, masks):
    loss = F.mean(
        F.batch_l2_norm_squared(
            F.tile(F.expand_dims(masks, axis=1), (1, 3, 1, 1)) *
            (imgs_masked - imgs_completed)))
    chainer.report({'loss': loss}, gen)
    return loss

def get_scores(self, candidates, links, relations, edges, xp, mode, RC, EC,
               relationsT, relationsEdges):
    entities = set()
    Rs = set()
    for h, r, t, l in candidates:
        entities.add(h)
        entities.add(t)
        Rs.add(r)
    entities = list(entities)
    xe = self.get_context(entities, links, relations, edges, 0, xp, RC, EC)
    xe = F.split_axis(xe, len(entities), axis=0)
    edict = dict()
    for e, x in zip(entities, xe):
        edict[e] = x

    Rs = list(Rs)
    xr = self.get_context_relation(Rs, links, relationsT, edges, 0, xp,
                                   RC, EC, relationsEdges)
    xr = F.split_axis(xr, len(Rs), axis=0)
    rdict = dict()
    for r, x in zip(Rs, xr):
        rdict[r] = x

    diffs, rels = [], []
    for h, r, t, l in candidates:
        rels.append(rdict[r])
        diffs.append(edict[h] - edict[t])
    diffs = F.concat(diffs, axis=0)
    xr = F.concat(rels, axis=0)
    # xr = self.embedR(xp.array(rels, 'i'))
    if self.is_bound_wr:
        xr = F.tanh(xr)
    scores = F.batch_l2_norm_squared(diffs + xr)
    return scores

def path_length(ws, x, mask):
    levels, batch, size = len(ws), *(ws[0].shape)
    gradients = grad([x * mask], ws, enable_double_backprop=True)
    gradient = stack(gradients).transpose(1, 0, 2).reshape(batch * levels, size)
    path_lengths = batch_l2_norm_squared(gradient).reshape(batch, levels)
    return sqrt(mean(path_lengths, axis=1))

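# Note on path_length above: per sample it returns sqrt(mean over levels of
# ||d sum(x * mask) / d w||^2), i.e. the norm of the Jacobian of the masked
# output with respect to each level's latent w. This matches the StyleGAN2-style
# path-length regularization pattern, where mask is typically random noise
# scaled by the image size; that interpretation is an assumption here.
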
def update_core(self):
    def _update(optimizer, loss):
        optimizer.target.cleargrads()
        loss.backward()
        optimizer.update()

    xp = self.generator.xp
    if self.iteration < 50:
        n_critic = 100
    else:
        n_critic = 5

    # update critic n_critic times
    for _ in range(n_critic):
        # real image
        x_real = self.next_batch(self.x)
        y_real = self.critic(x_real)
        loss1 = -F.sum(y_real) / self.batchsize

        # fake image
        z = self.next_batch(self.z)
        x_fake = self.generator(z)
        y_fake = self.critic(x_fake)
        loss2 = F.sum(y_fake) / self.batchsize
        x_fake.unchain_backward()

        # gp
        eps = xp.random.uniform(
            0, 1, size=self.batchsize).astype("f")[:, None, None, None]
        x_mid = eps * x_real + (1.0 - eps) * x_fake
        y_mid = self.critic(x_mid)
        grad, = chainer.grad([y_mid], [x_mid], enable_double_backprop=True)
        grad = F.sqrt(F.batch_l2_norm_squared(grad))
        loss_gp = self.lam * F.mean_squared_error(grad,
                                                  xp.ones_like(grad.data))

        # compute loss
        critic_loss = loss1 + loss2 + loss_gp

        # update critic
        _update(self.optimizer_critic, critic_loss)

        chainer.reporter.report({
            'critic/loss/real': loss1,
            'critic/loss/fake': loss2,
            'critic/loss/gp': loss_gp,
            'critic/loss': critic_loss,
            'wasserstein': -loss1 - loss2,
        })

    # update generator 1 time
    z = self.next_batch(self.z)
    x_fake = self.generator(z)
    y_fake = self.critic(x_fake)
    gen_loss = -F.sum(y_fake) / self.batchsize
    _update(self.optimizer_generator, gen_loss)
    chainer.report({'generator/loss': gen_loss})

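# Note: loss_gp above is the standard WGAN-GP penalty
#     lambda * E[(||grad_x D(x_mid)||_2 - 1)^2],
# with x_mid sampled uniformly on the segment between a real and a fake sample;
# F.mean_squared_error against a vector of ones computes the squared deviation
# of the gradient norm from 1.
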
def compute_loss(real_o, o_current):
    concat_image = F.concat((real_o, o_current), axis=1)
    classification_loss = classifier(concat_image)
    classification_loss = F.squeeze(classification_loss)
    l2_loss = F.batch_l2_norm_squared(real_o - o_current)
    assert classification_loss.shape == l2_loss.shape
    loss = l2_loss - classification_loss
    return loss

def train(self, positive, negative, train_link, relations, aux_link, xp):
    self.cleargrads()
    entities = set()
    for h, r, t in positive:
        entities.add(h)
        entities.add(t)
    for h, r, t in negative:
        entities.add(h)
        entities.add(t)
    entities = list(entities)

    # complete one propagation of information from neighbor entities to
    # anchor entities
    x = self.get_context(entities, train_link, relations, aux_link, 0, xp)
    x = F.split_axis(x, len(entities), axis=0)

    # edict maps from an entity to its result
    edict = dict()
    for e, x in zip(entities, x):
        edict[e] = x

    # positives
    pos, rels = [], []
    for h, r, t in positive:
        rels.append(r)
        pos.append(edict[h] - edict[t])
    pos = F.concat(pos, axis=0)
    xr = F.tanh(self.embedR(xp.array(rels, 'i')))
    # TransE score function; batch_l2_norm_squared is the per-example
    # squared Euclidean norm, so f_pos = ||h + r - t||^2
    pos = F.batch_l2_norm_squared(pos + xr)

    # negatives
    neg, rels = [], []
    for h, r, t in negative:
        rels.append(r)
        neg.append(edict[h] - edict[t])
    neg = F.concat(neg, axis=0)
    xr = F.tanh(self.embedR(xp.array(rels, 'i')))
    # TransE score function; f_neg = ||h + r - t||^2
    neg = F.batch_l2_norm_squared(neg + xr)

    # Absolute Margin Objective Function
    return sum(pos + F.relu(self.threshold - neg))

def norms(self):
    """
    Returns:
        norm_u (xp.array (1, ))
        norm_v (xp.array (1, ))
    """
    u = self.u
    v = self.v
    # x_: (b, emb)
    with using_config("train", False), using_config(
            "enable_backprop", False):
        norm_u = F.mean(F.sqrt(F.batch_l2_norm_squared(u))).data
        norm_v = F.mean(F.sqrt(F.batch_l2_norm_squared(v))).data
    return dict(zip(self.metric_names_norms, [norm_u, norm_v]))

def transloss(self, x):
    hdx = x[:, 0]
    rdx = x[:, 1]
    tdx = x[:, 2]
    hvec = self.propagation(hdx, "1")
    tvec = self.propagation(tdx, "2")
    rvec = self.rvec(rdx)
    # TransE-style score: squared L2 norm of (h + r - t) for each triple
    return F.batch_l2_norm_squared(hvec + rvec - tvec)

def zero_centered_gradient_penalty_fake(fake, y):
    grad, = chainer.grad([fake], [y], enable_double_backprop=True)
    grad = F.sqrt(F.batch_l2_norm_squared(grad))
    zeros = call_zeros(grad)
    loss = 10 * F.mean_squared_error(grad, zeros)
    return loss

def __call__(self, positive, negative, links, relations, edges, xp, is_train):
    if is_train:
        self.cleargrads()
    entities = set()
    for h, r, t in positive:
        entities.add(h)
        entities.add(t)
    for h, r, t in negative:
        entities.add(h)
        entities.add(t)
    entities = list(entities)
    x = self.get_context(entities, links, relations, edges, 0, xp, is_train)
    x = F.split_axis(x, len(entities), axis=0)
    edict = dict()
    for e, x in zip(entities, x):
        edict[e] = x

    pos, rels = [], []
    for h, r, t in positive:
        rels.append(r)
        pos.append(edict[h] - edict[t])
    pos = F.concat(pos, axis=0)
    xr = self.embedR(xp.array_int(rels, is_train))
    if self.is_bound_wr:
        xr = F.tanh(xr)
    pos = F.batch_l2_norm_squared(pos + xr)

    neg, rels = [], []
    for h, r, t in negative:
        rels.append(r)
        neg.append(edict[h] - edict[t])
    neg = F.concat(neg, axis=0)
    xr = self.embedR(xp.array_int(rels, is_train))
    if self.is_bound_wr:
        xr = F.tanh(xr)
    neg = F.batch_l2_norm_squared(neg + xr)

    if self.object_kind == 1:
        return sum(F.relu(self.threshold + pos - neg))
    if self.object_kind == 2:
        return sum(pos + F.relu(self.threshold - neg))
    if self.object_kind == 3:
        return sum(self.threshold * pos - neg)

def update_core(self):
    gen_opt = self.get_optimizer("gen")
    cri_opt = self.get_optimizer("cri")
    generator = gen_opt.target
    critic = cri_opt.target
    batch_size = self.get_iterator("main").batch_size

    # fetch a batch of real samples
    x_real = self.get_iterator("main").next()
    x_real = Variable(np.stack(x_real))
    if chainer.config.user_gpu >= 0:
        x_real.to_gpu()
    xp = x_real.xp

    # update critic
    upd_num = self.n_cri[1] if self.iteration <= 25 or \
        self.iteration % 500 == 0 else self.n_cri[0]
    for i in range(upd_num):
        z = xp.random.uniform(size=(batch_size, Z_DIM)).astype(np.float32)
        x_fake = generator(Variable(z))
        # negative of the Wasserstein distance
        cri_loss = F.average(critic(x_fake) - critic(x_real))

        # gradient penalty
        eps = xp.random.uniform(size=(batch_size, 1, 1, 1)).astype(np.float32)
        x_fusion = eps * x_real + (1 - eps) * x_fake  # (N, 1, H, W)
        g_critic = chainer.grad([critic(x_fusion)], [x_fusion],
                                enable_double_backprop=True)[0]  # (N, 1, H, W)
        gp = F.batch_l2_norm_squared(g_critic)
        gp = F.average((F.sqrt(gp) - 1) ** 2)

        total_loss = cri_loss + self.gp_lam * gp
        critic.cleargrads()
        total_loss.backward()
        cri_opt.update()

    # update generator
    z = xp.random.uniform(size=(batch_size, Z_DIM)).astype(np.float32)
    x_fake = generator(Variable(z))
    gen_loss = -F.average(critic(x_fake))
    generator.cleargrads()
    critic.cleargrads()
    gen_loss.backward()
    gen_opt.update()

    chainer.report({
        "generator/loss": gen_loss,
        "critic/loss": cri_loss,
        "main/wdist": -cri_loss
    })

def update_core(self):
    gen_optimizer = self.get_optimizer('gen')
    dis_optimizer = self.get_optimizer('dis')
    xp = self.gen.xp

    for i in range(self.n_dis):
        batch = self.get_iterator('main').next()
        batchsize = len(batch)
        x_real = Variable(self.converter(batch, self.device))
        h_real = self.dis(x_real)

        z = self.gen.make_hidden(batchsize)
        x_fake = self.gen(z)
        h_fake = self.dis(x_fake)

        z2 = self.gen.make_hidden(batchsize)
        x_fake2 = self.gen(z2)
        h_fake2 = self.dis(x_fake2)

        if i == 0:
            loss_gen = self.energy_distance(h_real, h_fake, h_fake2)
            self.gen.cleargrads()
            loss_gen.backward()
            gen_optimizer.update()
            chainer.reporter.report({'gen/loss': loss_gen})

        x_fake.unchain_backward()
        x_fake2.unchain_backward()

        critic_real = self.critic(h_real, h_fake2)
        critic_fake = self.critic(h_fake, h_fake2)
        loss_surrogate = F.mean(critic_real - critic_fake)

        eps = self.xp.random.uniform(
            0, 1, size=batchsize).astype("f")[:, None, None, None]
        x_mid = eps * x_real + (1.0 - eps) * x_fake
        h_mid = chainer.Variable(self.dis(x_mid).data)
        base_grad, = chainer.grad([self.critic(h_mid, h_fake.data)], [h_mid],
                                  enable_double_backprop=True)
        grad, = chainer.grad([self.dis(x_mid)], [x_mid],
                             grad_outputs=[base_grad],
                             enable_double_backprop=True)
        grad = F.sqrt(F.batch_l2_norm_squared(grad))
        loss_gp = self.lam * F.mean_squared_error(grad,
                                                  xp.ones_like(grad.data))

        self.dis.cleargrads()
        (-loss_surrogate).backward()
        loss_gp.backward()
        dis_optimizer.update()

        chainer.reporter.report({'critic/loss': -loss_surrogate + loss_gp})
        chainer.reporter.report({"cramer distance": loss_surrogate})
        chainer.reporter.report({'critic/loss_grad': loss_gp})
        chainer.reporter.report({'g': F.mean(grad)})

def loss_joint(self, gen, alpha, imgs_masked, imgs_completed, masks, d_fake,
               d_real):
    loss1 = F.mean(
        F.batch_l2_norm_squared(
            F.tile(F.expand_dims(masks, axis=1), (1, 3, 1, 1)) *
            (imgs_masked - imgs_completed)))
    loss2 = alpha * F.mean(F.log(d_real) + F.log(1 - d_fake))
    loss = loss1 + loss2
    chainer.report({'loss': loss}, gen)
    return loss

def angular_loss(anchor, positive, negative, alpha=45, in_degree=True,
                 reduce='mean'):
    '''
    Features, y = dnn(x), must be l2 normalized.
    '''
    if in_degree:
        alpha = np.deg2rad(alpha)
    # tan(x)^2: [0, ..., pi/4, ..., pi/3] -> [0, ..., 1, ..., 3]
    # strictly increasing convex function
    sq_tan_alpha = np.tan(alpha) ** 2
    c = (anchor + positive) / 2
    loss = F.relu(
        F.batch_l2_norm_squared(anchor - positive) -
        4 * sq_tan_alpha * F.batch_l2_norm_squared(negative - c))
    if reduce == 'mean':
        loss = F.mean(loss)
    return loss

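# Illustrative call of angular_loss above: features are l2-normalized first,
# as the docstring requires (batch size, dimensionality and alpha are
# placeholders; relies on the shared imports at the top of this file).
anc = F.normalize(np.random.randn(8, 32).astype(np.float32))
pos = F.normalize(np.random.randn(8, 32).astype(np.float32))
neg = F.normalize(np.random.randn(8, 32).astype(np.float32))
ang_loss = angular_loss(anc, pos, neg, alpha=36)
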
def update_core(self):
    # train critic
    for t in range(self.n_c):
        # read data
        batch = self._iterators['main'].next()
        x = self.converter(batch, self.device)
        m = x.shape[0]
        H, W = x.shape[2], x.shape[3]
        xp = chainer.cuda.get_array_module(x)

        # generate
        z = self.generator.make_z(m)
        x_tilde = self.generator(z)

        # sampling along straight lines
        e = xp.random.uniform(0., 1., (m, 1, 1, 1))
        x_hat = e * x + (1 - e) * x_tilde

        # compute loss
        loss_gan = F.average(self.critic(x_tilde) - self.critic(x))
        grad, = chainer.grad([self.critic(x_hat)], [x_hat],
                             enable_double_backprop=True)
        grad = F.sqrt(F.batch_l2_norm_squared(grad))
        loss_grad = self.l * F.mean_squared_error(grad,
                                                  xp.ones_like(grad.data))
        loss_critic = loss_gan + loss_grad

        # update critic
        self.critic.cleargrads()
        loss_critic.backward()
        self._optimizers['critic'].update()

        # report
        chainer.reporter.report({
            'wasserstein distance': -loss_gan,
            'loss/grad': loss_grad
        })

    # train generator
    # read data
    batch = self._iterators['main'].next()
    x = self.converter(batch, self.device)

    # generate and compute loss
    z = self.generator.make_z(m)
    loss_generator = F.average(-self.critic(self.generator(z)))

    # update generator
    self.generator.cleargrads()
    loss_generator.backward()
    self._optimizers['generator'].update()

    # report
    chainer.reporter.report({'loss/generator': loss_generator})

def __call__(self, x):
    """Applies the linear layer.

    Args:
        x (~chainer.Variable): Batch of input vectors.

    Returns:
        ~chainer.Variable: Output of the linear layer.
    """
    norm = F.batch_l2_norm_squared(self.W) ** 0.5
    norm_broadcasted = F.broadcast_to(
        F.expand_dims(norm, 1), self.W.data.shape)
    g_broadcasted = F.broadcast_to(
        F.expand_dims(self.g, 1), self.W.data.shape)
    return F.linear(x, g_broadcasted * self.W / norm_broadcasted, self.b)

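# Note: the layer above implements weight normalization. batch_l2_norm_squared
# yields the per-row squared norms of W, so each weight row is rescaled to unit
# L2 norm and then multiplied by its learned gain g; norm and direction of each
# weight vector are therefore parameterized separately.
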
def check_forward(self, x_data):
    x = chainer.Variable(x_data)
    y = functions.batch_l2_norm_squared(x)
    self.assertEqual(y.data.dtype, np.float32)
    y_data = cuda.to_cpu(y.data)

    x_two_dim = _as_two_dim(self.x)
    y_expect = np.empty(len(self.x))
    for n in six.moves.range(len(self.x)):
        y_expect[n] = sum(map(lambda x: x * x, x_two_dim[n]))

    testing.assert_allclose(y_expect, y_data)

def batch_rodrigues(theta):
    """
    Theta is N x 3
    """
    batch_size = theta.shape[0]
    xp = theta.xp

    angle = F.expand_dims(F.sqrt(F.batch_l2_norm_squared(theta + 1e-8)), -1)
    r = F.expand_dims(theta / F.tile(angle, 3), -1)

    angle = F.expand_dims(angle, -1)
    cos = F.cos(angle)
    sin = F.sin(angle)
    cos = F.tile(cos, (3, 3))
    sin = F.tile(sin, (3, 3))

    outer = F.matmul(r, r, transb=True)

    eyes = F.tile(F.expand_dims(
        Variable(xp.array(xp.eye(3), 'f')), 0), (batch_size, 1, 1))
    # Rodrigues' rotation formula:
    # R = cos(angle) * I + (1 - cos(angle)) * r r^T + sin(angle) * [r]_x
    R = cos * eyes + (1 - cos) * outer + sin * batch_skew(r, batch_size)
    return R

def forward(self, inputs, device):
    x, = inputs
    return functions.batch_l2_norm_squared(x),

def test_invalid_shape(self):
    x = chainer.Variable(np.zeros((4,), dtype=np.float32))
    with self.assertRaises(type_check.InvalidType):
        functions.batch_l2_norm_squared(x)