def forward(self, x):
    x = x.reshape((x.shape[0], -1))
    x /= F.sqrt(F.batch_l2_norm_squared(x)).reshape((-1, 1))
    h = self.fc(x)
    h /= F.sqrt(F.sum(F.square(self.fc.W), axis=1))
    return h
def get_bbox_side_lengths(self, grids):
    x0, x1, x2, y0, y1, y2 = self.get_corners(grids)
    width = F.sqrt(F.square(x1 - x0) + F.square(y1 - y0))
    height = F.sqrt(F.square(x2 - x0) + F.square(y2 - y0))
    return width, height
def distance_angle(cell: Variable, positions: Variable,
                   i1: np.ndarray, i2: np.ndarray, j2: np.ndarray,
                   s2: np.ndarray, c3: np.ndarray, a3: np.ndarray,
                   b3: np.ndarray):
    """Distance and angles. Not in use yet."""
    n_pairs = len(i2)
    n_trios = len(c3)
    xp = positions.xp
    n = i1[i2]
    assert isinstance(cell, Variable)
    assert isinstance(positions, Variable)
    assert n.shape == (n_pairs,)
    assert i2.shape == (n_pairs,)
    assert j2.shape == (n_pairs,)
    assert s2.shape == (n_pairs, 3)
    assert c3.shape == (n_trios,)
    assert a3.shape == (n_trios,)
    assert b3.shape == (n_trios,)
    assert cell.xp == xp
    assert isinstance(i2, xp.ndarray), (xp, type(i2))
    assert isinstance(j2, xp.ndarray), (xp, type(j2))
    assert isinstance(s2, xp.ndarray), (xp, type(s2))
    assert isinstance(c3, xp.ndarray), (xp, type(c3))
    assert isinstance(a3, xp.ndarray), (xp, type(a3))
    assert isinstance(b3, xp.ndarray), (xp, type(b3))
    real_shifts = F.sum(cell[n, :, :] * s2[:, :, xp.newaxis], axis=1)
    r = positions
    rrij = r[j2][a3] + real_shifts[a3] - r[i2][a3]
    rrik = r[j2][b3] + real_shifts[b3] - r[i2][b3]
    rij = F.sqrt(F.sum(rrij ** 2, axis=1))
    rik = F.sqrt(F.sum(rrik ** 2, axis=1))
    cos = F.sum(rrij * rrik, axis=1) / (rij * rik)
    return rij, rik, cos
def update_core(self):
    gen_ab_optimizer = self.get_optimizer("gen_ab")
    gen_ba_optimizer = self.get_optimizer("gen_ba")
    disa_optimizer = self.get_optimizer("disa")
    batch_a = chainer.Variable(self.converter(self.get_iterator("main").next()))
    batch_b = chainer.Variable(self.converter(self.get_iterator("data_b").next()))
    _xp = chainer.backend.get_array_module(batch_a.data)
    # D update
    self.disa.cleargrads()
    rate = 1.0 - (self.iteration / self.max_iteration)
    batch_an = batch_a * (
        _xp.random.randn(batch_a.shape[0], 1, batch_a.shape[2], 1).astype(_xp.float32) * 0.002 * rate
        + _xp.ones([batch_a.shape[0], 1, 1, 1]))
    batch_bn = batch_b * (
        _xp.random.randn(batch_b.shape[0], 1, batch_b.shape[2], 1).astype(_xp.float32) * 0.002 * rate
        + _xp.ones([batch_b.shape[0], 1, 1, 1]))
    fake_ab = self.gen_ab(batch_an)
    fake_ba = self.gen_ba(batch_bn)
    y_af = self.disa(fake_ba)
    y_bf = self.disa(fake_ab)
    y_at = self.disa(batch_an)
    y_bt = self.disa(batch_bn)
    y_label_TA = _xp.zeros(y_af.shape, dtype="float32")
    y_label_TA[:, 0] = 1.0
    y_label_TB = _xp.zeros(y_af.shape, dtype="float32")
    y_label_TB[:, 1] = 1.0
    y_label_FA = _xp.zeros(y_bf.shape, dtype="float32")
    y_label_FA[:, 2] = 1.0
    y_label_FB = _xp.zeros(y_bf.shape, dtype="float32")
    y_label_FB[:, 3] = 1.0
    loss_d_af = F.mean_squared_error(y_af, y_label_FA)
    loss_d_bf = F.mean_squared_error(y_bf, y_label_FB)
    loss_d_ar = F.mean_squared_error(y_at, y_label_TA)
    loss_d_br = F.mean_squared_error(y_bt, y_label_TB)
    chainer.report({"D_A_REAL": loss_d_ar, "D_A_FAKE": loss_d_af,
                    "D_B_REAL": loss_d_br, "D_B_FAKE": loss_d_bf})
    (loss_d_af + loss_d_ar).backward()
    (loss_d_bf + loss_d_br).backward()
    disa_optimizer.update()
    # G update
    self.gen_ab.cleargrads()
    self.gen_ba.cleargrads()
    fake_ba = self.gen_ba(batch_bn)
    fake_ab = self.gen_ab(batch_an)
    y_fake_ba = self.disa(fake_ba)
    y_fake_ab = self.disa(fake_ab)
    fake_aba = self.gen_ba(fake_ab)
    fake_bab = self.gen_ab(fake_ba)
    loss_ganab = F.mean_squared_error(y_fake_ab, y_label_TB)
    loss_ganba = F.mean_squared_error(y_fake_ba, y_label_TA)
    loss_cycb = F.sqrt(F.mean_squared_error(fake_bab, batch_bn))
    loss_cyca = F.sqrt(F.mean_squared_error(fake_aba, batch_an))
    gloss = loss_ganba + loss_ganab + (loss_cyca + loss_cycb) * self.cyc_lambda
    gloss.backward()
    chainer.report({"G_AB__GAN": loss_ganab, "G_BA__GAN": loss_ganba,
                    "G_ABA_CYC": loss_cyca, "G_BAB_CYC": loss_cycb})
    gen_ba_optimizer.update()
    gen_ab_optimizer.update()
def loss_func_dsgan(x, z, theta, tau=10):
    if x.shape[1] == 4:
        x = x[:, :3]
    loss_ds_1 = F.batch_l2_norm_squared(x[::2] - x[1::2]) / (
        F.batch_l2_norm_squared(z[::2] - z[1::2]) + 1e-8)
    loss_ds_2 = F.batch_l2_norm_squared(x[::2] - x[1::2]) / (
        F.absolute(theta[::2] - theta[1::2]) + 1e-8) / 1000
    xp = chainer.cuda.get_array_module(x.array)
    loss_ds_1 = F.minimum(F.sqrt(loss_ds_1), xp.full_like(loss_ds_1.array, tau))
    loss_ds_2 = F.minimum(F.sqrt(loss_ds_2), xp.full_like(loss_ds_2.array, tau))
    print(loss_ds_1.array.mean(), loss_ds_2.array.mean())
    return -F.mean(loss_ds_1) - F.mean(loss_ds_2)
def calculate_rotation(xy_real, z_pred):
    xy_split = F.split_axis(xy_real, xy_real.data.shape[1], axis=1)
    z_split = F.split_axis(z_pred, z_pred.data.shape[1], axis=1)
    # Vector v0 (neck -> nose) on the zx-plane. v0 = (a0, b0).
    a0 = z_split[9] - z_split[8]
    b0 = xy_split[9 * 2] - xy_split[8 * 2]
    n0 = F.sqrt(a0 * a0 + b0 * b0)
    # Vector v1 (right shoulder -> left shoulder) on the zx-plane. v1 = (a1, b1).
    a1 = z_split[14] - z_split[11]
    b1 = xy_split[14 * 2] - xy_split[11 * 2]
    n1 = F.sqrt(a1 * a1 + b1 * b1)
    # Return the sine of the angle between v0 and v1.
    return (a0 * b1 - a1 * b0) / (n0 * n1)
def compute_cos_angle_sub(vert, N, theta, idx, xp):
    cos_angle = []
    for i in idx:
        L0 = F.sum((vert[N[i][:, 0]] - vert[i]) ** 2, axis=1)
        L1 = F.sum((vert[N[i][:, 1]] - vert[i]) ** 2, axis=1)
        D = F.sum((vert[N[i][:, 1]] - vert[N[i][:, 0]]) ** 2, axis=1)
        c1 = (L0 + L1 - D) / (2 * F.sqrt(L0 * L1))  # law of cosines
        s1 = F.sqrt(1 - c1 ** 2)
        # print(xp.arccos(c1.array), xp.arcsin(s1.array))
        c0, s0 = xp.cos(theta[i]), xp.sin(theta[i])
        for j in range(len(c1)):
            # addition law; don't split this assignment (or you need a temporary variable)
            c0, s0 = c0 * c1[j] - s0 * s1[j], c0 * s1[j] + s0 * c1[j]
        cos_angle.append(c0)
    return cos_angle
def eval(self, **dataset):
    """Calculate loss function from given datasets and model.

    Args:
        **dataset (~numpy.ndarray):
            Datasets passed as kwargs. Name of each key is in the format
            'inputs/N' or 'labels/N'. 'N' is the order of the dataset.

    Returns:
        ~chainer.Variable:
            A scalar value calculated with loss function.
    """
    inputs = [dataset[f'inputs/{i}']
              for i in range(self.order['descriptor'] + 1)]
    labels = [dataset[f'labels/{i}']
              for i in range(self.order['property'] + 1)]
    predictions = self._model.predict(inputs, self.order['descriptor'])

    loss0 = F.mean_squared_error(predictions[0], labels[0])
    loss1 = F.mean_squared_error(predictions[1], labels[1])
    loss_sum1 = F.mean(predictions[1])
    transverse = F.swapaxes(predictions[2], 2, 3)
    loss_rot = F.mean(F.square(
        (predictions[2] - transverse) / (predictions[2] + transverse)))
    total_loss = ((1.0 - self._mixing_beta) * loss0
                  + self._mixing_beta * loss1
                  + self._summation * loss_sum1
                  + self._rotation * loss_rot)

    RMSE0 = F.sqrt(loss0)
    RMSE1 = F.sqrt(loss1)
    AbsMean1 = F.absolute(loss_sum1)
    RMS_rot = F.sqrt(loss_rot)
    total = ((1.0 - self._mixing_beta) * RMSE0
             + self._mixing_beta * RMSE1
             + self._summation * AbsMean1
             + self._rotation * RMS_rot)

    observation = {
        self._observation_keys[0]: RMSE0,
        self._observation_keys[1]: RMSE1,
        self._observation_keys[2]: AbsMean1,
        self._observation_keys[3]: RMS_rot,
        self._observation_keys[4]: total,
    }
    chainer.report(observation, observer=self._model)
    return total_loss
def return_injected(self, h, z, n_layer, mult_until_exec=None):
    """Performs the Hadamard products with z."""
    # Check whether to skip the Hadamard product.
    skip_injection = False
    if self.thresh_skip is not None and self.thresh_skip[n_layer - 1] > 0:
        # Skip the Hadamard product iff the random number is smaller than the threshold.
        skip_injection = np.random.uniform() < self.thresh_skip[n_layer - 1]
    if not skip_injection and mult_until_exec is not None:
        skip_injection = mult_until_exec <= n_layer
    if self.mult_lat and not skip_injection:
        if self.use_localz:
            # Apply local transformation.
            z1 = getattr(self, 'locz{}'.format(n_layer))(z)
        else:
            z1 = z
        # Appropriately reshape z for the elementwise multiplication.
        sh = h.shape
        z1 = F.reshape(z1, (sh[0], sh[1], 1))
        if self.normalize_preinject:
            z1 /= F.sqrt(F.mean(z1 * z1, axis=1, keepdims=True) + 1e-8)
        z2 = F.repeat(z1, sh[3] * sh[2], axis=2)
        z2 = F.reshape(z2, sh)
        ret = h * z2 + h if self.add_h_injection else h * z2
        return ret
    return h
def __call__(self, x, t):
    # Returns the error between the network output for input x and the target t.
    x = F.transpose_sequence(x)
    self.eh.reset_state()
    # model---->
    # return self.predict(h)
    for word in range(len(x)):
        e = self.xe(x[word])
        h = self.eh(e)
    cel = h
    # <----model
    for word in range(1, len(x)):
        ee = self.xe(x[len(x) - word])
        hh = self.eh(ee)
    cel_back = hh
    blstm = F.concat((cel, cel_back))
    predict = self.hy(blstm)
    # print(y)
    label = xp.reshape(t, (len(t), 1))
    # print(t)
    mse = F.mean_squared_error(predict, label)
    rmse = F.sqrt(mse)
    chainer.reporter.report({'loss': rmse}, self)
    return rmse
def eval(self, **dataset):
    """Calculate loss function from given datasets and model.

    Args:
        **dataset (~numpy.ndarray):
            Datasets passed as kwargs. Name of each key is in the format
            'inputs/N' or 'labels/N'. 'N' is the order of the dataset.

    Returns:
        ~chainer.Variable:
            A scalar value calculated with loss function.
    """
    inputs = [
        dataset[f'inputs/{i}'] for i in range(self.order['descriptor'] + 1)
    ]
    labels = [
        dataset[f'labels/{i}'] for i in range(self.order['property'] + 1)
    ]
    predictions = self._model.predict(inputs, self.order['descriptor'])
    loss0 = F.mean_squared_error(predictions[0], labels[0])
    RMSE0 = F.sqrt(loss0)
    observation = {
        self._observation_keys[0]: RMSE0,
        self._observation_keys[1]: RMSE0,
    }
    chainer.report(observation, observer=self._model)
    return loss0
def gradient_penalty(self, y: chainer.Variable, x: chainer.Variable):
    """Compute gradient penalty: (L2_norm(dy/dx) - 1)**2."""
    xp = self.xp
    weight = [Variable(xp.ones(y.shape, dtype='f'))]
    dydx, = chainer.grad(outputs=[y], inputs=[x],
                         grad_outputs=weight,
                         enable_double_backprop=True)
    dydx = F.sqrt(F.sum(dydx * dydx, axis=(1, 2, 3)))
    return F.mean_squared_error(dydx, xp.ones_like(dydx.array))
def _log_det_jacobian(self, x, y):
    # Note: `functions.clamp`, `functions.lorentzian_product` and `eps` are
    # assumed to be defined in the surrounding module (they are not part of
    # chainer.functions).
    r = F.sqrt(functions.clamp(functions.lorentzian_product(x, x), eps))
    d = x / r[..., None]
    dim = d.shape[-1]
    logdet = (dim - 2) * F.log(F.sinh(r) / r)
    return logdet
def __call__(self, text, label, feature):
    # Returns the RMSE between the network output for input text and the ground-truth label.
    x = F.transpose_sequence(text)
    self.eh.reset_state()
    # model---->
    for word in range(len(x)):
        e = self.xe(x[word])
        h = self.eh(e)
    cel = h                                # cel = [10, 200]
    # <----model
    for word in range(1, len(x)):
        ee = self.xe(x[len(x) - word])
        hh = self.eh(ee)
    cel_back = hh                          # cel_back = [10, 200]
    blstm = F.concat((cel, cel_back))      # blstm = [10, 400]
    blstm_f = F.concat((blstm, feature))   # blstm_f = [10, 401]
    predict = self.hy(blstm_f)             # predict = [10, 1]
    label = xp.reshape(label, (len(label), 1))
    mse = F.mean_squared_error(predict, label)
    rmse = F.sqrt(mse)
    chainer.reporter.report({'loss': rmse}, self)
    return rmse
def calc_style_mean_std(feature, eps=1e-5):
    mean = F.mean(feature, axis=1).reshape(feature.shape[0], 1)
    sigma = F.average((feature - F.tile(mean, (1, 256))) ** 2, axis=1) + eps
    std = F.sqrt(sigma).reshape(feature.shape[0], 1, 1, 1)
    mean = F.reshape(mean, (feature.shape[0], 1, 1, 1))
    return mean, std
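# Usage sketch for calc_style_mean_std (hedged example, not from the original
# source): given an (N, 256) style feature, it returns per-sample mean and std
# reshaped to (N, 1, 1, 1) so they can broadcast over a feature map, AdaIN-style.
import numpy as np

feat = np.random.randn(4, 256).astype(np.float32)
mean, std = calc_style_mean_std(feat)
print(mean.shape, std.shape)  # -> (4, 1, 1, 1) (4, 1, 1, 1)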
def minibatch_std(x):
    m = F.mean(x, axis=0, keepdims=True)
    div = x - F.broadcast_to(m, x.shape)
    v = F.mean(div * div, axis=0, keepdims=True)
    std = F.mean(F.sqrt(v + 1e-8), keepdims=True)
    std = F.broadcast_to(std, (x.shape[0], 1, x.shape[2], x.shape[3]))
    return F.concat([x, std], axis=1)
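# Usage sketch for minibatch_std (hedged example): the minibatch standard
# deviation is appended as one extra feature map, so an (N, C, H, W) input
# comes back as (N, C + 1, H, W).
import numpy as np

x = np.random.randn(8, 64, 4, 4).astype(np.float32)
print(minibatch_std(x).shape)  # -> (8, 65, 4, 4)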
def __call__(self, x):
    """
    Parameters
    ----------
    x : chainer.Variable
        shape (batch_size, channel, x_dim, y_dim)
    """
    batch_size, _, x_dim, y_dim = x.shape
    xp = self.xp
    xx_channel = xp.tile(xp.arange(x_dim), (1, y_dim, 1))
    yy_channel = xp.tile(xp.arange(y_dim), (1, x_dim, 1)).transpose(0, 2, 1)
    xx_channel = xp.array(xx_channel, 'f') / (x_dim - 1)
    yy_channel = xp.array(yy_channel, 'f') / (y_dim - 1)
    xx_channel = xx_channel * 2 - 1
    yy_channel = yy_channel * 2 - 1
    xx_channel = xp.tile(xx_channel, (batch_size, 1, 1, 1)).transpose(0, 1, 3, 2)
    yy_channel = xp.tile(yy_channel, (batch_size, 1, 1, 1)).transpose(0, 1, 3, 2)
    ret = F.concat([x, xx_channel, yy_channel], axis=1)
    if self.with_r:
        rr = F.sqrt(F.square(xx_channel - 0.5) + F.square(yy_channel - 0.5))
        ret = F.concat([ret, rr], axis=1)
    return ret
def path_length(ws, x, mask):
    levels, batch, size = len(ws), *(ws[0].shape)
    gradients = grad([x * mask], ws, enable_double_backprop=True)
    gradient = stack(gradients).transpose(1, 0, 2).reshape(batch * levels, size)
    path_lengths = batch_l2_norm_squared(gradient).reshape(batch, levels)
    return sqrt(mean(path_lengths, axis=1))
def _compute_laplacian_mmd(self, samples1, samples2, *, sigma=20.0):
    n = samples1.shape[1]
    m = samples2.shape[1]

    k_xx = F.expand_dims(x=samples1, axis=2) - F.expand_dims(x=samples1, axis=1)
    sum_k_xx = F.sum(
        F.exp(-F.sum(F.absolute(k_xx), axis=-1, keepdims=True) / (2.0 * sigma)),
        axis=(1, 2))

    k_xy = F.expand_dims(x=samples1, axis=2) - F.expand_dims(x=samples2, axis=1)
    sum_k_xy = F.sum(
        F.exp(-F.sum(F.absolute(k_xy), axis=-1, keepdims=True) / (2.0 * sigma)),
        axis=(1, 2))

    k_yy = F.expand_dims(x=samples2, axis=2) - F.expand_dims(x=samples2, axis=1)
    sum_k_yy = F.sum(
        F.exp(-F.sum(F.absolute(k_yy), axis=-1, keepdims=True) / (2.0 * sigma)),
        axis=(1, 2))

    mmd_squared = (sum_k_xx / (n * n)
                   - 2.0 * sum_k_xy / (m * n)
                   + sum_k_yy / (m * m))
    return F.sqrt(mmd_squared + 1e-6)
def dropout_convolution_2d(self, x):
    train = configuration.config.train
    W, b = self.W, self.b
    log_alpha = VDF.calculate_log_alpha(
        self.W, self.log_sigma2, eps=1e-8, thresholds=(-8., 8.))
    clip_mask = (log_alpha.data > self.loga_threshold)
    if train:
        W = (1. - clip_mask) * W
        mu = F.convolution_2d(
            x, (1. - clip_mask) * W, b=None,
            stride=self.stride, pad=self.pad,
            deterministic=self.deterministic)
        si = F.sqrt(
            F.convolution_2d(
                x * x, F.exp(log_alpha) * W * W, b=None,
                stride=self.stride, pad=self.pad,
                deterministic=self.deterministic) + 1e-8)
        normal_noise = self.xp.random.normal(0., 1., mu.shape).astype('f')
        activation = mu + si * normal_noise
        return F.bias(activation, b)
    else:
        return F.convolution_2d(
            x, (1. - clip_mask) * W, b,
            stride=self.stride, pad=self.pad,
            deterministic=self.deterministic)
def gradimg(img):
    # Sobel kernel, tiled over the input channels.
    grad = xp.tile(
        xp.asarray([[[[1, 0, -1], [2, 0, -2], [1, 0, -1]]]], dtype=img.dtype),
        (img.array.shape[1], 1, 1))
    dx = F.convolution_2d(img, grad)
    dy = F.convolution_2d(img, xp.transpose(grad, (0, 1, 3, 2)))
    return F.sqrt(dx ** 2 + dy ** 2)
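# Usage sketch for gradimg (hedged example; the function relies on a
# module-level `xp`, assumed to be numpy here): Sobel-style gradient magnitude
# of an image batch, with one output channel and a 'valid' 3x3 convolution.
import numpy as np
import chainer

xp = np  # assumption: module-level array backend
img = chainer.Variable(np.random.rand(2, 3, 32, 32).astype(np.float32))
print(gradimg(img).shape)  # -> (2, 1, 30, 30)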
def __call__(self, loc, val, y, train=True):
    bs = val.data.shape[0]
    pred, kld0, kld1, kld2 = self.forward(loc, val, y, train=train)

    # Compute MSE loss
    mse = F.mean_squared_error(pred, y)
    rmse = F.sqrt(mse)  # Only used for reporting

    # Now compute the total KLD loss
    kldt = kld0 * self.lambda0 + kld1 * self.lambda1 + kld2 * self.lambda2

    # Total loss is MSE plus regularization losses
    loss = mse + kldt * (1.0 / self.total_nobs)

    # Log the errors
    logs = {'loss': loss, 'rmse': rmse, 'kld0': kld0, 'kld1': kld1,
            'kld2': kld2, 'kldt': kldt, 'bias': F.sum(self.bias_mu.b)}
    reporter.report(logs, self)
    return loss
def __call__(self, x):
    # Pixelwise feature normalization: divide each pixel by the RMS over channels.
    eps = 1e-8
    mean = F.mean(x ** 2, axis=1, keepdims=True)
    mean = F.sqrt(mean + eps)
    mean = F.broadcast_to(mean, x.shape)
    h = x / mean
    return h
def calc_2d_normal(x1, x2, mu1, mu2, s1, s2, rho):
    # Density of a bivariate Gaussian with means (mu1, mu2),
    # standard deviations (s1, s2) and correlation rho.
    norm1 = F.broadcast_to(x1, mu1.shape) - mu1
    norm2 = F.broadcast_to(x2, mu2.shape) - mu2
    s1s2 = s1 * s2
    z = (F.square(norm1 / s1) + F.square(norm2 / s2)
         - 2 * rho * norm1 * norm2 / s1s2)
    neg_rho = 1 - F.square(rho)
    return F.exp(-z / (2 * neg_rho)) / (2 * np.pi * s1s2 * F.sqrt(neg_rho))
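# Quick numeric check for calc_2d_normal (hedged example): with rho = 0 and x
# at the mean, the density reduces to 1 / (2 * pi * s1 * s2).
import numpy as np

zero = np.zeros((1, 1), dtype=np.float32)
one = np.ones((1, 1), dtype=np.float32)
p = calc_2d_normal(zero, zero, zero, zero, one, 2 * one, zero)
print(p.array, 1.0 / (2 * np.pi * 2.0))  # both ≈ 0.0796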
def __call__(self, x, c=None):
    if c is not None:
        embedded = self.embedder(c)
        normalized = embedded / sqrt(mean(embedded ** 2, axis=1, keepdims=True) + 1e-08)
        c1 = self.mapper(normalized)
    h = self.main(x)
    return flatten(h) if c is None else sum(h * c1, axis=1) / root(h.shape[1])
def update_core(self):
    xp = self.gen.xp
    self._iter += 1

    opt_g = self.get_optimizer('gen')
    opt_d = self.get_optimizer('dis')

    data_z0 = self.get_latent_code_batch()
    x_fake0 = self.gen(Variable(data_z0))
    data_z1 = self.get_latent_code_batch()
    x_fake1 = self.gen(Variable(data_z1))
    data_x = self.get_real_image_batch()
    x_real = Variable(data_x)

    eta = np.random.rand()
    x_inter = Variable((data_x * eta + (1.0 - eta) * x_fake0.data).astype('f'))

    dis_x_fake0 = self.dis(x_fake0)
    dis_x_fake1 = self.dis(x_fake1)
    dis_x_real = self.dis(x_real)

    loss_gen = loss_l2_norm(dis_x_fake0, dis_x_real) + \
               loss_l2_norm(dis_x_fake1, dis_x_real) - \
               loss_l2_norm(dis_x_fake0, dis_x_fake1)
    # print(loss_gen.data)
    chainer.report({'loss': loss_gen}, self.gen)

    opt_g.zero_grads()
    loss_gen.backward()
    opt_g.update()

    x_fake0.unchain_backward()
    x_fake1.unchain_backward()

    loss_surrogate = loss_l2_norm(dis_x_fake0, dis_x_fake1) - \
                     loss_l2_norm(dis_x_fake0, 0.0) + \
                     loss_l2_norm(dis_x_real, 0.0) - \
                     loss_l2_norm(dis_x_real, dis_x_fake1)

    dis_x_inter = self.dis(x_inter, retain_forward=True)
    g = xp.ones_like(dis_x_inter.data)
    t0 = dis_x_inter.data - dis_x_fake1.data
    t0_norm = xp.sum(t0 ** 2, axis=(1)) ** 0.5
    t1_norm = xp.sum(dis_x_inter.data ** 2, axis=(1)) ** 0.5
    t_g = ((t0.transpose() / t0_norm)
           - (dis_x_inter.data.transpose()) / t1_norm).transpose()
    g = g * t_g
    grad = self.dis.differentiable_backward(Variable(g))
    grad_l2 = F.sqrt(F.sum(grad ** 2, axis=(1, 2, 3)))
    loss_gp = self._lambda_gp * loss_l2(grad_l2, 1.0)

    loss_dis = loss_surrogate + loss_gp

    opt_d.zero_grads()
    loss_dis.backward()
    opt_d.update()

    chainer.report({'loss': loss_dis, 'loss_gp': loss_gp}, self.dis)
def __call__(self, x, t):
    h = self.base(x, layers=['res5'])['res5']
    self.cam = h
    h = _global_average_pooling_2d(h)
    ################################################################################
    # ArcFace head on top of the ResNet50 backbone
    ################################################################################
    # --------------------------- cos(theta) & phi(theta) ---------------------------
    cosine = F.linear(F.normalize(h), F.normalize(self.weight))  # fc8
    sine = F.sqrt(F.clip(1.0 - F.square(cosine), 0., 1.))
    phi = cosine * cos_m - sine * sin_m
    if easy_margin:
        phi = F.where(cosine.data > 0, phi, cosine)
    else:
        phi = F.where(cosine.data > th, phi, cosine - mm)
    # --------------------------- convert label to one-hot ---------------------------
    one_hot = cp.eye(10)[t].astype(cp.float32)
    one_hot = Variable(one_hot)
    # ------------- torch.where-style: out_i = x_i if condition_i else y_i -------------
    output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
    output *= s
    ################################################################################
    # h = self.fc(h)
    return output
def __call__(self, x, test=False):
    self.hiddens = []

    # Linear/BatchNorm/Branch/Nonlinear
    h = self.linear0(x)
    h = self.bn0(h, test)
    h = self.act(h)
    self.hiddens.append(h)

    h = self.linear1(h)
    h = self.bn1(h, test)
    h = self.act(h)
    self.hiddens.append(h)

    h = self.linear2(h)
    h = self.bn2(h, test)
    h = self.act(h)
    self.hiddens.append(h)

    h = self.linear3(h)
    h = self.bn3(h, test)
    h = self.act(h)  # TODO: should use tanh?
    self.hiddens.append(h)

    # Variational
    self.mu = self.linear_mu(h)
    self.log_sigma_2 = self.linear_sigma(h)
    self.sigma_2 = F.exp(self.log_sigma_2)  # TODO: consider nan problem
    sigma = F.sqrt(self.sigma_2)
    r = self.generate_norm(self.mu)
    z = self.mu + sigma * r
    return z
def l2norm(vec):
    # Calculate the l2 norm (Euclidean norm).
    if vec.ndim > 1:
        # Add epsilon to avoid problems with the square root derivative close to zero,
        # since f(x + ε) = f(x) implies f(x + ε) - f(x) = 0.
        vec = F.sqrt(F.sum(vec * vec, axis=(1, 2, 3, 4)) + 1e-12)
    return abs(vec)
def overlap(u, v):
    # u, v: (1 * -) Variable -> (1 * 1) Variable
    denominator = F.sqrt(
        F.batch_l2_norm_squared(u) * F.batch_l2_norm_squared(v))
    if np.array_equal(cuda.to_cpu(denominator.data), np.array([0])):
        return F.matmul(u, F.transpose(v))
    return F.matmul(u, F.transpose(v)) / F.reshape(denominator, (1, 1))
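# Usage sketch for overlap (hedged example): for two row vectors it is simply
# the cosine similarity, returned as a (1, 1) Variable.
import numpy as np

u = np.array([[1.0, 0.0, 0.0]], dtype=np.float32)
v = np.array([[1.0, 1.0, 0.0]], dtype=np.float32)
print(overlap(u, v).array)  # -> [[0.7071...]]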
def main():
    for _ in range(1000):
        inp = np.random.random((2, 3, 224, 224)).astype(np.float32)
        ret = F.sqrt(F.relu(inp - 0.5)).array
        assert np.sum(np.isnan(ret)) == 0
    print("no error")
def __call__(self, x):
    # chainer requires explicit broadcast for avoiding latent bugs
    u = F.mean(x, -1, keepdims=True)
    u = F.broadcast_to(u, x.shape)
    s = F.mean((x - u) ** 2, -1, keepdims=True)
    s = F.broadcast_to(s, x.shape)
    x = (x - u) / F.sqrt(s + self.e)
    return F.bias(F.scale(x, self.g, axis=2), self.b, axis=2)
def __call__(self, x):
    f1 = F.sigmoid(self.beta1)
    f2 = F.sigmoid(self.beta2)
    # self.m = f1 * self.m + (1 - f1) * x
    # self.v = f2 * self.v + (1 - f2) * x**2
    self.m = self.beta1 * self.m + (1 - self.beta1) * x
    self.v = self.beta2 * self.v + (1 - self.beta2) * x ** 2
    g = 1e-3 * self.m / F.sqrt(self.v + 1e-8)
    return g
def norm_by_freq(self, freq):
    word_embs = self.W
    mean = F.sum(freq * word_embs, axis=0, keepdims=True)
    mean = F.broadcast_to(mean, word_embs.shape)
    var = F.sum(freq * ((word_embs - mean) ** 2), axis=0, keepdims=True)
    var = F.broadcast_to(var, word_embs.shape)
    stddev = F.sqrt(1e-6 + var)
    word_embs_norm = (word_embs - mean) / stddev
    return word_embs_norm
def get_normalized_vector(d, xp=None):
    shape = tuple(range(1, len(d.shape)))
    if xp is not None:
        d /= (1e-12 + xp.max(xp.abs(d), shape, keepdims=True))
        d /= xp.sqrt(1e-6 + xp.sum(d ** 2, shape, keepdims=True))
    else:
        d_term = 1e-12 + F.max(F.absolute(d), shape, keepdims=True)
        d /= F.broadcast_to(d_term, d.shape)
        d_term = F.sqrt(1e-6 + F.sum(d ** 2, shape, keepdims=True))
        d /= F.broadcast_to(d_term, d.shape)
    return d
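# Usage sketch for get_normalized_vector (hedged example): normalizing a random
# perturbation with the numpy backend; each sample ends up with L2 norm ≈ 1.
import numpy as np

d = np.random.randn(4, 3, 8, 8).astype(np.float32)
d = get_normalized_vector(d, xp=np)
print(np.sqrt((d ** 2).sum(axis=(1, 2, 3))))  # ≈ [1. 1. 1. 1.]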
def compute_distance_of_cluster_heads(self):
    # list all possible combinations of two cluster heads
    num_combination = self.nCr(self.ndim_y, 2)

    # a_labels
    # [0, 1, 0, 0]
    # [0, 0, 1, 0]
    # [0, 0, 1, 0]
    # [0, 0, 0, 1]
    # [0, 0, 0, 1]
    # [0, 0, 0, 1]
    a_labels = np.zeros((num_combination, self.ndim_y), dtype=np.float32)
    for i in range(1, self.ndim_y):
        for n in range(i):
            j = int(0.5 * i * (i - 1) + n)
            a_labels[j, i] = 1

    # b_labels
    # [1, 0, 0, 0]
    # [1, 0, 0, 0]
    # [0, 1, 0, 0]
    # [1, 0, 0, 0]
    # [0, 1, 0, 0]
    # [0, 0, 1, 0]
    b_labels = np.zeros((num_combination, self.ndim_y), dtype=np.float32)
    for i in range(1, self.ndim_y):
        for n in range(i):
            j = int(0.5 * i * (i - 1) + n)
            b_labels[j, n] = 1

    xp = self.xp
    if xp is not np:
        a_labels = cuda.to_gpu(a_labels)
        b_labels = cuda.to_gpu(b_labels)

    a_vector = a_labels
    b_vector = b_labels
    distance = functions.sqrt(functions.sum((a_vector - b_vector) ** 2, axis=1))

    # clip
    distance = functions.clip(
        distance, 0.0, float(self.cluster_head_distance_threshold))

    return distance
def batch_rodrigues(theta):
    """
    Theta is N x 3
    """
    batch_size = theta.shape[0]
    xp = theta.xp

    angle = F.expand_dims(F.sqrt(F.batch_l2_norm_squared(theta + 1e-8)), -1)
    r = F.expand_dims(theta / F.tile(angle, 3), -1)

    angle = F.expand_dims(angle, -1)
    cos = F.cos(angle)
    sin = F.sin(angle)
    cos = F.tile(cos, (3, 3))
    sin = F.tile(sin, (3, 3))

    outer = F.matmul(r, r, transb=True)

    eyes = F.tile(F.expand_dims(
        Variable(xp.array(xp.eye(3), 'f')), 0), (batch_size, 1, 1))
    R = cos * eyes + (1 - cos) * outer + sin * batch_skew(r, batch_size)
    return R
def rmse(x, t):
    return F.sqrt(mse(x, t))
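# Minimal check for rmse (hedged example; `mse` above is assumed to be an alias
# of chainer.functions.mean_squared_error).
import numpy as np
import chainer.functions as F

mse = F.mean_squared_error  # assumption
a = np.array([[0.0, 0.0]], dtype=np.float32)
b = np.array([[3.0, 4.0]], dtype=np.float32)
print(rmse(a, b).array)  # sqrt((9 + 16) / 2) ≈ 3.5355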
def __call__(self, x0, x1):
    if self.scaler is not None:
        x0 = self.scaler.inverse_transform(x0)
        x1 = self.scaler.inverse_transform(x1)
    return F.sqrt(F.mean_squared_error(x0, x1))