def __call__(self, images):
    hs = []
    for model in self.models:
        hs.append(model.forward_backbone(images))

    h_segs = []
    for (h, model) in zip(hs, self.models):
        h_segs.append(model.forward_seg(h))
    h_segs = F.stack(h_segs)
    h_seg_avg = F.average(
        h_segs, axis=0, weights=self.xp.asarray(self.seg_weight))
    if self.ensemble_seg:
        h_segs = [h_seg_avg] * len(self.models)

    h_hors, h_vers = [], []
    for i in range(len(self.models)):
        h_hor, h_ver = self.models[i].forward_edge(hs[i], h_segs[i])
        h_hors.append(h_hor)
        h_vers.append(h_ver)
    h_hors = F.stack(h_hors)
    h_hor_avg = F.average(
        h_hors, axis=0, weights=self.xp.asarray(self.edge_weight))
    h_vers = F.stack(h_vers)
    h_ver_avg = F.average(
        h_vers, axis=0, weights=self.xp.asarray(self.edge_weight))
    return h_seg_avg, h_hor_avg, h_ver_avg
def loss_comp_low(x, y, threshold, norm='l2'):
    # The mask selects positions where exactly one of x, y is below the threshold.
    mask = ((x.array <= threshold) ^ (y.array <= threshold)).astype(x.xp.float32)
    if norm == 'l1':
        return F.average(mask * F.absolute_error(x, y))
    else:
        return F.average(mask * F.squared_error(x, y))
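# A small usage sketch (inputs are made up) of what the XOR mask in
# loss_comp_low selects: only positions where exactly one of x, y falls
# below the threshold contribute to the averaged error.
import numpy as np
import chainer
import chainer.functions as F

x = chainer.Variable(np.array([[0.2, 0.8]], dtype=np.float32))
y = chainer.Variable(np.array([[0.9, 0.7]], dtype=np.float32))
# First element: x below 0.5 but y above -> counted. Second: both above -> ignored.
print(float(loss_comp_low(x, y, threshold=0.5).array))  # 0.245 (= 0.49 / 2)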
def __call__(self, x):
    """Apply layer normalization to the input ``x``.

    Args:
        x (float[][]): input tensor to re-center and re-scale (layer normalize)

    Returns:
        float[][]
    """
    if self.hidden_size is None:
        self._initialize_params(x.shape)

    # Layer normalization statistics
    mu = F.average(x, axis=1, keepdims=True)
    mu = F.broadcast_to(mu, x.shape)
    sigma = F.sqrt(
        F.average(F.square(x - mu), axis=1, keepdims=True) + self.epsilon)
    sigma = F.broadcast_to(sigma, x.shape)

    # Standardization
    outputs = (x - mu) / sigma
    # Affine transformation
    outputs = (outputs * self.gain) + self.bias
    # Equivalent: outputs = F.bias(F.scale(outputs, self.gain), self.bias)
    return outputs
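# A quick standalone sketch (separate from the class above) of the same
# layer-norm statistics computed directly with F.average: per-row mean and
# variance over axis 1, then standardization. The input is invented.
import numpy as np
import chainer.functions as F

x = np.random.randn(2, 5).astype(np.float32)
mu = F.broadcast_to(F.average(x, axis=1, keepdims=True), x.shape)
var = F.broadcast_to(
    F.average(F.square(x - mu), axis=1, keepdims=True), x.shape)
x_hat = (x - mu) / F.sqrt(var + 1e-6)
print(x_hat.shape)  # (2, 5); each row roughly zero mean and unit variance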
def shared_middle(self, batch_size, width_rgb, width_flow, rpn_scores_rgb,
                  rpn_locs_rgb, rpn_scores_flow, rpn_locs_flow, anchor_rgb,
                  gt_segments_rgb, labels, seg_info):
    # rpn_scores_rgb shape = (N, W_rgb * A, 2)
    # rpn_scores_flow shape = (N, W_flow * A, 2)
    n_anchor = anchor_rgb.shape[1]

    # (B, 2, W_flow, A)
    rpn_locs_flow = F.transpose(
        rpn_locs_flow.reshape(batch_size, width_flow, n_anchor, 2),
        axes=(0, 3, 1, 2))
    # (B, 2, W_rgb, A)
    rpn_locs_flow = F.resize_images(rpn_locs_flow, (width_rgb, n_anchor))
    # (B, W_rgb, A, 2) => (B, W_rgb * A, 2)
    rpn_locs_flow = F.reshape(
        F.transpose(rpn_locs_flow, axes=(0, 2, 3, 1)),
        shape=(batch_size, width_rgb * n_anchor, 2))
    rpn_locs = F.average(F.stack([rpn_locs_rgb, rpn_locs_flow]), axis=0)

    rpn_scores_flow = F.transpose(
        rpn_scores_flow.reshape(batch_size, width_flow, n_anchor, 2),
        axes=(0, 3, 1, 2))
    # (B, 2, W_rgb, A)
    rpn_scores_flow = F.resize_images(rpn_scores_flow, (width_rgb, n_anchor))
    # (B, W_rgb, A, 2) => (B, W_rgb * A, 2)
    rpn_scores_flow = F.reshape(
        F.transpose(rpn_scores_flow, axes=(0, 2, 3, 1)),
        shape=(batch_size, width_rgb * n_anchor, 2))
    rpn_scores = F.average(F.stack([rpn_scores_rgb, rpn_scores_flow]), axis=0)

    # Merging is done; run NMS and create proposal targets.
    rois, roi_indices = self.time_seg_train_chain_rgb.nms_process(
        batch_size, width_rgb, n_anchor, rpn_scores, rpn_locs, anchor_rgb)
    sample_roi, sample_roi_index, gt_roi_loc, gt_roi_label = \
        self.time_seg_train_chain_rgb.proposal_target_creator(
            rois, roi_indices, gt_segments_rgb, labels, seg_info,
            self.time_seg_train_chain_rgb.loc_normalize_mean,
            self.time_seg_train_chain_rgb.loc_normalize_std)
    return sample_roi, sample_roi_index, gt_roi_loc, gt_roi_label
def log_prob(self, z, log_det_jacobians):
    ln_var_adj = self.ln_var * self.xp.ones([self.adj_size])
    ln_var_x = self.ln_var * self.xp.ones([self.x_size])

    log_det_jacobians[0] = log_det_jacobians[0] - F.log(
        self.xp.array([self.x_size], dtype=self.xp.float32))
    log_det_jacobians[1] = log_det_jacobians[1] - F.log(
        self.xp.array([self.adj_size], dtype=self.xp.float32))

    negative_log_likelihood_adj = F.average(
        F.sum(F.gaussian_nll(z[1],
                             self.xp.zeros(self.adj_size, dtype=self.xp.float32),
                             ln_var_adj, reduce="no"),
              axis=1)
        - log_det_jacobians[1])
    negative_log_likelihood_x = F.average(
        F.sum(F.gaussian_nll(z[0],
                             self.xp.zeros(self.x_size, dtype=self.xp.float32),
                             ln_var_x, reduce="no"),
              axis=1)
        - log_det_jacobians[0])

    negative_log_likelihood_adj /= self.adj_size
    negative_log_likelihood_x /= self.x_size

    if negative_log_likelihood_x.array < 0:
        log.warning("negative nll for x!")

    return [negative_log_likelihood_x, negative_log_likelihood_adj]
def loss_grad_d(diff):
    xp = cuda.get_array_module(diff.data)
    grad = xp.tile(
        xp.asarray([[[[1, 0, -1], [2, 0, -2], [1, 0, -1]]]], dtype=diff.dtype),
        (diff.data.shape[1], 1, 1))
    dx = F.convolution_2d(diff, grad)
    dy = F.convolution_2d(diff, xp.transpose(grad, (0, 1, 3, 2)))
    return F.average(dx ** 2) + F.average(dy ** 2)
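# A minimal usage sketch for loss_grad_d (the residual image below is invented):
# the tiled Sobel-style kernel reduces a (N, C, H, W) residual to one channel
# of horizontal/vertical gradients, which are then penalized in L2.
import numpy as np
import chainer

diff = chainer.Variable(np.random.randn(2, 3, 16, 16).astype(np.float32))
print(float(loss_grad_d(diff).array))  # scalar gradient penalty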
def update_core(self):
    def _update(optimizer, loss):
        optimizer.target.cleargrads()
        loss.backward()
        optimizer.update()

    xp = self.generator.xp

    if self.iteration < 50:
        n_critic = 100
    else:
        n_critic = 5

    # update critic n_critic times
    for _ in range(n_critic):
        # real image
        x_real = self.next_batch(self.x)
        y_real = self.critic(x_real)
        loss1 = -F.average(y_real)

        # fake image
        z = self.next_batch(self.z)
        x_fake = self.generator(z)
        y_fake = self.critic(x_fake)
        loss2 = F.average(y_fake)

        # gradient penalty, using chainer.grad here
        eps = xp.random.uniform(
            0, 1, size=self.batchsize).astype("f")[:, None, None, None]
        x_mid = eps * x_real + (1.0 - eps) * x_fake
        y_mid = self.critic(x_mid)
        grad, = chainer.grad([y_mid], [x_mid], enable_double_backprop=True)
        grad = F.sqrt(F.batch_l2_norm_squared(grad))
        loss_gp = self.lam * F.mean_squared_error(grad, xp.ones_like(grad.data))

        # compute loss
        critic_loss = loss1 + loss2 + loss_gp

        # update critic
        _update(self.optimizer_critic, critic_loss)

        chainer.reporter.report({
            'critic/loss/real': loss1,
            'critic/loss/fake': loss2,
            'critic/loss/gp': loss_gp,
            'critic/loss': critic_loss,
            'Wasserstein': -loss1 - loss2,
        })

    # update generator 1 time
    z = self.next_batch(self.z)
    x_fake = self.generator(z)
    y_fake = self.critic(x_fake)
    gen_loss = -F.average(y_fake)
    _update(self.optimizer_generator, gen_loss)
    chainer.report({'generator/loss': gen_loss})
def __call__(self, hs, rs, ts, ys):
    """Calculate the loss between outputs and ys.

    Args:
        hs: The heads of facts.
        rs: The relations of facts.
        ts: The tails of facts.
        ys: The labels which indicate whether the facts are correct.

    Returns:
        loss: The cross-entropy loss for outputs and ys.
    """
    batch_size, max_length_h = hs.shape
    _, max_length_t = ts.shape

    hhs = self.concept_encoder(hs)
    hts = self.concept_encoder(ts)
    hrs = self.relation_encoder(rs)

    # embedding vectors which correspond to PAD should be zeros
    hhs = hhs * (hs != PAD)[:, :, None]
    hts = hts * (ts != PAD)[:, :, None]

    # calculate average over embeddings
    hhs = F.average(hhs, axis=1)
    hts = F.average(hts, axis=1)

    # transform concept representations
    l_hhs = F.tanh(F.dropout(self.l_concept(hhs), ratio=self.n_dropout))
    l_hts = F.tanh(F.dropout(self.l_concept(hts), ratio=self.n_dropout))

    # reshape hrs
    hrs = F.reshape(
        hrs, (batch_size, self.n_relation_units, self.n_relation_units))

    # calculate bilinear outputs
    outputs = F.flatten(
        F.batch_matmul(F.batch_matmul(l_hhs, hrs, transa=True), l_hts))
    loss = F.sigmoid_cross_entropy(outputs, ys)
    chainer.report({'loss': loss.data}, self)
    return loss
def total_variation2(x):
    xp = cuda.get_array_module(x.data)
    wh = xp.asarray([[[[1], [-1]]]], dtype=x.dtype)
    ww = xp.asarray([[[[1, -1]]]], dtype=x.dtype)
    dx = F.convolution_2d(x, W=wh)
    dy = F.convolution_2d(x, W=ww)
    # dx = x[:, 1:, :, :] - x[:, :-1, :, :]
    # dy = x[:, :, 1:, :] - x[:, :, :-1, :]
    return F.average(F.absolute(dx)) + F.average(F.absolute(dy))
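# A quick usage sketch for total_variation2 on a hypothetical single-channel
# batch (the 2x1 and 1x2 difference kernels assume one input channel).
import numpy as np
import chainer

x = chainer.Variable(np.random.uniform(0, 1, (4, 1, 32, 32)).astype(np.float32))
tv = total_variation2(x)
print(float(tv.array))  # mean absolute horizontal + vertical differences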
def update_core(self):
    gen_opt = self.get_optimizer("gen")
    cri_opt = self.get_optimizer("cri")
    generator = gen_opt.target
    critic = cri_opt.target
    batch_size = self.get_iterator("main").batch_size

    # fetch a batch of real samples
    x_real = self.get_iterator("main").next()
    x_real = Variable(np.stack(x_real))
    if chainer.config.user_gpu >= 0:
        x_real.to_gpu()
    xp = x_real.xp

    # update critic
    upd_num = (self.n_cri[1]
               if self.iteration <= 25 or self.iteration % 500 == 0
               else self.n_cri[0])
    for i in range(upd_num):
        z = xp.random.uniform(size=(batch_size, Z_DIM)).astype(np.float32)
        x_fake = generator(Variable(z))
        # negated estimate of the Wasserstein distance
        cri_loss = F.average(critic(x_fake) - critic(x_real))

        # gradient penalty
        eps = xp.random.uniform(size=(batch_size, 1, 1, 1)).astype(np.float32)
        x_fusion = eps * x_real + (1 - eps) * x_fake  # (N, 1, H, W)
        g_critic = chainer.grad(
            [critic(x_fusion)], [x_fusion],
            enable_double_backprop=True)[0]  # (N, 1, H, W)
        gp = F.batch_l2_norm_squared(g_critic)
        gp = F.average((F.sqrt(gp) - 1) ** 2)

        total_loss = cri_loss + self.gp_lam * gp
        critic.cleargrads()
        total_loss.backward()
        cri_opt.update()

    # update generator
    z = xp.random.uniform(size=(batch_size, Z_DIM)).astype(np.float32)
    x_fake = generator(Variable(z))
    gen_loss = -F.average(critic(x_fake))
    generator.cleargrads()
    critic.cleargrads()
    gen_loss.backward()
    gen_opt.update()

    chainer.report({
        "generator/loss": gen_loss,
        "critic/loss": cri_loss,
        "main/wdist": -cri_loss
    })
def __call__(self, s, q, s_mask, q_mask):
    """
    s_bar, _, _ = self.pred_bilstm(None, None, s)
    s_bar_new = F.concat(s_bar, axis=1)
    q_bar, _, _ = self.pred_bilstm(None, None, q)
    q_bar_new = F.concat(q_bar, axis=1)
    """
    _, _, s_bar = self.pred_bilstm(None, None, s)  # get list of [seq, dim]
    s_bar_new = F.stack(s_bar, axis=0)  # turn list into a 3d tensor
    _, _, q_bar = self.pred_bilstm(None, None, q)  # get list of [seq, dim]
    q_bar_new = F.stack(q_bar, axis=0)  # turn list into a 3d tensor

    # mean-max pooling
    s_sum = F.sum(s_mask, axis=-1)
    q_sum = F.sum(q_mask, axis=-1)
    s_batch, s_seq = s_mask.shape
    s_mask_broad = F.broadcast_to(
        F.reshape(s_mask, (s_batch, s_seq, 1)),
        (s_batch, s_seq, s_bar_new.shape[-1]))
    s_broad = s_bar_new * s_mask_broad
    """
    s_infinit_matrix = self.xp.ones((s_batch, s_seq, s_bar_new.shape[-1]),
                                    dtype=self.xp.float32) * -1 * self.xp.inf
    s_cond = s_mask_broad.data.astype(self.xp.bool)
    s_broad_max = F.where(s_cond, s_bar_new, s_infinit_matrix)
    """
    s_mean = F.average(s_broad, axis=1)  # [batch_size, dim]
    s_max = F.maxout(
        F.reshape(
            s_bar_new,
            (s_bar_new.shape[0], s_bar_new.shape[1] * s_bar_new.shape[2])),
        s_bar_new.shape[-1])  # [batch_size, dim]

    q_batch, q_seq = q_mask.shape
    q_broad = q_bar_new * F.broadcast_to(
        F.reshape(q_mask, (q_batch, q_seq, 1)),
        (q_batch, q_seq, q_bar_new.shape[-1]))
    q_mean = F.average(q_broad, axis=1)  # [batch_size, dim]
    q_max = F.maxout(
        F.reshape(
            q_bar_new,
            (q_bar_new.shape[0], q_bar_new.shape[1] * q_bar_new.shape[2])),
        q_bar_new.shape[-1])  # [batch_size, dim]

    summarized_vector = F.concat([s_mean, s_max, q_mean, q_max], axis=1)
    s_linear_output = self.gelu(self.L(summarized_vector))
    y = F.softmax(s_linear_output)
    return y
def loss_grad_d(diff):
    xp = cuda.get_array_module(diff.data)
    grad = xp.tile(
        xp.asarray([[[[1, 0, -1], [2, 0, -2], [1, 0, -1]]]], dtype=diff.dtype),
        (diff.data.shape[1], 1, 1))
    dx = F.convolution_2d(diff, grad)
    dy = F.convolution_2d(diff, xp.transpose(grad, (0, 1, 3, 2)))
    # target = self.xp.zeros_like(dx.data)
    # return 0.5 * (F.mean_squared_error(dx, target) + F.mean_squared_error(dy, target))
    return F.average(dx ** 2) + F.average(dy ** 2)
def update_core(self):
    # train critic
    for t in range(self.n_c):
        # read data
        batch = self._iterators['main'].next()
        x = self.converter(batch, self.device)
        m = x.shape[0]
        H, W = x.shape[2], x.shape[3]
        xp = chainer.cuda.get_array_module(x)

        # generate
        z = self.generator.make_z(m)
        x_tilde = self.generator(z)

        # sampling along straight lines
        e = xp.random.uniform(0., 1., (m, 1, 1, 1))
        x_hat = e * x + (1 - e) * x_tilde

        # compute loss
        loss_gan = F.average(self.critic(x_tilde) - self.critic(x))
        grad, = chainer.grad([self.critic(x_hat)], [x_hat],
                             enable_double_backprop=True)
        grad = F.sqrt(F.batch_l2_norm_squared(grad))
        loss_grad = self.l * F.mean_squared_error(grad, xp.ones_like(grad.data))
        loss_critic = loss_gan + loss_grad

        # update critic
        self.critic.cleargrads()
        loss_critic.backward()
        self._optimizers['critic'].update()

        # report
        chainer.reporter.report({
            'wasserstein distance': -loss_gan,
            'loss/grad': loss_grad
        })

    # train generator
    # read data
    batch = self._iterators['main'].next()
    x = self.converter(batch, self.device)

    # generate and compute loss
    z = self.generator.make_z(m)
    loss_generator = F.average(-self.critic(self.generator(z)))

    # update generator
    self.generator.cleargrads()
    loss_generator.backward()
    self._optimizers['generator'].update()

    # report
    chainer.reporter.report({'loss/generator': loss_generator})
def __call__(self, x, c):
    mu = F.average(x, axis=0).reshape(1, x.shape[1], x.shape[2], x.shape[3])
    sigma = F.average((x - F.tile(mu, (x.shape[0], 1, 1, 1))) ** 2, axis=0)
    x_hat = (x - F.tile(mu, (x.shape[0], 1, 1, 1))) / \
        F.sqrt(F.tile(sigma + self.eps, (x.shape[0], 1, 1, 1)))
    h = F.relu(self.c0(c))
    w = self.cw(h)
    b = self.cb(h)
    # ones = chainer.as_variable(xp.ones_like(w, dtype=xp.float32))
    h = w * x_hat + b
    return h
def power_loss(x, t, frame_length=1024, hop_length=512, time_axis_mean=False):
    # ..., FFT axis
    Xr, Xi = stft(x, frame_length, hop_length)
    Xa = Xr ** 2 + Xi ** 2
    Tr, Ti = stft(t, frame_length, hop_length)
    Ta = Tr ** 2 + Ti ** 2
    if time_axis_mean:
        Xa = F.average(Xa, -1)
        Ta = F.average(Ta, -1)
    return F.mean_squared_error(Xa, Ta)
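# A minimal sketch of calling power_loss, assuming `stft` is the project's own
# helper that returns real and imaginary spectrogram parts for a batch of 1-D
# signals; the waveforms below are random stand-ins.
import numpy as np

x = np.random.randn(2, 16000).astype(np.float32)  # predicted waveforms
t = np.random.randn(2, 16000).astype(np.float32)  # target waveforms
loss = power_loss(x, t, frame_length=1024, hop_length=512, time_axis_mean=True)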
def _loss(self, fake_batch_obs, fake_batch_action, true_batch_obs,
          true_batch_action):
    if self.obs_normalizer is not None:
        normalized_obs = self.obs_normalizer(fake_batch_obs, update=False)
        infer_fake = self.model(normalized_obs, fake_batch_action)
    else:
        infer_fake = self.model(fake_batch_obs, fake_batch_action)

    if self.noisy_label:
        n = fake_batch_obs.shape[0]
        fake_loss = -F.average(
            F.log(F.absolute(
                1 - (self.xp.random.rand(n) * self.noisy_label_range)
                - F.sigmoid(infer_fake))
                + self.discriminator_value_offset))
    else:
        fake_loss = -F.average(
            F.log(1 - F.sigmoid(infer_fake) + self.discriminator_value_offset))

    if self.obs_normalizer is not None:
        normalized_obs = self.obs_normalizer(true_batch_obs, update=True)
        infer_true = self.model(normalized_obs, true_batch_action)
    else:
        infer_true = self.model(true_batch_obs, true_batch_action)

    if self.noisy_label:
        n = true_batch_obs.shape[0]
        true_loss = -F.average(
            F.log(F.absolute(
                1 - (self.xp.random.rand(n) * self.noisy_label_range)
                - F.sigmoid(infer_true))
                + self.discriminator_value_offset))
    else:
        true_loss = -F.average(
            F.log(F.sigmoid(infer_true) + self.discriminator_value_offset))

    entropy = (self._get_entropy(infer_fake) / 2
               + self._get_entropy(infer_true) / 2)
    loss = (fake_loss + true_loss - entropy * self.entropy_coef)

    # Update stats
    self.accuracy_gen = np.average(
        chainer.cuda.to_cpu(infer_fake.array) < 0)
    self.accuracy_exp = np.average(
        chainer.cuda.to_cpu(infer_true.array) > 0)
    self.average_entropy *= self.entropy_decay
    self.average_entropy += (1.0 - self.entropy_decay) * chainer.cuda.to_cpu(entropy.array)  # noqa
    self.average_loss *= self.loss_decay
    self.average_loss += (1.0 - self.loss_decay) * \
        chainer.cuda.to_cpu(loss.array)
    return loss
def __call__(self, x, e=None):
    # channel attention from global average and max pooling
    gap = F.average(x, axis=(2, 3))
    gmp = F.max(x, axis=(2, 3))
    gap = self.ext(F.relu(self.sqz(gap)))
    gmp = self.ext(F.relu(self.sqz(gmp)))
    x = F.sigmoid(gap + gmp)[:, :, None, None] * x

    # spatial attention from channel-wise average and max
    gap = F.average(x, axis=1)[:, None]
    gmp = F.max(x, axis=1)[:, None]
    h = self.conv(F.concat([gap, gmp]))
    h = F.sigmoid(h) * x
    return h
def update_core(self):
    xp = self.gen.xp
    self._iter += 1

    opt_d = self.get_optimizer('dis')
    for i in range(self._dis_iter):
        d_fake = self.get_fake_image_batch()
        d_real = self.get_real_image_batch()

        y_fake = self.dis(Variable(d_fake), test=False)
        y_real = self.dis(Variable(d_real), test=False)

        w1 = F.average(y_fake - y_real)
        loss_dis = w1

        if self._mode == 'gp':
            eta = np.random.rand()
            c = (d_real * eta + (1.0 - eta) * d_fake).astype('f')
            y = self.dis(Variable(c), test=False, retain_forward=True)
            g = xp.ones_like(y.data)
            grad_c = self.dis.differentiable_backward(Variable(g))
            grad_c_l2 = F.sqrt(F.sum(grad_c ** 2, axis=(1, 2, 3)))
            loss_gp = loss_l2(grad_c_l2, 1.0)
            loss_dis += self._lambda_gp * loss_gp

        opt_d.zero_grads()
        loss_dis.backward()
        opt_d.update()

        if self._mode == 'clip':
            self.dis.clip()

    chainer.report({'loss': loss_dis, 'loss_w1': w1}, self.dis)

    z_in = self.get_latent_code_batch()
    x_out = self.gen(Variable(z_in), test=False)

    opt_g = self.get_optimizer('gen')
    y_fake = self.dis(x_out, test=False)
    loss_gen = -F.average(y_fake)
    chainer.report({'loss': loss_gen}, self.gen)
    opt_g.zero_grads()
    loss_gen.backward()
    opt_g.update()
def risk(self, Xt1, Xt2):
    Xa, Xb, Xc, A, B = self.prepare(Xt1, Xt2)
    p, n = self.p, self.n
    a = A * A + p * p + n * n
    b = A * B + 2 * p * n
    c = B * B + p * p + n * n
    coe = 1 / (a * c - b * b)
    r_a = p * (c * p - b * n) * self.loss(self.f(Xa)) + \
        n * (a * n - b * p) * self.loss(-self.f(Xa))
    r_b = p * (c * A - b * B) * self.loss(self.f(Xb)) + \
        n * (a * B - b * A) * self.loss(-self.f(Xb))
    r_c = p * (c * n - b * p) * self.loss(self.f(Xc)) + \
        n * (a * p - b * n) * self.loss(-self.f(Xc))
    return coe * (F.average(r_a) + F.average(r_b) + F.average(r_c))
def calc_loss(self, grids, image_size):
    top_left_x, top_right_x, _, top_left_y, _, bottom_left_y = \
        self.get_corners(grids, image_size)

    # penalize upside down images
    distance = top_left_y - bottom_left_y
    loss_values = F.maximum(distance, self.xp.zeros_like(distance))
    up_down_loss = F.average(loss_values)

    # penalize images that are vertically mirrored
    distance = top_left_x - top_right_x
    loss_values = F.maximum(distance, self.xp.zeros_like(distance))
    left_right_loss = F.average(loss_values)

    return up_down_loss + left_right_loss
def __call__(self, x, t):
    y_list = self.predictor(x)
    _len, _cls = y_list.shape
    if self.sm_fuse:
        _sm = F.reshape(
            F.log_softmax(y_list),
            (self.n_kernel, _len // self.n_kernel, _cls))
        ave_y = F.average(_sm, axis=0)
        loss = -F.average(F.select_item(ave_y, t))
    else:
        loss = F.average(
            F.softmax_cross_entropy(y_list, F.tile(t, self.n_kernel)))
    conf = F.average(
        F.reshape(y_list, (self.n_kernel, _len // self.n_kernel, _cls)),
        axis=0)
    chainer.report(
        {'loss': loss, 'accuracy': F.accuracy(conf, t)}, self)
    return loss
def __call__(self, x):
    b, c, height, width = x.data.shape
    h = F.average(x, axis=(2, 3))  # Global pooling
    h = F.relu(self.l1(h))
    h = F.sigmoid(self.l2(h))
    return F.transpose(
        F.broadcast_to(h, (height, width, b, c)), (2, 3, 0, 1))
def __call__(self, x):
    # Embed each sentence of every document in the batch
    # (x: minibatch, doc: the sentences sharing one label; how to parallelize this?)
    sent_rep = [self.sen_enc(doc) for doc in x]
    # Feed the sentences into the BiLSTM one document at a time.
    last_h, last_c, ys = self.encoder(None, None, sent_rep)
    # Return the average of the last-layer states over each document's sentences.
    return [F.average(x, axis=0) for x in ys]
def __init__(self, n_layer, n_class=None, pretrained_model=None, mean=None,
             initialW=None, fc_kwargs={}, arch='fb'):
    if arch == 'fb':
        stride_first = False
        conv1_no_bias = True
    elif arch == 'he':
        stride_first = True
        # Kaiming He uses bias only for ResNet50
        conv1_no_bias = n_layer != 50
    else:
        raise ValueError('arch is expected to be one of [\'he\', \'fb\']')
    blocks = self._blocks[n_layer]

    param, path = utils.prepare_pretrained_model(
        {'n_class': n_class, 'mean': mean},
        pretrained_model,
        self._models[arch][n_layer],
        {'n_class': 1000, 'mean': _imagenet_mean})
    self.mean = param['mean']

    if initialW is None:
        initialW = initializers.HeNormal(scale=1., fan_option='fan_out')
    if 'initialW' not in fc_kwargs:
        fc_kwargs['initialW'] = initializers.Normal(scale=0.01)
    if pretrained_model:
        # As a sampling process is time-consuming,
        # we employ a zero initializer for faster computation.
        initialW = initializers.constant.Zero()
        fc_kwargs['initialW'] = initializers.constant.Zero()
    kwargs = {'initialW': initialW, 'stride_first': stride_first}

    super(ResNet, self).__init__()
    with self.init_scope():
        self.conv1 = Conv2DBNActiv(None, 64, 7, 2, 3, nobias=conv1_no_bias,
                                   initialW=initialW)
        self.pool1 = lambda x: F.max_pooling_2d(x, ksize=3, stride=2)
        self.res2 = ResBlock(blocks[0], None, 64, 256, 1, **kwargs)
        self.res3 = ResBlock(blocks[1], None, 128, 512, 2, **kwargs)
        self.res4 = ResBlock(blocks[2], None, 256, 1024, 2, **kwargs)
        self.res5 = ResBlock(blocks[3], None, 512, 2048, 2, **kwargs)
        self.pool5 = lambda x: F.average(x, axis=(2, 3))
        self.fc6 = L.Linear(None, param['n_class'], **fc_kwargs)
        self.prob = F.softmax

    if path:
        chainer.serializers.load_npz(path, self)
def calc_style_mean_std(feature, eps=1e-5):
    mean = F.mean(feature, axis=1).reshape(feature.shape[0], 1)
    sigma = F.average((feature - F.tile(mean, (1, 256))) ** 2, axis=1) + eps
    std = F.sqrt(sigma).reshape(feature.shape[0], 1, 1, 1)
    mean = F.reshape(mean, (feature.shape[0], 1, 1, 1))
    return mean, std
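# A quick sketch of calc_style_mean_std on a made-up batch of 256-dimensional
# style features; the (N, 1, 1, 1) outputs are shaped for broadcasting over
# feature maps.
import numpy as np

feature = np.random.randn(4, 256).astype(np.float32)
mean, std = calc_style_mean_std(feature)
print(mean.shape, std.shape)  # (4, 1, 1, 1) (4, 1, 1, 1)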
def proxy_nca_loss(x, proxy, labels):
    """Proxy-NCA loss function.

    Args:
        x (:class:`~chainer.Variable`): L2 normalized anchor points whose
            shape is (B, D), where B is the batch size and D is the number of
            dimensions of feature vector.
        proxy (:class:`~chainer.Variable` or :class:`~chainer.Parameter`):
            Proxies whose shape is (K, D), where K is the number of classes
            in the dataset.
        labels (:class:`numpy.ndarray`): Class labels associated to x. The
            shape is (B,) and dtype is int. Note that the class IDs must be
            0, 1, ..., K-1.

    Returns:
        :class:`~chainer.Variable`: Loss value.

    See: `No Fuss Distance Metric Learning using Proxies \
        <http://openaccess.thecvf.com/content_ICCV_2017/papers/\
        Movshovitz-Attias_No_Fuss_Distance_ICCV_2017_paper.pdf>`_

    """
    proxy = F.normalize(proxy)
    distance = squared_distance_matrix(x, proxy)
    d_posi = distance[np.arange(len(x)), labels]

    # For each row, remove one element corresponding to the positive distance
    B, K = distance.shape  # batch size and the number of classes
    mask = np.tile(np.arange(K), (B, 1)) != labels[:, None]
    d_nega = distance[mask].reshape(B, K - 1)

    log_denominator = F.logsumexp(-d_nega, axis=1)
    loss = d_posi + log_denominator
    return F.average(loss)
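# A minimal sketch of calling proxy_nca_loss (all inputs invented): 8 anchors
# with 16-dimensional L2-normalized features, 5 classes, and trainable proxies.
# It relies on the squared_distance_matrix helper referenced above.
import numpy as np
import chainer
import chainer.functions as F

x = F.normalize(chainer.Variable(np.random.randn(8, 16).astype(np.float32)))
proxy = chainer.Parameter(np.random.randn(5, 16).astype(np.float32))
labels = np.random.randint(0, 5, size=8).astype(np.int32)
loss = proxy_nca_loss(x, proxy, labels)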
def compute_batch_loss(self, batch, weights):
    """Compute gradients on a list of trajectories.

    Args:
        batch -- a TrajectoryBatch
        weights -- a list of weights for trajectories in the batch

    Returns a loss value
    """
    weights = self._xp.array(weights)
    for step, step_batch in batch.step_batches(self._gpu_device):
        policies, values = self._model(step_batch.states)
        values *= (1 - step_batch.terminals).reshape(values.shape)
        logprobs = F.select_item(policies, step_batch.actions)
        batch.set_logprobs_and_values(step, logprobs, values)
    losses = []
    for trajectory, logprobs, values in batch:
        losses.append(
            self.compute_trajectory_loss(trajectory, logprobs, values))
    losses = F.stack(losses)
    loss = F.average(losses * weights)
    loss.backward()
    return np.asscalar(cuda.to_cpu(loss.data))
def extract(self, images, layers=['fc5']):
    self._layer_names = layers
    x = chainer.Variable(self.xp.asarray(images))
    h = self(x).data
    _len, _cls = h.shape
    h = F.average(F.reshape(h, (16, _len // 16, _cls)), axis=0)
    return chainer.cuda.to_cpu(h.data)
def extract(self, images, layers=['fc']):
    self._layer_names = layers
    x = chainer.Variable(self.xp.asarray(images))
    h = self(x).data
    h = F.stack(F.split_axis(h, 16, axis=0))
    h = F.average(F.softmax(h, axis=2), axis=0)
    return chainer.cuda.to_cpu(h.data)
def check_forward(self, x_data, axis, weights):
    x = chainer.Variable(x_data)
    if self.use_weights:
        w = chainer.Variable(weights)
        w_data = self.w
    else:
        w = None
        w_data = None
    y = functions.average(x, axis=axis, weights=w, keepdims=self.keepdims)
    self.assertEqual(y.data.dtype, self.dtype)

    y_expect = numpy.average(self.x, axis=axis, weights=w_data)
    if self.keepdims:
        # numpy.average does not support keepdims
        if axis is None:
            axis = list(six.moves.range(x_data.ndim))
        elif isinstance(axis, int):
            axis = axis,
        shape = list(x_data.shape)
        for i in six.moves.range(len(shape)):
            if i in axis or i - len(shape) in axis:
                shape[i] = 1
        y_expect = y_expect.reshape(shape)

    if self.dtype == numpy.float16:
        options = {'atol': 1e-3, 'rtol': 1e-3}
    else:
        options = {}

    self.assertEqual(y_expect.shape, y.shape)
    testing.assert_allclose(y_expect, y.data, **options)
def check_forward(self, x_data, axis, weights):
    if self.use_weights and isinstance(self.axis, tuple):
        # This condition is not supported
        return

    x = chainer.Variable(x_data)
    if self.use_weights:
        w = chainer.Variable(weights)
        w_data = self.w
    else:
        w = None
        w_data = None
    y = functions.average(x, axis=axis, weights=w, keepdims=self.keepdims)
    self.assertEqual(y.data.dtype, self.dtype)

    y_expect = numpy.average(self.x, axis=axis, weights=w_data)
    if self.keepdims:
        # numpy.average does not support keepdims
        if axis is None:
            axis = list(six.moves.range(x_data.ndim))
        elif isinstance(axis, int):
            axis = axis,
        shape = list(x_data.shape)
        for i in six.moves.range(len(shape)):
            if i in axis or i - len(shape) in axis:
                shape[i] = 1
        y_expect = y_expect.reshape(shape)

    if self.dtype == numpy.float16:
        options = {'atol': 5e-3, 'rtol': 5e-3}
    else:
        options = {}

    self.assertEqual(y_expect.shape, y.shape)
    testing.assert_allclose(y_expect, y.data, **options)
def check_forward(self, x_data, axis, weights):
    x = chainer.Variable(x_data)
    if self.use_weights:
        w = chainer.Variable(weights)
        w_data = self.w
    else:
        w = None
        w_data = None
    y = functions.average(x, axis=axis, weights=w)
    self.assertEqual(y.data.dtype, self.dtype)

    y_expect = numpy.average(self.x, axis=axis, weights=w_data)

    if self.dtype == numpy.float16:
        options = {'atol': 1e-3, 'rtol': 1e-3}
    else:
        options = {}

    testing.assert_allclose(y_expect, y.data, **options)
def f(x):
    return functions.average(x, axis=axis)
def test_duplicate_value_negative(self):
    x = numpy.random.uniform(-1, 1, 24).reshape(2, 3, 4).astype(self.dtype)
    with self.assertRaises(ValueError):
        functions.average(x, axis=(1, -2))
def f(x):
    return functions.average(x, axis=axis, keepdims=self.keepdims)
def f(x, w):
    return functions.average(
        x, axis=axis, weights=w, keepdims=self.keepdims)
def f(x, w):
    return functions.average(x, axis=axis, weights=w)
def test_weights_and_axis(self):
    x = numpy.random.uniform(-1, 1, 24).reshape(2, 3, 4).astype(self.dtype)
    w = numpy.random.uniform(-1, 1, 6).reshape(2, 3).astype(self.dtype)
    with self.assertRaises(ValueError):
        functions.average(x, axis=(0, 1), weights=w)
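# An illustrative check (not part of the test class above) of the single-axis
# weighted case that functions.average does support: it matches numpy.average
# along one axis. The input values are arbitrary.
import numpy
import chainer.functions as functions

x = numpy.arange(6, dtype=numpy.float32).reshape(2, 3)
w = numpy.asarray([1., 2.], dtype=numpy.float32)
y = functions.average(x, axis=0, weights=w)
print(y.array)  # equals numpy.average(x, axis=0, weights=w)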