def _compute_laplacian_mmd(self, samples1, samples2, *, sigma=20.0):
    """Return the Laplacian-kernel MMD between two sets of samples.

    Every kernel entry is ``exp(-||a - b||_1 / (2 * sigma))`` and entries are
    summed over all sample pairs formed along axis 1 of the inputs.
    Assumes both inputs are laid out as (batch, num_samples, dim) -- TODO
    confirm against the callers.

    Args:
        samples1: First set of samples.
        samples2: Second set of samples.
        sigma: Bandwidth of the Laplacian kernel.

    Returns:
        ``sqrt(mmd^2 + 1e-6)``; the small constant keeps the square root
        differentiable at zero.
    """
    def pairwise_kernel_sum(lhs, rhs):
        # Pair every lhs sample (axis 1) with every rhs sample (axis 2).
        pair_diff = F.expand_dims(x=lhs, axis=2) - F.expand_dims(x=rhs, axis=1)
        l1_dist = F.sum(F.absolute(pair_diff), axis=-1, keepdims=True)
        return F.sum(F.exp(-l1_dist / (2.0 * sigma)), axis=(1, 2))

    n = samples1.shape[1]
    m = samples2.shape[1]

    sum_xx = pairwise_kernel_sum(samples1, samples1)
    sum_xy = pairwise_kernel_sum(samples1, samples2)
    sum_yy = pairwise_kernel_sum(samples2, samples2)

    mmd_squared = \
        sum_xx / (n * n) - 2.0 * sum_xy / (m * n) + sum_yy / (m * m)
    return F.sqrt(mmd_squared + 1e-6)
def _feature_repl(hs_flatten, pairs, ckeys, lengths):
    """Build the two "replacement" features for (begin, end) span pairs.

    Rows of ``hs_flatten`` are looked up at the span begin/end, at their
    immediate outer neighbours, and at the neighbours of ``ckeys``. Outer
    neighbours whose index falls on a boundary derived from the cumulative
    ``lengths`` are replaced by zeros.

    Returns:
        Tuple ``(repl1, repl2)`` of element-wise absolute products.
    """
    xp = chainer.cuda.get_array_module(hs_flatten)
    begins, ends = pairs.T
    begins_dev = xp.asarray(begins)
    ends_dev = xp.asarray(ends)
    ckeys_dev = xp.asarray(ckeys)

    def neighbour_or_zero(ids_dev, ids_host, boundaries, ignore_label):
        # Look the neighbour up, then blank out rows that hit a boundary.
        looked_up = F.embed_id(ids_dev, hs_flatten, ignore_label=ignore_label)
        crosses = np.isin(ids_host, boundaries)
        return F.where(
            xp.asarray(crosses)[:, None],
            xp.zeros_like(looked_up.data),
            looked_up)

    # Position just before each span start.
    h_b = F.embed_id(begins_dev, hs_flatten)
    before_first = np.insert(lengths[:-1].cumsum(), 0, 0) - 1
    h_b_pre = neighbour_or_zero(
        begins_dev - 1, begins - 1, before_first, -1)

    # Position just after each span end.
    h_e = F.embed_id(ends_dev, hs_flatten)
    after_last = lengths.cumsum()
    h_e_post = neighbour_or_zero(
        ends_dev + 1, ends + 1, after_last, hs_flatten.shape[0])

    h_k_pre = F.embed_id(ckeys_dev - 1, hs_flatten)
    h_k_post = F.embed_id(ckeys_dev + 1, hs_flatten)

    repl1 = F.absolute(h_b_pre * (h_b - h_k_post))
    repl2 = F.absolute(h_e_post * (h_e - h_k_pre))
    return repl1, repl2
def __call__(self, x):
    """Return the mean edge-weighted smoothness of ``x``.

    The input is average-pooled (kernel 2, stride 2, no padding), filtered
    with ``self.diff`` and ``self.laplacian``, and the channel-summed
    absolute ``diff`` response is damped by ``exp(-|laplacian response|)``.
    """
    pooled = F.average_pooling_2d(x, 2, 2, 0)

    diff_response = F.convolution_2d(pooled, self.diff)
    smoothness = F.sum(F.absolute(diff_response), axis=1, keepdims=True)

    edge_response = F.convolution_2d(pooled, self.laplacian)
    edge_weight = F.exp(-F.absolute(edge_response))

    return F.mean(edge_weight * smoothness)
def total_variation2(x):
    """Return the sum of mean absolute finite differences of ``x``.

    The differences are taken with two fixed ``[1, -1]`` convolution kernels,
    one oriented along the kernel height axis and one along the width axis.
    """
    xp = cuda.get_array_module(x.data)
    # Kernel shapes are (1, 1, 2, 1) and (1, 1, 1, 2) respectively.
    kernel_h = xp.asarray([[[[1], [-1]]]], dtype=x.dtype)
    kernel_w = xp.asarray([[[[1, -1]]]], dtype=x.dtype)

    mean_abs_h = F.average(F.absolute(F.convolution_2d(x, W=kernel_h)))
    mean_abs_w = F.average(F.absolute(F.convolution_2d(x, W=kernel_w)))
    return mean_abs_h + mean_abs_w
def update_core(self):
    """Run one WGAN-GP style update of the critic and, on schedule, the generator.

    The critic loss is the Wasserstein term plus a gradient penalty
    (weighted by ``self.lam``) and a regulariser on the sum of
    ``critic.inter.W`` (weighted by ``self.lam2``). The generator is updated
    every 100 iterations while ``self.iteration < 2500`` and every
    ``self.n_c`` iterations once ``self.iteration > 2500``.

    NOTE(review): ``xp`` is not bound inside this method; presumably it is a
    module-level array module -- confirm.
    """
    batch = self.get_iterator('main').next()
    batchsize = len(batch)

    # Step1 Generate
    z = Variable(xp.asarray(self.generator.make_hidden(batchsize))) / 255.
    x_gen = self.generator(z)
    y_gen = self.critic(x_gen)

    # Step2 real
    x_real = Variable(xp.array(batch)) / 255.
    y_real = self.critic(x_real)

    # Step3 Compute loss for wgan_gp
    eps = xp.random.uniform(0, 1, (batchsize, 1, 1, 1)).astype("f")
    x_mid = eps * x_real + (1.0 - eps) * x_gen
    # Detach the interpolate so the penalty differentiates w.r.t. it alone.
    x_mid_v = Variable(x_mid.data)
    y_mid = self.critic(x_mid_v)
    dydx = chainer.grad([y_mid], [x_mid_v], enable_double_backprop=True)[0]
    dydx = F.sqrt(1e-08 + F.sum(F.square(dydx), axis=1))
    loss_gp = self.lam * F.mean_squared_error(dydx, xp.ones_like(dydx.data))

    loss_cri = F.sum(-y_real) / batchsize
    loss_cri += F.sum(y_gen) / batchsize

    # Extra step: regularization term about the last layer.
    loss_sp = self.lam2 * F.absolute(F.sum(self.critic.inter.W) - 1)
    loss_all = loss_cri + loss_gp + loss_sp

    # Step4 Update critic
    self.critic.cleargrads()
    loss_all.backward(loss_scale=0.001)
    self._optimizers['critic'].update()

    # Step5 Update generator
    # The two schedules are mutually exclusive, so one shared body suffices.
    # NOTE(review): iteration == 2500 matches neither schedule.
    warmup_step = self.iteration < 2500 and self.iteration % 100 == 0
    regular_step = self.iteration > 2500 and self.iteration % self.n_c == 0
    if warmup_step or regular_step:
        loss_gen = F.sum(-y_gen) / batchsize
        loss_sp = self.lam2 * F.absolute(F.sum(self.generator.inter.W) - 1)
        loss_gen += loss_sp
        self.generator.cleargrads()
        loss_gen.backward(loss_scale=0.001)
        self._optimizers['generator'].update()
        chainer.reporter.report({'loss/generator': loss_gen})

    # Step6 Report
    chainer.reporter.report({'loss/critic': loss_cri})
def __call__(self, img_error, dis_error, dis_output, test=False):
    """Fuse image error, discriminator error and discriminator output.

    Each input is flattened per sample, passed through its own layer
    (``l_img``, ``l_dis``, ``l_fdis``), concatenated along axis 1 and mapped
    through ``l_FL`` followed by a sigmoid. ``test`` is accepted but unused.
    """
    img_batch = img_error.data.shape[0]
    img_feat = self.l_img(
        F.reshape(F.absolute(img_error), (img_batch, 3 * 128 * 128)))

    err_batch = dis_error.data.shape[0]
    err_feat = self.l_dis(
        F.reshape(F.absolute(dis_error), (err_batch, 512 * 8 * 8)))

    out_batch = dis_output.data.shape[0]
    out_feat = self.l_fdis(
        F.reshape(dis_output, (out_batch, 512 * 8 * 8)))

    fused = F.concat((img_feat, err_feat, out_feat), axis=1)
    return F.sigmoid(self.l_FL(fused))
def update_core(self):
    """Run one optimisation step of the joint (continuous + discrete) VAE.

    The loss is the Bernoulli reconstruction NLL averaged over ``vae.k``
    sampled reconstructions, plus capacity-annealed KL penalties for the
    continuous and discrete latents when the model has them. The individual
    terms are reported through ``chainer.reporter``.
    """
    optimizer = self.get_optimizer('opt_vae')
    xp = self.vae.xp

    batch = self.get_iterator('main').next()
    batchsize = len(batch)
    x = chainer.dataset.concat_examples(batch, device=self.device)
    latent_dist = self.vae.encode(x)

    def annealed_capacity(cap_min, cap_max, num_iters):
        # Linear ramp from cap_min, clipped at cap_max.
        ramp = (cap_max - cap_min) * self.iteration / float(num_iters) \
            + cap_min
        return min(ramp, cap_max)

    # Reconstruction loss, averaged over k sampled reconstructions.
    rec_loss = 0
    rec_scale = self.vae.k * batchsize
    for _ in range(self.vae.k):
        reconstructions = self.vae(x, sigmoid=False, mode="sample")
        rec_loss += F.bernoulli_nll(x, reconstructions) / rec_scale

    # Latent loss for the continuous part.
    cont_capacity_loss = 0
    if self.vae.is_continuous:
        mu, ln_var = latent_dist['cont']
        kl_cont_loss = gaussian_kl_divergence(mu, ln_var) / batchsize
        cont_min, cont_max, cont_num_iters, cont_gamma = \
            self.vae.cont_capacity
        cont_cap_now = annealed_capacity(cont_min, cont_max, cont_num_iters)
        cont_capacity_loss = \
            cont_gamma * F.absolute(cont_cap_now - kl_cont_loss)

    # Latent loss for the discrete part.
    disc_capacity_loss = 0
    if self.vae.is_discrete:
        kl_disc_loss = kl_multiple_discrete_loss(latent_dist['disc'])
        disc_min, disc_max, disc_num_iters, disc_gamma = \
            self.vae.disc_capacity
        disc_cap_now = annealed_capacity(disc_min, disc_max, disc_num_iters)
        # The capacity is additionally capped by the sum of log(dim) over
        # the discrete latents; the cast avoids ending up with a numpy float.
        disc_theoretical_max = sum(
            xp.log(disc_dim) for disc_dim in self.vae.latent_spec["disc"])
        disc_cap_now = min(disc_cap_now,
                           disc_theoretical_max.astype("float32"))
        disc_capacity_loss = \
            disc_gamma * F.absolute(disc_cap_now - kl_disc_loss)

    joint_vae_loss = rec_loss + cont_capacity_loss + disc_capacity_loss

    self.vae.cleargrads()
    joint_vae_loss.backward()
    optimizer.update()

    chainer.reporter.report({
        "rec_loss": rec_loss,
        "cont_loss": cont_capacity_loss,
        "disc_loss": disc_capacity_loss,
        "vae_loss": joint_vae_loss,
    })
    return
def compute_disp_smooth(self, img, pred_disp):
    """Return the image-edge-weighted smoothness of a predicted disparity.

    Forward differences of ``pred_disp`` along axes 2 and 3 are weighted by
    ``exp(-|channel-mean image difference|)`` and averaged; the two
    directional means are summed.
    """
    def forward_diffs(tensor):
        diff_axis2 = tensor[:, :, 1:] - tensor[:, :, :-1]
        diff_axis3 = tensor[:, :, :, 1:] - tensor[:, :, :, :-1]
        return diff_axis3, diff_axis2

    img_dx, img_dy = forward_diffs(img)
    # Collapse the image channels so the weights have a single channel.
    img_dx = F.mean(img_dx, axis=1, keepdims=True)
    img_dy = F.mean(img_dy, axis=1, keepdims=True)

    disp_dx, disp_dy = forward_diffs(pred_disp)

    smooth_x = F.mean(F.absolute(disp_dx) * F.exp(-F.absolute(img_dx)))
    smooth_y = F.mean(F.absolute(disp_dy) * F.exp(-F.absolute(img_dy)))
    return smooth_x + smooth_y
def loss_func_tv_l1(x_out):
    """Return the L1 total-variation loss of a 4-D variable.

    Two ``(ch, ch, 2, 2)`` kernels are built whose only non-zero entries sit
    on the channel diagonal, so each channel is differenced independently:
    one kernel takes ``[-1, 1]`` along the last kernel axis, the other along
    the second-to-last. The absolute responses are summed.
    """
    xp = cuda.get_array_module(x_out.data)
    # Unpacking four values keeps the implicit "input must be 4-D" check.
    _, ch, _, _ = x_out.data.shape

    kernel_x = xp.zeros((ch, ch, 2, 2), dtype="f")
    kernel_y = xp.zeros((ch, ch, 2, 2), dtype="f")
    diag = xp.arange(ch)
    kernel_x[diag, diag, 0, 0] = -1
    kernel_x[diag, diag, 0, 1] = 1
    kernel_y[diag, diag, 0, 0] = -1
    kernel_y[diag, diag, 1, 0] = 1

    tv_x = F.sum(F.absolute(F.convolution_2d(x_out, W=kernel_x)))
    tv_y = F.sum(F.absolute(F.convolution_2d(x_out, W=kernel_y)))
    return tv_x + tv_y
def _loss(self, fake_batch_obs, fake_batch_action,
          true_batch_obs, true_batch_action):
    """Compute the discriminator loss and refresh the running statistics.

    The loss is the sum of the fake and true negative log-likelihood terms
    minus ``entropy_coef`` times the mean of the two batch entropies. When
    ``self.noisy_label`` is set, both terms use a randomly perturbed target.
    The observation normaliser, if present, is updated only with the true
    batch.

    Side effects: updates ``accuracy_gen``, ``accuracy_exp``,
    ``average_entropy`` and ``average_loss`` on ``self``.
    """
    def discriminate(obs, action, update):
        if self.obs_normalizer is not None:
            obs = self.obs_normalizer(obs, update=update)
        return self.model(obs, action)

    def noisy_nll(logits, n):
        noise = self.xp.random.rand(n) * self.noisy_label_range
        return -F.average(
            F.log(F.absolute(1 - noise - F.sigmoid(logits))
                  + self.discriminator_value_offset))

    # Fake (generator) batch.
    infer_fake = discriminate(fake_batch_obs, fake_batch_action, False)
    if self.noisy_label:
        fake_loss = noisy_nll(infer_fake, fake_batch_obs.shape[0])
    else:
        fake_loss = -F.average(
            F.log(1 - F.sigmoid(infer_fake)
                  + self.discriminator_value_offset))

    # True (expert) batch.
    infer_true = discriminate(true_batch_obs, true_batch_action, True)
    if self.noisy_label:
        true_loss = noisy_nll(infer_true, true_batch_obs.shape[0])
    else:
        true_loss = -F.average(
            F.log(F.sigmoid(infer_true) + self.discriminator_value_offset))

    entropy = (self._get_entropy(infer_fake) / 2
               + self._get_entropy(infer_true) / 2)
    loss = (fake_loss + true_loss - entropy * self.entropy_coef)

    # Update stats
    self.accuracy_gen = np.average(
        chainer.cuda.to_cpu(infer_fake.array) < 0)
    self.accuracy_exp = np.average(
        chainer.cuda.to_cpu(infer_true.array) > 0)

    self.average_entropy *= self.entropy_decay
    self.average_entropy += \
        (1.0 - self.entropy_decay) * chainer.cuda.to_cpu(entropy.array)

    self.average_loss *= self.loss_decay
    self.average_loss += \
        (1.0 - self.loss_decay) * chainer.cuda.to_cpu(loss.array)

    return loss
def get_disparity_smoothness(self, disp, img):
    """Return the edge-aware smoothness term of a disparity map.

    Disparity gradients are down-weighted where the image itself has strong
    gradients: each weight is ``exp(-mean_c |image gradient|)``, with the
    mean taken over axis 1 and that axis kept so the weight lines up with
    the disparity gradient.

    Args:
        disp: Disparity map passed to ``self.gradient_x`` / ``gradient_y``.
        img: Image passed to the same gradient helpers.

    Returns:
        Sum of the weighted x and y disparity gradients.
    """
    disp_gradients_x = self.gradient_x(disp)
    disp_gradients_y = self.gradient_y(disp)

    img_gradients_x = self.gradient_x(img)
    img_gradients_y = self.gradient_y(img)

    # The weights come from the image gradients (previously the disparity
    # gradients were used and the image gradients were computed but unused).
    # Chainer spells the keyword ``keepdims``; ``keep_dims`` raises TypeError.
    weight_x = F.exp(
        -F.mean(F.absolute(img_gradients_x), axis=1, keepdims=True))
    weight_y = F.exp(
        -F.mean(F.absolute(img_gradients_y), axis=1, keepdims=True))

    smoothness_x = disp_gradients_x * weight_x
    smoothness_y = disp_gradients_y * weight_y
    return smoothness_x + smoothness_y
def update_core(self):
    """Run one DCGAN-style update of the critic and, on schedule, the generator.

    Both networks use softplus losses plus a regulariser (weight
    ``self.lam2``) on the sum of their ``inter.W``. The generator is updated
    every 100 iterations while ``self.iteration < 2500`` and every
    ``self.n_c`` iterations once ``self.iteration > 2500``.
    """
    xp = cuda.cupy
    batch = self.get_iterator('main').next()
    batchsize = len(batch)

    # Step1 Generate
    z = Variable(xp.asarray(self.generator.make_hidden(batchsize)))
    x_gen = self.generator(z)
    y_gen = self.critic(x_gen)

    # Step2 real
    x_real = Variable(xp.array(batch)) / 255.
    y_real = self.critic(x_real)

    # Step3 Compute loss for DCGAN
    loss_cri = F.sum(F.softplus(-y_real)) / batchsize
    loss_cri += F.sum(F.softplus(y_gen)) / batchsize
    critic_reg = self.lam2 * F.absolute(F.sum(self.critic.inter.W) - 1)
    loss_all = loss_cri + critic_reg

    # Step4 Update critic
    self.critic.cleargrads()
    loss_all.backward()
    self._optimizers['critic'].update()

    # Step5 Update generator
    # The two schedules cannot both hold, so they share one update body.
    in_warmup = self.iteration < 2500 and self.iteration % 100 == 0
    after_warmup = self.iteration > 2500 and self.iteration % self.n_c == 0
    if in_warmup or after_warmup:
        loss_gen = F.sum(F.softplus(-y_gen)) / batchsize
        gen_reg = self.lam2 * F.absolute(F.sum(self.generator.inter.W) - 1)
        loss_gen += gen_reg
        self.generator.cleargrads()
        loss_gen.backward()
        self._optimizers['generator'].update()
        chainer.reporter.report({'loss/generator': loss_gen})

    # Step6 Report
    chainer.reporter.report({'loss/critic': loss_cri})
def occupancy_grid_1d(points, *, pitch, origin, dimension):
    """Return a soft 1-D occupancy vector for a 1-D array of points.

    ``OccupancyGrid1D`` is applied to the points, each value ``d`` of its
    output is turned into ``relu(1 - |d|)``, and the maximum over axis 0 is
    returned.
    """
    assert points.shape == (points.shape[0], )

    grid_fn = OccupancyGrid1D(pitch=pitch, origin=origin, dimension=dimension)
    distances = grid_fn(points)
    closeness = F.relu(1 - F.absolute(distances))
    return F.max(closeness, axis=0)
def __call__(self, x1, x2, train=True):
    """Encode/decode a pair of inputs, or decode a latent when not training.

    With ``train=True`` both inputs are encoded, a random per-sample mixing
    coefficient ``alpha`` in [0, 0.5] blends the two latents, and all three
    latents are decoded. Returns
    ``(sigmoid(y1), sigmoid(y2), sigmoid(yc), alpha, h1, h2)``.

    With ``train=False`` ``x1`` is treated as a latent and only decoded;
    ``x2`` is ignored.
    """
    if not train:
        y = F.relu(self.z_deconv(x1))
        y = F.relu(self.deconv1(y))
        y = F.relu(self.deconv2(y))
        return F.sigmoid(self.deconv3(y))

    batchsize = x1.shape[0]
    xp = cupy
    alpha = chainer.Variable(xp.random.rand(batchsize, dtype=xp.float32))
    # Fold uniform [0, 1) samples onto [0, 0.5].
    alpha = 0.5 - F.absolute(0.5 - alpha)
    alpha = alpha.reshape(batchsize, 1, 1, 1)

    # Encoder: the same layers are applied to both inputs.
    h1, h2 = x1, x2
    for conv in (self.conv1, self.conv2, self.conv3):
        h1 = F.relu(conv(h1))
        h2 = F.relu(conv(h2))
    h1 = self.conv_z(h1)
    h2 = self.conv_z(h2)

    c = alpha * h1 + (1.0 - alpha) * h2

    # Decoder: applied to both latents and to their blend.
    y1 = self.z_deconv(h1)
    y2 = self.z_deconv(h2)
    yc = self.z_deconv(c)
    for deconv in (self.deconv1, self.deconv2):
        y1 = F.relu(deconv(y1))
        y2 = F.relu(deconv(y2))
        yc = F.relu(deconv(yc))
    y1 = self.deconv3(y1)
    y2 = self.deconv3(y2)
    yc = self.deconv3(yc)

    return F.sigmoid(y1), F.sigmoid(y2), F.sigmoid(yc), alpha, h1, h2
def loss_comp_low(x, y, threshold, norm='l1'):
    """Sum the error over elements where exactly one input is below threshold.

    Args:
        x, y: Variables to compare.
        threshold: Value both arrays are compared against.
        norm: ``'l1'`` uses ``|x - y|``; any other value uses ``(x - y)**2``.
    """
    # True where x and y fall on different sides of the threshold.
    disagree = (x.array < threshold) ^ (y.array < threshold)
    if norm == 'l1':
        penalty = F.absolute(x - y)
    else:
        penalty = (x - y) ** 2
    return F.sum(disagree * penalty)
def test_backward_silhouette():
    """Backward if non-zero gradient is out of a face."""
    pyi, pxi = 25, 35

    vertices = cp.array(
        [[0.8, 0.8, 1.], [0.0, -0.5, 1.], [0.2, -0.4, 1.]], 'float32')
    faces = cp.array([[0, 1, 2]], 'int32')
    grad_ref = cp.array(
        [
            [1.6725862, -0.26021874, 0.],
            [1.41986704, -1.64284933, 0.],
            [0., 0., 0.],
        ],
        'float32')
    vertices, faces, grad_ref = utils.to_minibatch((vertices, faces, grad_ref))

    renderer = Renderer()
    renderer.image_size = 64
    renderer.anti_aliasing = False
    renderer.fill_back = False
    renderer.perspective = False

    print(vertices.shape)
    print(faces.shape)

    vertices = chainer.Variable(vertices)
    images = renderer.render_silhouettes(vertices, faces)
    # Loss touches a single pixel so the reference gradient is well defined.
    loss = cf.sum(cf.absolute(images[:, pyi, pxi] - 1))
    loss.backward()

    chainer.testing.assert_allclose(vertices.grad, grad_ref, rtol=1e-2)
def test_backward_silhouette_ch_2():
    """Backward if non-zero gradient is on a face."""
    pyi, pxi = 40, 50

    vertices = np.array([[0.8, 0.8, 1.], [-0.5, -0.8, 1.], [0.8, -0.8, 1.]])
    faces = np.array([[0, 1, 2]])
    grad_ref = np.array([
        [0.98646867, 1.04628897, 0.],
        [-1.03415668, -0.10403691, 0.],
        [3.00094461, -1.55173182, 0.],
    ])

    # Prepare chainer inputs
    vertices, faces, grad_ref = utils.to_minibatch((
        cp.array(vertices, 'float32'),
        cp.array(faces, 'int32'),
        cp.array(grad_ref, 'float32'),
    ))
    vertices = chainer.Variable(vertices)

    renderer = Renderer()
    renderer.image_size = 64
    renderer.anti_aliasing = False
    renderer.perspective = False

    images = renderer.render_silhouettes(vertices, faces)
    loss = cf.sum(cf.absolute(images[:, pyi, pxi]))
    loss.backward()

    chainer.testing.assert_allclose(vertices.grad, grad_ref, rtol=1e-2)
def predict(self, xs, softmax=False, argmax=False, get_embed=False,
            no_dropout=False):
    """Classify (premise, hypothesis) pairs.

    Each side is encoded, max-pooled over axis 0 per example, stacked and
    passed through dropout; the features ``[u, v, |u - v|, u * v]`` are then
    fed to ``self.output``.

    Args:
        xs: Pair ``(premises, hypotheses)``.
        softmax: Return softmax probabilities as a raw array.
        argmax: Return predicted class indices (ignored if ``softmax``).
        get_embed: Also return the embeddings produced by the encoder.
        no_dropout: Use a dropout ratio of 0.0 instead of ``self.dropout``.

    Returns:
        The outputs, or ``(outputs, premise_embed, hypothesis_embed)`` when
        ``get_embed`` is true.
    """
    premises, hypotheses = xs

    if get_embed:
        enc_prem, embed_prem = self.encoder(premises, get_embed=True)
        enc_hypo, embed_hypo = self.encoder(hypotheses, get_embed=True)
    else:
        enc_prem = self.encoder(premises, get_embed=False)
        enc_hypo = self.encoder(hypotheses, get_embed=False)

    pooled_prem = [F.max(y, axis=0) for y in enc_prem]
    pooled_hypo = [F.max(y, axis=0) for y in enc_hypo]

    ratio = 0.0 if no_dropout else self.dropout
    u = F.dropout(F.stack(pooled_prem, axis=0), ratio=ratio)
    v = F.dropout(F.stack(pooled_hypo, axis=0), ratio=ratio)

    features = F.concat([u, v, F.absolute(u - v), u * v], axis=1)
    ys = self.output(features, no_dropout)

    if softmax:
        ys = F.softmax(ys).data
    elif argmax:
        ys = self.xp.argmax(ys.data, axis=1)

    if get_embed:
        return ys, embed_prem, embed_hypo
    return ys
def test_backward_case1():
    """Backward if non-zero gradient is out of a face."""
    pyi, pxi = 25, 35

    vertices = cp.array(
        [[0.8, 0.8, 1.], [0.0, -0.5, 1.], [0.2, -0.4, 1.]], 'float32')
    faces = cp.array([[0, 1, 2]], 'int32')
    textures = cp.ones((faces.shape[0], 4, 4, 4, 3), 'float32')
    grad_ref = cp.array(
        [
            [1.6725862, -0.26021874, 0.],
            [1.41986704, -1.64284933, 0.],
            [0., 0., 0.],
        ],
        'float32')
    vertices, faces, textures, grad_ref = utils.to_minibatch(
        (vertices, faces, textures, grad_ref))

    renderer = Renderer()
    renderer.image_size = 64
    renderer.anti_aliasing = False
    renderer.perspective = False
    # Ambient light only.
    renderer.light_intensity_ambient = 1.0
    renderer.light_intensity_directional = 0.0

    vertices = chainer.Variable(vertices)
    images = renderer.render(vertices, faces, textures)
    images = cf.mean(images, axis=1)
    loss = cf.sum(cf.absolute(images[:, pyi, pxi] - 1))
    loss.backward()

    chainer.testing.assert_allclose(vertices.grad, grad_ref, rtol=1e-2)
def update_core(self):
    """Run one update of the encoder, decoder and critic.

    A batch is paired with a shuffled copy of itself; both are encoded,
    their latents are blended with a random ``alpha`` in [0, 0.5], and the
    three latents are decoded. The critic sees the blended reconstruction
    and ``gam``-weighted mixes of each input with its reconstruction. Each
    optimizer is then updated with its own loss function.
    """
    enc_opt = self.get_optimizer('Enc')
    dec_opt = self.get_optimizer('Dec')
    critic_opt = self.get_optimizer('Critic')

    batch1 = self.get_iterator('main').next()
    batch2 = random.sample(batch1, len(batch1))
    batchsize = len(batch1)

    x1 = Variable(self.converter(batch1, self.device))
    x2 = Variable(self.converter(batch2, self.device))
    xp = chainer.backend.get_array_module(x1.data)

    alpha = chainer.Variable(xp.random.rand(batchsize, dtype=xp.float32))
    # Fold uniform [0, 1) samples onto [0, 0.5].
    alpha = 0.5 - F.absolute(0.5 - alpha)
    # Trailing singleton axes let alpha broadcast against the latents.
    alpha_shape = (batchsize, 1, 1, 1) if self.net == 'conv' \
        else (batchsize, 1)
    alpha = alpha.reshape(*alpha_shape)

    z1 = self.Enc(x1)
    z2 = self.Enc(x2)
    zc = alpha * z1 + (1.0 - alpha) * z2

    yc = self.Dec(zc)
    y1 = self.Dec(z1)
    y2 = self.Dec(z2)

    cdis_c = self.Critic(yc)
    cdis_y1 = self.Critic(self.gam * x1 + (1 - self.gam) * y1)
    cdis_y2 = self.Critic(self.gam * x2 + (1 - self.gam) * y2)

    critic_opt.update(self.loss_Critic, cdis_c, alpha, cdis_y1, cdis_y2)
    enc_opt.update(self.loss_Enc, x1, x2, y1, y2, cdis_c)
    dec_opt.update(self.loss_Dec, x1, x2, y1, y2, cdis_c)
def __call__(self, x):
    """Decode a feature vector into mesh vertices.

    Per-vertex offsets are predicted, added to the scaled base vertices,
    squashed with ``tanh``, forced to have non-negative z, and finally
    expanded to the full vertex set by a multiplication with
    ``self.vertices_matrix``.

    Returns:
        Tuple ``(vertices, self.faces)``.
    """
    batch_size = x.shape[0]

    # Predict the reduced vertex set as [bs, n, 3].
    hidden = cf.relu(self.linear1(x))
    hidden = cf.relu(self.linear2(hidden))
    offsets = self.linear_bias(hidden) * self.scaling
    offsets = offsets.reshape((batch_size, -1, 3))

    # Add base sphere and normalize.
    base = self.xp.broadcast_to(
        (self.vertices_base * self.obj_scale)[None, :, :], offsets.shape)
    vertices = self.object_size * cf.tanh(offsets + base) * 0.99

    # z <- abs(z)
    vertices = cf.concat(
        (vertices[:, :, :2], cf.absolute(vertices[:, :, 2:3])), axis=2)

    # Expand to the full vertex set with one batched matmul on the
    # flattened coordinates.
    flat = cf.reshape(vertices, (batch_size, -1))
    expand = self.xp.tile(
        self.vertices_matrix[None, :, :], (batch_size, 1, 1))
    vertices = cf.matmul(expand, flat[:, :, None])
    vertices = cf.reshape(vertices, (batch_size, -1, 3))

    return vertices, self.faces
def _compute_ddqn_losses(self, exp_batch, errors_out=None):
    """Compute the n-step and 1-step Q-learning losses for a batch.

    Args:
        exp_batch (dict): A dict of batched arrays of transitions. Must
            contain ``'weights'`` and ``'is_n_step'``.
        errors_out (list or None): If given, it is cleared and filled with
            the per-sample absolute TD errors (summed over axis 1 when the
            error is 2-D). ``None`` skips this bookkeeping.

    Returns:
        Tuple ``(loss_nstep, loss_1step)``.
    """
    y, t = self._compute_y_and_ts(exp_batch)

    # ``errors_out`` defaults to None, so only touch it when a list was
    # actually supplied (same guard as ``_compute_loss``).
    if errors_out is not None:
        del errors_out[:]
        delta = F.absolute(y - t)
        if delta.ndim == 2:
            delta = F.sum(delta, axis=1)
        errors_out.extend(cuda.to_cpu(delta.array))

    # Complement of the n-step indicator selects the 1-step transitions.
    is_1_step = self.xp.abs(1. - exp_batch["is_n_step"])
    loss_1step = compute_weighted_value_loss(
        y, t, exp_batch['weights'],
        mask=is_1_step,
        clip_delta=self.clip_delta,
        batch_accumulator=self.batch_accumulator)
    loss_nstep = compute_weighted_value_loss(
        y, t, exp_batch['weights'],
        mask=exp_batch["is_n_step"],
        clip_delta=self.clip_delta,
        batch_accumulator=self.batch_accumulator)

    return loss_nstep, loss_1step
def __call__(self, x):
    """Decode a feature vector into (optionally symmetric) mesh vertices.

    Scaled per-vertex offsets are added to ``self.vertices_base``. When
    ``self.symmetric`` is set, z is made non-negative, the vertices are
    mapped through ``self.symmetric_matrix`` and z is multiplied by
    ``self.z_sign``. The result is squashed with ``tanh`` and scaled by
    ``self.tanh_scale``.

    Returns:
        Tuple ``(vertices, self.faces)``.
    """
    hidden = cf.relu(self.linear1_bn(self.linear1(x)))
    hidden = cf.relu(self.linear2_bn(self.linear2(hidden)))

    bias = cf.reshape(self.linear_bias(hidden), (-1, self.num_vertices, 3))
    bias *= self.scaling

    base = cf.broadcast_to(self.vertices_base[None, :, :], bias.shape)
    vertices = base + bias

    if self.symmetric:
        # Fold z to be non-negative: xy is [bs, nv, 2], z is [bs, nv, 1].
        vertices = cf.concat(
            (vertices[:, :, :2], cf.absolute(vertices[:, :, 2:3])), axis=2)

        # Contract the vertex axis with the symmetric matrix, then restore
        # the [bs, nv, 3] layout.
        mirrored = cf.tensordot(
            vertices, self.symmetric_matrix, axes=(1, 0))
        vertices = cf.transpose(mirrored, (0, 2, 1))

        # Apply the per-vertex z sign.
        signed_z = vertices[:, :, 2:3] * self.z_sign[None, :, None]
        vertices = cf.concat((vertices[:, :, :2], signed_z), axis=2)

    vertices = cf.tanh(vertices) * self.tanh_scale
    return vertices, self.faces
def test_backward_case2(self):
    """Compare renders of gradient-shifted vertices with reference images.

    After back-propagating a single-pixel loss, each x/y coordinate of each
    vertex is shifted by ``1 / grad`` and the re-rendered silhouette (with
    the probed pixel painted red) must match the stored PNG.
    """
    pyi, pxi = 40, 50

    vertices = chainer.Variable(cp.array(
        [[0.8, 0.8, 1.], [-0.5, -0.8, 1.], [0.8, -0.8, 1.]], 'float32'))
    faces = cp.array([[0, 1, 2]], 'int32')

    renderer = neural_renderer.Renderer()
    renderer.image_size = 64
    renderer.anti_aliasing = False
    renderer.perspective = False

    images = renderer.render_silhouettes(
        vertices[None, :, :], faces[None, :, :])
    loss = cf.sum(cf.absolute(images[:, pyi, pxi]))
    loss.backward()

    for i in range(3):
        for j, axis in enumerate(('x', 'y')):
            shifted = cp.copy(vertices.data)
            shifted[i, j] -= 1. / vertices.grad[i, j]
            images = renderer.render_silhouettes(
                shifted[None, :, :], faces[None, :, :])

            # Grey-to-RGB, then mark the probed pixel in red.
            image = np.tile(images[0].data.get()[:, :, None], (1, 1, 3))
            image[pyi, pxi] = [1, 0, 0]

            ref = scipy.misc.imread(
                './tests/data/rasterize_silhouettes_case2_v%d_%s.png'
                % (i, axis))
            ref = ref.astype('float32') / 255
            chainer.testing.assert_allclose(ref, image)
def get_onehot_grad(self, xs, ys=None):
    """Return per-token gradient-times-embedding scores for the hypothesis.

    Args:
        xs: Pair ``(premises, hypotheses)``.
        ys: Target labels; when ``None`` the model's own argmax predictions
            (computed with ``train=False``) are used.

    Returns:
        One score sequence per hypothesis. When the encoder returns the
        embeddings as a tuple this is the result of ``F.split_axis``;
        otherwise it is a list truncated to each hypothesis length.
    """
    if ys is None:
        with chainer.using_config('train', False):
            ys = self.predict(xs, argmax=True)

    u, _ = self.encoder.get_grad(xs[0])
    v, exs = self.encoder.get_grad(xs[1])

    encodings = F.concat((u, v, F.absolute(u - v), u * v), axis=1)
    outputs = self.output(self.mlp(encodings, no_dropout=True))
    loss = F.softmax_cross_entropy(outputs, ys)

    lengths = [len(x) for x in xs[1]]

    if isinstance(exs, tuple):
        # Variable-length case: concatenate, score, then split back.
        exs_grad = chainer.grad([loss], exs)
        ex_sections = np.cumsum([ex.shape[0] for ex in exs[:-1]])
        exs_cat = F.concat(exs, axis=0)
        grad_cat = F.concat(exs_grad, axis=0)
        scores = F.sum(grad_cat * exs_cat, axis=1)
        return F.split_axis(scores, ex_sections, axis=0)

    # Padded case: (batch_size, n_dim, max_length, 1)
    exs_grad = chainer.grad([loss], [exs])[0]
    assert exs_grad.shape == exs.shape
    scores = F.squeeze(F.sum(exs_grad * exs, 1), 2)
    return [row[:length] for row, length in zip(scores, lengths)]
def __call__(self, x):
    """Calculate minibatch discrimination features using broadcasting.

    Parameters
    ---------------
    x: Variable
        Input; it is flattened to (N, -1) before use.

    Returns the flattened input concatenated along axis 1 with the
    minibatch-discrimination features.
    """
    batch_size = x.shape[0]
    xp = x.xp

    x = F.reshape(x, (batch_size, -1))
    m = F.reshape(self.t(x), (-1, self.b, self.c))

    # Compare every sample with every other one: append a trailing axis and
    # swap it with the batch axis, then broadcast the two against each other.
    m = F.expand_dims(m, 3)
    m_T = F.transpose(m, (3, 1, 2, 0))
    m, m_T = F.broadcast(m, m_T)
    l1_norm = F.sum(F.absolute(m - m_T), axis=2)

    # A large penalty on the diagonal erases each sample's distance to itself.
    eraser = F.expand_dims(xp.eye(batch_size, dtype="f"), 1)
    eraser = F.broadcast_to(eraser, (batch_size, self.b, batch_size))
    o_X = F.sum(F.exp(-(l1_norm + 1e6 * eraser)), axis=2)

    # Concatenate along channels or units.
    return F.concat((x, o_X), axis=1)
def predict(self, xs, softmax=False, argmax=False, dknn=False,
            no_dropout=False):
    """Classify sentence pairs, optionally returning intermediate layers.

    Args:
        xs: Pair of inputs; each element is encoded separately.
        softmax: Return softmax probabilities as a raw array.
        argmax: Return predicted class indices (ignored if ``softmax``).
        dknn: Also return the list of intermediate representations,
            starting with the concatenated encodings.
        no_dropout: Forwarded to the encoder and the MLP.

    Returns:
        ``outputs``, or ``(outputs, dknn_layers)`` when ``dknn`` is true.
    """
    u = self.encoder(xs[0], dknn=False, no_dropout=no_dropout)
    v = self.encoder(xs[1], dknn=False, no_dropout=no_dropout)

    # Concatenate results as done in InferSent.
    encodings = F.concat((u, v, F.absolute(u - v), u * v), axis=1)
    dknn_layers = [encodings]

    if dknn:
        outputs, mlp_layers = self.mlp(
            encodings, dknn=True, no_dropout=no_dropout)
        dknn_layers = dknn_layers + mlp_layers
    else:
        outputs = self.mlp(encodings, dknn=False, no_dropout=no_dropout)

    outputs = self.output(outputs)

    if softmax:
        outputs = F.softmax(outputs).data
    elif argmax:
        outputs = self.xp.argmax(outputs.data, axis=1)

    if dknn:
        return outputs, dknn_layers
    return outputs
def nlogn_loss(prediction, label):
    """Return the mean of ``d * log2(d) / 256`` with ``d = |255 * error| + 1``.

    Both inputs are scaled by 255 before the residual is taken; the ``+ 1``
    keeps the logarithm non-negative.
    """
    scaled_error = prediction * 255 - label * 255
    magnitude = F.absolute(scaled_error) + 1
    return F.mean(magnitude * F.log2(magnitude) / 256)
def _compute_loss(self, exp_batch, errors_out=None):
    """Compute the Q-learning loss for a batch of experiences.

    Args:
        exp_batch (dict): A dict of batched arrays of transitions.
        errors_out (list or None): If given, it is cleared and filled with
            the per-sample absolute TD errors (summed over axis 1 when the
            error is 2-D).

    Returns:
        Computed loss from the minibatch of experiences; the weighted
        variant is used when ``exp_batch`` has a ``'weights'`` entry.
    """
    y, t = self._compute_y_and_t(exp_batch)

    if errors_out is not None:
        del errors_out[:]
        td_error = F.absolute(y - t)
        if td_error.ndim == 2:
            td_error = F.sum(td_error, axis=1)
        errors_out.extend(cuda.to_cpu(td_error.array))

    loss_kwargs = dict(clip_delta=self.clip_delta,
                       batch_accumulator=self.batch_accumulator)
    if 'weights' in exp_batch:
        return compute_weighted_value_loss(
            y, t, exp_batch['weights'], **loss_kwargs)
    return compute_value_loss(y, t, **loss_kwargs)
def _smooth_l1_loss(x, t, in_weight, sigma):
    """Return the summed smooth-L1 loss of the weighted difference.

    Elements with ``|d| < 1 / sigma**2`` use the quadratic branch
    ``sigma**2 / 2 * d**2``; the others use ``|d| - 0.5 / sigma**2``, where
    ``d = in_weight * (x - t)``.
    """
    sigma_sq = sigma**2
    weighted = in_weight * (x - t)
    magnitude = F.absolute(weighted)

    # Constant 0/1 mask selecting the quadratic region.
    is_small = (magnitude.data < (1. / sigma_sq)).astype(np.float32)

    quadratic = is_small * (sigma_sq / 2.) * F.square(weighted)
    linear = (1 - is_small) * (magnitude - 0.5 / sigma_sq)
    return F.sum(quadratic + linear)
def _smooth_l1_loss(x, t, in_weight, sigma):
    """Return the summed smooth-L1 loss of ``in_weight * (x - t)``.

    The loss is quadratic (``sigma**2 / 2 * d**2``) where
    ``|d| < 1 / sigma**2`` and linear (``|d| - 0.5 / sigma**2``) elsewhere.
    """
    inv_threshold = sigma ** 2
    residual = in_weight * (x - t)
    abs_residual = F.absolute(residual)

    # The mask is taken from the raw array so it carries no gradient.
    quad_mask = (abs_residual.array < (1. / inv_threshold)).astype(np.float32)

    per_element = (
        quad_mask * (inv_threshold / 2.) * F.square(residual)
        + (1 - quad_mask) * (abs_residual - 0.5 / inv_threshold))
    return F.sum(per_element)
def get_normalized_vector(d, xp=None):
    """Normalise ``d`` to (approximately) unit L2 norm per leading-axis entry.

    The reduction runs over every axis except axis 0. ``d`` is first divided
    by its maximum absolute value, then by its L2 norm; small constants
    guard both divisions.

    Args:
        d: Array (when ``xp`` is given) or Variable (when ``xp`` is None).
        xp: Array module to use for the raw-array path. With an array
            module the division is done in place on ``d``.

    Returns:
        The normalised ``d``.
    """
    reduce_axes = tuple(range(1, len(d.shape)))

    if xp is None:
        # Variable path: reductions must be broadcast back explicitly.
        max_abs = 1e-12 + F.max(F.absolute(d), reduce_axes, keepdims=True)
        d /= F.broadcast_to(max_abs, d.shape)
        l2_norm = F.sqrt(1e-6 + F.sum(d ** 2, reduce_axes, keepdims=True))
        d /= F.broadcast_to(l2_norm, d.shape)
        return d

    # Rescale by the max magnitude first, then by the L2 norm.
    d /= (1e-12 + xp.max(xp.abs(d), reduce_axes, keepdims=True))
    d /= xp.sqrt(1e-6 + xp.sum(d ** 2, reduce_axes, keepdims=True))
    return d
def read(address):
    """Read a value addressed by a real-valued vector.

    The address is squashed with ``tanh``, the sign pattern of the result
    selects an item via ``lookup`` and ``F.select_item``, and the selected
    value is scaled by the product of the absolute values of ``x``.

    NOTE(review): ``array``, ``lookup`` and ``x`` are not bound inside this
    function; presumably they come from an enclosing or module scope --
    confirm. In particular ``x`` may have been meant to be ``index``.
    """
    # Map from the reals to the hypercube of dimension n.
    index = F.tanh(address)

    # Map from a point to the nearest corner of the hypercube: one boolean
    # per coordinate. (The previous ``np.vectorize(f, index.data, ...)``
    # passed the data as ``otypes`` and never applied ``f`` to it.)
    mainIndex = index.data > 0

    mainValue = F.select_item(array, lookup(mainIndex))
    # exp(sum(log|x|)) is the product of the absolute values of x.
    scaleFactor = F.exp(F.sum(F.log(F.absolute(x))))
    return mainValue * scaleFactor
def test_backward_case2(self):
    """Backward if non-zero gradient is on a face."""
    pyi, pxi = 40, 50

    vertices = cp.array(
        [[0.8, 0.8, 1.], [-0.5, -0.8, 1.], [0.8, -0.8, 1.]], 'float32')
    faces = cp.array([[0, 1, 2]], 'int32')
    textures = cp.ones((faces.shape[0], 4, 4, 4, 3), 'float32')
    grad_ref = cp.array(
        [
            [0.98646867, 1.04628897, 0.],
            [-1.03415668, - 0.10403691, 0.],
            [3.00094461, - 1.55173182, 0.],
        ],
        'float32')
    vertices, faces, textures, grad_ref = utils.to_minibatch(
        (vertices, faces, textures, grad_ref))

    renderer = neural_renderer.Renderer()
    renderer.image_size = 64
    renderer.anti_aliasing = False
    renderer.perspective = False
    # Ambient light only.
    renderer.light_intensity_ambient = 1.0
    renderer.light_intensity_directional = 0.0

    vertices = chainer.Variable(vertices)
    images = renderer.render(vertices, faces, textures)
    images = cf.mean(images, axis=1)
    loss = cf.sum(cf.absolute(images[:, pyi, pxi]))
    loss.backward()

    grad_ref = cp.array(grad_ref, 'float32')
    chainer.testing.assert_allclose(vertices.grad, grad_ref, rtol=1e-2)
def test_backward_case1(self):
    """Backward if non-zero gradient is out of a face."""
    pyi, pxi = 25, 35

    vertices = cp.array(
        [[0.8, 0.8, 1.], [0.0, -0.5, 1.], [0.2, -0.4, 1.]], 'float32')
    faces = cp.array([[0, 1, 2]], 'int32')
    textures = cp.ones((faces.shape[0], 4, 4, 4, 3), 'float32')
    grad_ref = cp.array(
        [
            [1.6725862, -0.26021874, 0.],
            [1.41986704, -1.64284933, 0.],
            [0., 0., 0.],
        ],
        'float32')
    vertices, faces, textures, grad_ref = utils.to_minibatch(
        (vertices, faces, textures, grad_ref))

    renderer = neural_renderer.Renderer()
    renderer.image_size = 64
    renderer.anti_aliasing = False
    renderer.perspective = False
    # Ambient light only.
    renderer.light_intensity_ambient = 1.0
    renderer.light_intensity_directional = 0.0

    vertices = chainer.Variable(vertices)
    images = renderer.render(vertices, faces, textures)
    images = cf.mean(images, axis=1)
    loss = cf.sum(cf.absolute(images[:, pyi, pxi] - 1))
    loss.backward()

    chainer.testing.assert_allclose(vertices.grad, grad_ref, rtol=1e-2)
def main():
    """Train a two-task regression model with GradNorm and plot the results.

    Command-line options select the GPU id, the number of iterations and
    whether the task weights are adapted (``grad_norm``) or left untouched
    by the extra loss (``equal_weight``). After training, per-task losses,
    the mean loss ratio and the task weights are plotted.
    """
    parser = argparse.ArgumentParser(description='GradNorm')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--n-iter', '-it', type=int, default=5000)
    parser.add_argument('--mode', '-m',
                        choices=('grad_norm', 'equal_weight'),
                        default='grad_norm')
    args = parser.parse_args()

    np.random.seed(123)
    sigmas = [1, 10]
    n_task = len(sigmas)
    epsilons = np.random.normal(
        scale=3.5, size=(n_task, 100, 250)).astype(np.float32)
    dataset = RegressionDataset(sigmas, epsilons)
    model = RegressionTrainChain(RegressionChain(n_task))
    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    optimizer = chainer.optimizers.Adam(alpha=1e-2)
    optimizer.setup(model)

    train_iter = chainer.iterators.SerialIterator(dataset, 200)
    xp = model.xp

    weights = []
    task_losses = []
    loss_ratios = []
    final_layer_names = ['task_{}'.format(i) for i in range(n_task)]

    for t in range(args.n_iter):
        batch = train_iter.next()
        x, ts = chainer.dataset.convert.concat_examples(
            batch, device=args.gpu)

        task_loss = model(x, ts)
        weighted_task_loss = model.weight * task_loss
        if t == 0:
            # Reference point for the loss ratios below.
            initial_task_loss = task_loss.data
        loss = F.mean(weighted_task_loss)

        model.cleargrads()
        loss.backward()

        # Ignore a gradient to the coefficient vector, which
        # is computed from the standard loss.
        model.weight.cleargrad()

        if args.mode == 'grad_norm':
            # Use |\nabla_W w_i * L_i | = w_i |\nabla_W L_i|
            gygw_norms = xp.stack([
                xp.linalg.norm(
                    chainer.grad(
                        [task_loss[i]],
                        [getattr(model.model, layer_name).W])[0].data)
                for i, layer_name in enumerate(final_layer_names)
            ])
            norms = model.weight * gygw_norms

            alpha = 0.16
            mean_norm = xp.mean(norms.data)
            loss_ratio = task_loss.data / initial_task_loss
            inverse_train_rate = loss_ratio / xp.mean(loss_ratio)
            diff = norms - (inverse_train_rate ** alpha) * mean_norm
            grad_norm_loss = F.mean(F.absolute(diff))
            grad_norm_loss.backward()

        optimizer.update()

        # Renormalize so the task weights sum to n_task.
        normalize_coeff = n_task / xp.sum(model.weight.data)
        model.weight.data[:] = model.weight.data * normalize_coeff

        # Record
        task_losses.append(chainer.backends.cuda.to_cpu(task_loss.data))
        loss_ratios.append(np.mean(task_losses[-1] / task_losses[0]))
        weights.append(chainer.backends.cuda.to_cpu(model.weight.data))

        if t % 100 == 0:
            print('{}/{}: loss_ratio={}, weights={} task_loss={}'.format(
                t, args.n_iter, loss_ratios[-1],
                model.weight.data, task_loss.data))

    task_losses = np.array(task_losses)
    weights = np.array(weights)

    # One row of four panels.
    fig = plt.figure()
    panel_titles = (
        'loss (task 0)',
        'loss (task 1)',
        'sum of normalized losses',
        'change of weights over time',
    )
    panels = []
    for position, title in enumerate(panel_titles, start=1):
        panel = fig.add_subplot(1, 4, position)
        panel.set_title(title)
        panels.append(panel)
    ax1, ax2, ax3, ax4 = panels

    ax1.plot(task_losses[:, 0])
    ax2.plot(task_losses[:, 1])
    ax3.plot(loss_ratios)
    ax4.plot(weights[:, 0])
    ax4.plot(weights[:, 1])

    plt.show()