def check_gaussian_kl_divergence(self, mean, ln_var):
    if self.wrap_m:
        mean = chainer.Variable(mean)
    if self.wrap_v:
        ln_var = chainer.Variable(ln_var)
    actual = cuda.to_cpu(
        F.gaussian_kl_divergence(mean, ln_var, self.reduce).data)
    testing.assert_allclose(self.expect, actual)

def __call__(self, x, test=False, k=4):
    batch_size = x.data.shape[0]
    w = x.data.shape[2]
    tr, tg, tb = chainer.functions.split_axis(x, 3, 1)
    tr = F.reshape(tr, (batch_size * w * w, ))
    tg = F.reshape(tg, (batch_size * w * w, ))
    tb = F.reshape(tb, (batch_size * w * w, ))
    x = chainer.Variable(x.data.astype('f'))
    z_mu, z_var = self.enc(x, test)
    loss_kl = F.gaussian_kl_divergence(z_mu, z_var) / batch_size / self.k
    loss_decode = 0
    for _ in range(k):
        z = F.gaussian(z_mu, z_var)
        r, g, b = self.dec(z, test)
        r = F.transpose(r, (0, 2, 3, 1))
        r = F.reshape(r, (batch_size * w * w, 256))
        g = F.transpose(g, (0, 2, 3, 1))
        g = F.reshape(g, (batch_size * w * w, 256))
        b = F.transpose(b, (0, 2, 3, 1))
        b = F.reshape(b, (batch_size * w * w, 256))
        loss_decode += F.softmax_cross_entropy(r, tr) / k
        loss_decode += F.softmax_cross_entropy(g, tg) / k
        loss_decode += F.softmax_cross_entropy(b, tb) / k
    chainer.report({'loss_kl': loss_kl, 'loss_decode': loss_decode}, self)
    beta = 0.2
    return beta * loss_kl + (1 - beta) * loss_decode

def pretrain_step_vrae(self, x_input):
    """
    Maximum likelihood estimation

    :param x_input:
    :return: loss
    """
    batch_size = len(x_input)
    _, mu_z, ln_var_z = self.encoder.encode(x_input)
    z = F.gaussian(mu_z, ln_var_z)
    self.reset_state()
    accum_loss = 0
    self.lstm1.h = z
    for i in range(self.sequence_length):
        if i == 0:
            x = chainer.Variable(
                self.xp.asanyarray([self.start_token] * batch_size, 'int32'))
        else:
            x = chainer.Variable(
                self.xp.asanyarray(x_input[:, i - 1], 'int32'))
        scores = self.decode_one_step(x)
        loss = F.softmax_cross_entropy(
            scores,
            chainer.Variable(self.xp.asanyarray(x_input[:, i], 'int32')))
        accum_loss += loss
    dec_loss = accum_loss / self.sequence_length
    kl_loss = F.gaussian_kl_divergence(mu_z, ln_var_z) / batch_size
    return dec_loss, kl_loss

def sample_g0(self, zs):
    mu = self.l_g0_mu(zs)
    ln_var = self.l_g0_ln_var(zs)
    g_0 = F.gaussian(mu, ln_var)
    batchsize = len(mu.data)
    kl_g0 = gaussian_kl_divergence(mu, ln_var) / batchsize
    return g_0, kl_g0

def update_core(self):
    batch = self._iterators['main'].next()
    x = Variable(self.converter(batch, self.device))
    xp = cuda.get_array_module(x.data)

    enc = self.enc
    opt_enc = self._optimizers['enc']
    dec = self.dec
    opt_dec = self._optimizers['dec']

    mu, ln_var = enc(x)
    batchsize = len(mu.data)
    rec_loss = 0
    k = 10
    for l in range(k):
        z = F.gaussian(mu, ln_var)
        rec_loss += F.bernoulli_nll(x, dec(z, sigmoid=False)) / (k * batchsize)
    loss = rec_loss + 1.0 * F.gaussian_kl_divergence(mu, ln_var) / batchsize

    enc.cleargrads()
    dec.cleargrads()
    loss.backward()
    opt_enc.update()
    opt_dec.update()

    chainer.report({'rec_loss': rec_loss})
    chainer.report({'loss': loss})

def _forward(self, batch, test=False):
    # Encode and decode the training set
    encoded, means, ln_vars = self._encode(batch, test=test)
    rec = self._decode(encoded, test=test)
    normer = reduce(lambda x, y: x * y, means.data.shape)  # number of elements
    kl_loss = F.gaussian_kl_divergence(means, ln_vars) / normer
    # print 'means={}'.format(means.data.shape)
    # print 'ln_vars={}'.format(ln_vars.data.shape)
    # print 'kl_loss={}, normer={}'.format(kl_loss.data, normer)

    # Sample z from the prior
    samp_p = np.random.standard_normal(means.data.shape).astype('float32')
    z_p = chainer.Variable(samp_p)
    if self.flag_gpu:
        z_p.to_gpu()
    rec_p = self._decode(z_p)

    disc_rec, conv_layer_rec = self.disc(rec, test=test,
                                         dropout_ratio=self.dropout_ratio)
    disc_batch, conv_layer_batch = self.disc(batch, test=test,
                                             dropout_ratio=self.dropout_ratio)
    disc_x_p, conv_layer_x_p = self.disc(rec_p, test=test,
                                         dropout_ratio=self.dropout_ratio)
    dif_l = F.mean_squared_error(conv_layer_rec, conv_layer_batch)
    return kl_loss, dif_l, disc_rec, disc_batch, disc_x_p

def reconst(self, data, unregular=False):
    if data.ndim == 1:
        data = data.reshape(1, -1)

    with chainer.using_config('train', False), \
            chainer.using_config('enable_backprop', False):
        e_mu, e_var = self.encode(Variable(data))
        feat = F.gaussian(e_mu, e_var).data
        d_out = self.decode(Variable(feat))

    if self.is_gauss_dist:
        rec = d_out[0].data
        if unregular:  # the non-regularized term
            d_mu, d_var = d_out
            D_VAE = F.gaussian_kl_divergence(e_mu, e_var)
            A_VAE = 0.5 * (np.log(2 * np.pi) + d_var)
            M_VAE = 0.5 * (data - d_mu) ** 2 * F.exp(-d_var)
            return feat, rec, M_VAE.data
    else:
        rec = F.sigmoid(d_out).data

    mse = np.mean((rec - data) ** 2, axis=1)
    # lat_loss = F.gaussian_kl_divergence(e_mu, e_var)
    # rec_loss = F.bernoulli_nll(Variable(data), d_out)
    # vae_err = (lat_loss + rec_loss).data
    return feat, rec, mse

def lf(frames):
    mu, ln_var = self.encode(frames)
    z = F.gaussian(mu, ln_var)
    frames_flat = F.reshape(
        frames, (-1, frames.shape[1] * frames.shape[2] * frames.shape[3]))
    variational_flat = F.reshape(
        self.decode(z),
        (-1, frames.shape[1] * frames.shape[2] * frames.shape[3]))
    rec_loss = F.sum(F.square(frames_flat - variational_flat), axis=1)  # L2 reconstruction loss
    rec_loss = F.mean(rec_loss)
    kl_loss = F.sum(F.gaussian_kl_divergence(mu, ln_var, reduce="no"), axis=1)
    if self._cpu:
        kl_tolerance = np.asarray(self.kl_tolerance * self.n_latent).astype(np.float32)
    else:
        kl_tolerance = cp.asarray(self.kl_tolerance * self.n_latent).astype(cp.float32)
    kl_loss = F.maximum(kl_loss, F.broadcast_to(kl_tolerance, kl_loss.shape))
    kl_loss = F.mean(kl_loss)
    loss = rec_loss + kl_loss
    chainer.report({'loss': loss}, observer=self)
    chainer.report({'kl_loss': kl_loss}, observer=self)
    chainer.report({'rec_loss': rec_loss}, observer=self)
    return loss

def __call__(self, xs, ys):
    eos = self.xp.array([EOS], 'i')
    xs = [self.denoiseInput(x[::-1], self.denoising_rate) for x in xs]  # denoising
    # ys_d = [self.wordDropout(y, self.word_dropout) for y in ys]  # word dropout
    ys_d = [self.denoiseInput(y, self.word_dropout) for y in ys]  # word dropout
    ys_in = [F.concat([eos, y], axis=0) for y in ys_d]
    ys_out = [F.concat([y, eos], axis=0) for y in ys]

    # Both xs and ys_in are lists of arrays.
    exs = sequence_embed(self.embed_x, xs)
    eys = sequence_embed(self.embed_y, ys_in)

    batch = len(xs)
    # None represents a zero vector in an encoder.
    hx, at = self.encoder(None, exs)  # layer x batch x n_units
    hx_t = F.transpose(hx, (1, 0, 2))  # batch x layer x n_units
    mu = self.W_mu(hx_t)  # batch x n_latent
    ln_var = self.W_ln_var(hx_t)
    # print('{},{}'.format(mu.shape, ln_var.shape))
    # print(hx_t.shape)

    rec_loss = 0
    concat_ys_out = F.concat(ys_out, axis=0)
    for _ in range(self.k):
        z = F.gaussian(mu, ln_var)
        z_e = F.expand_dims(z, 2)  # batch x n_latent x 1
        Wz = self.W_h(z_e)  # batch x (layer x unit)
        # print('Wz: {}, {}'.format(Wz.shape, type(Wz)))
        hys = F.split_axis(Wz, self.n_layers, 1)  # layer x batch x unit
        # print('hys, {}'.format([x.shape for x in hys]))
        c_hy = F.concat([F.expand_dims(hy, 0) for hy in hys], 0)  # layer x batch x unit
        # print('c_hy: {}'.format(c_hy.shape))
        _, os = self.decoder(c_hy, eys)
        # print(len(os))
        concat_os = F.concat(os, axis=0)
        rec_loss += F.sum(
            F.softmax_cross_entropy(
                self.W(concat_os), concat_ys_out, reduce='no')) / (self.k * batch)

    latent_loss = F.gaussian_kl_divergence(mu, ln_var) / batch
    loss = rec_loss + self.C * latent_loss
    # wy = self.W(concat_os)
    # ys = self.xp.argmax(wy.data, axis=1).astype('i')
    # print(ys)
    chainer.report({'loss': loss.data}, self)
    chainer.report({'rec': rec_loss.data}, self)
    chainer.report({'lat': latent_loss.data}, self)
    n_words = concat_ys_out.shape[0]
    perp = self.xp.exp(loss.data * batch / n_words)
    chainer.report({'perp': perp}, self)
    return loss

def free_energy(self, x):
    # return -(free energy)
    enc_mu, enc_log_sigma_2 = self.encode(x)
    kl = F.gaussian_kl_divergence(enc_mu, enc_log_sigma_2)
    z = F.gaussian(enc_mu, enc_log_sigma_2)
    dec_mu = self.decode(z)
    nll = F.bernoulli_nll(x, dec_mu)
    return nll + kl

def compute_loss(self, x1, x2, t):
    '''
    Compute both encoder losses and the decoder loss.
    Use the KL divergence for the encoders and a Bernoulli loss for the decoder.
    Input: two images and target (composition of both images)
    Output: decoder loss, encoder1 loss, encoder2 loss
    '''
    mu1, ln_std1, mu2, ln_std2 = self.encode(x1, x2)
    kl1 = F.gaussian_kl_divergence(mu1, ln_std1)
    kl2 = F.gaussian_kl_divergence(mu2, ln_std2)
    sample1 = F.gaussian(mu1, ln_std1)
    sample2 = F.gaussian(mu2, ln_std2)
    sample = F.concat((sample1, sample2))
    output = self.decode(sample)
    nll = F.bernoulli_nll(F.reshape(t, (t.shape[0], 1, 32, 32)), output)
    return nll / (t.shape[0] * 32 * 32), kl1 / (x1.shape[0] * 32), kl2 / (x2.shape[0] * 32)

def __call__(self, x):
    xp = self.encoder.xp
    x = Variable(xp.asarray(x))
    zm, zv = self.encoder((x,))
    z = F.gaussian(zm, zv)
    mean, ln_var = self.decoder((z,))
    kl_loss = F.gaussian_kl_divergence(zm, zv)
    nll_loss = F.gaussian_nll(x, mean, ln_var)
    loss = kl_loss + nll_loss
    return loss

def forward(self, batch, test=False):
    out, means, ln_vars = self.encode(batch, test=test)
    out = self.decode(out, test=test)
    normer = reduce(lambda x, y: x * y, means.data.shape)
    kl_loss = F.gaussian_kl_divergence(means, ln_vars) / normer
    rec_loss = F.mean_squared_error(batch, out)
    return out, kl_loss, rec_loss

def encode(self, bow):
    """ Convert a bag-of-words vector of shape (n_docs, n_vocab)
    into latent mean and log-variance vectors.
    """
    lam = F.relu(self.l1(bow))
    pi = F.relu(self.l2(lam))
    mu, log_sigma = F.split_axis(self.mu_logsigma(pi), 2, 1)
    sample = F.gaussian(mu, log_sigma)
    loss = F.gaussian_kl_divergence(mu, log_sigma)
    return sample, loss

def update(net, optimizer, x):
    xp = cuda.get_array_module(x)
    div_weight = 1
    y, mean, var = net(x)
    loss = F.mean_squared_error(x, y) + \
        div_weight * F.gaussian_kl_divergence(mean, var) / float(y.size)
    net.cleargrads()
    loss.backward()
    optimizer.update()
    return loss

def __call__(self, x):
    x = Variable(x)
    start = time.time()
    zm, zv = self.encoder((x,))
    z = F.gaussian(zm, zv)
    y = self.decoder((z,))[0]
    kl_loss = F.gaussian_kl_divergence(zm, zv)
    nll_loss = F.bernoulli_nll(x, y)
    loss = kl_loss + nll_loss
    return loss

def __call__(self, x, l):
    mu, sigma = self.encoder(x)
    self.KL = F.gaussian_kl_divergence(mu, sigma)
    self.loss = Variable(np.array(0, dtype=np.float32))
    for i in range(l):
        sample = F.gaussian(mu, sigma)
        m, s = self.decoder(sample)
        self.loss += F.gaussian_nll(x, m, s)
    self.loss = self.loss / l + self.KL
    self.loss = self.loss / len(x)
    return self.loss

def get_loss(self, x, y, train=True):
    mu, ln_var = self.encode(x, y)
    batchsize = len(mu.data)
    # reconstruction loss
    rec_loss = 0
    z = F.gaussian(mu, ln_var)
    rec_loss += F.bernoulli_nll(y, self.decode(z, x)) / batchsize
    self.rec_loss = rec_loss
    self.loss = self.rec_loss + F.gaussian_kl_divergence(mu, ln_var) / batchsize
    return self.loss

def loss(self, x, y):
    batch_size = len(x)
    mu, ln_var = self._latent_distribution(x)
    z = self._sample(mu, ln_var)
    reconstruction_loss = F.mean_squared_error(x, self.decode(z))
    latent_loss = 0.0005 * F.gaussian_kl_divergence(mu, ln_var) / batch_size
    loss = reconstruction_loss + latent_loss
    return loss

def pretrain_step_vrae_tag(self, x_input, tag, word_drop_ratio=0.0, train=True):
    """
    Maximum likelihood estimation

    :param x_input:
    :return: loss
    """
    batch_size = len(x_input)
    _, mu_z, ln_var_z = self.encoder.encode_with_tag(x_input, tag, train)
    self.reset_state()
    if self.latent_dim:
        z = F.gaussian(mu_z, ln_var_z)
    else:
        latent = F.gaussian(mu_z, ln_var_z)
        tag_ = self.tag_embed(
            chainer.Variable(self.xp.array(tag, 'int32'), volatile=not train))
        self.lstm1.h = self.dec_input(F.concat((latent, tag_)))
        z = None
    accum_loss = 0
    for i in range(self.sequence_length):
        if i == 0:
            x = chainer.Variable(
                self.xp.asanyarray([self.start_token] * batch_size, 'int32'),
                volatile=not train)
        else:
            if np.random.random() < word_drop_ratio and train:
                x = chainer.Variable(
                    self.xp.asanyarray([self.start_token] * batch_size, 'int32'),
                    volatile=not train)
            else:
                x = chainer.Variable(
                    self.xp.asanyarray(x_input[:, i - 1], 'int32'),
                    volatile=not train)
        scores = self.decode_one_step(x, z=z)
        loss = F.softmax_cross_entropy(
            scores,
            chainer.Variable(self.xp.asanyarray(x_input[:, i], 'int32'),
                             volatile=not train))
        accum_loss += loss
    dec_loss = accum_loss
    kl_loss = F.gaussian_kl_divergence(mu_z, ln_var_z) / batch_size
    return dec_loss, kl_loss

def forward(self, hs):
    data_len = len(hs)
    mu = self.l1_mu(hs)
    ln_var = self.l1_ln_var(hs)
    mu = F.leaky_relu(mu, slope=0.2)
    ln_var = F.leaky_relu(ln_var, slope=0.2)
    zs = F.gaussian(mu, ln_var)
    loss = F.gaussian_kl_divergence(mu, ln_var) / data_len
    return zs, loss

def lf(self, x, mu, ln_var, split=False):
    batchsize = len(mu.data)
    # reconstruction loss; k (number of samples) and C (KL weight) are
    # assumed to be defined in the enclosing scope
    rec_loss = 0
    for l in range(k):
        z = F.gaussian(mu, ln_var)
        rec_loss += F.bernoulli_nll(x, self.decoder_model(z, sigmoid=False)) / (k * batchsize)
    kl_loss = C * F.gaussian_kl_divergence(mu, ln_var) / batchsize
    loss = rec_loss + kl_loss
    if split:
        return rec_loss, kl_loss
    else:
        return loss

def __call__(self, *args, beta=1.0):
    assert len(args) >= 2
    x = args[:-1]
    t = args[-1]
    mu_e, ln_var_e = self.predictor.encode(*x)
    batchsize = len(mu_e.data)
    rec_loss = 0
    for l in six.moves.range(self.k):
        z = F.gaussian(mu_e, ln_var_e)
        mu_d, ln_var_d = self.predictor.decode(z)
        rec_loss += F.gaussian_nll(t, mu_d, ln_var_d) / (self.k * batchsize)
    kl_loss = beta * F.gaussian_kl_divergence(mu_e, ln_var_e) / batchsize
    self.loss = rec_loss + kl_loss
    reporter_module.report({'loss': self.loss}, self)
    return self.loss

def term_feat(self, iloc, jloc, ival, jval, bs, nf, train=True):
    # Change all of the shapes to form interaction vectors
    shape = (bs, nf * 2, self.n_dim)
    feat_mu_vec = F.broadcast_to(self.feat_mu_vec.b, shape)
    feat_lv_vec = F.broadcast_to(self.feat_lv_vec.b, shape)
    if not train:
        feat_lv_vec += self.lv_floor

    # Construct the interaction mean and variance
    # iloc is (bs, nf), feat(iloc) is (bs, nf, ndim) and
    # dot(feat, feat) is (bs, nf)
    ivec = F.gaussian(feat_mu_vec + self.feat_delta_mu(iloc),
                      feat_lv_vec + self.feat_delta_lv(iloc))
    jvec = F.gaussian(feat_mu_vec + self.feat_delta_mu(jloc),
                      feat_lv_vec + self.feat_delta_lv(jloc))
    # feat is (bs, )
    feat = dot(F.sum(ivec * jvec, axis=2), ival * jval)

    # Compute the KLD for the group mean vector and variance vector
    kld1 = F.gaussian_kl_divergence(self.feat_mu_vec.b, self.feat_lv_vec.b)
    # Compute the KLD for vector deviations from the group mean and var
    kld2 = F.gaussian_kl_divergence(self.feat_delta_mu.W, self.feat_delta_lv.W)
    return feat, kld1 + kld2

def gaussian_kl_divergence():
    mu_data = np.array([1, 2, 3], dtype=np.float32)
    mu = Variable(mu_data)
    var_data = np.array([1, 4, 9], dtype=np.float32)
    var = Variable(var_data)
    ln_var = F.log(var)
    dim = len(mu_data)
    xp = cuda.get_array_module(var)
    # Expected value computed on raw arrays (not Variables)
    expected_kld = (xp.trace(xp.diag(var_data)) + mu_data.dot(mu_data)
                    - dim - xp.sum(ln_var.data)) * 0.5
    computed_kld = F.gaussian_kl_divergence(mu, ln_var)
    print('expected_kld: ', expected_kld)
    print('computed_kld: ', computed_kld)

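# For reference, the quantity checked above has the closed form
#   KL( N(mu, diag(exp(ln_var))) || N(0, I) )
#     = 0.5 * sum( mu**2 + exp(ln_var) - ln_var - 1 ),
# which is what F.gaussian_kl_divergence returns with the default
# reduce='sum'.  A minimal NumPy-only sanity check of the same identity
# (an illustrative sketch, not taken from the snippet above):
import numpy as np
from chainer import functions as F

mu = np.array([1, 2, 3], dtype=np.float32)
ln_var = np.log(np.array([1, 4, 9], dtype=np.float32))

expected = 0.5 * np.sum(mu ** 2 + np.exp(ln_var) - ln_var - 1)
computed = F.gaussian_kl_divergence(mu, ln_var).array
assert np.allclose(expected, computed)
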
def __call__(self, x):
    mu, ln_var = self.encoder(x)
    batchsize = len(mu.array)
    kl_penalty = F.gaussian_kl_divergence(mean=mu, ln_var=ln_var) / batchsize
    reconstr = 0
    for l in range(self.k):
        z = F.gaussian(mu, ln_var)
        recon = self.decoder(z)
        reconstr += 0.5 * F.mean_squared_error(
            recon, x) * x.shape[2] * x.shape[3] * x.shape[4] / self.k
    loss = reconstr + self.beta * kl_penalty
    reporter.report({'loss': loss}, self)
    reporter.report({'reconstr': reconstr}, self)
    reporter.report({'kl_penalty': kl_penalty}, self)
    return loss

def _train_vae(self, batch):
    status = {}
    (s, a, _, _, _) = batch
    reconstructed_action, mean, ln_var = self._vae((s, a))
    reconstruction_loss = F.mean_squared_error(reconstructed_action, a)
    latent_loss = 0.5 * F.gaussian_kl_divergence(mean, ln_var, reduce='mean')
    vae_loss = reconstruction_loss + latent_loss
    self._vae_optimizer.target.cleargrads()
    vae_loss.backward()
    vae_loss.unchain_backward()
    self._vae_optimizer.update()
    xp = chainer.backend.get_array_module(vae_loss)
    status['vae_loss'] = xp.array(vae_loss.array)
    return status

def free_energy_onestep(self):
    """
    [input]
    x    : BxHxW [mono] or Bx3HW [color] matrix (Variable)
    errx : BxHxW [mono] or Bx3HW [color] matrix (Variable)
    """
    self.c, self.h, enc_mu, enc_logsig2 = self.encode(
        self.c, self.h, self.x, self.errx, self.h2)
    kl = F.gaussian_kl_divergence(enc_mu, enc_logsig2)
    z = F.gaussian(enc_mu, enc_logsig2)
    z = enc_mu
    self.c2, self.h2, inc_canvas = self.decode(self.c2, self.h2, z)
    self.canvas += inc_canvas
    y = F.sigmoid(self.canvas)
    # y = F.relu(self.canvas + 0.5) - F.relu(self.canvas - 0.5)
    self.errx = self.x - y
    self.t += 1
    return y, kl

def evaluate(self):
    iterator = self._iterators['main']
    model = self._targets['model']
    encoder = model.encoder
    decoder = model.decoder
    k = model.k
    beta = model.beta

    if hasattr(iterator, 'reset'):
        iterator.reset()
        it = iterator
    else:
        it = copy.copy(iterator)

    summary = reporter.DictSummary()
    for batch in it:
        observation = {}
        with reporter.report_scope(observation):
            x = self.converter(batch, self.device)
            with chainer.using_config('train', False), \
                    chainer.using_config('enable_backprop', False):
                mu, ln_var = encoder(x)
                batchsize = len(mu.array)
                kl_penalty = F.gaussian_kl_divergence(
                    mean=mu, ln_var=ln_var) / batchsize
                reconstr = 0
                for l in range(k):
                    z = F.gaussian(mu, ln_var)
                    recon = decoder(z)
                    reconstr += 0.5 * F.mean_squared_error(
                        recon, x) * x.shape[2] * x.shape[3] * x.shape[4] / k
                loss = reconstr + beta * kl_penalty
            observation['validation/loss'] = loss
            observation['validation/reconstr'] = reconstr
            observation['validation/kl_penalty'] = kl_penalty
        summary.add(observation)
    return summary.compute_mean()

def calcLoss(self, t, mu, ln_var):
    k = self.sample_size
    kl_zero_epoch = self.kl_zero_epoch
    loss = None
    t_pred = [t_e[1:] + [2] for t_e in t]
    t_pred = [xp.asarray(tp_e, dtype=xp.int32) for tp_e in t_pred]
    t = self.denoiseInput(t)
    print("t:{}".format([self.vocab.itos(t_e) for t_e in t[0]]))
    t_vec = self.makeEmbedBatch(t)
    for l in range(k):
        z = F.gaussian(mu, ln_var)
        if loss is None:
            loss = self.decode(z, t_vec, t_pred) / (k * self.batch_size)
        else:
            loss += self.decode(z, t_vec, t_pred) / (k * self.batch_size)
    C = 0.06 * (self.epoch_now - kl_zero_epoch) / self.epoch
    if self.epoch_now > kl_zero_epoch:
        loss += C * F.gaussian_kl_divergence(mu, ln_var) / self.batch_size
    return loss

def lf(self, x):
    """AutoEncoder"""
    mu, ln_var = self.encode(x)
    # reconstruction loss
    z = F.gaussian(mu, ln_var)
    outputs_mu, outputs_sigma_2 = self.decode(z)
    m_vae_loss = (F.flatten(x) - F.flatten(outputs_mu)) ** 2 \
        / F.flatten(outputs_sigma_2)
    m_vae_loss = 0.5 * F.sum(m_vae_loss)
    a_vae_loss = F.log(2 * 3.14 * F.flatten(outputs_sigma_2))
    a_vae_loss = 0.5 * F.sum(a_vae_loss)
    d_vae_loss = F.gaussian_kl_divergence(mu, ln_var)
    self.loss = F.mean(d_vae_loss + m_vae_loss + a_vae_loss)
    return self.loss

def free_energy_onestep(self):  # ,h2,aa,bb):
    """
    [input]
    x    : BxHxW [mono] or 3BxHxW [color] matrix (Variable)
    errx : BxHxW [mono] or 3BxHxW [color] matrix (Variable)
    """
    B = self.B
    C = self.C
    rP = self.Read_patch
    wP = self.Write_patch
    x_patch = self.R_filter.Filter(self.x)
    # print("x_patch max", np.max(x_patch.data))
    errx_patch = self.R_filter.Filter(self.errx)

    # reshape 3BxHxW -> Bx3HW array
    x_patch_2D = F.reshape(x_patch, (B, C * rP ** 2))
    errx_patch_2D = F.reshape(errx_patch, (B, C * rP ** 2))

    self.c, self.h, enc_mu, enc_logsig2 = self.encode(
        self.c, self.h, x_patch_2D, errx_patch_2D, self.h2)
    kl = F.gaussian_kl_divergence(enc_mu, enc_logsig2)
    z = F.gaussian(enc_mu, enc_logsig2)

    self.c2, self.h2, inc_canvas, Wmean_x, Wmean_y, Wln_var, Wln_stride, Wln_gamma, \
        Rmean_x, Rmean_y, Rln_var, Rln_stride, Rln_gamma = self.decode(
            self.c2, self.h2, z)  # ,aa,bb)
    self.W_filter.mkFilter(Wmean_x, Wmean_y, Wln_var, Wln_stride, Wln_gamma)
    self.R_filter.mkFilter(Rmean_x, Rmean_y, Rln_var, Rln_stride, Rln_gamma)

    inc_canvas = F.reshape(inc_canvas, (B * C, wP, wP))
    # print("Wfilter:", np.max(self.W_filter.Fx.data), np.min(self.W_filter.Fx.data),
    #       np.max(self.W_filter.Fy.data), np.min(self.W_filter.Fy.data))
    # print("Wmean:{} {}, Wlnvar:{}, Wln_stride:{}, Wln_gamma:{}".format(
    #     Wmean_x.data, Wmean_y.data, Wln_var.data, Wln_stride.data, Wln_gamma.data))
    inc_canvas = self.W_filter.InvFilter(inc_canvas)
    self.canvas += inc_canvas
    y = F.sigmoid(self.canvas)  # [normal]: sigmoid, [whitened]: tanh
    # y = F.relu(self.canvas + 0.5) - F.relu(self.canvas - 0.5)
    self.errx = self.x - y
    self.t += 1
    return y, kl  # ,h2

def calcLoss(self, t, categ_vec_h, categ_vec_c, mu, ln_var, wei_arr=None):
    k = self.sample_size
    loss = None
    t_pred = [t_e[1:] + [2] for t_e in t]
    t_pred = [xp.asarray(tp_e, dtype=xp.int32) for tp_e in t_pred]
    t = self.denoiseInput(t)
    t_vec = self.makeEmbedBatch(t)
    for l in range(k):
        z = F.gaussian(mu, ln_var)
        if loss is None:
            loss = self.decode(z, categ_vec_h, categ_vec_c, t_vec, t_pred,
                               wei_arr) / (k * self.batch_size)
        else:
            loss += self.decode(z, categ_vec_h, categ_vec_c, t_vec, t_pred,
                                wei_arr) / (k * self.batch_size)
    C = 0.005 * (self.epoch_now - self.kl_zero_epoch) / self.epoch  # 0.02
    if self.epoch_now > self.kl_zero_epoch:
        loss += C * F.gaussian_kl_divergence(mu, ln_var) / self.batch_size
    return loss

def train_one(enc, gen, dis, optimizer_enc, optimizer_gen, optimizer_dis,
              x_batch, gpu_device):
    batch_size = len(x_batch)
    if gpu_device is None:
        xp = np
    else:
        xp = cuda.cupy

    # encode
    x_in = xp.asarray(x_batch)
    z0, mean, var = enc(Variable(x_in))
    x0 = gen(z0)
    y0, l0 = dis(x0)
    loss_enc = F.gaussian_kl_divergence(mean, var) / float(l0.data.size)
    loss_gen = F.softmax_cross_entropy(y0, Variable(xp.zeros(batch_size).astype(np.int32)))
    loss_dis = F.softmax_cross_entropy(y0, Variable(xp.ones(batch_size).astype(np.int32)))

    # train generator
    z1 = Variable(xp.random.normal(0, 1, (batch_size, latent_size)).astype(np.float32))
    x1 = gen(z1)
    y1, l1 = dis(x1)
    loss_gen += F.softmax_cross_entropy(y1, Variable(xp.zeros(batch_size).astype(np.int32)))
    loss_dis += F.softmax_cross_entropy(y1, Variable(xp.ones(batch_size).astype(np.int32)))

    # train discriminator
    y2, l2 = dis(Variable(xp.asarray(x_batch)))
    loss_enc += F.mean_squared_error(l0, l2)
    loss_gen += 0.1 * F.mean_squared_error(l0, l2)
    loss_dis += F.softmax_cross_entropy(y2, Variable(xp.zeros(batch_size).astype(np.int32)))

    optimizer_enc.zero_grads()
    loss_enc.backward()
    optimizer_enc.update()
    optimizer_gen.zero_grads()
    loss_gen.backward()
    optimizer_gen.update()
    optimizer_dis.zero_grads()
    loss_dis.backward()
    optimizer_dis.update()
    return (float(loss_enc.data), float(loss_gen.data), float(loss_dis.data))

## Loss buffers
sum_enc_loss = 0.
sum_dec_loss = 0.
sum_dis_loss = 0.
sum_gan_loss = 0.
sum_like_loss = 0.
sum_prior_loss = 0.

## Mini-batch training
for i in six.moves.range(0, N_train, batchsize):
    # Extract one batch of training data
    x = chainer.Variable(xp.asarray(x_train[perm[i:i + batchsize]]))

    ##### Forward pass and loss computation
    # KL divergence
    mu, ln_var = encode(x, test=False)
    x_rec = decode(mu, sigmoid=True)
    batchsize = len(mu.data)
    kl_loss = F.gaussian_kl_divergence(mu, ln_var) / batchsize

    # Generate a random z and decode it
    ## z is drawn from N(0, 1)
    z_p = xp.random.standard_normal(mu.data.shape).astype('float32')
    z_p = chainer.Variable(z_p)
    x_p = decode(z_p)

    # Get the discriminator outputs
    d_x_rec, h_out_rec = disc(x_rec)
    d_x_base, h_out_base = disc(x)
    d_x_p, h_out_p = disc(x_p)

    # Softmax cross entropy for the discriminator
    L_rec = F.softmax_cross_entropy(d_x_rec, Variable(xp.zeros(batchsize, dtype=np.int32)))
    L_base = F.softmax_cross_entropy(d_x_base, Variable(xp.ones(batchsize, dtype=np.int32)))
    L_p = F.softmax_cross_entropy(d_x_p, Variable(xp.zeros(batchsize, dtype=np.int32)))

def check_invalid_option(self, xp):
    m = chainer.Variable(xp.asarray(self.mean))
    v = chainer.Variable(xp.asarray(self.ln_var))
    with self.assertRaises(ValueError):
        F.gaussian_kl_divergence(m, v, 'invalid_option')

def check_gaussian_kl_divergence(self, mean, ln_var):
    m = chainer.Variable(mean)
    v = chainer.Variable(ln_var)
    actual = cuda.to_cpu(F.gaussian_kl_divergence(m, v, self.reduce).data)
    testing.assert_allclose(self.expect, actual)
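
# For context, a standalone usage sketch of the reduction modes these test
# helpers exercise (illustrative only, not part of the test suite): the
# default reduce='sum' returns a scalar summed over all elements, while
# reduce='no' keeps the per-element KL terms.
import numpy as np
from chainer import functions as F

mean = np.zeros((2, 3), dtype=np.float32)
ln_var = np.zeros((2, 3), dtype=np.float32)  # unit variance, so KL is zero

summed = F.gaussian_kl_divergence(mean, ln_var)                 # scalar Variable
per_elem = F.gaussian_kl_divergence(mean, ln_var, reduce='no')  # shape (2, 3)
print(summed.array, per_elem.shape)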