def __call__(self, h, dist):
    """
    Args:
        h (numpy.ndarray): axis 0 represents minibatch index,
            axis 1 represents atom_index and axis 2 represents
            feature dimension.
        dist (numpy.ndarray): axis 0 represents minibatch index,
            axis 1 and 2 represent distance between atoms.
    """
    mb, atom, ch = h.shape
    if ch != self.hidden_dim:
        raise ValueError('h.shape[2] {} and hidden_dim {} must be same!'
                         .format(ch, self.hidden_dim))
    embedlist = self.xp.arange(
        self.num_rbf).astype('f') * self.radius_resolution
    dist = functions.reshape(dist, (mb, atom, atom, 1))
    dist = functions.broadcast_to(dist, (mb, atom, atom, self.num_rbf))
    dist = functions.exp(- self.gamma * (dist - embedlist) ** 2)
    dist = functions.reshape(dist, (-1, self.num_rbf))
    dist = self.dense1(dist)
    dist = functions.softplus(dist)
    dist = self.dense2(dist)
    dist = functions.softplus(dist)
    dist = functions.reshape(dist, (mb, atom, atom, self.hidden_dim))
    h = functions.reshape(h, (mb, atom, 1, self.hidden_dim))
    h = functions.broadcast_to(h, (mb, atom, atom, self.hidden_dim))
    h = functions.sum(h * dist, axis=1)
    return h
def dis_loss(discriminator, y, t):
    y_dis = discriminator(y)
    t_dis = discriminator(t)

    loss = F.mean(F.softplus(-t_dis)) + F.mean(F.softplus(y_dis))

    return loss
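# A minimal NumPy sketch (illustrative only, not from the original code) showing
# that the softplus form above is the usual non-saturating GAN discriminator
# objective: softplus(-t) == -log(sigmoid(t)) and softplus(y) == -log(1 - sigmoid(y)).
# The helper below assumes plain NumPy arrays of logits; the name is hypothetical.
import numpy as np

def dis_loss_reference(fake_logits, real_logits):
    # numerically stable softplus: log(1 + exp(a))
    softplus = lambda a: np.log1p(np.exp(-np.abs(a))) + np.maximum(a, 0.0)
    return np.mean(softplus(-real_logits)) + np.mean(softplus(fake_logits))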
def train(self, expert_states, expert_next_states, expert_action_probs,
          fake_states, fake_next_states, fake_action_probs, gamma):
    def logits(states, next_states, log_action_probs):
        # p(expert|state, action) = sigmoid(logits)
        rewards = self.reward_net(states)
        # print(F.mean(rewards))
        state_values = self.value_net(states)
        next_state_values = self.value_net(next_states)
        # This term stabilises training
        return rewards + gamma * next_state_values - state_values \
            - log_action_probs[:, None].array

    # softplus(-logits) == -log(sigmoid(logits))
    # print('expert: ', end='')
    loss = F.mean(F.softplus(-logits(expert_states, expert_next_states,
                                     expert_action_probs)))
    # print('fake: ', end='')
    loss += F.mean(F.softplus(logits(fake_states, fake_next_states,
                                     fake_action_probs)))

    # add gradient penalty for reward
    # xp = chainer.cuda.get_array_module(expert_states)
    # e = xp.random.uniform(0., 1., len(expert_states))[:, None].astype(xp.float32)
    # x_hat = chainer.Variable((e * expert_states + (1 - e) * fake_states), requires_grad=True)
    # grad, = chainer.grad([self.reward_net(x_hat)], [x_hat], enable_double_backprop=True)
    # loss_grad = 0.1 * F.mean(F.sqrt(F.batch_l2_norm_squared(grad)))
    # loss += loss_grad

    self.reward_net.cleargrads()
    self.value_net.cleargrads()
    loss.backward()
    self.reward_optimizer.update()
    self.value_optimizer.update()
    return loss
def __call__(self, h, dist):
    """
    Args:
        h (numpy.ndarray): axis 0 represents minibatch index,
            axis 1 represents atom_index and axis 2 represents
            feature dimension.
        dist (numpy.ndarray): axis 0 represents minibatch index,
            axis 1 and 2 represent distance between atoms.
    """
    mb, atom, ch = h.shape
    if ch != self.hidden_dim:
        raise ValueError(
            'h.shape[2] {} and hidden_dim {} must be same!'.format(
                ch, self.hidden_dim))
    embedlist = self.xp.arange(
        self.num_rbf).astype('f') * self.radius_resolution
    dist = functions.reshape(dist, (mb, atom, atom, 1))
    dist = functions.broadcast_to(dist, (mb, atom, atom, self.num_rbf))
    dist = functions.exp(-self.gamma * (dist - embedlist)**2)
    dist = functions.reshape(dist, (-1, self.num_rbf))
    dist = self.dense1(dist)
    dist = functions.softplus(dist)
    dist = self.dense2(dist)
    dist = functions.softplus(dist)
    dist = functions.reshape(dist, (mb, atom, atom, self.hidden_dim))
    h = functions.reshape(h, (mb, atom, 1, self.hidden_dim))
    h = functions.broadcast_to(h, (mb, atom, atom, self.hidden_dim))
    h = functions.sum(h * dist, axis=1)
    return h
def loss_dis(self, dis, y_fake, y_real):
    batchsize = len(y_fake)
    L1 = F.sum(F.softplus(-y_real)) / batchsize
    L2 = F.sum(F.softplus(y_fake)) / batchsize
    loss = L1 + L2
    chainer.report({'loss': loss}, dis)
    return loss
def loss_gen_multi(self, gen, y_flowgan_fake, y_texgan_fake, C=0.1):
    batchsize = y_flowgan_fake.data.shape[0]
    loss_flow = F.sum(F.softplus(-y_flowgan_fake)) / batchsize
    loss_tex = F.sum(F.softplus(-y_texgan_fake)) / batchsize
    loss = loss_flow + C * loss_tex
    chainer.report({'loss': loss}, gen)
    return loss
def loss_dis(self, dis, y_fake, y_real):
    batchsize = len(y_fake)
    L1 = F.sum(F.softplus(-y_real)) / batchsize
    L2 = F.sum(F.softplus(y_fake)) / batchsize
    loss = L1 + L2
    train_loss_dis.append(loss)
    return loss
def loss_dis(y_in, y_out):
    batchsize, _, w, h = y_in.data.shape
    L1 = F.sum(F.softplus(-y_in)) / batchsize / w / h
    L2 = F.sum(F.softplus(y_out)) / batchsize / w / h
    loss = L1 + L2
    # print("loss_dis:", loss)
    return loss
def loss_discriminator(self, discriminator, real, fake):
    batchsize, _, h, w = fake.data.shape
    loss1 = F.sum(F.softplus(-real)) / batchsize / h / w
    loss2 = F.sum(F.softplus(fake)) / batchsize / h / w
    loss = loss1 + loss2
    chainer.report({'loss': loss}, discriminator)
    return loss
def loss_dis(self, dis, y_in, y_out, a=1, b=1):
    L1 = a * F.sum(F.softplus(-y_in)) / len(y_in)
    L2 = b * F.sum(F.softplus(y_out)) / len(y_out)
    loss = (L1 + L2) / (a + b)
    chainer.report({'loss': loss}, dis)
    return loss
def compute_discriminator_loss(self, image_real, image_fake, image_labeled, label):
    # predict
    prediction_real = self.discriminator(image_real)
    prediction_fake = self.discriminator(image_fake)
    prediction_labeled = self.discriminator(image_labeled)

    # discriminator loss
    prediction_real_lse = cf.logsumexp(prediction_real, axis=1)
    prediction_fake_lse = cf.logsumexp(prediction_fake, axis=1)
    loss_discriminator = (
        0.5 * cf.sum(cf.softplus(prediction_real_lse)) / prediction_real_lse.size
        + 0.5 * cf.sum(-prediction_real_lse) / prediction_real_lse.size
        + 0.5 * cf.sum(cf.softplus(prediction_fake_lse)) / prediction_fake_lse.size)

    # classifier loss
    loss_classifier = cf.softmax_cross_entropy(prediction_labeled, label)

    loss = loss_discriminator + loss_classifier

    chainer.reporter.report(
        {
            'loss_discriminator': loss_discriminator,
            'loss_classifier': loss_classifier
        }, self)
    return loss
def loss_dis(self, dis, y_in, y_out):
    batchsize, _, w, h = y_in.data.shape
    L1 = F.sum(F.softplus(-y_in)) / batchsize / w / h
    L2 = F.sum(F.softplus(y_out)) / batchsize / w / h
    loss = L1 + L2
    chainer.report({'loss': loss}, dis)
    return loss
def loss_dis(self, dis, completed_y, real_y):
    batchsize = len(completed_y)
    L1 = F.sum(F.softplus(-real_y)) / batchsize
    L2 = F.sum(F.softplus(completed_y)) / batchsize
    loss = L1 + L2  # GAN loss
    chainer.report({'loss': loss}, dis)
    return loss
def dis_loss(opt, real_d, fake_d, observer=None):
    # adversarial loss
    adv_loss = 0
    real_loss = 0
    fake_loss = 0
    if opt.adv_loss_mode == 'bce':
        real_loss = F.mean(F.softplus(-real_d))
        fake_loss = F.mean(F.softplus(fake_d))
    if opt.adv_loss_mode == 'mse':
        xp = cuda.get_array_module(real_d.array)
        real_loss = F.mean_squared_error(real_d, xp.ones_like(real_d.array))
        fake_loss = F.mean_squared_error(fake_d, xp.zeros_like(fake_d.array))
    if opt.adv_loss_mode == 'hinge':
        real_loss = F.mean(F.relu(1.0 - real_d))
        fake_loss = F.mean(F.relu(1.0 + fake_d))
    adv_loss = (real_loss + fake_loss) * 0.5

    loss = adv_loss

    if observer is not None:
        report(
            {
                'loss': loss,
                'adv_loss': adv_loss,
                'real_loss': real_loss,
                'fake_loss': fake_loss
            },
            observer=observer)
    return loss
def learning_GAN_log(generator, discriminator, optgen, optdis, data, result, T=200):
    for time in range(T):
        optgen.target.cleargrads()
        ytemp = generator(data[1])
        with chainer.using_config('train', False):
            ytrain_false = discriminator(ytemp)
        loss_train_gen = 0.5 * F.mean(F.softplus(-ytrain_false))
        loss_train_gen.backward()
        optgen.update()

        # fake images
        optdis.target.cleargrads()
        ytrain_false = discriminator(ytemp)
        ytrain_true = discriminator(data[0])
        loss1 = 0.5 * F.mean(F.softplus(ytrain_false))
        loss2 = 0.5 * F.mean(F.softplus(-ytrain_true))
        loss_train_dis = loss1 + loss2
        loss_train_dis.backward()
        optdis.update()

        result[0].append(cuda.to_cpu(loss_train_gen.data))
        result[1].append(cuda.to_cpu(loss1.data))
        result[2].append(cuda.to_cpu(loss2.data))
def __call__(self, x):
    # <question : is batchsize>1 possible for RNN ? if No, I will implement
    # calculations without batch dimension.>
    self.chi = F.concat((x, self.r))
    (self.nu, self.xi) = \
        F.split_axis(self.l_dl(self.chi), [self.Y], 1)
    (self.kr, self.betar, self.kw, self.betaw,
     self.e, self.v, self.f, self.ga, self.gw, self.pi
     ) = F.split_axis(self.xi, np.cumsum(
        [self.W * self.R, self.R, self.W, 1, self.W, self.W, self.R, 1, 1]), 1)

    self.kr = F.reshape(self.kr, (self.R, self.W))  # R * W
    self.betar = 1 + F.softplus(self.betar)  # 1 * R
    # self.kw: 1 * W
    self.betaw = 1 + F.softplus(self.betaw)  # 1 * 1
    self.e = F.sigmoid(self.e)  # 1 * W
    # self.v : 1 * W
    self.f = F.sigmoid(self.f)  # 1 * R
    self.ga = F.sigmoid(self.ga)  # 1 * 1
    self.gw = F.sigmoid(self.gw)  # 1 * 1
    self.pi = F.softmax(F.reshape(self.pi, (self.R, 3)))  # R * 3 (softmax for 3)

    # self.wr : N * R
    self.psi_mat = 1 - F.matmul(Variable(np.ones((self.N, 1)).astype(np.float32)),
                                self.f) * self.wr  # N * R
    self.psi = Variable(np.ones((self.N, 1)).astype(np.float32))  # N * 1
    for i in range(self.R):
        self.psi = self.psi * F.reshape(self.psi_mat[:, i], (self.N, 1))  # N * 1

    # self.ww, self.u : N * 1
    self.u = (self.u + self.ww - (self.u * self.ww)) * self.psi
    self.a = u2a(self.u)  # N * 1
    self.cw = C(self.M, self.kw, self.betaw)  # N * 1
    self.ww = F.matmul(F.matmul(self.a, self.ga) + F.matmul(self.cw, 1.0 - self.ga),
                       self.gw)  # N * 1
    self.M = self.M * (np.ones((self.N, self.W)).astype(np.float32)
                       - F.matmul(self.ww, self.e)) + F.matmul(self.ww, self.v)  # N * W
    self.p = (1.0 - F.matmul(Variable(np.ones((self.N, 1)).astype(np.float32)),
                             F.reshape(F.sum(self.ww), (1, 1)))) \
        * self.p + self.ww  # N * 1
    self.wwrep = F.matmul(self.ww,
                          Variable(np.ones((1, self.N)).astype(np.float32)))  # N * N
    self.L = (1.0 - self.wwrep - F.transpose(self.wwrep)) * self.L \
        + F.matmul(self.ww, F.transpose(self.p))  # N * N
    self.L = self.L * (np.ones((self.N, self.N)) - np.eye(self.N))  # force L[i,i] == 0
    self.fo = F.matmul(self.L, self.wr)  # N * R
    self.ba = F.matmul(F.transpose(self.L), self.wr)  # N * R

    self.cr_list = [0] * self.R
    for i in range(self.R):
        self.cr_list[i] = C(self.M, F.reshape(self.kr[i, :], (1, self.W)),
                            F.reshape(self.betar[0, i], (1, 1)))  # N * 1
    self.cr = F.concat(self.cr_list)  # N * R

    self.bacrfo = F.concat((
        F.reshape(F.transpose(self.ba), (self.R, self.N, 1)),
        F.reshape(F.transpose(self.cr), (self.R, self.N, 1)),
        F.reshape(F.transpose(self.fo), (self.R, self.N, 1)),
    ), 2)  # R * N * 3
    self.pi = F.reshape(self.pi, (self.R, 3, 1))  # R * 3 * 1
    self.wr = F.transpose(F.reshape(F.batch_matmul(self.bacrfo, self.pi),
                                    (self.R, self.N)))  # N * R
    self.r = F.reshape(F.matmul(F.transpose(self.M), self.wr),
                       (1, self.R * self.W))  # W * R (-> 1 * RW)

    self.y = self.l_Wr(self.r) + self.nu  # 1 * Y
    return self.y
def update_core(self):
    gen_optimizer = self.get_optimizer('opt_gen')
    dis_optimizer = self.get_optimizer('opt_dis')
    xp = self.gen.xp

    batch = self.get_iterator('main').next()
    batchsize = len(batch)
    x = []
    for i in range(batchsize):
        x.append(np.asarray(batch[i][0]).astype("f"))
    x_real = Variable(xp.asarray(x))

    y_real = self.dis(x_real)

    z = Variable(xp.asarray(self.gen.make_hidden(batchsize)))
    x_fake = self.gen(z)
    y_fake = self.dis(x_fake)

    loss_dis = F.sum(F.softplus(-y_real)) / batchsize
    loss_dis += F.sum(F.softplus(y_fake)) / batchsize

    loss_gen = F.sum(F.softplus(-y_fake)) / batchsize

    self.gen.cleargrads()
    loss_gen.backward()
    gen_optimizer.update()
    x_fake.unchain_backward()

    self.dis.cleargrads()
    loss_dis.backward()
    dis_optimizer.update()

    chainer.reporter.report({'loss_gen': loss_gen})
    chainer.reporter.report({'loss_dis': loss_dis})
def loss_dis_removal(self, dis_removal, y_in_removal, y_out_removal):
    batchsize, _, w, h = y_in_removal.data.shape
    L1 = F.sum(F.softplus(-y_in_removal)) / batchsize / w / h
    L2 = F.sum(F.softplus(y_out_removal)) / batchsize / w / h
    loss = L1 + L2
    #chainer.report({'loss': loss}, dis)
    return loss
def __call__(self, x, r):
    """Forward propagation

    Args:
        x (numpy.ndarray): axis 0 represents minibatch index,
            axis 1 represents atom_index and axis 2 represents
            feature dimension.
        r (numpy.ndarray): axis 0 represents minibatch index,
            axis 1 and 2 represent distance between atoms.
    """
    # s0 minibatch, s1 atom (from), s2 atom (to)
    s0, s1, s2 = r.shape
    # a0 minibatch, a1 atom, a2 ch. a2 must be equal to self.hidden_dim
    a0, a1, a2 = x.shape
    if a2 != self.hidden_dim:
        raise ValueError(
            "x.shape[2] {} and hidden_dim {} must be same!".format(
                a2, self.hidden_dim))
    embedlist = self.xp.arange(
        self.num_rbf, dtype=self.xp.float32) * self.radius_resolution
    r = functions.reshape(r, (s0, s1, s2, 1))
    r = functions.broadcast_to(r, (s0, s1, s2, self.num_rbf))
    r = functions.exp(-self.gamma * (r - embedlist)**2)
    r = functions.reshape(r, (s0 * s1 * s2, self.num_rbf))
    r = self.dense1(r)
    r = functions.softplus(r)
    r = self.dense2(r)
    r = functions.softplus(r)
    r = functions.reshape(r, (s0, s1, s2, self.hidden_dim))
    x = functions.reshape(x, (a0, a1, 1, self.hidden_dim))
    x = functions.broadcast_to(x, (a0, a1, s2, self.hidden_dim))
    x = functions.sum(x * r, axis=1)
    return x
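# An illustrative NumPy sketch (not from the original code) of the radial-basis
# expansion used in the continuous-filter convolutions above: each scalar distance
# is expanded into num_rbf Gaussian features exp(-gamma * (d - mu_k)^2) with
# centers mu_k = k * radius_resolution. The default values here are assumptions
# for demonstration only.
import numpy as np

def rbf_expand(dist, num_rbf=300, radius_resolution=0.1, gamma=10.0):
    # dist: (mb, atom, atom) -> (mb, atom, atom, num_rbf)
    centers = np.arange(num_rbf, dtype=np.float32) * radius_resolution
    return np.exp(-gamma * (dist[..., None] - centers) ** 2)

# Example: two molecules with five atoms each.
d = np.random.uniform(0.0, 4.0, (2, 5, 5)).astype(np.float32)
assert rbf_expand(d).shape == (2, 5, 5, 300)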
def loss_dis(self, dis, y_fake, y_real):
    batchsize = len(y_fake)
    loss_real = F.sum(F.softplus(-y_real)) / batchsize
    loss_fake = F.sum(F.softplus(y_fake)) / batchsize
    loss = loss_fake + loss_real
    chainer.report({'loss': loss}, dis)
    return loss
def update_core(self):
    iterator = self.get_iterator('main')  # type: Iterator
    g_opt = self.get_optimizer('gen')  # type: chainer.Optimizer
    d_opt = self.get_optimizer('dis')  # type: chainer.Optimizer

    batch = iterator.next()
    batch_size = len(batch)
    x1, x2 = self.converter(batch, self.device)

    generated = self.gen(x1)
    dis_real = self.dis(x1, x2)
    dis_fake = self.dis(x1, generated)

    g_loss = F.sum(F.softplus(-dis_fake)) / dis_fake.size \
        + self.lambda_ * F.mean_absolute_error(generated, x2)
    reporter.report({'loss': g_loss}, self.gen)
    self.gen.cleargrads()
    g_loss.backward()
    g_opt.update()
    del g_loss

    d_loss = F.sum(F.softplus(-dis_real)) / dis_real.size \
        + F.sum(F.softplus(dis_fake)) / dis_fake.size
    reporter.report({'loss': d_loss}, self.dis)
    self.dis.cleargrads()
    d_loss.backward()
    d_opt.update()
    del d_loss, dis_fake, dis_real, x1, x2, batch, generated
def loss_dis(self, dis, y_in, y_out):
    batchsize, _, w, h = y_in.data.shape
    L1 = F.sum(F.softplus(-y_in)) / batchsize / w / h
    L2 = F.sum(F.softplus(y_out)) / batchsize / w / h
    loss = L1 + L2
    chainer.report({'loss': loss}, dis)
    return loss
def loss_discriminator(self, discriminator, x_fake, x_real):
    batchsize = len(x_fake)
    loss1 = F.sum(F.softplus(-x_real)) / batchsize
    loss2 = F.sum(F.softplus(x_fake)) / batchsize
    loss = loss1 + loss2
    chainer.report({'loss': loss}, discriminator)
    return loss
def dis_loss(discriminator, y_fake, x, y_label, x_label):
    fake = discriminator(y_fake, F.concat([y_label, x_label]))
    real = discriminator(x, F.concat([x_label, y_label]))
    #fake = discriminator(y_fake, y_label)
    #real = discriminator(x, x_label)
    return F.mean(F.softplus(-real)) + F.mean(F.softplus(fake))
def loss_dis2(self, dis2, y_in, y_out):
    batchsize, _, w, h = y_in.data.shape
    L1 = F.sum(F.softplus(-y_in)) / batchsize / w / h
    L2 = F.sum(F.softplus(y_out)) / batchsize / w / h
    loss = L1 + L2
    #chainer.report({'loss': loss}, dis2)
    #print("dis2", {'loss': loss})
    return loss
def dis_loss(self, discriminator, y, t):
    y_dis = discriminator(y)
    t_dis = discriminator(t)

    loss = self.zero_centered_gradient_penalty_fake(y_dis, y)
    loss += self.zero_centered_gradient_penalty_real(discriminator, t)

    return F.mean(F.softplus(-t_dis)) + F.mean(F.softplus(y_dis)) + loss
def loss_dis(self, dis, y_fake, y_real, y_class, labels):
    batchsize = len(y_fake)
    L1 = F.sum(F.softplus(-y_real)) / batchsize
    L2 = F.sum(F.softplus(y_fake)) / batchsize
    L4 = F.softmax_cross_entropy(y_class, labels) / batchsize
    loss = L1 + L2 + L4  # + L3 + L4
    chainer.report({'loss': loss}, dis)
    return loss
def process_batch(self, x, y, one_hot, last_joint, return_sample=False, return_mean=True):
    xp = cuda.cupy
    x = F.dropout(x, ratio=0.5)
    x = F.concat((one_hot, x), axis=-1)
    x = self.ln1_(x)
    h0 = self.l1_(x)
    h = F.dropout(h0, ratio=0.5)
    h1 = self.ln2_(F.concat((x, h), axis=1))
    h1 = self.l2_(h1)
    h = F.dropout(h1, ratio=0.5)
    h2 = self.ln3_(F.concat((x, h), axis=1))
    h2 = self.l3_(h2)
    final_h = F.concat((h0, h1, h2), axis=-1)

    # mixture density network parameters
    mu = self.mu_(final_h)
    mu = F.reshape(mu, (-1, self.future_out_dim, self.NUM_MIXTURE))
    sigma_orig = self.sigma_(final_h)
    mixing_orig = self.mixing_(final_h)
    sigma = F.softplus(sigma_orig)
    mixing = F.softmax(mixing_orig)

    # negative log-likelihood of the Gaussian mixture
    y = F.expand_dims(y, axis=2)
    y_broad = F.broadcast_to(y, mu.shape)
    normalizer = 2 * np.pi * sigma
    exponent = -0.5 * (1. / F.square(sigma)) * F.sum(
        (y_broad - mu)**2, axis=1) + F.log(
            mixing) - (self.future_out_dim * 0.5) * F.log(normalizer)
    cost = -F.logsumexp(exponent, axis=1)
    cost = F.mean(cost)

    # sampling
    if return_sample:
        mixing = mixing_orig * (1 + self.sampling_bias)
        sigma = F.softplus(sigma_orig - self.sampling_bias)
        mixing = F.softmax(mixing)
        argmax_mixing = F.argmax(mixing, axis=1)
        mixing_one_hot = xp.zeros(mixing.shape, dtype=xp.float32)
        mixing_one_hot[xp.arange(mixing.shape[0]), argmax_mixing.data] = 1
        component_expanded = F.broadcast_to(
            F.expand_dims(mixing_one_hot, axis=1), mu.shape)
        component_mean = F.sum(mu * component_expanded, axis=2)
        if return_mean:
            return cost, component_mean

        # std of the selected mixture component, broadcast over output dims
        component_std = F.sum(sigma * mixing_one_hot, axis=1, keepdims=True)
        component_std = F.broadcast_to(component_std, component_mean.shape)
        sample = xp.random.normal(component_mean.data, component_std.data)
        return cost, sample
    return cost, None
def update_core(self):
    # TODO: support n_Classfier
    # TIP: in experiments, setting n_critic to 5 gave the best results.
    gen_optimizer = self.get_optimizer('gen')
    critic_optimizer = self.get_optimizer('critic')
    xp = self.generator.xp

    for i in range(self.n_critic):
        # grab data
        batch = self.get_iterator('main').next()
        batchsize = len(batch)
        batch = self.converter(batch, self.device)
        real_data, real_label = batch
        real_label = Variable(real_label)
        real_data = Variable(real_data) / 255.

        # TODO: is a uniform prior really fine for cWGAN...?
        z = Variable(
            xp.asarray(
                self.generator.make_input_z_with_label(
                    batchsize, real_label.data)))

        # Generator
        gen_data = self.generator(z)

        # Critic (Discriminator)
        critic_real = self.critic(real_data, real_label)
        critic_fake = self.critic(gen_data, real_label)

        # Loss
        ## Critic Loss
        # print(critic_fake.shape, critic_real.shape, gen_data.shape, real_data.shape)
        # critic_loss = F.mean(critic_fake - critic_real)
        critic_loss = F.sum(F.softplus(-critic_real)) / batchsize
        critic_loss += F.sum(F.softplus(critic_fake)) / batchsize

        self.critic.cleargrads()
        critic_loss.backward()
        critic_optimizer.update()

        chainer.report({'critic_loss': critic_loss})

        # if chainer.is_debug():
        #     graph = cg.build_computational_graph(critic_loss)
        #     with open(os.path.join(), 'w') as file:
        #         file.write(graph.dump())

        if i == 0:
            # Generator Loss
            gen_loss = F.sum(F.softplus(-critic_fake)) / batchsize
            self.generator.cleargrads()
            gen_loss.backward()
            gen_optimizer.update()
            chainer.report({'gen_loss': gen_loss})
def loss_dis(self, dis, dis_real, dis_fake):
    batchsize, _, w, h = dis_real.data.shape
    L1 = (2 + np.random.rand()) * F.sum(
        F.softplus(-dis_real)) / batchsize / w / h
    L2 = (2 + np.random.rand()) * F.sum(
        F.softplus(dis_fake)) / batchsize / w / h
    loss = L1 + L2
    chainer.report({'loss': loss}, dis)
    return loss
def __call__(self, x):
    h = F.reshape(x, (x.shape[0], -1))
    h = self.l1(h)
    h = self.l2(F.softplus(h))
    h = self.l3(F.softplus(h))
    self.feature = F.softplus(h)
    h = self.l4(self.feature)
    return h
def loss_dis(self, dis, y_fake, y_real):
    batchsize = len(y_fake)
    # G(x, E(x)) -> 1, G(D(z), z) -> 0
    # real label 1
    L1 = F.sum(F.softplus(-y_real)) / batchsize
    # fake label 0
    L2 = F.sum(F.softplus(y_fake)) / batchsize
    loss = L1 + L2
    chainer.report({'loss': loss}, dis)
    return loss
def dis_loss(opt, real_d, fake_d, real_g, fake_g, observer=None, tag=str()):
    # gradient penalty
    real_gp = 0
    fake_gp = 0
    if opt.zero_gp_mode == 'real' or opt.zero_gp_mode == 'real_fake':
        real_gp = opt.gp_coef * compute_grad(real_d, real_g)
    if opt.zero_gp_mode == 'fake' or opt.zero_gp_mode == 'real_fake':
        fake_gp = opt.gp_coef * compute_grad(fake_d, fake_g)

    # adversarial loss
    adv_loss = 0
    real_loss = 0
    fake_loss = 0
    if opt.adv_loss_mode == 'wgan':
        adv_loss = -F.mean(real_d - fake_d)
        gp = real_gp + fake_gp
    else:
        if opt.adv_loss_mode == 'bce':
            real_loss = F.mean(F.softplus(-real_d))
            fake_loss = F.mean(F.softplus(fake_d))
        if opt.adv_loss_mode == 'mse':
            xp = cuda.get_array_module(real_d.array)
            real_loss = F.mean_squared_error(real_d, xp.ones_like(real_d.array))
            fake_loss = F.mean_squared_error(fake_d, xp.zeros_like(fake_d.array))
        if opt.adv_loss_mode == 'hinge':
            real_loss = F.mean(F.relu(1.0 - real_d))
            fake_loss = F.mean(F.relu(1.0 + fake_d))
        adv_loss = (real_loss + fake_loss) * 0.5
        gp = (real_gp + fake_gp) * 0.5

    loss = adv_loss + gp

    if observer is not None:
        if tag:
            tag += '_'
        report(
            {
                tag + 'loss': l2f(loss),
                tag + 'adv_loss': l2f(adv_loss),
                tag + 'real_loss': l2f(real_loss),
                tag + 'fake_loss': l2f(fake_loss),
                tag + 'gp': l2f(gp),
                tag + 'adv_loss_with_gp': l2f(adv_loss + gp)
            },
            observer=observer)
    return loss
def dis_loss(self, discriminator, y_fake, x, y_label, x_label, residual):
    x = chainer.Variable(x.data)
    if residual:
        fake = discriminator(y_fake + x, y_label)
    else:
        fake = discriminator(y_fake, y_label)
    real = discriminator(x, x_label)

    return F.mean(F.softplus(-real)), F.mean(F.softplus(fake))
def __call__(self, x, dist):
    v = self.linear[0](x)
    v = self.cfconv(v, dist)
    v = self.linear[1](v)
    v = functions.softplus(v)
    v = self.linear[2](v)
    return x + v
def bernoulli_nll(x, y):
    """Calculate negative log-likelihood of Bernoulli distribution.

    This function calculates negative log-likelihood on a Bernoulli
    distribution.

    .. math::

        -B(x; p) = -\\sum_i {x_i \\log(p_i) + (1 - x_i)\\log(1 - p_i)},

    where :math:`p = \\sigma(y)`, and :math:`\\sigma(\\cdot)` is a sigmoid
    function.

    .. note::

        As this function uses a sigmoid function, you can pass a result of
        fully-connected layer (that means :class:`Linear`) to this function
        directly.

    Args:
        x (~chainer.Variable): Input variable.
        y (~chainer.Variable): A variable representing the parameter of
            Bernoulli distribution.

    Returns:
        ~chainer.Variable: A variable representing negative log-likelihood.

    """
    assert isinstance(x, variable.Variable)
    assert isinstance(y, variable.Variable)

    return F.sum(F.softplus(-y)) + F.sum(y) - F.sum(y * x)
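# A small usage sketch (assumed values, not from the original code): x holds
# binary targets and y holds pre-sigmoid logits, e.g. the output of a Linear
# decoder in a VAE. Assumes F = chainer.functions is imported as elsewhere in
# this file; the returned value equals
# -sum(x * log(sigmoid(y)) + (1 - x) * log(1 - sigmoid(y))).
import numpy as np
from chainer import Variable

x_example = Variable(np.array([[0., 1., 1.]], dtype=np.float32))
y_example = Variable(np.array([[-2., 0.5, 3.]], dtype=np.float32))
nll = bernoulli_nll(x_example, y_example)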
def __call__(self, x):
    # x.shape == (batchsize, 3, 128, 64)
    batchsize = x.shape[0]
    h = F.elu(self.bn1(self.conv1_1(x)))
    h = F.elu(self.bn2(self.conv1_2(h)))
    h = F.max_pooling_2d(h, 3, 2, cover_all=False)

    h = self.conv2_1(h)
    h = self.conv2_3(h)

    h = self.conv3_1(h)
    h = self.conv3_3(h)

    h = self.conv4_1(h)
    h = self.conv4_3(h)

    h = h.reshape(batchsize, -1)
    h = F.dropout(h, ratio=0.6)
    h = F.elu(self.fc1_bn(self.fc1(h)))

    # Features in rows, normalize axis 1.
    weights = self.mean_vectors
    features = self.ball(h)
    features = F.normalize(features, eps=1e-8)
    scale = F.softplus(self.scale)
    normalized_weight = F.normalize(weights, axis=0, eps=1e-8)
    logits = F.tile(scale[None, ], (batchsize, 1)) * F.matmul(
        features, normalized_weight)
    return logits
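# An illustrative NumPy sketch (hypothetical shapes and names, not from the
# original code) of the final step above: L2-normalize the feature vectors and
# the per-class mean vectors, then scale the cosine similarities by a learned
# positive factor softplus(scale).
import numpy as np

def cosine_logits(features, mean_vectors, scale):
    # features: (batch, dim), mean_vectors: (dim, n_classes), scale: (n_classes,)
    f = features / (np.linalg.norm(features, axis=1, keepdims=True) + 1e-8)
    w = mean_vectors / (np.linalg.norm(mean_vectors, axis=0, keepdims=True) + 1e-8)
    return np.log1p(np.exp(scale))[None, :] * (f @ w)

feat = np.random.randn(4, 128).astype(np.float32)
mv = np.random.randn(128, 10).astype(np.float32)
sc = np.zeros(10, dtype=np.float32)
assert cosine_logits(feat, mv, sc).shape == (4, 10)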
def loss_dec(self, dec, x_out, t_out, y_out, lam1=100, lam2=1):
    batchsize, _, w, h = y_out.data.shape
    loss_rec = lam1 * (F.mean_absolute_error(x_out, t_out))
    loss_adv = lam2 * F.sum(F.softplus(-y_out)) / batchsize / w / h
    loss = loss_rec + loss_adv
    chainer.report({'loss': loss}, dec)
    return loss
def check_forward(self, x_data):
    x = chainer.Variable(x_data)
    y = functions.softplus(x, beta=self.beta)
    x_value = cuda.to_cpu(x_data)
    y_exp = numpy.log(1 + numpy.exp(self.beta * x_value)) / self.beta
    self.assertEqual(y.data.dtype, self.dtype)
    testing.assert_allclose(
        y_exp, y.data, **self.check_forward_options)
def _loss_predictor(self, predictor, output, target, d_fake):
    b, _, t = d_fake.data.shape

    # note: reported under the key 'mse' but computed as mean absolute error
    loss_mse = (F.mean_absolute_error(output, target))
    chainer.report({'mse': loss_mse}, predictor)

    loss_adv = F.sum(F.softplus(-d_fake)) / (b * t)
    chainer.report({'adversarial': loss_adv}, predictor)

    loss = self.loss_config.mse * loss_mse + self.loss_config.adversarial * loss_adv
    chainer.report({'loss': loss}, predictor)
    return loss
def check_backward(self, x_data, y_grad):
    x = chainer.Variable(x_data)
    y = functions.softplus(x, beta=self.beta)
    self.assertEqual(y.data.dtype, numpy.float32)
    y.grad = y_grad
    y.backward()

    func = y.creator
    f = lambda: func.forward((x.data,))
    gx, = gradient_check.numerical_grad(f, (x.data,), (y.grad,))

    gradient_check.assert_allclose(gx, x.grad)
def _loss_discriminator(self, discriminator, d_real, d_fake):
    b, _, t = d_real.data.shape

    loss_real = F.sum(F.softplus(-d_real)) / (b * t)
    chainer.report({'real': loss_real}, discriminator)

    loss_fake = F.sum(F.softplus(d_fake)) / (b * t)
    chainer.report({'fake': loss_fake}, discriminator)

    loss = loss_real + loss_fake
    chainer.report({'loss': loss}, discriminator)

    # counts are thresholded on the raw logits (> 0.5), not on sigmoid probabilities
    tp = (d_real.data > 0.5).sum()
    fp = (d_fake.data > 0.5).sum()
    fn = (d_real.data <= 0.5).sum()
    tn = (d_fake.data <= 0.5).sum()
    accuracy = (tp + tn) / (tp + fp + fn + tn)
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    chainer.report({'accuracy': accuracy}, self.discriminator)
    chainer.report({'precision': precision}, self.discriminator)
    chainer.report({'recall': recall}, self.discriminator)
    return loss
def bernoulli_nll_keepbatch(self, x, y):
    nll = F.softplus(y) - x * y
    return F.sum(nll, axis=1)
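# Quick numeric check (illustrative only): per element, softplus(y) - x*y equals
# softplus(-y) + y - x*y, so summing this per-sample value over the batch
# reproduces the batch total computed by the bernoulli_nll helper shown earlier.
import numpy as np

softplus = lambda a: np.log1p(np.exp(-np.abs(a))) + np.maximum(a, 0.0)
y_check = np.array([-2.0, 0.5, 3.0])
x_check = np.array([0.0, 1.0, 1.0])
assert np.allclose(softplus(y_check) - x_check * y_check,
                   softplus(-y_check) + y_check - x_check * y_check)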
def f(x):
    y = functions.softplus(x, beta=self.beta)
    return y * y
def forward(self, inputs, device):
    x, = inputs
    return functions.softplus(x, beta=self.beta),
def _encode(self, xs):
    exs = self.embed_mat(xs)
    h = F.tanh(self.l1(exs))
    logits = F.softplus(self.l2(h))
    logits = F.log(logits + 1e-10).reshape(-1, self.M, self.K)
    return logits, exs
def __call__(self, x):
    return F.softplus(x, self.use_cudnn)
def loss_gen(self, gen, y_fake):
    batchsize = y_fake.data.shape[0]
    loss = F.sum(F.softplus(-y_fake)) / batchsize
    chainer.report({'loss': loss}, gen)
    return loss
def loss_gen(self, gen, y_fake):
    batchsize = len(y_fake)
    loss = F.sum(F.softplus(-y_fake)) / batchsize
    train_loss_gen.append(loss)
    return loss
def check_forward(self, x_data):
    x = chainer.Variable(x_data)
    y = functions.softplus(x, beta=self.beta)
    x_value = cuda.to_cpu(x_data)
    y_exp = numpy.log(1 + numpy.exp(self.beta * x_value)) / self.beta
    gradient_check.assert_allclose(y_exp, y.data)
def __call__(self, x):
    return functions.softplus(x, self.beta)
def f(x):
    return functions.softplus(x, beta=self.beta)
def loss_gen(self, gen, y_fake):
    batchsize = len(y_fake)
    loss = F.sum(F.softplus(-y_fake)) / batchsize
    chainer.report({'loss': loss}, gen)
    return loss
def loss(self, source, target, weight):
    word = F.dropout(self.embed(target), ratio=self.dropout_ratio)
    inner = F.sum(source * word, axis=1)
    sp = F.sum(F.softplus(-inner) * weight)
    return sp
def __call__(self, h):
    h = self.linear1(h)
    h = functions.softplus(h)
    h = self.linear2(h)
    h = functions.sum(h, axis=1)
    return h