import torch
import torch.nn.functional as F
from torch.autograd import Variable
import tensorflow as tf


# --- PyTorch version ---
def trainD(self, idf_label, y, idf_unlabel):
    x_label = self.make_input(*idf_label)
    x_unlabel = self.make_input(*idf_unlabel)
    y = Variable(y, requires_grad=False)
    if self.args.cuda:
        x_label, x_unlabel, y = x_label.cuda(), x_unlabel.cuda(), y.cuda()

    output_label = self.D(x_label, cuda=self.args.cuda)
    mom_un, output_unlabel = self.D(x_unlabel, cuda=self.args.cuda, feature=True)
    # detach the generated batch so no gradient flows into G during the D step
    fake = self.G(x_unlabel.size()[0], cuda=self.args.cuda).view(
        x_unlabel.size()).detach()
    output_fake = self.D(fake, cuda=self.args.cuda)

    # log-partition of each logit vector: log ∑_i e^{x_i}
    logz_label = log_sum_exp(output_label)
    logz_unlabel = log_sum_exp(output_unlabel)
    logz_fake = log_sum_exp(output_fake)

    # logit of the true class: log e^{x_label} = x_label
    prob_label = torch.gather(output_label, 1, y.unsqueeze(1))
    loss_supervised = -torch.mean(prob_label) + torch.mean(logz_label)
    loss_unsupervised = 0.5 * (
        -torch.mean(logz_unlabel)
        + torch.mean(F.softplus(logz_unlabel))   # real data: log Z/(1+Z)
        + torch.mean(F.softplus(logz_fake)))     # fake data: log 1/(1+Z)
    # entropy of the predictions on unlabeled data
    entropy = -torch.mean(
        F.softmax(output_unlabel, dim=1) * F.log_softmax(output_unlabel, dim=1))
    pt = pull_away_term(mom_un)
    loss = (loss_supervised + self.args.unlabel_weight * loss_unsupervised
            + entropy + pt)
    acc = torch.mean((output_label.max(1)[1] == y).float())

    self.Doptim.zero_grad()
    loss.backward()
    self.Doptim.step()
    return (loss_supervised.data.cpu().numpy(),
            loss_unsupervised.data.cpu().numpy(), acc)
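# `log_sum_exp` is referenced above but not shown in this section. Below is a
# minimal sketch of a numerically stable version; the repo's actual helper may
# differ, and on recent PyTorch the built-in torch.logsumexp(x, dim=1) is
# equivalent.
def log_sum_exp(x):
    # subtract the row-wise max before exponentiating to avoid overflow:
    # log ∑_i e^{x_i} = m + log ∑_i e^{x_i - m}
    m, _ = torch.max(x, dim=1, keepdim=True)
    return (m + torch.log(torch.sum(torch.exp(x - m), dim=1,
                                    keepdim=True))).squeeze(1)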
# --- TensorFlow version ---
def trainD(self, idf_label, y, idf_unlabel):
    x_label = self.make_input(*idf_label)
    x_unlabel = self.make_input(*idf_unlabel)

    output_label = self.D(x_label, cuda=self.args.cuda)
    mom_un, output_unlabel = self.D(x_unlabel, cuda=self.args.cuda, feature=True)
    fake = tf.reshape(self.G(x_unlabel.shape[0], cuda=self.args.cuda),
                      x_unlabel.shape)
    output_fake = self.D(fake, cuda=self.args.cuda)

    # log-partition of each logit vector: log ∑_i e^{x_i}
    logz_label = log_sum_exp(output_label)
    logz_unlabel = log_sum_exp(output_unlabel)
    logz_fake = log_sum_exp(output_fake)

    # logit of the true class: log e^{x_label} = x_label
    # (batched gather along the class axis, matching torch.gather above)
    prob_label = tf.gather(output_label, y, axis=1, batch_dims=1)
    loss_supervised = -tf.reduce_mean(prob_label) + tf.reduce_mean(logz_label)
    loss_unsupervised = 0.5 * (
        -tf.reduce_mean(logz_unlabel)
        + tf.reduce_mean(tf.math.softplus(logz_unlabel))   # real data: log Z/(1+Z)
        + tf.reduce_mean(tf.math.softplus(logz_fake)))     # fake data: log 1/(1+Z)
    # entropy of the predictions on unlabeled data
    entropy = -tf.reduce_mean(
        tf.nn.softmax(output_unlabel, axis=1)
        * tf.nn.log_softmax(output_unlabel, axis=1))
    pt = pull_away_term(mom_un)
    loss = (loss_supervised + self.args.unlabel_weight * loss_unsupervised
            + entropy + pt)
    # cast argmax (int64 by default) to y's dtype before comparing
    acc = tf.reduce_mean(tf.cast(
        tf.equal(tf.cast(tf.argmax(output_label, axis=1), y.dtype), y),
        tf.float32))
    # unlike the PyTorch version, no optimizer step is taken here; the full
    # loss is returned so the caller can apply gradients itself
    return loss_supervised.numpy(), loss_unsupervised.numpy(), loss, acc
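# Since the TensorFlow trainD returns the combined loss without stepping an
# optimizer, the caller is expected to apply gradients. A minimal sketch of
# such a driver is below; `model` and `d_optimizer` are hypothetical names,
# not part of this repo.
d_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
with tf.GradientTape() as tape:
    loss_sup, loss_unsup, loss, acc = model.trainD(idf_label, y, idf_unlabel)
grads = tape.gradient(loss, model.D.trainable_variables)
d_optimizer.apply_gradients(zip(grads, model.D.trainable_variables))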
# --- TensorFlow version ---
def trainG(self, idf_unlabel):
    x_unlabel = self.make_input(*idf_unlabel)
    fake = tf.reshape(self.G(x_unlabel.shape[0], cuda=self.args.cuda),
                      x_unlabel.shape)
    mom_gen, output_fake = self.D(fake, feature=True, cuda=self.args.cuda)
    mom_unlabel, output_unlabel = self.D(x_unlabel, feature=True,
                                         cuda=self.args.cuda)

    # pull-away term on the generated features
    loss_pt = pull_away_term(mom_gen)
    # feature matching: match mean intermediate features of real vs. fake
    mom_gen = tf.reduce_mean(mom_gen, axis=0)
    mom_unlabel = tf.reduce_mean(mom_unlabel, axis=0)
    loss_fm = tf.reduce_mean(tf.abs(mom_gen - mom_unlabel))
    loss = loss_fm + loss_pt
    # as with trainD, gradient application is left to the caller
    return loss
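# `pull_away_term` is also not shown in this section. It is presumably the
# pull-away regularizer from EBGAN (Zhao et al., 2016), which pushes feature
# vectors in a batch apart by penalizing their pairwise cosine similarity.
# The sketch below is one common formulation, an assumption rather than the
# repo's verified implementation.
def pull_away_term(feats):
    n = tf.shape(feats)[0]
    normalized = tf.math.l2_normalize(feats, axis=1)
    # pairwise cosine similarities; mask out the diagonal (self-similarity)
    similarity = tf.matmul(normalized, normalized, transpose_b=True)
    mask = 1.0 - tf.eye(n)
    nf = tf.cast(n, tf.float32)
    return tf.reduce_sum(tf.square(similarity) * mask) / (nf * (nf - 1.0))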
# --- PyTorch version ---
def trainG(self, idf_unlabel):
    x_unlabel = self.make_input(*idf_unlabel)
    if self.args.cuda:
        x_unlabel = x_unlabel.cuda()
    fake = self.G(x_unlabel.size()[0],
                  cuda=self.args.cuda).view(x_unlabel.size())
    mom_gen, output_fake = self.D(fake, feature=True, cuda=self.args.cuda)
    mom_unlabel, output_unlabel = self.D(x_unlabel, feature=True,
                                         cuda=self.args.cuda)

    # pull-away term on the generated features
    loss_pt = pull_away_term(mom_gen)
    # feature matching: match mean intermediate features of real vs. fake
    mom_gen = torch.mean(mom_gen, dim=0)
    mom_unlabel = torch.mean(mom_unlabel, dim=0)
    loss_fm = torch.mean(torch.abs(mom_gen - mom_unlabel))
    loss = loss_fm + loss_pt

    self.Goptim.zero_grad()
    self.Doptim.zero_grad()  # clear grads accumulated on D by this backward pass
    loss.backward()
    self.Goptim.step()
    return loss.data.cpu().numpy()
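# A sketch of how the PyTorch trainD/trainG pair would typically be driven,
# alternating one discriminator step and one generator step per batch. The
# `trainer` object and loader names are hypothetical, not taken from this repo.
for (idf_label, y), idf_unlabel in zip(labeled_loader, unlabeled_loader):
    d_sup, d_unsup, acc = trainer.trainD(idf_label, y, idf_unlabel)
    g_loss = trainer.trainG(idf_unlabel)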