def lsgan_loss(real, weight, fake=None):
    if fake:
        loss = weight * F.mean(F.squared_error(F.constant(1, real.shape), real)
                               + F.pow_scalar(fake, 2))
    else:
        loss = weight * \
            F.mean(F.squared_error(F.constant(1, real.shape), real))
    return loss

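# A minimal usage sketch for lsgan_loss (hypothetical output shapes and a
# weight of 1.0; assumes the standard NNabla/NumPy imports shown below).
import numpy as np
import nnabla as nn
import nnabla.functions as F

# Dummy discriminator outputs for real and fake samples.
real_out = nn.Variable.from_numpy_array(
    np.random.randn(4, 1).astype(np.float32))
fake_out = nn.Variable.from_numpy_array(
    np.random.randn(4, 1).astype(np.float32))

d_loss = lsgan_loss(real_out, 1.0, fake_out)  # (real - 1)^2 + fake^2 terms
g_loss = lsgan_loss(fake_out, 1.0)            # (fake - 1)^2 term only
d_loss.forward()
g_loss.forward()
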
def create_network(batchsize, imheight, imwidth, args, seen):
    import gc
    gc.collect()
    nnabla_ext.cuda.clear_memory_cache()

    anchors = args.num_anchors
    classes = args.num_classes
    yolo_x = nn.Variable((batchsize, 3, imheight, imwidth))
    target = nn.Variable((batchsize, 50 * 5))

    yolo_features = yolov2.yolov2(yolo_x, anchors, classes, test=False)

    nB = yolo_features.shape[0]
    nA = args.num_anchors
    nC = args.num_classes
    nH = yolo_features.shape[2]
    nW = yolo_features.shape[3]

    # Bounding box regression loss
    # pred.shape = [nB, nA, 4, nH, nW]
    output = F.reshape(yolo_features, (nB, nA, (5 + nC), nH, nW))
    xy = F.sigmoid(output[:, :, :2, ...])
    wh = output[:, :, 2:4, ...]
    bbox_pred = F.concatenate(xy, wh, axis=2)
    conf_pred = F.sigmoid(output[:, :, 4:5, ...])
    cls_pred = output[:, :, 5:, ...]
    region_loss_targets = RegionLossTargets(
        nC, args.anchors, seen, args.coord_scale, args.noobject_scale,
        args.object_scale, args.class_scale, args.thresh)
    tcoord, mcoord, tconf, mconf, tcls, mcls = region_loss_targets(
        bbox_pred, target)
    for v in tcoord, mcoord, tconf, mconf, tcls, mcls:
        v.need_grad = False

    # Bounding box regression
    bbox_loss = F.sum(F.squared_error(bbox_pred, tcoord) * mcoord)

    # Conf (IoU) regression loss
    conf_loss = F.sum(F.squared_error(conf_pred, tconf) * mconf)

    # Class probability regression loss
    cls_loss = F.sum(F.softmax_cross_entropy(cls_pred, tcls, axis=2) * mcls)

    # Note:
    # The loss is divided by 2.0 because the original darknet code does not
    # multiply the derivative of the squared functions by 2.0 in region_layer.c.
    loss = (bbox_loss + conf_loss) / 2.0 + cls_loss

    return yolo_x, target, loss, region_loss_targets

def ls_gan_loss(r_out, f_out):
    # todo: set constant arbitrary
    # D
    d_gan_real = F.mean(
        F.squared_error(r_out, F.constant(1., shape=r_out.shape)))
    d_gan_fake = F.mean(
        F.squared_error(f_out, F.constant(0., shape=f_out.shape)))
    # G
    g_gan = F.mean(
        F.squared_error(f_out, F.constant(1., shape=f_out.shape)))
    return d_gan_real, d_gan_fake, g_gan

def sigma_regularization(ctx, log_var, one):
    with nn.context_scope(ctx):
        h = F.exp(log_var)
        h = F.pow_scalar(h, 0.5)
        h = F.mean(h, axis=1)
        r = F.mean(F.squared_error(h, one))
    return r

def sigma_regularization(ctx, log_var, one):
    with nn.context_scope(ctx):
        h = F.exp(log_var)
        h = F.pow_scalar(h, 0.5)
        b = log_var.shape[0]
        r = F.sum(F.squared_error(h, one)) / b
    return r

def test_simple_loop():
    nn.clear_parameters()

    x = nn.Variable.from_numpy_array(np.random.randn(10, 3, 128, 128))
    t = nn.Variable.from_numpy_array(np.random.randint(0, 100, (10, )))

    unet = UNet(num_classes=1,
                model_channels=128,
                output_channels=3,
                num_res_blocks=2,
                attention_resolutions=(16, 8),
                attention_num_heads=4,
                channel_mult=(1, 1, 2, 2, 4, 4))
    y = unet(x, t)

    loss = F.mean(F.squared_error(y, x))

    import nnabla.solvers as S
    solver = S.Sgd()
    solver.set_parameters(nn.get_parameters())

    from tqdm import trange
    tr = trange(100)
    for i in tr:
        loss.forward(clear_no_need_grad=True)
        solver.zero_grad()
        loss.backward(clear_buffer=True)
        solver.update()

        tr.set_description(f"diff: {loss.d.copy():.5f}")

def sr_loss_with_uncertainty(ctx, pred0, pred1, log_var):
    # TODO: squared error/absolute error
    with nn.context_scope(ctx):
        loss_sr = F.mean(F.squared_error(
            F.softmax(pred0), F.softmax(pred1)) * F.exp(-log_var)) \
            + F.mean(log_var)
    return loss_sr

def capsule_loss(v_norm, t_onehot, recon=None, x=None, m_pos=0.9, m_neg=0.1,
                 wn=0.5, wr=0.0005):
    '''
    Compute a margin loss given a length vector of output capsules and
    one-hot labels, and optionally compute a reconstruction loss.

    Margin loss is given in eq. 4. Reconstruction loss is given in Sec. 4.1.

    Args:
        v_norm (nnabla.Variable): A length vector of capsules. A shape of [B, capsules].
        t_onehot (nnabla.Variable): A shape of [B, capsules].
        recon (nnabla.Variable): Reconstruction output with a shape of [B, 1, 28, 28]. The values are in [0, 0.1].
        x (nnabla.Variable): Reconstruction target (i.e. input) with a shape of [B, 1, 28, 28]. The values are in [0, 0.1].
        m_pos (float): Margin of capsules corresponding to targets.
        m_neg (float): Margin of capsules corresponding to non-targets.
        wn (float): Weight of the non-target margin loss.
        wr (float): Weight of the reconstruction loss.

    Returns:
        nnabla.Variable: 0-dim

    '''
    # Classification loss
    lp = F.sum(t_onehot * F.relu(m_pos - v_norm) ** 2)
    ln = F.sum((1 - t_onehot) * F.relu(v_norm - m_neg) ** 2)
    lmargin = lp + wn * ln
    if recon is None or x is None:
        return lmargin / v_norm.shape[0]

    # Reconstruction loss
    lr = F.sum(F.squared_error(recon, x))
    # return (lmargin + wr * lr) / v_norm.shape[0]
    lmargin = lmargin / v_norm.shape[0]
    lmargin.persistent = True
    lreconst = (wr * lr) / v_norm.shape[0]
    lreconst.persistent = True
    return lmargin, lreconst, lmargin + lreconst

def siamese_loss(e0, e1, t, margin=1.0, eps=1e-4):
    dist = F.sum(F.squared_error(e0, e1), axis=1)  # Squared distance
    # Contrastive loss
    sim_cost = t * dist
    dissim_cost = (1 - t) * \
        (F.maximum_scalar(margin - (dist + eps) ** (0.5), 0) ** 2)
    return F.mean(sim_cost + dissim_cost)

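# A minimal usage sketch for siamese_loss (hypothetical batch and embedding
# sizes): t is 1.0 for similar pairs and 0.0 for dissimilar pairs.
import numpy as np
import nnabla as nn

e0 = nn.Variable.from_numpy_array(np.random.randn(8, 64).astype(np.float32))
e1 = nn.Variable.from_numpy_array(np.random.randn(8, 64).astype(np.float32))
t = nn.Variable.from_numpy_array(
    np.random.randint(0, 2, (8,)).astype(np.float32))

contrastive = siamese_loss(e0, e1, t, margin=1.0)
contrastive.forward()
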
def create_network(batchsize, imheight, imwidth, args):
    import gc
    gc.collect()
    nnabla_ext.cuda.clear_memory_cache()

    anchors = args.num_anchors
    classes = args.num_classes
    yolo_x = nn.Variable((batchsize, 3, imheight, imwidth))
    yolo_features = yolov2.yolov2(yolo_x, anchors, classes, test=False)

    nB = yolo_features.shape[0]
    nA = args.num_anchors
    nC = args.num_classes
    nH = yolo_features.shape[2]
    nW = yolo_features.shape[3]

    output = yolo_features.get_unlinked_variable(need_grad=True)
    # TODO: Workaround until v1.0.2.
    # Explicitly enable grad since need_grad option above didn't work.
    output.need_grad = True

    output = F.reshape(output, (nB, nA, (5 + nC), nH, nW))
    output_splitted = F.split(output, 2)
    x, y, w, h, conf = [v.reshape((nB, nA, nH, nW))
                        for v in output_splitted[0:5]]
    x, y, conf = map(F.sigmoid, [x, y, conf])

    cls = F.stack(*output_splitted[5:], axis=2)
    cls = cls.reshape((nB * nA, nC, nH * nW))
    cls = F.transpose(cls, [0, 2, 1]).reshape((nB * nA * nH * nW, nC))

    tx, ty, tw, th, tconf, coord_mask, conf_mask_sq = [
        nn.Variable(v.shape) for v in [x, y, w, h, conf, x, conf]]
    cls_ones, cls_mask = [nn.Variable(cls.shape) for _ in range(2)]
    tcls, cls_mask_bb = [nn.Variable((cls.shape[0], 1)) for _ in range(2)]

    coord_mask_sq = F.pow_scalar(coord_mask, 2)
    loss_x = args.coord_scale * F.sum(F.squared_error(x, tx) * coord_mask_sq)
    loss_y = args.coord_scale * F.sum(F.squared_error(y, ty) * coord_mask_sq)
    loss_w = args.coord_scale * F.sum(F.squared_error(w, tw) * coord_mask_sq)
    loss_h = args.coord_scale * F.sum(F.squared_error(h, th) * coord_mask_sq)
    loss_conf = F.sum(F.squared_error(conf, tconf) * conf_mask_sq)
    loss_cls = args.class_scale * \
        F.sum(cls_mask_bb * F.softmax_cross_entropy(
            cls + cls_ones - cls_mask, tcls))
    loss_nnabla = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

    return yolo_x, yolo_features, (x, y, w, h, conf, cls), \
        (tx, ty, tw, th, tconf, coord_mask, conf_mask_sq,
         cls_ones, cls_mask, tcls, cls_mask_bb), loss_nnabla

def _build(self):
    # inference
    self.infer_obs_t = nn.Variable((1,) + self.obs_shape)

    with nn.parameter_scope('trainable'):
        self.infer_policy_t = policy_network(self.infer_obs_t,
                                             self.action_size, 'actor')

    # training
    self.obss_t = nn.Variable((self.batch_size,) + self.obs_shape)
    self.acts_t = nn.Variable((self.batch_size, self.action_size))
    self.rews_tp1 = nn.Variable((self.batch_size, 1))
    self.obss_tp1 = nn.Variable((self.batch_size,) + self.obs_shape)
    self.ters_tp1 = nn.Variable((self.batch_size, 1))

    # critic training
    with nn.parameter_scope('trainable'):
        q_t = q_network(self.obss_t, self.acts_t, 'critic')
    with nn.parameter_scope('target'):
        policy_tp1 = policy_network(self.obss_tp1, self.action_size, 'actor')
        q_tp1 = q_network(self.obss_tp1, policy_tp1, 'critic')
    y = self.rews_tp1 + self.gamma * q_tp1 * (1.0 - self.ters_tp1)
    self.critic_loss = F.mean(F.squared_error(q_t, y))

    # actor training
    with nn.parameter_scope('trainable'):
        policy_t = policy_network(self.obss_t, self.action_size, 'actor')
        q_t_with_actor = q_network(self.obss_t, policy_t, 'critic')
    self.actor_loss = -F.mean(q_t_with_actor)

    # get neural network parameters
    with nn.parameter_scope('trainable'):
        with nn.parameter_scope('critic'):
            critic_params = nn.get_parameters()
        with nn.parameter_scope('actor'):
            actor_params = nn.get_parameters()

    # setup optimizers
    self.critic_solver = S.Adam(self.critic_lr)
    self.critic_solver.set_parameters(critic_params)
    self.actor_solver = S.Adam(self.actor_lr)
    self.actor_solver.set_parameters(actor_params)

    with nn.parameter_scope('trainable'):
        trainable_params = nn.get_parameters()
    with nn.parameter_scope('target'):
        target_params = nn.get_parameters()

    # build target update
    update_targets = []
    sync_targets = []
    for key, src in trainable_params.items():
        dst = target_params[key]
        updated_dst = (1.0 - self.tau) * dst + self.tau * src
        update_targets.append(F.assign(dst, updated_dst))
        sync_targets.append(F.assign(dst, src))
    self.update_target_expr = F.sink(*update_targets)
    self.sync_target_expr = F.sink(*sync_targets)

def mnist_lenet_siamese(x0, x1, test=False):
    """"""
    h0 = mnist_lenet_feature(x0, test)
    h1 = mnist_lenet_feature(x1, test)  # share weights
    # h = (h0 - h1) ** 2 # equivalent
    h = F.squared_error(h0, h1)
    p = F.sum(h, axis=1)
    return p

def feature_matching_loss(x, y, num=4):
    """
    Calculate feature matching loss
    """
    fm_loss = 0.0
    for i in range(num):
        fm_loss += F.mean(F.squared_error(x[i], y[i]))
    return fm_loss

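# A minimal usage sketch for feature_matching_loss (hypothetical feature
# shapes): x and y are lists of intermediate feature maps, e.g. taken from a
# discriminator run on real and generated inputs.
import numpy as np
import nnabla as nn

x_feats = [nn.Variable.from_numpy_array(
    np.random.randn(2, 16, 8, 8).astype(np.float32)) for _ in range(4)]
y_feats = [nn.Variable.from_numpy_array(
    np.random.randn(2, 16, 8, 8).astype(np.float32)) for _ in range(4)]

fm = feature_matching_loss(x_feats, y_feats, num=4)
fm.forward()
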
def sigmas_regularization(ctx, log_var0, log_var1):
    with nn.context_scope(ctx):
        h0 = F.exp(log_var0)
        h0 = F.pow_scalar(h0, 0.5)
        h1 = F.exp(log_var1)
        h1 = F.pow_scalar(h1, 0.5)
        r = F.mean(F.squared_error(h0, h1))
    return r

def sr_loss_with_uncertainty(ctx, pred0, pred1, log_var0, log_var1):
    # TODO: squared error/absolute error
    s0 = F.exp(log_var0)
    s1 = F.exp(log_var1)
    squared_error = F.squared_error(pred0, pred1)
    with nn.context_scope(ctx):
        loss_sr = F.mean(squared_error * (1 / s0 + 1 / s1)
                         + (s0 / s1 + s1 / s0)) * 0.5
    return loss_sr

def sr_loss_with_uncertainty(ctx, pred0, pred1, log_var0, log_var1):
    var0 = F.exp(log_var0)
    var1 = F.exp(log_var1)
    s0 = F.pow_scalar(var0, 0.5)
    s1 = F.pow_scalar(var1, 0.5)
    squared_error = F.squared_error(pred0, pred1)
    with nn.context_scope(ctx):
        loss = F.log(s1 / s0) + (var0 / var1 + squared_error / var1) * 0.5
        loss_sr = F.mean(loss)
    return loss_sr

def __call__(self, x, return_encoding_indices=False):

    x = F.transpose(x, (0, 2, 3, 1))
    x_flat = x.reshape((-1, self.embedding_dim))

    x_flat_squared = F.broadcast(F.sum(x_flat ** 2, axis=1, keepdims=True),
                                 (x_flat.shape[0], self.num_embedding))
    emb_wt_squared = F.transpose(
        F.sum(self.embedding_weight ** 2, axis=1, keepdims=True), (1, 0))

    distances = x_flat_squared + emb_wt_squared - 2 * \
        F.affine(x_flat, F.transpose(self.embedding_weight, (1, 0)))

    encoding_indices = F.min(
        distances, only_index=True, axis=1, keepdims=True)
    encoding_indices.need_grad = False

    quantized = F.embed(
        encoding_indices.reshape(encoding_indices.shape[:-1]),
        self.embedding_weight).reshape(x.shape)

    if return_encoding_indices:
        return encoding_indices, F.transpose(quantized, (0, 3, 1, 2))

    encodings = F.one_hot(encoding_indices, (self.num_embedding, ))

    e_latent_loss = F.mean(F.squared_error(
        quantized.get_unlinked_variable(need_grad=False), x))
    q_latent_loss = F.mean(F.squared_error(
        quantized, x.get_unlinked_variable(need_grad=False)))
    loss = q_latent_loss + self.commitment_cost * e_latent_loss

    quantized = x + (quantized - x).get_unlinked_variable(need_grad=False)

    avg_probs = F.mean(encodings, axis=0)
    perplexity = F.exp(-F.sum(avg_probs * F.log(avg_probs + 1.0e-10)))

    return loss, F.transpose(quantized, (0, 3, 1, 2)), perplexity, encodings

def sr_loss_with_uncertainty(ctx, pred0, pred1, log_v0, log_v1,
                             log_s0, log_s1):
    v0 = F.exp(log_v0)
    v1 = F.exp(log_v1)
    squared_error = F.squared_error(pred0, pred1)
    s0 = F.exp(log_s0)
    s1 = F.exp(log_s1)
    with nn.context_scope(ctx):
        error = squared_error * (1 / v0 + 1 / v1) + (v0 / v1 + v1 / v0) \
            + (s0 / s1 + s1 / s0)
        loss_sr = F.mean(error) * 0.5
    return loss_sr

def preservation_loss(self, x, target):
    r"""Returns content preservation loss.

    Args:
        x (nn.Variable): Input variable.
        target (nn.Variable): Target variable.

    Returns:
        nn.Variable: Output loss.
    """
    loss = F.mean(F.squared_error(x, target))
    return loss

def mse(x, y, mask=None, eps=1e-5):
    # l2 distance and reduce mean
    se = F.squared_error(x, y)

    if mask is not None:
        assert se.shape[:2] == mask.shape[:2]

        se *= F.reshape(mask, se.shape)
        return F.sum(se) / (F.sum(mask) + eps)

    return F.mean(se)

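# A minimal usage sketch for the masked mse above (hypothetical shapes): only
# elements where mask is 1 contribute, and the sum is normalized by the mask
# count (plus eps for numerical safety).
import numpy as np
import nnabla as nn

x = nn.Variable.from_numpy_array(np.random.randn(4, 3, 8, 8).astype(np.float32))
y = nn.Variable.from_numpy_array(np.random.randn(4, 3, 8, 8).astype(np.float32))
mask = nn.Variable.from_numpy_array(
    np.random.randint(0, 2, (4, 3, 8, 8)).astype(np.float32))

masked = mse(x, y, mask=mask)
masked.forward()
plain = mse(x, y)
plain.forward()
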
def train(self):
    # variables for training
    tx_in = nn.Variable(
        [self._batch_size, self._x_input_length, self._cols_size])
    tx_out = nn.Variable(
        [self._batch_size, self._x_output_length, self._cols_size])
    tpred = self.network(tx_in, self._lstm_unit_name, self._lstm_units)
    tpred.persistent = True
    loss = F.mean(F.squared_error(tpred, tx_out))
    solver = S.Adam(self._learning_rate)
    solver.set_parameters(nn.get_parameters())

    # variables for validation
    vx_in = nn.Variable(
        [self._batch_size, self._x_input_length, self._cols_size])
    vx_out = nn.Variable(
        [self._batch_size, self._x_output_length, self._cols_size])
    vpred = self.network(vx_in, self._lstm_unit_name, self._lstm_units)

    # data iterators
    tdata = self._load_dataset(self._training_dataset_path,
                               self._batch_size, shuffle=True)
    vdata = self._load_dataset(self._validation_dataset_path,
                               self._batch_size, shuffle=True)

    # monitors
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(self._monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Validation error", monitor, interval=10)

    # Training loop
    for i in range(self._max_iter):
        if i % self._val_interval == 0:
            ve = self._validate(vpred, vx_in, vx_out, vdata, self._val_iter)
            monitor_verr.add(i, ve / self._val_iter)
        te = self._train(tpred, solver, loss, tx_in, tx_out,
                         tdata.next(), self._weight_decay)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, te)
        monitor_time.add(i)
    ve = self._validate(vpred, vx_in, vx_out, vdata, self._val_iter)
    monitor_verr.add(i, ve / self._val_iter)

    # Save the model parameters
    nn.save_parameters(self._model_params_path)

def spectral_loss(self, x, target):
    r"""Returns the multi-scale spectral loss.

    Args:
        x (nn.Variable): Input variable.
        target (nn.Variable): Target variable.

    Returns:
        nn.Variable: Multi-scale spectral loss.
    """
    loss = []
    for window_size in self.hp.window_sizes:
        sx = log_mel_spectrogram(x, self.hp.sr, window_size)
        st = log_mel_spectrogram(target, self.hp.sr, window_size)
        st.need_grad = False  # avoid grads flowing through targets
        loss.append(F.mean(F.squared_error(sx, st)))
    return sum(loss)

def get_warp_loss(conf, rnn_length, frame_t, frame_t_pre, flow_lr):
    """
    Warp loss
    """
    input_frames = F.reshape(
        frame_t, (conf.train.batch_size * (rnn_length - 1),
                  conf.train.crop_size, conf.train.crop_size, 3))
    frame_t_pre_reshaped = F.reshape(
        frame_t_pre, (conf.train.batch_size * (rnn_length - 1),
                      conf.train.crop_size, conf.train.crop_size, 3))
    s_input_warp = warp_by_flow(frame_t_pre_reshaped, flow_lr)

    warp_loss = F.mean(
        F.sum(F.squared_error(input_frames, s_input_warp), axis=[3]))
    return warp_loss

def sr_loss_with_uncertainty_and_coef(ctx, pred0, pred1, log_var0, log_var1):
    c0 = srwu_learned_coef(ctx, log_var0)
    c1 = srwu_learned_coef(ctx, log_var1)
    sc0 = sigmas_learned_coef(ctx, log_var0, log_var1)
    sc1 = sigmas_learned_coef(ctx, log_var1, log_var0)
    c0.need_grad = False
    c1.need_grad = False
    sc0.need_grad = False
    sc1.need_grad = False

    # TODO: squared error/absolute error
    s0 = F.exp(log_var0)
    s1 = F.exp(log_var1)
    squared_error = F.squared_error(pred0, pred1)
    with nn.context_scope(ctx):
        loss_sr = F.mean(
            squared_error * (c0 / s0 + c1 / s1)
            + (sc0 * s0 / s1 + sc1 * s1 / s0)) * 0.5
    return loss_sr

def _build(self):
    generator_fn, discriminator_fn = self._network_funcs()

    # real shape
    ch, w, h = self.real.shape[1:]

    # inputs
    self.x = nn.Variable((1, ch, w, h))
    self.y = nn.Variable((1, ch, w, h))
    self.rec_x = nn.Variable((1, ch, w, h))
    self.rec_y = nn.Variable((1, ch, w, h))
    y_real = nn.Variable.from_numpy_array(self.real)
    y_real.persistent = True

    # padding inputs
    padded_x = _pad(self.x, self.kernel, self.num_layer)
    padded_rec_x = _pad(self.rec_x, self.kernel, self.num_layer)

    # generate fake image
    self.fake = generator_fn(x=padded_x, y=self.y)
    fake_without_grads = F.identity(self.fake)
    fake_without_grads.need_grad = False
    rec = generator_fn(x=padded_rec_x, y=self.rec_y)

    # discriminate images
    p_real = discriminator_fn(x=y_real)
    p_fake = discriminator_fn(x=self.fake)
    p_fake_without_grads = discriminator_fn(x=fake_without_grads)

    # gradient penalty for discriminator
    grad_penalty = _calc_gradient_penalty(y_real, fake_without_grads,
                                          discriminator_fn)

    # discriminator loss
    self.d_real_error = -F.mean(p_real)
    self.d_fake_error = F.mean(p_fake_without_grads)
    self.d_error = self.d_real_error + self.d_fake_error \
        + self.lam_grad * grad_penalty

    # generator loss
    self.rec_error = F.mean(F.squared_error(rec, y_real))
    self.g_fake_error = -F.mean(p_fake)
    self.g_error = self.g_fake_error + self.alpha_recon * self.rec_error

def __init__(self, num_actions, num_envs, batch_size, v_coeff, ent_coeff,
             lr_scheduler):
    # inference graph
    self.infer_obs_t = nn.Variable((num_envs, 4, 84, 84))
    self.infer_pi_t, \
        self.infer_value_t = cnn_network(self.infer_obs_t, num_actions,
                                         'network')
    self.infer_t = F.sink(self.infer_pi_t, self.infer_value_t)

    # evaluation graph
    self.eval_obs_t = nn.Variable((1, 4, 84, 84))
    self.eval_pi_t, _ = cnn_network(self.eval_obs_t, num_actions, 'network')

    # training graph
    self.obss_t = nn.Variable((batch_size, 4, 84, 84))
    self.acts_t = nn.Variable((batch_size, 1))
    self.rets_t = nn.Variable((batch_size, 1))
    self.advs_t = nn.Variable((batch_size, 1))

    pi_t, value_t = cnn_network(self.obss_t, num_actions, 'network')

    # value loss
    l2loss = F.squared_error(value_t, self.rets_t)
    self.value_loss = v_coeff * F.mean(l2loss)

    # policy loss
    log_pi_t = F.log(pi_t + 1e-20)
    a_one_hot = F.one_hot(self.acts_t, (num_actions, ))
    log_probs_t = F.sum(log_pi_t * a_one_hot, axis=1, keepdims=True)
    self.pi_loss = F.mean(log_probs_t * self.advs_t)

    # entropy bonus (encourages exploration)
    entropy = -ent_coeff * F.mean(F.sum(pi_t * log_pi_t, axis=1))

    self.loss = self.value_loss - self.pi_loss - entropy

    self.params = nn.get_parameters()
    self.solver = S.RMSprop(lr_scheduler(0.0), 0.99, 1e-5)
    self.solver.set_parameters(self.params)
    self.lr_scheduler = lr_scheduler

def vgg16_perceptual_loss(fake, real):
    '''
    VGG perceptual loss based on VGG-16 network.
    Assuming the values in fake and real are in [0, 255].
    '''
    from nnabla.models.imagenet import VGG16

    class VisitFeatures(object):
        def __init__(self):
            self.features = []
            self.relu_counter = 0
            # self.features_at = set([1, 4, 7, 10]) : ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3']
            self.features_at = set([4, 7])

        def __call__(self, f):
            if not f.name.startswith('ReLU'):
                return
            if self.relu_counter in self.features_at:
                self.features.append(f.outputs[0])
            self.relu_counter += 1

    vgg = VGG16()

    def get_features(x):
        o = vgg(x, use_up_to='lastconv')
        f = VisitFeatures()
        o.visit(f)
        return f

    with nn.parameter_scope("vgg16_loss"):
        fake_features = get_features(fake)
        real_features = get_features(real)

    return sum([F.mean(F.squared_error(ff, fr))
                for ff, fr in zip(fake_features.features,
                                  real_features.features)])

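# A minimal usage sketch for vgg16_perceptual_loss (hypothetical 224x224 batch;
# note that VGG16() fetches pretrained ImageNet weights on first use, so the
# model files must be available).
import numpy as np
import nnabla as nn

fake = nn.Variable.from_numpy_array(
    np.random.uniform(0, 255, (1, 3, 224, 224)).astype(np.float32))
real = nn.Variable.from_numpy_array(
    np.random.uniform(0, 255, (1, 3, 224, 224)).astype(np.float32))

perceptual = vgg16_perceptual_loss(fake, real)
perceptual.forward()
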
def sr_loss(ctx, pred0, pred1):
    with nn.context_scope(ctx):
        pred_x_u0 = F.softmax(pred0)
        pred_x_u1 = F.softmax(pred1)
        loss_sr = F.mean(F.squared_error(pred_x_u0, pred_x_u1))
    return loss_sr

def sr_loss(ctx, pred0, pred1):
    with nn.context_scope(ctx):
        loss_sr = F.mean(F.squared_error(pred0, pred1))
    return loss_sr

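# A minimal usage sketch for sr_loss (hypothetical prediction shapes; the CPU
# context here is an assumption): a simple consistency penalty between two
# predictions of the same input, e.g. under different dropout or noise.
import numpy as np
import nnabla as nn
from nnabla.ext_utils import get_extension_context

ctx = get_extension_context('cpu')
pred0 = nn.Variable.from_numpy_array(
    np.random.randn(16, 10).astype(np.float32))
pred1 = nn.Variable.from_numpy_array(
    np.random.randn(16, 10).astype(np.float32))

consistency = sr_loss(ctx, pred0, pred1)
consistency.forward()
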
def train(args):
    """
    Main script.
    """

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN

    # Fake path
    x1 = nn.Variable([args.batch_size, 1, 28, 28])
    # z = nn.Variable([args.batch_size, VEC_SIZE, 1, 1])
    # z = vectorizer(x1, maxh=1024)
    # fake = generator(z, maxh=1024)
    z = vectorizer(x1)
    fake = generator(z)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    loss_gen = F.mean(
        F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape)))
    loss_vec = F.mean(F.squared_error(fake, x1))
    fake_dis = fake.unlinked()
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(pred_fake_dis,
                                F.constant(0, pred_fake_dis.shape)))

    # Real path
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(
        F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    solver_vec = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("vec"):
        solver_vec.set_parameters(nn.get_parameters())
    with nn.parameter_scope("gen"):
        solver_vec.set_parameters(nn.get_parameters())
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries(
        "Discriminator loss", monitor, interval=10)
    monitor_loss_vec = M.MonitorSeries(
        "Vectorizer loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    monitor_fake = M.MonitorImageTile(
        "Fake images", monitor, normalize_method=lambda x: x + 1 / 2.)
    monitor_vec1 = M.MonitorImageTile(
        "vec images1", monitor, normalize_method=lambda x: x + 1 / 2.)
    monitor_vec2 = M.MonitorImageTile(
        "vec images2", monitor, normalize_method=lambda x: x + 1 / 2.)

    # data = data_iterator_mnist(args.batch_size, True)
    data = iterator.simple_data_iterator(
        load_kanji_data(), args.batch_size, True)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("gen"):
                nn.save_parameters(os.path.join(
                    args.model_save_path, "generator_param_%06d.h5" % i))
            with nn.parameter_scope("dis"):
                nn.save_parameters(os.path.join(
                    args.model_save_path, "discriminator_param_%06d.h5" % i))

        # Training forward
        image, _ = data.next()
        x1.d = image / 255. - 0.5

        # Vectorizer update.
        solver_vec.zero_grad()
        loss_vec.forward(clear_no_need_grad=True)
        loss_vec.backward(clear_buffer=True)
        solver_vec.weight_decay(args.weight_decay)
        solver_vec.update()
        monitor_vec1.add(i, fake)
        monitor_vec2.add(i, x1)
        monitor_loss_vec.add(i, loss_vec.d.copy())

        x.d = image / 255. - 0.5  # [0, 255] to [-0.5, 0.5]
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    with nn.parameter_scope("gen"):
        nn.save_parameters(os.path.join(
            args.model_save_path, "generator_param_%06d.h5" % i))
    with nn.parameter_scope("dis"):
        nn.save_parameters(os.path.join(
            args.model_save_path, "discriminator_param_%06d.h5" % i))

def recon_loss(ctx, pred, x_l):
    with nn.context_scope(ctx):
        loss_recon = F.mean(F.squared_error(pred, x_l))
    return loss_recon