def sigmoid_ce(logits, value, mask=None, eps=1e-5):
    # sigmoid cross entropy and reduce_mean
    sce = F.sigmoid_cross_entropy(
        logits, F.constant(val=value, shape=logits.shape))
    if mask is not None:
        assert sce.shape[:2] == mask.shape[:2]
        sce *= F.reshape(mask, sce.shape)
        return F.sum(sce) / (F.sum(mask) + eps)
    return F.mean(sce)
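# Usage sketch for sigmoid_ce (illustrative, not from the original source):
# the imports and the variable shapes below are assumptions chosen only to
# show the masked and unmasked reductions.
import nnabla as nn
import nnabla.functions as F

logits = nn.Variable((8, 4, 1))   # hypothetical per-patch logits
mask = nn.Variable((8, 4, 1))     # 1 = valid position, 0 = ignored
loss_all = sigmoid_ce(logits, 1.0)               # plain mean over every element
loss_valid = sigmoid_ce(logits, 0.0, mask=mask)  # mean over masked-in elements only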
def __init__(self, batch_size=32, learning_rate=1e-4, max_iter=5086,
             total_epochs=20, monitor_path=None, val_weight=None,
             model_load_path=None):
    """
    Construct all the necessary attributes for the attribute classifier.

    Args:
        batch_size (int): number of samples contained in each generated batch
        learning_rate (float): learning rate
        max_iter (int): maximum iterations for an epoch
        total_epochs (int): total epochs to train the model
        monitor_path (str): path where model parameters are saved
        val_weight: sample weights for validation
        model_load_path (str): path of pre-trained parameters to load
    """
    self.batch_size = batch_size

    # ResNet-50
    # training graph
    model = ResNet50()
    self.input_image = nn.Variable((self.batch_size, ) + model.input_shape)
    self.label = nn.Variable([self.batch_size, 1])
    # fine tuning
    pool = model(self.input_image, training=True, use_up_to='pool')
    self.clf = clf_resnet50(pool)
    self.clf.persistent = True
    # loss
    self.loss = F.mean(F.sigmoid_cross_entropy(self.clf, self.label))
    # hyper parameters
    self.solver = S.Adam(learning_rate)
    self.solver.set_parameters(nn.get_parameters())

    # validation graph
    self.x_v = nn.Variable((self.batch_size, ) + model.input_shape)
    pool_v = model(self.x_v, training=False, use_up_to='pool')
    self.v_clf = clf_resnet50(pool_v, train=False)
    self.v_clf_out = F.sigmoid(self.v_clf)
    self.print_freq = 100
    self.validation_weight = val_weight

    # val params
    self.acc = 0.0
    self.total_epochs = total_epochs
    self.max_iter = max_iter
    self.monitor_path = monitor_path
    if model_load_path is not None:
        _ = nn.load_parameters(model_load_path)
def adversarial_loss(self, results, v):
    r"""Returns the adversarial loss.

    Args:
        results (list): Outputs from the discriminator.
        v (float): Target value; 1.0 for real, 0.0 for fake.

    Returns:
        nn.Variable: Output variable.
    """
    loss = []
    for out in results:
        t = F.constant(v, shape=out[-1].shape)
        r = F.sigmoid_cross_entropy(out[-1], t)
        loss.append(F.mean(r))
    return sum(loss)
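# Usage sketch for adversarial_loss (illustrative, not from the original
# source): `results` is assumed to be a list of per-scale discriminator
# outputs whose last element is the patch logit map; passing None for `self`
# works here only because the method never touches it.
import nnabla as nn

results = [[nn.Variable((4, 1, 16, 16))], [nn.Variable((4, 1, 8, 8))]]
g_adv = adversarial_loss(None, results, 1.0)  # generator: push fakes toward "real"
d_adv = adversarial_loss(None, results, 0.0)  # discriminator: score fakes as "fake"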
def update_graph(self, key='train'):
    r"""Builds the graph and updates the placeholder.

    Args:
        key (str, optional): Type of computational graph. Defaults to 'train'.
    """
    assert key in ('train', 'valid')
    self.model.training = key != 'valid'
    hp = self.hparams

    # define input variables
    x_txt = nn.Variable([hp.batch_size, hp.text_len])
    x_mel = nn.Variable([hp.batch_size, hp.mel_len, hp.n_mels * hp.r])
    x_gat = nn.Variable([hp.batch_size, hp.mel_len])

    # output variables
    o_mel, o_mel_p, o_gat, o_att = self.model(x_txt, x_mel)
    o_mel = o_mel.apply(persistent=True)
    o_mel_p = o_mel_p.apply(persistent=True)
    o_gat = o_gat.apply(persistent=True)
    o_att = o_att.apply(persistent=True)

    # loss functions
    def criteria(x, t):
        return F.mean(F.squared_error(x, t))

    l_mel = (criteria(o_mel, x_mel)
             + criteria(o_mel_p, x_mel)).apply(persistent=True)
    l_gat = F.mean(F.sigmoid_cross_entropy(o_gat, x_gat)).apply(persistent=True)
    l_net = (l_mel + l_gat).apply(persistent=True)

    self.placeholder[key] = {
        'x_mel': x_mel, 'x_gat': x_gat, 'x_txt': x_txt,
        'o_mel': o_mel, 'o_mel_p': o_mel_p, 'o_gat': o_gat, 'o_att': o_att,
        'l_mel': l_mel, 'l_gat': l_gat, 'l_net': l_net
    }
    self.out_variables = ['train/l_mel', 'train/l_gat', 'train/l_net']
def main():
    # Get arguments
    args = get_args()
    data_file = "https://raw.githubusercontent.com/tomsercu/lstm/master/data/ptb.train.txt"
    model_file = args.work_dir + "model.h5"

    # Load Dataset
    itow, wtoi, dataset = load_ptbset(data_file)

    # Computation environment settings
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create data provider
    n_word = len(wtoi)
    n_dim = args.embed_dim
    batchsize = args.batchsize
    half_window = args.half_window_length
    n_negative = args.n_negative_sample

    di = DataIteratorForEmbeddingLearning(
        batchsize=batchsize,
        half_window=half_window,
        n_negative=n_negative,
        dataset=dataset)

    # Create model
    # - Real batch size including context samples and negative samples
    size = batchsize * (1 + n_negative) * (2 * (half_window - 1))

    # Model for learning
    # - input variables
    xl = nn.Variable((size,))  # variable for word
    yl = nn.Variable((size,))  # variable for context

    # Embed layers for word embedding function
    # - f_embed : word index x to get y, the n_dim vector
    # -- for each sample in a minibatch
    hx = PF.embed(xl, n_word, n_dim, name="e1")  # feature vector for word
    hy = PF.embed(yl, n_word, n_dim, name="e1")  # feature vector for context
    hl = F.sum(hx * hy, axis=1)

    # -- Approximated likelihood of context prediction
    # pos: word in context, neg: negative samples
    tl = nn.Variable([size, ], need_grad=False)
    loss = F.sigmoid_cross_entropy(hl, tl)
    loss = F.mean(loss)

    # Model for test of searching similar words
    xr = nn.Variable((1,), need_grad=False)
    hr = PF.embed(xr, n_word, n_dim, name="e1")  # feature vector for test

    # Create solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    monitor = M.Monitor(args.work_dir)
    monitor_loss = M.MonitorSeries(
        "Training loss", monitor, interval=args.monitor_interval)
    monitor_time = M.MonitorTimeElapsed(
        "Training time", monitor, interval=args.monitor_interval)

    # Do training
    max_epoch = args.max_epoch
    for epoch in range(max_epoch):

        # iteration per epoch
        for i in range(di.n_batch):

            # get minibatch
            xi, yi, ti = di.next()

            # learn
            solver.zero_grad()
            xl.d, yl.d, tl.d = xi, yi, ti
            loss.forward(clear_no_need_grad=True)
            loss.backward(clear_buffer=True)
            solver.update()

            # monitor
            itr = epoch * di.n_batch + i
            monitor_loss.add(itr, loss.d)
            monitor_time.add(itr)

    # Save model
    nn.save_parameters(model_file)

    # Evaluate by similarity
    max_check_words = args.max_check_words
    for i in range(max_check_words):

        # prediction
        xr.d = i
        hr.forward(clear_buffer=True)
        h = hr.d

        # similarity calculation
        w = nn.get_parameters()['e1/embed/W'].d
        s = np.sqrt((w * w).sum(1))
        w /= s.reshape((s.shape[0], 1))
        similarity = w.dot(h[0]) / s[i]

        # for understanding
        output_similar_words(itow, i, similarity)
def train(args):
    """
    Main script.
    """

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN

    # Fake path
    z = nn.Variable([args.batch_size, 100, 1, 1])
    fake = generator(z)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    loss_gen = F.mean(F.sigmoid_cross_entropy(
        pred_fake, F.constant(1, pred_fake.shape)))
    fake_dis = fake.unlinked()
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(F.sigmoid_cross_entropy(
        pred_fake_dis, F.constant(0, pred_fake_dis.shape)))

    # Real path
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(F.sigmoid_cross_entropy(pred_real,
                                               F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries(
        "Discriminator loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    monitor_fake = M.MonitorImageTile(
        "Fake images", monitor, normalize_method=lambda x: x + 1 / 2.)

    data = data_iterator_mnist(args.batch_size, True)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("gen"):
                nn.save_parameters(os.path.join(
                    args.model_save_path, "generator_param_%06d.h5" % i))
            with nn.parameter_scope("dis"):
                nn.save_parameters(os.path.join(
                    args.model_save_path, "discriminator_param_%06d.h5" % i))

        # Training forward
        image, _ = data.next()
        x.d = image / 255. - 0.5  # [0, 255] to [-0.5, 0.5]
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    nnp = os.path.join(
        args.model_save_path, 'dcgan_%06d.nnp' % args.max_iter)
    runtime_contents = {
        'networks': [
            {'name': 'Generator',
             'batch_size': args.batch_size,
             'outputs': {'G': fake},
             'names': {'z': z}},
            {'name': 'Discriminator',
             'batch_size': args.batch_size,
             'outputs': {'D': pred_real},
             'names': {'x': x}}],
        'executors': [
            {'name': 'Generator',
             'network': 'Generator',
             'data': ['z'],
             'output': ['G']},
            {'name': 'Discriminator',
             'network': 'Discriminator',
             'data': ['x'],
             'output': ['D']}]}
    save.save(nnp, runtime_contents)

    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [z.d], [z], fake, nnp, "Generator")
def vae(x, shape_z, test=False):
    """
    Computes the ELBO (evidence lower bound) loss.
    This sample is a Bernoulli generator version.

    Args:
        x (`~nnabla.Variable`): N-D array
        shape_z (tuple of int): size of z
        test (bool): False for training (sampling with the reparameterization
            trick), True for deterministic evaluation (z = mu)

    Returns:
        ~nnabla.Variable: ELBO loss
    """

    #############################################
    # Encoder of 2 fully connected layers       #
    #############################################

    # Normalize input
    xa = x / 256.
    batch_size = x.shape[0]

    # 2 fully connected layers, and Elu replaced from original Softplus.
    h = F.elu(PF.affine(xa, (500, ), name='fc1'))
    h = F.elu(PF.affine(h, (500, ), name='fc2'))

    # The outputs are the parameters of Gauss probability density.
    mu = PF.affine(h, shape_z, name='fc_mu')
    logvar = PF.affine(h, shape_z, name='fc_logvar')
    sigma = F.exp(0.5 * logvar)

    # The prior variable and the reparameterization trick
    if not test:
        # training with reparameterization trick
        epsilon = F.randn(mu=0, sigma=1, shape=(batch_size, ) + shape_z)
        z = mu + sigma * epsilon
    else:
        # test without randomness
        z = mu

    #############################################
    # Decoder of 2 fully connected layers       #
    #############################################

    # 2 fully connected layers, and Elu replaced from original Softplus.
    h = F.elu(PF.affine(z, (500, ), name='fc3'))
    h = F.elu(PF.affine(h, (500, ), name='fc4'))

    # The outputs are the parameters of Bernoulli probabilities for each pixel.
    prob = PF.affine(h, (1, 28, 28), name='fc5')

    #############################################
    # Elbo components and loss objective        #
    #############################################

    # Binarized input
    xb = F.greater_equal_scalar(xa, 0.5)

    # E_q(z|x)[log(q(z|x))]
    # omitting constant terms that cancel when the losses are summed
    logqz = 0.5 * F.sum(1.0 + logvar, axis=1)

    # E_q(z|x)[log(p(z))]
    # omitting constant terms that cancel when the losses are summed
    logpz = 0.5 * F.sum(mu * mu + sigma * sigma, axis=1)

    # E_q(z|x)[log(p(x|z))]
    logpx = F.sum(F.sigmoid_cross_entropy(prob, xb), axis=(1, 2, 3))

    # VAE loss: the negative evidence lower bound
    loss = F.mean(logpx + logpz - logqz)

    return loss
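# Training-side sketch for vae (illustrative; the batch size, latent size and
# solver settings are assumptions, not values from the original source).
import numpy as np
import nnabla as nn
import nnabla.solvers as S

x = nn.Variable((100, 1, 28, 28))        # MNIST-sized input batch
loss = vae(x, shape_z=(50,), test=False)
solver = S.Adam(1e-3)
solver.set_parameters(nn.get_parameters())

x.d = np.random.randint(0, 256, size=x.shape)  # stand-in pixel data
solver.zero_grad()
loss.forward()
loss.backward()
solver.update()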
def train(args):
    x1 = nn.Variable([args.batch_size, 1, 28, 28])
    z_vec = vectorizer(x1)
    z = z_vec.unlinked()
    fake2 = generator(z_vec)
    fake = generator(z)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    loss_gen = F.mean(
        F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape)))
    loss_vec = F.mean(F.squared_error(fake2, x1))
    fake_dis = fake.unlinked()
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(pred_fake_dis,
                                F.constant(0, pred_fake_dis.shape)))

    # Real path
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(
        F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    solver_vec = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("vec"):
        solver_vec.set_parameters(nn.get_parameters())
    with nn.parameter_scope("gen"):
        # Add the generator parameters without discarding the vectorizer ones;
        # the default set_parameters(reset=True) would overwrite them.
        solver_vec.set_parameters(nn.get_parameters(), reset=False,
                                  retain_state=True)
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries("Discriminator loss", monitor,
                                       interval=10)
    monitor_loss_vec = M.MonitorSeries("Vectorizer loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    monitor_fake = M.MonitorImageTile("Fake images", monitor,
                                      normalize_method=lambda x: x + 1 / 2.)
    monitor_vec1 = M.MonitorImageTile("vec images1", monitor,
                                      normalize_method=lambda x: x + 1 / 2.)
    monitor_vec2 = M.MonitorImageTile("vec images2", monitor,
                                      normalize_method=lambda x: x + 1 / 2.)

    # data = data_iterator_mnist(args.batch_size, True)
    data = iterator.simple_data_iterator(load_kanji_data(), args.batch_size,
                                         True)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("gen"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "generator_param_%06d.h5" % i))
            with nn.parameter_scope("dis"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "discriminator_param_%06d.h5" % i))
            with nn.parameter_scope("vec"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "vectorizer_param_%06d.h5" % i))

        # Training forward
        image, _ = data.next()
        x1.d = image / 255. * 2 - 1.0  # [0, 255] to [-1, 1]

        # Vectorizer update.
        solver_vec.zero_grad()
        loss_vec.forward(clear_no_need_grad=True)
        loss_vec.backward(clear_buffer=True)
        solver_vec.weight_decay(args.weight_decay)
        solver_vec.update()
        fake2.forward()
        monitor_vec1.add(i, fake2)
        monitor_vec2.add(i, x1)
        monitor_loss_vec.add(i, loss_vec.d.copy())

        x.d = image / 255. * 2 - 1.0  # [0, 255] to [-1, 1]
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)
def train(args):
    """
    Main script.
    """

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context, device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN

    # Fake path
    z = nn.Variable([args.batch_size, 100, 1, 1])
    fake = generator(z)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    loss_gen = F.mean(
        F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape)))
    fake_dis = fake.get_unlinked_variable(need_grad=True)
    fake_dis.need_grad = True  # TODO: Workaround until v1.0.2
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(pred_fake_dis,
                                F.constant(0, pred_fake_dis.shape)))

    # Real path
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(
        F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    start_point = 0
    if args.checkpoint is not None:
        # load weights and solver state info from specified checkpoint files.
        start_point = load_checkpoint(args.checkpoint, {
            "gen": solver_gen,
            "dis": solver_dis
        })

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries("Discriminator loss", monitor,
                                       interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    monitor_fake = M.MonitorImageTile("Fake images", monitor,
                                      normalize_method=lambda x: (x + 1) / 2.)

    data = data_iterator_mnist(args.batch_size, True)

    # Save_nnp
    contents = save_nnp({'x': z}, {'y': fake}, args.batch_size)
    save.save(
        os.path.join(args.model_save_path, 'Generator_result_epoch0.nnp'),
        contents)
    contents = save_nnp({'x': x}, {'y': pred_real}, args.batch_size)
    save.save(
        os.path.join(args.model_save_path, 'Discriminator_result_epoch0.nnp'),
        contents)

    # Training loop.
    for i in range(start_point, args.max_iter):
        if i % args.model_save_interval == 0:
            save_checkpoint(args.model_save_path, i, {
                "gen": solver_gen,
                "dis": solver_dis
            })

        # Training forward
        image, _ = data.next()
        x.d = image / 255. - 0.5  # [0, 255] to [-0.5, 0.5]
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    with nn.parameter_scope("gen"):
        nn.save_parameters(
            os.path.join(args.model_save_path, "generator_param_%06d.h5" % i))
    with nn.parameter_scope("dis"):
        nn.save_parameters(
            os.path.join(args.model_save_path,
                         "discriminator_param_%06d.h5" % i))

    # Save_nnp
    contents = save_nnp({'x': z}, {'y': fake}, args.batch_size)
    save.save(os.path.join(args.model_save_path, 'Generator_result.nnp'),
              contents)
    contents = save_nnp({'x': x}, {'y': pred_real}, args.batch_size)
    save.save(os.path.join(args.model_save_path, 'Discriminator_result.nnp'),
              contents)
def classification_loss(x, label):
    return F.sum(F.sigmoid_cross_entropy(x, label)) / x.shape[0]
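# Minimal sketch for classification_loss (hypothetical shapes): 40 binary
# attribute logits per sample, reduced to the summed cross entropy divided by
# the batch size.
import nnabla as nn

x = nn.Variable((16, 40))       # attribute logits
label = nn.Variable((16, 40))   # 0/1 attribute targets
loss = classification_loss(x, label)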
def idr_loss(camloc, raydir, alpha, color_gt, mask_obj, conf):
    # Setting
    B, R, _ = raydir.shape
    L = conf.layers
    D = conf.depth
    feature_size = conf.feature_size

    # Ray trace (visibility)
    x_hit, mask_hit, dists, mask_pin, mask_pout = \
        ray_trace(partial(sdf_net, conf=conf), camloc, raydir, mask_obj,
                  t_near=conf.t_near, t_far=conf.t_far,
                  sphere_trace_itr=conf.sphere_trace_itr,
                  ray_march_points=conf.ray_march_points,
                  n_chunks=conf.n_chunks,
                  max_post_itr=conf.max_post_itr,
                  post_method=conf.post_method,
                  eps=conf.eps)
    x_hit = x_hit.apply(need_grad=False)
    mask_hit = mask_hit.apply(need_grad=False, persistent=True)
    dists = dists.apply(need_grad=False)
    mask_pin = mask_pin.apply(need_grad=False)
    mask_pout = mask_pout.apply(need_grad=False)
    mask_us = mask_pin + mask_pout
    P = F.sum(mask_us)

    # Current points
    x_curr = (camloc.reshape((B, 1, 3)) + dists * raydir).apply(need_grad=True)

    # Eikonal loss
    bounding_box_size = conf.bounding_box_size
    x_free = F.rand(-bounding_box_size, bounding_box_size,
                    shape=(B, R // 2, 3))
    x_point = F.concatenate(*[x_curr, x_free], axis=1)
    sdf_xp, _, grad_xp = sdf_feature_grad(implicit_network, x_point, conf)
    gp = (F.norm(grad_xp, axis=[grad_xp.ndim - 1], keepdims=True) - 1.0) ** 2.0
    loss_eikonal = F.sum(gp[:, :R, :] * mask_us) + F.sum(gp[:, R:, :])
    loss_eikonal = loss_eikonal / (P + B * R // 2)
    loss_eikonal = loss_eikonal.apply(persistent=True)
    sdf_curr = sdf_xp[:, :R, :]
    grad_curr = grad_xp[:, :R, :]

    # Mask loss
    logit = -alpha.reshape([1 for _ in range(sdf_curr.ndim)]) * sdf_curr
    loss_mask = F.sigmoid_cross_entropy(logit, mask_obj)
    loss_mask = loss_mask * mask_pout
    loss_mask = F.sum(loss_mask) / P / alpha
    loss_mask = loss_mask.apply(persistent=True)

    # Lighting
    x_hat = sample_network(x_curr, sdf_curr, raydir, grad_curr)
    _, feature, grad = sdf_feature_grad(implicit_network, x_hat, conf)
    normal = grad
    color_pred = lighting_network(x_hat, normal, feature, -raydir, D)

    # Color loss
    loss_color = F.absolute_error(color_gt, color_pred)
    loss_color = loss_color * mask_pin
    loss_color = F.sum(loss_color) / P
    loss_color = loss_color.apply(persistent=True)

    # Total loss
    loss = loss_color + conf.mask_weight * loss_mask \
        + conf.eikonal_weight * loss_eikonal

    return loss, loss_color, loss_mask, loss_eikonal, mask_hit
def loss(p, t):
    return F.mean(F.sum(F.sigmoid_cross_entropy(p, t), axis=1))
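# Sketch comparing the two reductions above (assumes both helpers are in
# scope): summing over axis=1 and averaging over the batch gives the same
# scalar as dividing the total sum by the batch size.
import numpy as np
import nnabla as nn

p = nn.Variable((8, 5))
t = nn.Variable((8, 5))
p.d = np.random.randn(8, 5)
t.d = np.random.randint(0, 2, size=(8, 5))
l1 = loss(p, t)                  # F.mean(F.sum(..., axis=1))
l2 = classification_loss(p, t)   # F.sum(...) / batch_size
l1.forward()
l2.forward()
assert np.isclose(l1.d, l2.d)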
def train(batch_size, X_train, max_iter):
    from nnabla.ext_utils import get_extension_context
    context = "cpu"
    ctx = get_extension_context(context, device_id="0", type_config="float")
    nn.set_default_context(ctx)

    # Fake path
    z = nn.Variable([batch_size, 100, 1, 1])
    fake = generator(z)
    fake.persistent = True
    pred_fake = discriminator(fake)
    labels = func.constant(1, pred_fake.shape)
    loss_gen = func.mean(func.sigmoid_cross_entropy(pred_fake, labels))

    fake_disc = fake.get_unlinked_variable(need_grad=True)
    pred_fake_disc = discriminator(fake_disc)
    disc_fake_label = func.constant(0, pred_fake_disc.shape)
    loss_disc_fake = func.mean(
        func.sigmoid_cross_entropy(pred_fake_disc, disc_fake_label))

    # Real path: real samples are labeled 1 and scored with the real-path logits.
    r = nn.Variable([batch_size, 784])
    real_pred = discriminator(r)
    disc_real_label = func.constant(1, real_pred.shape)
    loss_disc_real = func.mean(
        func.sigmoid_cross_entropy(real_pred, disc_real_label))

    loss_disc = loss_disc_real + loss_disc_fake

    solver_gen = sol.Adam(0.0002, beta1=0.5)
    solver_disc = sol.Adam(0.0002, beta1=0.5)
    # Register parameters with each solver; the scope names are assumed to
    # match those used for saving below.
    with nn.parameter_scope("generator"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("discriminator"):
        solver_disc.set_parameters(nn.get_parameters())

    for i in range(0, max_iter):
        index = np.random.randint(0, X_train.shape[0], size=batch_size)
        input_image = X_train[index]
        r.d = input_image
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(0.0001)
        solver_gen.update()

        # Discriminator update.
        solver_disc.zero_grad()
        loss_disc.forward(clear_no_need_grad=True)
        loss_disc.backward(clear_buffer=True)
        solver_disc.weight_decay(0.0001)
        solver_disc.update()

        print("iteration-->[%d]-------loss_generator-->[%f]-------"
              "loss_discriminator-->[%f]" % (i, loss_gen.d, loss_disc.d))

        if i % 100 == 0:
            with nn.parameter_scope("generator"):
                nn.save_parameters(
                    "/home/vaibhav/deep_learning/gan/code/gen_weights/epoch_%d.h5"
                    % i)
            with nn.parameter_scope("discriminator"):
                nn.save_parameters(
                    "/home/vaibhav/deep_learning/gan/code/disc_weights/epoch_%d.h5"
                    % i)
def train(args):
    """
    Main script.
    """

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN

    # Fake path
    z = nn.Variable([args.batch_size, 100, 1, 1])
    fake = generator(z)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    loss_gen = F.mean(
        F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape)))
    fake_dis = fake.unlinked()
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(pred_fake_dis,
                                F.constant(0, pred_fake_dis.shape)))

    # Real path
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(
        F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries("Discriminator loss", monitor,
                                       interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    monitor_fake = M.MonitorImageTile("Fake images", monitor,
                                      normalize_method=lambda x: x + 1 / 2.)

    data = data_iterator_mnist(args.batch_size, True)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("gen"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "generator_param_%06d.h5" % i))
            with nn.parameter_scope("dis"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "discriminator_param_%06d.h5" % i))

        # Training forward
        image, _ = data.next()
        x.d = image / 255. - 0.5  # [0, 255] to [-0.5, 0.5]
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    nnp = os.path.join(args.model_save_path, 'dcgan_%06d.nnp' % args.max_iter)
    runtime_contents = {
        'networks': [{
            'name': 'Generator',
            'batch_size': args.batch_size,
            'outputs': {'G': fake},
            'names': {'z': z}
        }, {
            'name': 'Discriminator',
            'batch_size': args.batch_size,
            'outputs': {'D': pred_real},
            'names': {'x': x}
        }],
        'executors': [{
            'name': 'Generator',
            'network': 'Generator',
            'data': ['z'],
            'output': ['G']
        }, {
            'name': 'Discriminator',
            'network': 'Discriminator',
            'data': ['x'],
            'output': ['D']
        }]
    }
    save.save(nnp, runtime_contents)

    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [z.d], [z], fake, nnp, "Generator")