def train(args):
    """
    Train a DCGAN on MNIST and export the trained networks.

    One generator update and one discriminator update are performed per
    iteration. Generator and discriminator parameters are saved every
    ``args.model_save_interval`` iterations. After training, both networks
    are written to an NNP file and the generator is passed to
    ``check_cpp_forward``.

    Args:
        args: Parsed options. Reads ``context``, ``device_id``,
            ``batch_size``, ``learning_rate``, ``weight_decay``,
            ``max_iter``, ``model_save_interval``, ``model_save_path`` and
            ``monitor_path``.
    """
    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN

    # Fake path
    z = nn.Variable([args.batch_size, 100, 1, 1])
    fake = generator(z)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    # Generator objective: the discriminator should label fakes as 1.
    loss_gen = F.mean(F.sigmoid_cross_entropy(
        pred_fake, F.constant(1, pred_fake.shape)))
    # Unlinked variable, so that the discriminator loss is built on a graph
    # that does not include the generator.
    fake_dis = fake.unlinked()
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(F.sigmoid_cross_entropy(
        pred_fake_dis, F.constant(0, pred_fake_dis.shape)))

    # Real path
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(F.sigmoid_cross_entropy(
        pred_real, F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries(
        "Discriminator loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    # (x + 1) / 2 maps [-1, 1] to [0, 1]. The parentheses are required:
    # `x + 1 / 2.` is parsed as `x + 0.5`.
    # NOTE(review): real images below are scaled to [-0.5, 0.5]; confirm the
    # generator's output range matches the [-1, 1] assumed here.
    monitor_fake = M.MonitorImageTile(
        "Fake images", monitor, normalize_method=lambda x: (x + 1) / 2.)

    data = data_iterator_mnist(args.batch_size, True)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("gen"):
                nn.save_parameters(os.path.join(
                    args.model_save_path, "generator_param_%06d.h5" % i))
            with nn.parameter_scope("dis"):
                nn.save_parameters(os.path.join(
                    args.model_save_path, "discriminator_param_%06d.h5" % i))

        # Training forward
        image, _ = data.next()
        x.d = image / 255. - 0.5  # [0, 255] to [-0.5, 0.5]
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        # Copy so the monitor does not keep a reference to the live buffer.
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    # Export both networks as a single NNP file.
    nnp = os.path.join(
        args.model_save_path, 'dcgan_%06d.nnp' % args.max_iter)
    runtime_contents = {
        'networks': [
            {'name': 'Generator',
             'batch_size': args.batch_size,
             'outputs': {'G': fake},
             'names': {'z': z}},
            {'name': 'Discriminator',
             'batch_size': args.batch_size,
             'outputs': {'D': pred_real},
             'names': {'x': x}}],
        'executors': [
            {'name': 'Generator',
             'network': 'Generator',
             'data': ['z'],
             'output': ['G']},
            {'name': 'Discriminator',
             'network': 'Discriminator',
             'data': ['x'],
             'output': ['D']}]}

    save.save(nnp, runtime_contents)
    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [z.d], [z], fake, nnp, "Generator")
def main():
    """
    Train an MLP on MNIST with Virtual Adversarial Training (VAT).

    Steps:

    * Get and set context.
    * Load Dataset
    * Initialize DataIterator.
    * Create Networks
      * Net for Labeled Data
      * Net for Unlabeled Data
      * Net for Test Data
    * Create Solver.
    * Training Loop.
      * Test
      * Training
        * by Labeled Data
          * Calculate Cross Entropy Loss
        * by Unlabeled Data
          * Estimate Adversarial Direction
          * Calculate LDS Loss
    * Evaluate the final model, export it as NNP and run
      ``check_cpp_forward`` on the validation network.
    """
    args = get_args()

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    shape_x = (1, 28, 28)
    n_h = args.n_units
    n_y = args.n_class

    # Load MNIST Dataset
    from mnist_data import MnistDataSource
    with MnistDataSource(train=True) as d:
        x_t = d.images
        t_t = d.labels
    with MnistDataSource(train=False) as d:
        x_v = d.images
        t_v = d.labels
    x_t = np.array(x_t / 256.0).astype(np.float32)
    x_t, t_t = x_t[:args.n_train], t_t[:args.n_train]
    x_v, t_v = x_v[:args.n_valid], t_v[:args.n_valid]

    # Create Semi-supervised Datasets
    x_l, t_l, x_u, _ = split_dataset(x_t, t_t, args.n_labeled, args.n_class)
    # The labeled images are also used as unlabeled samples.
    x_u = np.r_[x_l, x_u]
    x_v = np.array(x_v / 256.0).astype(np.float32)

    # Create DataIterators for datasets of labeled, unlabeled and validation
    di_l = DataIterator(args.batchsize_l, [x_l, t_l])
    di_u = DataIterator(args.batchsize_u, [x_u])
    di_v = DataIterator(args.batchsize_v, [x_v, t_v])

    # Create networks
    # feed-forward-net building function
    def forward(x, test=False):
        return mlp_net(x, n_h, n_y, test)

    # Net for learning labeled data
    xl = nn.Variable((args.batchsize_l,) + shape_x, need_grad=False)
    hl = forward(xl, test=False)
    tl = nn.Variable((args.batchsize_l, 1), need_grad=False)
    loss_l = F.mean(F.softmax_cross_entropy(hl, tl))

    # Net for learning unlabeled data
    xu = nn.Variable((args.batchsize_u,) + shape_x, need_grad=False)
    # r needs a gradient: the power method below reads r.g.
    r = nn.Variable((args.batchsize_u,) + shape_x, need_grad=True)
    eps = nn.Variable((args.batchsize_u,) + shape_x, need_grad=False)
    loss_u, yu = vat(xu, r, eps, forward, distance)

    # Net for evaluating validation data
    xv = nn.Variable((args.batchsize_v,) + shape_x, need_grad=False)
    hv = forward(xv, test=True)
    tv = nn.Variable((args.batchsize_v, 1), need_grad=False)

    # Create solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Monitor training and validation stats.
    import nnabla.monitor as M
    monitor = M.Monitor(args.model_save_path)
    monitor_verr = M.MonitorSeries("Test error", monitor, interval=240)
    monitor_time = M.MonitorTimeElapsed("Elapsed time", monitor, interval=240)

    # Training Loop.
    for i in range(args.max_iter):

        # Validation Test
        if i % args.val_interval == 0:
            n_error = calc_validation_error(
                di_v, xv, tv, hv, args.val_iter)
            monitor_verr.add(i, n_error)

        #################################
        ## Training by Labeled Data #####
        #################################

        # input minibatch of labeled data into variables
        xl.d, tl.d = di_l.next()

        # initialize gradients
        solver.zero_grad()

        # forward, backward and update
        loss_l.forward(clear_no_need_grad=True)
        loss_l.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        #################################
        ## Training by Unlabeled Data ###
        #################################

        # input minibatch of unlabeled data into variables
        xu.d, = di_u.next()

        ##### Calculate Adversarial Noise #####

        # Sample random noise
        n = np.random.normal(size=xu.shape).astype(np.float32)

        # Normalize noise vector and input to variable
        r.d = get_direction(n)

        # Set xi, the power-method scaling parameter.
        eps.data.fill(args.xi_for_vat)

        # Calculate y without noise, only once.
        yu.forward(clear_buffer=True)

        # Do power method iteration
        for _ in range(args.n_iter_for_power_method):
            # Initialize gradient to receive value
            r.grad.zero()

            # forward, backward, without update
            loss_u.forward(clear_no_need_grad=True)
            loss_u.backward(clear_buffer=True)

            # Normalize gradient vector and input to variable
            r.d = get_direction(r.g)

        ##### Calculate loss for unlabeled data #####

        # Clear remained gradients
        solver.zero_grad()

        # Set epsilon, the adversarial noise scaling parameter.
        eps.data.fill(args.eps_for_vat)

        # forward, backward and update
        loss_u.forward(clear_no_need_grad=True)
        loss_u.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        ##### Learning rate update #####
        if i % args.iter_per_epoch == 0:
            solver.set_learning_rate(
                solver.learning_rate() * args.learning_rate_decay)
        monitor_time.add(i)

    # Evaluate the final model by the error rate with validation dataset
    valid_error = calc_validation_error(di_v, xv, tv, hv, args.val_iter)
    monitor_verr.add(i, valid_error)
    monitor_time.add(i)

    # Save the model.
    nnp_file = os.path.join(
        args.model_save_path, 'vat_%06d.nnp' % args.max_iter)
    runtime_contents = {
        'networks': [
            {'name': 'Validation',
             'batch_size': args.batchsize_v,
             'outputs': {'y': hv},
             'names': {'x': xv}}],
        'executors': [
            {'name': 'Runtime',
             'network': 'Validation',
             'data': ['x'],
             'output': ['y']}]}
    save.save(nnp_file, runtime_contents)

    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [xv.d], [xv], hv, nnp_file)
def train():
    """
    Train an MNIST classifier and export the validation network.

    The network builder is ``mnist_lenet_prediction`` unless ``args.net`` is
    ``'resnet'``, in which case ``mnist_resnet_prediction`` is used.

    Steps:

    * Parse command line arguments.
    * Specify a context for computation.
    * Construct computation graphs for training and validation.
    * Initialize a solver and register the parameter variables with it.
    * Create monitor instances for saving and displaying training stats.
    * Initialize DataIterators for MNIST.
    * Training loop
      * Compute the error rate on validation data (periodically)
      * Save parameters (periodically)
      * Get the next minibatch.
      * Set parameter gradients to zero.
      * Execute forwardprop and backprop on the training graph.
      * Solver updates parameters by using gradients computed by backprop.
      * Compute the training error.
    * Final validation, NNP export and ``check_cpp_forward``.
    """
    args = get_args()

    # Select the computation context; fall back to CPU when none is given.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if extension_module is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    nn.set_default_context(
        extension_context(extension_module, device_id=args.device_id))

    # Pick the network builder shared by the training and test graphs.
    mnist_cnn_prediction = mnist_lenet_prediction
    if args.net == 'resnet':
        mnist_cnn_prediction = mnist_resnet_prediction

    input_shape = [args.batch_size, 1, 28, 28]
    label_shape = [args.batch_size, 1]

    # Training graph.
    image = nn.Variable(input_shape)
    label = nn.Variable(label_shape)
    pred = mnist_cnn_prediction(image, test=False)
    pred.persistent = True
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # Test graph.
    vimage = nn.Variable(input_shape)
    vlabel = nn.Variable(label_shape)
    vpred = mnist_cnn_prediction(vimage, test=True)

    # Solver over every parameter created so far.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Monitors.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)

    # MNIST data iterators (training, then validation).
    data = data_iterator_mnist(args.batch_size, True)
    vdata = data_iterator_mnist(args.batch_size, False)

    def mean_validation_error():
        # Average categorical error over args.val_iter validation batches.
        total = 0.0
        for _ in range(args.val_iter):
            vimage.d, vlabel.d = vdata.next()
            vpred.forward(clear_buffer=True)
            total += categorical_error(vpred.d, vlabel.d)
        return total / args.val_iter

    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            monitor_verr.add(i, mean_validation_error())
        if i % args.model_save_interval == 0:
            snapshot = os.path.join(
                args.model_save_path, 'params_%06d.h5' % i)
            nn.save_parameters(snapshot)

        # One optimisation step on the next training minibatch.
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        train_error = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, train_error)
        monitor_time.add(i)

    # Final validation, logged at the last training iteration index.
    monitor_verr.add(i, mean_validation_error())

    # Export the validation network as NNP.
    nnp_file = os.path.join(
        args.model_save_path,
        '{}_{:06}.nnp'.format(args.net, args.max_iter))
    validation_network = {
        'name': 'Validation',
        'batch_size': args.batch_size,
        'outputs': {'y': vpred},
        'names': {'x': vimage},
    }
    runtime_executor = {
        'name': 'Runtime',
        'network': 'Validation',
        'data': ['x'],
        'output': ['y'],
    }
    runtime_contents = {
        'networks': [validation_network],
        'executors': [runtime_executor],
    }
    save.save(nnp_file, runtime_contents)

    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [vimage.d], [vimage], vpred,
                      nnp_file)
def train(args):
    """
    Train a siamese LeNet on MNIST pairs with a contrastive loss.

    Validation loss is monitored every ``args.val_interval`` iterations and
    parameters are saved every ``args.model_save_interval`` iterations. After
    training, the final parameters and an NNP file of the validation network
    are written, and the NNP file is passed to ``check_cpp_forward``.

    Args:
        args: Parsed options. Reads ``context``, ``device_id``,
            ``batch_size``, ``learning_rate``, ``weight_decay``,
            ``max_iter``, ``val_interval``, ``val_iter``,
            ``model_save_interval``, ``model_save_path`` and
            ``monitor_path``.
    """
    # Select the computation context; fall back to CPU when none is given.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if extension_module is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    nn.set_default_context(
        extension_context(extension_module, device_id=args.device_id))

    margin = 1.0  # Margin for contrastive loss.
    image_shape = [args.batch_size, 1, 28, 28]
    label_shape = [args.batch_size]

    # Training graph.
    image0 = nn.Variable(image_shape)
    image1 = nn.Variable(image_shape)
    label = nn.Variable(label_shape)
    pred = mnist_lenet_siamese(image0, image1, test=False)
    loss = F.mean(contrastive_loss(pred, label, margin))

    # Test graph.
    vimage0 = nn.Variable(image_shape)
    vimage1 = nn.Variable(image_shape)
    vlabel = nn.Variable(label_shape)
    vpred = mnist_lenet_siamese(vimage0, vimage1, test=True)
    vloss = F.mean(contrastive_loss(vpred, vlabel, margin))

    # Solver over every parameter created so far.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Monitors.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed(
        "Training time", monitor, interval=100)
    monitor_vloss = M.MonitorSeries("Test loss", monitor, interval=10)

    # Both iterators draw from the same seeded random state.
    rng = np.random.RandomState(313)
    data = siamese_data_iterator(args.batch_size, True, rng)
    vdata = siamese_data_iterator(args.batch_size, False, rng)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Accumulate the validation loss over args.val_iter minibatches.
            val_loss_sum = 0.0
            for _ in range(args.val_iter):
                vimage0.d, vimage1.d, vlabel.d = vdata.next()
                vloss.forward(clear_buffer=True)
                val_loss_sum += vloss.d
            monitor_vloss.add(i, val_loss_sum / args.val_iter)
        if i % args.model_save_interval == 0:
            snapshot = os.path.join(
                args.model_save_path, 'params_%06d.h5' % i)
            nn.save_parameters(snapshot)

        # One optimisation step on the next pair minibatch.
        image0.d, image1.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        monitor_loss.add(i, loss.d.copy())
        monitor_time.add(i)

    # Save the final parameters.
    parameter_file = os.path.join(
        args.model_save_path, 'params_%06d.h5' % args.max_iter)
    nn.save_parameters(parameter_file)

    # Export the validation network as NNP.
    nnp_file = os.path.join(
        args.model_save_path, 'siamese_%06d.nnp' % (args.max_iter))
    validation_network = {
        'name': 'Validation',
        'batch_size': args.batch_size,
        'outputs': {'y': vpred},
        'names': {'x0': vimage0, 'x1': vimage1},
    }
    runtime_executor = {
        'name': 'Runtime',
        'network': 'Validation',
        'data': ['x0', 'x1'],
        'output': ['y'],
    }
    runtime_contents = {
        'networks': [validation_network],
        'executors': [runtime_executor],
    }
    save.save(nnp_file, runtime_contents)

    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path,
                      [vimage0.d, vimage1.d],
                      [vimage0, vimage1],
                      vpred, nnp_file)
def train():
    """
    Train a binarized MNIST classifier and export the validation network.

    ``args.net`` selects the network builder: ``'bincon'``, ``'binnet'``,
    ``'bwn'`` and their ``'*_resnet'`` variants. Any other value falls back
    to ``mnist_binary_connect_lenet_prediction``.

    Steps:

    * Parse command line arguments.
    * Specify a context for computation.
    * Initialize DataIterators for MNIST.
    * Construct computation graphs for training and validation.
    * Initialize a solver and register the parameter variables with it.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Compute the error rate on validation data (periodically)
      * Save parameters (periodically)
      * Get the next minibatch.
      * Set parameter gradients to zero.
      * Execute forwardprop and backprop on the training graph.
      * Solver updates parameters by using gradients computed by backprop.
      * Compute the training error.
    * NNP export and ``check_cpp_forward``.
    """
    args = get_args(monitor_path='tmp.monitor.bnn')

    # Select the computation context; fall back to CPU when none is given.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if extension_module is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    nn.set_default_context(
        extension_context(extension_module, device_id=args.device_id))

    # MNIST data iterators (training, then validation).
    data = data_iterator_mnist(args.batch_size, True)
    vdata = data_iterator_mnist(args.batch_size, False)

    # Pick the network builder; binary-connect LeNet is the default.
    mnist_cnn_prediction = mnist_binary_connect_lenet_prediction
    if args.net == 'bincon':
        mnist_cnn_prediction = mnist_binary_connect_lenet_prediction
    elif args.net == 'binnet':
        mnist_cnn_prediction = mnist_binary_net_lenet_prediction
    elif args.net == 'bwn':
        mnist_cnn_prediction = mnist_binary_weight_lenet_prediction
    elif args.net == 'bincon_resnet':
        mnist_cnn_prediction = mnist_binary_connect_resnet_prediction
    elif args.net == 'binnet_resnet':
        mnist_cnn_prediction = mnist_binary_net_resnet_prediction
    elif args.net == 'bwn_resnet':
        mnist_cnn_prediction = mnist_binary_weight_resnet_prediction

    input_shape = [args.batch_size, 1, 28, 28]
    label_shape = [args.batch_size, 1]

    # Training graph; the division by 255 is part of the graph.
    image = nn.Variable(input_shape)
    label = nn.Variable(label_shape)
    pred = mnist_cnn_prediction(image / 255, test=False)
    pred.persistent = True
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # Test graph.
    vimage = nn.Variable(input_shape)
    vlabel = nn.Variable(label_shape)
    vpred = mnist_cnn_prediction(vimage / 255, test=True)

    # Solver over every parameter created so far.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Monitors.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = M.MonitorSeries("Training error", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed(
        "Training time", monitor, interval=100)
    monitor_verr = M.MonitorSeries("Test error", monitor, interval=10)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Average categorical error over args.val_iter minibatches.
            val_error_sum = 0.0
            for _ in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                val_error_sum += categorical_error(vpred.d, vlabel.d)
            monitor_verr.add(i, val_error_sum / args.val_iter)
        if i % args.model_save_interval == 0:
            snapshot = os.path.join(
                args.model_save_path, 'params_%06d.h5' % i)
            nn.save_parameters(snapshot)

        # One optimisation step on the next training minibatch.
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        # Monitor
        train_error = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, train_error)
        monitor_time.add(i)

    # Export the validation network as NNP.
    nnp_file = os.path.join(
        args.model_save_path,
        '{}_{:06}.nnp'.format(args.net, args.max_iter))
    validation_network = {
        'name': 'Validation',
        'batch_size': args.batch_size,
        'outputs': {'y': vpred},
        'names': {'x': vimage},
    }
    runtime_executor = {
        'name': 'Runtime',
        'network': 'Validation',
        'data': ['x'],
        'output': ['y'],
    }
    runtime_contents = {
        'networks': [validation_network],
        'executors': [runtime_executor],
    }
    save.save(nnp_file, runtime_contents)

    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [vimage.d], [vimage], vpred,
                      nnp_file)
def train(args):
    """
    Train a DCGAN on MNIST and export the trained networks.

    One generator update and one discriminator update are performed per
    iteration. Generator and discriminator parameters are saved every
    ``args.model_save_interval`` iterations. After training, both networks
    are written to an NNP file and the generator is passed to
    ``check_cpp_forward``.

    Args:
        args: Parsed options. Reads ``context``, ``device_id``,
            ``batch_size``, ``learning_rate``, ``weight_decay``,
            ``max_iter``, ``model_save_interval``, ``model_save_path`` and
            ``monitor_path``.
    """
    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN

    # Fake path
    z = nn.Variable([args.batch_size, 100, 1, 1])
    fake = generator(z)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    # Generator objective: the discriminator should label fakes as 1.
    loss_gen = F.mean(
        F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape)))
    # Unlinked variable, so that the discriminator loss is built on a graph
    # that does not include the generator.
    fake_dis = fake.unlinked()
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(pred_fake_dis,
                                F.constant(0, pred_fake_dis.shape)))

    # Real path
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(
        F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries("Discriminator loss",
                                       monitor,
                                       interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    # (x + 1) / 2 maps [-1, 1] to [0, 1]. The parentheses are required:
    # `x + 1 / 2.` is parsed as `x + 0.5`.
    # NOTE(review): real images below are scaled to [-0.5, 0.5]; confirm the
    # generator's output range matches the [-1, 1] assumed here.
    monitor_fake = M.MonitorImageTile("Fake images",
                                      monitor,
                                      normalize_method=lambda x: (x + 1) / 2.)

    data = data_iterator_mnist(args.batch_size, True)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("gen"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "generator_param_%06d.h5" % i))
            with nn.parameter_scope("dis"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "discriminator_param_%06d.h5" % i))

        # Training forward
        image, _ = data.next()
        x.d = image / 255. - 0.5  # [0, 255] to [-0.5, 0.5]
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        # Copy so the monitor does not keep a reference to the live buffer.
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    # Export both networks as a single NNP file.
    nnp = os.path.join(args.model_save_path, 'dcgan_%06d.nnp' % args.max_iter)
    runtime_contents = {
        'networks': [{
            'name': 'Generator',
            'batch_size': args.batch_size,
            'outputs': {
                'G': fake
            },
            'names': {
                'z': z
            }
        }, {
            'name': 'Discriminator',
            'batch_size': args.batch_size,
            'outputs': {
                'D': pred_real
            },
            'names': {
                'x': x
            }
        }],
        'executors': [{
            'name': 'Generator',
            'network': 'Generator',
            'data': ['z'],
            'output': ['G']
        }, {
            'name': 'Discriminator',
            'network': 'Discriminator',
            'data': ['x'],
            'output': ['D']
        }]
    }

    save.save(nnp, runtime_contents)
    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [z.d], [z], fake, nnp, "Generator")
def main():
    """
    Learn word embeddings on the PTB training text with negative sampling.

    Steps:

    * Parse arguments and load the dataset.
    * Set the computation context.
    * Create the data iterator that yields (word, context, target) batches.
    * Build the training graph: two embedding lookups that share the
      parameter scope ``"e1"``, their inner product, and a sigmoid
      cross-entropy loss.
    * Build a lookup-only graph used for export and similarity search.
    * Train for ``args.max_epoch`` epochs.
    * Save the parameters, export the NNP file, run ``check_cpp_forward``
      and exit.
    """
    # Get arguments
    args = get_args()
    data_file = "https://raw.githubusercontent.com/tomsercu/lstm/master/data/ptb.train.txt"
    # Join with os.path so the path is valid whether or not work_dir ends
    # with a separator (consistent with nnp_file below).
    model_file = os.path.join(args.work_dir, "model.h5")

    # Load Dataset
    itow, wtoi, dataset = load_ptbset(data_file)

    # Computation environment settings
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create data provider
    n_word = len(wtoi)
    n_dim = args.embed_dim
    batchsize = args.batchsize
    half_window = args.half_window_length
    n_negative = args.n_negative_sample

    di = DataIteratorForEmbeddingLearning(
        batchsize=batchsize,
        half_window=half_window,
        n_negative=n_negative,
        dataset=dataset)

    # Create model
    # - Real batch size including context samples and negative samples
    size = batchsize * (1 + n_negative) * (2 * (half_window - 1))

    # Model for learning
    # - input variables
    xl = nn.Variable((size,))  # variable for word
    yl = nn.Variable((size,))  # variable for context

    # Embed layers for word embedding function
    # - f_embed : word index x to get y, the n_dim vector
    # -- for each sample in a minibatch
    hx = PF.embed(xl, n_word, n_dim, name="e1")  # feature vector for word
    hy = PF.embed(yl, n_word, n_dim, name="e1")  # feature vector for context
    hl = F.sum(hx * hy, axis=1)

    # -- Approximated likelihood of context prediction
    # pos: word context, neg negative samples
    tl = nn.Variable([size, ], need_grad=False)
    loss = F.sigmoid_cross_entropy(hl, tl)
    loss = F.mean(loss)

    # Model for test of searching similar words
    xr = nn.Variable((size,), need_grad=False)
    hr = PF.embed(xr, n_word, n_dim, name="e1")  # feature vector for test

    # Create solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    monitor = M.Monitor(args.work_dir)
    monitor_loss = M.MonitorSeries(
        "Training loss", monitor, interval=args.monitor_interval)
    monitor_time = M.MonitorTimeElapsed(
        "Training time", monitor, interval=args.monitor_interval)

    # Do training
    max_epoch = args.max_epoch
    for epoch in range(max_epoch):

        # iteration per epoch
        for i in range(di.n_batch):

            # get minibatch
            xi, yi, ti = di.next()

            # learn
            solver.zero_grad()
            xl.d, yl.d, tl.d = xi, yi, ti
            loss.forward(clear_no_need_grad=True)
            loss.backward(clear_buffer=True)
            solver.update()

            # monitor
            itr = epoch * di.n_batch + i
            # Pass a copy so the monitor does not hold a reference to the
            # buffer that the next forward pass overwrites.
            monitor_loss.add(itr, loss.d.copy())
            monitor_time.add(itr)

    # Save model
    nn.save_parameters(model_file)

    nnp_file = os.path.join(
        args.work_dir, 'wtov_%06d.nnp' % (args.max_epoch))
    runtime_contents = {
        'networks': [
            {'name': 'Validation',
             'batch_size': size,
             'outputs': {'e': hr},
             'names': {'w': xr}}],
        'executors': [
            {'name': 'Runtime',
             'network': 'Validation',
             'data': ['w'],
             'output': ['e']}]}

    save.save(nnp_file, runtime_contents)

    from cpp_forward_check import check_cpp_forward
    # xi is the last training minibatch of word indices.
    check_cpp_forward(args.work_dir, [xi], [xr], hr, nnp_file)

    # NOTE(review): this exit() makes the similarity evaluation below
    # unreachable. It is kept to preserve current behavior; confirm whether
    # the evaluation is meant to run.
    exit()

    # Evaluate by similarity
    max_check_words = args.max_check_words
    for i in range(max_check_words):

        # prediction
        xr.d = i
        hr.forward(clear_buffer=True)
        h = hr.d

        # similarity calculation
        w = nn.get_parameters()['e1/embed/W'].d
        s = np.sqrt((w * w).sum(1))
        w /= s.reshape((s.shape[0], 1))
        similarity = w.dot(h[0]) / s[i]

        # for understanding
        output_similar_words(itow, i, similarity)
def train():
    """
    Train a ResNet on Tiny ImageNet and export the validation network.

    Gradients are accumulated over ``args.accum_grad`` minibatches before
    each solver update. The learning rate is multiplied by 0.1 at every
    iteration listed in ``args.learning_rate_decay_at``. After training, the
    final parameters and an NNP file of the validation network are written,
    and the NNP file is passed to ``check_cpp_forward``.
    """
    args = get_args()

    # Select the computation context; fall back to CPU when none is given.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if extension_module is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Dataset
    # We use Tiny ImageNet from Stanford CS231N class.
    # https://tiny-imagenet.herokuapp.com/
    # Tiny ImageNet consists of 200 categories, each category has 500 images
    # in training set. The image size is 64x64. To adapt ResNet into 64x64
    # image inputs, the input image size of ResNet is set as 56x56, and
    # the stride in the first conv and the first max pooling are removed.
    data = data_iterator_tiny_imagenet(args.batch_size, 'train')
    vdata = data_iterator_tiny_imagenet(args.batch_size, 'val')

    num_classes = 200
    tiny = True  # TODO: Switch ILSVRC2012 dataset and TinyImageNet.
    t_model = get_model(args, num_classes, test=False, tiny=tiny)
    t_model.pred.persistent = True  # Not clearing buffer of pred in backward
    v_model = get_model(args, num_classes, test=True, tiny=tiny)
    v_model.pred.persistent = True  # Not clearing buffer of pred in forward

    # Create Solver.
    solver = S.Momentum(args.learning_rate, 0.9)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = M.MonitorSeries("Training error", monitor, interval=10)
    monitor_vloss = M.MonitorSeries("Validation loss", monitor, interval=10)
    monitor_verr = M.MonitorSeries("Validation error", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=10)

    def feed_minibatch(model, iterator):
        # Load the next minibatch into the model inputs, then cast the
        # input arrays to uint8 images and int32 labels on ctx.
        images, labels = iterator.next()
        model.image.d = images
        model.label.d = labels
        model.image.data.cast(np.uint8, ctx)
        model.label.data.cast(np.int32, ctx)

    # Training loop.
    for i in range(args.max_iter):
        # Save parameters
        if i % args.model_save_interval == 0:
            snapshot = os.path.join(
                args.model_save_path, 'param_%06d.h5' % i)
            nn.save_parameters(snapshot)

        # Validation
        if i % args.val_interval == 0:
            val_loss_sum = 0.0
            val_error_sum = 0.0
            for _ in range(args.val_iter):
                feed_minibatch(v_model, vdata)
                v_model.loss.forward(clear_buffer=True)
                val_loss_sum += v_model.loss.d
                val_error_sum += categorical_error(
                    v_model.pred.d, v_model.label.d)
            monitor_vloss.add(i, val_loss_sum / args.val_iter)
            monitor_verr.add(i, val_error_sum / args.val_iter)

        # Training
        train_loss_sum = 0.0
        train_error_sum = 0.0
        solver.zero_grad()

        # Gradient accumulation loop
        for _ in range(args.accum_grad):
            feed_minibatch(t_model, data)
            t_model.loss.forward(clear_no_need_grad=True)
            t_model.loss.backward(clear_buffer=True)  # Accumulating gradients
            train_loss_sum += t_model.loss.d
            train_error_sum += categorical_error(
                t_model.pred.d, t_model.label.d)
        solver.weight_decay(args.weight_decay)
        solver.update()
        monitor_loss.add(i, train_loss_sum / args.accum_grad)
        monitor_err.add(i, train_error_sum / args.accum_grad)
        monitor_time.add(i)

        # Learning rate decay at scheduled iter
        if i in args.learning_rate_decay_at:
            solver.set_learning_rate(solver.learning_rate() * 0.1)

    # Save the final parameters.
    final_params = os.path.join(
        args.model_save_path, 'param_%06d.h5' % args.max_iter)
    nn.save_parameters(final_params)

    # Export the validation network as NNP.
    nnp_file = os.path.join(
        args.model_save_path, 'resnet_%06d.nnp' % (args.max_iter))
    validation_network = {
        'name': 'Validation',
        'batch_size': v_model.pred.shape[0],
        'outputs': {'y': v_model.pred},
        'names': {'x': v_model.image},
    }
    runtime_executor = {
        'name': 'Runtime',
        'network': 'Validation',
        'data': ['x'],
        'output': ['y'],
    }
    runtime_contents = {
        'networks': [validation_network],
        'executors': [runtime_executor],
    }
    save.save(nnp_file, runtime_contents)

    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path,
                      [v_model.image.d],
                      [v_model.image],
                      v_model.pred, nnp_file)
def train(args):
    """
    Train a siamese LeNet on MNIST pairs with a contrastive loss.

    Validation loss is monitored every ``args.val_interval`` iterations and
    parameters are saved every ``args.model_save_interval`` iterations. After
    training, the final parameters and an NNP file of the validation network
    are written, and the NNP file is passed to ``check_cpp_forward``.

    Args:
        args: Parsed options. Reads ``context``, ``device_id``,
            ``batch_size``, ``learning_rate``, ``weight_decay``,
            ``max_iter``, ``val_interval``, ``val_iter``,
            ``model_save_interval``, ``model_save_path`` and
            ``monitor_path``.
    """
    # Select the computation context; fall back to CPU when none is given.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if extension_module is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    nn.set_default_context(
        extension_context(extension_module, device_id=args.device_id))

    margin = 1.0  # Margin for contrastive loss.
    image_shape = [args.batch_size, 1, 28, 28]
    label_shape = [args.batch_size]

    # Training graph.
    image0 = nn.Variable(image_shape)
    image1 = nn.Variable(image_shape)
    label = nn.Variable(label_shape)
    pred = mnist_lenet_siamese(image0, image1, test=False)
    loss = F.mean(contrastive_loss(pred, label, margin))

    # Test graph.
    vimage0 = nn.Variable(image_shape)
    vimage1 = nn.Variable(image_shape)
    vlabel = nn.Variable(label_shape)
    vpred = mnist_lenet_siamese(vimage0, vimage1, test=True)
    vloss = F.mean(contrastive_loss(vpred, vlabel, margin))

    # Solver over every parameter created so far.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Monitors.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed(
        "Training time", monitor, interval=100)
    monitor_vloss = M.MonitorSeries("Test loss", monitor, interval=10)

    # Both iterators draw from the same seeded random state.
    rng = np.random.RandomState(313)
    data = siamese_data_iterator(args.batch_size, True, rng)
    vdata = siamese_data_iterator(args.batch_size, False, rng)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Accumulate the validation loss over args.val_iter minibatches.
            val_loss_sum = 0.0
            for _ in range(args.val_iter):
                vimage0.d, vimage1.d, vlabel.d = vdata.next()
                vloss.forward(clear_buffer=True)
                val_loss_sum += vloss.d
            monitor_vloss.add(i, val_loss_sum / args.val_iter)
        if i % args.model_save_interval == 0:
            snapshot = os.path.join(
                args.model_save_path, 'params_%06d.h5' % i)
            nn.save_parameters(snapshot)

        # One optimisation step on the next pair minibatch.
        image0.d, image1.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        monitor_loss.add(i, loss.d.copy())
        monitor_time.add(i)

    # Save the final parameters.
    parameter_file = os.path.join(
        args.model_save_path, 'params_%06d.h5' % args.max_iter)
    nn.save_parameters(parameter_file)

    # Export the validation network as NNP.
    nnp_file = os.path.join(
        args.model_save_path, 'siamese_%06d.nnp' % (args.max_iter))
    validation_network = {
        'name': 'Validation',
        'batch_size': args.batch_size,
        'outputs': {'y': vpred},
        'names': {'x0': vimage0, 'x1': vimage1},
    }
    runtime_executor = {
        'name': 'Runtime',
        'network': 'Validation',
        'data': ['x0', 'x1'],
        'output': ['y'],
    }
    runtime_contents = {
        'networks': [validation_network],
        'executors': [runtime_executor],
    }
    save.save(nnp_file, runtime_contents)

    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path,
                      [vimage0.d, vimage1.d],
                      [vimage0, vimage1],
                      vpred, nnp_file)