def test_nnp_graph_reshape(tmpdir, variable_batch_size, batch_size, shape):
    x = nn.Variable([10, 1, 28, 28, 10, 10])
    y = F.reshape(x, shape=shape)
    contents = {
        'networks': [
            {'name': 'graph',
             'batch_size': 1,
             'outputs': {'y': y},
             'names': {'x': x}}]}
    from nnabla.utils.save import save
    tmppath = tmpdir.join('tmp_reshape.nnp')
    tmppath.ensure()
    nnp_file = tmppath.strpath
    save(nnp_file, contents, variable_batch_size=variable_batch_size)
    from nnabla.utils import nnp_graph
    nnp = nnp_graph.NnpLoader(nnp_file)
    graph = nnp.get_network('graph', batch_size=batch_size)
    x2 = graph.inputs['x']
    y2 = graph.outputs['y']
    if not variable_batch_size:
        assert x2.shape == x.shape
        assert y2.shape == y.shape
        return
    assert x2.shape[0] == batch_size
    assert y2.shape[0] == batch_size
    x2.d = np.random.randn(*x2.shape)
    shape2 = list(shape)
    shape2[0] = batch_size
    shape2[1:] = y.shape[1:]
    y2.forward()
    assert np.all(y2.d == x2.d.reshape(shape2))
def save_siamese_nnp(args):
    image = nn.Variable([1, 1, 28, 28])
    feature = mnist_lenet_feature(image, test=True)
    contents = save_nnp({'x': image}, {'y': feature}, args.batch_size)
    save.save(
        os.path.join(args.model_save_path,
                     '{}_result.nnp'.format(args.net)), contents)
def check_nnp_graph_save_load(tmpdir, x, y, batch_size, variable_batch_size):

    # Save
    contents = {
        'networks': [{
            'name': 'graph',
            'batch_size': 1,
            'outputs': {'y': y},
            'names': {'x': x}}]}
    from nnabla.utils.save import save
    tmpdir.ensure(dir=True)
    tmppath = tmpdir.join('tmp.nnp')
    nnp_file = tmppath.strpath
    save(nnp_file, contents, variable_batch_size=variable_batch_size)

    # Load
    from nnabla.utils import nnp_graph
    nnp = nnp_graph.NnpLoader(nnp_file)
    graph = nnp.get_network('graph', batch_size=batch_size)
    x2 = graph.inputs['x']
    y2 = graph.outputs['y']
    if not variable_batch_size:
        assert x2.shape == x.shape
        assert y2.shape == y.shape
        return x2, y2
    assert x2.shape[0] == batch_size
    assert y2.shape[0] == batch_size
    return x2, y2
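# Example (not from the original test file): a minimal sketch, assuming nnabla as nn,
# nnabla.parametric_functions as PF and a pytest tmpdir fixture, of how
# check_nnp_graph_save_load above could be driven with a small affine graph.
# The test name and shapes are hypothetical.
def test_nnp_graph_save_load_affine_example(tmpdir):
    import nnabla as nn
    import nnabla.parametric_functions as PF
    x = nn.Variable([4, 8])
    y = PF.affine(x, 3, name='example-affine')
    # Round-trip with a fixed batch size; shapes must be preserved exactly
    # because variable_batch_size is disabled here.
    x2, y2 = check_nnp_graph_save_load(
        tmpdir, x, y, batch_size=4, variable_batch_size=False)
    assert x2.shape == x.shape and y2.shape == y.shape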
def test_nnp_graph(seed):
    rng = np.random.RandomState(seed)

    def unit(i, prefix):
        c1 = PF.convolution(i, 4, (3, 3), pad=(1, 1), name=prefix + '-c1')
        c2 = PF.convolution(F.relu(c1), 4, (3, 3),
                            pad=(1, 1), name=prefix + '-c2')
        c = F.add2(c2, c1, inplace=True)
        return c

    x = nn.Variable([2, 3, 4, 4])
    c1 = unit(x, 'c1')
    c2 = unit(x, 'c2')
    y = PF.affine(c2, 5, name='fc')

    runtime_contents = {
        'networks': [{
            'name': 'graph',
            'batch_size': 2,
            'outputs': {'y': y},
            'names': {'x': x}}],
    }

    import tempfile
    tmpdir = tempfile.mkdtemp()
    import os
    nnp_file = os.path.join(tmpdir, 'tmp.nnp')
    try:
        from nnabla.utils.save import save
        save(nnp_file, runtime_contents)
        from nnabla.utils import nnp_graph
        nnp = nnp_graph.NnpLoader(nnp_file)
    finally:
        import shutil
        shutil.rmtree(tmpdir)

    graph = nnp.get_network('graph')
    x2 = graph.inputs['x']
    y2 = graph.outputs['y']
    d = rng.randn(*x.shape).astype(np.float32)
    x.d = d
    x2.d = d
    y.forward(clear_buffer=True)
    y2.forward(clear_buffer=True)
    from nbla_test_utils import ArrayDiffStats
    assert np.allclose(y.d, y2.d), str(ArrayDiffStats(y.d, y2.d))
def test_nnp_graph(seed, tmpdir):
    rng = np.random.RandomState(seed)

    def unit(i, prefix):
        c1 = PF.convolution(i, 4, (3, 3), pad=(1, 1), name=prefix + '-c1')
        c2 = PF.convolution(F.relu(c1), 4, (3, 3),
                            pad=(1, 1), name=prefix + '-c2')
        c = F.add2(c2, c1, inplace=True)
        return c

    x = nn.Variable([2, 3, 4, 4])
    c1 = unit(x, 'c1')
    c2 = unit(x, 'c2')
    y = PF.affine(c2, 5, name='fc')

    runtime_contents = {
        'networks': [{
            'name': 'graph',
            'batch_size': 2,
            'outputs': {'y': y},
            'names': {'x': x}}],
    }
    tmpdir.ensure(dir=True)
    nnp_file = tmpdir.join('tmp.nnp').strpath
    from nnabla.utils.save import save
    save(nnp_file, runtime_contents)
    from nnabla.utils import nnp_graph
    nnp = nnp_graph.NnpLoader(nnp_file)

    graph = nnp.get_network('graph')
    x2 = graph.inputs['x']
    y2 = graph.outputs['y']
    d = rng.randn(*x.shape).astype(np.float32)
    x.d = d
    x2.d = d
    y.forward(clear_buffer=True)
    y2.forward(clear_buffer=True)
    assert_allclose(y.d, y2.d)
def save_nnp(args):
    image = nn.Variable([1, 1, 28, 28])
    feature = mnist_lenet_feature(image, test=True)
    runtime_contents = {
        'networks': [
            {'name': 'Embedding',
             'batch_size': 1,
             'outputs': {'f': feature},
             'names': {'image': image}}],
        'executors': [
            {'name': 'Executor',
             'network': 'Embedding',
             'data': ['image'],
             'output': ['f']}]}
    import nnabla.utils.save as save
    save.save(os.path.join(args.monitor_path, 'embedding.nnp'),
              runtime_contents)
def get_nnp(contents, tmpdir, need_file_object, file_type):
    import io
    from nnabla.utils.save import save
    from nnabla.utils import nnp_graph

    if file_type == '.nntxt' or file_type == '.prototxt':
        include_params = True
    else:
        include_params = False

    if need_file_object:
        nnp_object = io.BytesIO() if file_type == '.nnp' else io.StringIO()
        save(nnp_object, contents, extension=file_type,
             include_params=include_params)
        nnp_object.seek(0)
        nnp = nnp_graph.NnpLoader(nnp_object, extension=file_type)
    else:
        tmpdir.ensure(dir=True)
        nnp_file = tmpdir.join('tmp' + file_type).strpath
        save(nnp_file, contents, include_params=include_params)
        nnp = nnp_graph.NnpLoader(nnp_file)
    return nnp
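# Example (not from the original sources): a minimal sketch of driving get_nnp above
# both through an in-memory file object and through a file on disk, assuming nnabla
# as nn and nnabla.parametric_functions as PF; the graph and its name are hypothetical.
def example_get_nnp_usage(tmpdir):
    import nnabla as nn
    import nnabla.parametric_functions as PF
    x = nn.Variable([2, 16])
    y = PF.affine(x, 4, name='example-fc')
    contents = {
        'networks': [{
            'name': 'graph',
            'batch_size': 2,
            'outputs': {'y': y},
            'names': {'x': x}}]}
    # Serialize/deserialize through an io.BytesIO object ...
    nnp_from_object = get_nnp(contents, tmpdir, True, '.nnp')
    # ... and through a temporary file under tmpdir.
    nnp_from_file = get_nnp(contents, tmpdir, False, '.nnp')
    g = nnp_from_file.get_network('graph', batch_size=2)
    return nnp_from_object, g.inputs['x'], g.outputs['y']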
def save_model_from_utils_save(nnp_file, model_def, input_shape,
                               variable_batch_size):
    x = nn.Variable(input_shape)
    y = model_def(x)
    contents = {
        'networks': [{
            'name': 'model',
            'batch_size': 1,
            'outputs': {'y': y},
            'names': {'x': x}}],
        'executors': [{
            'name': 'runtime',
            'network': 'model',
            'data': ['x'],
            'output': ['y']}]}
    save.save(nnp_file, contents, variable_batch_size=variable_batch_size)
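# Example (not from the original sources): a minimal sketch, assuming nnabla.functions
# as F and nnabla.utils.nnp_graph are importable, of saving through
# save_model_from_utils_save and loading the file back. The model definition,
# shapes and file name are hypothetical.
def example_save_and_reload(tmp_nnp_file='example_model.nnp'):
    import nnabla.functions as F
    from nnabla.utils import nnp_graph

    def model_def(x):
        # A trivial stand-in model.
        return F.relu(x)

    save_model_from_utils_save(tmp_nnp_file, model_def,
                               input_shape=(1, 3, 8, 8),
                               variable_batch_size=True)
    nnp = nnp_graph.NnpLoader(tmp_nnp_file)
    graph = nnp.get_network('model', batch_size=4)
    return graph.inputs['x'], graph.outputs['y']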
def train(args): """ Main script. """ # Get context. from nnabla.ext_utils import get_extension_context logger.info("Running in %s" % args.context) ctx = get_extension_context(args.context, device_id=args.device_id, type_config=args.type_config) nn.set_default_context(ctx) # Create CNN network for both training and testing. # TRAIN # Fake path z = nn.Variable([args.batch_size, 100, 1, 1]) fake = generator(z) fake.persistent = True # Not to clear at backward pred_fake = discriminator(fake) loss_gen = F.mean( F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape))) fake_dis = fake.get_unlinked_variable(need_grad=True) fake_dis.need_grad = True # TODO: Workaround until v1.0.2 pred_fake_dis = discriminator(fake_dis) loss_dis = F.mean( F.sigmoid_cross_entropy(pred_fake_dis, F.constant(0, pred_fake_dis.shape))) # Real path x = nn.Variable([args.batch_size, 1, 28, 28]) pred_real = discriminator(x) loss_dis += F.mean( F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape))) # Create Solver. solver_gen = S.Adam(args.learning_rate, beta1=0.5) solver_dis = S.Adam(args.learning_rate, beta1=0.5) with nn.parameter_scope("gen"): solver_gen.set_parameters(nn.get_parameters()) with nn.parameter_scope("dis"): solver_dis.set_parameters(nn.get_parameters()) start_point = 0 if args.checkpoint is not None: # load weights and solver state info from specified checkpoint files. start_point = load_checkpoint(args.checkpoint, { "gen": solver_gen, "dis": solver_dis }) # Create monitor. import nnabla.monitor as M monitor = M.Monitor(args.monitor_path) monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10) monitor_loss_dis = M.MonitorSeries("Discriminator loss", monitor, interval=10) monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100) monitor_fake = M.MonitorImageTile("Fake images", monitor, normalize_method=lambda x: (x + 1) / 2.) data = data_iterator_mnist(args.batch_size, True) # Save_nnp contents = save_nnp({'x': z}, {'y': fake}, args.batch_size) save.save( os.path.join(args.model_save_path, 'Generator_result_epoch0.nnp'), contents) contents = save_nnp({'x': x}, {'y': pred_real}, args.batch_size) save.save( os.path.join(args.model_save_path, 'Discriminator_result_epoch0.nnp'), contents) # Training loop. for i in range(start_point, args.max_iter): if i % args.model_save_interval == 0: save_checkpoint(args.model_save_path, i, { "gen": solver_gen, "dis": solver_dis }) # Training forward image, _ = data.next() x.d = image / 255. - 0.5 # [0, 255] to [-1, 1] z.d = np.random.randn(*z.shape) # Generator update. solver_gen.zero_grad() loss_gen.forward(clear_no_need_grad=True) loss_gen.backward(clear_buffer=True) solver_gen.weight_decay(args.weight_decay) solver_gen.update() monitor_fake.add(i, fake) monitor_loss_gen.add(i, loss_gen.d.copy()) # Discriminator update. 
solver_dis.zero_grad() loss_dis.forward(clear_no_need_grad=True) loss_dis.backward(clear_buffer=True) solver_dis.weight_decay(args.weight_decay) solver_dis.update() monitor_loss_dis.add(i, loss_dis.d.copy()) monitor_time.add(i) with nn.parameter_scope("gen"): nn.save_parameters( os.path.join(args.model_save_path, "generator_param_%06d.h5" % i)) with nn.parameter_scope("dis"): nn.save_parameters( os.path.join(args.model_save_path, "discriminator_param_%06d.h5" % i)) # Save_nnp contents = save_nnp({'x': z}, {'y': fake}, args.batch_size) save.save(os.path.join(args.model_save_path, 'Generator_result.nnp'), contents) contents = save_nnp({'x': x}, {'y': pred_real}, args.batch_size) save.save(os.path.join(args.model_save_path, 'Discriminator_result.nnp'), contents)
def train(): """ Main script. """ args = get_args() # Get context. from nnabla.contrib.context import extension_context extension_module = args.context if args.context is None: extension_module = 'cpu' logger.info("Running in %s" % extension_module) ctx = extension_context(extension_module, device_id=args.device_id) nn.set_default_context(ctx) # Dataset # We use Tiny ImageNet from Stanford CS231N class. # https://tiny-imagenet.herokuapp.com/ # Tiny ImageNet consists of 200 categories, each category has 500 images # in training set. The image size is 64x64. To adapt ResNet into 64x64 # image inputs, the input image size of ResNet is set as 56x56, and # the stride in the first conv and the first max pooling are removed. data = data_iterator_tiny_imagenet(args.batch_size, 'train') vdata = data_iterator_tiny_imagenet(args.batch_size, 'val') num_classes = 200 tiny = True # TODO: Switch ILSVRC2012 dataset and TinyImageNet. t_model = get_model( args, num_classes, test=False, tiny=tiny) t_model.pred.persistent = True # Not clearing buffer of pred in backward v_model = get_model( args, num_classes, test=True, tiny=tiny) v_model.pred.persistent = True # Not clearing buffer of pred in forward # Create Solver. solver = S.Momentum(args.learning_rate, 0.9) solver.set_parameters(nn.get_parameters()) # Create monitor. import nnabla.monitor as M monitor = M.Monitor(args.monitor_path) monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10) monitor_err = M.MonitorSeries("Training error", monitor, interval=10) monitor_vloss = M.MonitorSeries("Validation loss", monitor, interval=10) monitor_verr = M.MonitorSeries("Validation error", monitor, interval=10) monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=10) # Training loop. for i in range(args.max_iter): # Save parameters if i % args.model_save_interval == 0: nn.save_parameters(os.path.join( args.model_save_path, 'param_%06d.h5' % i)) # Validation if i % args.val_interval == 0: # Clear all intermediate memory to save memory. # t_model.loss.clear_recursive() l = 0.0 e = 0.0 for j in range(args.val_iter): images, labels = vdata.next() v_model.image.d = images v_model.label.d = labels v_model.image.data.cast(np.uint8, ctx) v_model.label.data.cast(np.int32, ctx) v_model.loss.forward(clear_buffer=True) l += v_model.loss.d e += categorical_error(v_model.pred.d, v_model.label.d) monitor_vloss.add(i, l / args.val_iter) monitor_verr.add(i, e / args.val_iter) # Clear all intermediate memory to save memory. 
# v_model.loss.clear_recursive() # Training l = 0.0 e = 0.0 solver.zero_grad() # Gradient accumulation loop for j in range(args.accum_grad): images, labels = data.next() t_model.image.d = images t_model.label.d = labels t_model.image.data.cast(np.uint8, ctx) t_model.label.data.cast(np.int32, ctx) t_model.loss.forward(clear_no_need_grad=True) t_model.loss.backward(clear_buffer=True) # Accumulating gradients l += t_model.loss.d e += categorical_error(t_model.pred.d, t_model.label.d) solver.weight_decay(args.weight_decay) solver.update() monitor_loss.add(i, l / args.accum_grad) monitor_err.add(i, e / args.accum_grad) monitor_time.add(i) # Learning rate decay at scheduled iter if i in args.learning_rate_decay_at: solver.set_learning_rate(solver.learning_rate() * 0.1) nn.save_parameters(os.path.join(args.model_save_path, 'param_%06d.h5' % args.max_iter)) nnp_file = os.path.join( args.model_save_path, 'resnet_%06d.nnp' % (args.max_iter)) runtime_contents = { 'networks': [ {'name': 'Validation', 'batch_size': v_model.pred.shape[0], 'outputs': {'y': v_model.pred}, 'names': {'x': v_model.image}}], 'executors': [ {'name': 'Runtime', 'network': 'Validation', 'data': ['x'], 'output': ['y']}]} save.save(nnp_file, runtime_contents) from cpp_forward_check import check_cpp_forward check_cpp_forward(args.model_save_path, [v_model.image.d], [ v_model.image], v_model.pred, nnp_file)
pprint.pprint(nn.get_parameters()['affine/W'].d)

x.d = range(5)

# Execute a forward pass
y.forward()

# Showing results
print("**** nnabla (python runtime): predictions: {}".format(y.d))
# we will later check this output with the output of the runtime engine

contents = {
    'networks': [{
        'name': 'main',
        'batch_size': 1,
        'outputs': {'y': y},
        'names': {'x': x}}],
    'executors': [{
        'name': 'runtime',
        'network': 'main',
        'data': ['x'],
        'output': ['y']}]}

# save the network
save('net.nnp', contents)
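# Example (not part of the original script): a minimal sketch of the later check the
# comment above refers to, assuming the Python-runtime prediction y.d is still in
# scope; it reloads net.nnp with NnpLoader and compares the two outputs.
import numpy as np
from nnabla.utils import nnp_graph

nnp = nnp_graph.NnpLoader('net.nnp')
graph = nnp.get_network('main', batch_size=1)
x2 = graph.inputs['x']
y2 = graph.outputs['y']
x2.d = range(5)
y2.forward()
print("**** nnabla (NnpLoader): predictions: {}".format(y2.d))
assert np.allclose(y.d, y2.d)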
def save(self, fname, inputs, batch_size=1, net_name='net', deploy=False):
    """
    Save the QAT network model to an NNP file (the default format).

    Args:
        fname (str): NNP file name.
        inputs (:obj:`nnabla.Variable` or list of :obj:`nnabla.Variable`):
            Network input variables.
        batch_size (int): Batch size.
        net_name (str): Network name.
        deploy (bool): Whether to apply QNN deployment conversion.
            deploy=True is not supported yet.

    Returns:
        None
    """
    def _force_list(o):
        if isinstance(o, tuple):
            return list(o)
        if not isinstance(o, list):
            return [o]
        return o

    for i, elm in enumerate(self.registry):
        pred, training = elm
        if deploy:
            assert self.state == QNNState.training
            # TODO: Convert the training graph to deployment graph
            # TODO: Save as nnp (we have to define nicely)
        else:
            if training:
                continue
            from collections import defaultdict
            inps = defaultdict(list)
            otps = defaultdict(list)
            ec_data = []
            ec_otps = []
            inputs = _force_list(inputs)
            for i, inp in enumerate(inputs):
                key = 'x{}'.format(i)
                inps[key] = inp
                ec_data.append(key)
            outputs = _force_list(pred)
            for i, otp in enumerate(outputs):
                key = 'y{}'.format(i)
                otps[key] = otp
                ec_otps.append(key)
            contents = {
                'networks': [
                    {'name': net_name,
                     'batch_size': batch_size,
                     'outputs': otps,
                     'names': inps}],
                'executors': [
                    {'name': 'runtime',
                     'network': net_name,
                     'data': ec_data,
                     'outputs': ec_otps}]}
            from nnabla.utils.save import save
            save(fname, contents)
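# Example (not from the original class): a standalone sketch of the same 'x{i}'/'y{i}'
# naming convention that the save() method above builds, written out by hand for a
# hypothetical two-input, one-output graph. It follows the 'output' executor key
# spelling used in the other snippets here; names and shapes are assumptions.
def example_save_two_input_net(fname='example_qat_like.nnp'):
    import nnabla as nn
    import nnabla.functions as F
    import nnabla.parametric_functions as PF
    from nnabla.utils.save import save

    a = nn.Variable([1, 8])
    b = nn.Variable([1, 8])
    y = PF.affine(F.concatenate(a, b, axis=1), 4, name='example-fc')

    contents = {
        'networks': [
            {'name': 'net',
             'batch_size': 1,
             'outputs': {'y0': y},
             'names': {'x0': a, 'x1': b}}],
        'executors': [
            {'name': 'runtime',
             'network': 'net',
             'data': ['x0', 'x1'],
             'output': ['y0']}]}
    save(fname, contents)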
def train(): ''' Main script. ''' args = get_args() from numpy.random import seed seed(0) # Get context. from nnabla.ext_utils import get_extension_context logger.info("Running in %s" % args.context) ctx = get_extension_context(args.context, device_id=args.device_id, type_config=args.type_config) nn.set_default_context(ctx) # TRAIN image = nn.Variable([args.batch_size, 1, 28, 28]) label = nn.Variable([args.batch_size, 1]) x = image / 255.0 t_onehot = F.one_hot(label, (10, )) with nn.parameter_scope("capsnet"): c1, pcaps, u_hat, caps, pred = model.capsule_net( x, test=False, aug=True, grad_dynamic_routing=args.grad_dynamic_routing) with nn.parameter_scope("capsnet_reconst"): recon = model.capsule_reconstruction(caps, t_onehot) loss_margin, loss_reconst, loss = model.capsule_loss( pred, t_onehot, recon, x) pred.persistent = True # TEST # Create input variables. vimage = nn.Variable([args.batch_size, 1, 28, 28]) vlabel = nn.Variable([args.batch_size, 1]) vx = vimage / 255.0 with nn.parameter_scope("capsnet"): _, _, _, _, vpred = model.capsule_net(vx, test=True, aug=False) # Create Solver. solver = S.Adam(args.learning_rate) solver.set_parameters(nn.get_parameters()) # Create monitor. from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed train_iter = int(60000 / args.batch_size) val_iter = int(10000 / args.batch_size) logger.info("#Train: {} #Validation: {}".format(train_iter, val_iter)) monitor = Monitor(args.monitor_path) monitor_loss = MonitorSeries("Training loss", monitor, interval=1) monitor_mloss = MonitorSeries("Training margin loss", monitor, interval=1) monitor_rloss = MonitorSeries("Training reconstruction loss", monitor, interval=1) monitor_err = MonitorSeries("Training error", monitor, interval=1) monitor_time = MonitorTimeElapsed("Training time", monitor, interval=1) monitor_verr = MonitorSeries("Test error", monitor, interval=1) monitor_lr = MonitorSeries("Learning rate", monitor, interval=1) # To_save_nnp m_image, m_label, m_noise, m_recon = model_tweak_digitscaps( args.batch_size) contents = save_nnp({ 'x1': m_image, 'x2': m_label, 'x3': m_noise }, {'y': m_recon}, args.batch_size) save.save(os.path.join(args.monitor_path, 'capsnet_epoch0_result.nnp'), contents) # Initialize DataIterator for MNIST. from numpy.random import RandomState data = data_iterator_mnist(args.batch_size, True, rng=RandomState(1223)) vdata = data_iterator_mnist(args.batch_size, False) start_point = 0 if args.checkpoint is not None: # load weights and solver state info from specified checkpoint file. start_point = load_checkpoint(args.checkpoint, solver) # Training loop. 
for e in range(start_point, args.max_epochs): # Learning rate decay learning_rate = solver.learning_rate() if e != 0: learning_rate *= 0.9 solver.set_learning_rate(learning_rate) monitor_lr.add(e, learning_rate) # Training train_error = 0.0 train_loss = 0.0 train_mloss = 0.0 train_rloss = 0.0 for i in range(train_iter): image.d, label.d = data.next() solver.zero_grad() loss.forward(clear_no_need_grad=True) loss.backward(clear_buffer=True) solver.update() train_error += categorical_error(pred.d, label.d) train_loss += loss.d train_mloss += loss_margin.d train_rloss += loss_reconst.d train_error /= train_iter train_loss /= train_iter train_mloss /= train_iter train_rloss /= train_iter # Validation val_error = 0.0 for j in range(val_iter): vimage.d, vlabel.d = vdata.next() vpred.forward(clear_buffer=True) val_error += categorical_error(vpred.d, vlabel.d) val_error /= val_iter # Monitor monitor_time.add(e) monitor_loss.add(e, train_loss) monitor_mloss.add(e, train_mloss) monitor_rloss.add(e, train_rloss) monitor_err.add(e, train_error) monitor_verr.add(e, val_error) save_checkpoint(args.monitor_path, e, solver) # To_save_nnp contents = save_nnp({ 'x1': m_image, 'x2': m_label, 'x3': m_noise }, {'y': m_recon}, args.batch_size) save.save(os.path.join(args.monitor_path, 'capsnet_result.nnp'), contents)
def main(): """ Main script. Steps: * Get and set context. * Load Dataset * Initialize DataIterator. * Create Networks * Net for Labeled Data * Net for Unlabeled Data * Net for Test Data * Create Solver. * Training Loop. * Test * Training * by Labeled Data * Calculate Supervised Loss * by Unlabeled Data * Calculate Virtual Adversarial Noise * Calculate Unsupervised Loss """ args = get_args() # Get context. from nnabla.ext_utils import get_extension_context logger.info("Running in %s" % args.context) ctx = get_extension_context(args.context, device_id=args.device_id, type_config=args.type_config) nn.set_default_context(ctx) shape_x = (1, 28, 28) n_h = args.n_units n_y = args.n_class # Load MNIST Dataset from mnist_data import load_mnist, data_iterator_mnist images, labels = load_mnist(train=True) rng = np.random.RandomState(706) inds = rng.permutation(len(images)) def feed_labeled(i): j = inds[i] return images[j], labels[j] def feed_unlabeled(i): j = inds[i] return images[j], labels[j] di_l = data_iterator_simple(feed_labeled, args.n_labeled, args.batchsize_l, shuffle=True, rng=rng, with_file_cache=False) di_u = data_iterator_simple(feed_unlabeled, args.n_train, args.batchsize_u, shuffle=True, rng=rng, with_file_cache=False) di_v = data_iterator_mnist(args.batchsize_v, train=False) # Create networks # feed-forward-net building function def forward(x, test=False): return mlp_net(x, n_h, n_y, test) # Net for learning labeled data xl = nn.Variable((args.batchsize_l, ) + shape_x, need_grad=False) yl = forward(xl, test=False) tl = nn.Variable((args.batchsize_l, 1), need_grad=False) loss_l = F.mean(F.softmax_cross_entropy(yl, tl)) # Net for learning unlabeled data xu = nn.Variable((args.batchsize_u, ) + shape_x, need_grad=False) yu = forward(xu, test=False) y1 = yu.get_unlinked_variable() y1.need_grad = False noise = nn.Variable((args.batchsize_u, ) + shape_x, need_grad=True) r = noise / (F.sum(noise**2, [1, 2, 3], keepdims=True))**0.5 r.persistent = True y2 = forward(xu + args.xi_for_vat * r, test=False) y3 = forward(xu + args.eps_for_vat * r, test=False) loss_k = F.mean(distance(y1, y2)) loss_u = F.mean(distance(y1, y3)) # Net for evaluating validation data xv = nn.Variable((args.batchsize_v, ) + shape_x, need_grad=False) hv = forward(xv, test=True) tv = nn.Variable((args.batchsize_v, 1), need_grad=False) err = F.mean(F.top_n_error(hv, tv, n=1)) # Create solver solver = S.Adam(args.learning_rate) solver.set_parameters(nn.get_parameters()) # Monitor training and validation stats. import nnabla.monitor as M monitor = M.Monitor(args.model_save_path) monitor_verr = M.MonitorSeries("Test error", monitor, interval=240) monitor_time = M.MonitorTimeElapsed("Elapsed time", monitor, interval=240) contents = save_nnp({'x': xv}, {'y': hv}, 1) save.save(os.path.join(args.model_save_path, 'result_epoch0.nnp'), contents) # Training Loop. t0 = time.time() for i in range(args.max_iter): # Validation Test if i % args.val_interval == 0: valid_error = calc_validation_error(di_v, xv, tv, err, args.val_iter) monitor_verr.add(i, valid_error) ################################# ## Training by Labeled Data ##### ################################# # forward, backward and update xl.d, tl.d = di_l.next() xl.d = xl.d / 255 solver.zero_grad() loss_l.forward(clear_no_need_grad=True) loss_l.backward(clear_buffer=True) solver.weight_decay(args.weight_decay) solver.update() ################################# ## Training by Unlabeled Data ### ################################# # Calculate y without noise, only once. 
xu.d, _ = di_u.next() xu.d = xu.d / 255 yu.forward(clear_buffer=True) ##### Calculate Adversarial Noise ##### # Do power method iteration noise.d = np.random.normal(size=xu.shape).astype(np.float32) for k in range(args.n_iter_for_power_method): r.grad.zero() loss_k.forward(clear_no_need_grad=True) loss_k.backward(clear_buffer=True) noise.data.copy_from(r.grad) ##### Calculate loss for unlabeled data ##### # forward, backward and update solver.zero_grad() loss_u.forward(clear_no_need_grad=True) loss_u.backward(clear_buffer=True) solver.weight_decay(args.weight_decay) solver.update() ##### Learning rate update ##### if i % args.iter_per_epoch == 0: solver.set_learning_rate(solver.learning_rate() * args.learning_rate_decay) monitor_time.add(i) # Evaluate the final model by the error rate with validation dataset valid_error = calc_validation_error(di_v, xv, tv, err, args.val_iter) monitor_verr.add(i, valid_error) monitor_time.add(i) # Save the model. parameter_file = os.path.join(args.model_save_path, 'params_%06d.h5' % args.max_iter) nn.save_parameters(parameter_file) contents = save_nnp({'x': xv}, {'y': hv}, 1) save.save(os.path.join(args.model_save_path, 'result.nnp'), contents)
def train(): """ Main script. Steps: * Parse command line arguments. * Specify a context for computation. * Initialize DataIterator for MNIST. * Construct a computation graph for training and validation. * Initialize a solver and set parameter variables to it. * Create monitor instances for saving and displaying training stats. * Training loop * Computate error rate for validation data (periodically) * Get a next minibatch. * Execute forwardprop on the training graph. * Compute training error * Set parameter gradients zero * Execute backprop. * Solver updates parameters by using gradients computed by backprop. """ args = get_args() # Get context. from nnabla.contrib.context import extension_context extension_module = args.context if args.context is None: extension_module = 'cpu' logger.info("Running in %s" % extension_module) ctx = extension_context(extension_module, device_id=args.device_id) nn.set_default_context(ctx) # Create CNN network for both training and testing. mnist_cnn_prediction = mnist_lenet_prediction if args.net == 'resnet': mnist_cnn_prediction = mnist_resnet_prediction # TRAIN # Create input variables. image = nn.Variable([args.batch_size, 1, 28, 28]) label = nn.Variable([args.batch_size, 1]) # Create prediction graph. pred = mnist_cnn_prediction(image, test=False) pred.persistent = True # Create loss function. loss = F.mean(F.softmax_cross_entropy(pred, label)) # TEST # Create input variables. vimage = nn.Variable([args.batch_size, 1, 28, 28]) vlabel = nn.Variable([args.batch_size, 1]) # Create predition graph. vpred = mnist_cnn_prediction(vimage, test=True) # Create Solver. solver = S.Adam(args.learning_rate) solver.set_parameters(nn.get_parameters()) # Create monitor. from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed monitor = Monitor(args.monitor_path) monitor_loss = MonitorSeries("Training loss", monitor, interval=10) monitor_err = MonitorSeries("Training error", monitor, interval=10) monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100) monitor_verr = MonitorSeries("Test error", monitor, interval=10) # Initialize DataIterator for MNIST. data = data_iterator_mnist(args.batch_size, True) vdata = data_iterator_mnist(args.batch_size, False) # Training loop. 
for i in range(args.max_iter): if i % args.val_interval == 0: # Validation ve = 0.0 for j in range(args.val_iter): vimage.d, vlabel.d = vdata.next() vpred.forward(clear_buffer=True) ve += categorical_error(vpred.d, vlabel.d) monitor_verr.add(i, ve / args.val_iter) if i % args.model_save_interval == 0: nn.save_parameters( os.path.join(args.model_save_path, 'params_%06d.h5' % i)) # Training forward image.d, label.d = data.next() #image.d /= 255.0 solver.zero_grad() loss.forward(clear_no_need_grad=True) loss.backward(clear_buffer=True) solver.weight_decay(args.weight_decay) solver.update() e = categorical_error(pred.d, label.d) monitor_loss.add(i, loss.d.copy()) monitor_err.add(i, e) monitor_time.add(i) ve = 0.0 for j in range(args.val_iter): vimage.d, vlabel.d = vdata.next() vpred.forward(clear_buffer=True) ve += categorical_error(vpred.d, vlabel.d) monitor_verr.add(i, ve / args.val_iter) nnp_file = os.path.join(args.model_save_path, '{}_{:06}.nnp'.format(args.net, args.max_iter)) runtime_contents = { 'networks': [{ 'name': 'Validation', 'batch_size': args.batch_size, 'outputs': { 'y': vpred }, 'names': { 'x': vimage } }], 'executors': [{ 'name': 'Runtime', 'network': 'Validation', 'data': ['x'], 'output': ['y'] }] } save.save(nnp_file, runtime_contents) from cpp_forward_check import check_cpp_forward check_cpp_forward(args.model_save_path, [vimage.d], [vimage], vpred, nnp_file)
def train(): """ Main script. Steps: * Parse command line arguments. * Specify a context for computation. * Initialize DataIterator for MNIST. * Construct a computation graph for training and validation. * Initialize a solver and set parameter variables to it. * Create monitor instances for saving and displaying training stats. * Training loop * Computate error rate for validation data (periodically) * Get a next minibatch. * Set parameter gradients zero * Execute forwardprop on the training graph. * Execute backprop. * Solver updates parameters by using gradients computed by backprop. * Compute training error """ args = get_args(monitor_path='tmp.monitor.bnn') # Get context. from nnabla.ext_utils import get_extension_context logger.info("Running in %s" % args.context) ctx = get_extension_context(args.context, device_id=args.device_id, type_config=args.type_config) nn.set_default_context(ctx) # Initialize DataIterator for MNIST. data = data_iterator_mnist(args.batch_size, True) vdata = data_iterator_mnist(args.batch_size, False) # Create CNN network for both training and testing. mnist_cnn_prediction = mnist_binary_connect_lenet_prediction if args.net == 'bincon': mnist_cnn_prediction = mnist_binary_connect_lenet_prediction elif args.net == 'binnet': mnist_cnn_prediction = mnist_binary_net_lenet_prediction elif args.net == 'bwn': mnist_cnn_prediction = mnist_binary_weight_lenet_prediction elif args.net == 'bincon_resnet': mnist_cnn_prediction = mnist_binary_connect_resnet_prediction elif args.net == 'binnet_resnet': mnist_cnn_prediction = mnist_binary_net_resnet_prediction elif args.net == 'bwn_resnet': mnist_cnn_prediction = mnist_binary_weight_resnet_prediction # TRAIN # Create input variables. image = nn.Variable([args.batch_size, 1, 28, 28]) label = nn.Variable([args.batch_size, 1]) # Create prediction graph. pred = mnist_cnn_prediction(image / 255, test=False) pred.persistent = True # Create loss function. loss = F.mean(F.softmax_cross_entropy(pred, label)) # TEST # Create input variables. vimage = nn.Variable([args.batch_size, 1, 28, 28]) vlabel = nn.Variable([args.batch_size, 1]) # Create prediction graph. vpred = mnist_cnn_prediction(vimage / 255, test=True) # Create Solver. solver = S.Adam(args.learning_rate) solver.set_parameters(nn.get_parameters()) start_point = 0 if args.checkpoint is not None: # load weights and solver state info from specified checkpoint file. start_point = load_checkpoint(args.checkpoint, solver) # Create monitor. import nnabla.monitor as M monitor = M.Monitor(args.monitor_path) monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10) monitor_err = M.MonitorSeries("Training error", monitor, interval=10) monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=100) monitor_verr = M.MonitorSeries("Test error", monitor, interval=10) # save_nnp contents = save_nnp({'x': vimage}, {'y': vpred}, args.batch_size) save.save( os.path.join(args.model_save_path, '{}_result_epoch0.nnp'.format(args.net)), contents) # Training loop. 
for i in range(start_point, args.max_iter): if i % args.val_interval == 0: # Validation ve = 0.0 for j in range(args.val_iter): vimage.d, vlabel.d = vdata.next() vpred.forward(clear_buffer=True) ve += categorical_error(vpred.d, vlabel.d) monitor_verr.add(i, ve / args.val_iter) if i % args.model_save_interval == 0: # save checkpoint file save_checkpoint(args.model_save_path, i, solver) # Training forward image.d, label.d = data.next() solver.zero_grad() loss.forward(clear_no_need_grad=True) # Training backward & update loss.backward(clear_buffer=True) solver.weight_decay(args.weight_decay) solver.update() # Monitor e = categorical_error(pred.d, label.d) monitor_loss.add(i, loss.d.copy()) monitor_err.add(i, e) monitor_time.add(i) parameter_file = os.path.join(args.model_save_path, 'params_%06d.h5' % args.max_iter) nn.save_parameters(parameter_file) # save_nnp_lastepoch contents = save_nnp({'x': vimage}, {'y': vpred}, args.batch_size) save.save( os.path.join(args.model_save_path, '{}_result.nnp'.format(args.net)), contents)
def train(): """ Main script. Steps: * Parse command line arguments. * Specify a context for computation. * Initialize DataIterator for MNIST. * Construct a computation graph for training and validation. * Initialize a solver and set parameter variables to it. * Create monitor instances for saving and displaying training stats. * Training loop * Computate error rate for validation data (periodically) * Get a next minibatch. * Set parameter gradients zero * Execute forwardprop on the training graph. * Execute backprop. * Solver updates parameters by using gradients computed by backprop. * Compute training error """ args = get_args(monitor_path='tmp.monitor.bnn') # Get context. from nnabla.contrib.context import extension_context extension_module = args.context if args.context is None: extension_module = 'cpu' logger.info("Running in %s" % extension_module) ctx = extension_context(extension_module, device_id=args.device_id) nn.set_default_context(ctx) # Initialize DataIterator for MNIST. data = data_iterator_mnist(args.batch_size, True) vdata = data_iterator_mnist(args.batch_size, False) # Create CNN network for both training and testing. mnist_cnn_prediction = mnist_binary_connect_lenet_prediction if args.net == 'bincon': mnist_cnn_prediction = mnist_binary_connect_lenet_prediction elif args.net == 'binnet': mnist_cnn_prediction = mnist_binary_net_lenet_prediction elif args.net == 'bwn': mnist_cnn_prediction = mnist_binary_weight_lenet_prediction elif args.net == 'bincon_resnet': mnist_cnn_prediction = mnist_binary_connect_resnet_prediction elif args.net == 'binnet_resnet': mnist_cnn_prediction = mnist_binary_net_resnet_prediction elif args.net == 'bwn_resnet': mnist_cnn_prediction = mnist_binary_weight_resnet_prediction # TRAIN # Create input variables. image = nn.Variable([args.batch_size, 1, 28, 28]) label = nn.Variable([args.batch_size, 1]) # Create predition graph. pred = mnist_cnn_prediction(image / 255, test=False) pred.persistent = True # Create loss function. loss = F.mean(F.softmax_cross_entropy(pred, label)) # TEST # Create input variables. vimage = nn.Variable([args.batch_size, 1, 28, 28]) vlabel = nn.Variable([args.batch_size, 1]) # Create predition graph. vpred = mnist_cnn_prediction(vimage / 255, test=True) # Create Solver. solver = S.Adam(args.learning_rate) solver.set_parameters(nn.get_parameters()) # Create monitor. import nnabla.monitor as M monitor = M.Monitor(args.monitor_path) monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10) monitor_err = M.MonitorSeries("Training error", monitor, interval=10) monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=100) monitor_verr = M.MonitorSeries("Test error", monitor, interval=10) # Training loop. 
for i in range(args.max_iter): if i % args.val_interval == 0: # Validation ve = 0.0 for j in range(args.val_iter): vimage.d, vlabel.d = vdata.next() vpred.forward(clear_buffer=True) ve += categorical_error(vpred.d, vlabel.d) monitor_verr.add(i, ve / args.val_iter) if i % args.model_save_interval == 0: nn.save_parameters(os.path.join( args.model_save_path, 'params_%06d.h5' % i)) # Training forward image.d, label.d = data.next() solver.zero_grad() loss.forward(clear_no_need_grad=True) # Training backward & update loss.backward(clear_buffer=True) solver.weight_decay(args.weight_decay) solver.update() # Monitor e = categorical_error(pred.d, label.d) monitor_loss.add(i, loss.d.copy()) monitor_err.add(i, e) monitor_time.add(i) nnp_file = os.path.join( args.model_save_path, '{}_{:06}.nnp'.format(args.net, args.max_iter)) runtime_contents = { 'networks': [ {'name': 'Validation', 'batch_size': args.batch_size, 'outputs': {'y': vpred}, 'names': {'x': vimage}}], 'executors': [ {'name': 'Runtime', 'network': 'Validation', 'data': ['x'], 'output': ['y']}]} save.save(nnp_file, runtime_contents) from cpp_forward_check import check_cpp_forward check_cpp_forward(args.model_save_path, [vimage.d], [ vimage], vpred, nnp_file)
def train(): bs_train, bs_valid = args.train_batch_size, args.val_batch_size extension_module = args.context ctx = get_extension_context( extension_module, device_id=args.device_id, type_config=args.type_config ) nn.set_default_context(ctx) if args.input: train_loader, val_loader, n_train_samples, n_val_samples = load_data( bs_train, bs_valid ) else: train_data_source = data_source_cifar10( train=True, shuffle=True, label_shuffle=True ) val_data_source = data_source_cifar10(train=False, shuffle=False) n_train_samples = len(train_data_source.labels) n_val_samples = len(val_data_source.labels) # Data Iterator train_loader = data_iterator( train_data_source, bs_train, None, False, False) val_loader = data_iterator( val_data_source, bs_valid, None, False, False) if args.shuffle_label: if not os.path.exists(args.output): os.makedirs(args.output) np.save(os.path.join(args.output, "x_train.npy"), train_data_source.images) np.save( os.path.join(args.output, "y_shuffle_train.npy"), train_data_source.labels, ) np.save(os.path.join(args.output, "y_train.npy"), train_data_source.raw_label) np.save(os.path.join(args.output, "x_val.npy"), val_data_source.images) np.save(os.path.join(args.output, "y_val.npy"), val_data_source.labels) if args.model == "resnet23": model_prediction = resnet23_prediction elif args.model == "resnet56": model_prediction = resnet56_prediction prediction = functools.partial( model_prediction, ncls=10, nmaps=64, act=F.relu, seed=args.seed) # Create training graphs test = False image_train = nn.Variable((bs_train, 3, 32, 32)) label_train = nn.Variable((bs_train, 1)) pred_train, _ = prediction(image_train, test) loss_train = loss_function(pred_train, label_train) # Create validation graph test = True image_valid = nn.Variable((bs_valid, 3, 32, 32)) label_valid = nn.Variable((bs_valid, 1)) pred_valid, _ = prediction(image_valid, test) loss_val = loss_function(pred_valid, label_valid) for param in nn.get_parameters().values(): param.grad.zero() cfg = read_yaml("./learning_rate.yaml") print(cfg) lr_sched = create_learning_rate_scheduler(cfg.learning_rate_config) solver = S.Momentum(momentum=0.9, lr=lr_sched.get_lr()) solver.set_parameters(nn.get_parameters()) start_point = 0 if args.checkpoint is not None: # load weights and solver state info from specified checkpoint file. 
start_point = load_checkpoint(args.checkpoint, solver) # Create monitor from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed monitor = Monitor(args.monitor_path) monitor_loss = MonitorSeries("Training loss", monitor, interval=1) monitor_err = MonitorSeries("Training error", monitor, interval=1) monitor_time = MonitorTimeElapsed("Training time", monitor, interval=1) monitor_verr = MonitorSeries("Test error", monitor, interval=1) monitor_vloss = MonitorSeries("Test loss", monitor, interval=1) # save_nnp contents = save_nnp({"x": image_valid}, {"y": pred_valid}, bs_valid) save.save( os.path.join(args.model_save_path, (args.model+"_epoch0_result.nnp")), contents ) train_iter = math.ceil(n_train_samples / bs_train) val_iter = math.ceil(n_val_samples / bs_valid) # Training-loop for i in range(start_point, args.train_epochs): lr_sched.set_epoch(i) solver.set_learning_rate(lr_sched.get_lr()) print("Learning Rate: ", lr_sched.get_lr()) # Validation ve = 0.0 vloss = 0.0 print("## Validation") for j in range(val_iter): image, label = val_loader.next() image_valid.d = image label_valid.d = label loss_val.forward() vloss += loss_val.data.data.copy() * bs_valid ve += categorical_error(pred_valid.d, label) ve /= args.val_iter vloss /= n_val_samples monitor_verr.add(i, ve) monitor_vloss.add(i, vloss) if int(i % args.model_save_interval) == 0: # save checkpoint file save_checkpoint(args.model_save_path, i, solver) # Forward/Zerograd/Backward print("## Training") e = 0.0 loss = 0.0 for k in range(train_iter): image, label = train_loader.next() image_train.d = image label_train.d = label loss_train.forward() solver.zero_grad() loss_train.backward() solver.update() e += categorical_error(pred_train.d, label_train.d) loss += loss_train.data.data.copy() * bs_train e /= train_iter loss /= n_train_samples e = categorical_error(pred_train.d, label_train.d) monitor_loss.add(i, loss) monitor_err.add(i, e) monitor_time.add(i) nn.save_parameters( os.path.join(args.model_save_path, "params_%06d.h5" % (args.train_epochs)) ) # save_nnp_lastepoch contents = save_nnp({"x": image_valid}, {"y": pred_valid}, bs_valid) save.save(os.path.join(args.model_save_path, (args.model+"_result.nnp")), contents)
def train(): """ Main script. """ args = get_args() # Get context. from nnabla.ext_utils import get_extension_context extension_module = args.context if args.context is None: extension_module = 'cpu' logger.info("Running in %s" % extension_module) ctx = get_extension_context(extension_module, device_id=args.device_id, type_config=args.type_config) nn.set_default_context(ctx) if args.tiny_mode: # We use Tiny ImageNet from Stanford CS231N class. # (Tiny ImageNet, https://tiny-imagenet.herokuapp.com/) # Tiny ImageNet consists of 200 categories, each category has 500 images # in training set. The image size is 64x64. To adapt ResNet into 64x64 # image inputs, the input image size of ResNet is set as 56x56, and # the stride in the first conv and the first max pooling are removed. # Please check README. data = data_iterator_tiny_imagenet(args.batch_size, 'train') vdata = data_iterator_tiny_imagenet(args.batch_size, 'val') num_classes = 200 else: # We use ImageNet. # (ImageNet, https://imagenet.herokuapp.com/) # ImageNet consists of 1000 categories, each category has 1280 images # in training set. The image size is various. To adapt ResNet into # 320x320 image inputs, the input image size of ResNet is set as # 224x224. We need to get tar file and create cache file(320x320 images). # Please check README. data = data_iterator_imagenet(args.batch_size, args.train_cachefile_dir) vdata = data_iterator_imagenet(args.batch_size, args.val_cachefile_dir) num_classes = 1000 t_model = get_model(args, num_classes, test=False, tiny=args.tiny_mode) t_model.pred.persistent = True # Not clearing buffer of pred in backward # TODO: need_grad should be passed to get_unlinked_variable after v1.0.3 fix. t_pred2 = t_model.pred.get_unlinked_variable() t_pred2.need_grad = False t_e = F.mean(F.top_n_error(t_pred2, t_model.label)) v_model = get_model(args, num_classes, test=True, tiny=args.tiny_mode) v_model.pred.persistent = True # Not clearing buffer of pred in forward # TODO: need_grad should be passed to get_unlinked_variable after v1.0.3 fix. v_pred2 = v_model.pred.get_unlinked_variable() v_pred2.need_grad = False v_e = F.mean(F.top_n_error(v_pred2, v_model.label)) # Save_nnp_Epoch0 contents = save_nnp({'x': v_model.image}, {'y': v_model.pred}, args.batch_size) save.save(os.path.join(args.model_save_path, 'Imagenet_result_epoch0.nnp'), contents) # Create Solver. solver = S.Momentum(args.learning_rate, 0.9) solver.set_parameters(nn.get_parameters()) start_point = 0 if args.checkpoint is not None: # load weights and solver state info from specified checkpoint file. start_point = load_checkpoint(args.checkpoint, solver) # Create monitor. import nnabla.monitor as M monitor = M.Monitor(args.monitor_path) monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10) monitor_err = M.MonitorSeries("Training error", monitor, interval=10) monitor_vloss = M.MonitorSeries("Validation loss", monitor, interval=10) monitor_verr = M.MonitorSeries("Validation error", monitor, interval=10) monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=10) monitor_vtime = M.MonitorTimeElapsed("Validation time", monitor, interval=10) # Training loop. for i in range(start_point, args.max_iter): # Save parameters if i % args.model_save_interval == 0: # save checkpoint file save_checkpoint(args.model_save_path, i, solver) # Validation if i % args.val_interval == 0 and i != 0: # Clear all intermediate memory to save memory. 
# t_model.loss.clear_recursive() l = 0.0 e = 0.0 for j in range(args.val_iter): images, labels = vdata.next() v_model.image.d = images v_model.label.d = labels v_model.image.data.cast(np.uint8, ctx) v_model.label.data.cast(np.int32, ctx) v_model.loss.forward(clear_buffer=True) v_e.forward(clear_buffer=True) l += v_model.loss.d e += v_e.d monitor_vloss.add(i, l / args.val_iter) monitor_verr.add(i, e / args.val_iter) monitor_vtime.add(i) # Clear all intermediate memory to save memory. # v_model.loss.clear_recursive() # Training l = 0.0 e = 0.0 solver.zero_grad() def accumulate_error(l, e, t_model, t_e): l += t_model.loss.d e += t_e.d return l, e # Gradient accumulation loop for j in range(args.accum_grad): images, labels = data.next() t_model.image.d = images t_model.label.d = labels t_model.image.data.cast(np.uint8, ctx) t_model.label.data.cast(np.int32, ctx) t_model.loss.forward(clear_no_need_grad=True) t_model.loss.backward(clear_buffer=True) # Accumulating gradients t_e.forward(clear_buffer=True) l, e = accumulate_error(l, e, t_model, t_e) solver.weight_decay(args.weight_decay) solver.update() monitor_loss.add(i, l / args.accum_grad) monitor_err.add(i, e / args.accum_grad) monitor_time.add(i) # Learning rate decay at scheduled iter if i in args.learning_rate_decay_at: solver.set_learning_rate(solver.learning_rate() * 0.1) nn.save_parameters( os.path.join(args.model_save_path, 'param_%06d.h5' % args.max_iter)) # Save_nnp contents = save_nnp({'x': v_model.image}, {'y': v_model.pred}, args.batch_size) save.save(os.path.join(args.model_save_path, 'Imagenet_result.nnp'), contents)
def save_nnp(nnp_file_name, nn_name, input, output, batchsize):
    content = _create_nnp_content(nn_name, input, output, batchsize)
    save.save(nnp_file_name, content)
    return content
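# Example (assumption, not the actual helper): _create_nnp_content is not shown in
# these snippets; based on the contents dictionaries used elsewhere here, it
# presumably returns something shaped like the sketch below for a single-input,
# single-output network. The key names are hypothetical.
def _example_nnp_content(nn_name, input, output, batchsize):
    return {
        'networks': [
            {'name': nn_name,
             'batch_size': batchsize,
             'outputs': {'y': output},
             'names': {'x': input}}],
        'executors': [
            {'name': 'runtime',
             'network': nn_name,
             'data': ['x'],
             'output': ['y']}]}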
def main(): """ Main script. Steps: * Setup calculation environment * Initialize data iterator. * Create Networks * Create Solver. * Training Loop. * Training * Test * Save """ # Set args args = get_args(monitor_path='tmp.monitor.vae', max_iter=60000, model_save_path=None, learning_rate=3e-4, batch_size=100, weight_decay=0) # Get context. from nnabla.ext_utils import get_extension_context logger.info("Running in %s" % args.context) ctx = get_extension_context(args.context, device_id=args.device_id, type_config=args.type_config) nn.set_default_context(ctx) # Initialize data provider di_l = data_iterator_mnist(args.batch_size, True) di_t = data_iterator_mnist(args.batch_size, False) # Network shape_x = (1, 28, 28) shape_z = (50, ) x = nn.Variable((args.batch_size, ) + shape_x) loss_l = vae(x, shape_z, test=False) loss_t = vae(x, shape_z, test=True) # Create solver solver = S.Adam(args.learning_rate) solver.set_parameters(nn.get_parameters()) # Monitors for training and validation monitor = M.Monitor(args.model_save_path) monitor_training_loss = M.MonitorSeries("Training loss", monitor, interval=600) monitor_test_loss = M.MonitorSeries("Test loss", monitor, interval=600) monitor_time = M.MonitorTimeElapsed("Elapsed time", monitor, interval=600) # Save_nnp_at_epoch0 contents = save_nnp({'x': x}, {'y': loss_t}, args.batch_size) save.save( os.path.join(args.model_save_path, '{}_resultEpoch0.nnp'.format(args.net)), contents) # Training Loop. for i in range(args.max_iter): # Initialize gradients solver.zero_grad() # Forward, backward and update x.d, _ = di_l.next() loss_l.forward(clear_no_need_grad=True) loss_l.backward(clear_buffer=True) solver.weight_decay(args.weight_decay) solver.update() # Forward for test x.d, _ = di_t.next() loss_t.forward(clear_no_need_grad=True) # Monitor for logging monitor_training_loss.add(i, loss_l.d.copy()) monitor_test_loss.add(i, loss_t.d.copy()) monitor_time.add(i) # Save the model nn.save_parameters( os.path.join(args.model_save_path, 'params_%06d.h5' % args.max_iter)) # save_nnp_lastepoch contents = save_nnp({'x': x}, {'y': loss_t}, args.batch_size) save.save( os.path.join(args.model_save_path, '{}_result.nnp'.format(args.net)), contents)
def train(): """ Main script. Steps: * Parse command line arguments. * Specify a context for computation. * Initialize DataIterator for MNIST. * Construct a computation graph for training and validation. * Initialize a solver and set parameter variables to it. * Create monitor instances for saving and displaying training stats. * Training loop * Computate error rate for validation data (periodically) * Get a next minibatch. * Execute forwardprop on the training graph. * Compute training error * Set parameter gradients zero * Execute backprop. * Solver updates parameters by using gradients computed by backprop. """ args = get_args() from numpy.random import seed seed(0) # Get context. from nnabla.ext_utils import get_extension_context logger.info("Running in %s" % args.context) ctx = get_extension_context(args.context, device_id=args.device_id, type_config=args.type_config) nn.set_default_context(ctx) # Create CNN network for both training and testing. if args.net == 'lenet': mnist_cnn_prediction = mnist_lenet_prediction elif args.net == 'resnet': mnist_cnn_prediction = mnist_resnet_prediction else: raise ValueError("Unknown network type {}".format(args.net)) # TRAIN # Create input variables. image = nn.Variable([args.batch_size, 1, 28, 28]) label = nn.Variable([args.batch_size, 1]) # Create prediction graph. pred = mnist_cnn_prediction(image, test=False, aug=args.augment_train) pred.persistent = True # Create loss function. loss = F.mean(F.softmax_cross_entropy(pred, label)) # TEST # Create input variables. vimage = nn.Variable([args.batch_size, 1, 28, 28]) vlabel = nn.Variable([args.batch_size, 1]) # Create prediction graph. vpred = mnist_cnn_prediction(vimage, test=True, aug=args.augment_test) # Create Solver. solver = S.Adam(args.learning_rate) solver.set_parameters(nn.get_parameters()) # Create monitor. from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed monitor = Monitor(args.monitor_path) monitor_loss = MonitorSeries("Training loss", monitor, interval=10) monitor_err = MonitorSeries("Training error", monitor, interval=10) monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100) monitor_verr = MonitorSeries("Test error", monitor, interval=10) # Initialize DataIterator for MNIST. from numpy.random import RandomState data = data_iterator_mnist(args.batch_size, True, rng=RandomState(1223)) vdata = data_iterator_mnist(args.batch_size, False) # Training loop. 
for i in range(args.max_iter): if i % args.val_interval == 0: # Validation ve = 0.0 for j in range(args.val_iter): vimage.d, vlabel.d = vdata.next() vpred.forward(clear_buffer=True) vpred.data.cast(np.float32, ctx) ve += categorical_error(vpred.d, vlabel.d) monitor_verr.add(i, ve / args.val_iter) if i % args.model_save_interval == 0: nn.save_parameters( os.path.join(args.model_save_path, 'params_%06d.h5' % i)) # Training forward image.d, label.d = data.next() solver.zero_grad() loss.forward(clear_no_need_grad=True) loss.backward(clear_buffer=True) solver.weight_decay(args.weight_decay) solver.update() loss.data.cast(np.float32, ctx) pred.data.cast(np.float32, ctx) e = categorical_error(pred.d, label.d) monitor_loss.add(i, loss.d.copy()) monitor_err.add(i, e) monitor_time.add(i) ve = 0.0 for j in range(args.val_iter): vimage.d, vlabel.d = vdata.next() vpred.forward(clear_buffer=True) ve += categorical_error(vpred.d, vlabel.d) monitor_verr.add(i, ve / args.val_iter) parameter_file = os.path.join( args.model_save_path, '{}_params_{:06}.h5'.format(args.net, args.max_iter)) nn.save_parameters(parameter_file) # append F.Softmax to the prediction graph so users see intuitive outputs runtime_contents = { 'networks': [{ 'name': 'Validation', 'batch_size': args.batch_size, 'outputs': { 'y': F.softmax(vpred) }, 'names': { 'x': vimage } }], 'executors': [{ 'name': 'Runtime', 'network': 'Validation', 'data': ['x'], 'output': ['y'] }] } save.save( os.path.join(args.model_save_path, '{}_result.nnp'.format(args.net)), runtime_contents)
def meta_train(args, shape_x, train_data, valid_data, test_data): # Build episode generators train_episode_generator = EpisodeGenerator( train_data[0], train_data[1], args.n_class_tr, args.n_shot_tr, args.n_query_tr) valid_episode_generator = EpisodeGenerator( valid_data[0], valid_data[1], args.n_class, args.n_shot, args.n_query) test_episode_generator = EpisodeGenerator( test_data[0], test_data[1], args.n_class, args.n_shot, args.n_query) # Build training model xs_t = nn.Variable((args.n_class_tr * args.n_shot_tr, ) + shape_x) xq_t = nn.Variable((args.n_class_tr * args.n_query_tr, ) + shape_x) hq_t = net(args.n_class_tr, xs_t, xq_t, args.embedding, args.net_type, args.metric, False) yq_t = nn.Variable((args.n_class_tr * args.n_query_tr, 1)) loss_t = F.mean(F.softmax_cross_entropy(hq_t, yq_t)) # Build evaluation model xs_v = nn.Variable((args.n_class * args.n_shot, ) + shape_x) xq_v = nn.Variable((args.n_class * args.n_query, ) + shape_x) hq_v = net(args.n_class, xs_v, xq_v, args.embedding, args.net_type, args.metric, True) yq_v = nn.Variable((args.n_class * args.n_query, 1)) err_v = F.mean(F.top_n_error(hq_v, yq_v, n=1)) # Setup solver solver = S.Adam(args.learning_rate) solver.set_parameters(nn.get_parameters()) # Monitor outputs monitor = Monitor(args.work_dir) monitor_loss = MonitorSeries( "Training loss", monitor, interval=args.iter_per_epoch) monitor_valid_err = MonitorSeries( "Validation error", monitor, interval=args.iter_per_valid) monitor_test_err = MonitorSeries("Test error", monitor) monitor_test_conf = MonitorSeries("Test error confidence", monitor) # Output files param_file = args.work_dir + "/params.h5" tsne_file = args.work_dir + "/tsne.png" # Save NNP batch_size = 1 contents = save_nnp({'x0': xs_v, 'x1': xq_v}, { 'y': hq_v}, batch_size) save.save(os.path.join(args.work_dir, 'MetricMetaLearning_epoch0.nnp'), contents, variable_batch_size=False) # Training loop train_losses = [] best_err = 1.0 for i in range(args.max_iteration): # Decay learning rate if (i + 1) % args.lr_decay_interval == 0: solver.set_learning_rate(solver.learning_rate() * args.lr_decay) # Create an episode xs_t.d, xq_t.d, yq_t.d = train_episode_generator.next() # Training by the episode solver.zero_grad() loss_t.forward(clear_no_need_grad=True) loss_t.backward(clear_buffer=True) solver.update() train_losses.append(loss_t.d.copy()) # Evaluation if (i + 1) % args.iter_per_valid == 0: train_loss = np.mean(train_losses) train_losses = [] valid_errs = [] for k in range(args.n_episode_for_valid): xs_v.d, xq_v.d, yq_v.d = valid_episode_generator.next() err_v.forward(clear_no_need_grad=True, clear_buffer=True) valid_errs.append(np.float(err_v.d.copy())) valid_err = np.mean(valid_errs) monitor_loss.add(i + 1, loss_t.d.copy()) monitor_valid_err.add(i + 1, valid_err * 100) if valid_err < best_err: best_err = valid_err nn.save_parameters(param_file) # Final evaluation nn.load_parameters(param_file) v_errs = [] for k in range(args.n_episode_for_test): xs_v.d, xq_v.d, yq_v.d = test_episode_generator.next() err_v.forward(clear_no_need_grad=True, clear_buffer=True) v_errs.append(np.float(err_v.d.copy())) v_err_mean = np.mean(v_errs) v_err_std = np.std(v_errs) v_err_conf = 1.96 * v_err_std / np.sqrt(args.n_episode_for_test) monitor_test_err.add(0, v_err_mean * 100) monitor_test_conf.add(0, v_err_conf * 100) # Visualization n_class = 50 n_sample = 20 visualize_episode_generator = EpisodeGenerator( train_data[0], train_data[1], n_class, 0, n_sample) _, samples, labels = visualize_episode_generator.next() u = 
get_embeddings(samples, conv4) v = get_tsne(u) plot_tsne(v[:, 0], v[:, 1], labels[:, 0], tsne_file) # Save NNP contents = save_nnp({'x0': xs_v, 'x1': xq_v}, { 'y': hq_v}, batch_size) save.save(os.path.join(args.work_dir, 'MetricMetaLearning.nnp'), contents, variable_batch_size=False)
def train(): """ Main script. Steps: * Parse command line arguments. * Specify contexts for computation. * Initialize DataIterator. * Construct a computation graph for training and one for validation. * Initialize solver and set parameter variables to that. * Create monitor instances for saving and displaying training stats. * Training loop * Compute error rate for validation data (periodically) * Get the next minibatch. * Execute forwardprop * Set parameter gradients to zero * Execute backprop. * Solver updates parameters by using gradients computed by backprop. * Compute training error """ # Parse args args = get_args() n_train_samples = 50000 bs_valid = args.batch_size extension_module = args.context ctx = get_extension_context( extension_module, device_id=args.device_id, type_config=args.type_config) nn.set_default_context(ctx) if args.net == "cifar10_resnet23": prediction = functools.partial( resnet23_prediction, ncls=10, nmaps=64, act=F.relu) data_iterator = data_iterator_cifar10 if args.net == "cifar100_resnet23": prediction = functools.partial( resnet23_prediction, ncls=100, nmaps=384, act=F.elu) data_iterator = data_iterator_cifar100 # Create training graphs test = False image_train = nn.Variable((args.batch_size, 3, 32, 32)) label_train = nn.Variable((args.batch_size, 1)) pred_train = prediction(image_train, test) loss_train = loss_function(pred_train, label_train) input_image_train = {"image": image_train, "label": label_train} # Create validation graph test = True image_valid = nn.Variable((bs_valid, 3, 32, 32)) pred_valid = prediction(image_valid, test) input_image_valid = {"image": image_valid} # Solvers solver = S.Adam() solver.set_parameters(nn.get_parameters()) start_point = 0 if args.checkpoint is not None: # load weights and solver state info from specified checkpoint file. start_point = load_checkpoint(args.checkpoint, solver) # Create monitor from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed monitor = Monitor(args.monitor_path) monitor_loss = MonitorSeries("Training loss", monitor, interval=10) monitor_err = MonitorSeries("Training error", monitor, interval=10) monitor_time = MonitorTimeElapsed("Training time", monitor, interval=10) monitor_verr = MonitorSeries("Test error", monitor, interval=1) # Data Iterator tdata = data_iterator(args.batch_size, True) vdata = data_iterator(args.batch_size, False) # save_nnp contents = save_nnp({'x': image_valid}, {'y': pred_valid}, args.batch_size) save.save(os.path.join(args.model_save_path, '{}_epoch0_result.nnp'.format(args.net)), contents) # Training-loop for i in range(start_point, args.max_iter): # Validation if i % int(n_train_samples / args.batch_size) == 0: ve = 0. for j in range(args.val_iter): image, label = vdata.next() input_image_valid["image"].d = image pred_valid.forward() ve += categorical_error(pred_valid.d, label) ve /= args.val_iter monitor_verr.add(i, ve) if int(i % args.model_save_interval) == 0: # save checkpoint file save_checkpoint(args.model_save_path, i, solver) # Forward/Zerograd/Backward image, label = tdata.next() input_image_train["image"].d = image input_image_train["label"].d = label loss_train.forward() solver.zero_grad() loss_train.backward() # Solvers update solver.update() e = categorical_error( pred_train.d, input_image_train["label"].d) monitor_loss.add(i, loss_train.d.copy()) monitor_err.add(i, e) monitor_time.add(i) nn.save_parameters(os.path.join(args.model_save_path, 'params_%06d.h5' % (args.max_iter))) # save_nnp_lastepoch contents = save_nnp({'x': image_valid}, {'y': pred_valid}, args.batch_size) save.save(os.path.join(args.model_save_path, '{}_result.nnp'.format(args.net)), contents)
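categorical_error above comes from the example's shared utilities and is not shown here. Assuming pred is a (B, n_classes) score array and label a (B, 1) integer array, as the calls above suggest, a minimal NumPy sketch of it could be:

def categorical_error(pred, label):
    # Fraction of samples whose argmax prediction differs from the label.
    # pred: (B, n_classes) scores; label: (B, 1) integer class indices.
    return (pred.argmax(axis=1) != label.flatten()).mean()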
def train(): """ Main script. """ args = get_args() _ = nn.load_parameters(args.pretrained_model_path) if args.fine_tune: nnabla.parameter.pop_parameter('decoder/logits/affine/conv/W') nnabla.parameter.pop_parameter('decoder/logits/affine/conv/b') n_train_samples = args.train_samples n_val_samples = args.val_samples distributed = args.distributed compute_acc = args.compute_acc if distributed: # Communicator and Context from nnabla.ext_utils import get_extension_context extension_module = "cudnn" ctx = get_extension_context( extension_module, type_config=args.type_config) comm = C.MultiProcessDataParalellCommunicator(ctx) comm.init() n_devices = comm.size mpi_rank = comm.rank device_id = mpi_rank ctx.device_id = str(device_id) nn.set_default_context(ctx) else: # Get context. from nnabla.ext_utils import get_extension_context extension_module = args.context if args.context is None: extension_module = 'cpu' logger.info("Running in %s" % extension_module) ctx = get_extension_context( extension_module, device_id=args.device_id, type_config=args.type_config) nn.set_default_context(ctx) n_devices = 1 device_id = 0 # training data data = data_iterator_segmentation( args.train_samples, args.batch_size, args.train_dir, args.train_label_dir, target_width=args.image_width, target_height=args.image_height) # validation data vdata = data_iterator_segmentation(args.val_samples, args.batch_size, args.val_dir, args.val_label_dir, target_width=args.image_width, target_height=args.image_height) if distributed: data = data.slice( rng=None, num_of_slices=n_devices, slice_pos=device_id) vdata = vdata.slice( rng=None, num_of_slices=n_devices, slice_pos=device_id) num_classes = args.num_class # Workaround to start with the same initialized weights for all workers. np.random.seed(313) t_model = get_model( args, test=False) t_model.pred.persistent = True # Not clearing buffer of pred in backward t_pred2 = t_model.pred.unlinked() t_e = F.sum(F.top_n_error(t_pred2, t_model.label, axis=1) * t_model.mask) / F.sum(t_model.mask) v_model = get_model( args, test=True) v_model.pred.persistent = True # Not clearing buffer of pred in forward v_pred2 = v_model.pred.unlinked() v_e = F.sum(F.top_n_error(v_pred2, v_model.label, axis=1) * v_model.mask) / F.sum(v_model.mask) # Create Solver solver = S.Momentum(args.learning_rate, 0.9) solver.set_parameters(nn.get_parameters()) # Load checkpoint start_point = 0 if args.checkpoint is not None: # load weights and solver state info from specified checkpoint file. start_point = load_checkpoint(args.checkpoint, solver) # Setting warmup. base_lr = args.learning_rate / n_devices warmup_iter = int(1. * n_train_samples / args.batch_size / args.accum_grad / n_devices) * args.warmup_epoch warmup_slope = base_lr * (n_devices - 1) / warmup_iter solver.set_learning_rate(base_lr) # Create monitor import nnabla.monitor as M monitor = M.Monitor(args.monitor_path) monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10) monitor_err = M.MonitorSeries("Training error", monitor, interval=10) monitor_vloss = M.MonitorSeries("Validation loss", monitor, interval=1) monitor_verr = M.MonitorSeries("Validation error", monitor, interval=1) monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=10) monitor_miou = M.MonitorSeries("mean IOU", monitor, interval=10) monitor_vtime = M.MonitorTimeElapsed( "Validation time", monitor, interval=1) # save_nnp contents = save_nnp({'x': v_model.image}, { 'y': v_model.pred}, args.batch_size) save.save(os.path.join(args.model_save_path, 'Deeplabv3plus_result_epoch0.nnp'), contents, variable_batch_size=False) # Training loop for i in range(start_point, int(args.max_iter / n_devices)): # Save parameters if i % (args.model_save_interval // n_devices) == 0 and device_id == 0: save_checkpoint(args.model_save_path, i, solver) # Validation if i % (args.val_interval // n_devices) == 0 and i != 0: vmiou_local = 0. val_iter_local = n_val_samples // args.batch_size vl_local = nn.NdArray() vl_local.zero() ve_local = nn.NdArray() ve_local.zero() for j in range(val_iter_local): images, labels, masks = vdata.next() v_model.image.d = images v_model.label.d = labels v_model.mask.d = masks v_model.image.data.cast(np.float32, ctx) v_model.label.data.cast(np.int32, ctx) v_model.loss.forward(clear_buffer=True) v_e.forward(clear_buffer=True) vl_local += v_model.loss.data ve_local += v_e.data # Mean IOU computation if compute_acc: vmiou_local += compute_miou(num_classes, labels, np.argmax(v_model.pred.d, axis=1), masks) vl_local /= val_iter_local ve_local /= val_iter_local if compute_acc: vmiou_local /= val_iter_local vmiou_ndarray = nn.NdArray.from_numpy_array( np.array(vmiou_local)) if distributed: comm.all_reduce(vl_local, division=True, inplace=True) comm.all_reduce(ve_local, division=True, inplace=True) if compute_acc: comm.all_reduce(vmiou_ndarray, division=True, inplace=True) if device_id == 0: monitor_vloss.add(i * n_devices, vl_local.data.copy()) monitor_verr.add(i * n_devices, ve_local.data.copy()) if compute_acc: monitor_miou.add(i * n_devices, vmiou_local) monitor_vtime.add(i * n_devices) # Training l = 0.0 e = 0.0 solver.zero_grad() e_acc = nn.NdArray(t_e.shape) e_acc.zero() l_acc = nn.NdArray(t_model.loss.shape) l_acc.zero() # Gradient accumulation loop for j in range(args.accum_grad): images, labels, masks = data.next() t_model.image.d = images t_model.label.d = labels t_model.mask.d = masks t_model.image.data.cast(np.float32, ctx) t_model.label.data.cast(np.int32, ctx) t_model.loss.forward(clear_no_need_grad=True) t_model.loss.backward(clear_buffer=True) # Accumulating gradients t_e.forward(clear_buffer=True) e_acc += t_e.data l_acc += t_model.loss.data # AllReduce if distributed: params = [x.grad for x in nn.get_parameters().values()] comm.all_reduce(params, division=False, inplace=False) comm.all_reduce(l_acc, division=True, inplace=True) comm.all_reduce(e_acc, division=True, inplace=True) solver.scale_grad(1./args.accum_grad) solver.weight_decay(args.weight_decay) solver.update() # Linear Warmup if i <= warmup_iter: lr = base_lr + warmup_slope * i solver.set_learning_rate(lr) if distributed: # Synchronize by averaging the weights over devices using allreduce if (i+1) % args.sync_weight_every_itr == 0: weights = [x.data for x in nn.get_parameters().values()] comm.all_reduce(weights, division=True, inplace=True) if device_id == 0: monitor_loss.add( i * n_devices, (l_acc / args.accum_grad).data.copy()) monitor_err.add( i * n_devices, (e_acc / args.accum_grad).data.copy()) monitor_time.add(i * n_devices) # Learning rate decay at scheduled iter --> changed to poly learning rate decay policy # if i in args.learning_rate_decay_at: solver.set_learning_rate(base_lr * ((1 - i / args.max_iter)**0.1)) if device_id == 0: nn.save_parameters(os.path.join(args.model_save_path, 'param_%06d.h5' % args.max_iter)) contents = save_nnp({'x': v_model.image}, { 'y': v_model.pred}, args.batch_size) save.save(os.path.join(args.model_save_path, 'Deeplabv3plus_result.nnp'), contents, variable_batch_size=False)
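compute_miou is likewise a helper not reproduced here. Consistent with how it is called above (class count, ground-truth labels, argmax predictions and a validity mask), a plausible NumPy sketch is:

import numpy as np


def compute_miou(num_classes, labels, preds, masks):
    # Mean intersection-over-union over classes, counted only on valid pixels.
    # labels, preds, masks: arrays of the same spatial shape.
    valid = masks.astype(bool)
    l, p = labels[valid], preds[valid]
    ious = []
    for c in range(num_classes):
        inter = np.logical_and(l == c, p == c).sum()
        union = np.logical_or(l == c, p == c).sum()
        if union > 0:
            ious.append(inter / union)
    return float(np.mean(ious)) if ious else 0.0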
def train(args): """ Main script. """ # Get context. from nnabla.contrib.context import extension_context extension_module = args.context if args.context is None: extension_module = 'cpu' logger.info("Running in %s" % extension_module) ctx = extension_context(extension_module, device_id=args.device_id) nn.set_default_context(ctx) # Create CNN network for both training and testing. # TRAIN # Fake path z = nn.Variable([args.batch_size, 100, 1, 1]) fake = generator(z) fake.persistent = True # Not to clear at backward pred_fake = discriminator(fake) loss_gen = F.mean( F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape))) fake_dis = fake.unlinked() pred_fake_dis = discriminator(fake_dis) loss_dis = F.mean( F.sigmoid_cross_entropy(pred_fake_dis, F.constant(0, pred_fake_dis.shape))) # Real path x = nn.Variable([args.batch_size, 1, 28, 28]) pred_real = discriminator(x) loss_dis += F.mean( F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape))) # Create Solver. solver_gen = S.Adam(args.learning_rate, beta1=0.5) solver_dis = S.Adam(args.learning_rate, beta1=0.5) with nn.parameter_scope("gen"): solver_gen.set_parameters(nn.get_parameters()) with nn.parameter_scope("dis"): solver_dis.set_parameters(nn.get_parameters()) # Create monitor. import nnabla.monitor as M monitor = M.Monitor(args.monitor_path) monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10) monitor_loss_dis = M.MonitorSeries("Discriminator loss", monitor, interval=10) monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100) monitor_fake = M.MonitorImageTile("Fake images", monitor, normalize_method=lambda x: x + 1 / 2.) data = data_iterator_mnist(args.batch_size, True) # Training loop. for i in range(args.max_iter): if i % args.model_save_interval == 0: with nn.parameter_scope("gen"): nn.save_parameters( os.path.join(args.model_save_path, "generator_param_%06d.h5" % i)) with nn.parameter_scope("dis"): nn.save_parameters( os.path.join(args.model_save_path, "discriminator_param_%06d.h5" % i)) # Training forward image, _ = data.next() x.d = image / 255. - 0.5 # [0, 255] to [-1, 1] z.d = np.random.randn(*z.shape) # Generator update. solver_gen.zero_grad() loss_gen.forward(clear_no_need_grad=True) loss_gen.backward(clear_buffer=True) solver_gen.weight_decay(args.weight_decay) solver_gen.update() monitor_fake.add(i, fake) monitor_loss_gen.add(i, loss_gen.d.copy()) # Discriminator update. solver_dis.zero_grad() loss_dis.forward(clear_no_need_grad=True) loss_dis.backward(clear_buffer=True) solver_dis.weight_decay(args.weight_decay) solver_dis.update() monitor_loss_dis.add(i, loss_dis.d.copy()) monitor_time.add(i) nnp = os.path.join(args.model_save_path, 'dcgan_%06d.nnp' % args.max_iter) runtime_contents = { 'networks': [{ 'name': 'Generator', 'batch_size': args.batch_size, 'outputs': { 'G': fake }, 'names': { 'z': z } }, { 'name': 'Discriminator', 'batch_size': args.batch_size, 'outputs': { 'D': pred_real }, 'names': { 'x': x } }], 'executors': [{ 'name': 'Generator', 'network': 'Generator', 'data': ['z'], 'output': ['G'] }, { 'name': 'Discriminator', 'network': 'Discriminator', 'data': ['x'], 'output': ['D'] }] } save.save(nnp, runtime_contents) from cpp_forward_check import check_cpp_forward check_cpp_forward(args.model_save_path, [z.d], [z], fake, nnp, "Generator")
def main(): # Get arguments args = get_args() data_file = "https://raw.githubusercontent.com/tomsercu/lstm/master/data/ptb.train.txt" model_file = args.work_dir + "model.h5" # Load Dataset itow, wtoi, dataset = load_ptbset(data_file) # Computation environment settings from nnabla.contrib.context import extension_context extension_module = args.context if args.context is None: extension_module = 'cpu' logger.info("Running in %s" % extension_module) ctx = extension_context(extension_module, device_id=args.device_id) nn.set_default_context(ctx) # Create data provider n_word = len(wtoi) n_dim = args.embed_dim batchsize = args.batchsize half_window = args.half_window_length n_negative = args.n_negative_sample di = DataIteratorForEmbeddingLearning( batchsize=batchsize, half_window=half_window, n_negative=n_negative, dataset=dataset) # Create model # - Real batch size including context samples and negative samples size = batchsize * (1 + n_negative) * (2 * (half_window - 1)) # Model for learning # - input variables xl = nn.Variable((size,)) # variable for word yl = nn.Variable((size,)) # variable for context # Embed layers for word embedding function # - f_embed : word index x to get y, the n_dim vector # -- for each sample in a minibatch hx = PF.embed(xl, n_word, n_dim, name="e1") # feature vector for word hy = PF.embed(yl, n_word, n_dim, name="e1") # feature vector for context hl = F.sum(hx * hy, axis=1) # -- Approximated likelihood of context prediction # pos: word context, neg negative samples tl = nn.Variable([size, ], need_grad=False) loss = F.sigmoid_cross_entropy(hl, tl) loss = F.mean(loss) # Model for test of searching similar words xr = nn.Variable((size,), need_grad=False) hr = PF.embed(xr, n_word, n_dim, name="e1") # feature vector for test # Create solver solver = S.Adam(args.learning_rate) solver.set_parameters(nn.get_parameters()) # Create monitor. monitor = M.Monitor(args.work_dir) monitor_loss = M.MonitorSeries( "Training loss", monitor, interval=args.monitor_interval) monitor_time = M.MonitorTimeElapsed( "Training time", monitor, interval=args.monitor_interval) # Do training max_epoch = args.max_epoch for epoch in range(max_epoch): # iteration per epoch for i in range(di.n_batch): # get minibatch xi, yi, ti = di.next() # learn solver.zero_grad() xl.d, yl.d, tl.d = xi, yi, ti loss.forward(clear_no_need_grad=True) loss.backward(clear_buffer=True) solver.update() # monitor itr = epoch * di.n_batch + i monitor_loss.add(itr, loss.d) monitor_time.add(itr) # Save model nn.save_parameters(model_file) nnp_file = os.path.join( args.work_dir, 'wtov_%06d.nnp' % (args.max_epoch)) runtime_contents = { 'networks': [ {'name': 'Validation', 'batch_size': size, 'outputs': {'e': hr}, 'names': {'w': xr}}], 'executors': [ {'name': 'Runtime', 'network': 'Validation', 'data': ['w'], 'output': ['e']}]} save.save(nnp_file, runtime_contents) from cpp_forward_check import check_cpp_forward check_cpp_forward(args.work_dir, [xi], [xr], hr, nnp_file) exit() # Evaluate by similarity max_check_words = args.max_check_words for i in range(max_check_words): # prediction xr.d = i hr.forward(clear_buffer=True) h = hr.d # similarity calculation w = nn.get_parameters()['e1/embed/W'].d s = np.sqrt((w * w).sum(1)) w /= s.reshape((s.shape[0], 1)) similarity = w.dot(h[0]) / s[i] # for understanding output_similar_words(itow, i, similarity)
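output_similar_words is a helper of this word-embedding example and is not shown here; it presumably ranks the vocabulary by the similarity scores computed above. A hypothetical sketch:

import numpy as np


def output_similar_words(itow, query_index, similarity, k=10):
    # Print the k words most similar to the query word, skipping the query itself.
    order = [i for i in np.argsort(similarity)[::-1] if i != query_index][:k]
    print("Words similar to '{}':".format(itow[query_index]))
    for rank, idx in enumerate(order, 1):
        print("  {:2d}. {} ({:.3f})".format(rank, itow[idx], similarity[idx]))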
def train(): args = get_args() # Set context. from nnabla.ext_utils import get_extension_context logger.info("Running in {}:{}".format(args.context, args.type_config)) ctx = get_extension_context(args.context, device_id=args.device_id, type_config=args.type_config) nn.set_default_context(ctx) data_iterator = data_iterator_librispeech(args.batch_size, args.data_dir) _data_source = data_iterator._data_source # dirty hack... # model x = nn.Variable(shape=(args.batch_size, data_config.duration, 1)) # (B, T, 1) onehot = F.one_hot(x, shape=(data_config.q_bit_len, )) # (B, T, C) wavenet_input = F.transpose(onehot, (0, 2, 1)) # (B, C, T) # speaker embedding if args.use_speaker_id: s_id = nn.Variable(shape=(args.batch_size, 1)) with nn.parameter_scope("speaker_embedding"): s_emb = PF.embed(s_id, n_inputs=_data_source.n_speaker, n_features=WavenetConfig.speaker_dims) s_emb = F.transpose(s_emb, (0, 2, 1)) else: s_emb = None net = WaveNet() wavenet_output = net(wavenet_input, s_emb) pred = F.transpose(wavenet_output, (0, 2, 1)) # (B, T, 1) t = nn.Variable(shape=(args.batch_size, data_config.duration, 1)) loss = F.mean(F.softmax_cross_entropy(pred, t)) # for generation prob = F.softmax(pred) # Create Solver. solver = S.Adam(args.learning_rate) solver.set_parameters(nn.get_parameters()) # load checkpoint start_point = 0 if args.checkpoint is not None: # load weights and solver state info from specified checkpoint file. start_point = load_checkpoint(args.checkpoint, solver) # Create monitor. monitor = Monitor(args.monitor_path) monitor_loss = MonitorSeries("Training loss", monitor, interval=10) # setup save env. audio_save_path = os.path.join(os.path.abspath(args.model_save_path), "audio_results") if audio_save_path and not os.path.exists(audio_save_path): os.makedirs(audio_save_path) # save_nnp contents = save_nnp({'x': x}, {'y': wavenet_output}, args.batch_size) save.save( os.path.join(args.model_save_path, 'Speechsynthesis_result_epoch0.nnp'), contents) # Training loop. for i in range(start_point, args.max_iter): # todo: validation x.d, _speaker, t.d = data_iterator.next() if args.use_speaker_id: s_id.d = _speaker.reshape(-1, 1) solver.zero_grad() loss.forward(clear_no_need_grad=True) loss.backward(clear_buffer=True) solver.update() loss.data.cast(np.float32, ctx) monitor_loss.add(i, loss.d.copy()) if i % args.model_save_interval == 0: prob.forward() audios = mu_law_decode(np.argmax(prob.d, axis=-1), quantize=data_config.q_bit_len) # (B, T) save_audio(audios, i, audio_save_path) # save checkpoint file save_checkpoint(audio_save_path, i, solver) # save_nnp contents = save_nnp({'x': x}, {'y': wavenet_output}, args.batch_size) save.save(os.path.join(args.model_save_path, 'Speechsynthesis_result.nnp'), contents)
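mu_law_decode is defined elsewhere in this speech-synthesis example. The standard inverse mu-law companding it presumably implements, with quantize levels, can be sketched in NumPy as follows (assumption: quantized holds integer codes in [0, quantize)):

import numpy as np


def mu_law_decode(quantized, quantize=256):
    # Map integer codes in [0, quantize) back to waveform samples in [-1, 1].
    mu = quantize - 1
    y = 2.0 * quantized.astype(np.float32) / mu - 1.0  # rescale codes to [-1, 1]
    return np.sign(y) * ((1.0 + mu) ** np.abs(y) - 1.0) / mu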
def train(args): """ Main script. """ # Get context. from nnabla.contrib.context import extension_context extension_module = args.context if args.context is None: extension_module = 'cpu' logger.info("Running in %s" % extension_module) ctx = extension_context(extension_module, device_id=args.device_id) nn.set_default_context(ctx) # Create CNN network for both training and testing. margin = 1.0 # Margin for contrastive loss. # TRAIN # Create input variables. image0 = nn.Variable([args.batch_size, 1, 28, 28]) image1 = nn.Variable([args.batch_size, 1, 28, 28]) label = nn.Variable([args.batch_size]) # Create prediction graph. pred = mnist_lenet_siamese(image0, image1, test=False) # Create loss function. loss = F.mean(contrastive_loss(pred, label, margin)) # TEST # Create input variables. vimage0 = nn.Variable([args.batch_size, 1, 28, 28]) vimage1 = nn.Variable([args.batch_size, 1, 28, 28]) vlabel = nn.Variable([args.batch_size]) # Create prediction graph. vpred = mnist_lenet_siamese(vimage0, vimage1, test=True) vloss = F.mean(contrastive_loss(vpred, vlabel, margin)) # Create Solver. solver = S.Adam(args.learning_rate) solver.set_parameters(nn.get_parameters()) # Create monitor. import nnabla.monitor as M monitor = M.Monitor(args.monitor_path) monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10) monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=100) monitor_vloss = M.MonitorSeries("Test loss", monitor, interval=10) # Initialize DataIterator for MNIST. rng = np.random.RandomState(313) data = siamese_data_iterator(args.batch_size, True, rng) vdata = siamese_data_iterator(args.batch_size, False, rng) # Training loop. for i in range(args.max_iter): if i % args.val_interval == 0: # Validation ve = 0.0 for j in range(args.val_iter): vimage0.d, vimage1.d, vlabel.d = vdata.next() vloss.forward(clear_buffer=True) ve += vloss.d monitor_vloss.add(i, ve / args.val_iter) if i % args.model_save_interval == 0: nn.save_parameters(os.path.join( args.model_save_path, 'params_%06d.h5' % i)) image0.d, image1.d, label.d = data.next() solver.zero_grad() # Training forward, backward and update loss.forward(clear_no_need_grad=True) loss.backward(clear_buffer=True) solver.weight_decay(args.weight_decay) solver.update() monitor_loss.add(i, loss.d.copy()) monitor_time.add(i) parameter_file = os.path.join( args.model_save_path, 'params_%06d.h5' % args.max_iter) nn.save_parameters(parameter_file) nnp_file = os.path.join( args.model_save_path, 'siamese_%06d.nnp' % (args.max_iter)) runtime_contents = { 'networks': [ {'name': 'Validation', 'batch_size': args.batch_size, 'outputs': {'y': vpred}, 'names': {'x0': vimage0, 'x1': vimage1}}], 'executors': [ {'name': 'Runtime', 'network': 'Validation', 'data': ['x0', 'x1'], 'output': ['y']}]} save.save(nnp_file, runtime_contents) from cpp_forward_check import check_cpp_forward check_cpp_forward(args.model_save_path, [vimage0.d, vimage1.d], [ vimage0, vimage1], vpred, nnp_file)
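contrastive_loss is defined in the example's model code and is not shown here. As a reference for the formulation it presumably follows (Hadsell et al.), assuming pred is the squared Euclidean distance between the two embeddings and label is 1 for similar pairs and 0 for dissimilar pairs:

import numpy as np


def contrastive_loss_ref(dist_sq, label, margin=1.0):
    # NumPy reference only; the example's nnabla version builds this as a graph.
    dist = np.sqrt(dist_sq)
    return label * dist_sq + (1 - label) * np.maximum(margin - dist, 0.0) ** 2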
def save_net_nnp(self, path, inp, out, calc_latency=False, func_real_latency=None, func_accum_latency=None): """ Saves the whole net as one nnp file. Calculates the whole net's (real) latency (using e.g. Nnabla's [Profiler]) and also the layer-based latency. The modules are discovered using the nnabla graph of the whole net; the latency is then calculated based on each individual module's nnabla graph (e.g. [LatencyGraphEstimator]). Args: path: path prefix used to construct the output file names inp: input of the created network out: output of the created network calc_latency: flag for calculating latency func_real_latency: function used to calculate the actual latency func_accum_latency: function used to calculate the accumulated latency, that is, dissecting the network layer by layer using the graph of the network, calculating the latency for each layer and adding up all these results. """ batch_size = inp.shape[0] name = self.name filename = path + name + '.nnp' pathname = os.path.dirname(filename) upper_pathname = os.path.dirname(pathname) if not os.path.exists(upper_pathname): os.mkdir(upper_pathname) if not os.path.exists(pathname): os.mkdir(pathname) d_dict = {'0': inp} keys = ['0'] name_for_nnp = name if (name != '') else 'empty' contents = { 'networks': [{ 'name': name_for_nnp, 'batch_size': batch_size, 'outputs': { 'out': out }, 'names': d_dict }], 'executors': [{ 'name': 'runtime', 'network': name_for_nnp, 'data': keys, 'output': ['out'] }] } save(filename, contents, variable_batch_size=False) if calc_latency: acc_latency = func_accum_latency.get_estimation(out) filename = path + name + '.acclat' with open(filename, 'w') as f: print(acc_latency.__str__(), file=f) func_real_latency.run() real_latency = float(func_real_latency.result['forward_all']) filename = path + name + '.realat' with open(filename, 'w') as f: print(real_latency.__str__(), file=f) return real_latency, acc_latency else: return 0.0, 0.0
def train(args): """ Main script. """ # Get context. from nnabla.contrib.context import extension_context extension_module = args.context if args.context is None: extension_module = 'cpu' logger.info("Running in %s" % extension_module) ctx = extension_context(extension_module, device_id=args.device_id) nn.set_default_context(ctx) # Create CNN network for both training and testing. # TRAIN # Fake path z = nn.Variable([args.batch_size, 100, 1, 1]) fake = generator(z) fake.persistent = True # Not to clear at backward pred_fake = discriminator(fake) loss_gen = F.mean(F.sigmoid_cross_entropy( pred_fake, F.constant(1, pred_fake.shape))) fake_dis = fake.unlinked() pred_fake_dis = discriminator(fake_dis) loss_dis = F.mean(F.sigmoid_cross_entropy( pred_fake_dis, F.constant(0, pred_fake_dis.shape))) # Real path x = nn.Variable([args.batch_size, 1, 28, 28]) pred_real = discriminator(x) loss_dis += F.mean(F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape))) # Create Solver. solver_gen = S.Adam(args.learning_rate, beta1=0.5) solver_dis = S.Adam(args.learning_rate, beta1=0.5) with nn.parameter_scope("gen"): solver_gen.set_parameters(nn.get_parameters()) with nn.parameter_scope("dis"): solver_dis.set_parameters(nn.get_parameters()) # Create monitor. import nnabla.monitor as M monitor = M.Monitor(args.monitor_path) monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10) monitor_loss_dis = M.MonitorSeries( "Discriminator loss", monitor, interval=10) monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100) monitor_fake = M.MonitorImageTile( "Fake images", monitor, normalize_method=lambda x: x + 1 / 2.) data = data_iterator_mnist(args.batch_size, True) # Training loop. for i in range(args.max_iter): if i % args.model_save_interval == 0: with nn.parameter_scope("gen"): nn.save_parameters(os.path.join( args.model_save_path, "generator_param_%06d.h5" % i)) with nn.parameter_scope("dis"): nn.save_parameters(os.path.join( args.model_save_path, "discriminator_param_%06d.h5" % i)) # Training forward image, _ = data.next() x.d = image / 255. - 0.5 # [0, 255] to [-1, 1] z.d = np.random.randn(*z.shape) # Generator update. solver_gen.zero_grad() loss_gen.forward(clear_no_need_grad=True) loss_gen.backward(clear_buffer=True) solver_gen.weight_decay(args.weight_decay) solver_gen.update() monitor_fake.add(i, fake) monitor_loss_gen.add(i, loss_gen.d.copy()) # Discriminator update. solver_dis.zero_grad() loss_dis.forward(clear_no_need_grad=True) loss_dis.backward(clear_buffer=True) solver_dis.weight_decay(args.weight_decay) solver_dis.update() monitor_loss_dis.add(i, loss_dis.d.copy()) monitor_time.add(i) nnp = os.path.join( args.model_save_path, 'dcgan_%06d.nnp' % args.max_iter) runtime_contents = { 'networks': [ {'name': 'Generator', 'batch_size': args.batch_size, 'outputs': {'G': fake}, 'names': {'z': z}}, {'name': 'Discriminator', 'batch_size': args.batch_size, 'outputs': {'D': pred_real}, 'names': {'x': x}}], 'executors': [ {'name': 'Generator', 'network': 'Generator', 'data': ['z'], 'output': ['G']}, {'name': 'Discriminator', 'network': 'Discriminator', 'data': ['x'], 'output': ['D']}]} save.save(nnp, runtime_contents) from cpp_forward_check import check_cpp_forward check_cpp_forward(args.model_save_path, [z.d], [z], fake, nnp, "Generator")
def save_modules_nnp_by_mod( self, path, active_only=False, calc_latency=False, func_latency=None, ): """ *** Note: This function is deprecated. Use save_modules_nnp() *** Saves all modules of the network as individual nnp files, using folder structure given by name convention. The modules are extracted going over the module list, not over the graph structure. The latency is then calculated using the module themselves (e.g. [LatencyEstimator]) Args: path active_only: if True, only active modules are saved calc_latency: flag for calc latency func_latency: function to use to calc latency of each of the extracted modules This function needs to work based on the modules """ accum_lat = 0.0 mods = self.get_net_modules(active_only=active_only) for mi in mods: if type(mi) in self.modules_to_profile: if len(mi.input_shapes) == 0: continue pass inp = [nn.Variable((1, ) + si[1:]) for si in mi.input_shapes] out = mi.call(*inp) filename = path + mi.name + '.nnp' pathname = os.path.dirname(filename) upper_pathname = os.path.dirname(pathname) if not os.path.exists(upper_pathname): os.mkdir(upper_pathname) if not os.path.exists(pathname): os.mkdir(pathname) d_dict = {str(i): inpi for i, inpi in enumerate(inp)} d_keys = [str(i) for i, inpi in enumerate(inp)] name_for_nnp = mi.name if (mi.name != '') else 'empty' contents = { 'networks': [{ 'name': name_for_nnp, 'batch_size': 1, 'outputs': { 'out': out }, 'names': d_dict }], 'executors': [{ 'name': 'runtime', 'network': name_for_nnp, 'data': d_keys, 'output': ['out'] }] } if hasattr(mi, '_scope_name'): with nn.parameter_scope(mi._scope_name): save(filename, contents, variable_batch_size=False) else: save(filename, contents, variable_batch_size=False) if calc_latency: latency = func_latency.get_estimation(mi) filename = path + mi.name + '.acclat' with open(filename, 'w') as f: print(latency.__str__(), file=f) accum_lat += latency return accum_lat
def main(): """ Main script. Steps: * Get and set context. * Load Dataset * Initialize DataIterator. * Create Networks * Net for Labeled Data * Net for Unlabeled Data * Net for Test Data * Create Solver. * Training Loop. * Test * Training * by Labeled Data * Calculate Cross Entropy Loss * by Unlabeled Data * Estimate Adversarial Direction * Calculate LDS Loss """ args = get_args() # Get context. from nnabla.contrib.context import extension_context extension_module = args.context if args.context is None: extension_module = 'cpu' logger.info("Running in %s" % extension_module) ctx = extension_context(extension_module, device_id=args.device_id) nn.set_default_context(ctx) shape_x = (1, 28, 28) n_h = args.n_units n_y = args.n_class # Load MNIST Dataset from mnist_data import MnistDataSource with MnistDataSource(train=True) as d: x_t = d.images t_t = d.labels with MnistDataSource(train=False) as d: x_v = d.images t_v = d.labels x_t = np.array(x_t / 256.0).astype(np.float32) x_t, t_t = x_t[:args.n_train], t_t[:args.n_train] x_v, t_v = x_v[:args.n_valid], t_v[:args.n_valid] # Create Semi-supervised Datasets x_l, t_l, x_u, _ = split_dataset(x_t, t_t, args.n_labeled, args.n_class) x_u = np.r_[x_l, x_u] x_v = np.array(x_v / 256.0).astype(np.float32) # Create DataIterators for datasets of labeled, unlabeled and validation di_l = DataIterator(args.batchsize_l, [x_l, t_l]) di_u = DataIterator(args.batchsize_u, [x_u]) di_v = DataIterator(args.batchsize_v, [x_v, t_v]) # Create networks # feed-forward-net building function def forward(x, test=False): return mlp_net(x, n_h, n_y, test) # Net for learning labeled data xl = nn.Variable((args.batchsize_l,) + shape_x, need_grad=False) hl = forward(xl, test=False) tl = nn.Variable((args.batchsize_l, 1), need_grad=False) loss_l = F.mean(F.softmax_cross_entropy(hl, tl)) # Net for learning unlabeled data xu = nn.Variable((args.batchsize_u,) + shape_x, need_grad=False) r = nn.Variable((args.batchsize_u,) + shape_x, need_grad=True) eps = nn.Variable((args.batchsize_u,) + shape_x, need_grad=False) loss_u, yu = vat(xu, r, eps, forward, distance) # Net for evaluating validation data xv = nn.Variable((args.batchsize_v,) + shape_x, need_grad=False) hv = forward(xv, test=True) tv = nn.Variable((args.batchsize_v, 1), need_grad=False) # Create solver solver = S.Adam(args.learning_rate) solver.set_parameters(nn.get_parameters()) # Monitor training and validation stats. import nnabla.monitor as M monitor = M.Monitor(args.model_save_path) monitor_verr = M.MonitorSeries("Test error", monitor, interval=240) monitor_time = M.MonitorTimeElapsed("Elapsed time", monitor, interval=240) # Training Loop.
t0 = time.time() for i in range(args.max_iter): # Validation Test if i % args.val_interval == 0: n_error = calc_validation_error( di_v, xv, tv, hv, args.val_iter) monitor_verr.add(i, n_error) ################################# ## Training by Labeled Data ##### ################################# # input minibatch of labeled data into variables xl.d, tl.d = di_l.next() # initialize gradients solver.zero_grad() # forward, backward and update loss_l.forward(clear_no_need_grad=True) loss_l.backward(clear_buffer=True) solver.weight_decay(args.weight_decay) solver.update() ################################# ## Training by Unlabeled Data ### ################################# # input minibatch of unlabeled data into variables xu.d, = di_u.next() ##### Calculate Adversarial Noise ##### # Sample random noise n = np.random.normal(size=xu.shape).astype(np.float32) # Normalize noise vector and input to variable r.d = get_direction(n) # Set xi, the power-method scaling parameter. eps.data.fill(args.xi_for_vat) # Calculate y without noise, only once. yu.forward(clear_buffer=True) # Do power method iteration for k in range(args.n_iter_for_power_method): # Initialize gradient to receive value r.grad.zero() # forward, backward, without update loss_u.forward(clear_no_need_grad=True) loss_u.backward(clear_buffer=True) # Normalize gradient vector and input to variable r.d = get_direction(r.g) ##### Calculate loss for unlabeled data ##### # Clear remaining gradients solver.zero_grad() # Set epsilon, the adversarial noise scaling parameter. eps.data.fill(args.eps_for_vat) # forward, backward and update loss_u.forward(clear_no_need_grad=True) loss_u.backward(clear_buffer=True) solver.weight_decay(args.weight_decay) solver.update() ##### Learning rate update ##### if i % args.iter_per_epoch == 0: solver.set_learning_rate( solver.learning_rate() * args.learning_rate_decay) monitor_time.add(i) # Evaluate the final model by the error rate on the validation dataset valid_error = calc_validation_error(di_v, xv, tv, hv, args.val_iter) monitor_verr.add(i, valid_error) monitor_time.add(i) # Save the model. nnp_file = os.path.join( args.model_save_path, 'vat_%06d.nnp' % args.max_iter) runtime_contents = { 'networks': [ {'name': 'Validation', 'batch_size': args.batchsize_v, 'outputs': {'y': hv}, 'names': {'x': xv}}], 'executors': [ {'name': 'Runtime', 'network': 'Validation', 'data': ['x'], 'output': ['y']}]} save.save(nnp_file, runtime_contents) from cpp_forward_check import check_cpp_forward check_cpp_forward(args.model_save_path, [xv.d], [xv], hv, nnp_file)
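get_direction is a small helper of the VAT example, not shown here. Both call sites above (normalizing the random noise and the gradient) suggest per-sample L2 normalization, which could be sketched as:

import numpy as np


def get_direction(d):
    # Normalize each sample in the batch to unit L2 norm; d: (B, C, H, W) array.
    flat = d.reshape(d.shape[0], -1)
    norm = np.sqrt((flat ** 2).sum(axis=1, keepdims=True)) + 1e-12
    return (flat / norm).reshape(d.shape).astype(np.float32)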
def __call__(self, config): if config.iter != half_iter: return info = self.info datasets = [] with ExitStack() as stack: for d_name, d in info.datasets.items(): ds = {} ds['name'] = d_name ds['uri'] = d.uri ds['cache_dir'] = d.cache_dir di_instance = stack.enter_context( d.data_iterator()) ds['variables'] = [ var_name for var_name in di_instance.variables ] ds['batch_size'] = di_instance.batch_size ds['no_image_normalization'] = not d.normalize ds['shuffle'] = di_instance._shuffle datasets.append(ds) dataset_assign = set() for obj in itertools.chain(info.monitors.values(), info.executors.values(), info.optimizers.values()): for pv in obj.dataset_assign.keys(): dataset_assign.add(pv.name) contents = { 'global_config': { 'default_context': info.global_config.default_context }, 'training_config': { 'max_epoch': info.training_config.max_epoch, 'iter_per_epoch': info.training_config.iter_per_epoch, 'save_best': info.training_config.save_best }, 'networks': [{ 'name': n_name, 'batch_size': n.batch_size, 'outputs': { out: n.variables[out].variable_instance for out in n.outputs }, 'names': { inp: n.variables[inp].variable_instance for inp in itertools.chain( n.inputs, n.outputs) } } for n_name, n in info.networks.items()], 'executors': [{ 'name': e_name, 'network': e.network.name, 'data': [pv.name for pv in e.dataset_assign.keys()], 'generator_variables': [pv.name for pv in e.generator_assign.keys()], 'output': [pv.name for pv in e.output_assign.keys()] } for e_name, e in info.executors.items()], 'optimizers': [{ 'name': o_name, 'solver': o.solver, 'network': o.network.name, 'data_variables': {pv.name: d for pv, d in o.dataset_assign.items()}, 'generator_variables': [pv.name for pv in o.generator_assign.keys()], 'loss_variables': [pv.name for pv in o.loss_variables], 'dataset': [ds_name for ds_name in o.data_iterators.keys()], 'weight_decay': o.weight_decay, 'lr_decay': o.lr_decay, 'lr_decay_interval': o.lr_decay_interval, 'update_interval': o.update_interval } for o_name, o in info.optimizers.items()], 'datasets': datasets, 'monitors': [{ 'name': m_name, 'network': m.network.name, 'data_variables': {pv.name: d for pv, d in m.dataset_assign.items()}, 'generator_variables': [pv.name for pv in m.generator_assign.keys()], 'monitor_variables': [pv.name for pv in m.monitor_variables], 'dataset': [ds_name for ds_name in m.data_iterators.keys()] } for m_name, m in info.monitors.items()], } save.save(saved_nnp_file, contents, include_params, variable_batch_size)