def test_graph_logreg(seed):
    """Gradient check for a logistic-regression graph (affine -> softmax CE -> mean).

    Builds the graph eagerly with auto_forward, runs backward once, then
    compares analytical against numerical gradients for every input.
    """
    rng = np.random.RandomState(seed)
    # Inputs/parameters: x is flattened to 12 features by the affine's base_axis=1.
    x = nn.Variable([2, 3, 4], need_grad=True)
    w = nn.Variable([12, 5], need_grad=True)
    b = nn.Variable([5], need_grad=True)
    t = nn.Variable([2, 1])  # integer class labels in [0, 5)
    x.d = rng.randn(*x.shape)
    w.d = rng.randn(*w.shape)
    b.d = rng.randn(*b.shape)
    t.d = rng.randint(0, 5, size=t.shape)
    nn.set_default_context(nn.Context())

    # Forwardprop by definition (auto_forward executes each function eagerly).
    with nn.auto_forward():
        z = F.affine(x, w, b, 1)
        l = F.softmax_cross_entropy(z, t, 1)
        L = F.mean(l)

    # Backprop
    # Diff should be initialized since they are always accumulated
    x.g = 0
    w.g = 0
    b.g = 0
    L.backward(clear_buffer=True)
    # NOTE(review): x.g is randomized AFTER backward — presumably the grad
    # checker recomputes gradients itself, so stale contents must not matter;
    # confirm against nbla_test_utils.
    x.g = rng.randn(*x.shape)
    inputs = [x, w, b]
    from nbla_test_utils import \
        compute_analytical_and_numerical_grad_graph as grads
    agrad, ngrad = grads(L, inputs, 1e-3)
    assert np.allclose(ngrad, agrad, atol=1e-2)
def ce_loss_with_uncertainty(ctx, pred, y_l, log_var):
    """Cross-entropy loss on logits perturbed by heteroscedastic Gaussian noise.

    ``log_var`` parameterizes the variance of the additive noise: the
    standard deviation applied to a unit-normal sample is exp(log_var)**0.5.
    """
    eps = F.randn(0., 1., log_var.shape)
    noisy_pred = pred + F.pow_scalar(F.exp(log_var), 0.5) * eps
    with nn.context_scope(ctx):
        return F.mean(F.softmax_cross_entropy(noisy_pred, y_l))
def test_graph_model(model, seed):
    """Gradient check for small mlp/recurrent/convolution graphs.

    Builds the network selected by ``model``, runs one forward/backward
    pass, then compares analytical vs. numerical gradients w.r.t. the
    input and all parameters.
    """
    np.random.seed(313)
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4, 4], need_grad=True)
    t = nn.Variable([2, 1])  # integer class labels in [0, 5)
    x.d = rng.randn(*x.shape)
    t.d = rng.randint(0, 5, size=t.shape)
    nn.set_default_context(nn.Context())

    # Forwardprop by definition
    nn.clear_parameters()
    if model == "mlp":
        with nn.parameter_scope('fc1'):
            z = PF.affine(x, 3)
        z2 = F.relu(z, inplace=True)
        with nn.parameter_scope('fc2'):
            z3 = PF.affine(z2, 5)
    elif model == "recurrent":
        with nn.parameter_scope('fc1'):
            z = PF.affine(x, 3)
        z2 = F.relu(z, inplace=True)
        h = z2
        # Weight sharing: the same 'fc2' scope is reused on every step.
        for _ in range(2):
            with nn.parameter_scope('fc2'):
                h = PF.affine(h, 3)
            h = F.relu(h, inplace=True)
        with nn.parameter_scope('fc3'):
            z3 = PF.affine(h, 5)
    elif model == "convolution":
        with nn.parameter_scope('conv1'):
            z = PF.convolution(x, 3, (2, 2))
        z2 = F.relu(z, inplace=True)
        with nn.parameter_scope('fc2'):
            z3 = PF.affine(z2, 5)
    else:
        raise ValueError()
    l = F.softmax_cross_entropy(z3, t, 1)
    L = F.mean(l)

    # Forwardprop
    L.forward(clear_no_need_grad=True)

    # Backprop
    # Diff should be initialized since they are always accumulated
    x.grad.zero()
    L.backward(clear_buffer=True)
    # NOTE(review): x.g is randomized after backward — presumably the grad
    # checker recomputes gradients itself; confirm against nbla_test_utils.
    x.g = rng.randn(*x.shape)
    parameters = nn.get_parameters()
    for param in parameters.values():
        param.grad.zero()
    inputs = [x] + list(parameters.values())
    from nbla_test_utils import \
        compute_analytical_and_numerical_grad_graph as grads
    agrad, ngrad = grads(L, inputs, 1e-3)
    assert np.allclose(ngrad, agrad, atol=1.05e-2)
def test_forward_backward():
    """Smoke test: one forward and one backward pass through cnn_model_003."""
    batch_size, channels, height, width = 4, 3, 32, 32
    ctx = extension_context("cpu", device_id=0)

    # Random inputs: images and integer labels in [0, 10).
    images = np.random.randn(batch_size, channels, height, width)
    labels = (np.random.rand(batch_size, 1) * 10).astype(np.int32)

    x_l = nn.Variable(images.shape)
    y_l = nn.Variable(labels.shape)
    x_l.d = images
    y_l.d = labels

    pred = cnn_model_003(ctx, x_l)
    with nn.context_scope(ctx):
        loss = F.mean(F.softmax_cross_entropy(pred, y_l))
    loss.forward()
    loss.backward()
def get_model(args, num_classes, test=False, tiny=False):
    """
    Create computation graph and variables.

    Args:
        tiny: Tiny ImageNet mode if True.

    Returns:
        A ``Model`` namedtuple with fields image, label, pred, loss, hidden.
    """
    # Tiny ImageNet uses smaller source images and a smaller network input crop.
    data_size, nn_in_size = (64, 56) if tiny else (320, 224)

    image = nn.Variable([args.batch_size, 3, data_size, data_size])
    label = nn.Variable([args.batch_size, 1])
    pimage = image_preprocess(image, nn_in_size)
    pred, hidden = model_resnet.resnet_imagenet(
        pimage, num_classes, args.num_layers, args.shortcut_type,
        test=test, tiny=tiny)
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    Model = namedtuple('Model', ['image', 'label', 'pred', 'loss', 'hidden'])
    return Model(image, label, pred, loss, hidden)
def test_graph_clear_buffer(seed):
    """Check that clear_no_need_grad/clear_buffer flags do not change gradients.

    Runs forward/backward with all four combinations of the two flags and
    asserts the first parameter's gradient is identical each time.
    """
    np.random.seed(313)
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4, 4])
    t = nn.Variable([2, 1])  # integer class labels in [0, 5)
    x.d = rng.randn(*x.shape)
    t.d = rng.randint(0, 5, size=t.shape)

    # Network definition
    nn.set_default_context(nn.Context())
    nn.clear_parameters()
    x1 = x + 1
    x2 = x1 - 1
    with nn.parameter_scope('conv1'):
        z = PF.convolution(x2, 3, (2, 2))
        z2 = F.relu(z, inplace=True)
    with nn.parameter_scope('fc2'):
        z3 = PF.affine(z2, 5)
    l = F.softmax_cross_entropy(z3, t, 1)
    L = F.mean(l)

    # Forwardprop
    import tempfile
    import os
    tmpd = tempfile.mkdtemp()
    # Snapshot the parameters so each flag combination starts from
    # identical weights.
    nn.save_parameters(os.path.join(tmpd, 'parameter.h5'))
    first = False  # becomes True once the reference gradient g is captured
    for cnng in [False, True]:
        for cb in [False, True]:
            _ = nn.load_parameters(os.path.join(tmpd, 'parameter.h5'))
            for v in nn.get_parameters().values():
                v.grad.zero()
            L.forward(clear_no_need_grad=cnng)
            L.backward(clear_buffer=cb)
            if not first:
                first = True
                g = list(nn.get_parameters().values())[0].g.copy()
            else:
                g2 = list(nn.get_parameters().values())[0].g.copy()
                assert np.all(g == g2)
def loss_function(pred, label, label_smoothing=0.1):
    """Softmax cross-entropy with optional label smoothing.

    With smoothing s > 0 the loss is (1-s)*CE - s*mean(log_softmax(pred)),
    i.e. the smoothed target mixes the one-hot label with a uniform
    distribution over classes.
    """
    ce = F.softmax_cross_entropy(pred, label)
    if label_smoothing <= 0:
        return ce
    uniform_term = F.mean(F.log_softmax(pred), axis=1, keepdims=True)
    return (1 - label_smoothing) * ce - label_smoothing * uniform_term
def ce_loss(ctx, pred, y_l):
    """Mean softmax cross-entropy between logits ``pred`` and labels ``y_l``,
    built under the computation context ``ctx``."""
    with nn.context_scope(ctx):
        return F.mean(F.softmax_cross_entropy(pred, y_l))
def train_and_eval():
    """Train a few-shot (prototypical-style) classifier on Omniglot episodes,
    evaluate it, and visualize embeddings with t-SNE.

    Fix vs. original: ``np.float`` (removed in NumPy 1.24) replaced by the
    builtin ``float``.
    """
    # Settings
    args = get_args()
    n_class = args.n_class
    n_shot = args.n_shot
    n_query = args.n_query
    n_class_tr = args.n_class_tr
    n_shot_tr = args.n_shot_tr
    if n_shot_tr == 0:
        n_shot_tr = n_shot
    n_query_tr = args.n_query_tr
    if n_query_tr == 0:
        n_query_tr = n_query
    dataset = args.dataset
    dataset_root = args.dataset_root
    init_type = args.init_type
    embedding = args.embedding
    net_type = args.net_type
    metric = args.metric
    max_iteration = args.max_iteration
    lr_decay_interval = args.lr_decay_interval
    lr_decay = args.lr_decay
    iter_per_epoch = args.iter_per_epoch
    iter_per_valid = args.iter_per_valid
    n_episode_for_valid = args.n_episode_for_valid
    n_episode_for_test = args.n_episode_for_test
    work_dir = args.work_dir

    # Set context
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(
        args.context, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    # Monitor outputs
    from nnabla.monitor import Monitor, MonitorSeries
    monitor = Monitor(args.work_dir)
    monitor_loss = MonitorSeries(
        "Training loss", monitor, interval=iter_per_epoch)
    monitor_valid_err = MonitorSeries(
        "Validation error", monitor, interval=iter_per_valid)
    monitor_test_err = MonitorSeries("Test error", monitor)
    monitor_test_conf = MonitorSeries("Test error confidence", monitor)

    # Output files
    # NOTE(review): plain concatenation assumes work_dir ends with a path
    # separator — confirm, or switch to os.path.join.
    param_file = work_dir + "params.h5"
    tsne_file = work_dir + "tsne.png"

    # Load data
    shape_x = (1, 28, 28)
    train_data, valid_data, test_data = load_omniglot(
        dataset_root + "/omniglot/data/")
    train_episode_generator = EpisodeGenerator(
        n_class_tr, n_shot_tr, n_query_tr, shape_x, train_data)
    valid_episode_generator = EpisodeGenerator(
        n_class, n_shot, n_query, shape_x, valid_data)
    test_episode_generator = EpisodeGenerator(
        n_class, n_shot, n_query, shape_x, test_data)

    # Build training model: support set xs_t, query set xq_t -> logits hq_t.
    xs_t = nn.Variable((n_class_tr * n_shot_tr, ) + shape_x)
    xq_t = nn.Variable((n_class_tr * n_query_tr, ) + shape_x)
    hq_t = net(n_class_tr, xs_t, xq_t, init_type,
               embedding, net_type, metric, False)
    yq_t = nn.Variable((n_class_tr * n_query_tr, 1))
    loss_t = F.mean(F.softmax_cross_entropy(hq_t, yq_t))

    # Build evaluation model
    xs_v = nn.Variable((n_class * n_shot, ) + shape_x)
    xq_v = nn.Variable((n_class * n_query, ) + shape_x)
    hq_v = net(n_class, xs_v, xq_v, init_type,
               embedding, net_type, metric, True)
    yq_v = nn.Variable((n_class * n_query, 1))
    err_v = F.mean(F.top_n_error(hq_v, yq_v, n=1))

    # Setup solver
    solver = S.Adam(1.0e-3)
    solver.set_parameters(nn.get_parameters())
    learning_rate_decay_activate = True

    # Training loop
    train_losses = []
    best_err = 1.0
    for i in range(max_iteration):
        # Decay learning rate
        if learning_rate_decay_activate and ((i + 1) % lr_decay_interval == 0):
            solver.set_learning_rate(solver.learning_rate() * lr_decay)

        # Create an episode
        xs_t.d, xq_t.d, yq_t.d = train_episode_generator.next()

        # Training by the episode
        solver.zero_grad()
        loss_t.forward(clear_no_need_grad=True)
        loss_t.backward(clear_buffer=True)
        solver.update()
        train_losses.append(loss_t.d.copy())

        # Evaluation: keep the parameters with the best validation error.
        if (i + 1) % iter_per_valid == 0:
            train_loss = np.mean(train_losses)
            train_losses = []
            valid_errs = []
            for k in range(n_episode_for_valid):
                xs_v.d, xq_v.d, yq_v.d = valid_episode_generator.next()
                err_v.forward(clear_no_need_grad=True, clear_buffer=True)
                # np.float was removed in NumPy 1.24; use builtin float.
                valid_errs.append(float(err_v.d.copy()))
            valid_err = np.mean(valid_errs)
            #monitor_loss.add(i + 1, train_loss)
            monitor_valid_err.add(i + 1, valid_err * 100)
            if valid_err < best_err:
                best_err = valid_err
                nn.save_parameters(param_file)

    # Final evaluation with the best parameters.
    nn.load_parameters(param_file)
    v_errs = []
    for k in range(n_episode_for_test):
        xs_v.d, xq_v.d, yq_v.d = test_episode_generator.next()
        err_v.forward(clear_no_need_grad=True, clear_buffer=True)
        v_errs.append(float(err_v.d.copy()))
    v_err = np.mean(v_errs)
    # 95% confidence interval over test episodes.
    v_err_conf = 1.96 * np.std(v_errs) / np.sqrt(n_episode_for_test)
    monitor_test_err.add(0, v_err * 100)
    monitor_test_conf.add(0, v_err_conf)

    # Visualization
    # NOTE(review): assumes test_data stores 20 samples per class — confirm.
    n_class = 50
    n_sample = 20
    batch = test_data[:n_class].reshape(n_class * n_sample, 1, 28, 28)
    label = []
    for i in range(n_class):
        label.extend(np.ones(n_sample) * (i % 50))
    # FIXME(review): `conv4` is not defined anywhere in this function or the
    # visible module scope — this line raises NameError at runtime.
    u = get_embeddings(batch, conv4)
    v = get_tsne(u)
    plot_tsne(v[:, 0], v[:, 1], label, tsne_file)
def train():
    """Train a (possibly quantized) ResNet-23 variant on CIFAR-10.

    Fix vs. original: the network-selection chain now raises ``ValueError``
    for an unknown ``args.net`` instead of failing later with a confusing
    ``NameError`` on ``model_prediction``.
    """
    args = get_args()

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    if args.net == "cifar10_resnet23_prediction":
        model_prediction = cifar10_resnet23_prediction
    elif args.net == 'cifar10_binary_connect_resnet23_prediction':
        model_prediction = cifar10_binary_connect_resnet23_prediction
    elif args.net == 'cifar10_binary_net_resnet23_prediction':
        model_prediction = cifar10_binary_net_resnet23_prediction
    elif args.net == 'cifar10_binary_weight_resnet23_prediction':
        model_prediction = cifar10_binary_weight_resnet23_prediction
    elif args.net == 'cifar10_fp_connect_resnet23_prediction':
        model_prediction = functools.partial(
            cifar10_fp_connect_resnet23_prediction,
            n=args.bit_width, delta=args.delta)
    elif args.net == 'cifar10_fp_net_resnet23_prediction':
        model_prediction = functools.partial(
            cifar10_fp_net_resnet23_prediction,
            n=args.bit_width, delta=args.delta)
    elif args.net == 'cifar10_pow2_connect_resnet23_prediction':
        model_prediction = functools.partial(
            cifar10_pow2_connect_resnet23_prediction,
            n=args.bit_width, m=args.upper_bound)
    elif args.net == 'cifar10_pow2_net_resnet23_prediction':
        model_prediction = functools.partial(
            cifar10_pow2_net_resnet23_prediction,
            n=args.bit_width, m=args.upper_bound)
    elif args.net == 'cifar10_inq_resnet23_prediction':
        model_prediction = functools.partial(cifar10_inq_resnet23_prediction,
                                             num_bits=args.bit_width)
    else:
        # Fail fast with a clear message rather than NameError below.
        raise ValueError("Unknown network: {}".format(args.net))

    # TRAIN
    maps = 64
    data_iterator = data_iterator_cifar10
    c = 3
    h = w = 32
    n_train = 50000
    n_valid = 10000

    # Create input variables.
    image = nn.Variable([args.batch_size, c, h, w])
    label = nn.Variable([args.batch_size, 1])
    # Create model_prediction graph.
    pred = model_prediction(image, maps=maps, test=False)
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, c, h, w])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create prediction graph (test mode: no weight updates / BN in eval).
    vpred = model_prediction(vimage, maps=maps, test=True)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)

    # Initialize DataIterator
    data = data_iterator(args.batch_size, True)
    vdata = data_iterator(args.batch_size, False)
    best_ve = 1.0
    ve = 1.0

    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation over the whole validation set.
            ve = 0.0
            for j in range(int(n_valid / args.batch_size)):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            ve /= int(n_valid / args.batch_size)
            monitor_verr.add(i, ve)
        # Snapshot parameters whenever the validation error improves.
        if ve < best_ve:
            nn.save_parameters(os.path.join(
                args.model_save_path, 'params_%06d.h5' % i))
            best_ve = ve
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    # Final validation pass after training.
    ve = 0.0
    for j in range(int(n_valid / args.batch_size)):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    ve /= int(n_valid / args.batch_size)
    monitor_verr.add(i, ve)

    parameter_file = os.path.join(args.model_save_path,
                                  'params_{:06}.h5'.format(args.max_iter))
    nn.save_parameters(parameter_file)
def loss_function(pred, label, reduction='mean'):
    """Softmax cross-entropy loss with selectable reduction.

    Args:
        pred: logits variable.
        label: integer class labels.
        reduction: 'mean' or 'sum'.

    Raises:
        KeyError: if ``reduction`` is not 'mean' or 'sum' (same as original).
    """
    # Look up the reduction op first so only the selected branch of the
    # computation graph is built; the original dict literal eagerly
    # constructed BOTH F.mean and F.sum graphs and discarded one.
    reduce_op = {'mean': F.mean, 'sum': F.sum}[reduction]
    return reduce_op(F.softmax_cross_entropy(pred, label))
def ce_loss(ctx, pred, y_l):
    """Build the mean softmax cross-entropy loss under context ``ctx``."""
    with nn.context_scope(ctx):
        per_sample = F.softmax_cross_entropy(pred, y_l)
        loss = F.mean(per_sample)
    return loss
def train():
    """
    Main script: quantization-aware ResNet-20 training on CIFAR-10 with
    optional network-size (weights/activations) constraints.

    Steps:
    * Specify a context for computation.
    * Initialize DataIterator for CIFAR10.
    * Construct a computation graph for training and validation.
    * Initialize a solver and set parameter variables to it.
    * Training loop
      * Compute error rate for validation data (periodically)
      * Get a next minibatch.
      * Execute forwardprop on the training graph.
      * Compute training error
      * Set parameter gradients zero
      * Execute backprop.
      * Solver updates parameters by using gradients computed by backprop.

    Fixes vs. original: ``np.asscalar`` (removed in NumPy 1.23) replaced by
    ``ndarray.item()``; the twelve near-identical parameter-scan loops and
    the print/record elif ladder are collapsed into a single mapping.
    """
    # define training parameters
    augmented_shift = True
    augmented_flip = True
    batch_size = 128
    vbatch_size = 100
    num_classes = 10
    weight_decay = 0.0002
    momentum = 0.9
    # Step schedule: 80 epochs at the base LR, then two 40-epoch decays.
    learning_rates = (cfg.initial_learning_rate,)*80 + \
        (cfg.initial_learning_rate / 10.,)*40 + \
        (cfg.initial_learning_rate / 100.,)*40
    print('lr={}'.format(learning_rates))
    print('weight_decay={}'.format(weight_decay))
    print('momentum={}'.format(momentum))

    # create nnabla context
    from nnabla.ext_utils import get_extension_context
    ctx = get_extension_context('cudnn', device_id=args.gpu)
    nn.set_default_context(ctx)

    # Initialize DataIterator for CIFAR10.
    logger.info("Get CIFAR10 Data ...")
    data = cifar_data.DataIterator(batch_size,
                                   augmented_shift=augmented_shift,
                                   augmented_flip=augmented_flip)
    vdata = cifar_data.DataIterator(vbatch_size, val=True)

    if cfg.weightfile is not None:
        logger.info(f"Loading weights from {cfg.weightfile}")
        nn.load_parameters(cfg.weightfile)

    # TRAIN
    # Create input variables.
    image = nn.Variable([batch_size, 3, 32, 32])
    label = nn.Variable([batch_size, 1])
    # Create prediction graph.
    pred, hidden = resnet_cifar10(image, num_classes=num_classes,
                                  cfg=cfg, test=False)
    pred.persistent = True

    # Compute initial network size
    num_weights, kbytes_weights = network_size_weights()
    kbytes_weights.forward()
    print(f"Initial network size (weights) is {float(kbytes_weights.d):.3f}KB "
          f"(total number of weights: {int(num_weights):d}).")
    num_activations, kbytes_activations = network_size_activations()
    kbytes_activations.forward()
    print(
        f"Initial network size (activations) is {float(kbytes_activations.d):.3f}KB "
        f"(total number of activations: {int(num_activations):d}).")

    # Create loss function: cross-entropy plus penalties on exceeding the
    # weight-size and activation-size budgets, weighted by lambda2/lambda3.
    cost_lambda2 = nn.Variable(())
    cost_lambda2.d = cfg.initial_cost_lambda2
    cost_lambda2.persistent = True
    cost_lambda3 = nn.Variable(())
    cost_lambda3.d = cfg.initial_cost_lambda3
    cost_lambda3.persistent = True
    loss1 = F.mean(F.softmax_cross_entropy(pred, label))
    loss1.persistent = True
    if cfg.target_weight_kbytes > 0:
        loss2 = F.relu(kbytes_weights - cfg.target_weight_kbytes)**2
        loss2.persistent = True
    else:
        loss2 = nn.Variable(())
        loss2.d = 0
        loss2.persistent = True
    if cfg.target_activation_kbytes > 0:
        loss3 = F.relu(kbytes_activations - cfg.target_activation_kbytes)**2
        loss3.persistent = True
    else:
        loss3 = nn.Variable(())
        loss3.d = 0
        loss3.persistent = True
    loss = loss1 + cost_lambda2 * loss2 + cost_lambda3 * loss3

    # VALID
    # Create input variables.
    vimage = nn.Variable([vbatch_size, 3, 32, 32])
    vlabel = nn.Variable([vbatch_size, 1])
    # Create prediction graph (test mode).
    vpred, vhidden = resnet_cifar10(vimage, num_classes=num_classes,
                                    cfg=cfg, test=True)
    vpred.persistent = True

    # Create Solver.
    if cfg.optimizer == "adam":
        solver = S.Adam(alpha=learning_rates[0])
    else:
        solver = S.Momentum(learning_rates[0], momentum)
    solver.set_parameters(nn.get_parameters())

    # Training loop (epochs)
    logger.info("Start Training ...")
    i = 0
    best_v_err = 1.0

    # logs of the results
    iters = []
    res_train_err = []
    res_train_loss = []
    res_val_err = []

    # print all variables that exist
    for k in nn.get_parameters():
        print(k)

    # Per-parameter histories of the learned quantization parameters
    # (n, d, xmin, xmax) for bias (bquant), weight (Wquant) and
    # activation (Aquant) quantizers.
    res_n_b = collections.OrderedDict()
    res_n_w = collections.OrderedDict()
    res_n_a = collections.OrderedDict()
    res_d_b = collections.OrderedDict()
    res_d_w = collections.OrderedDict()
    res_d_a = collections.OrderedDict()
    res_xmin_b = collections.OrderedDict()
    res_xmin_w = collections.OrderedDict()
    res_xmin_a = collections.OrderedDict()
    res_xmax_b = collections.OrderedDict()
    res_xmax_w = collections.OrderedDict()
    res_xmax_a = collections.OrderedDict()
    # Map (leaf name, quantizer scope) -> history dict; replaces twelve
    # near-identical scan loops from the original.
    res_by_param = {
        ('n', 'bquant'): res_n_b, ('n', 'Wquant'): res_n_w,
        ('n', 'Aquant'): res_n_a,
        ('d', 'bquant'): res_d_b, ('d', 'Wquant'): res_d_w,
        ('d', 'Aquant'): res_d_a,
        ('xmin', 'bquant'): res_xmin_b, ('xmin', 'Wquant'): res_xmin_w,
        ('xmin', 'Aquant'): res_xmin_a,
        ('xmax', 'bquant'): res_xmax_b, ('xmax', 'Wquant'): res_xmax_w,
        ('xmax', 'Aquant'): res_xmax_a,
    }
    for k in nn.get_parameters():
        parts = k.split('/')
        if len(parts) >= 3 and (parts[-1], parts[-3]) in res_by_param:
            res_by_param[(parts[-1], parts[-3])][k] = []

    for epoch in range(len(learning_rates)):
        train_loss = list()
        train_loss1 = list()
        train_loss2 = list()
        train_loss3 = list()
        train_err = list()

        # check whether we need to adapt the learning rate
        if epoch > 0 and learning_rates[epoch - 1] != learning_rates[epoch]:
            solver.set_learning_rate(learning_rates[epoch])

        # Training loop (iterations)
        start_epoch = True
        while data.current != 0 or start_epoch:
            start_epoch = False

            # Next batch
            image.d, label.d = data.next()

            # Training forward/backward
            solver.zero_grad()
            loss.forward()
            loss.backward()
            if weight_decay is not None:
                solver.weight_decay(weight_decay)

            # scale gradients of the quantization parameters
            if cfg.target_weight_kbytes > 0 or cfg.target_activation_kbytes > 0:
                clip_quant_grads()

            solver.update()
            e = categorical_error(pred.d, label.d)
            train_loss += [loss.d]
            train_loss1 += [loss1.d]
            train_loss2 += [loss2.d]
            train_loss3 += [loss3.d]
            train_err += [e]

            # make sure that parametric values are clipped to correct values (if outside)
            clip_quant_vals()

            # Intermediate validation: only when a weight-size target is set
            # and all active size constraints are currently fulfilled.
            kbytes_weights.forward()
            kbytes_activations.forward()
            if ((cfg.target_weight_kbytes > 0
                 and (cfg.target_weight_kbytes <= 0
                      or float(kbytes_weights.d) <= cfg.target_weight_kbytes)
                 and (cfg.target_activation_kbytes <= 0
                      or float(kbytes_activations.d) <= cfg.target_activation_kbytes))):
                ve = list()
                start_epoch_ = True
                while vdata.current != 0 or start_epoch_:
                    start_epoch_ = False
                    vimage.d, vlabel.d = vdata.next()
                    vpred.forward()
                    ve += [categorical_error(vpred.d, vlabel.d)]
                v_err = np.array(ve).mean()
                if v_err < best_v_err:
                    best_v_err = v_err
                    nn.save_parameters(
                        os.path.join(cfg.params_dir, 'params_best.h5'))
                    print(
                        f'Best validation error (fulfilling constraints: {best_v_err}'
                    )
                    save_nnp(os.path.join(cfg.params_dir, nnp_out_name),
                             'resnet20', batch_size, image, vimage, pred,
                             vpred, loss)
                    sys.stdout.flush()
                    sys.stderr.flush()
            i += 1

        # End-of-epoch validation over the whole validation set.
        ve = list()
        start_epoch = True
        while vdata.current != 0 or start_epoch:
            start_epoch = False
            vimage.d, vlabel.d = vdata.next()
            vpred.forward()
            ve += [categorical_error(vpred.d, vlabel.d)]
        v_err = np.array(ve).mean()

        # Snapshot only if all active size constraints are fulfilled.
        kbytes_weights.forward()
        kbytes_activations.forward()
        if ((v_err < best_v_err
             and (cfg.target_weight_kbytes <= 0
                  or float(kbytes_weights.d) <= cfg.target_weight_kbytes)
             and (cfg.target_activation_kbytes <= 0
                  or float(kbytes_activations.d) <= cfg.target_activation_kbytes))):
            best_v_err = v_err
            nn.save_parameters(os.path.join(cfg.params_dir, 'params_best.h5'))
            save_nnp(os.path.join(cfg.params_dir, nnp_out_name), 'resnet20',
                     batch_size, image, vimage, pred, vpred, loss)
        sys.stdout.flush()
        sys.stderr.flush()

        if cfg.target_weight_kbytes > 0:
            print(
                f"Current network size (weights) is {float(kbytes_weights.d):.3f}KB "
                f"(#params: {int(num_weights)}, "
                f"avg. bitwidth: {8. * 1024. * kbytes_weights.d / num_weights})"
            )
            sys.stdout.flush()
            sys.stderr.flush()
        if cfg.target_activation_kbytes > 0:
            print(
                f"Current network size (activations) is {float(kbytes_activations.d):.3f}KB"
            )
            sys.stdout.flush()
            sys.stderr.flush()

        # Log and record the current quantization parameter values.
        for k, v in nn.get_parameters().items():
            parts = k.split('/')
            if parts[-1] not in ('n', 'd', 'xmin', 'xmax'):
                continue
            print(f'{k}', f'{v.d}', f'{v.g}')
            sys.stdout.flush()
            sys.stderr.flush()
            key = (parts[-1], parts[-3]) if len(parts) >= 3 else None
            if key in res_by_param:
                # np.asscalar was removed in NumPy 1.23; .item() is the
                # supported equivalent.
                res_by_param[key][k].append(v.d.item())

        # Print
        logger.info(f'epoch={epoch}(iter={i}); '
                    f'overall cost={np.array(train_loss).mean()}; '
                    f'cross-entropy cost={np.array(train_loss1).mean()}; '
                    f'weight-size cost={np.array(train_loss2).mean()}; '
                    f'activations-size cost={np.array(train_loss3).mean()}; '
                    f'TrainErr={np.array(train_err).mean()}; '
                    f'ValidErr={v_err}; BestValidErr={best_v_err}')
        sys.stdout.flush()
        sys.stderr.flush()

        # update the logs
        iters.append(i)
        res_train_err.append(np.array(train_err).mean())
        res_train_loss.append([
            np.array(train_loss).mean(),
            np.array(train_loss1).mean(),
            np.array(train_loss2).mean(),
            np.array(train_loss3).mean()
        ])
        res_val_err.append(np.array(v_err).mean())
        res_ges = np.concatenate([
            np.array(iters)[:, np.newaxis],
            np.array(res_train_err)[:, np.newaxis],
            np.array(res_val_err)[:, np.newaxis],
            np.array(res_train_loss)
        ], axis=-1)

        # save the results (rewritten every epoch)
        np.savetxt(cfg.params_dir + '/results.csv', np.array(res_ges),
                   fmt='%10.8f',
                   header='iter,train_err,val_err,loss,loss1,loss2,loss3',
                   comments='', delimiter=',')

        # Dump each history as (row, col, value) triples, one file per
        # quantization parameter type.
        for rs, res in zip([
                'res_n_b.csv', 'res_n_w.csv', 'res_n_a.csv', 'res_d_b.csv',
                'res_d_w.csv', 'res_d_a.csv', 'res_min_b.csv', 'res_min_w.csv',
                'res_min_a.csv', 'res_max_b.csv', 'res_max_w.csv',
                'res_max_a.csv'
        ], [
                res_n_b, res_n_w, res_n_a, res_d_b, res_d_w, res_d_a,
                res_xmin_b, res_xmin_w, res_xmin_a, res_xmax_b, res_xmax_w,
                res_xmax_a
        ]):
            res_mat = np.array([res[key] for key in res])
            if res_mat.shape[0] > 1 and res_mat.shape[1] > 1:
                np.savetxt(
                    cfg.params_dir + '/' + rs,
                    np.array([[r, c, res_mat[r, c]]
                              for r, c in product(range(res_mat.shape[0]),
                                                  range(res_mat.shape[1]))]),
                    fmt='%10.8f', comments='', delimiter=',')
def __init__(self, solver, tinput=None, tlabel=None, tpred=None, tdata=None,
             vinput=None, vlabel=None, vpred=None, vdata=None,
             monitor_path=None, model_save_path=None, max_epoch=1,
             iter_per_epoch=None, val_iter=None):
    """Assemble a Trainer from training/validation graphs and data iterators.

    Args:
        solver: nnabla solver used for parameter updates.
        tinput, tlabel, tpred, tdata: training input/label variables,
            prediction graph output, and data iterator.
        vinput, vlabel, vpred, vdata: validation counterparts.
        monitor_path: directory for monitor logs.
        model_save_path: directory where the trainer saves models.
        max_epoch: number of epochs to train.
        iter_per_epoch: iterations per epoch; defaults to one full pass
            over ``tdata``.
        val_iter: validation iterations; defaults to one full pass over
            ``vdata``.
    """
    # Monitors
    monitor = Monitor(monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_vloss = MonitorSeries("Valid loss", monitor, interval=1)
    monitor_verr = MonitorSeries("Valid error", monitor, interval=1)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=10)

    # Loss and error
    # NOTE(review): persistent=True presumably keeps the prediction buffer
    # alive across backward so the error graph (built on the unlinked,
    # i.e. detached, prediction) can still read it — confirm.
    tpred = tpred.apply(persistent=True)
    tloss = F.mean(F.softmax_cross_entropy(tpred, tlabel))
    terror = F.mean(F.top_n_error(tpred.get_unlinked_variable(), tlabel))
    vpred = vpred.apply(persistent=True)
    vloss = F.mean(F.softmax_cross_entropy(vpred, vlabel))
    verror = F.mean(F.top_n_error(vpred.get_unlinked_variable(), vlabel))

    # Updater
    def tdata_feeder():
        # Feed the next training minibatch.
        tinput.d, tlabel.d = tdata.next()

    def forward_callback_on_finish(i):
        # Compute the training error right after the loss forward pass.
        terror.forward()

    def update_callback_on_finish(i):
        # Log training statistics after each parameter update.
        monitor_loss.add(i, tloss.d)
        monitor_err.add(i, terror.d)
        monitor_time.add(i)
    updater = Updater(solver, tloss,
                      data_feeder=tdata_feeder,
                      forward_callback_on_finish=forward_callback_on_finish,
                      update_callback_on_finish=update_callback_on_finish)

    # Evaluator
    def vdata_feeder():
        # Feed the next validation minibatch.
        vinput.d, vlabel.d = vdata.next()

    def vloss_callback_on_finish(i, v):
        monitor_vloss.add(i, v)

    def verror_callback_on_finish(i, v):
        monitor_verr.add(i, v)
    val_iter = val_iter if val_iter is not None else vdata.size // vdata.batch_size
    evaluator = Evaluator([vloss, verror],
                          data_feeder=vdata_feeder,
                          val_iter=val_iter,
                          callback_on_finish=[
                              vloss_callback_on_finish,
                              verror_callback_on_finish
                          ])

    # Trainer
    iter_per_epoch = iter_per_epoch if iter_per_epoch is not None \
        else tdata.size // tdata.batch_size
    self.trainer = Trainer(updater, evaluator, model_save_path,
                           max_epoch=max_epoch, iter_per_epoch=iter_per_epoch)
def train(): ''' Run D3Net Semantic Segmentation Training ''' # Check NNabla version if get_nnabla_version_integer() < 12100: raise ValueError( 'This code does not work with nnabla version less than v1.21.0 since [ignore index less than 0](https://github.com/sony/nnabla/pull/945) is added in v1.21.0 . Please update the nnabla version.') args = get_args() # Load D3Net Hyper parameters (D3Net-L or D3Net-S) with open(args.config_file) as file: hparams = yaml.load(file, Loader=yaml.FullLoader) # Get context. ctx = get_extension_context(args.context, device_id=0) comm = CommunicatorWrapper(ctx) nn.set_default_context(comm.ctx) # Change max_iter, learning_rate and weight_decay according no. of gpu devices for multi-gpu training. default_batch_size = 8 train_scale_factor = comm.n_procs * \ (hparams['batch_size'] / default_batch_size) hparams['max_iter'] = int(hparams['max_iter'] // train_scale_factor) hparams['lr'] = hparams['lr'] * train_scale_factor hparams['min_lr'] = hparams['min_lr'] * train_scale_factor hparams['weight_decay'] = hparams['weight_decay'] * comm.n_procs # --------------------- # Create data iterators # --------------------- rng = np.random.RandomState() data = data_iterator_cityscapes( hparams['batch_size'], args.data_dir, rng=rng, train=True) if comm.n_procs > 1: data = data.slice(rng=rng, num_of_slices=comm.n_procs, slice_pos=comm.rank) if comm.rank == 0: if not os.path.isdir(args.output_dir): os.makedirs(args.output_dir) # Create monitors monitor = M.Monitor(args.output_dir) monitor_training_loss = M.MonitorSeries( 'Training loss', monitor, interval=args.log_interval) monitor_lr = M.MonitorSeries( 'Learning rate', monitor, interval=args.log_interval) monitor_time = M.MonitorTimeElapsed( "Training time per iteration", monitor, interval=args.log_interval) # --------------------- # Create Training Graph # --------------------- # Create input variables image = nn.Variable( (hparams['batch_size'], 3, hparams['image_height'], hparams['image_width'])) 
seg_gt = nn.Variable( (hparams['batch_size'], 1, hparams['image_height'], hparams['image_width'])) # D3Net prediction/output seg_pred = d3net_segmentation(image, hparams, recompute=args.recompute) # Configure loss loss = F.mean(F.softmax_cross_entropy(seg_pred, seg_gt, axis=1)) loss.persistent = True # Create Solver solver = S.Momentum(hparams['lr'], hparams['momentum']) solver.set_parameters(nn.get_parameters()) # Initialize LR Scheduler lr_scheduler = PolynomialScheduler(hparams) if args.pretrained is not None: # Initialize the D3Net backbone weights with nn.parameter_scope('backbone'): nn.load_parameters(args.pretrained) # ------------- # Training loop # ------------- for i in range(hparams['max_iter']): image.d, seg_gt.d = data.next() solver.zero_grad() lr = lr_scheduler.get_learning_rate(i) solver.set_learning_rate(lr) loss.forward(clear_no_need_grad=True) if comm.n_procs > 1: all_reduce_callback = comm.get_all_reduce_callback() loss.backward(clear_buffer=True, communicator_callbacks=all_reduce_callback) else: loss.backward(clear_buffer=True) solver.weight_decay(hparams['weight_decay']) solver.update() if comm.rank == 0: # Log monitors monitor_training_loss.add(i, loss.d.copy()) monitor_lr.add(i, lr) monitor_time.add(i) if (i % hparams['save_interval']) == 0: # Save intermediate model parameters nn.save_parameters(os.path.join( args.output_dir, "model_param_%08d.h5" % i)) solver.save_states(os.path.join( args.output_dir, "solver_states.h5")) if comm.rank == 0: # save final model parameters nn.save_parameters(os.path.join(args.output_dir, "final.h5"))
def test_graph_model(model, seed):
    """Gradient check of two-headed mlp/recurrent/convolution graphs.

    Builds a network with two output heads (z3, z4), forwards both losses
    at once with ``nn.forward_all``, then gradient-checks each loss
    independently against numerical gradients.
    """
    np.random.seed(313)
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4, 4], need_grad=True)
    t = nn.Variable([2, 1])  # integer class labels in [0, 5)
    x.d = rng.randn(*x.shape)
    t.d = rng.randint(0, 5, size=t.shape)
    nn.set_default_context(nn.Context())

    # Forwardprop by definition
    nn.clear_parameters()
    if model == "mlp":
        with nn.parameter_scope('fc1'):
            z = PF.affine(x, 3)
        z2 = F.relu(z, inplace=True)
        with nn.parameter_scope('fc2'):
            z3 = PF.affine(z2, 5)
            z4 = PF.affine(z2, 5)
    elif model == "recurrent":
        with nn.parameter_scope('fc1'):
            z = PF.affine(x, 4)
        z2 = F.relu(z, inplace=True)
        h = z2
        # Weight sharing: the same 'fc2' scope is reused on every step.
        for _ in range(2):
            with nn.parameter_scope('fc2'):
                h = PF.affine(h, 4)
            h = F.relu(h, inplace=True)
        with nn.parameter_scope('fc3'):
            z3 = PF.affine(h, 5)
            z4 = PF.affine(h, 5)
    elif model == "convolution":
        with nn.parameter_scope('conv1'):
            z = PF.convolution(x, 3, (2, 2))
        z2 = F.relu(z, inplace=True)
        with nn.parameter_scope('fc2'):
            z3 = PF.affine(z2, 5)
            z4 = PF.affine(z2, 5)
    else:
        raise ValueError()
    l1 = F.softmax_cross_entropy(z3, t, 1)
    L1 = F.mean(l1)
    l2 = F.softmax_cross_entropy(z4, t, 1)
    L2 = F.mean(l2)

    # Forwardprop both heads in a single traversal.
    nn.forward_all([L1, L2])

    parameters = nn.get_parameters()

    # Backprop for L1
    # Diff should be initialized since they are always accumulated
    x.grad.zero()
    initialize_grad(parameters)
    L1.backward(clear_buffer=True)
    inputs = [x] + list(parameters.values())
    from nbla_test_utils import \
        compute_analytical_and_numerical_grad_graph as grads
    agrad, ngrad = grads(L1, inputs, 1e-3, False)
    assert_allclose(ngrad, agrad, atol=1.05e-2)

    # Backprop for L2
    # Diff should be initialized since they are always accumulated
    x.grad.zero()
    initialize_grad(parameters)
    L2.backward(clear_buffer=True)
    inputs = [x] + list(parameters.values())
    from nbla_test_utils import \
        compute_analytical_and_numerical_grad_graph as grads
    agrad, ngrad = grads(L2, inputs, 1e-3, False)
    assert_allclose(ngrad, agrad, atol=1.05e-2)
def loss_func(pred, label):
    """Return the scalar mean of the softmax cross-entropy loss.

    Args:
        pred: Logits variable.
        label: Integer class-label variable.
    """
    per_example = F.softmax_cross_entropy(pred, label)
    return F.mean(per_example)
def cifar100_resnet32_loss(pred, label):
    """Mean softmax cross-entropy loss for the CIFAR-100 ResNet-32 model."""
    ce = F.softmax_cross_entropy(pred, label)
    return F.mean(ce)
def logreg_loss(y, t):
    """Logistic-regression objective: mean softmax cross-entropy of y vs t."""
    ce_per_sample = F.softmax_cross_entropy(y, t)
    return F.mean(ce_per_sample)
def loss_function(pred, label):
    """Return the batch-averaged softmax cross-entropy between logits and labels."""
    return F.mean(F.softmax_cross_entropy(pred, label))
def sample_from_controller(args):
    """
    2-layer RNN(LSTM) based controller which outputs an architecture of CNN,
    represented as a sequence of integers and its list.
    Given the number of layers, for each layer, it executes 2 types of
    computation, one for sampling the operation at that layer,
    another for sampling the skip connection patterns.
    """
    # Running accumulators for entropy and log-probability of the sampled
    # architecture; gradients flow through these for REINFORCE-style training.
    entropys = nn.Variable([1, 1], need_grad=True)
    log_probs = nn.Variable([1, 1], need_grad=True)
    entropys.d = log_probs.d = 0.0  # initialize them all

    num_cells = args.num_cells
    num_nodes = args.num_nodes
    lstm_size = args.lstm_size
    state_size = args.state_size
    lstm_num_layers = args.lstm_layers
    temperature = args.temperature
    tanh_constant = args.tanh_constant
    op_tanh_reduce = args.op_tanh_reduce
    num_branch = args.num_ops

    # both_archs[0] = normal cell, both_archs[1] = reduction cell.
    both_archs = [list(), list()]
    initializer = I.UniformInitializer((-0.1, 0.1))

    # Per-layer LSTM hidden and cell states, zero-initialized.
    prev_h = [nn.Variable([1, lstm_size], need_grad=True)
              for _ in range(lstm_num_layers)]
    prev_c = [nn.Variable([1, lstm_size], need_grad=True)
              for _ in range(lstm_num_layers)]
    for i in range(len(prev_h)):
        prev_h[i].d = 0  # initialize.
        prev_c[i].d = 0

    # Random initial input embedding fed to the first LSTM step.
    inputs = nn.Variable([1, lstm_size])
    inputs.d = np.random.normal(0, 0.5, [1, lstm_size])
    # NOTE(review): g_emb is initialized here but not referenced below in this
    # block — presumably a leftover; confirm against the original repo.
    g_emb = nn.Variable([1, lstm_size])
    g_emb.d = np.random.normal(0, 0.5, [1, lstm_size])

    for ind in range(2):  # first create conv cell and then reduc cell.
        idx_seq = list()
        ops_seq = list()
        for node_id in range(num_nodes):
            if node_id == 0:
                # Anchors hold the LSTM outputs of all previous nodes (plus
                # the 2 cell inputs); used for attention-style index scoring.
                anchors = nn.parameter.get_parameter_or_create(
                    "anchors", [2, lstm_size], initializer, need_grad=False)
                anchors_w_1 = nn.parameter.get_parameter_or_create(
                    "anchors_w_1", [2, lstm_size], initializer, need_grad=False)
            else:
                assert anchors.shape[0] == node_id + \
                    2, "Something wrong with anchors."
                assert anchors_w_1.shape[0] == node_id + \
                    2, "Something wrong with anchors_w_1."

            # for each node, get the index used as inputs
            for i in range(2):
                # One-step stacked LSTM.
                with nn.parameter_scope("controller_lstm"):
                    next_h, next_c = stack_lstm(inputs, prev_h, prev_c,
                                                state_size)
                prev_h, prev_c = next_h, next_c  # shape:(1, lstm_size)

                query = anchors_w_1
                with nn.parameter_scope("skip_affine_1"):
                    query = F.tanh(
                        F.add2(query,
                               PF.affine(next_h[-1], lstm_size,
                                         w_init=initializer,
                                         with_bias=False)))
                    # (node_id + 2, lstm_size) + (1, lstm_size)
                    # broadcast occurs here. resulting shape is;
                    # (node_id + 2, lstm_size)

                with nn.parameter_scope("skip_affine_2"):
                    # (node_id + 2, 1)
                    logit = PF.affine(query, 1, w_init=initializer,
                                      with_bias=False)

                # Optional logit sharpening/squashing.
                if temperature is not None:
                    logit = F.mul_scalar(logit, (1 / temperature))
                if tanh_constant is not None:
                    logit = F.mul_scalar(F.tanh(logit), tanh_constant)

                # Softmax computed manually on host data (.d) so the result
                # can be fed to numpy's multinomial sampler.
                index = F.exp(logit)
                index = F.mul_scalar(index, (1 / index.d.sum()))

                # Sampling input indices from multinomial distribution.
                index = np.random.multinomial(
                    1, np.reshape(index.d, (1, index.d.size))[0], 1)
                idx_seq.append(index.nonzero()[1])

                label = nn.Variable.from_numpy_array(
                    index.transpose())  # (node_id + 2, 1)
                log_prob = F.softmax_cross_entropy(logit, label)
                log_probs = F.add2(log_probs, F.sum(log_prob, keepdims=True))
                # Entropy of the categorical distribution (CE with itself).
                curr_ent = F.softmax_cross_entropy(logit, F.softmax(logit))
                entropy = F.sum(curr_ent, keepdims=True)
                entropys = F.add2(entropys, entropy)

                taking_ind = int(index.nonzero()[1][0])
                # (1, lstm_size): embedding of the chosen anchor becomes the
                # next LSTM input.
                inputs = F.reshape(anchors[taking_ind],
                                   (1, anchors.shape[1]))

            # ops
            for j in range(2):
                with nn.parameter_scope("controller_lstm"):
                    next_h, next_c = stack_lstm(inputs, prev_h, prev_c,
                                                state_size)
                prev_h, prev_c = next_h, next_c  # shape:(1, lstm_size)

                # Compute for operation.
                with nn.parameter_scope("ops"):
                    logit = PF.affine(next_h[-1], num_branch,
                                      w_init=initializer, with_bias=False)
                # shape of logit : (1, num_branch)

                if temperature is not None:
                    logit = F.mul_scalar(logit, (1 / temperature))
                if tanh_constant is not None:
                    op_tanh = tanh_constant / op_tanh_reduce
                    logit = F.mul_scalar(F.tanh(logit), op_tanh)

                # normalizing logits.
                normed_logit = np.e**logit.d
                normed_logit = normed_logit / np.sum(normed_logit)

                # Sampling operation id from multinomial distribution.
                branch_id = np.random.multinomial(1, normed_logit[0],
                                                  1).nonzero()[1]
                branch_id = nn.Variable.from_numpy_array(branch_id)
                ops_seq.append(branch_id.d)

                # log policy for operation.
                log_prob = F.softmax_cross_entropy(
                    logit, F.reshape(branch_id, shape=(1, 1)))
                # accumulate log policy as log probs
                log_probs = F.add2(log_probs, log_prob)

                logit = F.transpose(logit, axes=(1, 0))
                curr_ent = F.softmax_cross_entropy(logit, F.softmax(logit))
                entropy = F.sum(curr_ent, keepdims=True)
                entropys = F.add2(entropys, entropy)

                # Embedding table for operations; chosen row is next input.
                w_emb = nn.parameter.get_parameter_or_create(
                    "w_emb", [num_branch, lstm_size], initializer,
                    need_grad=False)
                # (1, lstm_size)
                inputs = F.reshape(w_emb[int(branch_id.d)],
                                   (1, w_emb.shape[1]))

            # One more LSTM step whose hidden state becomes this node's
            # anchor for future index sampling.
            with nn.parameter_scope("controller_lstm"):
                # NOTE(review): this call passes lstm_size while the ones
                # above pass state_size — confirm this asymmetry is intended.
                next_h, next_c = stack_lstm(inputs, prev_h, prev_c, lstm_size)
            prev_h, prev_c = next_h, next_c
            with nn.parameter_scope("skip_affine_3"):
                adding_w_1 = PF.affine(next_h[-1], lstm_size,
                                       w_init=initializer, with_bias=False)
            # (node_id + 2 + 1, lstm_size)
            anchors = F.concatenate(anchors, next_h[-1], axis=0)
            # (node_id + 2 + 1, lstm_size)
            anchors_w_1 = F.concatenate(anchors_w_1, adding_w_1, axis=0)

        # Flatten (index, op) pairs into the architecture sequence.
        for idx, ops in zip(idx_seq, ops_seq):
            both_archs[ind].extend([int(idx), int(ops)])

    return both_archs, log_probs, entropys
def train():
    """Train a CIFAR-10 resnet2rnn model selected by ``args.net``.

    Builds training and test graphs, runs the training loop with periodic
    validation, saves the best model (by validation error) during training
    and the final parameters at the end.
    """
    args = get_args()

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context, device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # Each branch binds the unroll configuration (and optionally the
    # bottleneck residual unit) into the prediction function.
    if args.net == "cifar10_resnet2rnn_10_prediction":
        model_prediction = functools.partial(cifar10_resnet2rnn_prediction,
                                             unrolls=[10])
    if args.net == "cifar10_resnet2rnn_15_prediction":
        model_prediction = functools.partial(cifar10_resnet2rnn_prediction,
                                             unrolls=[15])
    if args.net == "cifar10_resnet2rnn_3x3x4_prediction":
        model_prediction = functools.partial(cifar10_resnet2rnn_prediction,
                                             unrolls=[3, 3, 4])
    if args.net == "cifar10_resnet2rnn_5x5_prediction":
        model_prediction = functools.partial(cifar10_resnet2rnn_prediction,
                                             unrolls=[5, 5])
    if args.net == "cifar10_resnet2rnn_5_prediction":
        model_prediction = functools.partial(cifar10_resnet2rnn_prediction,
                                             unrolls=[5])
    if args.net == "cifar10_bresnet2rnn_10_prediction":
        model_prediction = functools.partial(cifar10_resnet2rnn_prediction,
                                             unrolls=[10],
                                             res_unit=res_unit_bottleneck)
    if args.net == "cifar10_bresnet2rnn_15_prediction":
        model_prediction = functools.partial(cifar10_resnet2rnn_prediction,
                                             unrolls=[15],
                                             res_unit=res_unit_bottleneck)
    if args.net == "cifar10_bresnet2rnn_3x3x4_prediction":
        model_prediction = functools.partial(cifar10_resnet2rnn_prediction,
                                             unrolls=[3, 3, 4],
                                             res_unit=res_unit_bottleneck)
    if args.net == "cifar10_bresnet2rnn_5x5_prediction":
        model_prediction = functools.partial(cifar10_resnet2rnn_prediction,
                                             unrolls=[5, 5],
                                             res_unit=res_unit_bottleneck)
    if args.net == "cifar10_bresnet2rnn_5_prediction":
        model_prediction = functools.partial(cifar10_resnet2rnn_prediction,
                                             unrolls=[5],
                                             res_unit=res_unit_bottleneck)

    # TRAIN
    maps = 64
    data_iterator = data_iterator_cifar10
    c = 3
    h = w = 32
    n_train = 50000
    n_valid = 10000

    # Create input variables.
    image = nn.Variable([args.batch_size, c, h, w])
    label = nn.Variable([args.batch_size, 1])
    # Create `teacher` model_prediction graph.
    pred = model_prediction(image, maps=maps, test=False)
    # persistent so pred.d survives loss.backward(clear_buffer=True).
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, c, h, w])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create teacher predition graph.
    vpred = model_prediction(vimage, maps=maps, test=True)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=1)

    # Initialize DataIterator
    data = data_iterator(args.batch_size, True)
    vdata = data_iterator(args.batch_size, False)
    best_ve = 1.0
    ve = 1.0

    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(int(n_valid / args.batch_size)):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            ve /= int(n_valid / args.batch_size)
            monitor_verr.add(i, ve)
            # Keep a snapshot whenever validation error improves.
            if ve < best_ve:
                nn.save_parameters(
                    os.path.join(args.model_save_path, 'params_%06d.h5' % i))
                best_ve = ve
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    # Final validation pass over the whole validation set.
    ve = 0.0
    for j in range(int(n_valid / args.batch_size)):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    ve /= int(n_valid / args.batch_size)
    monitor_verr.add(i, ve)

    parameter_file = os.path.join(args.model_save_path,
                                  'params_{:06}.h5'.format(args.max_iter))
    nn.save_parameters(parameter_file)
def train():
    """
    Main script.

    Steps:

    * Parse command line arguments.
    * Specify a context for computation.
    * Initialize DataIterator for MNIST.
    * Construct a computation graph for training and validation.
    * Initialize a solver and set parameter variables to it.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Computate error rate for validation data (periodically)
      * Get a next minibatch.
      * Set parameter gradients zero
      * Execute forwardprop on the training graph.
      * Execute backprop.
      * Solver updates parameters by using gradients computed by backprop.
      * Compute training error
    """
    args = get_args(monitor_path='tmp.monitor.bnn')

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(
        args.context, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    # Initialize DataIterator for MNIST.
    data = data_iterator_mnist(args.batch_size, True)
    vdata = data_iterator_mnist(args.batch_size, False)

    # Create CNN network for both training and testing.
    # Default falls back to binary-connect LeNet when args.net is unknown.
    mnist_cnn_prediction = mnist_binary_connect_lenet_prediction
    if args.net == 'bincon':
        mnist_cnn_prediction = mnist_binary_connect_lenet_prediction
    elif args.net == 'binnet':
        mnist_cnn_prediction = mnist_binary_net_lenet_prediction
    elif args.net == 'bwn':
        mnist_cnn_prediction = mnist_binary_weight_lenet_prediction
    elif args.net == 'bincon_resnet':
        mnist_cnn_prediction = mnist_binary_connect_resnet_prediction
    elif args.net == 'binnet_resnet':
        mnist_cnn_prediction = mnist_binary_net_resnet_prediction
    elif args.net == 'bwn_resnet':
        mnist_cnn_prediction = mnist_binary_weight_resnet_prediction

    # TRAIN
    # Create input variables.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size, 1])
    # Create prediction graph; pixel values scaled to [0, 1].
    pred = mnist_cnn_prediction(image / 255, test=False)
    # persistent so pred.d survives loss.backward(clear_buffer=True).
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    vpred = mnist_cnn_prediction(vimage / 255, test=True)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    start_point = 0
    if args.checkpoint is not None:
        # load weights and solver state info from specified checkpoint file.
        start_point = load_checkpoint(args.checkpoint, solver)

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = M.MonitorSeries("Training error", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = M.MonitorSeries("Test error", monitor, interval=10)

    # save_nnp: export the untrained inference network for later use.
    contents = save_nnp({'x': vimage}, {'y': vpred}, args.batch_size)
    save.save(os.path.join(args.model_save_path,
                           '{}_result_epoch0.nnp'.format(args.net)), contents)

    # Training loop.
    for i in range(start_point, args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            monitor_verr.add(i, ve / args.val_iter)
        if i % args.model_save_interval == 0:
            # save checkpoint file
            save_checkpoint(args.model_save_path, i, solver)

        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)

        # Training backward & update
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        # Monitor
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    parameter_file = os.path.join(
        args.model_save_path, 'params_%06d.h5' % args.max_iter)
    nn.save_parameters(parameter_file)

    # save_nnp_lastepoch: export the trained inference network.
    contents = save_nnp({'x': vimage}, {'y': vpred}, args.batch_size)
    save.save(os.path.join(args.model_save_path,
                           '{}_result.nnp'.format(args.net)), contents)
def train():
    """
    Main script.

    Steps:

    * Parse command line arguments.
    * Specify a context for computation.
    * Initialize DataIterator for MNIST.
    * Construct a computation graph for training and validation.
    * Initialize a solver and set parameter variables to it.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Computate error rate for validation data (periodically)
      * Get a next minibatch.
      * Execute forwardprop on the training graph.
      * Compute training error
      * Set parameter gradients zero
      * Execute backprop.
      * Solver updates parameters by using gradients computed by backprop.
    """
    args = get_args()

    # Fix numpy RNG for reproducibility.
    from numpy.random import seed
    seed(0)

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context, device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    if args.net == 'lenet':
        mnist_cnn_prediction = mnist_lenet_prediction
    elif args.net == 'resnet':
        mnist_cnn_prediction = mnist_resnet_prediction
    else:
        raise ValueError("Unknown network type {}".format(args.net))

    # TRAIN
    # Create input variables.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    pred = mnist_cnn_prediction(image, test=False, aug=args.augment_train)
    # persistent so pred.d survives loss.backward(clear_buffer=True).
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create predition graph.
    vpred = mnist_cnn_prediction(vimage, test=True, aug=args.augment_test)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)

    # Initialize DataIterator for MNIST.
    from numpy.random import RandomState
    data = data_iterator_mnist(args.batch_size, True, rng=RandomState(1223))
    vdata = data_iterator_mnist(args.batch_size, False)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                # Cast back to float32 (e.g. when running in half precision)
                # before host-side error computation.
                vpred.data.cast(np.float32, ctx)
                ve += categorical_error(vpred.d, vlabel.d)
            monitor_verr.add(i, ve / args.val_iter)
        if i % args.model_save_interval == 0:
            nn.save_parameters(
                os.path.join(args.model_save_path, 'params_%06d.h5' % i))
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        loss.data.cast(np.float32, ctx)
        pred.data.cast(np.float32, ctx)
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    # Final validation pass.
    ve = 0.0
    for j in range(args.val_iter):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    monitor_verr.add(i, ve / args.val_iter)

    parameter_file = os.path.join(
        args.model_save_path,
        '{}_params_{:06}.h5'.format(args.net, args.max_iter))
    nn.save_parameters(parameter_file)
def main():
    """
    Main script.

    Steps:

    * Get and set context.
    * Load Dataset
    * Initialize DataIterator.
    * Create Networks
      * Net for Labeled Data
      * Net for Unlabeled Data
      * Net for Test Data
    * Create Solver.
    * Training Loop.
      * Test
      * Training
        * by Labeled Data
          * Calculate Cross Entropy Loss
        * by Unlabeled Data
          * Estimate Adversarial Direction
          * Calculate LDS Loss
    """
    args = get_args()

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context, device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    shape_x = (1, 28, 28)
    n_h = args.n_units
    n_y = args.n_class

    # Load MNist Dataset
    from mnist_data import MnistDataSource
    with MnistDataSource(train=True) as d:
        x_t = d.images
        t_t = d.labels
    with MnistDataSource(train=False) as d:
        x_v = d.images
        t_v = d.labels
    # Scale raw pixel values into [0, 1).
    x_t = np.array(x_t / 256.0).astype(np.float32)
    x_t, t_t = x_t[:args.n_train], t_t[:args.n_train]
    x_v, t_v = x_v[:args.n_valid], t_v[:args.n_valid]

    # Create Semi-supervised Datasets
    x_l, t_l, x_u, _ = split_dataset(x_t, t_t, args.n_labeled, args.n_class)
    # Unlabeled pool also contains the labeled images (labels discarded).
    x_u = np.r_[x_l, x_u]
    x_v = np.array(x_v / 256.0).astype(np.float32)

    # Create DataIterators for datasets of labeled, unlabeled and validation
    di_l = DataIterator(args.batchsize_l, [x_l, t_l])
    di_u = DataIterator(args.batchsize_u, [x_u])
    di_v = DataIterator(args.batchsize_v, [x_v, t_v])

    # Create networks
    # feed-forward-net building function
    def forward(x, test=False):
        return mlp_net(x, n_h, n_y, test)

    # Net for learning labeled data
    xl = nn.Variable((args.batchsize_l, ) + shape_x, need_grad=False)
    hl = forward(xl, test=False)
    tl = nn.Variable((args.batchsize_l, 1), need_grad=False)
    loss_l = F.mean(F.softmax_cross_entropy(hl, tl))

    # Net for learning unlabeled data
    xu = nn.Variable((args.batchsize_u, ) + shape_x, need_grad=False)
    # r: perturbation direction (needs grad for the power method below).
    r = nn.Variable((args.batchsize_u, ) + shape_x, need_grad=True)
    # eps: perturbation magnitude (xi during power method, eps for the loss).
    eps = nn.Variable((args.batchsize_u, ) + shape_x, need_grad=False)
    loss_u, yu = vat(xu, r, eps, forward, distance)

    # Net for evaluating valiation data
    xv = nn.Variable((args.batchsize_v, ) + shape_x, need_grad=False)
    hv = forward(xv, test=True)
    tv = nn.Variable((args.batchsize_v, 1), need_grad=False)

    # Create solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Monitor trainig and validation stats.
    import nnabla.monitor as M
    monitor = M.Monitor(args.model_save_path)
    monitor_verr = M.MonitorSeries("Test error", monitor, interval=240)
    monitor_time = M.MonitorTimeElapsed("Elapsed time", monitor, interval=240)

    # Training Loop.
    t0 = time.time()
    for i in range(args.max_iter):
        # Validation Test
        if i % args.val_interval == 0:
            n_error = calc_validation_error(di_v, xv, tv, hv, args.val_iter)
            monitor_verr.add(i, n_error)

        #################################
        ## Training by Labeled Data #####
        #################################
        # input minibatch of labeled data into variables
        xl.d, tl.d = di_l.next()
        # initialize gradients
        solver.zero_grad()
        # forward, backward and update
        loss_l.forward(clear_no_need_grad=True)
        loss_l.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        #################################
        ## Training by Unlabeled Data ###
        #################################
        # input minibatch of unlabeled data into variables
        xu.d, = di_u.next()

        ##### Calculate Adversarial Noise #####
        # Sample random noise
        n = np.random.normal(size=xu.shape).astype(np.float32)
        # Normalize noise vector and input to variable
        r.d = get_direction(n)
        # Set xi, the power-method scaling parameter.
        eps.data.fill(args.xi_for_vat)
        # Calculate y without noise, only once.
        yu.forward(clear_buffer=True)
        # Do power method iteration
        for k in range(args.n_iter_for_power_method):
            # Initialize gradient to receive value
            r.grad.zero()
            # forward, backward, without update
            loss_u.forward(clear_no_need_grad=True)
            loss_u.backward(clear_buffer=True)
            # Normalize gradinet vector and input to variable
            r.d = get_direction(r.g)

        ##### Calculate loss for unlabeled data #####
        # Clear remained gradients
        solver.zero_grad()
        # Set epsilon, the adversarial noise scaling parameter.
        eps.data.fill(args.eps_for_vat)
        # forward, backward and update
        loss_u.forward(clear_no_need_grad=True)
        loss_u.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        ##### Learning rate update #####
        if i % args.iter_per_epoch == 0:
            solver.set_learning_rate(
                solver.learning_rate() * args.learning_rate_decay)
        monitor_time.add(i)

    # Evaluate the final model by the error rate with validation dataset
    valid_error = calc_validation_error(di_v, xv, tv, hv, args.val_iter)
    monitor_verr.add(i, valid_error)
    monitor_time.add(i)

    # Save the model.
    parameter_file = os.path.join(args.model_save_path,
                                  'params_%06d.h5' % args.max_iter)
    nn.save_parameters(parameter_file)
def meta_train(args, train_data, valid_data, test_data):
    """Meta-train a few-shot classifier on episodic tasks.

    Trains on episodes drawn from ``train_data``, keeps the parameter set
    with the best validation error, then reports test error with a 95%%
    confidence interval and saves a t-SNE visualization of embeddings.
    """
    # Build episode generators
    shape_x = (1, 28, 28)
    train_episode_generator = EpisodeGenerator(
        args.n_class_tr, args.n_shot_tr, args.n_query_tr, shape_x, train_data)
    valid_episode_generator = EpisodeGenerator(
        args.n_class, args.n_shot, args.n_query, shape_x, valid_data)
    test_episode_generator = EpisodeGenerator(
        args.n_class, args.n_shot, args.n_query, shape_x, test_data)

    # Build training model: support set xs_t, query set xq_t.
    xs_t = nn.Variable((args.n_class_tr * args.n_shot_tr, ) + shape_x)
    xq_t = nn.Variable((args.n_class_tr * args.n_query_tr, ) + shape_x)
    hq_t = net(args.n_class_tr, xs_t, xq_t, args.embedding, args.net_type,
               args.metric, False)
    yq_t = nn.Variable((args.n_class_tr * args.n_query_tr, 1))
    loss_t = F.mean(F.softmax_cross_entropy(hq_t, yq_t))

    # Build evaluation model
    xs_v = nn.Variable((args.n_class * args.n_shot, ) + shape_x)
    xq_v = nn.Variable((args.n_class * args.n_query, ) + shape_x)
    hq_v = net(args.n_class, xs_v, xq_v, args.embedding, args.net_type,
               args.metric, True)
    yq_v = nn.Variable((args.n_class * args.n_query, 1))
    err_v = F.mean(F.top_n_error(hq_v, yq_v, n=1))

    # Setup solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Monitor outputs
    monitor = Monitor(args.work_dir)
    monitor_loss = MonitorSeries("Training loss", monitor,
                                 interval=args.iter_per_epoch)
    monitor_valid_err = MonitorSeries("Validation error", monitor,
                                      interval=args.iter_per_valid)
    monitor_test_err = MonitorSeries("Test error", monitor)
    monitor_test_conf = MonitorSeries("Test error confidence", monitor)

    # Output files
    param_file = args.work_dir + "params.h5"
    tsne_file = args.work_dir + "tsne.png"

    # Training loop
    train_losses = []
    best_err = 1.0
    for i in range(args.max_iteration):
        # Decay learning rate
        if (i + 1) % args.lr_decay_interval == 0:
            solver.set_learning_rate(solver.learning_rate() * args.lr_decay)

        # Create an episode
        xs_t.d, xq_t.d, yq_t.d = train_episode_generator.next()

        # Training by the episode
        solver.zero_grad()
        loss_t.forward(clear_no_need_grad=True)
        loss_t.backward(clear_buffer=True)
        solver.update()
        train_losses.append(loss_t.d.copy())

        # Evaluation
        if (i + 1) % args.iter_per_valid == 0:
            # NOTE(review): train_loss is computed but never passed to
            # monitor_loss — presumably an omission; confirm upstream.
            train_loss = np.mean(train_losses)
            train_losses = []
            valid_errs = []
            for k in range(args.n_episode_for_valid):
                xs_v.d, xq_v.d, yq_v.d = valid_episode_generator.next()
                err_v.forward(clear_no_need_grad=True, clear_buffer=True)
                valid_errs.append(np.float(err_v.d.copy()))
            valid_err = np.mean(valid_errs)
            monitor_valid_err.add(i + 1, valid_err * 100)
            # Snapshot the best model seen so far.
            if valid_err < best_err:
                best_err = valid_err
                nn.save_parameters(param_file)

    # Final evaluation with the best parameters.
    nn.load_parameters(param_file)
    v_errs = []
    for k in range(args.n_episode_for_test):
        xs_v.d, xq_v.d, yq_v.d = test_episode_generator.next()
        err_v.forward(clear_no_need_grad=True, clear_buffer=True)
        v_errs.append(np.float(err_v.d.copy()))
    v_err_mean = np.mean(v_errs)
    v_err_std = np.std(v_errs)
    # 95% confidence half-width of the mean test error.
    v_err_conf = 1.96 * v_err_std / np.sqrt(args.n_episode_for_test)
    monitor_test_err.add(0, v_err_mean * 100)
    monitor_test_conf.add(0, v_err_conf * 100)

    # Visualization
    n_class = 50
    n_sample = 20
    batch = test_data[:n_class].reshape(n_class * n_sample, 1, 28, 28)
    label = []
    for i in range(n_class):
        label.extend(np.ones(n_sample) * (i % 50))
    u = get_embeddings(batch, conv4)
    v = get_tsne(u)
    plot_tsne(v[:, 0], v[:, 1], label, tsne_file)
def loss_function(pred, label):
    """
    Compute loss.
    """
    cross_entropy = F.softmax_cross_entropy(pred, label)
    return F.mean(cross_entropy)
def ce_loss(pred, label):
    """Batch-mean softmax cross-entropy between logits and integer labels."""
    per_sample_ce = F.softmax_cross_entropy(pred, label)
    return F.mean(per_sample_ce)
def train():
    """
    Main script.

    Steps:

    * Parse command line arguments.
    * Specify a context for computation.
    * Initialize DataIterator for MNIST.
    * Construct a computation graph for training and validation.
    * Initialize a solver and set parameter variables to it.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Computate error rate for validation data (periodically)
      * Get a next minibatch.
      * Execute forwardprop on the training graph.
      * Compute training error
      * Set parameter gradients zero
      * Execute backprop.
      * Solver updates parameters by using gradients computed by backprop.
    """
    args = get_args()

    # Get context (legacy nnabla.contrib API).
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    mnist_cnn_prediction = mnist_lenet_prediction
    if args.net == 'resnet':
        mnist_cnn_prediction = mnist_resnet_prediction

    # TRAIN
    # Create input variables.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    pred = mnist_cnn_prediction(image, test=False)
    # persistent so pred.d survives loss.backward(clear_buffer=True).
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create predition graph.
    vpred = mnist_cnn_prediction(vimage, test=True)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)

    # Initialize DataIterator for MNIST.
    data = data_iterator_mnist(args.batch_size, True)
    vdata = data_iterator_mnist(args.batch_size, False)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            monitor_verr.add(i, ve / args.val_iter)
        if i % args.model_save_interval == 0:
            nn.save_parameters(os.path.join(
                args.model_save_path, 'params_%06d.h5' % i))
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    # Final validation pass.
    ve = 0.0
    for j in range(args.val_iter):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    monitor_verr.add(i, ve / args.val_iter)

    parameter_file = os.path.join(
        args.model_save_path,
        '{}_params_{:06}.h5'.format(args.net, args.max_iter))
    nn.save_parameters(parameter_file)
def train():
    """
    Main script.

    Steps:

    * Parse command line arguments.
    * Specify a context for computation.
    * Initialize DataIterator for MNIST.
    * Construct a computation graph for training and validation.
    * Initialize a solver and set parameter variables to it.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Computate error rate for validation data (periodically)
      * Get a next minibatch.
      * Set parameter gradients zero
      * Execute forwardprop on the training graph.
      * Execute backprop.
      * Solver updates parameters by using gradients computed by backprop.
      * Compute training error
    """
    args = get_args(monitor_path='tmp.monitor.bnn')

    # Get context (legacy nnabla.contrib API).
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Initialize DataIterator for MNIST.
    data = data_iterator_mnist(args.batch_size, True)
    vdata = data_iterator_mnist(args.batch_size, False)

    # Create CNN network for both training and testing.
    # Default falls back to binary-connect LeNet when args.net is unknown.
    mnist_cnn_prediction = mnist_binary_connect_lenet_prediction
    if args.net == 'bincon':
        mnist_cnn_prediction = mnist_binary_connect_lenet_prediction
    elif args.net == 'binnet':
        mnist_cnn_prediction = mnist_binary_net_lenet_prediction
    elif args.net == 'bwn':
        mnist_cnn_prediction = mnist_binary_weight_lenet_prediction
    elif args.net == 'bincon_resnet':
        mnist_cnn_prediction = mnist_binary_connect_resnet_prediction
    elif args.net == 'binnet_resnet':
        mnist_cnn_prediction = mnist_binary_net_resnet_prediction
    elif args.net == 'bwn_resnet':
        mnist_cnn_prediction = mnist_binary_weight_resnet_prediction

    # TRAIN
    # Create input variables.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size, 1])
    # Create predition graph; pixel values scaled to [0, 1].
    pred = mnist_cnn_prediction(image / 255, test=False)
    # persistent so pred.d survives loss.backward(clear_buffer=True).
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create predition graph.
    vpred = mnist_cnn_prediction(vimage / 255, test=True)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = M.MonitorSeries("Training error", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = M.MonitorSeries("Test error", monitor, interval=10)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            monitor_verr.add(i, ve / args.val_iter)
        if i % args.model_save_interval == 0:
            nn.save_parameters(
                os.path.join(args.model_save_path, 'params_%06d.h5' % i))

        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)

        # Training backward & update
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        # Monitor
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    # Export the validation network as an NNP file for runtime inference.
    nnp_file = os.path.join(args.model_save_path,
                            '{}_{:06}.nnp'.format(args.net, args.max_iter))
    runtime_contents = {
        'networks': [
            {'name': 'Validation',
             'batch_size': args.batch_size,
             'outputs': {'y': vpred},
             'names': {'x': vimage}}],
        'executors': [
            {'name': 'Runtime',
             'network': 'Validation',
             'data': ['x'],
             'output': ['y']}]}
    save.save(nnp_file, runtime_contents)

    # Verify the exported NNP by running the C++ forward path against the
    # Python-side prediction.
    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [vimage.d], [vimage], vpred,
                      nnp_file)
def main():
    """Build LeNet/ResNet MNIST training and runtime graphs and save them
    as an (untrained) NNP file for later C++ inference.

    Locates a local clone of nnabla-examples via the `NNABLA_EXAMPLES_ROOT`
    envvar, imports the model builders from it, constructs a training graph
    (loss) and a runtime graph (prediction), and writes both into
    '<net>_initialized.nnp'.
    """
    # Read envvar `NNABLA_EXAMPLES_ROOT` to identify the path to your local
    # nnabla-examples directory.
    HERE = os.path.dirname(__file__)
    nnabla_examples_root = os.environ.get(
        'NNABLA_EXAMPLES_ROOT',
        os.path.join(HERE, '../../../../nnabla-examples'))
    mnist_examples_root = os.path.realpath(
        os.path.join(nnabla_examples_root, 'mnist-collection'))
    sys.path.append(mnist_examples_root)
    nnabla_examples_git_url = 'https://github.com/sony/nnabla-examples'

    # Check if nnabla-examples found. Importing `args` only succeeds when the
    # repo is on sys.path; a failure here is rewrapped with a helpful message.
    try:
        from args import get_args
    except ImportError:
        print('An envvar `NNABLA_EXAMPLES_ROOT`'
              ' which locates the local path to '
              '[nnabla-examples]({})'
              ' repository must be set correctly.'.format(
                  nnabla_examples_git_url),
              file=sys.stderr)
        raise

    # Import MNIST data and model-building helpers from nnabla-examples.
    from mnist_data import data_iterator_mnist
    from classification import mnist_lenet_prediction, mnist_resnet_prediction

    args = get_args(description=__doc__)

    # Select the network builder; default is LeNet.
    mnist_cnn_prediction = mnist_lenet_prediction
    if args.net == 'resnet':
        mnist_cnn_prediction = mnist_resnet_prediction

    # Create a computation graph to be saved.
    # `h`/`loss`: training graph (test=False); `y`: inference graph (test=True).
    # Both share the same input variable `x` (and hence the same parameters).
    x = nn.Variable([args.batch_size, 1, 28, 28])
    h = mnist_cnn_prediction(x, test=False, aug=False)
    t = nn.Variable([args.batch_size, 1])
    loss = F.mean(F.softmax_cross_entropy(h, t))
    y = mnist_cnn_prediction(x, test=True, aug=False)

    # Save NNP file (used in C++ inference later.).
    nnp_file = '{}_initialized.nnp'.format(args.net)
    runtime_contents = {
        'networks': [
            {'name': 'training',
             'batch_size': args.batch_size,
             'outputs': {'loss': loss},
             'names': {'x': x, 't': t}},
            {'name': 'runtime',
             'batch_size': args.batch_size,
             'outputs': {'y': y},
             'names': {'x': x}}],
        'executors': [
            {'name': 'runtime',
             'network': 'runtime',
             'data': ['x'],
             'output': ['y']}]}
    nn.utils.save.save(nnp_file, runtime_contents)
# NOTE(review): fragment — the opening `train_data_iter = data_iterator_simple(
# load_train_func, len(x_train), batch_size, ...` call begins before this chunk;
# the first line below closes that call. Confirm against the full file.
                                   shuffle=True, with_file_cache=False)
valid_data_iter = data_iterator_simple(load_valid_func, len(x_valid),
                                       batch_size,
                                       shuffle=True, with_file_cache=False)

# CBOW-style word2vec graph: the input is the 2*window_size context word ids.
x = nn.Variable([batch_size, window_size * 2])
with nn.parameter_scope('W_in'):
    # Embed each context word, then average the embeddings over the context
    # axis to get a single hidden vector per example.
    h = PF.embed(x, vocab_size, embedding_size)
    h = F.mean(h, axis=1)

with nn.parameter_scope('W_out'):
    # Project back to vocabulary logits (no bias).
    y = PF.affine(h, vocab_size, with_bias=False)

# Target center-word id and cross-entropy loss.
t = nn.Variable((batch_size, 1))
entropy = F.softmax_cross_entropy(y, t)
loss = F.mean(entropy)

# Create solver.
solver = S.Adam()
solver.set_parameters(nn.get_parameters())

# PPL = e**loss reports perplexity alongside the loss.
trainer = Trainer(inputs=[x, t], loss=loss,
                  metrics=dict(PPL=np.e**loss), solver=solver)
trainer.run(train_data_iter, valid_data_iter, epochs=max_epoch)

# Write the word2vec text-format header (vocab size, embedding dim).
# NOTE(review): vocab_size - 1 presumably excludes a padding/unknown token;
# the embedding rows are written elsewhere — verify against the full script.
with open('vectors.txt', 'w') as f:
    f.write('{} {}\n'.format(vocab_size - 1, embedding_size))
def cifar10_resnet23_loss(pred, label):
    """Return the mean softmax cross-entropy between `pred` and `label`."""
    per_example_ce = F.softmax_cross_entropy(pred, label)
    return F.mean(per_example_ce)
def classification_svd():
    """Train an SVD-compressed ("slim") LeNet on MNIST.

    The slim network's parameters are initialized by decomposing a trained
    reference network (`decompose_network_and_set_params`), then fine-tuned.
    Saves the best-validation parameters during training and the final
    parameters at the end.
    """
    args = get_args()

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    mnist_cnn_prediction = mnist_lenet_prediction_slim

    # TRAIN
    reference = "reference"  # parameter scope of the pretrained network
    slim = "slim"            # parameter scope of the compressed network
    rrate = 0.5              # reduction rate

    # Create input variables.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size, 1])
    # Create `reference` and "slim" prediction graph.
    model_load_path = args.model_load_path
    pred = mnist_cnn_prediction(image, scope=slim, rrate=rrate, test=False)
    # Keep forward activations so pred.d is readable after backward.
    pred.persistent = True
    # Decompose the reference parameters (SVD) and set them into `slim`.
    decompose_network_and_set_params(model_load_path, reference, slim, rrate)
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create reference predition graph.
    vpred = mnist_cnn_prediction(vimage, scope=slim, rrate=rrate, test=True)

    # Create Solver. Only the slim network's parameters are updated.
    solver = S.Adam(args.learning_rate)
    with nn.parameter_scope(slim):
        solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)

    # Initialize DataIterator for MNIST.
    data = data_iterator_mnist(args.batch_size, True)
    vdata = data_iterator_mnist(args.batch_size, False)
    best_ve = 1.0
    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            monitor_verr.add(i, ve / args.val_iter)
            # Save the parameters whenever the validation error improves.
            # NOTE(review): nesting of this `if` inside the validation branch
            # is reconstructed from formatting — confirm against the original.
            if ve < best_ve:
                nn.save_parameters(
                    os.path.join(args.model_save_path, 'params_%06d.h5' % i))
                best_ve = ve
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    # Final evaluation after training.
    # NOTE(review): `ve` here is a raw sum; it is normalized in the add() call.
    ve = 0.0
    for j in range(args.val_iter):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    monitor_verr.add(i, ve / args.val_iter)

    parameter_file = os.path.join(args.model_save_path,
                                  'params_{:06}.h5'.format(args.max_iter))
    nn.save_parameters(parameter_file)
def main():
    """
    Main script (Virtual Adversarial Training on MNIST).

    Steps:

    * Get and set context.
    * Load Dataset
    * Initialize DataIterator.
    * Create Networks
    *   Net for Labeled Data
    *   Net for Unlabeled Data
    *   Net for Test Data
    * Create Solver.
    * Training Loop.
    *   Test
    *   Training
    *     by Labeled Data
    *       Calculate Cross Entropy Loss
    *     by Unlabeled Data
    *       Estimate Adversarial Direction
    *       Calculate LDS Loss
    """
    args = get_args()

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    shape_x = (1, 28, 28)
    n_h = args.n_units
    n_y = args.n_class

    # Load MNist Dataset
    from mnist_data import MnistDataSource
    with MnistDataSource(train=True) as d:
        x_t = d.images
        t_t = d.labels
    with MnistDataSource(train=False) as d:
        x_v = d.images
        t_v = d.labels
    # Scale pixel values into [0, 1).
    x_t = np.array(x_t / 256.0).astype(np.float32)
    x_t, t_t = x_t[:args.n_train], t_t[:args.n_train]
    x_v, t_v = x_v[:args.n_valid], t_v[:args.n_valid]

    # Create Semi-supervised Datasets. The labeled subset is also included
    # in the unlabeled pool (np.r_ concatenation).
    x_l, t_l, x_u, _ = split_dataset(x_t, t_t, args.n_labeled, args.n_class)
    x_u = np.r_[x_l, x_u]
    x_v = np.array(x_v / 256.0).astype(np.float32)

    # Create DataIterators for datasets of labeled, unlabeled and validation
    di_l = DataIterator(args.batchsize_l, [x_l, t_l])
    di_u = DataIterator(args.batchsize_u, [x_u])
    di_v = DataIterator(args.batchsize_v, [x_v, t_v])

    # Create networks
    # feed-forward-net building function
    def forward(x, test=False):
        return mlp_net(x, n_h, n_y, test)

    # Net for learning labeled data
    xl = nn.Variable((args.batchsize_l,) + shape_x, need_grad=False)
    hl = forward(xl, test=False)
    tl = nn.Variable((args.batchsize_l, 1), need_grad=False)
    loss_l = F.mean(F.softmax_cross_entropy(hl, tl))

    # Net for learning unlabeled data.
    # `r` is the perturbation direction (needs grad for the power method);
    # `eps` scales the perturbation and is filled per phase below.
    xu = nn.Variable((args.batchsize_u,) + shape_x, need_grad=False)
    r = nn.Variable((args.batchsize_u,) + shape_x, need_grad=True)
    eps = nn.Variable((args.batchsize_u,) + shape_x, need_grad=False)
    loss_u, yu = vat(xu, r, eps, forward, distance)

    # Net for evaluating valiation data
    xv = nn.Variable((args.batchsize_v,) + shape_x, need_grad=False)
    hv = forward(xv, test=True)
    tv = nn.Variable((args.batchsize_v, 1), need_grad=False)

    # Create solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Monitor trainig and validation stats.
    import nnabla.monitor as M
    monitor = M.Monitor(args.model_save_path)
    monitor_verr = M.MonitorSeries("Test error", monitor, interval=240)
    monitor_time = M.MonitorTimeElapsed("Elapsed time", monitor, interval=240)

    # Training Loop.
    # NOTE(review): `t0` appears unused in this chunk — verify before removal.
    t0 = time.time()

    for i in range(args.max_iter):

        # Validation Test
        if i % args.val_interval == 0:
            n_error = calc_validation_error(
                di_v, xv, tv, hv, args.val_iter)
            monitor_verr.add(i, n_error)

        #################################
        ## Training by Labeled Data #####
        #################################

        # input minibatch of labeled data into variables
        xl.d, tl.d = di_l.next()

        # initialize gradients
        solver.zero_grad()

        # forward, backward and update
        loss_l.forward(clear_no_need_grad=True)
        loss_l.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        #################################
        ## Training by Unlabeled Data ###
        #################################

        # input minibatch of unlabeled data into variables
        xu.d, = di_u.next()

        ##### Calculate Adversarial Noise #####
        # Sample random noise
        n = np.random.normal(size=xu.shape).astype(np.float32)
        # Normalize noise vector and input to variable
        r.d = get_direction(n)
        # Set xi, the power-method scaling parameter.
        eps.data.fill(args.xi_for_vat)
        # Calculate y without noise, only once.
        yu.forward(clear_buffer=True)

        # Do power method iteration: repeatedly backprop through the VAT
        # loss and renormalize `r` toward the dominant adversarial direction.
        for k in range(args.n_iter_for_power_method):
            # Initialize gradient to receive value
            r.grad.zero()
            # forward, backward, without update
            loss_u.forward(clear_no_need_grad=True)
            loss_u.backward(clear_buffer=True)
            # Normalize gradinet vector and input to variable
            r.d = get_direction(r.g)

        ##### Calculate loss for unlabeled data #####
        # Clear remained gradients
        solver.zero_grad()
        # Set epsilon, the adversarial noise scaling parameter.
        eps.data.fill(args.eps_for_vat)
        # forward, backward and update
        loss_u.forward(clear_no_need_grad=True)
        loss_u.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        ##### Learning rate update #####
        if i % args.iter_per_epoch == 0:
            solver.set_learning_rate(
                solver.learning_rate() * args.learning_rate_decay)
        monitor_time.add(i)

    # Evaluate the final model by the error rate with validation dataset
    valid_error = calc_validation_error(di_v, xv, tv, hv, args.val_iter)
    monitor_verr.add(i, valid_error)
    monitor_time.add(i)

    # Save the model.
    nnp_file = os.path.join(
        args.model_save_path, 'vat_%06d.nnp' % args.max_iter)
    runtime_contents = {
        'networks': [
            {'name': 'Validation',
             'batch_size': args.batchsize_v,
             'outputs': {'y': hv},
             'names': {'x': xv}}],
        'executors': [
            {'name': 'Runtime',
             'network': 'Validation',
             'data': ['x'],
             'output': ['y']}]}
    save.save(nnp_file, runtime_contents)

    # Sanity-check the saved NNP by re-running the forward pass in C++.
    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [xv.d], [xv], hv, nnp_file)
def distil():
    """Train a reduced-width "student" ResNet on CIFAR-10 by distillation
    from a pretrained "teacher".

    The loss is a weighted sum of hard-label cross entropy and a soft
    cross entropy against the teacher's outputs. Only the student scope's
    parameters are updated; the best-validation and final parameters are
    saved to `args.model_save_path`.
    """
    args = get_args()

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(
        args.context, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    if args.net == "cifar10_resnet23_prediction":
        model_prediction = cifar10_resnet23_prediction
        data_iterator = data_iterator_cifar10
        c = 3
        h = w = 32
        n_train = 50000
        n_valid = 10000

    # TRAIN
    teacher = "teacher"
    student = "student"
    maps = args.maps
    rrate = args.reduction_rate  # width reduction applied to the student

    # Create input variables.
    image = nn.Variable([args.batch_size, c, h, w])
    image.persistent = True  # not clear the intermediate buffer re-used
    label = nn.Variable([args.batch_size, 1])
    label.persistent = True  # not clear the intermediate buffer re-used

    # Create `teacher` and "student" prediction graph.
    model_load_path = args.model_load_path
    nn.load_parameters(model_load_path)
    pred_label = model_prediction(
        image, net=teacher, maps=maps, test=not args.use_batch)
    pred_label.need_grad = False  # no need backward through teacher graph
    pred = model_prediction(image, net=student,
                            maps=int(maps * (1. - rrate)), test=False)
    pred.persistent = True  # not clear the intermediate buffer used
    loss_ce = F.mean(F.softmax_cross_entropy(pred, label))
    loss_ce_soft = ce_soft(pred, pred_label)
    loss = args.weight_ce * loss_ce + args.weight_ce_soft * loss_ce_soft

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, c, h, w])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create teacher prediction graph.
    vpred = model_prediction(vimage, net=student,
                             maps=int(maps * (1. - rrate)), test=True)

    # Create Solver; only the student scope's parameters are updated.
    solver = S.Adam(args.learning_rate)
    with nn.parameter_scope(student):
        solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=1)

    # Initialize DataIterator for MNIST.
    data = data_iterator(args.batch_size, True)
    vdata = data_iterator(args.batch_size, False)
    best_ve = 1.0
    n_val_batches = int(n_valid / args.batch_size)
    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation over the whole validation set.
            ve = 0.0
            for j in range(n_val_batches):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            ve /= n_val_batches
            monitor_verr.add(i, ve)
            # Save the parameters whenever the validation error improves.
            if ve < best_ve:
                nn.save_parameters(os.path.join(
                    args.model_save_path, 'params_%06d.h5' % i))
                best_ve = ve
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    # Final evaluation after training.
    ve = 0.0
    for j in range(n_val_batches):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    ve /= n_val_batches
    # BUG FIX: `ve` is already averaged over the validation batches above.
    # The original logged `ve / args.val_iter`, dividing a second time and
    # under-reporting the final test error; log the mean directly, matching
    # the in-loop report.
    monitor_verr.add(i, ve)

    parameter_file = os.path.join(
        args.model_save_path, 'params_{:06}.h5'.format(args.max_iter))
    nn.save_parameters(parameter_file)
def train():
    """Train a WaveNet speech-synthesis model on LibriSpeech.

    Builds a categorical next-sample prediction graph over mu-law quantized
    audio, optionally conditioned on a speaker embedding, trains with Adam,
    periodically decodes and saves generated audio plus a checkpoint, and
    exports NNP files before and after training.
    """
    args = get_args()

    # Set context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in {}:{}".format(args.context, args.type_config))
    ctx = get_extension_context(args.context, device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    data_iterator = data_iterator_librispeech(args.batch_size, args.data_dir)
    # dirty hack: reach into the iterator's private data source for n_speaker.
    _data_source = data_iterator._data_source

    # model
    x = nn.Variable(shape=(args.batch_size, data_config.duration, 1))  # (B, T, 1)
    onehot = F.one_hot(x, shape=(data_config.q_bit_len, ))  # (B, T, C)
    wavenet_input = F.transpose(onehot, (0, 2, 1))  # (B, C, T)

    # speaker embedding
    if args.use_speaker_id:
        s_id = nn.Variable(shape=(args.batch_size, 1))
        with nn.parameter_scope("speaker_embedding"):
            s_emb = PF.embed(s_id,
                             n_inputs=_data_source.n_speaker,
                             n_features=WavenetConfig.speaker_dims)
            s_emb = F.transpose(s_emb, (0, 2, 1))
    else:
        s_emb = None

    net = WaveNet()
    wavenet_output = net(wavenet_input, s_emb)

    # Back to channel-last logits for the softmax over quantization levels.
    # NOTE(review): original comment said (B, T, 1); given wavenet_input is
    # (B, C, T), this transpose presumably yields (B, T, C) — confirm.
    pred = F.transpose(wavenet_output, (0, 2, 1))

    t = nn.Variable(shape=(args.batch_size, data_config.duration, 1))

    loss = F.mean(F.softmax_cross_entropy(pred, t))

    # for generation
    prob = F.softmax(pred)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # load checkpoint
    start_point = 0
    if args.checkpoint is not None:
        # load weights and solver state info from specified checkpoint file.
        start_point = load_checkpoint(args.checkpoint, solver)

    # Create monitor.
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)

    # setup save env.
    audio_save_path = os.path.join(os.path.abspath(args.model_save_path),
                                   "audio_results")
    if audio_save_path and not os.path.exists(audio_save_path):
        os.makedirs(audio_save_path)

    # save_nnp: export the untrained graph for reproducibility.
    contents = save_nnp({'x': x}, {'y': wavenet_output}, args.batch_size)
    save.save(
        os.path.join(args.model_save_path,
                     'Speechsynthesis_result_epoch0.nnp'), contents)

    # Training loop.
    for i in range(start_point, args.max_iter):
        # todo: validation

        x.d, _speaker, t.d = data_iterator.next()
        if args.use_speaker_id:
            s_id.d = _speaker.reshape(-1, 1)

        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.update()

        # Cast so loss.d is readable as float32 regardless of type_config.
        loss.data.cast(np.float32, ctx)
        monitor_loss.add(i, loss.d.copy())

        if i % args.model_save_interval == 0:
            # Decode the most likely quantization level per step to audio.
            prob.forward()
            audios = mu_law_decode(
                np.argmax(prob.d, axis=-1),
                quantize=data_config.q_bit_len)  # (B, T)
            save_audio(audios, i, audio_save_path)
            # save checkpoint file
            # NOTE(review): checkpoint goes into audio_save_path, not
            # model_save_path — looks unintentional; verify.
            save_checkpoint(audio_save_path, i, solver)

    # save_nnp: export the trained graph.
    contents = save_nnp({'x': x}, {'y': wavenet_output}, args.batch_size)
    save.save(
        os.path.join(args.model_save_path,
                     'Speechsynthesis_result.nnp'), contents)
def main():
    """Build MNIST training/validation graphs and export a training NNP.

    Locates nnabla-examples via the `NNABLA_EXAMPLES_ROOT` envvar, builds
    training and validation loss graphs sharing inputs `x`/`t`, and writes a
    full training definition (networks, optimizer, datasets, monitors) into
    '<net>_initialized.nnp'.
    """
    # Read envvar `NNABLA_EXAMPLES_ROOT` to identify the path to your local
    # nnabla-examples directory.
    HERE = os.path.dirname(__file__)
    nnabla_examples_root = os.environ.get('NNABLA_EXAMPLES_ROOT', os.path.join(
        HERE, '../../../../nnabla-examples'))
    mnist_examples_root = os.path.realpath(
        os.path.join(nnabla_examples_root, 'mnist-collection'))
    sys.path.append(mnist_examples_root)
    nnabla_examples_git_url = 'https://github.com/sony/nnabla-examples'

    # Check if nnabla-examples found.
    try:
        from args import get_args
    except ImportError:
        print(
            'An envvar `NNABLA_EXAMPLES_ROOT`'
            ' which locates the local path to '
            '[nnabla-examples]({})'
            ' repository must be set correctly.'.format(
                nnabla_examples_git_url),
            file=sys.stderr)
        raise

    # Import MNIST data
    from mnist_data import data_iterator_mnist
    from classification import mnist_lenet_prediction, mnist_resnet_prediction

    import argparse
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--max_epoch", "-me", type=int, default=100)
    parser.add_argument("--iter_per_epoch", "-ipe", type=int, default=937)
    parser.add_argument("--cache_dir", "-cd", type=str, default='cache')
    parser.add_argument("--batch-size", "-b", type=int, default=128)
    parser.add_argument("--learning-rate", "-l", type=float, default=1e-3)
    parser.add_argument("--weight-decay", "-w", type=float, default=0)
    parser.add_argument("--device-id", "-d", type=str, default='0')
    parser.add_argument("--type-config", "-t", type=str, default='float')
    parser.add_argument("--net", "-n", type=str, default='lenet')
    parser.add_argument('--context', '-c', type=str, default='cpu',
                        help="Extension modules. ex) 'cpu', 'cudnn'.")
    args = parser.parse_args()
    # CLEANUP: removed redundant `args_added = parser.parse_args()` — it
    # re-parsed the identical argv and was only read for `iter_per_epoch`,
    # which `args` already carries.

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(
        args.context, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    # Select the network builder; default is LeNet.
    mnist_cnn_prediction = mnist_lenet_prediction
    if args.net == 'resnet':
        mnist_cnn_prediction = mnist_resnet_prediction

    # Create a computation graph to be saved.
    # Training (test=False) and validation (test=True) losses share `x`/`t`.
    x = nn.Variable([args.batch_size, 1, 28, 28])
    t = nn.Variable([args.batch_size, 1])
    h_t = mnist_cnn_prediction(x, test=False, aug=False)
    loss_t = F.mean(F.softmax_cross_entropy(h_t, t))
    h_v = mnist_cnn_prediction(x, test=True, aug=False)
    loss_v = F.mean(F.softmax_cross_entropy(h_v, t))

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Save NNP file (used in C++ inference later.).
    nnp_file = '{}_initialized.nnp'.format(args.net)
    training_contents = {
        'global_config': {'default_context': ctx},
        'training_config':
            {'max_epoch': args.max_epoch,
             'iter_per_epoch': args.iter_per_epoch,
             'save_best': True},
        'networks': [
            {'name': 'training',
             'batch_size': args.batch_size,
             'outputs': {'loss': loss_t},
             'names': {'x': x, 'y': t, 'loss': loss_t}},
            {'name': 'validation',
             'batch_size': args.batch_size,
             'outputs': {'loss': loss_v},
             'names': {'x': x, 'y': t, 'loss': loss_v}}],
        'optimizers': [
            {'name': 'optimizer',
             'solver': solver,
             'network': 'training',
             'dataset': 'mnist_training',
             'weight_decay': 0,
             'lr_decay': 1,
             'lr_decay_interval': 1,
             'update_interval': 1}],
        'datasets': [
            {'name': 'mnist_training',
             'uri': 'MNIST_TRAINING',
             'cache_dir': args.cache_dir + '/mnist_training.cache/',
             'variables': {'x': x, 'y': t},
             'shuffle': True,
             'batch_size': args.batch_size,
             'no_image_normalization': True},
            {'name': 'mnist_validation',
             'uri': 'MNIST_VALIDATION',
             'cache_dir': args.cache_dir + '/mnist_test.cache/',
             'variables': {'x': x, 'y': t},
             'shuffle': False,
             'batch_size': args.batch_size,
             'no_image_normalization': True
             }],
        'monitors': [
            # NOTE(review): 'training_loss' monitors the 'validation' network;
            # possibly intentional (monitor without dropout/aug) — verify.
            {'name': 'training_loss',
             'network': 'validation',
             'dataset': 'mnist_training'},
            {'name': 'validation_loss',
             'network': 'validation',
             'dataset': 'mnist_validation'}],
    }
    nn.utils.save.save(nnp_file, training_contents)