def __init__(self, name, monitor, comm, loss, error, batch_size, time=True, flush_interval=10):
    """Track per-epoch loss/error and publish them through nnabla monitors.

    Monitor series are created on rank 0 only; every other rank just
    accumulates statistics locally.
    """
    self.name = name
    self.comm = comm
    self.loss = loss
    self.error = error
    self.batch_size = batch_size
    self.flush_interval = flush_interval
    # Running accumulators for the current epoch.
    self.epoch_loss = 0.0
    self.epoch_error = 0
    self.batch_counter = 0
    if self.comm.rank == 0:
        # Only the root process writes monitor logs.
        self.monitor_loss = M.MonitorSeries("%s loss" % name, monitor, interval=1)
        self.monitor_err = M.MonitorSeries("%s error" % name, monitor, interval=1)
        # Elapsed-time monitor is optional.
        self.monitor_time = (
            M.MonitorTimeElapsed("Epoch time", monitor, interval=1)
            if time else None)
def __init__(self, train_key2var_dict, test_key2var_dict, path):
    """Create one MonitorSeries per key of both the train and test dicts.

    Series for test keys are created after the train ones, so on a key
    collision the test entry wins — same as the original two loops.
    """
    monitor = nm.Monitor(path)
    ordered_keys = list(train_key2var_dict) + list(test_key2var_dict)
    self.monitor_dict = {
        key: nm.MonitorSeries(key, monitor, interval=1)
        for key in ordered_keys
    }
def validate(args):
    """Evaluate a trained DeepLabV3+ model on the validation set.

    Loads parameters from ``args.model_load_path``, runs one pass over the
    validation data, logs the batch-averaged mean IoU to the monitor, and
    returns the accumulated mIoU divided by ``args.val_samples``.
    """
    # Load trained parameters into the global parameter scope.
    _ = nn.load_parameters(args.model_load_path)
    # Data iterator over validation images/labels/masks.
    vdata = data_iterator_segmentation(
        args.val_samples, args.batch_size, args.val_dir, args.val_label_dir)
    # Build the DeepLabV3+ graph in test mode.
    v_model = train.get_model(args, test=True)
    v_model.pred.persistent = True  # Not clearing buffer of pred in forward
    # Create monitor
    monitor = M.Monitor(args.monitor_path)
    monitor_miou = M.MonitorSeries("mean IOU", monitor, interval=1)
    num_batches = args.val_samples // args.batch_size
    vmiou = 0.
    # Evaluation loop: accumulate per-batch mean IoU.
    for j in range(num_batches):
        images, labels, masks = vdata.next()
        v_model.image.d = images
        v_model.label.d = labels
        v_model.mask.d = masks
        v_model.pred.forward(clear_buffer=True)
        miou = train.compute_miou(args.num_class, labels,
                                  np.argmax(v_model.pred.d, axis=1), masks)
        vmiou += miou
        print(j, miou)
    # Log the batch-averaged mIoU.
    monitor_miou.add(0, vmiou / (args.val_samples / args.batch_size))
    # NOTE(review): the return value divides by val_samples while the logged
    # value divides by the number of batches — confirm the asymmetry is
    # intended by the caller before changing it.
    return vmiou / args.val_samples
def __init__(self, key2var_dict, monitor, interval):
    """Build a MonitorSeries for every entry of ``key2var_dict``.

    Each value must be an ``nn.Variable``; the assertion fires on the
    first offending entry.
    """
    super(MonitorManager, self).__init__()
    self.key2var_dict = key2var_dict
    self.monitor_dict = {}
    # Validate and register one series per key, in dict order.
    for key, variable in self.key2var_dict.items():
        assert isinstance(variable, nn.Variable), "invalid inputs?"
        series = nm.MonitorSeries(key, monitor, interval=interval)
        self.monitor_dict[key] = series
def train(max_iter=60000):
    """Train a VAE on MNIST and return the monitor output directory.

    Runs ``max_iter`` Adam updates, logging train/validation loss and
    elapsed time every 600 iterations.
    """
    # Initialize data provider
    di_l = I.data_iterator_mnist(batch_size, True)
    di_t = I.data_iterator_mnist(batch_size, False)

    # Network
    shape_x = (1, 28, 28)
    shape_z = (50, )
    x = nn.Variable((batch_size, ) + shape_x)
    # Same input variable feeds both graphs; test graph shares parameters.
    loss_l = I.vae(x, shape_z, test=False)
    loss_t = I.vae(x, shape_z, test=True)

    # Create solver
    solver = S.Adam(learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Monitors for training and validation
    path = cache_dir(os.path.join(I.name, "monitor"))
    monitor = M.Monitor(path)
    monitor_train_loss = M.MonitorSeries("train_loss", monitor, interval=600)
    monitor_val_loss = M.MonitorSeries("val_loss", monitor, interval=600)
    monitor_time = M.MonitorTimeElapsed("time", monitor, interval=600)

    # Training Loop.
    for i in range(max_iter):
        # Initialize gradients
        solver.zero_grad()

        # Forward, backward and update
        x.d, _ = di_l.next()
        loss_l.forward(clear_no_need_grad=True)
        loss_l.backward(clear_buffer=True)
        solver.weight_decay(weight_decay)
        solver.update()

        # Forward for test
        x.d, _ = di_t.next()
        loss_t.forward(clear_no_need_grad=True)

        # Monitor for logging
        monitor_train_loss.add(i, loss_l.d.copy())
        monitor_val_loss.add(i, loss_t.d.copy())
        monitor_time.add(i)

    return path
def set_series(self, name):
    """Register a new MonitorSeries under ``name``.

    Raises:
        AssertionError: when ``name`` is not a string.
    """
    assert isinstance(name, str), f"name must be string but {type(name)}."
    series = M.MonitorSeries(
        name,
        self.monitor,
        interval=self.interval,
        verbose=not self.silent,
    )
    self.series_monitors[name] = series
def save_result(self, result: Dict[str, float], evaluate=False) -> None:
    """Append each metric in ``result`` to its MonitorSeries.

    A series is created lazily the first time a key appears. Training
    results are logged at ``current_epoch + 1``; evaluation results at
    ``current_epoch``.
    """
    # Evaluation logs at the current epoch, training at the next one.
    step = self.current_epoch if evaluate else self.current_epoch + 1
    for key, value in result.items():
        if key not in self.monitor_series:
            # First time this metric is seen: create its series.
            self.monitor_series[key] = M.MonitorSeries(key, self.monitor, interval=1)
        self.monitor_series[key].add(step, value)
def __init__(self, save_path, series_losses, interval=1, save_time=True):
    """Create a Monitor rooted at ``save_path`` plus one series per loss.

    When ``save_time`` is true, an elapsed-time monitor ("Epoch time")
    is created as well.
    """
    self.monitor = M.Monitor(save_path)
    # One MonitorSeries per named loss, keyed by its name.
    self.series_monitors = {
        name: M.MonitorSeries(name, self.monitor, interval=interval)
        for name in series_losses
    }
    if save_time:
        self.monitor_time = M.MonitorTimeElapsed(
            "Epoch time", self.monitor, interval=interval)
def train(max_iter=5000, learning_rate=0.001, weight_decay=0):
    """Train a siamese-style network pair and return the monitor directory.

    Every 100 iterations a 10-batch validation pass is logged; parameters
    are saved to ``params.h5`` under the monitor path when done.
    """
    train = create_net(False)
    test = create_net(True)

    # Create the solver
    solver = S.Adam(learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitors
    path = cache_dir(os.path.join(I.name, "monitor"))
    monitor = M.Monitor(path)
    monitor_loss_train = M.MonitorSeries("training_loss", monitor, interval=100)
    monitor_time = M.MonitorTimeElapsed("time", monitor, interval=100)
    monitor_loss_val = M.MonitorSeries("val_loss", monitor, interval=100)

    # Run training
    for i in range(max_iter):
        # Periodic validation: average loss over 10 validation batches.
        if (i + 1) % 100 == 0:
            val_error = 0.0
            val_iter = 10
            for j in range(val_iter):
                test.image0.d, test.image1.d, test.label.d = test.data.next()
                test.loss.forward(clear_buffer=True)
                val_error += test.loss.d
            monitor_loss_val.add(i, val_error / val_iter)

        train.image0.d, train.image1.d, train.label.d = train.data.next()
        solver.zero_grad()
        train.loss.forward(clear_no_need_grad=True)
        train.loss.backward(clear_buffer=True)
        solver.weight_decay(weight_decay)
        solver.update()
        monitor_loss_train.add(i, train.loss.d.copy())
        monitor_time.add(i)

    nn.save_parameters(os.path.join(path, "params.h5"))
    return path
def train():
    """
    Main script.

    Steps:
    * Parse command line arguments.
    * Specify a context for computation.
    * Initialize DataIterator for MNIST.
    * Construct a computation graph for training and validation.
    * Initialize a solver and set parameter variables to it.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Computate error rate for validation data (periodically)
      * Get a next minibatch.
      * Set parameter gradients zero
      * Execute forwardprop on the training graph.
      * Execute backprop.
      * Solver updates parameters by using gradients computed by backprop.
      * Compute training error
    """
    args = get_args(monitor_path='tmp.monitor.bnn')

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Initialize DataIterator for MNIST.
    data = data_iterator_mnist(args.batch_size, True)
    vdata = data_iterator_mnist(args.batch_size, False)

    # Create CNN network for both training and testing.
    # Default network; overridden below by args.net.
    mnist_cnn_prediction = mnist_inq_lenet_prediction
    if args.net == 'inq':
        mnist_cnn_prediction = mnist_inq_lenet_prediction
    elif args.net == 'inq_resnet':
        mnist_cnn_prediction = mnist_inq_resnet_prediction

    # TRAIN
    # Create input variables.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    pred = mnist_cnn_prediction(image / 255, test=False)
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    vpred = mnist_cnn_prediction(vimage / 255, test=True)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = M.MonitorSeries("Training error", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = M.MonitorSeries("Test error", monitor, interval=10)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            monitor_verr.add(i, ve / args.val_iter)
        if i % args.model_save_interval == 0:
            nn.save_parameters(os.path.join(
                args.model_save_path, 'params_%06d.h5' % i))
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        # Training backward & update
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        # Monitor
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    # Save the final parameters.
    parameter_file = os.path.join(
        args.model_save_path, 'params_%06d.h5' % args.max_iter)
    nn.save_parameters(parameter_file)
def train():
    """Train a GGNN on the bAbI task-15 dataset until validation accuracy
    reaches ``--threshold``, then print one correct and one wrong example.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--train-file", type=str)
    parser.add_argument("--valid-file", type=str)
    parser.add_argument("--num-training-examples", type=int, default=50)
    parser.add_argument("--accum-grad", type=int, default=1)
    parser.add_argument("--valid-interval", type=int, default=200)
    parser.add_argument("--threshold", type=float, default=0.95)
    parser.add_argument("--context", type=str, default="cpu")
    parser.add_argument("--device-id", type=int, default=0)
    args = parser.parse_args()

    from nnabla.ext_utils import get_extension_context
    extension_module = args.context
    ctx = get_extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # prepare data iterators
    tdata = data_iterator(
        BAbI15DataSource(args.train_file, args.num_training_examples,
                         shuffle=True),
        1, False, False, False)
    vdata = data_iterator(
        BAbI15DataSource(args.valid_file, 1000, shuffle=True),
        1, False, False, False)

    # prepare monitors
    monitor = M.Monitor("./bAbI15")
    tloss = M.MonitorSeries("Training Loss", monitor, interval=10)
    terror = M.MonitorSeries("Training Error", monitor, interval=10)
    verror = M.MonitorSeries("Validation Error", monitor, interval=1)

    # prepare solver
    solver = S.Adam()
    # Parameters only exist after the first graph construction, so the
    # solver is initialized lazily inside the loop.
    solver_initialized = False

    cnt = 0
    while True:
        l = 0.0
        e = 0.0
        solver.zero_grad()
        for _ in range(args.accum_grad):
            # read next data
            x = tdata.next()
            V = x[1][0][0]
            E = x[2][0][0]
            ans = x[3][0][0]
            # construct GGNN
            output = predict(V, E)
            output = F.reshape(output, (1, output.shape[0]))
            # initialize solver
            if not solver_initialized:
                solver.set_parameters(nn.get_parameters())
                solver_initialized = True
                solver.zero_grad()
            # calculate loss/error
            label = nn.Variable((1, 1))
            label.data.data[0, 0] = ans
            # Unlinked copy so the error metric does not backprop.
            output2 = output.unlinked()
            loss = F.mean(F.softmax_cross_entropy(output, label))
            error = F.mean(F.top_n_error(output2, label))
            F.sink(loss, error).forward(clear_no_need_grad=True)
            loss.backward(clear_buffer=True)
            l += loss.data.data
            e += error.data.data
        # dump log
        tloss.add(cnt, l / args.accum_grad)
        terror.add(cnt, e / args.accum_grad)
        l = 0.0
        e = 0.0

        solver.update()
        cnt += 1
        if cnt % args.valid_interval == 0:
            # validation
            validation_error = 0
            correct_example = None
            wrong_example = None
            for _ in range(vdata.size):
                x = vdata.next()
                id2str = x[0][0][0]
                V = x[1][0][0]
                E = x[2][0][0]
                ans = x[3][0][0]
                output = predict(V, E)
                output = F.reshape(output, (1, output.shape[0]))
                # calculate error
                label = nn.Variable((1, 1))
                label.data.data[0, 0] = ans
                error = F.top_n_error(output, label)
                error.forward(clear_no_need_grad=True)
                # Keep the first wrong and first correct example for display.
                if error.data.data > 0.5:
                    if wrong_example is None:
                        wrong_example = (id2str, V, E, ans, output.data.data)
                else:
                    if correct_example is None:
                        correct_example = (id2str, V, E, ans, output.data.data)
                validation_error += error.data.data
            validation_error /= vdata.size
            verror.add(cnt, validation_error)
            accuracy = 1 - validation_error
            if accuracy >= args.threshold:
                # Pretty-print a stored example: facts, question, answer.
                def show(example):
                    for i, j in example[2]["is"]:
                        print("{} is {}.".format(example[0][i], example[0][j]))
                    for i, j in example[2]["has_fear"]:
                        print("{} are afraid of {}.".format(
                            example[0][i], example[0][j]))
                    i = np.argmax(example[1])
                    print("What is {} afraid of?".format(example[0][i]))
                    i = np.argmax(example[4])
                    print("Expected: {}, Actual: {}".format(
                        example[0][example[3]], example[0][i]))
                if correct_example is not None:
                    show(correct_example)
                if wrong_example is not None:
                    show(wrong_example)
                break
def main(): """ Main script. Steps: * Setup calculation environment * Initialize data iterator. * Create Networks * Create Solver. * Training Loop. * Training * Test * Save """ # Set args args = get_args(monitor_path='tmp.monitor.vae', max_iter=60000, model_save_path=None, learning_rate=3e-4, batch_size=100, weight_decay=0) # Get context. from nnabla.ext_utils import get_extension_context logger.info("Running in %s" % args.context) ctx = get_extension_context(args.context, device_id=args.device_id, type_config=args.type_config) nn.set_default_context(ctx) # Initialize data provider di_l = data_iterator_mnist(args.batch_size, True) di_t = data_iterator_mnist(args.batch_size, False) # Network shape_x = (1, 28, 28) shape_z = (50, ) x = nn.Variable((args.batch_size, ) + shape_x) loss_l = vae(x, shape_z, test=False) loss_t = vae(x, shape_z, test=True) # Create solver solver = S.Adam(args.learning_rate) solver.set_parameters(nn.get_parameters()) # Monitors for training and validation monitor = M.Monitor(args.model_save_path) monitor_training_loss = M.MonitorSeries("Training loss", monitor, interval=600) monitor_test_loss = M.MonitorSeries("Test loss", monitor, interval=600) monitor_time = M.MonitorTimeElapsed("Elapsed time", monitor, interval=600) # Training Loop. for i in range(args.max_iter): # Initialize gradients solver.zero_grad() # Forward, backward and update x.d, _ = di_l.next() loss_l.forward(clear_no_need_grad=True) loss_l.backward(clear_buffer=True) solver.weight_decay(args.weight_decay) solver.update() # Forward for test x.d, _ = di_t.next() loss_t.forward(clear_no_need_grad=True) # Monitor for logging monitor_training_loss.add(i, loss_l.d.copy()) monitor_test_loss.add(i, loss_t.d.copy()) monitor_time.add(i) # Save the model nn.save_parameters( os.path.join(args.model_save_path, 'params_%06d.h5' % args.max_iter))
def train(args): """ Main script. """ # Get context. from nnabla.ext_utils import get_extension_context logger.info("Running in %s" % args.context) ctx = get_extension_context(args.context, device_id=args.device_id, type_config=args.type_config) nn.set_default_context(ctx) # Create CNN network for both training and testing. # TRAIN # Fake path z = nn.Variable([args.batch_size, 100, 1, 1]) fake = generator(z) fake.persistent = True # Not to clear at backward pred_fake = discriminator(fake) loss_gen = F.mean( F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape))) fake_dis = fake.get_unlinked_variable(need_grad=True) fake_dis.need_grad = True # TODO: Workaround until v1.0.2 pred_fake_dis = discriminator(fake_dis) loss_dis = F.mean( F.sigmoid_cross_entropy(pred_fake_dis, F.constant(0, pred_fake_dis.shape))) # Real path x = nn.Variable([args.batch_size, 1, 28, 28]) pred_real = discriminator(x) loss_dis += F.mean( F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape))) # Create Solver. solver_gen = S.Adam(args.learning_rate, beta1=0.5) solver_dis = S.Adam(args.learning_rate, beta1=0.5) with nn.parameter_scope("gen"): solver_gen.set_parameters(nn.get_parameters()) with nn.parameter_scope("dis"): solver_dis.set_parameters(nn.get_parameters()) start_point = 0 if args.checkpoint is not None: # load weights and solver state info from specified checkpoint files. start_point = load_checkpoint(args.checkpoint, { "gen": solver_gen, "dis": solver_dis }) # Create monitor. import nnabla.monitor as M monitor = M.Monitor(args.monitor_path) monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10) monitor_loss_dis = M.MonitorSeries("Discriminator loss", monitor, interval=10) monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100) monitor_fake = M.MonitorImageTile("Fake images", monitor, normalize_method=lambda x: (x + 1) / 2.) 
data = data_iterator_mnist(args.batch_size, True) # Save_nnp contents = save_nnp({'x': z}, {'y': fake}, args.batch_size) save.save( os.path.join(args.model_save_path, 'Generator_result_epoch0.nnp'), contents) contents = save_nnp({'x': x}, {'y': pred_real}, args.batch_size) save.save( os.path.join(args.model_save_path, 'Discriminator_result_epoch0.nnp'), contents) # Training loop. for i in range(start_point, args.max_iter): if i % args.model_save_interval == 0: save_checkpoint(args.model_save_path, i, { "gen": solver_gen, "dis": solver_dis }) # Training forward image, _ = data.next() x.d = image / 255. - 0.5 # [0, 255] to [-1, 1] z.d = np.random.randn(*z.shape) # Generator update. solver_gen.zero_grad() loss_gen.forward(clear_no_need_grad=True) loss_gen.backward(clear_buffer=True) solver_gen.weight_decay(args.weight_decay) solver_gen.update() monitor_fake.add(i, fake) monitor_loss_gen.add(i, loss_gen.d.copy()) # Discriminator update. solver_dis.zero_grad() loss_dis.forward(clear_no_need_grad=True) loss_dis.backward(clear_buffer=True) solver_dis.weight_decay(args.weight_decay) solver_dis.update() monitor_loss_dis.add(i, loss_dis.d.copy()) monitor_time.add(i) with nn.parameter_scope("gen"): nn.save_parameters( os.path.join(args.model_save_path, "generator_param_%06d.h5" % i)) with nn.parameter_scope("dis"): nn.save_parameters( os.path.join(args.model_save_path, "discriminator_param_%06d.h5" % i)) # Save_nnp contents = save_nnp({'x': z}, {'y': fake}, args.batch_size) save.save(os.path.join(args.model_save_path, 'Generator_result.nnp'), contents) contents = save_nnp({'x': x}, {'y': pred_real}, args.batch_size) save.save(os.path.join(args.model_save_path, 'Discriminator_result.nnp'), contents)
def valid():
    """
    Main script for validation.

    Loads trained parameters, streams the 50000 ImageNet validation samples
    through a DALI pipeline, and logs the averaged top-n error.
    """
    args = get_args()
    n_valid_samples = 50000
    num_classes = 1000
    # Fixed: removed the stray "\B" escape and the "devided" typo from the
    # assertion message.
    assert n_valid_samples % args.batch_size == 0, \
        "Set batch_size such that n_valid_samples (50000) can be divided by batch_size. Batch size is now set as {}".format(
            args.batch_size)

    # Context
    from nnabla.ext_utils import get_extension_context
    extension_module = "cudnn"
    ctx = get_extension_context(extension_module, device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Pipelines and Iterators for validation
    device_id = int(args.device_id)
    val_pipes = [ValPipeline(args.batch_size, args.num_threads, device_id,
                             args.val_cachefile_dir, args.val_list,
                             seed=device_id,
                             num_gpu=1)]
    val_pipes[0].build()
    vdata = DALIClassificationIterator(val_pipes,
                                       val_pipes[0].epoch_size("Reader"),
                                       auto_reset=True,
                                       stop_at_epoch=False)

    # Network for validation
    nn.load_parameters(args.model_load_path)
    v_model = get_model(args, num_classes, 1, args.accum_grad, test=True)
    v_e = F.mean(F.top_n_error(v_model.pred, v_model.label, n=args.top_n))

    # Monitors
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_verr = M.MonitorSeries("Validation error", monitor, interval=1)
    monitor_vtime = M.MonitorTimeElapsed(
        "Validation time", monitor, interval=1)

    # Validation
    ve_local = 0.
    val_iter_local = n_valid_samples // args.batch_size
    for i in range(val_iter_local):
        nextImage, nextLabel = vdata.next()
        v_model.image.data.copy_from(nextImage)
        v_model.label.data.copy_from(nextLabel)
        # ``np.float`` was removed in NumPy 1.24; the builtin ``float`` is
        # the exact type (float64) it aliased.
        v_model.image.data.cast(float, ctx)
        v_model.label.data.cast(np.int32, ctx)
        v_e.forward(clear_buffer=True)
        nn.logger.info("validation error is {} at {}-th batch".format(
            v_e.d, i))
        ve_local += v_e.d.copy()
    ve_local /= val_iter_local
    monitor_verr.add(0, ve_local)
    monitor_vtime.add(0)
# Each solver only owns the parameters created under its own scope.
with nn.parameter_scope("gen"):
    solver_gen.set_parameters(nn.get_parameters())
with nn.parameter_scope("dis"):
    solver_dis.set_parameters(nn.get_parameters())

# Take a look at how parameter scopes work.
print(len(nn.get_parameters()))
with nn.parameter_scope("gen"):
    print(len(nn.get_parameters()))
# Inside a parameter scope, the parameters returned by `get_parameters()`
# are filtered down to that scope.

# Set up monitors
path = cache_dir(os.path.join(I.name, "monitor"))
monitor = M.Monitor(path)
monitor_loss_gen = M.MonitorSeries("generator_loss", monitor, interval=100)
monitor_loss_dis = M.MonitorSeries("discriminator_loss", monitor, interval=100)
monitor_time = M.MonitorTimeElapsed("time", monitor, interval=100)
monitor_fake = M.MonitorImageTile("Fake images", monitor,
                                  normalize_method=lambda x: (x + 1) / 2.0)

# Define the parameter-saving function
def save_parameters(i):
    # Save generator and discriminator weights into separate files.
    with nn.parameter_scope("gen"):
        nn.save_parameters(os.path.join(path, "generator_param_%06d.h5" % i))
    with nn.parameter_scope("dis"):
        nn.save_parameters(
            os.path.join(path, "discriminator_param_%06d.h5" % i))
def main():
    """Train word2vec-style embeddings on PTB and print similar words.

    Learns a shared embedding ("e1") with negative sampling, saves the
    model, then reports nearest neighbours for the first
    ``max_check_words`` word ids.
    """
    # Get arguments
    args = get_args()
    data_file = "https://raw.githubusercontent.com/tomsercu/lstm/master/data/ptb.train.txt"
    model_file = args.work_dir + "model.h5"

    # Load Dataset
    itow, wtoi, dataset = load_ptbset(data_file)

    # Computation environment settings
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create data provider
    n_word = len(wtoi)
    n_dim = args.embed_dim
    batchsize = args.batchsize
    half_window = args.half_window_length
    n_negative = args.n_negative_sample

    di = DataIteratorForEmbeddingLearning(
        batchsize=batchsize,
        half_window=half_window,
        n_negative=n_negative,
        dataset=dataset)

    # Create model
    # - Real batch size including context samples and negative samples
    size = batchsize * (1 + n_negative) * (2 * (half_window - 1))

    # Model for learning
    # - input variables
    xl = nn.Variable((size,))  # variable for word
    yl = nn.Variable((size,))  # variable for context

    # Embed layers for word embedding function
    # - f_embed : word index x to get y, the n_dim vector
    # --  for each sample in a minibatch
    # Both embeddings share the same parameter scope "e1".
    hx = PF.embed(xl, n_word, n_dim, name="e1")  # feature vector for word
    hy = PF.embed(yl, n_word, n_dim, name="e1")  # feature vector for context
    hl = F.sum(hx * hy, axis=1)

    # -- Approximated likelihood of context prediction
    # pos: word context, neg negative samples
    tl = nn.Variable([size, ], need_grad=False)
    loss = F.sigmoid_cross_entropy(hl, tl)
    loss = F.mean(loss)

    # Model for test of searching similar words
    xr = nn.Variable((1,), need_grad=False)
    hr = PF.embed(xr, n_word, n_dim, name="e1")  # feature vector for test

    # Create solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    monitor = M.Monitor(args.work_dir)
    monitor_loss = M.MonitorSeries(
        "Training loss", monitor, interval=args.monitor_interval)
    monitor_time = M.MonitorTimeElapsed(
        "Training time", monitor, interval=args.monitor_interval)

    # Do training
    max_epoch = args.max_epoch
    for epoch in range(max_epoch):

        # iteration per epoch
        for i in range(di.n_batch):

            # get minibatch
            xi, yi, ti = di.next()

            # learn
            solver.zero_grad()
            xl.d, yl.d, tl.d = xi, yi, ti
            loss.forward(clear_no_need_grad=True)
            loss.backward(clear_buffer=True)
            solver.update()

            # monitor
            itr = epoch * di.n_batch + i
            monitor_loss.add(itr, loss.d)
            monitor_time.add(itr)

    # Save model
    nn.save_parameters(model_file)

    # Evaluate by similarity
    max_check_words = args.max_check_words
    for i in range(max_check_words):

        # prediction
        xr.d = i
        hr.forward(clear_buffer=True)
        h = hr.d

        # similarity calculation
        # Cosine-style similarity against all rows of the embedding matrix.
        w = nn.get_parameters()['e1/embed/W'].d
        s = np.sqrt((w * w).sum(1))
        w /= s.reshape((s.shape[0], 1))
        similarity = w.dot(h[0]) / s[i]

        # for understanding
        output_similar_words(itow, i, similarity)
def train(max_iter=24000):
    """Semi-supervised VAT training on MNIST; returns the monitor directory.

    Alternates a supervised update on labeled data with a virtual
    adversarial (unsupervised) update on unlabeled data, logging the
    validation error periodically.
    """
    shape_x = (1, 28, 28)
    n_h = args.n_units
    n_y = args.n_class

    # Load MNIST Dataset
    from mnist_data import load_mnist, data_iterator_mnist
    images, labels = load_mnist(train=True)
    rng = np.random.RandomState(706)
    inds = rng.permutation(len(images))

    def feed_labeled(i):
        j = inds[i]
        return images[j], labels[j]

    def feed_unlabeled(i):
        j = inds[i]
        return images[j], labels[j]

    di_l = I.data_iterator_simple(
        feed_labeled,
        args.n_labeled,
        args.batchsize_l,
        shuffle=True,
        rng=rng,
        with_file_cache=False,
    )
    di_u = I.data_iterator_simple(
        feed_unlabeled,
        args.n_train,
        args.batchsize_u,
        shuffle=True,
        rng=rng,
        with_file_cache=False,
    )
    di_v = data_iterator_mnist(args.batchsize_v, train=False)

    # Create networks
    # feed-forward-net building function
    def forward(x, test=False):
        return I.mlp_net(x, n_h, n_y, test)

    # Net for learning labeled data
    xl = nn.Variable((args.batchsize_l,) + shape_x, need_grad=False)
    yl = forward(xl, test=False)
    tl = nn.Variable((args.batchsize_l, 1), need_grad=False)
    loss_l = F.mean(F.softmax_cross_entropy(yl, tl))

    # Net for learning unlabeled data
    xu = nn.Variable((args.batchsize_u,) + shape_x, need_grad=False)
    yu = forward(xu, test=False)
    # Unlinked copy: the clean prediction is treated as a constant target.
    y1 = yu.get_unlinked_variable()
    y1.need_grad = False

    # L2-normalized perturbation direction for VAT.
    noise = nn.Variable((args.batchsize_u,) + shape_x, need_grad=True)
    r = noise / (F.sum(noise ** 2, [1, 2, 3], keepdims=True)) ** 0.5
    r.persistent = True
    y2 = forward(xu + args.xi_for_vat * r, test=False)
    y3 = forward(xu + args.eps_for_vat * r, test=False)
    loss_k = F.mean(I.distance(y1, y2))
    loss_u = F.mean(I.distance(y1, y3))

    # Net for evaluating validation data
    xv = nn.Variable((args.batchsize_v,) + shape_x, need_grad=False)
    hv = forward(xv, test=True)
    tv = nn.Variable((args.batchsize_v, 1), need_grad=False)
    err = F.mean(F.top_n_error(hv, tv, n=1))

    # Create solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Monitor training and validation stats.
    path = cache_dir(os.path.join(I.name, "monitor"))
    monitor = M.Monitor(path)
    monitor_verr = M.MonitorSeries("val_error", monitor, interval=240)
    monitor_time = M.MonitorTimeElapsed("time", monitor, interval=240)

    # Training Loop.
    for i in range(max_iter):

        # Validation Test
        if i % args.val_interval == 0:
            valid_error = I.calc_validation_error(
                di_v, xv, tv, err, args.val_iter)
            monitor_verr.add(i, valid_error)

        # forward, backward and update
        xl.d, tl.d = di_l.next()
        xl.d = xl.d / 255
        solver.zero_grad()
        loss_l.forward(clear_no_need_grad=True)
        loss_l.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        # Calculate y without noise, only once.
        xu.d, _ = di_u.next()
        xu.d = xu.d / 255
        yu.forward(clear_buffer=True)

        # Do power method iteration
        # Approximates the dominant adversarial direction.
        noise.d = np.random.normal(size=xu.shape).astype(np.float32)
        for k in range(args.n_iter_for_power_method):
            r.grad.zero()
            loss_k.forward(clear_no_need_grad=True)
            loss_k.backward(clear_buffer=True)
            noise.data.copy_from(r.grad)

        # forward, backward and update
        solver.zero_grad()
        loss_u.forward(clear_no_need_grad=True)
        loss_u.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        if i % args.iter_per_epoch == 0:
            solver.set_learning_rate(
                solver.learning_rate() * args.learning_rate_decay)
        monitor_time.add(i)

    # Evaluate the final model by the error rate with validation dataset
    valid_error = I.calc_validation_error(di_v, xv, tv, err, args.val_iter)
    monitor_verr.add(i, valid_error)
    monitor_time.add(i)

    return path
def main():
    """
    Main script.

    Steps:
    * Get and set context.
    * Load Dataset
    * Initialize DataIterator.
    * Create Networks
      * Net for Labeled Data
      * Net for Unlabeled Data
      * Net for Test Data
    * Create Solver.
    * Training Loop.
      * Test
      * Training
        * by Labeled Data
          * Calculate Supervised Loss
        * by Unlabeled Data
          * Calculate Virtual Adversarial Noise
          * Calculate Unsupervised Loss
    """
    args = get_args()

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context, device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    shape_x = (1, 28, 28)
    n_h = args.n_units
    n_y = args.n_class

    # Load MNIST Dataset
    from mnist_data import load_mnist, data_iterator_mnist
    images, labels = load_mnist(train=True)
    rng = np.random.RandomState(706)
    inds = rng.permutation(len(images))

    def feed_labeled(i):
        j = inds[i]
        return images[j], labels[j]

    def feed_unlabeled(i):
        j = inds[i]
        return images[j], labels[j]

    di_l = data_iterator_simple(feed_labeled, args.n_labeled,
                                args.batchsize_l, shuffle=True, rng=rng,
                                with_file_cache=False)
    di_u = data_iterator_simple(feed_unlabeled, args.n_train,
                                args.batchsize_u, shuffle=True, rng=rng,
                                with_file_cache=False)
    di_v = data_iterator_mnist(args.batchsize_v, train=False)

    # Create networks
    # feed-forward-net building function
    def forward(x, test=False):
        return mlp_net(x, n_h, n_y, test)

    # Net for learning labeled data
    xl = nn.Variable((args.batchsize_l, ) + shape_x, need_grad=False)
    yl = forward(xl, test=False)
    tl = nn.Variable((args.batchsize_l, 1), need_grad=False)
    loss_l = F.mean(F.softmax_cross_entropy(yl, tl))

    # Net for learning unlabeled data
    xu = nn.Variable((args.batchsize_u, ) + shape_x, need_grad=False)
    yu = forward(xu, test=False)
    # Unlinked copy: the clean prediction acts as a constant target.
    y1 = yu.get_unlinked_variable()
    y1.need_grad = False

    # L2-normalized perturbation direction for VAT.
    noise = nn.Variable((args.batchsize_u, ) + shape_x, need_grad=True)
    r = noise / (F.sum(noise**2, [1, 2, 3], keepdims=True))**0.5
    r.persistent = True
    y2 = forward(xu + args.xi_for_vat * r, test=False)
    y3 = forward(xu + args.eps_for_vat * r, test=False)
    loss_k = F.mean(distance(y1, y2))
    loss_u = F.mean(distance(y1, y3))

    # Net for evaluating validation data
    xv = nn.Variable((args.batchsize_v, ) + shape_x, need_grad=False)
    hv = forward(xv, test=True)
    tv = nn.Variable((args.batchsize_v, 1), need_grad=False)
    err = F.mean(F.top_n_error(hv, tv, n=1))

    # Create solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Monitor training and validation stats.
    import nnabla.monitor as M
    monitor = M.Monitor(args.model_save_path)
    monitor_verr = M.MonitorSeries("Test error", monitor, interval=240)
    monitor_time = M.MonitorTimeElapsed("Elapsed time", monitor, interval=240)

    # Training Loop.
    # NOTE(review): t0 is never read afterwards — dead assignment.
    t0 = time.time()

    for i in range(args.max_iter):

        # Validation Test
        if i % args.val_interval == 0:
            valid_error = calc_validation_error(
                di_v, xv, tv, err, args.val_iter)
            monitor_verr.add(i, valid_error)

        #################################
        ## Training by Labeled Data #####
        #################################

        # forward, backward and update
        xl.d, tl.d = di_l.next()
        xl.d = xl.d / 255
        solver.zero_grad()
        loss_l.forward(clear_no_need_grad=True)
        loss_l.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        #################################
        ## Training by Unlabeled Data ###
        #################################

        # Calculate y without noise, only once.
        xu.d, _ = di_u.next()
        xu.d = xu.d / 255
        yu.forward(clear_buffer=True)

        ##### Calculate Adversarial Noise #####
        # Do power method iteration
        noise.d = np.random.normal(size=xu.shape).astype(np.float32)
        for k in range(args.n_iter_for_power_method):
            r.grad.zero()
            loss_k.forward(clear_no_need_grad=True)
            loss_k.backward(clear_buffer=True)
            noise.data.copy_from(r.grad)

        ##### Calculate loss for unlabeled data #####
        # forward, backward and update
        solver.zero_grad()
        loss_u.forward(clear_no_need_grad=True)
        loss_u.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        ##### Learning rate update #####
        if i % args.iter_per_epoch == 0:
            solver.set_learning_rate(
                solver.learning_rate() * args.learning_rate_decay)
        monitor_time.add(i)

    # Evaluate the final model by the error rate with validation dataset
    valid_error = calc_validation_error(di_v, xv, tv, err, args.val_iter)
    monitor_verr.add(i, valid_error)
    monitor_time.add(i)

    # Save the model.
    parameter_file = os.path.join(
        args.model_save_path, 'params_%06d.h5' % args.max_iter)
    nn.save_parameters(parameter_file)
def train():
    """
    Train a ResNet classifier on ImageNet or Tiny ImageNet.

    Reads all configuration from ``get_args()``: device/context, dataset mode
    (``tiny_mode``), optimization hyper-parameters, checkpoint/monitor paths.
    Saves checkpoints periodically, validates periodically, and exports
    ``.nnp`` deployment archives before and after training.
    """
    args = get_args()

    # Get context (falls back to CPU when none is specified).
    from nnabla.ext_utils import get_extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = get_extension_context(
        extension_module, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    if args.tiny_mode:
        # We use Tiny ImageNet from Stanford CS231N class.
        # (Tiny ImageNet, https://tiny-imagenet.herokuapp.com/)
        # Tiny ImageNet consists of 200 categories, each category has 500 images
        # in training set. The image size is 64x64. To adapt ResNet into 64x64
        # image inputs, the input image size of ResNet is set as 56x56, and
        # the stride in the first conv and the first max pooling are removed.
        # Please check README.
        data = data_iterator_tiny_imagenet(args.batch_size, 'train')
        vdata = data_iterator_tiny_imagenet(args.batch_size, 'val')
        num_classes = 200
    else:
        # We use ImageNet.
        # (ImageNet, https://imagenet.herokuapp.com/)
        # ImageNet consists of 1000 categories, each category has 1280 images
        # in training set. The image size is various. To adapt ResNet into
        # 320x320 image inputs, the input image size of ResNet is set as
        # 224x224. We need to get tar file and create cache file(320x320 images).
        # Please check README.
        data = data_iterator_imagenet(args.batch_size, args.train_cachefile_dir)
        vdata = data_iterator_imagenet(args.batch_size, args.val_cachefile_dir)
        num_classes = 1000

    # Training graph; `pred` kept persistent so its buffer survives backward.
    t_model = get_model(
        args, num_classes, test=False, tiny=args.tiny_mode)
    t_model.pred.persistent = True  # Not clearing buffer of pred in backward
    # TODO: need_grad should be passed to get_unlinked_variable after v1.0.3 fix.
    t_pred2 = t_model.pred.get_unlinked_variable()
    t_pred2.need_grad = False
    # Top-1 error on the training batch (computed outside the gradient graph).
    t_e = F.mean(F.top_n_error(t_pred2, t_model.label))

    # Validation graph (test=True disables training-only behavior in the model).
    v_model = get_model(
        args, num_classes, test=True, tiny=args.tiny_mode)
    v_model.pred.persistent = True  # Not clearing buffer of pred in forward
    # TODO: need_grad should be passed to get_unlinked_variable after v1.0.3 fix.
    v_pred2 = v_model.pred.get_unlinked_variable()
    v_pred2.need_grad = False
    v_e = F.mean(F.top_n_error(v_pred2, v_model.label))

    # Save_nnp_Epoch0: export the untrained network for deployment tooling.
    contents = save_nnp({'x': v_model.image},
                        {'y': v_model.pred}, args.batch_size)
    save.save(os.path.join(args.model_save_path,
                           'Imagenet_result_epoch0.nnp'), contents)

    # Create Solver.
    solver = S.Momentum(args.learning_rate, 0.9)
    solver.set_parameters(nn.get_parameters())

    start_point = 0
    if args.checkpoint is not None:
        # load weights and solver state info from specified checkpoint file.
        start_point = load_checkpoint(args.checkpoint, solver)

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = M.MonitorSeries("Training error", monitor, interval=10)
    monitor_vloss = M.MonitorSeries("Validation loss", monitor, interval=10)
    monitor_verr = M.MonitorSeries("Validation error", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=10)
    monitor_vtime = M.MonitorTimeElapsed("Validation time", monitor, interval=10)

    # Training loop.
    for i in range(start_point, args.max_iter):
        # Save parameters
        if i % args.model_save_interval == 0:
            # save checkpoint file
            save_checkpoint(args.model_save_path, i, solver)

        # Validation
        if i % args.val_interval == 0 and i != 0:
            # Clear all intermediate memory to save memory.
            # t_model.loss.clear_recursive()
            l = 0.0
            e = 0.0
            for j in range(args.val_iter):
                images, labels = vdata.next()
                v_model.image.d = images
                v_model.label.d = labels
                # Cast to compact dtypes on-device to reduce transfer/memory.
                v_model.image.data.cast(np.uint8, ctx)
                v_model.label.data.cast(np.int32, ctx)
                v_model.loss.forward(clear_buffer=True)
                v_e.forward(clear_buffer=True)
                l += v_model.loss.d
                e += v_e.d
            monitor_vloss.add(i, l / args.val_iter)
            monitor_verr.add(i, e / args.val_iter)
            monitor_vtime.add(i)
            # Clear all intermediate memory to save memory.
            # v_model.loss.clear_recursive()

        # Training
        l = 0.0
        e = 0.0
        solver.zero_grad()

        def accumulate_error(l, e, t_model, t_e):
            # Accumulate scalar loss/error across the gradient-accumulation loop.
            l += t_model.loss.d
            e += t_e.d
            return l, e

        # Gradient accumulation loop: grads sum across accum_grad sub-batches
        # because zero_grad() is only called once per outer iteration.
        for j in range(args.accum_grad):
            images, labels = data.next()
            t_model.image.d = images
            t_model.label.d = labels
            t_model.image.data.cast(np.uint8, ctx)
            t_model.label.data.cast(np.int32, ctx)
            t_model.loss.forward(clear_no_need_grad=True)
            t_model.loss.backward(clear_buffer=True)  # Accumulating gradients
            t_e.forward(clear_buffer=True)
            l, e = accumulate_error(l, e, t_model, t_e)

        solver.weight_decay(args.weight_decay)
        solver.update()

        monitor_loss.add(i, l / args.accum_grad)
        monitor_err.add(i, e / args.accum_grad)
        monitor_time.add(i)

        # Learning rate decay at scheduled iter
        if i in args.learning_rate_decay_at:
            solver.set_learning_rate(solver.learning_rate() * 0.1)

    # Final weight snapshot and deployment archive.
    nn.save_parameters(
        os.path.join(args.model_save_path, 'param_%06d.h5' % args.max_iter))

    # Save_nnp
    contents = save_nnp({'x': v_model.image},
                        {'y': v_model.pred}, args.batch_size)
    save.save(os.path.join(args.model_save_path,
                           'Imagenet_result.nnp'), contents)
# define solvers solver_gen_AB = S.Adam(opt.learning_rate, beta1=0.5) solver_gen_BA = S.Adam(opt.learning_rate, beta1=0.5) solver_dis_A = S.Adam(opt.learning_rate, beta1=0.5) solver_dis_B = S.Adam(opt.learning_rate, beta1=0.5) # define updater updater = Updater(opt.batch_size, opt.lmd, opt.input_shape, iterator_A, iterator_B, gen_AB, gen_BA, dis_A, dis_B, solver_gen_AB, solver_gen_BA, solver_dis_A, solver_dis_B) # define monitor monitor = M.Monitor(opt.monitor_path) monitor_loss_cyc = M.MonitorSeries('Cycle loss', monitor, interval=opt.monitor_interval) monitor_loss_gen = M.MonitorSeries('Generator loss', monitor, interval=opt.monitor_interval) monitor_loss_dis = M.MonitorSeries('Discriminator loss', monitor, interval=opt.monitor_interval) monitor_time = M.MonitorTimeElapsed('Time', monitor, interval=opt.monitor_interval) monitor_A = M.MonitorImageTile('Fake images_A', monitor, normalize_method=lambda x: x + 1 / 2., interval=opt.generate_interval)
def train(args):
    """
    Train a vectorizer (encoder) to reproduce font images through a frozen,
    pre-trained DCGAN generator, by minimizing the pixel-wise squared error
    between the input image and the generator's reconstruction.

    NOTE: this is Python 2 code (print statements).
    """
    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN

    # Fake path
    #nn.load_parameters("/home/mizuochi/programing/font/dcgan_model_0220/generator_param_522000.h5")
    #z.d = np.random.randn(*z.shape)
    #gen.forward()
    #for i in range(40):
    #    Image.fromarray(np.uint8((gen.d[i][0]+1)*255/2.0)).save("./test/"+str(i)+".png")

    # Real path
    # x feeds the vectorizer; x_ref is the reconstruction target. Both are
    # fed the same batch below.
    x = nn.Variable([args.batch_size, 1, 28, 28])
    x_ref = nn.Variable([args.batch_size, 1, 28, 28])
    #vec = nn.Variable([args.batch_size, 100])
    pred_vec = vectorizer(x,test = False)
    print pred_vec.shape
    #z = pred_vec.reshape((args.batch_size, 100, 1, 1))
    #gen = generator(z,test=True)
    gen = generator(pred_vec,test=False)
    gen.persistent = True
    # Load pre-trained generator weights into the "gen" scope.
    with nn.parameter_scope("gen"):
        nn.load_parameters(
            "/home/mizuochi/programing/font/dcgan_model_0220/generator_param_290000.h5")
    #loss_dis = F.mean(F.sigmoid_cross_entropy(pred_vec, vec))
    print "x_ref shape",x_ref.shape
    print "gen shape",gen.shape
    #loss_dis = F.mean(F.squared_error(x_ref.reshape((64,28*28)), gen.reshape((64,28*28))))
    # Reconstruction loss between target image and generator output.
    loss_dis = F.mean(F.squared_error(x_ref, gen))
    print loss_dis.d

    # Create Solver.
    # NOTE(review): only parameters under scope "dis" are optimized —
    # presumably the vectorizer's parameters live there; the "gen" scope
    # (pre-trained generator) is intentionally frozen. Verify the scope used
    # inside vectorizer().
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_dis = M.MonitorSeries(
        "Discriminator loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)

    #data = data_iterator_mnist(args.batch_size, True)
    data = iterator.simple_data_iterator(load_kanji_data(),args.batch_size,True)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("dis"):
                nn.save_parameters(os.path.join(
                    args.model_save_path, "vectorizer_param_%06d.h5" % i))

        # Training forward
        # NOTE(review): buf2 is fetched but unused; both x and x_ref receive
        # buf, scaled from [0, 255] to [-1, 1] — confirm buf2 is intentionally
        # discarded.
        buf,buf2 = data.next()
        x.d = buf*2.0/255. - 1.0
        x_ref.d = buf*2.0/255. - 1.0

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    # Final save of the trained vectorizer weights (uses last loop index i).
    with nn.parameter_scope("dis"):
        nn.save_parameters(os.path.join(
            args.model_save_path, "vectorizer_param_%06d.h5" % i))
def train(args):
    """
    Jointly train a vectorizer (encoder), generator and discriminator on
    28x28 kanji images.

    Three objectives per iteration:
      * loss_vec — pixel reconstruction through encoder+generator,
      * loss_gen — fool the discriminator from a random latent code,
      * loss_dis — separate real images from generated ones.

    Parameters live in the nnabla scopes "vec", "gen" and "dis".
    """
    # --- Fake path -----------------------------------------------------
    x1 = nn.Variable([args.batch_size, 1, 28, 28])
    z_vec = vectorizer(x1)
    z = z_vec.unlinked()          # shares data with z_vec; cuts gradient flow
    fake2 = generator(z_vec)      # reconstruction path (grads reach vectorizer)
    fake = generator(z)           # sampling path (vectorizer not updated)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    loss_gen = F.mean(
        F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape)))
    loss_vec = F.mean(F.squared_error(fake2, x1))
    fake_dis = fake.unlinked()
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(pred_fake_dis,
                                F.constant(0, pred_fake_dis.shape)))

    # --- Real path -----------------------------------------------------
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(
        F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    solver_vec = S.Adam(args.learning_rate, beta1=0.5)
    # BUG FIX: solver_vec.set_parameters() was previously called twice (once
    # per scope); since nnabla's Solver.set_parameters defaults to reset=True,
    # the second call dropped the "vec" parameters and the reconstruction loss
    # never trained the vectorizer. Register both scopes in one call, with
    # scope-prefixed keys to avoid key collisions between the two scopes.
    vec_gen_params = {}
    with nn.parameter_scope("vec"):
        for k, v in nn.get_parameters().items():
            vec_gen_params["vec/" + k] = v
    with nn.parameter_scope("gen"):
        for k, v in nn.get_parameters().items():
            vec_gen_params["gen/" + k] = v
    solver_vec.set_parameters(vec_gen_params)
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries(
        "Discriminator loss", monitor, interval=10)
    monitor_loss_vec = M.MonitorSeries("Vectorizer loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    # BUG FIX: the previous normalize_method `lambda x: x + 1 / 2.` evaluates
    # to x + 0.5 (operator precedence). Images here are scaled to [-1, 1], so
    # the correct mapping to [0, 1] is (x + 1) / 2.
    monitor_fake = M.MonitorImageTile(
        "Fake images", monitor, normalize_method=lambda x: (x + 1) / 2.)
    monitor_vec1 = M.MonitorImageTile(
        "vec images1", monitor, normalize_method=lambda x: (x + 1) / 2.)
    monitor_vec2 = M.MonitorImageTile(
        "vec images2", monitor, normalize_method=lambda x: (x + 1) / 2.)

    #data = data_iterator_mnist(args.batch_size, True)
    data = iterator.simple_data_iterator(
        load_kanji_data(), args.batch_size, True)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("gen"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "generator_param_%06d.h5" % i))
            with nn.parameter_scope("dis"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "discriminator_param_%06d.h5" % i))
            with nn.parameter_scope("vec"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "vectorizer_param_%06d.h5" % i))

        # Training forward
        image, _ = data.next()
        x1.d = image / 255. * 2 - 1.0  # [0, 255] -> [-1, 1]

        # Vectorizer + generator (reconstruction) update.
        solver_vec.zero_grad()
        loss_vec.forward(clear_no_need_grad=True)
        loss_vec.backward(clear_buffer=True)
        solver_vec.weight_decay(args.weight_decay)
        solver_vec.update()
        fake2.forward()
        monitor_vec1.add(i, fake2)
        monitor_vec2.add(i, x1)
        monitor_loss_vec.add(i, loss_vec.d.copy())

        x.d = image / 255. * 2 - 1.0  # [0, 255] to [-1, 1]
        z.d = np.random.randn(*z.shape)  # overwrite latent with random sample

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)
def train(args):
    """
    Overfit a single network to map one fixed input to one fixed target image
    ("test.png"), saving intermediate renderings as PNGs.

    NOTE: this is Python 2 code (print statements).
    """
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Single fixed input/output pair of shape (1, 3, SIZE, SIZE).
    x = nn.Variable([1, 3, SIZE, SIZE])
    y = network(x)
    dataIn = makeInput()
    x.d = dataIn.copy()
    y.forward()
    # Render the untrained network's output once for reference.
    img = makePng(y.d)
    img.save(os.path.join(args.model_save_path, "first.png"))

    output = nn.Variable([1, 3, SIZE, SIZE])
    dataOut = makeOutput("test.png")
    output.d = dataOut
    #loss = F.mean(F.sigmoid_cross_entropy(y, output))
    loss = F.mean(F.squared_error(y, output))

    # NOTE(review): this re-randomizes ALL parameters after network() built
    # them, clobbering whatever initialization network() used — confirm this
    # is intentional.
    param = nn.get_parameters()
    for i, j in param.items():
        param.get(i).d = np.random.randn(*(j.d.shape))

    solver = S.Adam(args.learning_rate, beta1=0.5)
    # NOTE(review): only parameters under scope "net" are optimized —
    # presumably network() creates its parameters in that scope; verify.
    with nn.parameter_scope("net"):
        solver.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    monitor_gen = M.MonitorImageTile("gen images", monitor)

    #data = data_iterator_mnist(args.batch_size, True)
    # Debug dump of one weight tile before training.
    with nn.parameter_scope("net"):
        param = nn.get_parameters()
        print param.get("conv0/conv/W").d.reshape((16, 16))[:10, :10]

    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("net"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "generator_param_%06d.h5" % i))

        # Training forward
        x.d = dataIn.copy()

        # Generator update.
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        # Periodically render the current output for inspection.
        if i % 10 == 0:
            img = makePng(y.d)
            img.save(os.path.join(args.model_save_path,
                                  "output_%06d.png" % i))
        #print "max",max(y.d.flatten()),"min",min(y.d.flatten())
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        monitor_gen.add(i, y)
        monitor_loss_gen.add(i, loss.d.copy())
        monitor_time.add(i)

    # Final save (uses last loop index i).
    with nn.parameter_scope("net"):
        nn.save_parameters(
            os.path.join(args.model_save_path,
                         "generator_param_%06d.h5" % i))
    return
def train():
    """
    Train a ResNet classifier on Tiny ImageNet with gradient accumulation,
    periodic validation, scheduled learning-rate decay, and periodic
    parameter snapshots.
    """
    args = get_args()

    # Get context (falls back to CPU when none is specified).
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Dataset
    # We use Tiny ImageNet from Stanford CS231N class.
    # https://tiny-imagenet.herokuapp.com/
    # Tiny ImageNet consists of 200 categories, each category has 500 images
    # in training set. The image size is 64x64. To adapt ResNet into 64x64
    # image inputs, the input image size of ResNet is set as 56x56, and
    # the stride in the first conv and the first max pooling are removed.
    data = data_iterator_tiny_imagenet(args.batch_size, 'train')
    vdata = data_iterator_tiny_imagenet(args.batch_size, 'val')
    num_classes = 200
    tiny = True  # TODO: Switch ILSVRC2012 dataset and TinyImageNet.

    # Training and validation graphs; pred kept persistent so its buffer
    # survives the clearing passes.
    t_model = get_model(
        args, num_classes, test=False, tiny=tiny)
    t_model.pred.persistent = True  # Not clearing buffer of pred in backward
    v_model = get_model(
        args, num_classes, test=True, tiny=tiny)
    v_model.pred.persistent = True  # Not clearing buffer of pred in forward

    # Create Solver.
    solver = S.Momentum(args.learning_rate, 0.9)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = M.MonitorSeries("Training error", monitor, interval=10)
    monitor_vloss = M.MonitorSeries("Validation loss", monitor, interval=10)
    monitor_verr = M.MonitorSeries("Validation error", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=10)

    # Training loop.
    for i in range(args.max_iter):
        # Save parameters
        if i % args.model_save_interval == 0:
            nn.save_parameters(os.path.join(
                args.model_save_path, 'param_%06d.h5' % i))

        # Validation
        if i % args.val_interval == 0:
            # Clear all intermediate memory to save memory.
            # t_model.loss.clear_recursive()
            l = 0.0
            e = 0.0
            for j in range(args.val_iter):
                images, labels = vdata.next()
                v_model.image.d = images
                v_model.label.d = labels
                # Cast to compact dtypes on-device to reduce transfer/memory.
                v_model.image.data.cast(np.uint8, ctx)
                v_model.label.data.cast(np.int32, ctx)
                v_model.loss.forward(clear_buffer=True)
                l += v_model.loss.d
                e += categorical_error(v_model.pred.d, v_model.label.d)
            monitor_vloss.add(i, l / args.val_iter)
            monitor_verr.add(i, e / args.val_iter)
            # Clear all intermediate memory to save memory.
            # v_model.loss.clear_recursive()

        # Training
        l = 0.0
        e = 0.0
        solver.zero_grad()

        # Gradient accumulation loop: gradients sum over accum_grad
        # sub-batches because zero_grad() runs once per outer iteration.
        for j in range(args.accum_grad):
            images, labels = data.next()
            t_model.image.d = images
            t_model.label.d = labels
            t_model.image.data.cast(np.uint8, ctx)
            t_model.label.data.cast(np.int32, ctx)
            t_model.loss.forward(clear_no_need_grad=True)
            t_model.loss.backward(clear_buffer=True)  # Accumulating gradients
            l += t_model.loss.d
            e += categorical_error(t_model.pred.d, t_model.label.d)
        solver.weight_decay(args.weight_decay)
        solver.update()
        monitor_loss.add(i, l / args.accum_grad)
        monitor_err.add(i, e / args.accum_grad)
        monitor_time.add(i)

        # Learning rate decay at scheduled iter
        if i in args.learning_rate_decay_at:
            solver.set_learning_rate(solver.learning_rate() * 0.1)

    # Final weight snapshot.
    nn.save_parameters(os.path.join(args.model_save_path,
                                    'param_%06d.h5' % args.max_iter))
def train(args):
    """
    Jointly train a vectorizer (encoder), generator and discriminator on
    28x28 kanji images (autoencoding-GAN variant where the reconstruction
    loss is taken on the same `fake` used for the adversarial path).

    Parameters live in the nnabla scopes "vec", "gen" and "dis".
    """
    # Create CNN network for both training and testing.
    # TRAIN

    # --- Fake path -----------------------------------------------------
    x1 = nn.Variable([args.batch_size, 1, 28, 28])
    #z = nn.Variable([args.batch_size, VEC_SIZE, 1, 1])
    #z = vectorizer(x1,maxh = 1024)
    #fake = generator(z,maxh= 1024)
    z = vectorizer(x1)
    fake = generator(z)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    loss_gen = F.mean(
        F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape)))
    loss_vec = F.mean(F.squared_error(fake, x1))
    fake_dis = fake.unlinked()
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(pred_fake_dis,
                                F.constant(0, pred_fake_dis.shape)))

    # --- Real path -----------------------------------------------------
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(
        F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    solver_vec = S.Adam(args.learning_rate, beta1=0.5)
    # BUG FIX: solver_vec.set_parameters() was previously called twice (once
    # per scope); since nnabla's Solver.set_parameters defaults to reset=True,
    # the second call dropped the "vec" parameters and the reconstruction loss
    # never trained the vectorizer. Register both scopes in one call, with
    # scope-prefixed keys to avoid key collisions between the two scopes.
    vec_gen_params = {}
    with nn.parameter_scope("vec"):
        for k, v in nn.get_parameters().items():
            vec_gen_params["vec/" + k] = v
    with nn.parameter_scope("gen"):
        for k, v in nn.get_parameters().items():
            vec_gen_params["gen/" + k] = v
    solver_vec.set_parameters(vec_gen_params)
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries(
        "Discriminator loss", monitor, interval=10)
    monitor_loss_vec = M.MonitorSeries("Vectorizer loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    # Inputs below are scaled to [-0.5, 0.5]; by operator precedence
    # `x + 1 / 2.` is x + 0.5, which maps that range to [0, 1] as desired.
    monitor_fake = M.MonitorImageTile("Fake images", monitor,
                                      normalize_method=lambda x: x + 1 / 2.)
    monitor_vec1 = M.MonitorImageTile("vec images1", monitor,
                                      normalize_method=lambda x: x + 1 / 2.)
    monitor_vec2 = M.MonitorImageTile("vec images2", monitor,
                                      normalize_method=lambda x: x + 1 / 2.)

    #data = data_iterator_mnist(args.batch_size, True)
    data = iterator.simple_data_iterator(
        load_kanji_data(), args.batch_size, True)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("gen"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "generator_param_%06d.h5" % i))
            with nn.parameter_scope("dis"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "discriminator_param_%06d.h5" % i))

        # Training forward
        image, _ = data.next()
        # BUG FIX (comment only): /255. - 0.5 maps [0, 255] to [-0.5, 0.5],
        # not [-1, 1] as the old comment claimed.
        x1.d = image / 255. - 0.5

        # Vectorizer + generator (reconstruction) update.
        solver_vec.zero_grad()
        loss_vec.forward(clear_no_need_grad=True)
        loss_vec.backward(clear_buffer=True)
        solver_vec.weight_decay(args.weight_decay)
        solver_vec.update()
        monitor_vec1.add(i, fake)
        monitor_vec2.add(i, x1)
        monitor_loss_vec.add(i, loss_vec.d.copy())

        x.d = image / 255. - 0.5  # [0, 255] -> [-0.5, 0.5]
        z.d = np.random.randn(*z.shape)  # overwrite latent with random sample

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    # Final save (uses last loop index i).
    with nn.parameter_scope("gen"):
        nn.save_parameters(
            os.path.join(args.model_save_path,
                         "generator_param_%06d.h5" % i))
    with nn.parameter_scope("dis"):
        nn.save_parameters(
            os.path.join(args.model_save_path,
                         "discriminator_param_%06d.h5" % i))
def train():
    """
    Fine-tune DeepLabv3+ for semantic segmentation, optionally distributed
    across devices with nnabla's multi-process communicator.

    Loads pretrained weights, optionally pops the logits layer for
    fine-tuning, trains with gradient accumulation, linear LR warmup and a
    poly decay schedule, validates periodically (loss / masked top-1 error /
    optional mean IOU), and exports ``.nnp`` archives before and after
    training.
    """
    args = get_args()

    _ = nn.load_parameters(args.pretrained_model_path)
    if args.fine_tune:
        # Drop the classification head so it is re-initialized for the new
        # number of classes.
        nnabla.parameter.pop_parameter('decoder/logits/affine/conv/W')
        nnabla.parameter.pop_parameter('decoder/logits/affine/conv/b')

    n_train_samples = args.train_samples
    n_val_samples = args.val_samples

    distributed = args.distributed
    compute_acc = args.compute_acc

    if distributed:
        # Communicator and Context
        from nnabla.ext_utils import get_extension_context
        extension_module = "cudnn"
        ctx = get_extension_context(
            extension_module, type_config=args.type_config)
        comm = C.MultiProcessDataParalellCommunicator(ctx)
        comm.init()
        n_devices = comm.size
        mpi_rank = comm.rank
        device_id = mpi_rank
        ctx.device_id = str(device_id)
        nn.set_default_context(ctx)
    else:
        # Get context.
        from nnabla.ext_utils import get_extension_context
        extension_module = args.context
        if args.context is None:
            extension_module = 'cpu'
        logger.info("Running in %s" % extension_module)
        ctx = get_extension_context(
            extension_module, device_id=args.device_id,
            type_config=args.type_config)
        nn.set_default_context(ctx)
        n_devices = 1
        device_id = 0

    # training data
    data = data_iterator_segmentation(
        args.train_samples, args.batch_size, args.train_dir,
        args.train_label_dir, target_width=args.image_width,
        target_height=args.image_height)
    # validation data
    vdata = data_iterator_segmentation(
        args.val_samples, args.batch_size, args.val_dir, args.val_label_dir,
        target_width=args.image_width, target_height=args.image_height)

    if distributed:
        # Each worker sees a disjoint slice of the data.
        data = data.slice(
            rng=None, num_of_slices=n_devices, slice_pos=device_id)
        vdata = vdata.slice(
            rng=None, num_of_slices=n_devices, slice_pos=device_id)
    num_classes = args.num_class

    # Workaround to start with the same initialized weights for all workers.
    np.random.seed(313)

    # Training graph; masked top-1 error normalized by the number of valid
    # (unmasked) pixels.
    t_model = get_model(
        args, test=False)
    t_model.pred.persistent = True  # Not clearing buffer of pred in backward
    t_pred2 = t_model.pred.unlinked()
    t_e = F.sum(F.top_n_error(t_pred2, t_model.label, axis=1)
                * t_model.mask) / F.sum(t_model.mask)

    # Validation graph.
    v_model = get_model(
        args, test=True)
    v_model.pred.persistent = True  # Not clearing buffer of pred in forward
    v_pred2 = v_model.pred.unlinked()
    # BUG FIX: the denominator previously used t_model.mask (copy-paste from
    # the training metric), scaling the validation error by the *training*
    # batch's mask sum. Normalize by the validation mask instead.
    v_e = F.sum(F.top_n_error(v_pred2, v_model.label, axis=1)
                * v_model.mask) / F.sum(v_model.mask)

    # Create Solver
    solver = S.Momentum(args.learning_rate, 0.9)
    solver.set_parameters(nn.get_parameters())

    # Load checkpoint
    start_point = 0
    if args.checkpoint is not None:
        # load weights and solver state info from specified checkpoint file.
        start_point = load_checkpoint(args.checkpoint, solver)

    # Setting warmup: ramp linearly from base_lr to n_devices * base_lr over
    # warmup_epoch epochs.
    base_lr = args.learning_rate / n_devices
    warmup_iter = int(1. * n_train_samples /
                      args.batch_size / args.accum_grad / n_devices) * args.warmup_epoch
    warmup_slope = base_lr * (n_devices - 1) / warmup_iter
    solver.set_learning_rate(base_lr)

    # Create monitor
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = M.MonitorSeries("Training error", monitor, interval=10)
    monitor_vloss = M.MonitorSeries("Validation loss", monitor, interval=1)
    monitor_verr = M.MonitorSeries("Validation error", monitor, interval=1)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=10)
    monitor_miou = M.MonitorSeries("mean IOU", monitor, interval=10)
    monitor_vtime = M.MonitorTimeElapsed(
        "Validation time", monitor, interval=1)

    # save_nnp: export the initial network for deployment tooling.
    contents = save_nnp({'x': v_model.image}, {
        'y': v_model.pred}, args.batch_size)
    save.save(os.path.join(args.model_save_path,
                           'Deeplabv3plus_result_epoch0.nnp'), contents,
              variable_batch_size=False)

    # Training loop
    for i in range(start_point, int(args.max_iter / n_devices)):
        # Save parameters (rank 0 only)
        if i % (args.model_save_interval // n_devices) == 0 and device_id == 0:
            save_checkpoint(args.model_save_path, i, solver)

        # Validation
        if i % (args.val_interval // n_devices) == 0 and i != 0:
            vmiou_local = 0.
            val_iter_local = n_val_samples // args.batch_size
            vl_local = nn.NdArray()
            vl_local.zero()
            ve_local = nn.NdArray()
            ve_local.zero()
            for j in range(val_iter_local):
                images, labels, masks = vdata.next()
                v_model.image.d = images
                v_model.label.d = labels
                v_model.mask.d = masks
                v_model.image.data.cast(np.float32, ctx)
                v_model.label.data.cast(np.int32, ctx)
                v_model.loss.forward(clear_buffer=True)
                v_e.forward(clear_buffer=True)
                vl_local += v_model.loss.data
                ve_local += v_e.data
                # Mean IOU computation
                if compute_acc:
                    vmiou_local += compute_miou(
                        num_classes, labels,
                        np.argmax(v_model.pred.d, axis=1), masks)
            vl_local /= val_iter_local
            ve_local /= val_iter_local
            if compute_acc:
                vmiou_local /= val_iter_local
                vmiou_ndarray = nn.NdArray.from_numpy_array(
                    np.array(vmiou_local))
            if distributed:
                # Average validation statistics across workers.
                comm.all_reduce(vl_local, division=True, inplace=True)
                comm.all_reduce(ve_local, division=True, inplace=True)
                if compute_acc:
                    comm.all_reduce(
                        vmiou_ndarray, division=True, inplace=True)
            if device_id == 0:
                monitor_vloss.add(i * n_devices, vl_local.data.copy())
                monitor_verr.add(i * n_devices, ve_local.data.copy())
                if compute_acc:
                    monitor_miou.add(i * n_devices, vmiou_local)
                monitor_vtime.add(i * n_devices)

        # Training
        solver.zero_grad()

        e_acc = nn.NdArray(t_e.shape)
        e_acc.zero()
        l_acc = nn.NdArray(t_model.loss.shape)
        l_acc.zero()
        # Gradient accumulation loop: gradients sum across accum_grad
        # sub-batches because zero_grad() runs once per outer iteration.
        for j in range(args.accum_grad):
            images, labels, masks = data.next()
            t_model.image.d = images
            t_model.label.d = labels
            t_model.mask.d = masks
            t_model.image.data.cast(np.float32, ctx)
            t_model.label.data.cast(np.int32, ctx)
            t_model.loss.forward(clear_no_need_grad=True)
            t_model.loss.backward(clear_buffer=True)  # Accumulating gradients
            t_e.forward(clear_buffer=True)
            e_acc += t_e.data
            l_acc += t_model.loss.data

        # AllReduce
        if distributed:
            params = [x.grad for x in nn.get_parameters().values()]
            comm.all_reduce(params, division=False, inplace=False)
            comm.all_reduce(l_acc, division=True, inplace=True)
            comm.all_reduce(e_acc, division=True, inplace=True)

        solver.scale_grad(1./args.accum_grad)
        solver.weight_decay(args.weight_decay)
        solver.update()

        # Linear Warmup
        if i <= warmup_iter:
            lr = base_lr + warmup_slope * i
            solver.set_learning_rate(lr)

        if distributed:
            # Synchronize by averaging the weights over devices using allreduce
            if (i+1) % args.sync_weight_every_itr == 0:
                weights = [x.data for x in nn.get_parameters().values()]
                comm.all_reduce(weights, division=True, inplace=True)

        if device_id == 0:
            monitor_loss.add(
                i * n_devices, (l_acc / args.accum_grad).data.copy())
            monitor_err.add(
                i * n_devices, (e_acc / args.accum_grad).data.copy())
            monitor_time.add(i * n_devices)

        # Learning rate decay at scheduled iter --> changed to poly learning
        # rate decay policy
        # if i in args.learning_rate_decay_at:
        solver.set_learning_rate(base_lr * ((1 - i / args.max_iter)**0.1))

    if device_id == 0:
        # Final weight snapshot and deployment archive (rank 0 only).
        nn.save_parameters(os.path.join(args.model_save_path,
                                        'param_%06d.h5' % args.max_iter))
        contents = save_nnp({'x': v_model.image}, {
            'y': v_model.pred}, args.batch_size)
        save.save(os.path.join(args.model_save_path,
                               'Deeplabv3plus_result.nnp'), contents,
                  variable_batch_size=False)
def train(generator, discriminator, patch_gan, solver_gen, solver_dis,
          weight_l1, train_iterator, val_iterator, epoch, monitor, interval):
    """
    Train a pix2pix generator/discriminator pair.

    Args:
        generator / discriminator: network-building callables.
        patch_gan: flag forwarded to the discriminator builder.
        solver_gen / solver_dis: pre-constructed solvers; parameters are
            registered here from the 'generator'/'discriminator' scopes.
        weight_l1: weight of the L1 reconstruction term in the generator loss.
        train_iterator / val_iterator: data iterators yielding (image, label).
        epoch: number of epochs to run.
        monitor: nnabla Monitor instance; also provides the save directory.
        interval: monitor reporting interval.

    Returns:
        Path of the saved generator parameter file.
    """
    # Create Network Graph
    # for training
    im, la = train_iterator.next()  # for checking image shape
    real = nn.Variable(im.shape)  # real
    x = nn.Variable(la.shape)  # x
    # for validation
    real_val = nn.Variable(im.shape)  # real
    x_val = nn.Variable(la.shape)  # x

    # Generator
    fake = generator(x, test=False)
    # pix2pix infers just like training mode.
    fake_val = generator(x_val, test=False)
    fake_val.persistent = True  # Keep to visualize

    # Discriminator
    fake_y = discriminator(x, fake, patch_gan=patch_gan, test=False)
    real_y = discriminator(x, real, patch_gan=patch_gan, test=False)

    # Constant targets for the adversarial losses.
    real_target = nn.Variable(fake_y.shape)
    real_target.data.fill(1)
    fake_target = nn.Variable(real_y.shape)
    fake_target.data.zero()

    # Generator: L1 reconstruction + fool-the-discriminator term.
    loss_gen = F.mean(weight_l1 * F.abs(real - fake)) + \
        F.mean(F.sigmoid_cross_entropy(fake_y, real_target))
    # Discriminator: real vs. fake classification.
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(real_y, real_target)
        + F.sigmoid_cross_entropy(fake_y, fake_target))

    # Setting Solvers
    with nn.parameter_scope('generator'):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope('discriminator'):
        solver_dis.set_parameters(nn.get_parameters())

    # Create Monitors
    monitors = {
        'loss_gen': nm.MonitorSeries("Generator loss", monitor,
                                     interval=interval),
        'loss_dis': nm.MonitorSeries("Discriminator loss", monitor,
                                     interval=interval),
        'time': nm.MonitorTimeElapsed("Training time", monitor,
                                      interval=interval),
        'fake': nm.MonitorImageTile(
            "Fake images", monitor, interval=interval, num_images=2,
            normalize_method=lambda x: np.clip(
                np.divide(x, 255.0), 0.0, 1.0)),
    }

    i = 0
    for e in range(epoch):
        logger.info('Epoch = {}'.format(e))
        # Training: iterate until the iterator rolls over to the next epoch.
        while e == train_iterator.epoch:
            # forward / backward process
            real.d, x.d = train_iterator.next()
            solver_dis.zero_grad()
            solver_gen.zero_grad()
            # Discriminator step first, then generator, on the same batch.
            loss_dis.forward(clear_no_need_grad=True)
            loss_dis.backward(clear_buffer=True)
            solver_dis.update()
            # Generator
            loss_gen.forward(clear_no_need_grad=True)
            loss_gen.backward(clear_buffer=True)
            solver_gen.update()
            monitors['time'].add(i)
            monitors['loss_gen'].add(i, loss_gen.d.copy())
            monitors['loss_dis'].add(i, loss_dis.d.copy())

            # Validation: visualize label / generated pairs side by side.
            real_val.d, x_val.d = val_iterator.next()
            fake_val.forward()
            pix2pix_vis = np.stack(
                [label_to_image(x_val.d), normalize_image(fake_val.d)],
                axis=1).reshape((-1, ) + fake.shape[1:])
            monitors['fake'].add(i, pix2pix_vis)
            i += 1

    # save parameters of generator
    save_path = os.path.join(monitor._save_path,
                             'generator_model_{}.h5'.format(i))
    with nn.parameter_scope('generator'):
        nn.save_parameters(save_path)
    return save_path
def train(args):
    """Train a Siamese LeNet on MNIST pairs with contrastive loss.

    Builds separate training and test graphs, optionally resumes from a
    checkpoint, runs the main loop with periodic validation and
    checkpoint saving, and stores the final parameters as an .h5 file.
    """
    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(
        args.context, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    margin = 1.0  # Margin for contrastive loss.

    # --- Training graph ---
    img_a = nn.Variable([args.batch_size, 1, 28, 28])
    img_b = nn.Variable([args.batch_size, 1, 28, 28])
    sim_label = nn.Variable([args.batch_size])
    pair_pred = mnist_lenet_siamese(img_a, img_b, test=False)
    train_loss = F.mean(contrastive_loss(pair_pred, sim_label, margin))

    # --- Test graph ---
    v_img_a = nn.Variable([args.batch_size, 1, 28, 28])
    v_img_b = nn.Variable([args.batch_size, 1, 28, 28])
    v_sim_label = nn.Variable([args.batch_size])
    v_pair_pred = mnist_lenet_siamese(v_img_a, v_img_b, test=True)
    val_loss = F.mean(contrastive_loss(v_pair_pred, v_sim_label, margin))

    # Solver over every parameter created by the graphs above.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    start_point = 0
    if args.checkpoint is not None:
        # Resume weights and solver state from the checkpoint file.
        start_point = load_checkpoint(args.checkpoint, solver)

    # Monitors for loss, elapsed time and validation loss.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_vloss = M.MonitorSeries("Test loss", monitor, interval=10)

    # MNIST pair iterators; fixed seed for reproducibility.
    rng = np.random.RandomState(313)
    data = siamese_data_iterator(args.batch_size, True, rng)
    vdata = siamese_data_iterator(args.batch_size, False, rng)

    # Training loop.
    for i in range(start_point, args.max_iter):
        if i % args.val_interval == 0:
            # Validation: mean loss over args.val_iter batches.
            accumulated = 0.0
            for _ in range(args.val_iter):
                v_img_a.d, v_img_b.d, v_sim_label.d = vdata.next()
                val_loss.forward(clear_buffer=True)
                accumulated += val_loss.d
            monitor_vloss.add(i, accumulated / args.val_iter)
        if i % args.model_save_interval == 0:
            # Persist weights and solver state.
            save_checkpoint(args.model_save_path, i, solver)
        # One training step: forward, backward, weight decay, update.
        img_a.d, img_b.d, sim_label.d = data.next()
        solver.zero_grad()
        train_loss.forward(clear_no_need_grad=True)
        train_loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        monitor_loss.add(i, train_loss.d.copy())
        monitor_time.add(i)

    # Save the final parameters.
    parameter_file = os.path.join(
        args.model_save_path, 'params_%06d.h5' % args.max_iter)
    nn.save_parameters(parameter_file)
def main():
    """Train an MLP on MNIST with Virtual Adversarial Training (VAT).

    Steps:
    * Get and set context.
    * Load dataset, split into labeled / unlabeled / validation sets.
    * Initialize DataIterators.
    * Create networks for labeled, unlabeled and test data.
    * Create solver.
    * Training loop:
      * periodic validation,
      * supervised step on a labeled minibatch (cross-entropy loss),
      * unsupervised step on an unlabeled minibatch (power-method
        estimate of the adversarial direction, then the LDS loss).
    * Save the trained model as an .nnp file and check C++ forward.
    """
    args = get_args()

    # Get context. Fixed: use the current nnabla.ext_utils API (as the
    # rest of this file does) instead of the deprecated
    # nnabla.contrib.context.extension_context.
    from nnabla.ext_utils import get_extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = get_extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    shape_x = (1, 28, 28)
    n_h = args.n_units
    n_y = args.n_class

    # Load MNIST dataset.
    from mnist_data import MnistDataSource
    with MnistDataSource(train=True) as d:
        x_t = d.images
        t_t = d.labels
    with MnistDataSource(train=False) as d:
        x_v = d.images
        t_v = d.labels

    # Scale pixels to [0, 1) and truncate to the requested sizes.
    x_t = np.array(x_t / 256.0).astype(np.float32)
    x_t, t_t = x_t[:args.n_train], t_t[:args.n_train]
    x_v, t_v = x_v[:args.n_valid], t_v[:args.n_valid]

    # Create semi-supervised datasets; labeled samples are also reused
    # as part of the unlabeled pool.
    x_l, t_l, x_u, _ = split_dataset(x_t, t_t, args.n_labeled, args.n_class)
    x_u = np.r_[x_l, x_u]
    x_v = np.array(x_v / 256.0).astype(np.float32)

    # Create DataIterators for datasets of labeled, unlabeled and validation
    di_l = DataIterator(args.batchsize_l, [x_l, t_l])
    di_u = DataIterator(args.batchsize_u, [x_u])
    di_v = DataIterator(args.batchsize_v, [x_v, t_v])

    # Feed-forward-net building function shared by all three graphs.
    def forward(x, test=False):
        return mlp_net(x, n_h, n_y, test)

    # Net for learning labeled data (cross-entropy loss).
    xl = nn.Variable((args.batchsize_l,) + shape_x, need_grad=False)
    hl = forward(xl, test=False)
    tl = nn.Variable((args.batchsize_l, 1), need_grad=False)
    loss_l = F.mean(F.softmax_cross_entropy(hl, tl))

    # Net for learning unlabeled data (VAT / LDS loss).
    # r needs gradients: it carries the perturbation direction.
    xu = nn.Variable((args.batchsize_u,) + shape_x, need_grad=False)
    r = nn.Variable((args.batchsize_u,) + shape_x, need_grad=True)
    eps = nn.Variable((args.batchsize_u,) + shape_x, need_grad=False)
    loss_u, yu = vat(xu, r, eps, forward, distance)

    # Net for evaluating validation data.
    xv = nn.Variable((args.batchsize_v,) + shape_x, need_grad=False)
    hv = forward(xv, test=True)
    tv = nn.Variable((args.batchsize_v, 1), need_grad=False)

    # Create solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Monitor training and validation stats.
    import nnabla.monitor as M
    monitor = M.Monitor(args.model_save_path)
    monitor_verr = M.MonitorSeries("Test error", monitor, interval=240)
    monitor_time = M.MonitorTimeElapsed("Elapsed time", monitor, interval=240)

    # Training loop. (Removed unused `t0 = time.time()`.)
    for i in range(args.max_iter):
        # Validation Test
        if i % args.val_interval == 0:
            n_error = calc_validation_error(
                di_v, xv, tv, hv, args.val_iter)
            monitor_verr.add(i, n_error)

        #################################
        ## Training by Labeled Data #####
        #################################
        # input minibatch of labeled data into variables
        xl.d, tl.d = di_l.next()
        solver.zero_grad()
        # forward, backward and update
        loss_l.forward(clear_no_need_grad=True)
        loss_l.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        #################################
        ## Training by Unlabeled Data ###
        #################################
        xu.d, = di_u.next()

        ##### Calculate Adversarial Noise #####
        # Start from normalized random noise...
        n = np.random.normal(size=xu.shape).astype(np.float32)
        r.d = get_direction(n)
        # ...scaled by xi, the small power-method scaling parameter.
        eps.data.fill(args.xi_for_vat)
        # Calculate y without noise, only once.
        yu.forward(clear_buffer=True)
        # Power-method iterations: the gradient w.r.t. r points toward
        # the most loss-increasing (adversarial) direction.
        for _ in range(args.n_iter_for_power_method):
            # Initialize gradient to receive value
            r.grad.zero()
            # forward, backward, without update
            loss_u.forward(clear_no_need_grad=True)
            loss_u.backward(clear_buffer=True)
            # Normalize gradient vector and input to variable
            r.d = get_direction(r.g)

        ##### Calculate loss for unlabeled data #####
        # Clear remained gradients
        solver.zero_grad()
        # Set epsilon, the adversarial noise scaling parameter.
        eps.data.fill(args.eps_for_vat)
        # forward, backward and update
        loss_u.forward(clear_no_need_grad=True)
        loss_u.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        ##### Learning rate update (once per epoch) #####
        if i % args.iter_per_epoch == 0:
            solver.set_learning_rate(
                solver.learning_rate() * args.learning_rate_decay)
        monitor_time.add(i)

    # Evaluate the final model by the error rate with validation dataset
    valid_error = calc_validation_error(di_v, xv, tv, hv, args.val_iter)
    monitor_verr.add(i, valid_error)
    monitor_time.add(i)

    # Save the model.
    nnp_file = os.path.join(
        args.model_save_path, 'vat_%06d.nnp' % args.max_iter)
    runtime_contents = {
        'networks': [
            {'name': 'Validation',
             'batch_size': args.batchsize_v,
             'outputs': {'y': hv},
             'names': {'x': xv}}],
        'executors': [
            {'name': 'Runtime',
             'network': 'Validation',
             'data': ['x'],
             'output': ['y']}]}
    save.save(nnp_file, runtime_contents)

    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [xv.d], [xv], hv, nnp_file)
def train():
    """Train a GGNN on the bAbI task-19 (path finding) dataset.

    Parses command-line arguments, then trains with gradient
    accumulation until validation accuracy reaches ``--threshold``;
    at that point one correct and one wrong example are printed and
    the loop terminates.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--train-file", type=str)
    parser.add_argument("--valid-file", type=str)
    parser.add_argument("--num-training-examples", type=int, default=250)
    parser.add_argument("--accum-grad", type=int, default=1)
    parser.add_argument("--valid-interval", type=int, default=200)
    parser.add_argument("--threshold", type=float, default=0.95)
    parser.add_argument("--context", type=str, default="cpu")
    parser.add_argument("--device-id", type=int, default=0)
    args = parser.parse_args()

    from nnabla.ext_utils import get_extension_context
    extension_module = args.context
    ctx = get_extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # prepare data iterators (batch size 1)
    tdata = data_iterator(
        BAbI19DataSource(args.train_file,
                         args.num_training_examples,
                         shuffle=True),
        1, False, False, False)
    vdata = data_iterator(
        BAbI19DataSource(args.valid_file, 1000, shuffle=True),
        1, False, False, False)

    # prepare monitors
    monitor = M.Monitor("./bAbI19")
    tloss = M.MonitorSeries("Training Loss", monitor, interval=10)
    terror = M.MonitorSeries("Training Error", monitor, interval=10)
    verror = M.MonitorSeries("Validation Error", monitor, interval=1)

    # prepare solver; parameters are registered lazily after the first
    # graph construction in the loop below
    solver = S.Adam()
    solver_initialized = False

    cnt = 0  # number of solver updates performed so far
    while True:
        l = 0.0  # loss accumulated over accum-grad minibatches
        e = 0.0  # error accumulated over accum-grad minibatches
        solver.zero_grad()
        for _ in range(args.accum_grad):
            # read next data
            # NOTE(review): `x` is immediately reused below as an
            # nn.Variable; presumably the tuple layout is
            # (id2str, V, E, ans) — confirm against BAbI19DataSource.
            x = tdata.next()
            V = x[1][0][0]
            E = x[2][0][0]
            ans = x[3][0][0]

            # construct GGNN
            ## convert to nn.Variable
            x = nn.Variable(V.shape)
            x.data.data = V
            h = nn.Variable((len(V), 6))
            h.data.data = utils.h_0(V, 6)

            outputs = predict(V, E, len(ans))
            losses = []
            errors = []
            for a, output in zip(ans, outputs):
                label = nn.Variable((1, 1))
                label.data.data[0, 0] = a
                losses.append(F.mean(F.softmax_cross_entropy(output, label)))
                # unlinked copy so the error metric stays out of the
                # training graph
                output2 = output.unlinked()
                errors.append(F.mean(F.top_n_error(output2, label)))

            # initialize solver once the first graph has created the
            # parameters, then clear any grads
            if not solver_initialized:
                solver.set_parameters(nn.get_parameters())
                solver_initialized = True
                solver.zero_grad()

            # calculate loss/error
            loss = F.mean(F.stack(*losses))
            error = F.mean(F.stack(*errors))
            F.sink(loss, error).forward(clear_no_need_grad=True)
            loss.backward(clear_buffer=True)
            l += loss.data.data
            e += error.data.data
        # dump log (averages over the accumulation window)
        tloss.add(cnt, l / args.accum_grad)
        terror.add(cnt, e / args.accum_grad)
        l = 0.0
        e = 0.0
        solver.update()
        cnt += 1
        if cnt % args.valid_interval == 0:
            # validation over the whole validation set
            validation_error = 0
            correct_example = None
            wrong_example = None
            for _ in range(vdata.size):
                x = vdata.next()
                id2str = x[0][0][0]
                V = x[1][0][0]
                E = x[2][0][0]
                ans = x[3][0][0]
                # construct GGNN
                ## convert to nn.Variable
                x = nn.Variable(V.shape)
                x.data.data = V
                h = nn.Variable((len(V), 6))
                h.data.data = utils.h_0(V, 6)
                outputs = predict(V, E, len(ans))
                errors = []
                actual = []
                for a, output in zip(ans, outputs):
                    label = nn.Variable((1, 1))
                    label.data.data[0, 0] = a
                    errors.append(F.mean(F.top_n_error(output, label)))
                    actual.append(output.data.data)
                error = F.mean(F.stack(*errors))
                error.forward(clear_no_need_grad=True)
                # x becomes 1 when any answer in this example is wrong,
                # 0 when all are correct (note: `x` reused once again)
                x = 0.0
                if error.data.data == 0:
                    x = 0
                else:
                    x = 1
                if x > 0.5:
                    # keep the first wrong example for display
                    if wrong_example is None:
                        wrong_example = (id2str, V, E, ans, actual)
                else:
                    # keep the first correct example for display
                    if correct_example is None:
                        correct_example = (id2str, V, E, ans, actual)
                validation_error += x
            validation_error /= vdata.size
            verror.add(cnt, validation_error)
            accuracy = 1 - validation_error
            if accuracy >= args.threshold:
                # Pretty-print one example: the story facts, the
                # question and the expected/actual answers.
                def show(example):
                    # example = (id2str, V, E, ans, actual)
                    if "s" in example[2]:
                        for i, j in example[2]["s"]:
                            print("The {} is south the {}.".format(
                                example[0][i], example[0][j]))
                    if "n" in example[2]:
                        for i, j in example[2]["n"]:
                            print("The {} is north the {}.".format(
                                example[0][i], example[0][j]))
                    if "w" in example[2]:
                        for i, j in example[2]["w"]:
                            print("The {} is west the {}.".format(
                                example[0][i], example[0][j]))
                    if "e" in example[2]:
                        for i, j in example[2]["e"]:
                            print("The {} is east the {}.".format(
                                example[0][i], example[0][j]))
                    i = np.argmax(example[1][:, 0])
                    j = np.argmax(example[1][:, 1])
                    print("What is the path from {} to {}?".format(
                        example[0][i], example[0][j]))
                    for (expected, actual) in zip(example[3], example[4]):
                        i = np.argmax(actual[0])
                        # NOTE(review): id2classes is resolved at module
                        # level — confirm it is defined in this module.
                        print("Expected: {}, Actual: {}".format(
                            id2classes[expected], id2classes[i]))
                if correct_example is not None:
                    show(correct_example)
                if wrong_example is not None:
                    show(wrong_example)
                break