Esempio n. 1
0
def get_monitors(config, loss_flags, loss_var_dict, test=False):
    """Build nnabla monitors for training / inference visualization.

    Args:
        config: configuration object; reads ``monitor_params.monitor_path``,
            ``monitor_params.monitor_freq`` and the optional
            ``monitor_params.info`` suffix.
        loss_flags: flags object whose ``use_*`` attributes select which
            loss series are monitored.
        loss_var_dict (dict): maps loss names to their nnabla Variables.
        test (bool): if True, only the image visualization monitor is built.

    Returns:
        ``monitor_vis`` when ``test`` is True, otherwise the tuple
        ``(monitors_gen, monitors_dis, monitor_time, monitor_vis, log_dir)``.
    """

    log_root_dir = config.monitor_params.monitor_path
    log_dir = os.path.join(log_root_dir, get_current_time())

    # if additional information is given, append it to the log directory name
    if "info" in config.monitor_params:
        info = config.monitor_params.info
        log_dir = f'{log_dir}_{info}'

    master_monitor_misc = nm.Monitor(log_dir)
    monitor_vis = nm.MonitorImage('images', master_monitor_misc,
                                  interval=1, num_images=4,
                                  normalize_method=lambda x: x)
    if test:
        # when inference, returns the visualization monitor only
        return monitor_vis

    interval = config.monitor_params.monitor_freq
    monitoring_var_dict_gen = dict()
    monitoring_var_dict_dis = dict()

    if loss_flags.use_perceptual_loss:
        monitoring_var_dict_gen.update(
            {'perceptual_loss': loss_var_dict['perceptual_loss']})

    # GAN loss contributes one series to the generator and one to the
    # discriminator (previously tested twice with identical conditions).
    if loss_flags.use_gan_loss:
        monitoring_var_dict_gen.update(
            {'gan_loss_gen': loss_var_dict['gan_loss_gen']})
        monitoring_var_dict_dis.update(
            {'gan_loss_dis': loss_var_dict['gan_loss_dis']})

    if loss_flags.use_feature_matching_loss:
        monitoring_var_dict_gen.update(
            {'feature_matching_loss': loss_var_dict['feature_matching_loss']})

    if loss_flags.use_equivariance_value_loss:
        monitoring_var_dict_gen.update(
            {'equivariance_value_loss': loss_var_dict['equivariance_value_loss']})

    if loss_flags.use_equivariance_jacobian_loss:
        monitoring_var_dict_gen.update(
            {'equivariance_jacobian_loss': loss_var_dict['equivariance_jacobian_loss']})

    # Total generator loss is always monitored.
    monitoring_var_dict_gen.update(
        {'total_loss_gen': loss_var_dict['total_loss_gen']})

    master_monitor_gen = nm.Monitor(log_dir)
    master_monitor_dis = nm.Monitor(log_dir)

    monitors_gen = MonitorManager(monitoring_var_dict_gen,
                                  master_monitor_gen, interval=interval)
    monitors_dis = MonitorManager(monitoring_var_dict_dis,
                                  master_monitor_dis, interval=interval)
    monitor_time = nm.MonitorTimeElapsed('time_training',
                                         master_monitor_misc, interval=interval)

    return monitors_gen, monitors_dis, monitor_time, monitor_vis, log_dir
Esempio n. 2
0
 def __init__(self,
              name,
              monitor,
              comm,
              loss,
              error,
              batch_size,
              time=True,
              flush_interval=10):
     """Track per-epoch loss/error; only rank 0 owns the monitor writers."""
     self.name = name
     self.comm = comm
     self.loss = loss
     self.error = error
     self.batch_size = batch_size
     self.flush_interval = flush_interval
     # Running statistics accumulated over the course of an epoch.
     self.epoch_loss = 0.0
     self.epoch_error = 0
     self.batch_counter = 0
     if self.comm.rank == 0:
         # The root process alone writes series/time logs.
         self.monitor_loss = M.MonitorSeries(
             "%s loss" % name, monitor, interval=1)
         self.monitor_err = M.MonitorSeries(
             "%s error" % name, monitor, interval=1)
         # Elapsed-time monitoring is optional.
         self.monitor_time = M.MonitorTimeElapsed(
             "Epoch time", monitor, interval=1) if time else None
Esempio n. 3
0
    def __init__(self, save_path, series_losses, interval=1, save_time=True):
        """Create one series monitor per loss, plus an optional epoch timer."""
        self.monitor = M.Monitor(save_path)

        # One MonitorSeries per tracked loss, keyed by the loss name.
        self.series_monitors = {
            loss_name: M.MonitorSeries(loss_name, self.monitor,
                                       interval=interval)
            for loss_name in series_losses
        }

        if save_time:
            # Wall-clock time per reporting interval.
            self.monitor_time = M.MonitorTimeElapsed(
                "Epoch time", self.monitor, interval=interval)
Esempio n. 4
0
    def __init__(self, save_path, interval=1, save_time=True, silent=False):
        """Prepare the monitor root; optionally add an elapsed-time series."""
        self.monitor = M.Monitor(save_path)
        self.interval = interval
        self.silent = silent

        # Populated lazily with per-series monitors.
        self.series_monitors = {}

        if save_time:
            # Console output is suppressed when running silently.
            self.monitor_time = M.MonitorTimeElapsed(
                "Epoch time", self.monitor,
                interval=interval, verbose=not self.silent)
Esempio n. 5
0
def train(max_iter=60000):
    """Train a VAE on MNIST and return the monitor output path."""
    # Data iterators for the train / test splits.
    train_iter = I.data_iterator_mnist(batch_size, True)
    test_iter = I.data_iterator_mnist(batch_size, False)

    # Train and test loss graphs share the same input variable.
    shape_x = (1, 28, 28)
    shape_z = (50, )
    x = nn.Variable((batch_size, ) + shape_x)
    loss_l = I.vae(x, shape_z, test=False)
    loss_t = I.vae(x, shape_z, test=True)

    # Adam solver over all registered parameters.
    solver = S.Adam(learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Monitors for training/validation losses and wall-clock time.
    path = cache_dir(os.path.join(I.name, "monitor"))
    monitor = M.Monitor(path)
    monitor_train_loss = M.MonitorSeries("train_loss", monitor, interval=600)
    monitor_val_loss = M.MonitorSeries("val_loss", monitor, interval=600)
    monitor_time = M.MonitorTimeElapsed("time", monitor, interval=600)

    for step in range(max_iter):
        # One optimization step: zero grads, forward, backward, update.
        solver.zero_grad()
        x.d, _ = train_iter.next()
        loss_l.forward(clear_no_need_grad=True)
        loss_l.backward(clear_buffer=True)
        solver.weight_decay(weight_decay)
        solver.update()

        # Evaluate the test-mode loss on a held-out batch.
        x.d, _ = test_iter.next()
        loss_t.forward(clear_no_need_grad=True)

        # Log both losses and the elapsed time.
        monitor_train_loss.add(step, loss_l.d.copy())
        monitor_val_loss.add(step, loss_t.d.copy())
        monitor_time.add(step)

    return path
Esempio n. 6
0
def train(max_iter=5000, learning_rate=0.001, weight_decay=0):
    """Train the siamese network on MNIST pairs.

    Args:
        max_iter (int): number of training iterations.
        learning_rate (float): Adam learning rate.
        weight_decay (float): weight-decay coefficient applied each update.

    Returns:
        str: directory where monitor logs and ``params.h5`` are written.
    """
    train = create_net(False)
    test = create_net(True)

    # Create the solver.
    solver = S.Adam(learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitors.
    path = cache_dir(os.path.join(I.name, "monitor"))
    monitor = M.Monitor(path)
    monitor_loss_train = M.MonitorSeries("training_loss", monitor, interval=100)
    monitor_time = M.MonitorTimeElapsed("time", monitor, interval=100)
    monitor_loss_val = M.MonitorSeries("val_loss", monitor, interval=100)

    # Training loop.
    for i in range(max_iter):
        # Periodic validation: average test loss over a fixed batch count.
        if (i + 1) % 100 == 0:
            val_error = 0.0
            val_iter = 10
            for j in range(val_iter):
                test.image0.d, test.image1.d, test.label.d = test.data.next()
                test.loss.forward(clear_buffer=True)
                val_error += test.loss.d
            monitor_loss_val.add(i, val_error / val_iter)
        train.image0.d, train.image1.d, train.label.d = train.data.next()
        solver.zero_grad()
        train.loss.forward(clear_no_need_grad=True)
        train.loss.backward(clear_buffer=True)
        solver.weight_decay(weight_decay)
        solver.update()
        monitor_loss_train.add(i, train.loss.d.copy())
        monitor_time.add(i)

    # BUG FIX: parameter saving and `return path` were indented inside the
    # loop, so training terminated after the first iteration. They now run
    # once, after training completes.
    nn.save_parameters(os.path.join(path, "params.h5"))
    return path
Esempio n. 7
0
def main():
    """
    Main script: train a VAE on MNIST.
    Steps:
    * Setup calculation environment
    * Initialize data iterator.
    * Create Networks
    * Create Solver.
    * Training Loop.
    *   Training
    *   Test
    * Save
    """

    # Set args
    args = get_args(monitor_path='tmp.monitor.vae',
                    max_iter=60000,
                    model_save_path=None,
                    learning_rate=3e-4,
                    batch_size=100,
                    weight_decay=0)

    # Get context (compute backend/device) and make it the default.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Initialize data provider (True -> training split, False -> test split).
    di_l = data_iterator_mnist(args.batch_size, True)
    di_t = data_iterator_mnist(args.batch_size, False)

    # Network: train- and test-mode loss graphs share the input variable `x`.
    shape_x = (1, 28, 28)  # MNIST image shape (channels, height, width)
    shape_z = (50, )       # latent vector size
    x = nn.Variable((args.batch_size, ) + shape_x)
    loss_l = vae(x, shape_z, test=False)
    loss_t = vae(x, shape_z, test=True)

    # Create solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Monitors for training and validation
    monitor = M.Monitor(args.model_save_path)
    monitor_training_loss = M.MonitorSeries("Training loss",
                                            monitor,
                                            interval=600)
    monitor_test_loss = M.MonitorSeries("Test loss", monitor, interval=600)
    monitor_time = M.MonitorTimeElapsed("Elapsed time", monitor, interval=600)

    # Training Loop.
    for i in range(args.max_iter):

        # Initialize gradients
        solver.zero_grad()

        # Forward, backward and update
        x.d, _ = di_l.next()
        loss_l.forward(clear_no_need_grad=True)
        loss_l.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        # Forward for test (reuses `x`; no backward pass needed)
        x.d, _ = di_t.next()
        loss_t.forward(clear_no_need_grad=True)

        # Monitor for logging
        monitor_training_loss.add(i, loss_l.d.copy())
        monitor_test_loss.add(i, loss_t.d.copy())
        monitor_time.add(i)

    # Save the model
    nn.save_parameters(
        os.path.join(args.model_save_path, 'params_%06d.h5' % args.max_iter))
Esempio n. 8
0
def train(args):
    """
    Main script: train a siamese LeNet on MNIST pairs with contrastive loss.
    """

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    margin = 1.0  # Margin for contrastive loss.

    # TRAIN
    # Create input variables.
    image0 = nn.Variable([args.batch_size, 1, 28, 28])
    image1 = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size])
    # Create prediction graph.
    pred = mnist_lenet_siamese(image0, image1, test=False)
    # Create loss function.
    loss = F.mean(contrastive_loss(pred, label, margin))

    # TEST
    # Create input variables (separate graph in test mode).
    vimage0 = nn.Variable([args.batch_size, 1, 28, 28])
    vimage1 = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size])
    # Create prediction graph.
    vpred = mnist_lenet_siamese(vimage0, vimage1, test=True)
    vloss = F.mean(contrastive_loss(vpred, vlabel, margin))

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    start_point = 0
    if args.checkpoint is not None:
        # load weights and solver state info from specified checkpoint file.
        start_point = load_checkpoint(args.checkpoint, solver)

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_vloss = M.MonitorSeries("Test loss", monitor, interval=10)

    # Initialize DataIterator for MNIST.
    rng = np.random.RandomState(313)  # fixed seed for reproducible pairing
    data = siamese_data_iterator(args.batch_size, True, rng)
    vdata = siamese_data_iterator(args.batch_size, False, rng)

    # Training loop (resumes from checkpoint iteration when provided).
    for i in range(start_point, args.max_iter):
        if i % args.val_interval == 0:
            # Validation: average the test loss over `val_iter` batches.
            ve = 0.0
            for j in range(args.val_iter):
                vimage0.d, vimage1.d, vlabel.d = vdata.next()
                vloss.forward(clear_buffer=True)
                ve += vloss.d
            monitor_vloss.add(i, ve / args.val_iter)
        if i % args.model_save_interval == 0:
            # save checkpoint file
            save_checkpoint(args.model_save_path, i, solver)
        image0.d, image1.d, label.d = data.next()
        solver.zero_grad()
        # Training forward, backward and update
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        monitor_loss.add(i, loss.d.copy())
        monitor_time.add(i)

    # Save final parameters after training completes.
    parameter_file = os.path.join(args.model_save_path,
                                  'params_%06d.h5' % args.max_iter)
    nn.save_parameters(parameter_file)
Esempio n. 9
0
def train():
    """
    Main script: train DeepLabv3+ semantic segmentation from a pretrained
    backbone, with optional multi-device (distributed) training, gradient
    accumulation, linear LR warmup and poly LR decay.
    """

    args = get_args()

    # Load pretrained weights; optionally drop the final logits layer so a
    # new class head can be learned during fine-tuning.
    _ = nn.load_parameters(args.pretrained_model_path)
    if args.fine_tune:
        nnabla.parameter.pop_parameter('decoder/logits/affine/conv/W')
        nnabla.parameter.pop_parameter('decoder/logits/affine/conv/b')

    n_train_samples = args.train_samples
    n_val_samples = args.val_samples
    distributed = args.distributed
    compute_acc = args.compute_acc

    if distributed:
        # Communicator and Context
        from nnabla.ext_utils import get_extension_context
        extension_module = "cudnn"
        ctx = get_extension_context(
            extension_module, type_config=args.type_config)
        comm = C.MultiProcessDataParalellCommunicator(ctx)
        comm.init()
        n_devices = comm.size
        mpi_rank = comm.rank
        device_id = mpi_rank
        ctx.device_id = str(device_id)
        nn.set_default_context(ctx)
    else:
        # Get context.
        from nnabla.ext_utils import get_extension_context
        extension_module = args.context
        if args.context is None:
            extension_module = 'cpu'
        logger.info("Running in %s" % extension_module)
        ctx = get_extension_context(
            extension_module, device_id=args.device_id, type_config=args.type_config)
        nn.set_default_context(ctx)
        n_devices = 1
        device_id = 0

    # training data
    data = data_iterator_segmentation(
            args.train_samples, args.batch_size, args.train_dir, args.train_label_dir, target_width=args.image_width, target_height=args.image_height)
    # validation data
    vdata = data_iterator_segmentation(args.val_samples, args.batch_size, args.val_dir,
                                       args.val_label_dir, target_width=args.image_width, target_height=args.image_height)

    # Each worker processes its own disjoint slice of the data.
    if distributed:
        data = data.slice(
            rng=None, num_of_slices=n_devices, slice_pos=device_id)
        vdata = vdata.slice(
            rng=None, num_of_slices=n_devices, slice_pos=device_id)
    num_classes = args.num_class

    # Workaround to start with the same initialized weights for all workers.
    np.random.seed(313)
    t_model = get_model(
        args, test=False)
    t_model.pred.persistent = True  # Not clearing buffer of pred in backward
    t_pred2 = t_model.pred.unlinked()
    # Masked top-n error, normalized by the number of valid (masked-in) pixels.
    t_e = F.sum(F.top_n_error(t_pred2, t_model.label, axis=1)
                * t_model.mask) / F.sum(t_model.mask)

    v_model = get_model(
        args, test=True)
    v_model.pred.persistent = True  # Not clearing buffer of pred in forward
    v_pred2 = v_model.pred.unlinked()
    # BUG FIX: the denominator previously used the *training* mask
    # (t_model.mask); the validation error must be normalized by the
    # validation mask.
    v_e = F.sum(F.top_n_error(v_pred2, v_model.label, axis=1)
                * v_model.mask) / F.sum(v_model.mask)

    # Create Solver
    solver = S.Momentum(args.learning_rate, 0.9)
    solver.set_parameters(nn.get_parameters())

    # Load checkpoint
    start_point = 0
    if args.checkpoint is not None:
        # load weights and solver state info from specified checkpoint file.
        start_point = load_checkpoint(args.checkpoint, solver)

    # Setting warmup: ramp the LR linearly from base_lr up over warmup_iter.
    base_lr = args.learning_rate / n_devices
    warmup_iter = int(1. * n_train_samples /
                      args.batch_size / args.accum_grad / n_devices) * args.warmup_epoch
    warmup_slope = base_lr * (n_devices - 1) / warmup_iter
    solver.set_learning_rate(base_lr)

    # Create monitor
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = M.MonitorSeries("Training error", monitor, interval=10)
    monitor_vloss = M.MonitorSeries("Validation loss", monitor, interval=1)
    monitor_verr = M.MonitorSeries("Validation error", monitor, interval=1)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=10)
    monitor_miou = M.MonitorSeries("mean IOU", monitor, interval=10)
    monitor_vtime = M.MonitorTimeElapsed(
        "Validation time", monitor, interval=1)

    # save_nnp: export the untrained inference graph for deployment tooling.
    contents = save_nnp({'x': v_model.image}, {
                        'y': v_model.pred}, args.batch_size)
    save.save(os.path.join(args.model_save_path,
                           'Deeplabv3plus_result_epoch0.nnp'), contents, variable_batch_size=False)

    # Training loop
    for i in range(start_point, int(args.max_iter / n_devices)):
        # Save parameters (root worker only)
        if i % (args.model_save_interval // n_devices) == 0 and device_id == 0:
            save_checkpoint(args.model_save_path, i, solver)
        # Validation
        if i % (args.val_interval // n_devices) == 0 and i != 0:
            vmiou_local = 0.
            val_iter_local = n_val_samples // args.batch_size
            vl_local = nn.NdArray()
            vl_local.zero()
            ve_local = nn.NdArray()
            ve_local.zero()
            for j in range(val_iter_local):
                images, labels, masks = vdata.next()
                v_model.image.d = images
                v_model.label.d = labels
                v_model.mask.d = masks
                v_model.image.data.cast(np.float32, ctx)
                v_model.label.data.cast(np.int32, ctx)
                v_model.loss.forward(clear_buffer=True)
                v_e.forward(clear_buffer=True)
                vl_local += v_model.loss.data
                ve_local += v_e.data
                # Mean IOU computation
                if compute_acc:
                    vmiou_local += compute_miou(num_classes, labels,
                                                np.argmax(v_model.pred.d, axis=1), masks)

            vl_local /= val_iter_local
            ve_local /= val_iter_local
            if compute_acc:
                vmiou_local /= val_iter_local
                vmiou_ndarray = nn.NdArray.from_numpy_array(
                    np.array(vmiou_local))
            if distributed:
                # Average validation stats across workers.
                comm.all_reduce(vl_local, division=True, inplace=True)
                comm.all_reduce(ve_local, division=True, inplace=True)
                if compute_acc:
                    comm.all_reduce(vmiou_ndarray, division=True, inplace=True)

            if device_id == 0:
                monitor_vloss.add(i * n_devices, vl_local.data.copy())
                monitor_verr.add(i * n_devices, ve_local.data.copy())
                if compute_acc:
                    monitor_miou.add(i * n_devices, vmiou_local)
                monitor_vtime.add(i * n_devices)

        # Training
        l = 0.0
        e = 0.0
        solver.zero_grad()

        e_acc = nn.NdArray(t_e.shape)
        e_acc.zero()
        l_acc = nn.NdArray(t_model.loss.shape)
        l_acc.zero()
        # Gradient accumulation loop
        for j in range(args.accum_grad):
            images, labels, masks = data.next()
            t_model.image.d = images
            t_model.label.d = labels
            t_model.mask.d = masks
            t_model.image.data.cast(np.float32, ctx)
            t_model.label.data.cast(np.int32, ctx)
            t_model.loss.forward(clear_no_need_grad=True)
            t_model.loss.backward(clear_buffer=True)  # Accumulating gradients
            t_e.forward(clear_buffer=True)
            e_acc += t_e.data
            l_acc += t_model.loss.data

        # AllReduce: sum gradients across workers, average the stats.
        if distributed:
            params = [x.grad for x in nn.get_parameters().values()]
            comm.all_reduce(params, division=False, inplace=False)
            comm.all_reduce(l_acc, division=True, inplace=True)
            comm.all_reduce(e_acc, division=True, inplace=True)
        solver.scale_grad(1./args.accum_grad)
        solver.weight_decay(args.weight_decay)
        solver.update()

        # Linear Warmup
        if i <= warmup_iter:
            lr = base_lr + warmup_slope * i
            solver.set_learning_rate(lr)

        if distributed:
            # Synchronize by averaging the weights over devices using allreduce
            if (i+1) % args.sync_weight_every_itr == 0:
                weights = [x.data for x in nn.get_parameters().values()]
                comm.all_reduce(weights, division=True, inplace=True)

        if device_id == 0:
            monitor_loss.add(
                i * n_devices, (l_acc / args.accum_grad).data.copy())
            monitor_err.add(
                i * n_devices, (e_acc / args.accum_grad).data.copy())
            monitor_time.add(i * n_devices)

        # Learning rate decay at scheduled iter --> changed to poly learning rate decay policy
        # NOTE(review): poly decay conventionally uses power 0.9; 0.1 kept
        # as-is to preserve behavior — confirm against the training recipe.
        solver.set_learning_rate(base_lr * ((1 - i / args.max_iter)**0.1))

    if device_id == 0:
        nn.save_parameters(os.path.join(args.model_save_path,
                                        'param_%06d.h5' % args.max_iter))

    # Export the final trained inference graph.
    contents = save_nnp({'x': v_model.image}, {
                        'y': v_model.pred}, args.batch_size)
    save.save(os.path.join(args.model_save_path,
                           'Deeplabv3plus_result.nnp'), contents, variable_batch_size=False)
Esempio n. 10
0
def train():
    """
    Main script: train a ResNet classifier on Tiny ImageNet with gradient
    accumulation and stepwise learning-rate decay.
    """

    args = get_args()

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Dataset
    # We use Tiny ImageNet from Stanford CS231N class.
    # https://tiny-imagenet.herokuapp.com/
    # Tiny ImageNet consists of 200 categories, each category has 500 images
    # in training set. The image size is 64x64. To adapt ResNet into 64x64
    # image inputs, the input image size of ResNet is set as 56x56, and
    # the stride in the first conv and the first max pooling are removed.
    data = data_iterator_tiny_imagenet(args.batch_size, 'train')
    vdata = data_iterator_tiny_imagenet(args.batch_size, 'val')

    num_classes = 200
    tiny = True  # TODO: Switch ILSVRC2012 dataset and TinyImageNet.
    # Separate train-mode and test-mode graphs over the same parameters.
    t_model = get_model(
        args, num_classes, test=False, tiny=tiny)
    t_model.pred.persistent = True  # Not clearing buffer of pred in backward
    v_model = get_model(
        args, num_classes, test=True, tiny=tiny)
    v_model.pred.persistent = True  # Not clearing buffer of pred in forward

    # Create Solver.
    solver = S.Momentum(args.learning_rate, 0.9)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = M.MonitorSeries("Training error", monitor, interval=10)
    monitor_vloss = M.MonitorSeries("Validation loss", monitor, interval=10)
    monitor_verr = M.MonitorSeries("Validation error", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=10)

    # Training loop.
    for i in range(args.max_iter):
        # Save parameters
        if i % args.model_save_interval == 0:
            nn.save_parameters(os.path.join(
                args.model_save_path, 'param_%06d.h5' % i))

        # Validation: average loss/error over val_iter batches.
        if i % args.val_interval == 0:

            # Clear all intermediate memory to save memory.
            # t_model.loss.clear_recursive()

            l = 0.0
            e = 0.0
            for j in range(args.val_iter):
                images, labels = vdata.next()
                v_model.image.d = images
                v_model.label.d = labels
                # Cast inputs to compact dtypes on the compute device.
                v_model.image.data.cast(np.uint8, ctx)
                v_model.label.data.cast(np.int32, ctx)
                v_model.loss.forward(clear_buffer=True)
                l += v_model.loss.d
                e += categorical_error(v_model.pred.d, v_model.label.d)
            monitor_vloss.add(i, l / args.val_iter)
            monitor_verr.add(i, e / args.val_iter)

            # Clear all intermediate memory to save memory.
            # v_model.loss.clear_recursive()

        # Training
        l = 0.0
        e = 0.0
        solver.zero_grad()

        # Gradient accumulation loop: backward() accumulates into .grad,
        # effectively multiplying the batch size by accum_grad.
        for j in range(args.accum_grad):
            images, labels = data.next()
            t_model.image.d = images
            t_model.label.d = labels
            t_model.image.data.cast(np.uint8, ctx)
            t_model.label.data.cast(np.int32, ctx)
            t_model.loss.forward(clear_no_need_grad=True)
            t_model.loss.backward(clear_buffer=True)  # Accumulating gradients
            l += t_model.loss.d
            e += categorical_error(t_model.pred.d, t_model.label.d)
        solver.weight_decay(args.weight_decay)
        solver.update()
        monitor_loss.add(i, l / args.accum_grad)
        monitor_err.add(i, e / args.accum_grad)
        monitor_time.add(i)

        # Learning rate decay at scheduled iter
        if i in args.learning_rate_decay_at:
            solver.set_learning_rate(solver.learning_rate() * 0.1)
    nn.save_parameters(os.path.join(args.model_save_path,
                                    'param_%06d.h5' % args.max_iter))
def train():
    """
    Main script.

    Naive Multi-Device Training

    NOTE: the communicator exposes low-level interfaces

    * Parse command line arguments.
    * Instantiate a communicator and set parameter variables.
    * Specify contexts for computation.
    * Initialize DataIterator.
    * Construct a computation graph for training and one for validation.
    * Initialize solver and set parameter variables to that.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Computate error rate for validation data (periodically)
      * Get a next minibatch.
      * Execute forwardprop
      * Set parameter gradients zero
      * Execute backprop.
      * Inplace allreduce (THIS IS THE MAIN difference from a single device training)
      * Solver updates parameters by using gradients computed by backprop.
      * Compute training error

    """

    args = get_args()
    if args.tiny_mode:
        n_train_samples = 100000
    else:
        n_train_samples = 1282167

    # Communicator and Context: one process per device, cudnn backend.
    from nnabla.ext_utils import get_extension_context
    extension_module = "cudnn"
    ctx = get_extension_context(extension_module, type_config=args.type_config)
    comm = C.MultiProcessDataParalellCommunicator(ctx)
    comm.init()
    n_devices = comm.size
    mpi_rank = comm.rank
    device_id = mpi_rank
    ctx.device_id = str(device_id)
    nn.set_default_context(ctx)

    # workaround to start with the same parameters.
    rng = np.random.RandomState(device_id)
    if args.tiny_mode:
        # We use Tiny ImageNet from Stanford CS231N class.
        # (Tiny ImageNet, https://tiny-imagenet.herokuapp.com/)
        # Tiny ImageNet consists of 200 categories, each category has 500 images
        # in training set. The image size is 64x64. To adapt ResNet into 64x64
        # image inputs, the input image size of ResNet is set as 56x56, and
        # the stride in the first conv and the first max pooling are removed.
        # Please check README.
        data = data_iterator_tiny_imagenet(args.batch_size, 'train')
        vdata = data_iterator_tiny_imagenet(args.batch_size, 'val')
        num_classes = 200
    else:
        # We use ImageNet.
        # (ImageNet, https://imagenet.herokuapp.com/)
        # ImageNet consists of 1000 categories, each category has 1280 images
        # in training set. The image size is various. To adapt ResNet into
        # 320x320 image inputs, the input image size of ResNet is set as
        # 224x224. We need to get tar file and create cache file(320x320 images).
        # Please check README.
        data = data_iterator_imagenet(args.batch_size,
                                      args.train_cachefile_dir,
                                      rng=rng)
        vdata = data_iterator_imagenet(args.batch_size, args.val_cachefile_dir)
        # Each worker validates its own slice of the validation set.
        vdata = vdata.slice(rng=None,
                            num_of_slices=n_devices,
                            slice_pos=device_id)
        num_classes = 1000
    # Workaround to start with the same initialized weights for all workers.
    np.random.seed(313)
    t_model = get_model(args, num_classes, test=False, tiny=args.tiny_mode)
    t_model.pred.persistent = True  # Not clearing buffer of pred in backward
    t_pred2 = t_model.pred.unlinked()
    t_e = F.mean(F.top_n_error(t_pred2, t_model.label))
    v_model = get_model(args, num_classes, test=True, tiny=args.tiny_mode)
    v_model.pred.persistent = True  # Not clearing buffer of pred in forward
    v_pred2 = v_model.pred.unlinked()
    v_e = F.mean(F.top_n_error(v_pred2, v_model.label))

    # Add parameters to communicator.
    comm.add_context_and_parameters((ctx, nn.get_parameters()))

    # Create Solver.
    solver = S.Momentum(args.learning_rate, 0.9)
    solver.set_parameters(nn.get_parameters())

    # Setting warmup: linearly ramp the LR over the first warmup epochs.
    base_lr = args.learning_rate / n_devices
    warmup_iter = int(1. * n_train_samples / args.batch_size /
                      args.accum_grad / n_devices) * args.warmup_epoch
    warmup_slope = base_lr * (n_devices - 1) / warmup_iter
    solver.set_learning_rate(base_lr)

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = M.MonitorSeries("Training error", monitor, interval=10)
    monitor_vloss = M.MonitorSeries("Validation loss", monitor, interval=1)
    monitor_verr = M.MonitorSeries("Validation error", monitor, interval=1)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=10)
    monitor_vtime = M.MonitorTimeElapsed("Validation time",
                                         monitor,
                                         interval=1)

    # Training loop.
    # `vl`/`ve` hold validation stats so they can be all-reduced in-place.
    vl = nn.Variable()
    ve = nn.Variable()
    for i in range(int(args.max_iter / n_devices)):
        # Save parameters (root worker only)
        if i % (args.model_save_interval // n_devices) == 0 and device_id == 0:
            nn.save_parameters(
                os.path.join(args.model_save_path, 'param_%06d.h5' % i))

        # Validation
        if i % (args.val_interval // n_devices) == 0 and i != 0:
            ve_local = 0.
            vl_local = 0.
            val_iter_local = args.val_iter // n_devices
            for j in range(val_iter_local):
                images, labels = vdata.next()
                v_model.image.d = images
                v_model.label.d = labels
                v_model.image.data.cast(np.uint8, ctx)
                v_model.label.data.cast(np.int32, ctx)
                v_model.loss.forward(clear_buffer=True)
                v_e.forward(clear_buffer=True)
                vl_local += v_model.loss.d.copy()
                ve_local += v_e.d.copy()
            # Average over the local batches, then average across devices.
            vl_local /= val_iter_local
            vl.d = vl_local
            comm.all_reduce(vl.data, division=True, inplace=True)
            ve_local /= val_iter_local
            ve.d = ve_local
            comm.all_reduce(ve.data, division=True, inplace=True)

            if device_id == 0:
                monitor_vloss.add(i * n_devices, vl.d.copy())
                monitor_verr.add(i * n_devices, ve.d.copy())
                monitor_vtime.add(i * n_devices)

        # Training
        l = 0.0
        e = 0.0
        solver.zero_grad()

        def accumulate_error(l, e, t_model, t_e):
            # Read back the host values of loss/error from the last forward.
            l += t_model.loss.d
            e += t_e.d
            return l, e

        # Gradient accumulation loop
        for j in range(args.accum_grad):
            images, labels = data.next()
            if j != 0:
                # Update e and l according to previous results of forward
                # propagation.
                # The update of last iteration is performed
                # after solver update to avoid unnecessary CUDA synchronization.
                # This is performed after data.next() in order to overlap
                # the data loading and graph execution.
                # TODO: Move this to the bottom of the loop when prefetch
                # data loader is available.
                l, e = accumulate_error(l, e, t_model, t_e)
            t_model.image.d = images
            t_model.label.d = labels
            t_model.image.data.cast(np.uint8, ctx)
            t_model.label.data.cast(np.int32, ctx)
            t_model.loss.forward(clear_no_need_grad=True)
            t_model.loss.backward(clear_buffer=True)  # Accumulating gradients
            t_e.forward(clear_buffer=True)

        # AllReduce: sum gradients across devices before the update.
        params = [x.grad for x in nn.get_parameters().values()]
        comm.all_reduce(params, division=False, inplace=False)

        # Update
        solver.weight_decay(args.weight_decay)
        solver.update()

        # Accumulate errors after solver update
        l, e = accumulate_error(l, e, t_model, t_e)

        # Linear Warmup
        if i <= warmup_iter:
            lr = base_lr + warmup_slope * i
            solver.set_learning_rate(lr)

        # Synchronize by averaging the weights over devices using allreduce
        if (i + 1) % args.sync_weight_every_itr == 0:
            weights = [x.data for x in nn.get_parameters().values()]
            comm.all_reduce(weights, division=True, inplace=True)

        if device_id == 0:
            monitor_loss.add(i * n_devices, l / args.accum_grad)
            monitor_err.add(i * n_devices, e / args.accum_grad)
            monitor_time.add(i * n_devices)

        # Learning rate decay at scheduled iter
        if i * n_devices in args.learning_rate_decay_at:
            solver.set_learning_rate(solver.learning_rate() * 0.1)

    if device_id == 0:
        nn.save_parameters(
            os.path.join(args.model_save_path,
                         'param_%06d.h5' % (args.max_iter / n_devices)))
Esempio n. 12
0
def train(args):
    """
    Train a DCGAN-style generator/discriminator pair together with a
    vectorizer (encoder) on kanji image data.

    Builds three sub-graphs — an autoencoding path (x1 -> vectorizer ->
    generator), a GAN generator loss, and a discriminator loss over fake and
    real images — creates one Adam solver per sub-network, runs the training
    loop, and saves generator/discriminator parameters periodically and at
    the end.
    """

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN

    # Fake path: encode a real image into a latent code, decode it back with
    # the generator, and score the result with the discriminator.
    x1 = nn.Variable([args.batch_size, 1, 28, 28])

    #z = nn.Variable([args.batch_size, VEC_SIZE, 1, 1])
    #z = vectorizer(x1,maxh = 1024)
    #fake = generator(z,maxh= 1024)
    z = vectorizer(x1)
    fake = generator(z)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    # Generator loss: make the discriminator predict "real" (1) on fakes.
    loss_gen = F.mean(
        F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape)))
    # Vectorizer (autoencoder) loss: pixel-wise reconstruction error.
    loss_vec = F.mean(F.squared_error(fake, x1))
    # Unlinked copy so discriminator gradients do not flow into the generator.
    fake_dis = fake.unlinked()
    pred_fake_dis = discriminator(fake_dis)
    # Discriminator loss, fake half: predict "fake" (0) on generated images.
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(pred_fake_dis,
                                F.constant(0, pred_fake_dis.shape)))

    # Real path: discriminator should predict "real" (1) on dataset images.
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(
        F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    solver_vec = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("vec"):
        solver_vec.set_parameters(nn.get_parameters())
    # NOTE(review): this second set_parameters call on solver_vec uses the
    # default reset=True, which appears to replace the "vec" parameters just
    # registered above with the "gen" ones — confirm whether both scopes were
    # meant to be registered on solver_vec.
    with nn.parameter_scope("gen"):
        solver_vec.set_parameters(nn.get_parameters())
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries("Discriminator loss",
                                       monitor,
                                       interval=10)
    monitor_loss_vec = M.MonitorSeries("Vectorizer loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    monitor_fake = M.MonitorImageTile("Fake images",
                                      monitor,
                                      normalize_method=lambda x: x + 1 / 2.)
    monitor_vec1 = M.MonitorImageTile("vec images1",
                                      monitor,
                                      normalize_method=lambda x: x + 1 / 2.)
    monitor_vec2 = M.MonitorImageTile("vec images2",
                                      monitor,
                                      normalize_method=lambda x: x + 1 / 2.)

    #data = data_iterator_mnist(args.batch_size, True)
    data = iterator.simple_data_iterator(load_kanji_data(), args.batch_size,
                                         True)

    # Training loop.
    for i in range(args.max_iter):
        # Periodically checkpoint generator and discriminator parameters.
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("gen"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "generator_param_%06d.h5" % i))
            with nn.parameter_scope("dis"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "discriminator_param_%06d.h5" % i))

        # Training forward
        image, _ = data.next()

        x1.d = image / 255. - 0.5  # [0, 255] -> [-0.5, 0.5]
        # Vectorizer (autoencoder) update.
        solver_vec.zero_grad()
        loss_vec.forward(clear_no_need_grad=True)
        loss_vec.backward(clear_buffer=True)
        solver_vec.weight_decay(args.weight_decay)
        solver_vec.update()
        # `fake` is persistent, so its data survives the backward above and
        # can be visualized here without another forward.
        monitor_vec1.add(i, fake)
        monitor_vec2.add(i, x1)
        monitor_loss_vec.add(i, loss_vec.d.copy())

        x.d = image / 255. - 0.5  # [0, 255] -> [-0.5, 0.5]
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    # Final save after the loop (i is the last iteration index).
    with nn.parameter_scope("gen"):
        nn.save_parameters(
            os.path.join(args.model_save_path, "generator_param_%06d.h5" % i))
    with nn.parameter_scope("dis"):
        nn.save_parameters(
            os.path.join(args.model_save_path,
                         "discriminator_param_%06d.h5" % i))
def train_transformer(config, netG, netD, solver_netG, solver_netD,
                      train_iterators, monitor):
    """
    Train a pair of boundary-map transformers (A->B and B->A) adversarially.

    Builds an LSGAN-style graph with two generators and two discriminators,
    optionally adds a cycle-consistency loss and a PCA-based shape loss
    (both gated by `config["train"]`), then runs an epoch-based training
    loop that alternates discriminator and generator updates, logs scalar
    and image monitors, and periodically saves all four networks.

    Parameters
    ----------
    config : dict
        Training configuration (loss weights, epochs, batch size, monitor
        intervals, parameter paths).
    netG, netD : dict
        Generator / discriminator network functions keyed by name
        ('netG_A2B', 'netG_B2A' / 'netD_A', 'netD_B').
    solver_netG, solver_netD : dict
        Solvers keyed the same way as netG / netD.
    train_iterators : tuple
        (source iterator, target iterator) of boundary-map batches.
    monitor : nnabla.monitor.Monitor
        Root monitor; its save path is also used for checkpoints.
    """

    netG_A2B, netG_B2A = netG['netG_A2B'], netG['netG_B2A']
    netD_A, netD_B = netD['netD_A'], netD['netD_B']
    solver_netG_AB, solver_netG_BA = solver_netG['netG_A2B'], solver_netG[
        'netG_B2A']
    solver_netD_A, solver_netD_B = solver_netD['netD_A'], solver_netD['netD_B']

    train_iterator_src, train_iterator_trg = train_iterators

    # Cycle loss is enabled only when configured with a positive weight.
    if config["train"][
            "cycle_loss"] and config["train"]["cycle_loss"]["lambda"] > 0:
        print(
            f'Applying Cycle Loss, weight: {config["train"]["cycle_loss"]["lambda"]}.'
        )
        with_cycle_loss = True
    else:
        with_cycle_loss = False

    # Shape loss (alignment network + PCA projection) likewise.
    if config["train"][
            "shape_loss"] and config["train"]["shape_loss"]["lambda"] > 0:
        print(
            f'Applying Shape Loss using PCA, weight: {config["train"]["shape_loss"]["lambda"]}.'
        )
        with_shape_loss = True
    else:
        with_shape_loss = False

    # Load boundary image to get Variable shapes
    bod_map_A = train_iterator_src.next()[0]
    bod_map_B = train_iterator_trg.next()[0]
    real_bod_map_A = nn.Variable(bod_map_A.shape)
    real_bod_map_B = nn.Variable(bod_map_B.shape)
    real_bod_map_A.persistent, real_bod_map_B.persistent = True, True

    ################### Graph Construction ####################
    # Generator
    with nn.parameter_scope('netG_transformer'):
        with nn.parameter_scope('netG_A2B'):
            fake_bod_map_B = netG_A2B(
                real_bod_map_A, test=False,
                norm_type=config["norm_type"])  # (1, 15, 64, 64)
        with nn.parameter_scope('netG_B2A'):
            fake_bod_map_A = netG_B2A(
                real_bod_map_B, test=False,
                norm_type=config["norm_type"])  # (1, 15, 64, 64)
    fake_bod_map_B.persistent, fake_bod_map_A.persistent = True, True

    # Unlinked variables cut the graph so discriminator/cycle losses do not
    # backprop into the generators directly; generator grads are propagated
    # manually in the loop via fake_bod_map_{A,B}.backward().
    fake_bod_map_B_unlinked = fake_bod_map_B.get_unlinked_variable()
    fake_bod_map_A_unlinked = fake_bod_map_A.get_unlinked_variable()

    # Reconstruct images if cycle loss is applied.
    if with_cycle_loss:
        with nn.parameter_scope('netG_transformer'):
            with nn.parameter_scope('netG_B2A'):
                recon_bod_map_A = netG_B2A(
                    fake_bod_map_B_unlinked,
                    test=False,
                    norm_type=config["norm_type"])  # (1, 15, 64, 64)
            with nn.parameter_scope('netG_A2B'):
                recon_bod_map_B = netG_A2B(
                    fake_bod_map_A_unlinked,
                    test=False,
                    norm_type=config["norm_type"])  # (1, 15, 64, 64)
        recon_bod_map_A.persistent, recon_bod_map_B.persistent = True, True

    # Discriminator
    with nn.parameter_scope('netD_transformer'):
        with nn.parameter_scope('netD_A'):
            pred_fake_A = netD_A(fake_bod_map_A_unlinked, test=False)
            pred_real_A = netD_A(real_bod_map_A, test=False)
        with nn.parameter_scope('netD_B'):
            pred_fake_B = netD_B(fake_bod_map_B_unlinked, test=False)
            pred_real_B = netD_B(real_bod_map_B, test=False)
    real_target = F.constant(1, pred_fake_A.shape)
    fake_target = F.constant(0, pred_real_A.shape)

    ################### Loss Definition ####################
    # Generator loss
    # LSGAN loss
    loss_gan_A = lsgan_loss(pred_fake_A, real_target)
    loss_gan_B = lsgan_loss(pred_fake_B, real_target)
    loss_gan_A.persistent, loss_gan_B.persistent = True, True
    loss_gan = loss_gan_A + loss_gan_B

    # Cycle loss
    if with_cycle_loss:
        loss_cycle_A = recon_loss(recon_bod_map_A, real_bod_map_A)
        loss_cycle_B = recon_loss(recon_bod_map_B, real_bod_map_B)
        loss_cycle_A.persistent, loss_cycle_B.persistent = True, True
        loss_cycle = loss_cycle_A + loss_cycle_B

    # Shape loss: compare fixed-parameter alignment features, projected to
    # the first 3 PCA components, between real and translated maps.
    if with_shape_loss:
        with nn.parameter_scope("Align"):
            nn.load_parameters(
                config["train"]["shape_loss"]["align_param_path"])
            shape_bod_map_real_A = models.align_resnet(real_bod_map_A,
                                                       fix_parameters=True)
            shape_bod_map_fake_B = models.align_resnet(fake_bod_map_B_unlinked,
                                                       fix_parameters=True)

            shape_bod_map_real_B = models.align_resnet(real_bod_map_B,
                                                       fix_parameters=True)
            shape_bod_map_fake_A = models.align_resnet(fake_bod_map_A_unlinked,
                                                       fix_parameters=True)

        with nn.parameter_scope("PCA"):
            nn.load_parameters(config["train"]["shape_loss"]["PCA_param_path"])
            shape_bod_map_real_A = PF.affine(shape_bod_map_real_A,
                                             212,
                                             fix_parameters=True)
            shape_bod_map_real_A = shape_bod_map_real_A[:, :3]

            shape_bod_map_fake_B = PF.affine(shape_bod_map_fake_B,
                                             212,
                                             fix_parameters=True)
            shape_bod_map_fake_B = shape_bod_map_fake_B[:, :3]

            shape_bod_map_real_B = PF.affine(shape_bod_map_real_B,
                                             212,
                                             fix_parameters=True)
            shape_bod_map_real_B = shape_bod_map_real_B[:, :3]

            shape_bod_map_fake_A = PF.affine(shape_bod_map_fake_A,
                                             212,
                                             fix_parameters=True)
            shape_bod_map_fake_A = shape_bod_map_fake_A[:, :3]

        shape_bod_map_real_A.persistent, shape_bod_map_fake_A.persistent = True, True
        shape_bod_map_real_B.persistent, shape_bod_map_fake_B.persistent = True, True

        loss_shape_A = recon_loss(shape_bod_map_real_A, shape_bod_map_fake_B)
        loss_shape_B = recon_loss(shape_bod_map_real_B, shape_bod_map_fake_A)
        loss_shape_A.persistent, loss_shape_B.persistent = True, True
        loss_shape = loss_shape_A + loss_shape_B

    # Total Generator Loss
    loss_netG = loss_gan

    if with_cycle_loss:
        loss_netG += loss_cycle * config["train"]["cycle_loss"]["lambda"]

    if with_shape_loss:
        loss_netG += loss_shape * config["train"]["shape_loss"]["lambda"]

    # Discriminator loss
    loss_netD_A = lsgan_loss(pred_real_A, real_target) + \
        lsgan_loss(pred_fake_A, fake_target)
    loss_netD_B = lsgan_loss(pred_real_B, real_target) + \
        lsgan_loss(pred_fake_B, fake_target)
    loss_netD_A.persistent, loss_netD_B.persistent = True, True

    loss_netD = loss_netD_A + loss_netD_B

    ################### Setting Solvers ####################
    # Generator solver
    with nn.parameter_scope('netG_transformer'):
        with nn.parameter_scope('netG_A2B'):
            solver_netG_AB.set_parameters(nn.get_parameters())
        with nn.parameter_scope('netG_B2A'):
            solver_netG_BA.set_parameters(nn.get_parameters())

    # Discrimintar solver
    with nn.parameter_scope('netD_transformer'):
        with nn.parameter_scope('netD_A'):
            solver_netD_A.set_parameters(nn.get_parameters())
        with nn.parameter_scope('netD_B'):
            solver_netD_B.set_parameters(nn.get_parameters())

    ################### Create Monitors ####################
    interval = config["monitor"]["interval"]
    monitors_G_dict = {
        'loss_netG': loss_netG,
        'loss_gan_A': loss_gan_A,
        'loss_gan_B': loss_gan_B
    }

    if with_cycle_loss:
        monitors_G_dict.update({
            'loss_cycle_A': loss_cycle_A,
            'loss_cycle_B': loss_cycle_B
        })

    if with_shape_loss:
        monitors_G_dict.update({
            'loss_shape_A': loss_shape_A,
            'loss_shape_B': loss_shape_B
        })

    monitors_G = MonitorManager(monitors_G_dict, monitor, interval=interval)

    monitors_D_dict = {
        'loss_netD': loss_netD,
        'loss_netD_A': loss_netD_A,
        'loss_netD_B': loss_netD_B
    }
    monitors_D = MonitorManager(monitors_D_dict, monitor, interval=interval)

    monitor_time = nm.MonitorTimeElapsed('time_training',
                                         monitor,
                                         interval=interval)
    monitor_vis = nm.MonitorImage('result',
                                  monitor,
                                  interval=1,
                                  num_images=4,
                                  normalize_method=lambda x: x)

    # Dump training information
    with open(os.path.join(monitor._save_path, "training_info.yaml"),
              "w",
              encoding="utf-8") as f:
        f.write(yaml.dump(config))

    # Training
    epoch = config["train"]["epochs"]
    i = 0
    iter_per_epoch = train_iterator_src.size // config["train"][
        "batch_size"] + 1
    for e in range(epoch):
        logger.info(f'Epoch = {e} / {epoch}')
        train_iterator_src._reset()  # rewind the iterator
        train_iterator_trg._reset()  # rewind the iterator
        for _ in range(iter_per_epoch):
            bod_map_A = train_iterator_src.next()[0]
            bod_map_B = train_iterator_trg.next()[0]
            real_bod_map_A.d, real_bod_map_B.d = bod_map_A, bod_map_B

            # Generate fake image
            fake_bod_map_B.forward(clear_no_need_grad=True)
            fake_bod_map_A.forward(clear_no_need_grad=True)

            # Update Discriminator
            solver_netD_A.zero_grad()
            solver_netD_B.zero_grad()
            loss_netD.forward(clear_no_need_grad=True)
            loss_netD.backward(clear_buffer=True)
            if config["train"]["weight_decay"]:
                solver_netD_A.weight_decay(config["train"]["weight_decay"])
                solver_netD_B.weight_decay(config["train"]["weight_decay"])
            solver_netD_A.update()
            solver_netD_B.update()

            # Update Generator: zero the grads accumulated by the
            # discriminator pass (including those on the unlinked fakes),
            # then backprop the generator loss through the cut points.
            solver_netG_BA.zero_grad()
            solver_netG_AB.zero_grad()
            solver_netD_A.zero_grad()
            solver_netD_B.zero_grad()
            fake_bod_map_B_unlinked.grad.zero()
            fake_bod_map_A_unlinked.grad.zero()
            loss_netG.forward(clear_no_need_grad=True)
            loss_netG.backward(clear_buffer=True)
            # Propagate grads from the unlinked fakes back into the generators.
            fake_bod_map_B.backward(grad=None)
            fake_bod_map_A.backward(grad=None)
            solver_netG_AB.update()
            solver_netG_BA.update()

            # Monitors
            monitor_time.add(i)
            monitors_G.add(i)
            monitors_D.add(i)

            i += 1

        # Per-epoch visualization of real / translated (/ reconstructed) maps.
        images_to_visualize = [
            real_bod_map_A.d, fake_bod_map_B.d, real_bod_map_B.d
        ]
        if with_cycle_loss:
            images_to_visualize.extend(
                [recon_bod_map_A.d, fake_bod_map_A.d, recon_bod_map_B.d])
        else:
            images_to_visualize.extend([fake_bod_map_A.d])
        visuals = combine_images(images_to_visualize)
        monitor_vis.add(i, visuals)

        if e % config["monitor"]["save_interval"] == 0 or e == epoch - 1:
            # Save parameters of networks
            netG_B2A_save_path = os.path.join(monitor._save_path,
                                              f'netG_transformer_B2A_{e}.h5')
            netG_A2B_save_path = os.path.join(monitor._save_path,
                                              f'netG_transformer_A2B_{e}.h5')
            with nn.parameter_scope('netG_transformer'):
                with nn.parameter_scope('netG_A2B'):
                    nn.save_parameters(netG_A2B_save_path)
                with nn.parameter_scope('netG_B2A'):
                    nn.save_parameters(netG_B2A_save_path)

            netD_A_save_path = os.path.join(monitor._save_path,
                                            f'netD_transformer_A_{e}.h5')
            netD_B_save_path = os.path.join(monitor._save_path,
                                            f'netD_transformer_B_{e}.h5')
            with nn.parameter_scope('netD_transformer'):
                with nn.parameter_scope('netD_A'):
                    nn.save_parameters(netD_A_save_path)
                with nn.parameter_scope('netD_B'):
                    nn.save_parameters(netD_B_save_path)
def train(args):
    """
    Train a vectorizer (encoder) against a frozen, pre-trained generator.

    The vectorizer maps a 28x28 image to a latent code, the generator decodes
    it back to an image, and the mean squared reconstruction error between the
    decoded image and the reference image drives the update. The generator's
    weights are loaded from a checkpoint and are not registered in the solver,
    so only the vectorizer parameters (under the "dis" scope) are trained.

    Fixes vs. the original: Python 2 ``print`` statements converted to
    ``print()`` calls (the rest of this file requires Python 3), and the
    tab/space-mixed indentation normalized so the function actually parses.
    """

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Build the training graph: x -> vectorizer -> generator -> MSE vs x_ref.
    x = nn.Variable([args.batch_size, 1, 28, 28])
    x_ref = nn.Variable([args.batch_size, 1, 28, 28])
    pred_vec = vectorizer(x, test=False)
    print(pred_vec.shape)
    gen = generator(pred_vec, test=False)
    gen.persistent = True  # Keep decoded images available after backward.
    # Load pre-trained generator weights; they stay frozen because they are
    # never registered in the solver below.
    with nn.parameter_scope("gen"):
        nn.load_parameters(
            "/home/mizuochi/programing/font/dcgan_model_0220/generator_param_290000.h5")
    print("x_ref shape", x_ref.shape)
    print("gen shape", gen.shape)
    # Pixel-wise reconstruction loss between reference and decoded image.
    loss_dis = F.mean(F.squared_error(x_ref, gen))
    print(loss_dis.d)

    # Create Solver (vectorizer parameters only, kept under the "dis" scope).
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_dis = M.MonitorSeries(
        "Discriminator loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)

    #data = data_iterator_mnist(args.batch_size, True)
    data = iterator.simple_data_iterator(load_kanji_data(), args.batch_size,
                                         True)

    # Training loop.
    for i in range(args.max_iter):
        # Periodically checkpoint the vectorizer parameters.
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("dis"):
                nn.save_parameters(os.path.join(
                    args.model_save_path, "vectorizer_param_%06d.h5" % i))

        # Training forward: feed the same batch as input and reconstruction
        # target, scaled from [0, 255] to [-1, 1].
        buf, _ = data.next()
        x.d = buf * 2.0 / 255. - 1.0
        x_ref.d = buf * 2.0 / 255. - 1.0

        # Vectorizer update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    # Final save after the loop (i is the last iteration index).
    with nn.parameter_scope("dis"):
        nn.save_parameters(os.path.join(
            args.model_save_path, "vectorizer_param_%06d.h5" % i))
Esempio n. 15
0
def train(args):
    """
    Train a vectorizer + generator + discriminator on kanji images.

    Unlike the Esempio-12 variant, the vectorizer output feeds the generator
    twice: `fake2` (linked, for the reconstruction loss) and `fake` (through
    an unlinked latent `z`, for the GAN losses). Parameters of all three
    sub-networks are checkpointed periodically.
    """
    x1 = nn.Variable([args.batch_size, 1, 28, 28])
    z_vec = vectorizer(x1)
    # Unlinked latent: `z` shares data with z_vec but cuts the graph, so the
    # GAN path does not backprop into the vectorizer.
    z = z_vec.unlinked()
    fake2 = generator(z_vec)
    fake = generator(z)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    # Generator loss: make the discriminator predict "real" (1) on fakes.
    loss_gen = F.mean(
        F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape)))
    # Vectorizer (autoencoder) loss through the linked path.
    loss_vec = F.mean(F.squared_error(fake2, x1))
    # Unlinked copy so discriminator gradients do not reach the generator.
    fake_dis = fake.unlinked()
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(pred_fake_dis,
                                F.constant(0, pred_fake_dis.shape)))

    # Real path
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(
        F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    solver_vec = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("vec"):
        solver_vec.set_parameters(nn.get_parameters())
    # NOTE(review): this second set_parameters call on solver_vec uses the
    # default reset=True, which appears to replace the "vec" parameters just
    # registered above with the "gen" ones — confirm whether both scopes were
    # meant to be registered on solver_vec.
    with nn.parameter_scope("gen"):
        solver_vec.set_parameters(nn.get_parameters())
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries("Discriminator loss",
                                       monitor,
                                       interval=10)
    monitor_loss_vec = M.MonitorSeries("Vectorizer loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    monitor_fake = M.MonitorImageTile("Fake images",
                                      monitor,
                                      normalize_method=lambda x: x + 1 / 2.)
    monitor_vec1 = M.MonitorImageTile("vec images1",
                                      monitor,
                                      normalize_method=lambda x: x + 1 / 2.)
    monitor_vec2 = M.MonitorImageTile("vec images2",
                                      monitor,
                                      normalize_method=lambda x: x + 1 / 2.)

    #data = data_iterator_mnist(args.batch_size, True)
    data = iterator.simple_data_iterator(load_kanji_data(), args.batch_size,
                                         True)

    # Training loop.
    for i in range(args.max_iter):
        # Periodically checkpoint all three sub-networks.
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("gen"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "generator_param_%06d.h5" % i))
            with nn.parameter_scope("dis"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "discriminator_param_%06d.h5" % i))
            with nn.parameter_scope("vec"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "vectorizer_param_%06d.h5" % i))

        # Training forward
        image, _ = data.next()

        x1.d = image / 255. * 2 - 1.0  # [0, 255] -> [-1, 1]
        # Vectorizer (autoencoder) update.
        solver_vec.zero_grad()
        loss_vec.forward(clear_no_need_grad=True)
        loss_vec.backward(clear_buffer=True)
        solver_vec.weight_decay(args.weight_decay)
        solver_vec.update()
        # Re-run the linked path so fake2 holds fresh data for visualization.
        fake2.forward()
        monitor_vec1.add(i, fake2)
        monitor_vec2.add(i, x1)
        monitor_loss_vec.add(i, loss_vec.d.copy())

        x.d = image / 255. * 2 - 1.0  # [0, 255] to [-1, 1]
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)
Esempio n. 16
0
def train(args):
    """
    Train a DCGAN generator/discriminator pair on MNIST.

    Builds the fake path (random z -> generator -> discriminator) and the
    real path, creates one Adam solver per network, optionally resumes from
    a checkpoint, runs the training loop with scalar/image monitors, and
    saves both .h5 parameter files and .nnp network archives.
    """

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN

    # Fake path: sample a latent vector, generate, and discriminate.
    z = nn.Variable([args.batch_size, 100, 1, 1])
    fake = generator(z)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    # Generator loss: make the discriminator predict "real" (1) on fakes.
    loss_gen = F.mean(
        F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape)))
    # Unlinked copy so discriminator gradients do not reach the generator.
    fake_dis = fake.get_unlinked_variable(need_grad=True)
    fake_dis.need_grad = True  # TODO: Workaround until v1.0.2
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(pred_fake_dis,
                                F.constant(0, pred_fake_dis.shape)))

    # Real path: discriminator should predict "real" (1) on dataset images.
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(
        F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())
    start_point = 0

    if args.checkpoint is not None:
        # load weights and solver state info from specified checkpoint files.
        start_point = load_checkpoint(args.checkpoint, {
            "gen": solver_gen,
            "dis": solver_dis
        })

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries("Discriminator loss",
                                       monitor,
                                       interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    monitor_fake = M.MonitorImageTile("Fake images",
                                      monitor,
                                      normalize_method=lambda x: (x + 1) / 2.)

    data = data_iterator_mnist(args.batch_size, True)

    # Save_nnp: export initial (epoch-0) network definitions.
    contents = save_nnp({'x': z}, {'y': fake}, args.batch_size)
    save.save(
        os.path.join(args.model_save_path, 'Generator_result_epoch0.nnp'),
        contents)
    contents = save_nnp({'x': x}, {'y': pred_real}, args.batch_size)
    save.save(
        os.path.join(args.model_save_path, 'Discriminator_result_epoch0.nnp'),
        contents)

    # Training loop (resumes from start_point when a checkpoint was loaded).
    for i in range(start_point, args.max_iter):
        if i % args.model_save_interval == 0:
            save_checkpoint(args.model_save_path, i, {
                "gen": solver_gen,
                "dis": solver_dis
            })

        # Training forward
        image, _ = data.next()
        x.d = image / 255. - 0.5  # [0, 255] -> [-0.5, 0.5]
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    # Final save after the loop (i is the last iteration index).
    with nn.parameter_scope("gen"):
        nn.save_parameters(
            os.path.join(args.model_save_path, "generator_param_%06d.h5" % i))
    with nn.parameter_scope("dis"):
        nn.save_parameters(
            os.path.join(args.model_save_path,
                         "discriminator_param_%06d.h5" % i))

    # Save_nnp: export final network definitions.
    contents = save_nnp({'x': z}, {'y': fake}, args.batch_size)
    save.save(os.path.join(args.model_save_path, 'Generator_result.nnp'),
              contents)
    contents = save_nnp({'x': x}, {'y': pred_real}, args.batch_size)
    save.save(os.path.join(args.model_save_path, 'Discriminator_result.nnp'),
              contents)
Esempio n. 17
0
                  solver_gen_BA, solver_dis_A, solver_dis_B)

# define monitor
# Scalar monitors for the cycle/generator/discriminator losses plus a timer,
# and image-tile monitors for fakes from both translation directions.
monitor = M.Monitor(opt.monitor_path)
monitor_loss_cyc = M.MonitorSeries('Cycle loss',
                                   monitor,
                                   interval=opt.monitor_interval)
monitor_loss_gen = M.MonitorSeries('Generator loss',
                                   monitor,
                                   interval=opt.monitor_interval)

monitor_loss_dis = M.MonitorSeries('Discriminator loss',
                                   monitor,
                                   interval=opt.monitor_interval)
monitor_time = M.MonitorTimeElapsed('Time',
                                    monitor,
                                    interval=opt.monitor_interval)
monitor_A = M.MonitorImageTile('Fake images_A',
                               monitor,
                               normalize_method=lambda x: x + 1 / 2.,
                               interval=opt.generate_interval)
monitor_B = M.MonitorImageTile('Fake images_B',
                               monitor,
                               normalize_method=lambda x: x + 1 / 2.,
                               interval=opt.generate_interval)

# training loop
# Each update step returns the generated/reconstructed batches and losses.
for i in range(opt.max_iter):
    (x_A, x_AB, x_ABA, x_B, x_BA, x_BAB, loss_cyc, loss_gen,
     loss_dis) = updater.update(i)
Esempio n. 18
0
def main():
    """
    Main script.

    Trains an MLP classifier on MNIST with Virtual Adversarial Training
    (VAT): a supervised loss on labeled data plus an unsupervised smoothness
    loss on unlabeled data perturbed by an adversarial noise direction.

    Steps:
    * Get and set context.
    * Load Dataset
    * Initialize DataIterator.
    * Create Networks
    *   Net for Labeled Data
    *   Net for Unlabeled Data
    *   Net for Test Data
    * Create Solver.
    * Training Loop.
    *   Test
    *   Training
    *     by Labeled Data
    *       Calculate Supervised Loss
    *     by Unlabeled Data
    *       Calculate Virtual Adversarial Noise
    *       Calculate Unsupervised Loss
    """

    args = get_args()

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    shape_x = (1, 28, 28)  # MNIST image shape (channels, height, width)
    n_h = args.n_units
    n_y = args.n_class

    # Load MNIST Dataset
    from mnist_data import load_mnist, data_iterator_mnist
    images, labels = load_mnist(train=True)
    rng = np.random.RandomState(706)  # fixed seed for a reproducible split
    inds = rng.permutation(len(images))

    def feed_labeled(i):
        # Return (image, label) for the i-th shuffled sample.
        j = inds[i]
        return images[j], labels[j]

    def feed_unlabeled(i):
        # Same as feed_labeled; the label is discarded by the caller below,
        # so only the image is effectively used for the unsupervised branch.
        j = inds[i]
        return images[j], labels[j]

    di_l = data_iterator_simple(feed_labeled,
                                args.n_labeled,
                                args.batchsize_l,
                                shuffle=True,
                                rng=rng,
                                with_file_cache=False)
    di_u = data_iterator_simple(feed_unlabeled,
                                args.n_train,
                                args.batchsize_u,
                                shuffle=True,
                                rng=rng,
                                with_file_cache=False)
    di_v = data_iterator_mnist(args.batchsize_v, train=False)

    # Create networks
    # feed-forward-net building function
    def forward(x, test=False):
        return mlp_net(x, n_h, n_y, test)

    # Net for learning labeled data
    xl = nn.Variable((args.batchsize_l, ) + shape_x, need_grad=False)
    yl = forward(xl, test=False)
    tl = nn.Variable((args.batchsize_l, 1), need_grad=False)
    loss_l = F.mean(F.softmax_cross_entropy(yl, tl))

    # Net for learning unlabeled data
    xu = nn.Variable((args.batchsize_u, ) + shape_x, need_grad=False)
    yu = forward(xu, test=False)
    # Detach the clean prediction so VAT gradients do not flow back into it;
    # it serves as a fixed target distribution for the perturbed branches.
    y1 = yu.get_unlinked_variable()
    y1.need_grad = False

    noise = nn.Variable((args.batchsize_u, ) + shape_x, need_grad=True)
    # L2-normalize the noise per sample. NOTE(review): no epsilon in the
    # denominator — an all-zero noise sample would divide by zero (the
    # Gaussian initialization below makes this practically impossible).
    r = noise / (F.sum(noise**2, [1, 2, 3], keepdims=True))**0.5
    r.persistent = True
    # y2: small perturbation (xi) used for the power-method gradient;
    # y3: full-size perturbation (eps) used for the actual VAT loss.
    y2 = forward(xu + args.xi_for_vat * r, test=False)
    y3 = forward(xu + args.eps_for_vat * r, test=False)
    loss_k = F.mean(distance(y1, y2))
    loss_u = F.mean(distance(y1, y3))

    # Net for evaluating validation data
    xv = nn.Variable((args.batchsize_v, ) + shape_x, need_grad=False)
    hv = forward(xv, test=True)
    tv = nn.Variable((args.batchsize_v, 1), need_grad=False)
    err = F.mean(F.top_n_error(hv, tv, n=1))

    # Create solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Monitor training and validation stats.
    import nnabla.monitor as M
    monitor = M.Monitor(args.model_save_path)
    monitor_verr = M.MonitorSeries("Test error", monitor, interval=240)
    monitor_time = M.MonitorTimeElapsed("Elapsed time", monitor, interval=240)

    # Training Loop.
    t0 = time.time()

    for i in range(args.max_iter):

        # Validation Test
        if i % args.val_interval == 0:
            valid_error = calc_validation_error(di_v, xv, tv, err,
                                                args.val_iter)
            monitor_verr.add(i, valid_error)

        #################################
        ## Training by Labeled Data #####
        #################################

        # forward, backward and update
        xl.d, tl.d = di_l.next()
        xl.d = xl.d / 255  # scale pixel values to [0, 1]
        solver.zero_grad()
        loss_l.forward(clear_no_need_grad=True)
        loss_l.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        #################################
        ## Training by Unlabeled Data ###
        #################################

        # Calculate y without noise, only once.
        xu.d, _ = di_u.next()
        xu.d = xu.d / 255
        yu.forward(clear_buffer=True)

        ##### Calculate Adversarial Noise #####
        # Power-method iteration: repeatedly replace the noise with the
        # gradient of the distance w.r.t. the normalized perturbation, which
        # converges toward the most loss-increasing (adversarial) direction.
        noise.d = np.random.normal(size=xu.shape).astype(np.float32)
        for k in range(args.n_iter_for_power_method):
            r.grad.zero()
            loss_k.forward(clear_no_need_grad=True)
            loss_k.backward(clear_buffer=True)
            noise.data.copy_from(r.grad)

        ##### Calculate loss for unlabeled data #####
        # forward, backward and update
        solver.zero_grad()
        loss_u.forward(clear_no_need_grad=True)
        loss_u.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        ##### Learning rate update #####
        # NOTE(review): this triggers at i == 0 as well, so the very first
        # iteration already decays the learning rate once — confirm intended.
        if i % args.iter_per_epoch == 0:
            solver.set_learning_rate(solver.learning_rate() *
                                     args.learning_rate_decay)
        monitor_time.add(i)

    # Evaluate the final model by the error rate with validation dataset
    valid_error = calc_validation_error(di_v, xv, tv, err, args.val_iter)
    monitor_verr.add(i, valid_error)
    monitor_time.add(i)

    # Save the model.
    parameter_file = os.path.join(args.model_save_path,
                                  'params_%06d.h5' % args.max_iter)
    nn.save_parameters(parameter_file)
Esempio n. 19
0
def train(args):
    """Fit a network so its output image matches a fixed target image.

    Builds a single-sample graph ``y = network(x)``, compares it against the
    image loaded from ``test.png`` with a squared-error loss, and runs Adam
    for ``args.max_iter`` iterations, periodically dumping PNG snapshots and
    parameter files under ``args.model_save_path``.

    Parameters
    ----------
    args : Namespace
        Parsed command-line arguments (context, device_id, learning_rate,
        weight_decay, max_iter, model_save_interval, model/monitor paths).
    """
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    x = nn.Variable([1, 3, SIZE, SIZE])
    y = network(x)
    dataIn = makeInput()

    # Dump the untrained network's output once for reference.
    x.d = dataIn.copy()
    y.forward()
    img = makePng(y.d)
    img.save(os.path.join(args.model_save_path, "first.png"))

    # Target image the network output should converge to.
    output = nn.Variable([1, 3, SIZE, SIZE])
    dataOut = makeOutput("test.png")
    output.d = dataOut

    loss = F.mean(F.squared_error(y, output))

    # Re-initialize every parameter with standard-normal noise.
    param = nn.get_parameters()
    for i, j in param.items():
        param.get(i).d = np.random.randn(*(j.d.shape))

    solver = S.Adam(args.learning_rate, beta1=0.5)
    # NOTE(review): the parameters above were created outside the "net"
    # scope, so nn.get_parameters() inside this scope may be empty — confirm
    # that the solver actually receives the network's parameters.
    with nn.parameter_scope("net"):
        solver.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    monitor_gen = M.MonitorImageTile("gen images", monitor)

    with nn.parameter_scope("net"):
        param = nn.get_parameters()
        # BUGFIX: this was a Python 2 ``print`` statement, which is a syntax
        # error under Python 3.
        print(param.get("conv0/conv/W").d.reshape((16, 16))[:10, :10])

    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("net"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "generator_param_%06d.h5" % i))

        # Training forward
        x.d = dataIn.copy()
        # Generator update.
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        if i % 10 == 0:
            # Snapshot the current output image every 10 iterations.
            img = makePng(y.d)
            img.save(os.path.join(args.model_save_path, "output_%06d.png" % i))
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        monitor_gen.add(i, y)
        monitor_loss_gen.add(i, loss.d.copy())
        monitor_time.add(i)

    with nn.parameter_scope("net"):
        nn.save_parameters(
            os.path.join(args.model_save_path, "generator_param_%06d.h5" % i))

    return
Esempio n. 20
0
def train():
    """
    Train an INQ-quantized CNN (LeNet or ResNet variant) on MNIST.

    Parses command-line arguments, selects a computation context, builds
    separate training and validation graphs, then runs the usual
    forward / backward / update loop while logging training loss, error
    rate, elapsed time and periodic validation error, saving parameter
    snapshots along the way.
    """
    args = get_args(monitor_path='tmp.monitor.bnn')

    # Computation context (falls back to CPU when none is specified).
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    nn.set_default_context(
        extension_context(extension_module, device_id=args.device_id))

    # MNIST iterators: training and validation streams.
    data = data_iterator_mnist(args.batch_size, True)
    vdata = data_iterator_mnist(args.batch_size, False)

    # Select the prediction network; the LeNet variant is the default.
    mnist_cnn_prediction = {
        'inq': mnist_inq_lenet_prediction,
        'inq_resnet': mnist_inq_resnet_prediction,
    }.get(args.net, mnist_inq_lenet_prediction)

    # Training graph.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size, 1])
    pred = mnist_cnn_prediction(image / 255, test=False)
    pred.persistent = True  # keep activations for the error computation
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # Validation graph.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    vpred = mnist_cnn_prediction(vimage / 255, test=True)

    # Solver over all parameters.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Monitors for training/validation statistics.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = M.MonitorSeries("Training error", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = M.MonitorSeries("Test error", monitor, interval=10)

    for step in range(args.max_iter):
        if step % args.val_interval == 0:
            # Average validation error over args.val_iter minibatches.
            err_sum = 0.0
            for _ in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                err_sum += categorical_error(vpred.d, vlabel.d)
            monitor_verr.add(step, err_sum / args.val_iter)
        if step % args.model_save_interval == 0:
            nn.save_parameters(os.path.join(
                args.model_save_path, 'params_%06d.h5' % step))

        # One forward/backward/update pass on a training minibatch.
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        # Training statistics.
        monitor_loss.add(step, loss.d.copy())
        monitor_err.add(step, categorical_error(pred.d, label.d))
        monitor_time.add(step)

    # Final snapshot after the last iteration.
    nn.save_parameters(os.path.join(
        args.model_save_path, 'params_%06d.h5' % args.max_iter))
Esempio n. 21
0
            sequence_length] = sample.decoder_input.parent_index[:
                                                                 sequence_length]


logger.info("Create monitors")
import nnabla.monitor as M
monitor = M.Monitor(args.output)
# All series use interval=data.size, i.e. one summary row per epoch.
monitor_loss = M.MonitorSeries("training loss", monitor, interval=data.size)
monitor_err = M.MonitorSeries("training error", monitor, interval=data.size)
monitor_tacc = M.MonitorSeries("test accuracy", monitor, interval=data.size)
monitor_tbleu4 = M.MonitorSeries("test bleu4", monitor, interval=data.size)
monitor_terr = M.MonitorSeries("test error with oracle sequence",
                               monitor,
                               interval=data.size)
monitor_time = M.MonitorTimeElapsed("training time",
                                    monitor,
                                    interval=data.size)

# Unparser turns predicted ASTs back into Python source text (for BLEU).
import transpyle
unparser = transpyle.python.unparser.NativePythonUnparser()
best_bleu4 = 0.0  # best test BLEU-4 seen so far — presumably for model selection

# Training loop
iter = 0
logger.info("Start training")
while iter < data.size * args.train_epochs:
    prev_epoch = int(iter / data.size)
    create_batch(data, query, action, action_type, node_type, parent_rule,
                 parent_index)

    solver.zero_grad()
Esempio n. 22
0
with nn.parameter_scope("dis"):
    solver_dis.set_parameters(nn.get_parameters())

# Take a quick look at how parameter scopes behave.
print(len(nn.get_parameters()))
with nn.parameter_scope("gen"):
    print(len(nn.get_parameters()))
# Inside a parameter scope, the parameters returned by `get_parameters()`
# are filtered down to that scope.

# Monitor setup
path = cache_dir(os.path.join(I.name, "monitor"))
monitor = M.Monitor(path)
monitor_loss_gen = M.MonitorSeries("generator_loss", monitor, interval=100)
monitor_loss_dis = M.MonitorSeries("discriminator_loss", monitor, interval=100)
monitor_time = M.MonitorTimeElapsed("time", monitor, interval=100)
monitor_fake = M.MonitorImageTile("Fake images",
                                  monitor,
                                  normalize_method=lambda x: (x + 1) / 2.0)


# Define the parameter-saving helper
def save_parameters(i):
    """Snapshot generator and discriminator weights for iteration *i*.

    Files are written under ``path`` as ``generator_param_%06d.h5`` and
    ``discriminator_param_%06d.h5``, each saved from its own scope.
    """
    for scope, fname in (("gen", "generator_param_%06d.h5"),
                         ("dis", "discriminator_param_%06d.h5")):
        with nn.parameter_scope(scope):
            nn.save_parameters(os.path.join(path, fname % i))


# Run training
Esempio n. 23
0
def main():
    """Train skip-gram word embeddings with negative sampling on PTB.

    Downloads the PTB training text, learns a shared embedding table via a
    sigmoid cross-entropy objective over (word, context) pairs plus negative
    samples, saves the parameters, and finally prints the nearest neighbours
    (by cosine similarity) of the first ``args.max_check_words`` words.
    """

    # Get arguments
    args = get_args()
    data_file = "https://raw.githubusercontent.com/tomsercu/lstm/master/data/ptb.train.txt"
    model_file = args.work_dir + "model.h5"

    # Load Dataset
    itow, wtoi, dataset = load_ptbset(data_file)

    # Computation environment settings
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create data provider
    n_word = len(wtoi)
    n_dim = args.embed_dim
    batchsize = args.batchsize
    half_window = args.half_window_length
    n_negative = args.n_negative_sample

    di = DataIteratorForEmbeddingLearning(
        batchsize=batchsize,
        half_window=half_window,
        n_negative=n_negative,
        dataset=dataset)

    # Create model
    # - Real batch size including context samples and negative samples
    size = batchsize * (1 + n_negative) * (2 * (half_window - 1))

    # Model for learning
    # - input variables
    xl = nn.Variable((size,))  # variable for word
    yl = nn.Variable((size,))  # variable for context

    # Embed layers for word embedding function
    # - f_embed : word index x to get y, the n_dim vector
    # --  for each sample in a minibatch
    # NOTE: both lookups share the same table (name="e1").
    hx = PF.embed(xl, n_word, n_dim, name="e1")  # feature vector for word
    hy = PF.embed(yl, n_word, n_dim, name="e1")  # feature vector for context
    hl = F.sum(hx * hy, axis=1)

    # -- Approximated likelihood of context prediction
    # pos: word context, neg negative samples
    tl = nn.Variable([size, ], need_grad=False)
    loss = F.sigmoid_cross_entropy(hl, tl)
    loss = F.mean(loss)

    # Model for test of searching similar words
    xr = nn.Variable((1,), need_grad=False)
    hr = PF.embed(xr, n_word, n_dim, name="e1")  # feature vector for test

    # Create solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    monitor = M.Monitor(args.work_dir)
    monitor_loss = M.MonitorSeries(
        "Training loss", monitor, interval=args.monitor_interval)
    monitor_time = M.MonitorTimeElapsed(
        "Training time", monitor, interval=args.monitor_interval)

    # Do training
    max_epoch = args.max_epoch
    for epoch in range(max_epoch):

        # iteration per epoch
        for i in range(di.n_batch):

            # get minibatch
            xi, yi, ti = di.next()

            # learn
            solver.zero_grad()
            xl.d, yl.d, tl.d = xi, yi, ti
            loss.forward(clear_no_need_grad=True)
            loss.backward(clear_buffer=True)
            solver.update()

            # monitor
            itr = epoch * di.n_batch + i
            monitor_loss.add(itr, loss.d)
            monitor_time.add(itr)

    # Save model
    nn.save_parameters(model_file)

    # Evaluate by similarity.
    # BUGFIX: the original normalized the embedding matrix *in place*
    # (``w /= s.reshape(...)``) inside the loop below. Since ``w`` is a view
    # of the parameter 'e1/embed/W', that mutated the very weights that
    # ``hr.forward()`` reads on every subsequent iteration, corrupting the
    # reported similarities. Normalize a copy once, before the loop.
    max_check_words = args.max_check_words
    w = nn.get_parameters()['e1/embed/W'].d
    s = np.sqrt((w * w).sum(1))          # per-word L2 norms
    w = w / s.reshape((s.shape[0], 1))   # normalized *copy*; params untouched
    for i in range(max_check_words):

        # prediction: raw embedding of word i
        xr.d = i
        hr.forward(clear_buffer=True)
        h = hr.d

        # cosine similarity of word i against every vocabulary word
        similarity = w.dot(h[0]) / s[i]

        # for understanding
        output_similar_words(itow, i, similarity)
Esempio n. 24
0
def valid():
    """
    Main script for validation.

    Evaluates a pretrained classifier over the 50000-image validation set
    using DALI pipelines and logs the averaged top-N error and elapsed time.
    """

    args = get_args()
    n_valid_samples = 50000
    num_classes = 1000
    # BUGFIX: the original message contained a stray "\B" left over from a
    # broken line continuation, plus a typo ("devided").
    assert n_valid_samples % args.batch_size == 0, \
        "Set batch_size such that n_valid_samples (50000) can be divided by batch_size. Batch size is now set as {}".format(
            args.batch_size)

    # Context
    from nnabla.ext_utils import get_extension_context
    extension_module = "cudnn"
    ctx = get_extension_context(extension_module,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Pipelines and Iterators for validation
    device_id = int(args.device_id)
    val_pipes = [
        ValPipeline(args.batch_size,
                    args.num_threads,
                    device_id,
                    args.val_cachefile_dir,
                    args.val_list,
                    seed=device_id,
                    num_gpu=1)
    ]
    val_pipes[0].build()
    vdata = DALIClassificationIterator(val_pipes,
                                       val_pipes[0].epoch_size("Reader"),
                                       auto_reset=True,
                                       stop_at_epoch=False)

    # Network for validation
    nn.load_parameters(args.model_load_path)
    v_model = get_model(args, num_classes, 1, args.accum_grad, test=True)
    v_e = F.mean(F.top_n_error(v_model.pred, v_model.label, n=args.top_n))

    # Monitors
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_verr = M.MonitorSeries("Validation error", monitor, interval=1)
    monitor_vtime = M.MonitorTimeElapsed("Validation time",
                                         monitor,
                                         interval=1)

    # Validation
    ve_local = 0.
    val_iter_local = n_valid_samples // args.batch_size
    for i in range(val_iter_local):
        nextImage, nextLabel = vdata.next()
        v_model.image.data.copy_from(nextImage)
        v_model.label.data.copy_from(nextLabel)
        # BUGFIX: ``np.float`` was removed in NumPy 1.24; it was an alias for
        # the builtin ``float``, so use ``float`` to keep identical behavior.
        v_model.image.data.cast(float, ctx)
        v_model.label.data.cast(np.int32, ctx)
        v_e.forward(clear_buffer=True)
        nn.logger.info("validation error is {} at {}-th batch".format(
            v_e.d, i))
        ve_local += v_e.d.copy()
    ve_local /= val_iter_local

    monitor_verr.add(0, ve_local)
    monitor_vtime.add(0)
def train(config, netG, netD, solver_netG, solver_netD, train_iterator,
          monitor):
    """Train a conditional GAN that decodes boundary maps into face images.

    Builds generator/discriminator graphs, GAN + L1 (+ optional VGG feature)
    losses, then alternates discriminator and generator updates for
    ``config["train"]["epochs"]`` epochs, with linear LR decay, periodic
    visualization, and per-epoch parameter snapshots.
    """

    if config["train"][
            "feature_loss"] and config["train"]["feature_loss"]["lambda"] > 0:
        print(
            f'Applying VGG feature Loss, weight: {config["train"]["feature_loss"]["lambda"]}.'
        )
        with_feature_loss = True
    else:
        with_feature_loss = False

    # Load image and boundary image to get Variable shapes
    img, bod_map, bod_map_resize = train_iterator.next()

    real_img = nn.Variable(img.shape)
    real_bod_map = nn.Variable(bod_map.shape)
    real_bod_map_resize = nn.Variable(bod_map_resize.shape)

    ################### Graph Construction ####################
    # Generator
    with nn.parameter_scope('netG_decoder'):
        fake_img = netG(real_bod_map, test=False)
    fake_img.persistent = True

    # Detach the fake image so the discriminator loss does not backprop into
    # the generator; generator gradients are routed manually further below.
    fake_img_unlinked = fake_img.get_unlinked_variable()

    # Discriminator
    # Conditional discriminator: boundary map concatenated with the image.
    with nn.parameter_scope('netD_decoder'):
        pred_fake = netD(F.concatenate(real_bod_map_resize,
                                       fake_img_unlinked,
                                       axis=1),
                         test=False)
        pred_real = netD(F.concatenate(real_bod_map_resize, real_img, axis=1),
                         test=False)
    real_target = F.constant(1, pred_fake.shape)
    fake_target = F.constant(0, pred_real.shape)

    ################### Loss Definition ####################
    # for Generator
    gan_loss_G = gan_loss(pred_fake, real_target)
    gan_loss_G.persistent = True

    weight_L1 = config["train"]["weight_L1"]
    L1_loss = recon_loss(fake_img_unlinked, real_img)
    L1_loss.persistent = True
    loss_netG = gan_loss_G + weight_L1 * L1_loss

    if with_feature_loss:
        # Images are in [-1, 1]; rescale to [0, 255] for the VGG network.
        feature_loss = vgg16_perceptual_loss(127.5 * (fake_img_unlinked + 1.),
                                             127.5 * (real_img + 1.))
        feature_loss.persistent = True
        loss_netG += feature_loss * config["train"]["feature_loss"]["lambda"]

    # for Discriminator
    loss_netD = (gan_loss(pred_real, real_target) +
                 gan_loss(pred_fake, fake_target)) * 0.5

    ################### Setting Solvers ####################
    # for Generator
    with nn.parameter_scope('netG_decoder'):
        solver_netG.set_parameters(nn.get_parameters())

    # for Discrimintar
    with nn.parameter_scope('netD_decoder'):
        solver_netD.set_parameters(nn.get_parameters())

    ################### Create Monitors ####################
    interval = config["monitor"]["interval"]
    monitors_G_dict = {
        'loss_netG': loss_netG,
        'loss_gan': gan_loss_G,
        'L1_loss': L1_loss
    }

    if with_feature_loss:
        monitors_G_dict.update({'vgg_feature_loss': feature_loss})

    monitors_G = MonitorManager(monitors_G_dict, monitor, interval=interval)

    monitors_D_dict = {'loss_netD': loss_netD}
    monitors_D = MonitorManager(monitors_D_dict, monitor, interval=interval)

    monitor_time = nm.MonitorTimeElapsed('time_training',
                                         monitor,
                                         interval=interval)
    monitor_vis = nm.MonitorImage('result',
                                  monitor,
                                  interval=1,
                                  num_images=4,
                                  normalize_method=lambda x: x)

    # Dump training information
    with open(os.path.join(monitor._save_path, "training_info.yaml"),
              "w",
              encoding="utf-8") as f:
        f.write(yaml.dump(config))

    # Training
    epoch = config["train"]["epochs"]
    i = 0
    lr_decay_start_at = config["train"]["lr_decay_start_at"]
    iter_per_epoch = train_iterator.size // config["train"]["batch_size"] + 1
    for e in range(epoch):
        logger.info(f'Epoch = {e} / {epoch}')
        train_iterator._reset()  # rewind the iterator
        if e > lr_decay_start_at:
            # Linear decay over 50 epochs past the decay start.
            decay_coeff = 1.0 - max(0, e - lr_decay_start_at) / 50.
            lr_decayed = config["train"]["lr"] * decay_coeff
            print(f"learning rate decayed to {lr_decayed}")
            solver_netG.set_learning_rate(lr_decayed)
            solver_netD.set_learning_rate(lr_decayed)

        for _ in range(iter_per_epoch):
            img, bod_map, bod_map_resize = train_iterator.next()
            # bod_map_noize = np.random.random_sample(bod_map.shape) * 0.01
            # bod_map_resize_noize = np.random.random_sample(bod_map_resize.shape) * 0.01

            real_img.d = img
            real_bod_map.d = bod_map  # + bod_map_noize
            real_bod_map_resize.d = bod_map_resize  # + bod_map_resize_noize

            # Generate fake image
            fake_img.forward(clear_no_need_grad=True)

            # Update Discriminator
            solver_netD.zero_grad()
            solver_netG.zero_grad()
            loss_netD.forward(clear_no_need_grad=True)
            loss_netD.backward(clear_buffer=True)
            solver_netD.update()

            # Update Generator
            solver_netD.zero_grad()
            solver_netG.zero_grad()
            fake_img_unlinked.grad.zero()
            loss_netG.forward(clear_no_need_grad=True)
            loss_netG.backward(clear_buffer=True)
            # Propagate the gradients accumulated on the unlinked variable
            # back through the generator graph.
            fake_img.backward(grad=None)
            solver_netG.update()

            # Monitors
            monitor_time.add(i)
            monitors_G.add(i)
            monitors_D.add(i)

            i += 1

        images_to_visualize = [real_bod_map_resize.d, fake_img.d, img]
        visuals = combine_images(images_to_visualize)
        monitor_vis.add(i, visuals)

        if e % config["monitor"]["save_interval"] == 0 or e == epoch - 1:
            # Save parameters of networks
            netG_save_path = os.path.join(monitor._save_path,
                                          f'netG_decoder_{e}.h5')
            with nn.parameter_scope('netG_decoder'):
                nn.save_parameters(netG_save_path)
            netD_save_path = os.path.join(monitor._save_path,
                                          f'netD_decoder_{e}.h5')
            with nn.parameter_scope('netD_decoder'):
                nn.save_parameters(netD_save_path)
Esempio n. 26
0
# Co-occurrence count target and weighting.
t = nn.Variable((batch_size, 1))
zero = F.constant(0, shape=(batch_size, 1))
one = F.constant(1, shape=(batch_size, 1))
# f(t) = min(t/100, 1)^0.75 down-weights rare co-occurrences — presumably the
# GloVe weighting with x_max=100, alpha=0.75; confirm against the paper.
weight = F.clip_by_value(t / 100, zero, one)**0.75
# Weighted squared error against log counts, summed (not averaged) over batch.
loss = F.sum(weight * ((prediction - F.log(t))**2))

# Create solver.
solver = S.Adam()
solver.set_parameters(nn.get_parameters())

# Create monitor
monitor = M.Monitor('./log')
monitor_loss = M.MonitorSeries("Training loss", monitor, interval=1000)
monitor_valid_loss = M.MonitorSeries("Validation loss", monitor, interval=1)
monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=1000)


# Create updater
def train_data_feeder():
    # Feed one minibatch (center word, context word, co-occurrence count)
    # into the graph input variables; called by the Updater before each step.
    x_central.d, x_context.d, t.d = train_data_iter.next()


def update_callback_on_finish(i):
    # Log the current loss and elapsed time after update step `i`.
    monitor_loss.add(i, loss.d)
    monitor_time.add(i)


updater = Updater(solver=solver,
                  loss=loss,
                  data_feeder=train_data_feeder,
Esempio n. 27
0
def train(generator, discriminator, patch_gan, solver_gen, solver_dis,
          weight_l1, train_iterator, val_iterator, epoch, monitor, interval):
    """Train a pix2pix generator/discriminator pair.

    Alternates discriminator and generator updates per minibatch for
    ``epoch`` epochs, visualizing validation outputs every iteration, and
    saves the final generator parameters, returning their file path.
    """
    # Create Network Graph
    # for training
    im, la = train_iterator.next()  # for checking image shape
    real = nn.Variable(im.shape)  # real
    x = nn.Variable(la.shape)  # x
    # for validation
    real_val = nn.Variable(im.shape)  # real
    x_val = nn.Variable(la.shape)  # x

    # Generator
    fake = generator(x, test=False)
    # pix2pix infers just like training mode.
    fake_val = generator(x_val, test=False)
    fake_val.persistent = True  # Keep to visualize
    # Discriminator
    fake_y = discriminator(x, fake, patch_gan=patch_gan, test=False)
    real_y = discriminator(x, real, patch_gan=patch_gan, test=False)
    real_target = nn.Variable(fake_y.shape)
    real_target.data.fill(1)
    fake_target = nn.Variable(real_y.shape)
    fake_target.data.zero()

    # Generator objective: L1 reconstruction + fooling the discriminator.
    loss_gen = F.mean(weight_l1 * F.abs(real - fake)) + \
        F.mean(F.sigmoid_cross_entropy(fake_y, real_target))
    # Discriminator objective: classify real as 1 and fake as 0.
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(real_y, real_target) +
        F.sigmoid_cross_entropy(fake_y, fake_target))

    # Setting Solvers
    with nn.parameter_scope('generator'):
        solver_gen.set_parameters(nn.get_parameters())

    with nn.parameter_scope('discriminator'):
        solver_dis.set_parameters(nn.get_parameters())

    # Create Monitors
    monitors = {
        'loss_gen':
        nm.MonitorSeries("Generator loss", monitor, interval=interval),
        'loss_dis':
        nm.MonitorSeries("Discriminator loss", monitor, interval=interval),
        'time':
        nm.MonitorTimeElapsed("Training time", monitor, interval=interval),
        'fake':
        nm.MonitorImageTile(
            "Fake images",
            monitor,
            interval=interval,
            num_images=2,
            normalize_method=lambda x: np.clip(np.divide(x, 255.0), 0.0, 1.0)),
    }

    i = 0
    for e in range(epoch):
        logger.info('Epoch = {}'.format(e))
        # Training
        while e == train_iterator.epoch:
            # forward / backward process
            real.d, x.d = train_iterator.next()
            solver_dis.zero_grad()
            solver_gen.zero_grad()
            # Discriminator
            loss_dis.forward(clear_no_need_grad=True)
            loss_dis.backward(clear_buffer=True)
            solver_dis.update()
            # Generator
            loss_gen.forward(clear_no_need_grad=True)
            loss_gen.backward(clear_buffer=True)
            solver_gen.update()
            monitors['time'].add(i)
            monitors['loss_gen'].add(i, loss_gen.d.copy())
            monitors['loss_dis'].add(i, loss_dis.d.copy())
            # Validation
            real_val.d, x_val.d = val_iterator.next()
            fake_val.forward()
            # Interleave label images and generated images for side-by-side
            # visualization tiles.
            pix2pix_vis = np.stack(
                [label_to_image(x_val.d),
                 normalize_image(fake_val.d)],
                axis=1).reshape((-1, ) + fake.shape[1:])
            monitors['fake'].add(i, pix2pix_vis)
            i += 1
    # save parameters of generator
    save_path = os.path.join(monitor._save_path,
                             'generator_model_{}.h5'.format(i))
    with nn.parameter_scope('generator'):
        nn.save_parameters(save_path)

    return save_path
Esempio n. 28
0
def train():
    """
    Main script.

    Trains a ResNet image classifier on ImageNet (or Tiny ImageNet when
    ``args.tiny_mode`` is set): builds separate train/validation graphs,
    a Momentum solver, and monitors, then runs the training loop with
    gradient accumulation, periodic validation, checkpointing, and
    scheduled learning-rate decay.  Finally saves the parameters as an
    ``.h5`` file and an ``.nnp`` deployment archive.
    """

    args = get_args()

    # Get context.
    from nnabla.ext_utils import get_extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = get_extension_context(extension_module,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    if args.tiny_mode:
        # We use Tiny ImageNet from Stanford CS231N class.
        # (Tiny ImageNet, https://tiny-imagenet.herokuapp.com/)
        # Tiny ImageNet consists of 200 categories, each category has 500 images
        # in training set. The image size is 64x64. To adapt ResNet into 64x64
        # image inputs, the input image size of ResNet is set as 56x56, and
        # the stride in the first conv and the first max pooling are removed.
        # Please check README.
        data = data_iterator_tiny_imagenet(args.batch_size, 'train')
        vdata = data_iterator_tiny_imagenet(args.batch_size, 'val')
        num_classes = 200
    else:
        # We use ImageNet.
        # (ImageNet, https://imagenet.herokuapp.com/)
        # ImageNet consists of 1000 categories, each category has 1280 images
        # in training set. The image size is various. To adapt ResNet into
        # 320x320 image inputs, the input image size of ResNet is set as
        # 224x224. We need to get tar file and create cache file(320x320 images).
        # Please check README.
        data = data_iterator_imagenet(args.batch_size,
                                      args.train_cachefile_dir)
        vdata = data_iterator_imagenet(args.batch_size, args.val_cachefile_dir)
        num_classes = 1000
    # Training graph: pred kept persistent so its buffer survives backward
    # and can be reused by the top-n error node below.
    t_model = get_model(args, num_classes, test=False, tiny=args.tiny_mode)
    t_model.pred.persistent = True  # Not clearing buffer of pred in backward

    # TODO: need_grad should be passed to get_unlinked_variable after v1.0.3 fix.
    # Unlinked copy of pred so the error metric does not backprop into the net.
    t_pred2 = t_model.pred.get_unlinked_variable()
    t_pred2.need_grad = False

    # Mean top-n classification error on the training batch.
    t_e = F.mean(F.top_n_error(t_pred2, t_model.label))
    # Validation graph (test=True: e.g. batch-norm in inference mode).
    v_model = get_model(args, num_classes, test=True, tiny=args.tiny_mode)
    v_model.pred.persistent = True  # Not clearing buffer of pred in forward

    # TODO: need_grad should be passed to get_unlinked_variable after v1.0.3 fix.
    v_pred2 = v_model.pred.get_unlinked_variable()
    v_pred2.need_grad = False

    v_e = F.mean(F.top_n_error(v_pred2, v_model.label))

    # Save_nnp_Epoch0: export the untrained network for reference/deployment.
    contents = save_nnp({'x': v_model.image}, {'y': v_model.pred},
                        args.batch_size)
    save.save(os.path.join(args.model_save_path, 'Imagenet_result_epoch0.nnp'),
              contents)

    # Create Solver.
    solver = S.Momentum(args.learning_rate, 0.9)
    solver.set_parameters(nn.get_parameters())

    start_point = 0
    if args.checkpoint is not None:
        # load weights and solver state info from specified checkpoint file.
        start_point = load_checkpoint(args.checkpoint, solver)

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = M.MonitorSeries("Training error", monitor, interval=10)
    monitor_vloss = M.MonitorSeries("Validation loss", monitor, interval=10)
    monitor_verr = M.MonitorSeries("Validation error", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=10)
    monitor_vtime = M.MonitorTimeElapsed("Validation time",
                                         monitor,
                                         interval=10)

    # Training loop.
    for i in range(start_point, args.max_iter):
        # Save parameters
        if i % args.model_save_interval == 0:
            # save checkpoint file
            save_checkpoint(args.model_save_path, i, solver)

        # Validation
        if i % args.val_interval == 0 and i != 0:

            # Clear all intermediate memory to save memory.
            # t_model.loss.clear_recursive()

            l = 0.0
            e = 0.0
            for j in range(args.val_iter):
                images, labels = vdata.next()
                v_model.image.d = images
                v_model.label.d = labels
                # Cast input/label buffers to compact dtypes on the device
                # (presumably to reduce device memory/transfer — TODO confirm).
                v_model.image.data.cast(np.uint8, ctx)
                v_model.label.data.cast(np.int32, ctx)
                v_model.loss.forward(clear_buffer=True)
                v_e.forward(clear_buffer=True)
                l += v_model.loss.d
                e += v_e.d
            # Report averages over the validation sub-iterations.
            monitor_vloss.add(i, l / args.val_iter)
            monitor_verr.add(i, e / args.val_iter)
            monitor_vtime.add(i)

            # Clear all intermediate memory to save memory.
            # v_model.loss.clear_recursive()

        # Training
        l = 0.0
        e = 0.0
        solver.zero_grad()

        # Accumulate scalar loss/error values read back from the device.
        def accumulate_error(l, e, t_model, t_e):
            l += t_model.loss.d
            e += t_e.d
            return l, e

        # Gradient accumulation loop: gradients sum across accum_grad
        # mini-batches before a single solver update.
        for j in range(args.accum_grad):
            images, labels = data.next()
            t_model.image.d = images
            t_model.label.d = labels
            t_model.image.data.cast(np.uint8, ctx)
            t_model.label.data.cast(np.int32, ctx)
            t_model.loss.forward(clear_no_need_grad=True)
            t_model.loss.backward(clear_buffer=True)  # Accumulating gradients
            t_e.forward(clear_buffer=True)
            l, e = accumulate_error(l, e, t_model, t_e)

        solver.weight_decay(args.weight_decay)
        solver.update()

        monitor_loss.add(i, l / args.accum_grad)
        monitor_err.add(i, e / args.accum_grad)
        monitor_time.add(i)

        # Learning rate decay at scheduled iter: multiply LR by 0.1 at each
        # iteration listed in args.learning_rate_decay_at.
        if i in args.learning_rate_decay_at:
            solver.set_learning_rate(solver.learning_rate() * 0.1)
    nn.save_parameters(
        os.path.join(args.model_save_path, 'param_%06d.h5' % args.max_iter))

    # Save_nnp: export the final trained network for deployment.
    contents = save_nnp({'x': v_model.image}, {'y': v_model.pred},
                        args.batch_size)
    save.save(os.path.join(args.model_save_path, 'Imagenet_result.nnp'),
              contents)
Esempio n. 29
0
def main():
    """
    Main script.

    Semi-supervised MNIST training with Virtual Adversarial Training (VAT).

    Steps:
    * Get and set context.
    * Load Dataset
    * Initialize DataIterator.
    * Create Networks
    *   Net for Labeled Data
    *   Net for Unlabeled Data
    *   Net for Test Data
    * Create Solver.
    * Training Loop.
    *   Test
    *   Training
    *     by Labeled Data
    *       Calculate Cross Entropy Loss 
    *     by Unlabeled Data
    *       Estimate Adversarial Direction
    *       Calculate LDS Loss
    """

    args = get_args()

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Network dimensions: input shape, hidden units, output classes.
    shape_x = (1, 28, 28)
    n_h = args.n_units
    n_y = args.n_class

    # Load MNIST Dataset
    from mnist_data import MnistDataSource
    with MnistDataSource(train=True) as d:
        x_t = d.images
        t_t = d.labels
    with MnistDataSource(train=False) as d:
        x_v = d.images
        t_v = d.labels
    # Scale pixels from [0, 255] into [0, 1) and truncate to requested sizes.
    x_t = np.array(x_t / 256.0).astype(np.float32)
    x_t, t_t = x_t[:args.n_train], t_t[:args.n_train]
    x_v, t_v = x_v[:args.n_valid], t_v[:args.n_valid]

    # Create Semi-supervised Datasets: the labeled subset is also included
    # in the unlabeled pool (np.r_ concatenation).
    x_l, t_l, x_u, _ = split_dataset(x_t, t_t, args.n_labeled, args.n_class)
    x_u = np.r_[x_l, x_u]
    x_v = np.array(x_v / 256.0).astype(np.float32)

    # Create DataIterators for datasets of labeled, unlabeled and validation
    di_l = DataIterator(args.batchsize_l, [x_l, t_l])
    di_u = DataIterator(args.batchsize_u, [x_u])
    di_v = DataIterator(args.batchsize_v, [x_v, t_v])

    # Create networks
    # feed-forward-net building function
    def forward(x, test=False):
        return mlp_net(x, n_h, n_y, test)

    # Net for learning labeled data: standard cross-entropy loss.
    xl = nn.Variable((args.batchsize_l,) + shape_x, need_grad=False)
    hl = forward(xl, test=False)
    tl = nn.Variable((args.batchsize_l, 1), need_grad=False)
    loss_l = F.mean(F.softmax_cross_entropy(hl, tl))

    # Net for learning unlabeled data: r is the (trainable) perturbation
    # direction, eps its scale; vat() builds the LDS loss and clean output yu.
    xu = nn.Variable((args.batchsize_u,) + shape_x, need_grad=False)
    r = nn.Variable((args.batchsize_u,) + shape_x, need_grad=True)
    eps = nn.Variable((args.batchsize_u,) + shape_x, need_grad=False)
    loss_u, yu = vat(xu, r, eps, forward, distance)

    # Net for evaluating validation data (test mode forward pass).
    xv = nn.Variable((args.batchsize_v,) + shape_x, need_grad=False)
    hv = forward(xv, test=True)
    tv = nn.Variable((args.batchsize_v, 1), need_grad=False)

    # Create solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Monitor training and validation stats.
    import nnabla.monitor as M
    monitor = M.Monitor(args.model_save_path)
    monitor_verr = M.MonitorSeries("Test error", monitor, interval=240)
    monitor_time = M.MonitorTimeElapsed("Elapsed time", monitor, interval=240)

    # Training Loop.
    t0 = time.time()

    for i in range(args.max_iter):

        # Validation Test
        if i % args.val_interval == 0:
            n_error = calc_validation_error(
                di_v, xv, tv, hv, args.val_iter)
            monitor_verr.add(i, n_error)

        #################################
        ## Training by Labeled Data #####
        #################################

        # input minibatch of labeled data into variables
        xl.d, tl.d = di_l.next()

        # initialize gradients
        solver.zero_grad()

        # forward, backward and update
        loss_l.forward(clear_no_need_grad=True)
        loss_l.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        #################################
        ## Training by Unlabeled Data ###
        #################################

        # input minibatch of unlabeled data into variables
        xu.d, = di_u.next()

        ##### Calculate Adversarial Noise #####

        # Sample random noise
        n = np.random.normal(size=xu.shape).astype(np.float32)

        # Normalize noise vector and input to variable
        r.d = get_direction(n)

        # Set xi, the power-method scaling parameter.
        eps.data.fill(args.xi_for_vat)

        # Calculate y without noise, only once.
        yu.forward(clear_buffer=True)

        # Do power method iteration: each pass refines r towards the
        # dominant eigenvector of the loss Hessian (the adversarial direction).
        for k in range(args.n_iter_for_power_method):
            # Initialize gradient to receive value
            r.grad.zero()

            # forward, backward, without update
            loss_u.forward(clear_no_need_grad=True)
            loss_u.backward(clear_buffer=True)

            # Normalize gradient vector and input to variable
            r.d = get_direction(r.g)

        ##### Calculate loss for unlabeled data #####

        # Clear remained gradients (the power-method backward passes also
        # accumulated into parameter gradients).
        solver.zero_grad()

        # Set epsilon, the adversarial noise scaling parameter.
        eps.data.fill(args.eps_for_vat)

        # forward, backward and update
        loss_u.forward(clear_no_need_grad=True)
        loss_u.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        ##### Learning rate update ##### (decayed once per epoch)
        if i % args.iter_per_epoch == 0:
            solver.set_learning_rate(
                solver.learning_rate() * args.learning_rate_decay)
        monitor_time.add(i)

    # Evaluate the final model by the error rate with validation dataset
    valid_error = calc_validation_error(di_v, xv, tv, hv, args.val_iter)
    monitor_verr.add(i, valid_error)
    monitor_time.add(i)

    # Save the model: export the validation network as an .nnp archive.
    nnp_file = os.path.join(
        args.model_save_path, 'vat_%06d.nnp' % args.max_iter)
    runtime_contents = {
        'networks': [
            {'name': 'Validation',
             'batch_size': args.batchsize_v,
             'outputs': {'y': hv},
             'names': {'x': xv}}],
        'executors': [
            {'name': 'Runtime',
             'network': 'Validation',
             'data': ['x'],
             'output': ['y']}]}
    save.save(nnp_file, runtime_contents)

    # Sanity-check the exported model with the C++ forward path.
    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [xv.d], [xv], hv, nnp_file)
Esempio n. 30
0
def train(max_iter: int = 24000):
    """
    Train an MLP on MNIST with Virtual Adversarial Training (VAT).

    Builds separate graphs for the labeled cross-entropy loss, the
    unlabeled VAT loss (with a power-method estimate of the adversarial
    direction), and validation error; then runs the training loop.

    Relies on module-level ``args``, ``I`` (project helper module),
    ``M`` (nnabla.monitor) and ``cache_dir`` being available.

    Args:
        max_iter: Number of training iterations to run.

    Returns:
        The monitor output path (as produced by ``cache_dir``).
    """
    # Network dimensions: input shape, hidden units, output classes.
    shape_x = (1, 28, 28)
    n_h = args.n_units
    n_y = args.n_class

    # Load MNIST Dataset
    from mnist_data import load_mnist, data_iterator_mnist

    images, labels = load_mnist(train=True)
    # Fixed seed so the labeled/unlabeled split is reproducible.
    rng = np.random.RandomState(706)
    inds = rng.permutation(len(images))

    def feed_labeled(i):
        j = inds[i]
        return images[j], labels[j]

    # NOTE(review): identical to feed_labeled; the label component is
    # discarded at the call site (`xu.d, _ = di_u.next()`).
    def feed_unlabeled(i):
        j = inds[i]
        return images[j], labels[j]

    # Labeled iterator draws from the first n_labeled permuted samples;
    # unlabeled iterator from the first n_train.
    di_l = I.data_iterator_simple(
        feed_labeled,
        args.n_labeled,
        args.batchsize_l,
        shuffle=True,
        rng=rng,
        with_file_cache=False,
    )
    di_u = I.data_iterator_simple(
        feed_unlabeled,
        args.n_train,
        args.batchsize_u,
        shuffle=True,
        rng=rng,
        with_file_cache=False,
    )
    di_v = data_iterator_mnist(args.batchsize_v, train=False)

    # Create networks
    # feed-forward-net building function
    def forward(x, test=False):
        return I.mlp_net(x, n_h, n_y, test)

    # Net for learning labeled data: standard cross-entropy loss.
    xl = nn.Variable((args.batchsize_l,) + shape_x, need_grad=False)
    yl = forward(xl, test=False)
    tl = nn.Variable((args.batchsize_l, 1), need_grad=False)
    loss_l = F.mean(F.softmax_cross_entropy(yl, tl))

    # Net for learning unlabeled data.
    xu = nn.Variable((args.batchsize_u,) + shape_x, need_grad=False)
    yu = forward(xu, test=False)
    # Unlinked clean prediction: the VAT target must not receive gradients.
    y1 = yu.get_unlinked_variable()
    y1.need_grad = False

    # r is the L2-normalized perturbation direction derived from `noise`.
    # NOTE(review): no epsilon in the denominator — an all-zero noise/gradient
    # would divide by zero; presumably never happens in practice.
    noise = nn.Variable((args.batchsize_u,) + shape_x, need_grad=True)
    r = noise / (F.sum(noise ** 2, [1, 2, 3], keepdims=True)) ** 0.5
    r.persistent = True
    # loss_k (xi-scaled) drives the power-method direction estimate;
    # loss_u (eps-scaled) is the actual VAT training loss.
    y2 = forward(xu + args.xi_for_vat * r, test=False)
    y3 = forward(xu + args.eps_for_vat * r, test=False)
    loss_k = F.mean(I.distance(y1, y2))
    loss_u = F.mean(I.distance(y1, y3))

    # Net for evaluating validation data (test mode forward pass).
    xv = nn.Variable((args.batchsize_v,) + shape_x, need_grad=False)
    hv = forward(xv, test=True)
    tv = nn.Variable((args.batchsize_v, 1), need_grad=False)
    err = F.mean(F.top_n_error(hv, tv, n=1))

    # Create solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Monitor training and validation stats.
    path = cache_dir(os.path.join(I.name, "monitor"))
    monitor = M.Monitor(path)
    monitor_verr = M.MonitorSeries("val_error", monitor, interval=240)
    monitor_time = M.MonitorTimeElapsed("time", monitor, interval=240)

    # Training Loop.
    for i in range(max_iter):

        # Validation Test
        if i % args.val_interval == 0:
            valid_error = I.calc_validation_error(di_v, xv, tv, err, args.val_iter)
            monitor_verr.add(i, valid_error)

        # forward, backward and update on the labeled batch
        xl.d, tl.d = di_l.next()
        # Scale pixels to [0, 1].
        xl.d = xl.d / 255
        solver.zero_grad()
        loss_l.forward(clear_no_need_grad=True)
        loss_l.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        # Calculate y without noise, only once.
        xu.d, _ = di_u.next()
        xu.d = xu.d / 255
        yu.forward(clear_buffer=True)

        # Do power method iteration: each pass replaces `noise` with the
        # gradient of loss_k w.r.t. r, converging on the adversarial direction.
        noise.d = np.random.normal(size=xu.shape).astype(np.float32)
        for k in range(args.n_iter_for_power_method):
            r.grad.zero()
            loss_k.forward(clear_no_need_grad=True)
            loss_k.backward(clear_buffer=True)
            noise.data.copy_from(r.grad)

        # forward, backward and update on the VAT (unlabeled) loss
        # (zero_grad clears gradients accumulated by the power-method passes)
        solver.zero_grad()
        loss_u.forward(clear_no_need_grad=True)
        loss_u.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        # Decay the learning rate once per epoch.
        if i % args.iter_per_epoch == 0:
            solver.set_learning_rate(solver.learning_rate() * args.learning_rate_decay)
        monitor_time.add(i)

    # Evaluate the final model by the error rate with validation dataset
    valid_error = I.calc_validation_error(di_v, xv, tv, err, args.val_iter)
    monitor_verr.add(i, valid_error)
    monitor_time.add(i)

    return path