Example #1
def train(args):
    """
    Main script.
    """

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN

    # Fake path
    z = nn.Variable([args.batch_size, 1000, 1, 1])
    fake = generator(z, maxh=1024)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    loss_gen = F.mean(
        F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape)))
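    # `unlinked()` below creates a variable that shares the generated data but is
    # detached from the generator graph, so backpropagating the discriminator
    # loss does not reach the generator parameters.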
    fake_dis = fake.unlinked()
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(pred_fake_dis,
                                F.constant(0, pred_fake_dis.shape)))

    # Real path
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(
        F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries("Discriminator loss",
                                       monitor,
                                       interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    monitor_fake = M.MonitorImageTile("Fake images",
                                      monitor,
                                      normalize_method=lambda x: x + 0.5)

    #data = data_iterator_mnist(args.batch_size, True)
    data = iterator.simple_data_iterator(load_kanji_data(), args.batch_size,
                                         True)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("gen"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "generator_param_%06d.h5" % i))
            with nn.parameter_scope("dis"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "discriminator_param_%06d.h5" % i))

        # Training forward
        image, _ = data.next()
        x.d = image / 255. - 0.5  # [0, 255] to [-0.5, 0.5]
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    with nn.parameter_scope("gen"):
        nn.save_parameters(
            os.path.join(args.model_save_path, "generator_param_%06d.h5" % i))
    with nn.parameter_scope("dis"):
        nn.save_parameters(
            os.path.join(args.model_save_path,
                         "discriminator_param_%06d.h5" % i))
Example #2
def train(args):

    ##  Sub-functions
    ## ---------------------------------
    ## Save Models
    def save_models(epoch_num, cle_disout, fake_disout, losses_gen, losses_dis, losses_ae):

        # save generator parameter
        with nn.parameter_scope("gen"):
            nn.save_parameters(os.path.join(args.model_save_path, 'generator_param_{:04}.h5'.format(epoch_num + 1)))

        # save discriminator parameter
        with nn.parameter_scope("dis"):
            nn.save_parameters(os.path.join(args.model_save_path, 'discriminator_param_{:04}.h5'.format(epoch_num + 1)))

        # save results
        np.save(os.path.join(args.model_save_path, 'disout_his_{:04}.npy'.format(epoch_num + 1)), np.array([cle_disout, fake_disout]))
        np.save(os.path.join(args.model_save_path, 'losses_gen_{:04}.npy'.format(epoch_num + 1)), np.array(losses_gen))
        np.save(os.path.join(args.model_save_path, 'losses_dis_{:04}.npy'.format(epoch_num + 1)), np.array(losses_dis))
        np.save(os.path.join(args.model_save_path, 'losses_ae_{:04}.npy'.format(epoch_num + 1)), np.array(losses_ae))

    ## Load Models
    def load_models(epoch_num, gen=True, dis=True):

        # load generator parameter
        with nn.parameter_scope("gen"):
            nn.load_parameters(os.path.join(args.model_save_path, 'generator_param_{:04}.h5'.format(args.epoch_from)))

        # load discriminator parameter
        with nn.parameter_scope("dis"):
            nn.load_parameters(os.path.join(args.model_save_path, 'discriminator_param_{:04}.h5'.format(args.epoch_from)))

    ## Update parameters
    class updating:

        def __init__(self):
            self.scale = 8 if args.halfprec else 1
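            # Loss-scaling trick for half-precision training: the loss is
            # scaled up by `scale` before backward and the gradients are
            # scaled back down with scale_grad(), so small gradient values do
            # not underflow in fp16.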

        def __call__(self, solver, loss):
            solver.zero_grad()                                  # initialize
            loss.forward(clear_no_need_grad=True)               # calculate forward
            loss.backward(self.scale, clear_buffer=True)      # calculate backward
            solver.scale_grad(1. / self.scale)                # scaling
            solver.weight_decay(args.weight_decay * self.scale) # decay
            solver.update()                                     # update


    ##  Initial Settings
    ## ---------------------------------

    ##  Create network
    #   Clear
    nn.clear_parameters()
    #   Variables
    noisy = nn.Variable([args.batch_size, 1, 16384], need_grad=False)  # Input (noisy signal)
    clean = nn.Variable([args.batch_size, 1, 16384], need_grad=False)  # Desired (clean) signal
    z     = nn.Variable([args.batch_size, 1024, 8], need_grad=False)   # Random latent variable
    #   Generator
    genout = Generator(noisy, z)             # Predicted clean signal
    genout.persistent = True                 # Not cleared at backward
    loss_gen = Loss_gen(genout, clean, Discriminator(noisy, genout))
    loss_ae  = F.mean(F.absolute_error(genout, clean))
    #   Discriminator
    fake_dis    = genout.get_unlinked_variable(need_grad=True)
    cle_disout  = Discriminator(noisy, clean)
    fake_disout = Discriminator(noisy, fake_dis)
    loss_dis    = Loss_dis(Discriminator(noisy, clean), Discriminator(noisy, fake_dis))

    ##  Solver
    # RMSprop.
    # solver_gen = S.RMSprop(args.learning_rate_gen)
    # solver_dis = S.RMSprop(args.learning_rate_dis)
    # Adam
    solver_gen = S.Adam(args.learning_rate_gen)
    solver_dis = S.Adam(args.learning_rate_dis)
    # set parameter
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    ##  Load data & Create batch
    clean_data, noisy_data = dt.data_loader()
    batches     = dt.create_batch(clean_data, noisy_data, args.batch_size)
    del clean_data, noisy_data

    ##  Initial settings for sub-functions
    fig     = figout()
    disp    = display(args.epoch_from, args.epoch, batches.batch_num)
    upd     = updating()

    ##  Train
    ##----------------------------------------------------

    print('== Start Training ==')

    ##  Load "Pre-trained" parameters
    if args.epoch_from > 0:
        print(' Retrain parameter from pre-trained network')
        load_models(args.epoch_from, dis=False)
        losses_gen  = np.load(os.path.join(args.model_save_path, 'losses_gen_{:04}.npy'.format(args.epoch_from)))
        losses_dis  = np.load(os.path.join(args.model_save_path, 'losses_dis_{:04}.npy'.format(args.epoch_from)))
        losses_ae   = np.load(os.path.join(args.model_save_path, 'losses_ae_{:04}.npy'.format(args.epoch_from)))
    else:
        losses_gen  = []
        losses_ae   = []
        losses_dis  = []

    ## Create loss loggers
    point       = len(losses_gen)
    loss_len    = (args.epoch - args.epoch_from) * ((batches.batch_num+1)//10)
    losses_gen  = np.append(losses_gen, np.zeros(loss_len))
    losses_ae   = np.append(losses_ae, np.zeros(loss_len))
    losses_dis  = np.append(losses_dis, np.zeros(loss_len))

    ##  Training
    for i in range(args.epoch_from, args.epoch):

        print('')
        print(' =========================================================')
        print('  Epoch :: {0}/{1}'.format(i + 1, args.epoch))
        print(' =========================================================')
        print('')

        #  Batch iteration
        for j in range(batches.batch_num):
            print('  Train (Epoch. {0}) - {1}/{2}'.format(i+1, j+1, batches.batch_num))

            ##  Batch setting
            clean.d, noisy.d = batches.next(j)
            #z.d = np.random.randn(*z.shape)
            z.d = np.zeros(z.shape)

            ##  Updating
            upd(solver_gen, loss_gen)       # update Generator
            upd(solver_dis, loss_dis)       # update Discriminator

            ##  Display
            if (j+1) % 10 == 0:
                # Get result for Display
                cle_disout.forward()
                fake_disout.forward()
                loss_ae.forward(clear_no_need_grad=True)

                # Display text
                disp(i, j, loss_gen.d, loss_dis.d, loss_ae.d)

                # Data logger
                losses_gen[point] = loss_gen.d
                losses_ae[point]  = loss_ae.d
                losses_dis[point] = loss_dis.d
                point = point + 1

                # Plot
                fig.waveform(noisy.d[0,0,:], genout.d[0,0,:], clean.d[0,0,:])
                fig.loss(losses_gen[0:point-1], losses_ae[0:point-1], losses_dis[0:point-1])
                fig.histogram(cle_disout.d, fake_disout.d)
                pg.QtGui.QApplication.processEvents()


        ## Save parameters
        if ((i+1) % args.model_save_cycle) == 0:
            save_models(i, cle_disout.d, fake_disout.d, losses_gen[0:point-1], losses_dis[0:point-1], losses_ae[0:point-1])  # save model
            exporter = pg.exporters.ImageExporter(fig.win.scene())  # Call pg.QtGui.QApplication.processEvents() before exporters!!
            exporter.export(os.path.join(args.model_save_path, 'plot_{:04}.png'.format(i + 1))) # save fig

    ## Save parameters (Last)
    save_models(args.epoch-1, cle_disout.d, fake_disout.d, losses_gen, losses_dis, losses_ae)
Example #3
def _create_variable(v, name, shape, rng):
    # Create and initialize variables
    class Variable:
        pass

    parameter = v.type == "Parameter"
    variable_instance = None
    if parameter:
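        # Map the initializer type declared for the parameter to an nnabla
        # initializer.  The He/Glorot variants derive the std/limit from the
        # fan-in and fan-out of `shape` and scale the result by the
        # user-supplied multiplier.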
        if v.initializer.type == 'Normal':
            initializer = NormalInitializer(v.initializer.multiplier, rng=rng)
        elif v.initializer.type == 'NormalAffineHe' or v.initializer.type == 'NormalAffineHeForward':
            initializer = (lambda shape: NormalInitializer(
                calc_normal_std_he_forward(shape[0], numpy.prod(shape[1:])),
                rng=rng)(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalAffineHeBackward':
            initializer = (lambda shape: NormalInitializer(
                calc_normal_std_he_backward(shape[0], numpy.prod(shape[1:])),
                rng=rng)(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalAffineGlorot':
            initializer = (lambda shape: NormalInitializer(
                calc_normal_std_glorot(shape[0], numpy.prod(shape[1:])),
                rng=rng)(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalConvolutionHe' or v.initializer.type == 'NormalConvolutionHeForward':
            initializer = (
                lambda shape: NormalInitializer(calc_normal_std_he_forward(
                    shape[-3], shape[0], kernel=shape[-2:]),
                                                rng=rng)
                (shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalConvolutionHeBackward':
            initializer = (
                lambda shape: NormalInitializer(calc_normal_std_he_backward(
                    shape[-3], shape[0], kernel=shape[-2:]),
                                                rng=rng)
                (shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalConvolutionGlorot':
            initializer = (lambda shape: NormalInitializer(
                calc_normal_std_glorot(shape[-3], shape[0], kernel=shape[-2:]),
                rng=rng)(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'Uniform':
            initializer = UniformInitializer(
                lim=[-v.initializer.multiplier, v.initializer.multiplier],
                rng=rng)
        elif v.initializer.type == 'UniformAffineGlorot':
            initializer = (lambda shape: UniformInitializer(
                calc_uniform_lim_glorot(shape[0], numpy.prod(shape[1:])),
                rng=rng)(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'UniformConvolutionGlorot':
            initializer = (
                lambda shape: UniformInitializer(calc_uniform_lim_glorot(
                    shape[-3], shape[0], kernel=shape[-2:]),
                                                 rng=rng)
                (shape) * v.initializer.multiplier)
        elif v.initializer.type == 'Constant':
            initializer = ConstantInitializer(value=v.initializer.multiplier)
        else:
            initializer = None
        variable_instance = get_parameter_or_create(name, shape, initializer)
    else:
        # create empty variable, memory will be allocated in network.setup()
        # after network optimization
        variable_instance = nn.Variable()

    variable = Variable()
    variable.name = name
    variable.parameter = parameter
    variable.shape = shape
    variable.variable_instance = variable_instance

    return variable
Example #4
def distil():
    args = get_args()

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    if args.net == "cifar10_resnet23_prediction":
        model_prediction = cifar10_resnet23_prediction
        data_iterator = data_iterator_cifar10
        c = 3
        h = w = 32
        n_train = 50000
        n_valid = 10000

    # TRAIN
    teacher = "teacher"
    student = "student"
    maps = args.maps
    rrate = args.reduction_rate
    # Create input variables.
    image = nn.Variable([args.batch_size, c, h, w])
    image.persistent = True  # do not clear; this buffer is re-used
    label = nn.Variable([args.batch_size, 1])
    label.persistent = True  # do not clear; this buffer is re-used
    # Create "teacher" and "student" prediction graphs.
    model_load_path = args.model_load_path
    nn.load_parameters(model_load_path)
    pred_label = model_prediction(image,
                                  net=teacher,
                                  maps=maps,
                                  test=not args.use_batch)
    pred_label.need_grad = False  # no need to backpropagate through the teacher graph
    pred = model_prediction(image,
                            net=student,
                            maps=int(maps * (1. - rrate)),
                            test=False)
    pred.persistent = True  # do not clear; this buffer is re-used below
    loss_ce = F.mean(F.softmax_cross_entropy(pred, label))
    loss_ce_soft = ce_soft(pred, pred_label)
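    # Knowledge-distillation objective: a weighted sum of the hard cross
    # entropy against the ground-truth labels and the soft cross entropy
    # against the teacher's predictions.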
    loss = args.weight_ce * loss_ce + args.weight_ce_soft * loss_ce_soft

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, c, h, w])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create teacher prediction graph.
    vpred = model_prediction(vimage,
                             net=student,
                             maps=int(maps * (1. - rrate)),
                             test=True)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    with nn.parameter_scope(student):
        solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=1)

    # Initialize DataIterator for CIFAR-10.
    data = data_iterator(args.batch_size, True)
    vdata = data_iterator(args.batch_size, False)
    best_ve = 1.0
    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(int(n_valid / args.batch_size)):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            ve /= int(n_valid / args.batch_size)
            monitor_verr.add(i, ve)
        if ve < best_ve:
            nn.save_parameters(
                os.path.join(args.model_save_path, 'params_%06d.h5' % i))
            best_ve = ve
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    ve = 0.0
    for j in range(int(n_valid / args.batch_size)):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    ve /= int(n_valid / args.batch_size)
    monitor_verr.add(i, ve)

    parameter_file = os.path.join(args.model_save_path,
                                  'params_{:06}.h5'.format(args.max_iter))
    nn.save_parameters(parameter_file)
def train():
    args = get_args()

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    if args.net == "cifar10_resnet23_prediction":
        model_prediction = cifar10_resnet23_prediction
    if args.net == "cifar10_shufflenet_prediction":
        model_prediction = functools.partial(cifar10_shuffle_prediction,
                                             groups=args.groups)

    data_iterator = data_iterator_cifar10
    c = 3
    h = w = 32
    n_train = 50000
    n_valid = 10000

    # TRAIN
    maps = args.maps

    # Create input variables.
    image = nn.Variable([args.batch_size, c, h, w])
    label = nn.Variable([args.batch_size, 1])
    # Create model_prediction graph.
    pred = model_prediction(image, maps=maps, test=False)
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, c, h, w])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    vpred = model_prediction(vimage, maps=maps, test=True)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=1)

    # Initialize DataIterator
    data = data_iterator(args.batch_size, True)
    vdata = data_iterator(args.batch_size, False)
    best_ve = 1.0
    ve = 1.0
    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(int(n_valid / args.batch_size)):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            ve /= int(n_valid / args.batch_size)
            monitor_verr.add(i, ve)
        if ve < best_ve:
            nn.save_parameters(
                os.path.join(args.model_save_path, 'params_%06d.h5' % i))
            best_ve = ve
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    ve = 0.0
    for j in range(int(n_valid / args.batch_size)):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    ve /= int(n_valid / args.batch_size)
    monitor_verr.add(i, ve)

    parameter_file = os.path.join(args.model_save_path,
                                  'params_{:06}.h5'.format(args.max_iter))
    nn.save_parameters(parameter_file)
Example #6
def main(args):
    # Settings
    device_id = args.device_id
    batch_size = args.batch_size
    batch_size_eval = args.batch_size_eval
    n_l_train_data = args.n_label
    n_train_data = 73257
    n_cls = 10
    learning_rate = 1. * 1e-3
    n_epoch = args.epoch
    act = F.relu
    iter_epoch = n_train_data // batch_size
    n_iter = n_epoch * iter_epoch
    extension_module = args.context
    lambda_ = args.lambda_

    # Model
    ## supervised
    batch_size, m, h, w = batch_size, 3, 32, 32
    ctx = extension_context(extension_module, device_id=device_id)
    x_l = nn.Variable((batch_size, m, h, w))
    y_l = nn.Variable((batch_size, 1))
    pred, log_var = cnn_model_003(ctx, x_l)
    one = F.constant(1., log_var.shape)
    loss_ce = ce_loss(ctx, pred, y_l)
    reg_sigma = sigma_regularization(ctx, log_var, one)
    loss_supervised = loss_ce + er_loss(ctx, pred) + lambda_ * reg_sigma

    ## stochastic regularization
    x_u0 = nn.Variable((batch_size, m, h, w))
    x_u1 = nn.Variable((batch_size, m, h, w))
    pred_x_u0, log_var0 = cnn_model_003(ctx, x_u0)
    pred_x_u1, log_var1 = cnn_model_003(ctx, x_u1)
    loss_sr = sr_loss_with_uncertainty(ctx, pred_x_u0, pred_x_u1, log_var0,
                                       log_var1)
    reg_sigma0 = sigma_regularization(ctx, log_var0, one)
    reg_sigma1 = sigma_regularization(ctx, log_var1, one)
    reg_sigmas = sigmas_regularization(ctx, log_var0, log_var1)
    loss_unsupervised = loss_sr + er_loss(ctx, pred_x_u0) + er_loss(ctx, pred_x_u1) \
                        + lambda_ * (reg_sigma0 + reg_sigma1) + lambda_ * reg_sigmas
    ## evaluate
    batch_size_eval, m, h, w = batch_size, 3, 32, 32
    x_eval = nn.Variable((batch_size_eval, m, h, w))
    pred_eval, _ = cnn_model_003(ctx, x_eval, test=True)

    # Solver
    with nn.context_scope(ctx):
        solver = S.Adam(alpha=learning_rate)
        solver.set_parameters(nn.get_parameters())

    # Dataset
    ## separate dataset
    home = os.environ.get("HOME")
    fpath = os.path.join(home, "datasets/svhn/train.mat")
    separator = Separator(n_l_train_data)
    separator.separate_then_save(fpath)

    l_train_path = os.path.join(home, "datasets/svhn/l_train.mat")
    u_train_path = os.path.join(home, "datasets/svhn/u_train.mat")
    test_path = os.path.join(home, "datasets/svhn/test.mat")

    # data reader
    data_reader = SVHNDataReader(l_train_path,
                                 u_train_path,
                                 test_path,
                                 batch_size=batch_size,
                                 n_cls=n_cls,
                                 da=False,
                                 shape=True)

    # Training loop
    print("# Training loop")
    epoch = 1
    st = time.time()
    acc_prev = 0.
    ve_best = 1.
    save_path_prev = ""
    for i in range(n_iter):
        # Get data and set it to the variables
        x_l0_data, x_l1_data, y_l_data = data_reader.get_l_train_batch()
        x_u0_data, x_u1_data, y_u_data = data_reader.get_u_train_batch()

        x_l.d, _, y_l.d = x_l0_data, x_l1_data, y_l_data
        x_u0.d, x_u1.d = x_u0_data, x_u1_data

        # Train
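        # Note: zero_grad() is called only once, so the backward passes of the
        # supervised and unsupervised losses accumulate into the same gradient
        # buffers and the update uses their sum.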
        loss_supervised.forward(clear_no_need_grad=True)
        loss_unsupervised.forward(clear_no_need_grad=True)
        solver.zero_grad()
        loss_supervised.backward(clear_buffer=True)
        loss_unsupervised.backward(clear_buffer=True)
        solver.update()

        # Evaluate
        if int((i + 1) % iter_epoch) == 0:
            # Get data and set it to the variables
            x_data, y_data = data_reader.get_test_batch()

            # Evaluation loop
            ve = 0.
            iter_val = 0
            for k in range(0, len(x_data), batch_size_eval):
                x_eval.d = get_test_data(x_data, k, batch_size_eval)
                label = get_test_data(y_data, k, batch_size_eval)
                pred_eval.forward(clear_buffer=True)
                ve += categorical_error(pred_eval.d, label)
                iter_val += 1
            ve /= iter_val
            msg = "Epoch:{},ElapsedTime:{},Acc:{:02f}".format(
                epoch,
                time.time() - st, (1. - ve) * 100)
            print(msg)
            if ve < ve_best:
                if not os.path.exists(args.model_save_path):
                    os.makedirs(args.model_save_path)
                if save_path_prev != "":
                    os.remove(save_path_prev)
                save_path = os.path.join(args.model_save_path,
                                         'params_%06d.h5' % epoch)
                nn.save_parameters(save_path)
                save_path_prev = save_path
                ve_best = ve
            st = time.time()
            epoch += 1
Example #7
def test_save_load_reshape(tmpdir, variable_batch_size, shape):
    x = nn.Variable([10, 1, 28, 28, 10, 10])
    y = F.reshape(x, shape=shape)
    check_save_load(tmpdir, x, y, variable_batch_size)
Example #8
def test_data_grad_reference():
    v = nn.Variable([2, 3, 4])
    assert v.d.dtype == np.float32
    assert v.g.dtype == np.float32
Example #9
def test_data_grad():
    v = nn.Variable([2, 3, 4])
    v.d[...] = np.random.randn(*v.shape)
    assert v.d is not v.g
    assert not np.all(v.d == v.g)
Example #10
def test_name():
    x = nn.Variable([2, 3])
    x.name = "VariableName"
    assert x.name == "VariableName"
Example #11
def test_prohibit_clear_data():
    import nnabla.functions as F
    nn.prefer_cached_array(False)
    shape = (2, 3, 4)
    var_np = np.random.rand(*shape)
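    # Build the same computation twice, once with in-place reshapes and once
    # without, run both with forward_all(clear_buffer=True), and check that
    # the outputs (and the root/persistent buffers) still match.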

    # the case of root variable
    x1 = nn.Variable.from_numpy_array(var_np)
    y1 = F.reshape(x1, (-1, ), inplace=True)
    y1 = F.reshape(y1, shape, inplace=True) * 2

    x2 = nn.Variable.from_numpy_array(var_np)
    y2 = F.reshape(x2, (-1, ), inplace=False)
    y2 = F.reshape(y2, shape, inplace=False) * 2

    nn.forward_all([y1, y2], clear_buffer=True)
    assert_allclose(x1.d, x2.d)
    assert_allclose(y1.d, y2.d)

    # the case of persistent variable
    x1 = nn.Variable.from_numpy_array(var_np)
    p_y1 = F.mul_scalar(x1, 2).apply(persistent=True)
    y1 = F.reshape(p_y1, (-1, ), inplace=True)
    y1 = F.reshape(y1, shape, inplace=True) * 2

    x2 = nn.Variable.from_numpy_array(var_np)
    p_y2 = F.mul_scalar(x2, 2).apply(persistent=True)
    y2 = F.reshape(p_y2, (-1, ), inplace=False)
    y2 = F.reshape(y2, shape, inplace=False) * 2

    nn.forward_all([y1, y2], clear_buffer=True)
    assert_allclose(p_y1.d, p_y2.d)
    assert_allclose(y1.d, y2.d)

    # the case of rewire_on root variable
    # graph A: x11 -> f_inplace -> y11
    x11 = nn.Variable.from_numpy_array(var_np)
    y11 = F.reshape(x11, (-1, ), inplace=True)

    # graph B: x12 -> f_inplace -> mul_scalar -> y12
    x12 = nn.Variable(shape=y11.shape)
    y12 = F.reshape(x12, shape, inplace=True) * 2

    # graph A->B: x11 -> f_inplace -> f_inplace -> mul_scalar -> y12
    x12.rewire_on(y11)

    x2 = nn.Variable.from_numpy_array(var_np)
    y2 = F.reshape(x2, (-1, ), inplace=False)
    y2 = F.reshape(y2, shape, inplace=False) * 2

    nn.forward_all([y12, y2], clear_buffer=True)
    assert_allclose(x11.d, x2.d)
    assert_allclose(y12.d, y2.d)

    # the case of rewire_on persistent variable
    # graph A: x11 -> mul_scalar -> p_x11 -> f_inplace -> y11
    x11 = nn.Variable.from_numpy_array(var_np)
    p_x11 = F.mul_scalar(x11, 2).apply(persistent=True)
    y11 = F.reshape(p_x11, (-1, ), inplace=True)

    # graph B: x12 -> f_inplace -> mul_scalar -> y12
    x12 = nn.Variable(shape=y11.shape)
    y12 = F.reshape(x12, shape, inplace=True) * 2

    # graph A->B: ... -> p_x11 -> f_inplace -> f_inplace -> mul_scalar -> y12
    x12.rewire_on(y11)

    x2 = nn.Variable.from_numpy_array(var_np)
    p_x2 = F.mul_scalar(x2, 2).apply(persistent=True)
    y2 = F.reshape(p_x2, (-1, ), inplace=False)
    y2 = F.reshape(y2, shape, inplace=False) * 2

    nn.forward_all([y12, y2], clear_buffer=True)
    assert_allclose(p_x11.d, p_x2.d)
    assert_allclose(y12.d, y2.d)
Example #12
def main(args):
    # Settings
    device_id = args.device_id
    batch_size = args.batch_size
    batch_size_eval = args.batch_size_eval
    n_l_train_data = 4000
    n_train_data = 50000
    n_cls = 10
    learning_rate = 1. * 1e-3
    n_epoch = 300
    act = F.relu
    iter_epoch = n_train_data // batch_size
    n_iter = n_epoch * iter_epoch
    extension_module = args.context

    # Model
    ## supervised
    batch_size, m, h, w = batch_size, 3, 32, 32
    ctx = extension_context(extension_module, device_id=device_id)
    x_l = nn.Variable((batch_size, m, h, w))
    y_l = nn.Variable((batch_size, 1))
    pred, log_var = cnn_model_003(ctx, x_l)
    loss_ce = ce_loss_with_uncertainty(ctx, pred, y_l, log_var)
    loss_supervised = loss_ce

    ## stochastic regularization
    x_u0 = nn.Variable((batch_size, m, h, w))
    x_u1 = nn.Variable((batch_size, m, h, w))
    pred_x_u0, log_var0 = cnn_model_003(ctx, x_u0)
    pred_x_u1, log_var1 = cnn_model_003(ctx, x_u1)
    loss_sr = sr_loss_with_uncertainty(ctx, pred_x_u0, pred_x_u1, log_var0,
                                       log_var1)
    loss_er0 = er_loss(ctx, pred_x_u0)
    loss_er1 = er_loss(ctx, pred_x_u1)
    loss_unsupervised = loss_sr + loss_er0 + loss_er1
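    # Unsupervised objective: the consistency (stochastic-regularization) loss
    # between the predictions for two augmented views of the same unlabeled
    # batch, weighted by the predicted uncertainties, plus the er_loss terms
    # on each prediction.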

    ## evaluate
    batch_size_eval, m, h, w = batch_size, 3, 32, 32
    x_eval = nn.Variable((batch_size_eval, m, h, w))
    pred_eval, _ = cnn_model_003(ctx, x_eval, test=True)

    # Solver
    with nn.context_scope(ctx):
        solver = S.Adam(alpha=learning_rate)
        solver.set_parameters(nn.get_parameters())

    # Dataset
    ## separate dataset
    home = os.environ.get("HOME")
    fpath = os.path.join(home, "datasets/cifar10/cifar-10.npz")
    separator = Separator(n_l_train_data)
    separator.separate_then_save(fpath)

    l_train_path = os.path.join(home, "datasets/cifar10/l_cifar-10.npz")
    u_train_path = os.path.join(home, "datasets/cifar10/cifar-10.npz")
    test_path = os.path.join(home, "datasets/cifar10/cifar-10.npz")

    # data reader
    data_reader = Cifar10DataReader(l_train_path,
                                    u_train_path,
                                    test_path,
                                    batch_size=batch_size,
                                    n_cls=n_cls,
                                    da=True,
                                    shape=True)

    # Training loop
    print("# Training loop")
    epoch = 1
    st = time.time()
    acc_prev = 0.
    for i in range(n_iter):
        # Get data and set it to the variables
        x_l0_data, x_l1_data, y_l_data = data_reader.get_l_train_batch()
        x_u0_data, x_u1_data, y_u_data = data_reader.get_u_train_batch()

        x_l.d, _, y_l.d = x_l0_data, x_l1_data, y_l_data
        x_u0.d, x_u1.d = x_u0_data, x_u1_data

        # Train
        loss_supervised.forward(clear_no_need_grad=True)
        loss_unsupervised.forward(clear_no_need_grad=True)
        solver.zero_grad()
        loss_supervised.backward(clear_buffer=True)
        loss_unsupervised.backward(clear_buffer=True)
        solver.update()

        # Evaluate
        if (i + 1) % iter_epoch == 0:
            # Get data and set it to the variables
            x_data, y_data = data_reader.get_test_batch()

            # Evaluation loop
            ve = 0.
            iter_val = 0
            for k in range(0, len(x_data), batch_size_eval):
                x_eval.d = get_test_data(x_data, k, batch_size_eval)
                label = get_test_data(y_data, k, batch_size_eval)
                pred_eval.forward(clear_buffer=True)
                ve += categorical_error(pred_eval.d, label)
                iter_val += 1
            msg = "Epoch:{},ElapsedTime:{},Acc:{:02f}".format(
                epoch,
                time.time() - st, (1. - ve / iter_val) * 100)
            print(msg)
            st = time.time()
            epoch += 1
Example #13
def solver_tester(rng, solver, ref_solver, solver_args=[], solver_kwargs={},
                  num_itr=5, decay=1e-4, clip_norm=0.5, atol=1e-6,
                  ctx=None, solver_name=None):
    if ctx is None:
        ctx = nn.Context()
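    # `solver` is the nnabla implementation under test; `ref_solver` is a
    # NumPy reference.  Both receive identical parameters and gradients, and
    # their results are compared after every operation below.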

    # Create params
    p1 = nn.Variable([2, 3, 4])
    p2 = nn.Variable([3, 4, 1, 2])
    p3 = nn.Variable([])

    params = OrderedDict([('zZzZ', p1), ('bbb', p2), ('asdfadfdasd', p3)])
    for p in params.values():
        p.d = rng.randn(*p.shape)
        p.g = rng.randn(*p.shape)

    with nn.context_scope(ctx):
        s = solver(*solver_args, **solver_kwargs)
    s.set_parameters(params)
    if solver_name is not None:
        assert s.name == solver_name

    ref_s = ref_solver(*solver_args, **solver_kwargs)
    ref_s.set_parameters(params)

    # Get params (unordered_map is used in C++, thus check in both directions)
    params_ = s.get_parameters()
    for k0, v0 in iteritems(ref_s.params):
        v1 = params_[k0]
        assert_allclose(v0, v1.d, atol=atol)
    for k1, v1 in iteritems(params_):
        v0 = ref_s.params[k1]
        assert_allclose(v0, v1.d, atol=atol)

    # Check weight decay.
    grad_copy = OrderedDict([(k, p.g.copy())
                             for k, p in iteritems(params)])
    s.weight_decay(decay)
    ref_s.weight_decay(grad_copy, decay)
    for p, ref_p in zip(params.values(), grad_copy.values()):
        assert_allclose(ref_p, p.g, atol=atol)

    # Check clip grad by norm.
    grad_copy = OrderedDict([(k, p.g.copy())
                             for k, p in iteritems(params)])
    s.clip_grad_by_norm(clip_norm)
    ref_s.clip_grad_by_norm(grad_copy, clip_norm)
    for p, ref_p in zip(params.values(), grad_copy.values()):
        assert np.allclose(ref_p, p.g, atol=atol)

    # Check solver update.
    for i in range(num_itr):
        grads = OrderedDict([(k, rng.randn(*p.shape))
                             for k, p in iteritems(params)])
        for k, g in iteritems(grads):
            params[k].g = g
        s.update()
        ref_s.update(grads)
        # update check
        for p, ref_p in zip(params.values(), ref_s.params.values()):
            assert_allclose(ref_p, p.d, atol=atol)
        # iteration state increment check
        for state in s.get_states().values():
            assert state.t == (i + 1)

    # Check inf, nan, and inf/nan
    for v, method in zip([[np.inf], [np.nan], [np.inf, np.nan]],
                         [lambda s: s.check_inf_grad(),
                          lambda s: s.check_nan_grad(),
                          lambda s: s.check_inf_or_nan_grad()]):
        def set_value(p):
            p.g[...] = rng.choice(v + [-1, 0, 1],
                                  size=int(np.prod(p.shape)),
                                  replace=True).reshape(p.shape)
            if v[0] not in p.g:
                p.g.flat[rng.choice(np.arange(int(np.prod(p.shape))))] = v[0]
        for p in params.values():
            assert method(s) == False
            g = p.g.copy()
            set_value(p)
            assert method(s) == True
            p.g[...] = g

    # Rescale grad
    scale = 10.
    ref_grad = [p.g.copy() for p in params.values()]
    for p in params.values():
        p.g *= scale
    s.scale_grad(1. / scale)
    for ref, p in zip(ref_grad, params.values()):
        assert_allclose(ref, p.g, atol=1e-4)

    # Save/Load Test
    def test_save_load(s, name):
        # Save states
        import tempfile
        tmpdir = tempfile.mkdtemp("solver-test")
        tmpfile = os.path.join(tmpdir, name)
        states0 = s.get_states()
        s.save_states(tmpfile)
        # Load states
        with nn.context_scope(ctx):
            s1 = solver(*solver_args, **solver_kwargs)
            s1.set_parameters(params)
            s1.load_states(tmpfile)
        # Check save/load states
        states1 = s1.get_states()
        for k0, s0 in iteritems(states0):
            s1 = states1[k0]
            for sname, vx0 in iteritems(s0.pstate):
                vx1 = s1.pstate[sname]
                assert_allclose(vx0.d, vx1.d)
            assert s1.t == s0.t
    test_save_load(s, "states.h5")
    test_save_load(s, "states.protobuf")

    # Check if remove_state_impl work correctly.
    s.clear_parameters()
Example #14
    def update_graph(self, key='train'):
        r"""Builds the graph and update the placeholder.

        Args:
            training (bool, optional): Type of the graph. Defaults to `train`.
        """
        assert key in ('train', 'valid')

        self.gen.training = key == 'train'
        self.dis.training = key == 'train'
        hp = self.hp

        def data_aug(v):
            v = random_flip(v)
            v = random_scaling(v, hp.scale_low, hp.scale_high)
            return v

        # define input variables
        input_x = nn.Variable((hp.batch_size, 1, hp.segment_length))
        input_y = nn.Variable((hp.batch_size, 1, hp.segment_length))
        label_x = nn.Variable((hp.batch_size, 1))
        label_y = nn.Variable((hp.batch_size, 1))

        x_aug = data_aug(input_x)
        r_jitter_x = random_jitter(x_aug, hp.max_jitter_steps)

        x_real_con = self.gen.encode(x_aug)
        s_real, s_mu, s_logvar = self.gen.embed(data_aug(input_x))
        x_real = self.gen.decode(x_real_con, s_real)

        r_fake = self.gen.embed(data_aug(input_y))[0]
        x_fake = self.gen.decode(x_real_con, r_fake)
        x_fake_con = self.gen.encode(random_flip(x_fake))

        dis_real_x = self.dis(data_aug(input_x), label_x)
        dis_fake_x = self.dis(data_aug(x_fake), label_y)

        # ------------------------------ Discriminator -----------------------
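        # Standard GAN discriminator objective: real inputs are pushed towards
        # label 1.0 and generated inputs towards label 0.0.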
        d_loss = (self.dis.adversarial_loss(dis_real_x, 1.0) +
                  self.dis.adversarial_loss(dis_fake_x, 0.0))
        # --------------------------------------------------------------------

        # -------------------------------- Generator -------------------------
        g_loss_avd = self.dis.adversarial_loss(self.dis(x_fake, label_y), 1.0)
        g_loss_con = self.dis.preservation_loss(x_fake_con, x_real_con)
        g_loss_kld = self.gen.kl_loss(s_mu, s_logvar)
        g_loss_rec = (self.dis.perceptual_loss(x_real, r_jitter_x) +
                      self.dis.spectral_loss(x_real, r_jitter_x))
        g_loss = (g_loss_avd + hp.lambda_con * g_loss_con +
                  hp.lambda_rec * g_loss_rec + hp.lambda_kld * g_loss_kld)

        # -------------------------------------------------------------------
        set_persistent_all(g_loss_con, g_loss_avd, g_loss, d_loss, x_fake,
                           g_loss_kld, g_loss_rec)

        self.placeholder[key] = dict(
            input_x=input_x,
            label_x=label_x,
            input_y=input_y,
            label_y=label_y,
            x_fake=x_fake,
            d_loss=d_loss,
            g_loss_avd=g_loss_avd,
            g_loss_con=g_loss_con,
            g_loss_rec=g_loss_rec,
            g_loss_kld=g_loss_kld,
            g_loss=g_loss,
        )
Example #15
def main():

    args = get_args()
    state_size = args.state_size
    batch_size = args.batch_size
    num_steps = args.num_steps
    num_layers = args.num_layers
    max_epoch = args.max_epoch
    max_norm = args.gradient_clipping_max_norm
    num_words = 10000
    lr = args.learning_rate

    train_data, val_data, test_data = get_data()

    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    from nnabla.monitor import Monitor, MonitorSeries
    monitor = Monitor(args.work_dir)
    monitor_perplexity = MonitorSeries("Training perplexity",
                                       monitor,
                                       interval=10)
    monitor_vperplexity = MonitorSeries("Validation perplexity",
                                        monitor,
                                        interval=(len(val_data) //
                                                  (num_steps * batch_size)))
    monitor_tperplexity = MonitorSeries("Test perplexity",
                                        monitor,
                                        interval=(len(test_data) //
                                                  (num_steps * 1)))

    l1 = LSTMWrapper(batch_size, state_size)
    l2 = LSTMWrapper(batch_size, state_size)

    # train graph

    x = nn.Variable((batch_size, num_steps))
    t = nn.Variable((batch_size, num_steps))
    w = I.UniformInitializer((-0.1, 0.1))
    b = I.ConstantInitializer(1)
    loss = get_loss(l1, l2, x, t, w, b, num_words, batch_size, state_size,
                    True)
    l1.share_data()
    l2.share_data()

    # validation graph

    vx = nn.Variable((batch_size, num_steps))
    vt = nn.Variable((batch_size, num_steps))
    vloss = get_loss(l1, l2, vx, vt, w, b, num_words, batch_size, state_size)
    solver = S.Sgd(lr)
    solver.set_parameters(nn.get_parameters())

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    best_val = 10000
    for epoch in range(max_epoch):
        l1.reset_state()
        l2.reset_state()
        for i in range(len(train_data) // (num_steps * batch_size)):
            x.d, t.d = get_batch(train_data, i * num_steps, batch_size,
                                 num_steps)
            solver.zero_grad()
            loss.forward()
            loss.backward(clear_buffer=True)
            solver.weight_decay(1e-5)
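            # Clip gradients (by max_norm) before the SGD update to avoid
            # exploding gradients in the recurrent network.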
            gradient_clipping(nn.get_parameters().values(), max_norm)
            solver.update()
            perp = perplexity(loss.d.copy())
            monitor_perplexity.add(
                (len(train_data) // (num_steps * batch_size)) * (epoch) + i,
                perp)
        l1.reset_state()
        l2.reset_state()
        vloss_avg = 0
        for i in range(len(val_data) // (num_steps * batch_size)):
            vx.d, vt.d = get_batch(val_data, i * num_steps, batch_size,
                                   num_steps)
            vloss.forward()
            vloss_avg += vloss.d.copy()
        vloss_avg /= float((len(val_data) // (num_steps * batch_size)))
        vper = perplexity(vloss_avg)

        if vper < best_val:
            best_val = vper
            if vper < 200:
                save_name = "params_epoch_{:02d}.h5".format(epoch)
                nn.save_parameters(os.path.join(args.save_dir, save_name))
        else:
            solver.set_learning_rate(solver.learning_rate() * 0.25)
            logger.info("Decreased learning rate to {:05f}".format(
                solver.learning_rate()))
        monitor_vperplexity.add(
            (len(val_data) // (num_steps * batch_size)) * (epoch) + i, vper)

    # for final test split
    t_batch_size = 1
    tl1 = LSTMWrapper(t_batch_size, state_size)
    tl2 = LSTMWrapper(t_batch_size, state_size)
    tloss_avg = 0
    tx = nn.Variable((t_batch_size, num_steps))
    tt = nn.Variable((t_batch_size, num_steps))
    tloss = get_loss(tl1, tl2, tx, tt, w, b, num_words, 1, state_size)

    tl1.share_data()
    tl2.share_data()

    for i in range(len(test_data) // (num_steps * t_batch_size)):
        tx.d, tt.d = get_batch(test_data, i * num_steps, 1, num_steps)
        tloss.forward()
        tloss_avg += tloss.d.copy()
    tloss_avg /= float((len(test_data) // (num_steps * t_batch_size)))
    tper = perplexity(tloss_avg)
    monitor_tperplexity.add(
        (len(test_data) // (num_steps * t_batch_size)) * (epoch) + i, tper)
Example #16
    def _get_variable_or_create(self, v, callback, current_scope):

        if v.variable is not None:
            return v.variable

        v = callback._apply_generate_variable(v)

        if v.variable is not None:
            return v.variable

        pvar = v.proto
        name = pvar.name
        shape = list(pvar.shape.dim)
        if len(shape) > 0 and shape[0] < 0:
            shape[0] = self.batch_size
        shape = tuple(shape)
        assert np.all(np.array(shape) > 0
                      ), "Shape must be positive. Given {}.".format(shape)

        if pvar.type != 'Parameter':
            # Create a new variable and returns.
            var = nn.Variable(shape)
            v.variable = var
            var.name = name
            return var

        # Trying to load the parameter from the global scope.
        try:
            with nn.parameter_scope('', current_scope):
                param = get_parameter(name)

            if param is not None:
                assert shape == param.shape
                param = param.get_unlinked_variable(need_grad=v.need_grad)
                v.variable = param
                param.name = name
                return param

            # Parameter does not exist in the global scope.
            # Then try to load the parameter from .nnp file.
            callback.verbose('Loading parameter `{}` from .nnp.'.format(name))
            param = get_parameter(name)

            if param is None:
                logger.info(
                    'Parameter `{}` is not found. Initializing.'.format(name))
                tmp = _create_variable(pvar, name, shape, self.rng)
                param = tmp.variable_instance

            # Register the parameter to the current (global) scope.
            with nn.parameter_scope('', current_scope):
                set_parameter(name, param)

        except:
            import traceback
            raise ValueError(
                'An error occurred during creation of a variable `{}` as a'
                ' parameter variable. The error was:\n----\n{}\n----\n'
                'The parameters registered were: {}'.format(
                    name, traceback.format_exc(), '\n'.join(
                        list(nn.get_parameters(grad_only=False).keys()))))

        assert shape == param.shape
        param = param.get_unlinked_variable(need_grad=v.need_grad)
        v.variable = param
        param.name = name
        return param
Example #17
def animate(args):

    # get context
    ctx = get_extension_context(args.context)
    nn.set_default_context(ctx)
    logger.setLevel(logging.ERROR)  # to suppress minor messages

    if not args.config:
        assert not args.params, "pretrained weights file is given, but corresponding config file is not. Please give both."
        download_provided_file(
            "https://nnabla.org/pretrained-models/nnabla-examples/GANs/first-order-model/voxceleb_trained_info.yaml")
        args.config = 'voxceleb_trained_info.yaml'

        download_provided_file(
            "https://nnabla.org/pretrained-models/nnabla-examples/GANs/first-order-model/pretrained_fomm_params.h5")

    config = read_yaml(args.config)

    dataset_params = config.dataset_params
    model_params = config.model_params

    if args.detailed:
        vis_params = config.visualizer_params
        visualizer = Visualizer(**vis_params)

    if not args.params:
        assert "log_dir" in config, "no log_dir found in config. therefore failed to locate pretrained parameters."
        param_file = os.path.join(
            config.log_dir, config.saved_parameters)
    else:
        param_file = args.params
    print(f"Loading {param_file} for image animation...")
    nn.load_parameters(param_file)

    bs, h, w, c = [1] + dataset_params.frame_shape
    source = nn.Variable((bs, c, h, w))
    driving_initial = nn.Variable((bs, c, h, w))
    driving = nn.Variable((bs, c, h, w))

    filename = args.driving

    # process repeated until all the test data is used
    driving_video = read_video(
        filename, dataset_params.frame_shape)  # (#frames, h, w, 3)
    driving_video = np.transpose(
        driving_video, (0, 3, 1, 2))  # (#frames, 3, h, w)

    source_img = imread(args.source, channel_first=True,
                        size=(256, 256)) / 255.
    source_img = source_img[:3]

    source.d = np.expand_dims(source_img, 0)
    driving_initial.d = driving_video[0][:3, ]

    with nn.parameter_scope("kp_detector"):
        kp_source = detect_keypoint(source,
                                    **model_params.kp_detector_params,
                                    **model_params.common_params,
                                    test=True, comm=False)
        persistent_all(kp_source)

    with nn.parameter_scope("kp_detector"):
        kp_driving_initial = detect_keypoint(driving_initial,
                                             **model_params.kp_detector_params,
                                             **model_params.common_params,
                                             test=True, comm=False)
        persistent_all(kp_driving_initial)

    with nn.parameter_scope("kp_detector"):
        kp_driving = detect_keypoint(driving,
                                     **model_params.kp_detector_params,
                                     **model_params.common_params,
                                     test=True, comm=False)
        persistent_all(kp_driving)

    if args.adapt_movement_scale:
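        # Scale the driving motion by sqrt(source keypoint hull area /
        # driving keypoint hull area) so the transferred motion matches the
        # scale of the source image.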
        nn.forward_all([kp_source["value"],
                        kp_source["jacobian"],
                        kp_driving_initial["value"],
                        kp_driving_initial["jacobian"]])
        source_area = ConvexHull(kp_source['value'][0].d).volume
        driving_area = ConvexHull(kp_driving_initial['value'][0].d).volume
        adapt_movement_scale = np.sqrt(source_area) / np.sqrt(driving_area)
    else:
        adapt_movement_scale = 1

    kp_norm = adjust_kp(kp_source=unlink_all(kp_source), kp_driving=kp_driving,
                        kp_driving_initial=unlink_all(kp_driving_initial),
                        adapt_movement_scale=adapt_movement_scale,
                        use_relative_movement=args.unuse_relative_movement,
                        use_relative_jacobian=args.unuse_relative_jacobian)
    persistent_all(kp_norm)

    with nn.parameter_scope("generator"):
        generated = occlusion_aware_generator(source,
                                              kp_source=unlink_all(kp_source),
                                              kp_driving=kp_norm,
                                              **model_params.generator_params,
                                              **model_params.common_params,
                                              test=True, comm=False)

    if not args.full and 'sparse_deformed' in generated:
        del generated['sparse_deformed']  # remove needless info

    persistent_all(generated)

    generated['kp_driving'] = kp_driving
    generated['kp_source'] = kp_source
    generated['kp_norm'] = kp_norm

    # generated contains these values;
    # 'mask': <Variable((bs, num_kp+1, h/4, w/4)) when scale_factor=0.25
    # 'sparse_deformed': <Variable((bs, num_kp+1, num_channel, h/4, w/4))  # (bs, num_kp + 1, c, h, w)
    # 'occlusion_map': <Variable((bs, 1, h/4, w/4))
    # 'deformed': <Variable((bs, c, h, w))
    # 'prediction': <Variable((bs, c, h, w))

    mode = "arbitrary"
    if "log_dir" in config:
        result_dir = os.path.join(args.out_dir, os.path.basename(config.log_dir), f"{mode}")
    else:
        result_dir = os.path.join(args.out_dir, "test_result", f"{mode}")

    # create an empty directory to save generated results
    _ = nm.Monitor(result_dir)

    # load the header images.
    header = imread("imgs/header_combined.png", channel_first=True)
    generated_images = list()

    # compute these in advance and reuse
    nn.forward_all([kp_source["value"],
                    kp_source["jacobian"]],
                   clear_buffer=True)
    nn.forward_all([kp_driving_initial["value"],
                    kp_driving_initial["jacobian"]],
                   clear_buffer=True)

    num_of_driving_frames = driving_video.shape[0]

    for frame_idx in tqdm(range(num_of_driving_frames)):
        driving.d = driving_video[frame_idx][:3, ]
        nn.forward_all([generated["prediction"],
                        generated["deformed"]], clear_buffer=True)

        if args.detailed:
            # visualize source w/kp, driving w/kp, deformed source, generated w/kp, generated image, occlusion map
            visualization = visualizer.visualize(
                source=source.d, driving=driving.d, out=generated)
            if args.full:
                visualization = reshape_result(visualization)  # (H, W, C)
            combined_image = visualization.transpose(2, 0, 1)  # (C, H, W)

        elif args.only_generated:
            combined_image = np.clip(generated["prediction"].d[0], 0.0, 1.0)
            combined_image = (255*combined_image).astype(np.uint8)  # (C, H, W)

        else:
            # visualize source, driving, and generated image
            driving_fake = np.concatenate([np.clip(driving.d[0], 0.0, 1.0),
                                           np.clip(generated["prediction"].d[0], 0.0, 1.0)], axis=2)
            header_source = np.concatenate([np.clip(header / 255., 0.0, 1.0),
                                            np.clip(source.d[0], 0.0, 1.0)], axis=2)
            combined_image = np.concatenate(
                [header_source, driving_fake], axis=1)
            combined_image = (255*combined_image).astype(np.uint8)

        generated_images.append(combined_image)

    # once each video is generated, save it.
    output_filename = f"{os.path.splitext(os.path.basename(filename))[0]}.mp4"
    output_filename = f"{os.path.basename(args.source)}_by_{output_filename}"
    output_filename = output_filename.replace("#", "_")
    if args.output_png:
        monitor_vis = nm.MonitorImage(output_filename, nm.Monitor(result_dir),
                                      interval=1, num_images=1,
                                      normalize_method=lambda x: x)
        for frame_idx, img in enumerate(generated_images):
            monitor_vis.add(frame_idx, img)
    else:
        generated_images = [_.transpose(1, 2, 0) for _ in generated_images]
        # you might need to change ffmpeg_params according to your environment.
        mimsave(f'{os.path.join(result_dir, output_filename)}', generated_images,
                fps=args.fps,
                ffmpeg_params=["-pix_fmt", "yuv420p",
                               "-vcodec", "libx264",
                               "-f", "mp4",
                               "-q", "0"])

    return
Example #18
0
def main():
    """
    main - driver code to run training for Zooming SloMo
    """
    # Check NNabla version
    if get_nnabla_version_integer() < 11700:
        raise ValueError(
            'This does not work with nnabla versions earlier than v1.17.0, '
            'since the deformable_conv layer was added in v1.17.0. '
            'Please update nnabla.'
        )

    conf = get_config()
    extension_module = conf.nnabla_context.context
    ctx = get_extension_context(extension_module,
                                device_id=conf.nnabla_context.device_id)
    comm = CommunicatorWrapper(ctx)
    nn.set_default_context(comm.ctx)
    print("comm rank", comm.rank)

    # change max_iter, learning_rate and cosine_period when batch-size or no. of gpu devices change.
    default_batch_size = 12
    train_scale_factor = comm.n_procs * \
        (conf.train.batch_size / default_batch_size)
    max_iter = int(conf.train.max_iter // train_scale_factor)
    learning_rate = conf.train.learning_rate * \
        (conf.train.batch_size / default_batch_size)
    cosine_period = int(conf.train.cosine_period // train_scale_factor)
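    # e.g., with 2 GPUs and batch_size=24 (illustrative values only):
    # train_scale_factor = 2 * (24 / 12) = 4, so max_iter and cosine_period
    # shrink to a quarter while the learning rate doubles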

    # for single-GPU training
    data_iterator_train = data_iterator(conf, shuffle=True)

    # for multi-GPU training
    if comm.n_procs > 1:
        data_iterator_train = data_iterator_train.slice(
            rng=None, num_of_slices=comm.n_procs, slice_pos=comm.rank)

    # LR-LFR data for ZoomingSloMo input
    data_lr_lfr = nn.Variable(
        (conf.train.batch_size, (conf.data.n_frames // 2) + 1, 3,
         conf.data.lr_size, conf.data.lr_size))

    # HR-HFR data for ZoomingSloMo ground truth
    data_gt = nn.Variable((conf.train.batch_size, conf.data.n_frames, 3,
                           conf.data.gt_size, conf.data.gt_size))

    if conf.train.only_slomo:
        '''
        The only-SloMo network takes high-resolution data as input for frame
        interpolation, hence we use a smaller number of frames.
        '''
        # LFR data for SloMo input,
        slomo_gt = data_gt
        input_to_slomo = slomo_gt[:, 0:conf.data.n_frames:2, :, :, :]
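        # taking every second frame gives the low-frame-rate input whose
        # in-between frames the SloMo network has to reconstruct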

    # setting up monitors for logging
    monitor_path = './nnmonitor'
    monitor = Monitor(monitor_path)
    monitor_loss = MonitorSeries('loss',
                                 monitor,
                                 interval=conf.train.monitor_log_freq)
    monitor_lr = MonitorSeries('learning rate',
                               monitor,
                               interval=conf.train.monitor_log_freq)
    monitor_time = MonitorTimeElapsed("training time per iteration",
                                      monitor,
                                      interval=conf.train.monitor_log_freq)

    scope_name = "ZoomingSloMo" if not conf.train.only_slomo else "SloMo"

    with nn.parameter_scope(scope_name):
        if conf.train.only_slomo:
            generated_frame = zooming_slo_mo_network(input_to_slomo,
                                                     conf.train.only_slomo)
            diff = generated_frame - slomo_gt
        else:
            generated_frame = zooming_slo_mo_network(data_lr_lfr,
                                                     conf.train.only_slomo)
            diff = generated_frame - data_gt

    # Charbonnier loss
    loss = F.sum((diff * diff + conf.train.eps)**0.5)
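    # i.e., the sum over all elements of sqrt(diff^2 + eps): a smooth,
    # differentiable approximation of the L1 loss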

    # Define optimizer
    solver = S.Adam(alpha=learning_rate,
                    beta1=conf.train.beta1,
                    beta2=conf.train.beta2)

    # Set Parameters
    with nn.parameter_scope(scope_name):
        solver.set_parameters(nn.get_parameters())

    solver_dict = {scope_name: solver}

    if comm.rank == 0:
        print("maximum iterations", max_iter)

    start_point = 0
    if conf.train.checkpoint:
        # Load optimizer/solver information and model weights from checkpoint
        print("Loading weights from checkpoint:", conf.train.checkpoint)
        with nn.parameter_scope(scope_name):
            start_point = load_checkpoint(conf.train.checkpoint, solver_dict)

    if not os.path.isdir(conf.data.output_dir):
        os.makedirs(conf.data.output_dir)

    # Training loop.
    for i in range(start_point, max_iter):
        # Get Training Data
        if conf.train.only_slomo:
            _, data_gt.d = data_iterator_train.next()
        else:
            data_lr_lfr.d, data_gt.d = data_iterator_train.next()
        l_rate = get_repeated_cosine_annealing_learning_rate(
            i, learning_rate, conf.train.eta_min, cosine_period,
            conf.train.cosine_num_period)

        # Update
        solver.zero_grad()
        solver.set_learning_rate(l_rate)
        loss.forward(clear_no_need_grad=True)
        if comm.n_procs > 1:
            all_reduce_callback = comm.get_all_reduce_callback()
            loss.backward(clear_buffer=True,
                          communicator_callbacks=all_reduce_callback)
        else:
            loss.backward(clear_buffer=True)
        solver.update()

        if comm.rank == 0:
            monitor_loss.add(i, loss.d.copy())
            monitor_lr.add(i, l_rate)
            monitor_time.add(i)
            if (i % conf.train.save_checkpoint_freq) == 0:
                # Save intermediate check_points
                with nn.parameter_scope(scope_name):
                    save_checkpoint(conf.data.output_dir, i, solver_dict)

    # Save final model parameters
    if comm.rank == 0:
        with nn.parameter_scope(scope_name):
            nn.save_parameters(
                os.path.join(conf.data.output_dir, "final_model.h5"))
Example #19
0
    def build_static_graph(self):
        real_img = nn.Variable(shape=(self.batch_size, 3, self.img_size,
                                      self.img_size))
        noises = [
            F.randn(shape=(self.batch_size, self.config['latent_dim']))
            for _ in range(2)
        ]
        if self.few_shot_config['common']['type'] == 'cdc':
            NT_class = NoiseTop(n_train=self.train_loader.size,
                                latent_dim=self.config['latent_dim'],
                                batch_size=self.batch_size)
            noises = NT_class()
            self.PD_switch_var = NT_class.PD_switch_var

        if self.config['regularize_gen']:
            fake_img, dlatents = self.generator(self.batch_size,
                                                noises,
                                                return_latent=True)
        else:
            fake_img = self.generator(self.batch_size, noises)
        fake_img_test = self.generator_ema(self.batch_size, noises)

        if self.few_shot_config['common']['type'] != 'cdc':
            fake_disc_out = self.discriminator(fake_img)
            real_disc_out = self.discriminator(real_img)
            disc_loss = disc_logistic_loss(real_disc_out, fake_disc_out)

        gen_loss = 0
        if self.few_shot_config['common']['type'] == 'cdc':
            fake_img_s = self.generator_s(self.batch_size, noises)
            cdc_loss = CrossDomainCorrespondence(
                fake_img,
                fake_img_s,
                _choice_num=self.few_shot_config['cdc']['feature_num'],
                _layer_fix_switch=self.few_shot_config['cdc']['layer_fix'])
            gen_loss += self.few_shot_config['cdc']['lambda'] * cdc_loss
            # --- PatchDiscriminator ---
            fake_disc_out, fake_feature_var = self.discriminator(
                fake_img, patch_switch=True, index=0)
            real_disc_out, real_feature_var = self.discriminator(
                real_img, patch_switch=True, index=0)
            disc_loss = disc_logistic_loss(real_disc_out, fake_disc_out)
            disc_loss_patch = disc_logistic_loss(fake_feature_var,
                                                 real_feature_var)
            disc_loss += self.PD_switch_var * disc_loss_patch

        gen_loss += gen_nonsaturating_loss(fake_disc_out)

        var_name_list = [
            'real_img', 'noises', 'fake_img', 'gen_loss', 'disc_loss',
            'fake_disc_out', 'real_disc_out', 'fake_img_test'
        ]
        var_list = [
            real_img, noises, fake_img, gen_loss, disc_loss, fake_disc_out,
            real_disc_out, fake_img_test
        ]

        if self.config['regularize_gen']:
            dlatents.need_grad = True
            mean_path_length = nn.Variable()
            pl_reg, path_mean, _ = gen_path_regularize(
                fake_img=fake_img,
                latents=dlatents,
                mean_path_length=mean_path_length)
            path_mean_update = F.assign(mean_path_length, path_mean)
            path_mean_update.name = 'path_mean_update'
            pl_reg += 0 * path_mean_update
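            # multiplying by zero keeps path_mean_update in the graph, so the
            # running mean is updated on forward without changing the loss value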
            gen_loss_reg = gen_loss + pl_reg
            var_name_list.append('gen_loss_reg')
            var_list.append(gen_loss_reg)

        if self.config['regularize_disc']:
            real_img.need_grad = True
            real_disc_out = self.discriminator(real_img)
            disc_loss_reg = disc_loss + self.config[
                'r1_coeff'] * 0.5 * disc_r1_loss(
                    real_disc_out, real_img) * self.config['disc_reg_step']
            real_img.need_grad = False
            var_name_list.append('disc_loss_reg')
            var_list.append(disc_loss_reg)

        Parameters = namedtuple('Parameters', var_name_list)
        self.parameters = Parameters(*var_list)
Example #20
0
    def __call__(self,
                 outputs,
                 inputs,
                 grad_outputs=None,
                 persistent_outputs=[],
                 bind_grad_output=False):
        """
        The logic of this method is almost the same as that of visit_function_backward in the C++ layer.
        """
        # TODO: address test in the dynamic graph mode
        # TODO: address inplace-Function and its test
        # TODO: address auto_forward being very slow. It may be Python overhead, since the difference is small when the batch size is large.
        # TODO: address auto_forward consumes lots of memory, need to call v.get_unlinked_variable()?
        # TODO: address auto_forward consumes lots of memory, need to use NdArray as inputs?
        # TODO: NHWC format
        # TODO: Half
        # TODO: address `set default context`

        # Check outputs/inputs
        outputs = self._force_list(outputs)
        if not all([isinstance(o, nn.Variable) for o in outputs]):
            raise ValueError("Element of outputs must be `nnabla.Variable`.")
        inputs = self._force_list(inputs)
        if not all([isinstance(i, nn.Variable) for i in inputs]):
            raise ValueError("Element of inputs must be `nnabla.Variable`.")

        # Check grad_outputs
        if grad_outputs is None:
            grad_outputs = [None] * len(outputs)
        elif isinstance(grad_outputs, (int, float, np.ndarray, nn.NdArray)):
            grad_outputs = self._force_list(grad_outputs)
        elif isinstance(grad_outputs, list):
            if len(outputs) != len(grad_outputs):
                raise ValueError(
                    "Length of `grad_outputs` and lenght of `outputs` must be the same."
                )
            for i in range(len(outputs)):
                o = outputs[i]
                go = grad_outputs[i]
                if not isinstance(
                        go, (type(None), int, float, np.ndarray, nn.NdArray)):
                    raise ValueError(
                        "Element of `grad_outputs` must be "
                        "in (`None`, `int`, `float`, `numpy.ndarray`, `nnabla.NdArray`) or "
                        "list of (`None`, `int`, `float`, `numpy.ndarray`, `nnabla.NdArray`)\n"
                        "type(grad_outputs[{}] = {}".format(i, type(go)))
                elif isinstance(go, np.ndarray) and go.shape != o.shape:
                    raise ValueError(
                        "Shape of each of outputs and grad_outputs must be same.\n"
                        "output[{}]({}) != grad_output[{}]({})".format(
                            i, o.shape, i, go.shape))
                elif isinstance(go, nn.NdArray) and go.shape != o.shape:
                    raise ValueError(
                        "Shape of each of outputs and grad_outputs must be same.\n"
                        "output[{}]({}) != grad_output[{}]({})".format(
                            i, o.shape, i, go.shape))
        # Check persistent_outputs
        if len(persistent_outputs) != 0 and len(outputs) != len(
                persistent_outputs):
            raise ValueError(
                "Length of outputs and persistent_outputs "
                "must be the same except for "
                "the case that the lenght of the persistent_outputs is 0.")

        # Persistent outputs since outputs are basically losses to be monitored
        persistent_outputs = [True] * len(
            outputs) if persistent_outputs == [] else persistent_outputs
        for o, p in zip(outputs, persistent_outputs):
            o.persistent = p

        # Set grad_outputs
        for i in range(len(outputs)):
            o = outputs[i]
            go = grad_outputs[i]
            if go is None:
                pass
            elif isinstance(go, (int, float)):
                grad_output = nn.Variable(o.shape).apply(d=go, need_grad=False)
                outputs[i] = o * grad_output
            elif isinstance(go, np.ndarray):
                grad_output = nn.Variable(o.shape).apply(d=go, need_grad=False)
                outputs[i] = o * grad_output
            elif isinstance(go, nn.NdArray):
                grad_output = nn.Variable(o.shape).apply(data=go,
                                                         need_grad=False)
                outputs[i] = o * grad_output

        # Coerce to a sum if there are multiple outputs
        output = sum(outputs) if len(outputs) != 1 else outputs[0]
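        # summing keeps a single graph root, so one backward traversal from
        # `output` reaches the gradient paths of all requested outputs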

        # Connect the forward and backward graph
        grad_output = GradEndFunction()(output).apply(need_grad=False)

        # Open list of next search candidate
        ids = {}

        def get_id(func):
            if func not in ids.keys():
                size = len(ids)
                ids[func] = size
                return size
            return ids[func]

        open = set()
        func = output.parent
        open.add((-output.rank, get_id(func), func))

        # Map for grad_variables consumed on the backward graph
        grad_vars = OrderedDict()  # {F_fwd: {VO_fwd: [VI_bwd]}}
        grad_vars[func] = OrderedDict({output: [grad_output]})

        # Return grads
        wrt_inputs = inputs
        grads = [None] * len(wrt_inputs)

        # Expand the forward graph to the backward graph
        while len(open) != 0:
            open = sorted(open)  # Python sets are unordered, so sort explicitly.
            rank_func = open.pop(0)  # process functions in decreasing rank order (outputs first)
            open = set(open)
            f = rank_func[2]
            if not f.need_grad:
                continue
            # Connect variables on the backward graph
            grad_outputs = self._connect_on_backward_graph(grad_vars, f)

            # Check grads w.r.t. inputs
            for inp, grad_out in zip(f.inputs, grad_outputs):
                if inp not in wrt_inputs or inp.need_grad == False:
                    continue
                idx = wrt_inputs.index(inp)
                grads[idx] = grad_out
                if bind_grad_output:
                    inp.grad = grad_out.data

            # Propagate down
            for inp in f.inputs:
                if not inp.need_grad:
                    continue
                p_i = inp.parent
                if not p_i:
                    continue
                open.add((-p_i.rank, get_id(p_i), p_i))

        return grads
Example #21
0
def test_save_load_broadcast(tmpdir, variable_batch_size):
    x = nn.Variable([10, 1, 4, 1, 8])
    y = F.broadcast(x, shape=[10, 1, 4, 3, 8])
    check_save_load(tmpdir, x, y, variable_batch_size)
Example #22
0
def test_no_value():
    a = nn.Variable(())
    b = nn.Variable(())
    with pytest.raises(RuntimeError):
        F.concatenate(*[a, b], axis=0)
Example #23
0
def train():
    """
    Main script.

    Steps:

    * Parse command line arguments.
    * Specify contexts for computation.
    * Initialize DataIterator.
    * Construct a computation graph for training and one for validation.
    * Initialize solver and set parameter variables to that.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Compute the error rate on validation data (periodically)
      * Get a next minibatch.
      * Execute forwardprop
      * Set parameter gradients zero
      * Execute backprop.
      * Solver updates parameters by using gradients computed by backprop.
      * Compute training error
    """
    # Parse args
    args = get_args()
    n_train_samples = 50000
    bs_valid = args.batch_size
    extension_module = args.context
    ctx = get_extension_context(
        extension_module, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)
    if args.net == "cifar10_resnet23":
        prediction = functools.partial(
            resnet23_prediction, ncls=10, nmaps=64, act=F.relu)
        data_iterator = data_iterator_cifar10
    if args.net == "cifar100_resnet23":
        prediction = functools.partial(
            resnet23_prediction, ncls=100, nmaps=384, act=F.elu)
        data_iterator = data_iterator_cifar100

    # Create training graphs
    test = False
    image_train = nn.Variable((args.batch_size, 3, 32, 32))
    label_train = nn.Variable((args.batch_size, 1))
    pred_train = prediction(image_train, test)
    loss_train = loss_function(pred_train, label_train)
    input_image_train = {"image": image_train, "label": label_train}

    # Create validation graph
    test = True
    image_valid = nn.Variable((bs_valid, 3, 32, 32))
    pred_valid = prediction(image_valid, test)
    input_image_valid = {"image": image_valid}

    # Solvers
    solver = S.Adam()
    solver.set_parameters(nn.get_parameters())
    start_point = 0

    if args.checkpoint is not None:
        # load weights and solver state info from specified checkpoint file.
        start_point = load_checkpoint(args.checkpoint, solver)

    # Create monitor
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=10)
    monitor_verr = MonitorSeries("Test error", monitor, interval=1)

    # Data Iterator
    tdata = data_iterator(args.batch_size, True)
    vdata = data_iterator(args.batch_size, False)

    # save_nnp
    contents = save_nnp({'x': image_valid}, {'y': pred_valid}, args.batch_size)
    save.save(os.path.join(args.model_save_path,
                           '{}_epoch0_result.nnp'.format(args.net)), contents)

    # Training-loop
    for i in range(start_point, args.max_iter):
        # Validation
        if i % int(n_train_samples / args.batch_size) == 0:
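            # n_train_samples / batch_size iterations make up roughly one
            # epoch, so validation runs about once per epoch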
            ve = 0.
            for j in range(args.val_iter):
                image, label = vdata.next()
                input_image_valid["image"].d = image
                pred_valid.forward()
                ve += categorical_error(pred_valid.d, label)
            ve /= args.val_iter
            monitor_verr.add(i, ve)
        if int(i % args.model_save_interval) == 0:
            # save checkpoint file
            save_checkpoint(args.model_save_path, i, solver)

        # Forward/Zerograd/Backward
        image, label = tdata.next()
        input_image_train["image"].d = image
        input_image_train["label"].d = label
        loss_train.forward()
        solver.zero_grad()
        loss_train.backward()

        # Solvers update
        solver.update()

        e = categorical_error(
            pred_train.d, input_image_train["label"].d)
        monitor_loss.add(i, loss_train.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    nn.save_parameters(os.path.join(args.model_save_path,
                                    'params_%06d.h5' % (args.max_iter)))

    # save_nnp_lastepoch
    contents = save_nnp({'x': image_valid}, {'y': pred_valid}, args.batch_size)
    save.save(os.path.join(args.model_save_path,
                           '{}_result.nnp'.format(args.net)), contents)
Example #24
0
# (assumed reconstruction: train_data_iter, load_train_func and x_train are
#  inferred by symmetry with the validation iterator below)
train_data_iter = data_iterator_simple(load_train_func,
                                       len(x_train),
                                       batch_size,
                                       shuffle=True,
                                       with_file_cache=False)
valid_data_iter = data_iterator_simple(load_valid_func,
                                       len(x_valid),
                                       batch_size,
                                       shuffle=True,
                                       with_file_cache=False)

char_embedding_dim = 16
lstm_size = 650
filters = [50, 150, 200, 200]
filter_sizes = [1, 3, 5, 7]
# filters = [50, 100, 150, 200, 200, 200, 200]
# filter_sizes = [1, 2, 3, 4, 5, 6, 7]

x = nn.Variable((batch_size, sentence_length, word_length))
h = PF.embed(x, char_vocab_size, char_embedding_dim)
h = F.transpose(h, (0, 3, 1, 2))
output = []
for f, f_size in zip(filters, filter_sizes):
    _h = PF.convolution(h,
                        f,
                        kernel=(1, f_size),
                        pad=(0, f_size // 2),
                        name='conv_{}'.format(f_size))
    _h = F.max_pooling(_h, kernel=(1, word_length))
    output.append(_h)
h = F.concatenate(*output, axis=1)
h = F.transpose(h, (0, 2, 1, 3))
h = F.reshape(h, (batch_size, sentence_length, sum(filters)))
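# h now holds, for each word position, a single character-CNN feature vector
# of size sum(filters) built from the max-pooled convolution outputs above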
# h = PF.batch_normalization(h, axes=[2])
Example #25
0
def test_global_avgpool_module():
    shape = (10, 3, 32, 32)
    input = smo.Input(nn.Variable(shape))
    inp_module = smo.Input(value=input)
    pool = smo.GlobalAvgPool(parents=[inp_module])
    assert pool.shape == (10, 3, 1, 1)
Example #26
0
def main():

    # Get arguments
    args = get_args()
    data_file = "https://raw.githubusercontent.com/tomsercu/lstm/master/data/ptb.train.txt"
    model_file = args.work_dir + "model.h5"

    # Load Dataset
    itow, wtoi, dataset = load_ptbset(data_file)

    # Computation environment settings
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create data provider
    n_word = len(wtoi)
    n_dim = args.embed_dim
    batchsize = args.batchsize
    half_window = args.half_window_length
    n_negative = args.n_negative_sample

    di = DataIteratorForEmbeddingLearning(
        batchsize=batchsize,
        half_window=half_window,
        n_negative=n_negative,
        dataset=dataset)

    # Create model
    # - Real batch size including context samples and negative samples
    size = batchsize * (1 + n_negative) * (2 * (half_window - 1))
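    # e.g., with batchsize=128, half_window=3 and n_negative=5 (illustrative
    # values only): size = 128 * (1 + 5) * (2 * (3 - 1)) = 3072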

    # Model for learning
    # - input variables
    xl = nn.Variable((size,))  # variable for word
    yl = nn.Variable((size,))  # variable for context

    # Embed layers for word embedding function
    # - f_embed: maps a word index x to y, an n_dim feature vector,
    # --  for each sample in a minibatch
    hx = PF.embed(xl, n_word, n_dim, name="e1")  # feature vector for word
    hy = PF.embed(yl, n_word, n_dim, name="e1")  # feature vector for context
    hl = F.sum(hx * hy, axis=1)
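    # hl is the per-sample dot product of the word and context embeddings,
    # i.e. the skip-gram score fed to the sigmoid cross entropy below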

    # -- Approximated likelihood of context prediction
    # pos: word-context pairs, neg: negative samples
    tl = nn.Variable([size, ], need_grad=False)
    loss = F.sigmoid_cross_entropy(hl, tl)
    loss = F.mean(loss)

    # Model for test of searching similar words
    xr = nn.Variable((size,), need_grad=False)
    hr = PF.embed(xr, n_word, n_dim, name="e1")  # feature vector for test

    # Create solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    monitor = M.Monitor(args.work_dir)
    monitor_loss = M.MonitorSeries(
        "Training loss", monitor, interval=args.monitor_interval)
    monitor_time = M.MonitorTimeElapsed(
        "Training time", monitor, interval=args.monitor_interval)

    # Do training
    max_epoch = args.max_epoch
    for epoch in range(max_epoch):

        # iteration per epoch
        for i in range(di.n_batch):

            # get minibatch
            xi, yi, ti = di.next()

            # learn
            solver.zero_grad()
            xl.d, yl.d, tl.d = xi, yi, ti
            loss.forward(clear_no_need_grad=True)
            loss.backward(clear_buffer=True)
            solver.update()

            # monitor
            itr = epoch * di.n_batch + i
            monitor_loss.add(itr, loss.d)
            monitor_time.add(itr)

    # Save model
    nn.save_parameters(model_file)
    nnp_file = os.path.join(
        args.work_dir, 'wtov_%06d.nnp' % (args.max_epoch))
    runtime_contents = {
        'networks': [
            {'name': 'Validation',
             'batch_size': size,
             'outputs': {'e': hr},
             'names': {'w': xr}}],
        'executors': [
            {'name': 'Runtime',
             'network': 'Validation',
             'data': ['w'],
             'output': ['e']}]}
    save.save(nnp_file, runtime_contents)

    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.work_dir, [xi], [xr], hr, nnp_file)
    exit()

    # Evaluate by similarity
    max_check_words = args.max_check_words
    for i in range(max_check_words):

        # prediction
        xr.d = i
        hr.forward(clear_buffer=True)
        h = hr.d

        # similarity calculation
        w = nn.get_parameters()['e1/embed/W'].d
        s = np.sqrt((w * w).sum(1))
        w /= s.reshape((s.shape[0], 1))
        similarity = w.dot(h[0]) / s[i]

        # for understanding
        output_similar_words(itow, i, similarity)
Example #27
0
def prod_sum(inputs0, inputs1):
    out = 0.0
    for inp0, inp1 in zip(inputs0, inputs1):
        out += inp0 * nn.Variable(inp1.shape).apply(data=inp1)
    return out
Example #28
0
def train(args):
    """
    Main script.
    """

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    margin = 1.0  # Margin for contrastive loss.

    # TRAIN
    # Create input variables.
    image0 = nn.Variable([args.batch_size, 1, 28, 28])
    image1 = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size])
    # Create prediction graph.
    pred = mnist_lenet_siamese(image0, image1, test=False)
    # Create loss function.
    loss = F.mean(contrastive_loss(pred, label, margin))
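    # contrastive_loss is typically of the form
    #   label * d**2 + (1 - label) * max(margin - d, 0)**2
    # with d the distance between the two embeddings; the exact definition is
    # assumed to live in the contrastive_loss helper used above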

    # TEST
    # Create input variables.
    vimage0 = nn.Variable([args.batch_size, 1, 28, 28])
    vimage1 = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size])
    # Create prediction graph.
    vpred = mnist_lenet_siamese(vimage0, vimage1, test=True)
    vloss = F.mean(contrastive_loss(vpred, vlabel, margin))

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_vloss = M.MonitorSeries("Test loss", monitor, interval=10)

    # Initialize DataIterator for MNIST.
    rng = np.random.RandomState(313)
    data = siamese_data_iterator(args.batch_size, True, rng)
    vdata = siamese_data_iterator(args.batch_size, False, rng)
    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage0.d, vimage1.d, vlabel.d = vdata.next()
                vloss.forward(clear_buffer=True)
                ve += vloss.d
            monitor_vloss.add(i, ve / args.val_iter)
        if i % args.model_save_interval == 0:
            nn.save_parameters(
                os.path.join(args.model_save_path, 'params_%06d.h5' % i))
        image0.d, image1.d, label.d = data.next()
        solver.zero_grad()
        # Training forward, backward and update
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        monitor_loss.add(i, loss.d.copy())
        monitor_time.add(i)
    nn.save_parameters(
        os.path.join(args.model_save_path, 'params_%06d.h5' % args.max_iter))
Example #29
0
def train(args):
    """
    Main script.
    """

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN

    # Fake path
    #nn.load_parameters("/home/mizuochi/programing/font/dcgan_model_0220/generator_param_522000.h5")
    #z.d = np.random.randn(*z.shape)
    #gen.forward()
    #for i in range(40):
    #    Image.fromarray(np.uint8((gen.d[i][0]+1)*255/2.0)).save("./test/"+str(i)+".png")

    # Real path
    x = nn.Variable([args.batch_size, 1, 28, 28])
    x_ref = nn.Variable([args.batch_size, 1, 28, 28])
    #vec = nn.Variable([args.batch_size, 100])
    pred_vec = vectorizer(x, maxh=1024, test=False)
    print(pred_vec.shape)
    #z = pred_vec.reshape((args.batch_size, 100, 1, 1))
    #gen = generator(z,test=True)
    gen = generator(pred_vec, maxh=1024, test=False)
    gen.persistent = True
    with nn.parameter_scope("gen"):
        #nn.load_parameters("/home/mizuochi/programing/font/dcgan_model_0220/generator_param_290000.h5")
        nn.load_parameters(
            "/home/mizuochi/programing/font/tmp.monitor.dcgan1000/generator_param_458000.h5"
        )

    # loss_dis = F.mean(F.sigmoid_cross_entropy(pred_vec, vec))
    print("x_ref shape", x_ref.shape)
    print("gen shape", gen.shape)
    # loss_dis = F.mean(F.squared_error(x_ref.reshape((64, 28*28)), gen.reshape((64, 28*28))))
    loss_dis = F.mean(F.squared_error(x_ref, gen))
    print(loss_dis.d)

    # Create Solver.
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_dis = M.MonitorSeries("Discriminator loss",
                                       monitor,
                                       interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)

    #data = data_iterator_mnist(args.batch_size, True)
    data = iterator.simple_data_iterator(load_kanji_data(), args.batch_size,
                                         True)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("dis"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "vectorizer_param_%06d.h5" % i))

        # Training forward
        buf, buf2 = data.next()
        x.d = buf * 2.0 / 255. - 1.0
        x_ref.d = buf * 2.0 / 255. - 1.0

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    with nn.parameter_scope("dis"):
        nn.save_parameters(
            os.path.join(args.model_save_path, "vectorizer_param_%06d.h5" % i))
Example #30
0
def train(args):
    """
    Main script.
    """

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN

    # Fake path
    z = nn.Variable([args.batch_size, 100, 1, 1])
    fake = generator(z)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    loss_gen = F.mean(
        F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape)))
    fake_dis = fake.get_unlinked_variable(need_grad=True)
    fake_dis.need_grad = True  # TODO: Workaround until v1.0.2
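    # fake_dis shares fake's data but is cut from the generator graph, so the
    # discriminator loss below does not backpropagate into the generator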
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(pred_fake_dis,
                                F.constant(0, pred_fake_dis.shape)))

    # Real path
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(
        F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())
    start_point = 0

    if args.checkpoint is not None:
        # load weights and solver state info from specified checkpoint files.
        start_point = load_checkpoint(args.checkpoint, {
            "gen": solver_gen,
            "dis": solver_dis
        })

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries("Discriminator loss",
                                       monitor,
                                       interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    monitor_fake = M.MonitorImageTile("Fake images",
                                      monitor,
                                      normalize_method=lambda x: (x + 1) / 2.)

    data = data_iterator_mnist(args.batch_size, True)

    # Save_nnp
    contents = save_nnp({'x': z}, {'y': fake}, args.batch_size)
    save.save(
        os.path.join(args.model_save_path, 'Generator_result_epoch0.nnp'),
        contents)
    contents = save_nnp({'x': x}, {'y': pred_real}, args.batch_size)
    save.save(
        os.path.join(args.model_save_path, 'Discriminator_result_epoch0.nnp'),
        contents)

    # Training loop.
    for i in range(start_point, args.max_iter):
        if i % args.model_save_interval == 0:
            save_checkpoint(args.model_save_path, i, {
                "gen": solver_gen,
                "dis": solver_dis
            })

        # Training forward
        image, _ = data.next()
        x.d = image / 255. - 0.5  # normalize [0, 255] to [-0.5, 0.5]
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    with nn.parameter_scope("gen"):
        nn.save_parameters(
            os.path.join(args.model_save_path, "generator_param_%06d.h5" % i))
    with nn.parameter_scope("dis"):
        nn.save_parameters(
            os.path.join(args.model_save_path,
                         "discriminator_param_%06d.h5" % i))

    # Save_nnp
    contents = save_nnp({'x': z}, {'y': fake}, args.batch_size)
    save.save(os.path.join(args.model_save_path, 'Generator_result.nnp'),
              contents)
    contents = save_nnp({'x': x}, {'y': pred_real}, args.batch_size)
    save.save(os.path.join(args.model_save_path, 'Discriminator_result.nnp'),
              contents)