def train(args):
    """
    Main script.

    Builds an encoder ("vectorizer") / generator / discriminator graph on
    56x56 single-channel images and alternates three updates per
    iteration: an image-reconstruction update of encoder+generator, a GAN
    generator update driven by interpolated latent codes, and a
    discriminator update.  Parameters are checkpointed periodically and
    once more after the loop.
    """

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN

    # Fake path
    x1 = nn.Variable([args.batch_size, 1, 56, 56])

    # Encode a real image to a latent code, then decode it twice: once
    # through the linked graph (fake2 -> reconstruction loss, trains the
    # encoder) and once from an unlinked copy of the code (fake -> GAN
    # losses, so generator gradients do not flow back into the encoder).
    z_vec = vectorizer(x1)
    z = z_vec.unlinked()
    fake2 = generator(z_vec)
    fake = generator(z)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    loss_gen = F.mean(
        F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape)))
    loss_vec = F.mean(F.squared_error(fake2, x1))
    fake_dis = fake.unlinked()
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(pred_fake_dis,
                                F.constant(0, pred_fake_dis.shape)))

    # Encoders used to produce two latent codes whose interpolation feeds
    # the generator during the GAN update.
    xBuf1 = nn.Variable([args.batch_size, 1, 56, 56])
    zBuf1 = vectorizer(xBuf1)
    xBuf2 = nn.Variable([args.batch_size, 1, 56, 56])
    zBuf2 = vectorizer(xBuf2)

    # Real path
    x = nn.Variable([args.batch_size, 1, 56, 56])
    pred_real = discriminator(x)
    loss_dis += F.mean(
        F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    solver_vec = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("vec"):
        solver_vec.set_parameters(nn.get_parameters())
    with nn.parameter_scope("gen"):
        # BUGFIX: set_parameters() resets the parameter set by default, so
        # the original second call silently dropped the "vec" parameters.
        # reset=False makes the reconstruction solver update the encoder
        # AND the decoder, which is the evident intent.
        solver_vec.set_parameters(nn.get_parameters(), reset=False)
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries("Discriminator loss",
                                       monitor,
                                       interval=10)
    monitor_loss_vec = M.MonitorSeries("Vectorizer loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    # BUGFIX: images live in [-1, 1]; `x + 1 / 2.` parsed as `x + 0.5`
    # due to operator precedence.  (x + 1) / 2. maps [-1, 1] -> [0, 1].
    monitor_fake = M.MonitorImageTile("Fake images",
                                      monitor,
                                      normalize_method=lambda x: (x + 1) / 2.)
    monitor_vec1 = M.MonitorImageTile("vec images1",
                                      monitor,
                                      normalize_method=lambda x: (x + 1) / 2.)
    monitor_vec2 = M.MonitorImageTile("vec images2",
                                      monitor,
                                      normalize_method=lambda x: (x + 1) / 2.)

    data = iterator.simple_data_iterator(load_kanji_data(), args.batch_size,
                                         True)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("gen"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "generator_param_%06d.h5" % i))
            with nn.parameter_scope("dis"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "discriminator_param_%06d.h5" % i))
            with nn.parameter_scope("vec"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "vectorizer_param_%06d.h5" % i))

        # Training forward
        image, _ = data.next()

        x1.d = image / 255. * 2 - 1.0  # [0, 255] to [-1, 1]
        # Vectorizer (reconstruction) update.
        solver_vec.zero_grad()
        loss_vec.forward(clear_no_need_grad=True)
        loss_vec.backward(clear_buffer=True)
        solver_vec.weight_decay(args.weight_decay)
        solver_vec.update()
        fake2.forward()
        monitor_vec1.add(i, fake2)
        monitor_vec2.add(i, x1)
        monitor_loss_vec.add(i, loss_vec.d.copy())

        image, _ = data.next()
        x.d = image / 255. * 2 - 1.0  # [0, 255] to [-1, 1]

        # Feed the generator an interpolation of two encoded real images
        # instead of random noise.
        ratio = np.random.rand()
        image, _ = data.next()
        xBuf1.d = image / 255. * 2 - 1.0  # [0, 255] to [-1, 1]
        zBuf1.forward()

        image, _ = data.next()
        xBuf2.d = image / 255. * 2 - 1.0  # [0, 255] to [-1, 1]
        zBuf2.forward()
        z.d = (1 - ratio) * zBuf1.d + ratio * zBuf2.d

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    # Final checkpoint after training.
    with nn.parameter_scope("gen"):
        nn.save_parameters(
            os.path.join(args.model_save_path, "generator_param_%06d.h5" % i))
    with nn.parameter_scope("dis"):
        nn.save_parameters(
            os.path.join(args.model_save_path,
                         "discriminator_param_%06d.h5" % i))
    with nn.parameter_scope("vec"):
        nn.save_parameters(
            os.path.join(args.model_save_path, "vectorizer_param_%06d.h5" % i))
# --- Example 2 ---
def volumetric_rendering(radiance_field,
                         ray_origins,
                         depth_values,
                         return_weights=False,
                         white_bkgd=False,
                         raw_noise_std=0.0,
                         apply_act=False):
    """Integration of volumetric rendering

    Args:
        radiance_field (nn.Variable or nn.NdArray): Shape is (height, width, num_samples, 4).
        radiance_field[:,:,:,:3] correspond to rgb value at each sampled point while radiance_field[:,:,:,-1] refers to color density.
        ray_origins (nn.Variable or nn.NdArray): Shape is (height, width, 3)
        depth_values (nn.Variable or nn.NdArray): Shape is (num_samples, 1) or (height, width, num_samples)
        return_weights (bool, optional): Set to true if the coefficients of the volumetric integration sum are to be returned. Defaults to False.
        white_bkgd (bool, optional): Composite onto a white background. Defaults to False.
        raw_noise_std (float, optional): Std-dev of noise added to density before activation. Defaults to 0.0.
        apply_act (bool, optional): Apply relu/sigmoid to density/rgb. Defaults to False.

    Returns:
        rgb_map (nn.Variable or nn.NdArray): Shape is (height, width, 3)
        depth_map (nn.Variable or nn.NdArray): Shape is (height, width)
        acc_map (nn.Variable or nn.NdArray): Shape is (height, width)
        (plus disp_map and weights when return_weights is True)
    """
    if apply_act:
        sigma = F.relu(radiance_field[..., 3])
        rgb = F.sigmoid(radiance_field[..., :3])
    else:
        sigma = radiance_field[..., 3]
        rgb = radiance_field[..., :3]

    if raw_noise_std > 0.0:
        # Regularizing noise on the raw density (NeRF training trick).
        noise = F.randn(shape=sigma.shape)
        sigma += (noise * raw_noise_std)

    if depth_values.ndim == 2:
        distances = depth_values[:, 1:] - depth_values[:, :-1]
        # Pad the last sample with a large distance so the final interval
        # absorbs the remaining transmittance.
        distances = F.concatenate(distances,
                                  F.constant(1e2,
                                             shape=depth_values.shape[:-1] +
                                             (1, )),
                                  axis=-1)
        alpha = 1. - F.exp(-sigma * distances)
        weights = alpha * F.cumprod(1 - alpha + 1e-10, axis=-1, exclusive=True)
        rgb_map = F.sum(weights[..., None] * rgb, axis=-2)
        depth_map = F.sum(weights * depth_values, axis=-1)
        acc_map = F.sum(weights, axis=-1)
    else:
        distances = depth_values[:, :, 1:] - depth_values[:, :, :-1]
        distances = F.concatenate(distances,
                                  F.constant(1e10,
                                             shape=depth_values.shape[:-1] +
                                             (1, )),
                                  axis=-1)
        alpha = 1. - F.exp(-sigma * distances)
        # BUGFIX: `weights` was referenced below without ever being
        # computed in this branch (NameError at runtime).
        weights = alpha * F.cumprod(1 - alpha + 1e-10, axis=-1, exclusive=True)
        rgb_map = F.sum(weights[..., None] * rgb, axis=rgb.ndim - 2)
        # BUGFIX: the sample axis is the last one; axis=1 summed over the
        # image width instead.
        depth_map = F.sum(weights * depth_values, axis=-1)
        acc_map = F.sum(weights, axis=-1)

    if white_bkgd:
        rgb_map = rgb_map + (1. - acc_map[..., None])

    if return_weights:
        disp_map = 1.0 / \
            F.maximum2(F.constant(1e-10, depth_map.shape), depth_map / acc_map)
        return rgb_map, depth_map, acc_map, disp_map, weights

    return rgb_map, depth_map, acc_map
# --- Example 3 ---
def volume_rendering_transient(radiance_field,
                               ray_origins,
                               depth_values,
                               return_weights=False,
                               white_bkgd=False,
                               raw_noise_std=0.0,
                               beta_min=0.1):
    """Volumetric rendering with an optional transient (NeRF-W style) head.

    Args:
        radiance_field: (H, W, num_samples, C). Channels 0:3 are static rgb,
            3 is static density; when C > 4, channels 4:7 are transient rgb,
            7 is transient density and 8 the transient uncertainty (beta).
        ray_origins: (H, W, 3). Unused here; kept for interface parity.
        depth_values: (H, num_samples) sample depths along each ray.
        return_weights (bool): If True, return only the static map/weights.
        white_bkgd (bool): Composite onto a white background.
        raw_noise_std (float): Unused in this variant.
        beta_min (float): Lower bound added to the predicted uncertainty.

    Returns:
        (static_rgb_map, static_weights) when return_weights is True,
        otherwise (rgb_map, weights, static_rgb_map, transient_rgb_map, beta).
        Without a transient head, transient_rgb_map and beta are None.
    """

    static_rgb = radiance_field[..., :3]
    static_sigma = radiance_field[..., 3]

    if radiance_field.shape[-1] > 4:
        transient_rgb = radiance_field[..., 4:7]
        transient_sigma = radiance_field[..., 7]
        transient_beta = radiance_field[..., 8]

    # Inter-sample distances; the last interval gets a large constant so it
    # absorbs the remaining transmittance.
    distances = depth_values[:, 1:] - depth_values[:, :-1]
    distances = F.concatenate(distances,
                              F.constant(1e2,
                                         shape=depth_values.shape[:-1] +
                                         (1, )),
                              axis=-1)

    static_alpha = 1. - F.exp(-static_sigma * distances)

    if radiance_field.shape[-1] > 4:
        transient_alpha = 1. - F.exp(-transient_sigma * distances)
        alpha = 1. - F.exp(-(static_sigma + transient_sigma) * distances)
        # Combined transmittance of both media.
        transmittance = F.cumprod(1-static_alpha+1e-10, axis=-1, exclusive=True) * \
            F.cumprod(1-transient_alpha+1e-10, axis=-1, exclusive=True)
    else:
        alpha = static_alpha
        transmittance = F.cumprod(1 - static_alpha + 1e-10,
                                  axis=-1,
                                  exclusive=True)

    static_weights = static_alpha * transmittance
    if radiance_field.shape[-1] > 4:
        transient_weights = transient_alpha * transmittance
        weights = alpha * transmittance

    static_rgb_map = F.sum(static_weights[..., None] * static_rgb, axis=-2)

    if isinstance(radiance_field,
                  nn.Variable) and radiance_field.shape[-1] > 4:
        transient_rgb_map = F.sum(transient_weights[..., None] * transient_rgb,
                                  axis=-2)
        rgb_map = static_rgb_map + transient_rgb_map
        beta = F.sum(transient_weights * transient_beta, axis=-1)
        beta += beta_min
        acc_map = F.sum(weights, axis=-1)
        if white_bkgd:
            rgb_map = rgb_map + (1. - acc_map[..., None])

    elif isinstance(radiance_field,
                    nn.NdArray) and radiance_field.shape[-1] > 4:
        transient_rgb_map = F.sum(transient_weights[..., None] * transient_rgb,
                                  axis=-2)
        rgb_map = static_rgb_map + transient_rgb_map
        # For evaluation (NdArray) the static/transient maps are rendered
        # with their own (uncombined) transmittance.
        static_weights = static_alpha * \
            F.cumprod(1-static_alpha+1e-10, axis=-1, exclusive=True)
        static_rgb_map = F.sum(static_weights[..., None] * static_rgb, axis=-2)
        transient_weights = transient_alpha * \
            F.cumprod(1-transient_alpha+1e-10, axis=-1, exclusive=True)
        transient_rgb_map = F.sum(transient_weights[..., None] * transient_rgb,
                                  axis=-2)
        beta = F.sum(transient_weights * transient_beta, axis=-1) + beta_min
        acc_map = F.sum(weights, axis=-1)
        if white_bkgd:
            rgb_map = rgb_map + (1. - acc_map[..., None])

    else:
        # BUGFIX: without a transient head the original fell through to the
        # final return with `rgb_map`, `weights`, `transient_rgb_map` and
        # `beta` undefined (NameError).  Fall back to the static outputs.
        acc_map = F.sum(static_weights, axis=-1)
        weights = static_weights
        rgb_map = static_rgb_map
        transient_rgb_map = None
        beta = None

    if white_bkgd:
        static_rgb_map = static_rgb_map + (1. - acc_map[..., None])

    if return_weights:
        return static_rgb_map, static_weights

    return rgb_map, weights, static_rgb_map, transient_rgb_map, beta
def train_transformer(config, netG, netD, solver_netG, solver_netD,
                      train_iterators, monitor):
    """Train a CycleGAN-style boundary-map transformer (A<->B).

    Builds generator/discriminator graphs for both directions, optionally
    adds cycle-consistency and PCA shape losses, then alternates
    discriminator and generator updates for config["train"]["epochs"]
    epochs, monitoring losses/images and checkpointing parameters.

    Args:
        config (dict): Training configuration (losses, epochs, intervals...).
        netG (dict): {'netG_A2B': fn, 'netG_B2A': fn} generator callables.
        netD (dict): {'netD_A': fn, 'netD_B': fn} discriminator callables.
        solver_netG (dict): Solvers keyed like `netG`.
        solver_netD (dict): Solvers keyed like `netD`.
        train_iterators (tuple): (source iterator, target iterator).
        monitor: nnabla Monitor used for series/image logging and save path.
    """

    netG_A2B, netG_B2A = netG['netG_A2B'], netG['netG_B2A']
    netD_A, netD_B = netD['netD_A'], netD['netD_B']
    solver_netG_AB, solver_netG_BA = solver_netG['netG_A2B'], solver_netG[
        'netG_B2A']
    solver_netD_A, solver_netD_B = solver_netD['netD_A'], solver_netD['netD_B']

    train_iterator_src, train_iterator_trg = train_iterators

    if config["train"][
            "cycle_loss"] and config["train"]["cycle_loss"]["lambda"] > 0:
        print(
            f'Applying Cycle Loss, weight: {config["train"]["cycle_loss"]["lambda"]}.'
        )
        with_cycle_loss = True
    else:
        with_cycle_loss = False

    if config["train"][
            "shape_loss"] and config["train"]["shape_loss"]["lambda"] > 0:
        print(
            f'Applying Shape Loss using PCA, weight: {config["train"]["shape_loss"]["lambda"]}.'
        )
        with_shape_loss = True
    else:
        with_shape_loss = False

    # Load boundary image to get Variable shapes
    bod_map_A = train_iterator_src.next()[0]
    bod_map_B = train_iterator_trg.next()[0]
    real_bod_map_A = nn.Variable(bod_map_A.shape)
    real_bod_map_B = nn.Variable(bod_map_B.shape)
    real_bod_map_A.persistent, real_bod_map_B.persistent = True, True

    ################### Graph Construction ####################
    # Generator
    with nn.parameter_scope('netG_transformer'):
        with nn.parameter_scope('netG_A2B'):
            fake_bod_map_B = netG_A2B(
                real_bod_map_A, test=False,
                norm_type=config["norm_type"])  # (1, 15, 64, 64)
        with nn.parameter_scope('netG_B2A'):
            fake_bod_map_A = netG_B2A(
                real_bod_map_B, test=False,
                norm_type=config["norm_type"])  # (1, 15, 64, 64)
    fake_bod_map_B.persistent, fake_bod_map_A.persistent = True, True

    # Unlinked copies cut the graph so D updates do not backprop into G;
    # G gradients are later pushed through manually via .backward(grad=None).
    fake_bod_map_B_unlinked = fake_bod_map_B.get_unlinked_variable()
    fake_bod_map_A_unlinked = fake_bod_map_A.get_unlinked_variable()

    # Reconstruct images if cycle loss is applied.
    if with_cycle_loss:
        with nn.parameter_scope('netG_transformer'):
            with nn.parameter_scope('netG_B2A'):
                recon_bod_map_A = netG_B2A(
                    fake_bod_map_B_unlinked,
                    test=False,
                    norm_type=config["norm_type"])  # (1, 15, 64, 64)
            with nn.parameter_scope('netG_A2B'):
                recon_bod_map_B = netG_A2B(
                    fake_bod_map_A_unlinked,
                    test=False,
                    norm_type=config["norm_type"])  # (1, 15, 64, 64)
        recon_bod_map_A.persistent, recon_bod_map_B.persistent = True, True

    # Discriminator
    with nn.parameter_scope('netD_transformer'):
        with nn.parameter_scope('netD_A'):
            pred_fake_A = netD_A(fake_bod_map_A_unlinked, test=False)
            pred_real_A = netD_A(real_bod_map_A, test=False)
        with nn.parameter_scope('netD_B'):
            pred_fake_B = netD_B(fake_bod_map_B_unlinked, test=False)
            pred_real_B = netD_B(real_bod_map_B, test=False)
    real_target = F.constant(1, pred_fake_A.shape)
    fake_target = F.constant(0, pred_real_A.shape)

    ################### Loss Definition ####################
    # Generator loss
    # LSGAN loss
    loss_gan_A = lsgan_loss(pred_fake_A, real_target)
    loss_gan_B = lsgan_loss(pred_fake_B, real_target)
    loss_gan_A.persistent, loss_gan_B.persistent = True, True
    loss_gan = loss_gan_A + loss_gan_B

    # Cycle loss
    if with_cycle_loss:
        loss_cycle_A = recon_loss(recon_bod_map_A, real_bod_map_A)
        loss_cycle_B = recon_loss(recon_bod_map_B, real_bod_map_B)
        loss_cycle_A.persistent, loss_cycle_B.persistent = True, True
        loss_cycle = loss_cycle_A + loss_cycle_B

    # Shape loss
    if with_shape_loss:
        with nn.parameter_scope("Align"):
            nn.load_parameters(
                config["train"]["shape_loss"]["align_param_path"])
            shape_bod_map_real_A = models.align_resnet(real_bod_map_A,
                                                       fix_parameters=True)
            shape_bod_map_fake_B = models.align_resnet(fake_bod_map_B_unlinked,
                                                       fix_parameters=True)

            shape_bod_map_real_B = models.align_resnet(real_bod_map_B,
                                                       fix_parameters=True)
            shape_bod_map_fake_A = models.align_resnet(fake_bod_map_A_unlinked,
                                                       fix_parameters=True)

        with nn.parameter_scope("PCA"):
            # Project to a frozen 212-d PCA space; only the first 3
            # components are compared.
            nn.load_parameters(config["train"]["shape_loss"]["PCA_param_path"])
            shape_bod_map_real_A = PF.affine(shape_bod_map_real_A,
                                             212,
                                             fix_parameters=True)
            shape_bod_map_real_A = shape_bod_map_real_A[:, :3]

            shape_bod_map_fake_B = PF.affine(shape_bod_map_fake_B,
                                             212,
                                             fix_parameters=True)
            shape_bod_map_fake_B = shape_bod_map_fake_B[:, :3]

            shape_bod_map_real_B = PF.affine(shape_bod_map_real_B,
                                             212,
                                             fix_parameters=True)
            shape_bod_map_real_B = shape_bod_map_real_B[:, :3]

            shape_bod_map_fake_A = PF.affine(shape_bod_map_fake_A,
                                             212,
                                             fix_parameters=True)
            shape_bod_map_fake_A = shape_bod_map_fake_A[:, :3]

        shape_bod_map_real_A.persistent, shape_bod_map_fake_A.persistent = True, True
        shape_bod_map_real_B.persistent, shape_bod_map_fake_B.persistent = True, True

        loss_shape_A = recon_loss(shape_bod_map_real_A, shape_bod_map_fake_B)
        loss_shape_B = recon_loss(shape_bod_map_real_B, shape_bod_map_fake_A)
        loss_shape_A.persistent, loss_shape_B.persistent = True, True
        loss_shape = loss_shape_A + loss_shape_B

    # Total Generator Loss
    loss_netG = loss_gan

    if with_cycle_loss:
        loss_netG += loss_cycle * config["train"]["cycle_loss"]["lambda"]

    if with_shape_loss:
        loss_netG += loss_shape * config["train"]["shape_loss"]["lambda"]

    # Discriminator loss
    loss_netD_A = lsgan_loss(pred_real_A, real_target) + \
        lsgan_loss(pred_fake_A, fake_target)
    loss_netD_B = lsgan_loss(pred_real_B, real_target) + \
        lsgan_loss(pred_fake_B, fake_target)
    loss_netD_A.persistent, loss_netD_B.persistent = True, True

    loss_netD = loss_netD_A + loss_netD_B

    ################### Setting Solvers ####################
    # Generator solver
    with nn.parameter_scope('netG_transformer'):
        with nn.parameter_scope('netG_A2B'):
            solver_netG_AB.set_parameters(nn.get_parameters())
        with nn.parameter_scope('netG_B2A'):
            solver_netG_BA.set_parameters(nn.get_parameters())

    # Discrimintar solver
    with nn.parameter_scope('netD_transformer'):
        with nn.parameter_scope('netD_A'):
            solver_netD_A.set_parameters(nn.get_parameters())
        with nn.parameter_scope('netD_B'):
            solver_netD_B.set_parameters(nn.get_parameters())

    ################### Create Monitors ####################
    interval = config["monitor"]["interval"]
    monitors_G_dict = {
        'loss_netG': loss_netG,
        'loss_gan_A': loss_gan_A,
        'loss_gan_B': loss_gan_B
    }

    if with_cycle_loss:
        monitors_G_dict.update({
            'loss_cycle_A': loss_cycle_A,
            'loss_cycle_B': loss_cycle_B
        })

    if with_shape_loss:
        monitors_G_dict.update({
            'loss_shape_A': loss_shape_A,
            'loss_shape_B': loss_shape_B
        })

    monitors_G = MonitorManager(monitors_G_dict, monitor, interval=interval)

    monitors_D_dict = {
        'loss_netD': loss_netD,
        'loss_netD_A': loss_netD_A,
        'loss_netD_B': loss_netD_B
    }
    monitors_D = MonitorManager(monitors_D_dict, monitor, interval=interval)

    monitor_time = nm.MonitorTimeElapsed('time_training',
                                         monitor,
                                         interval=interval)
    monitor_vis = nm.MonitorImage('result',
                                  monitor,
                                  interval=1,
                                  num_images=4,
                                  normalize_method=lambda x: x)

    # Dump training information
    with open(os.path.join(monitor._save_path, "training_info.yaml"),
              "w",
              encoding="utf-8") as f:
        f.write(yaml.dump(config))

    # Training
    epoch = config["train"]["epochs"]
    i = 0
    iter_per_epoch = train_iterator_src.size // config["train"][
        "batch_size"] + 1
    for e in range(epoch):
        logger.info(f'Epoch = {e} / {epoch}')
        train_iterator_src._reset()  # rewind the iterator
        train_iterator_trg._reset()  # rewind the iterator
        for _ in range(iter_per_epoch):
            bod_map_A = train_iterator_src.next()[0]
            bod_map_B = train_iterator_trg.next()[0]
            real_bod_map_A.d, real_bod_map_B.d = bod_map_A, bod_map_B

            # Generate fake image
            fake_bod_map_B.forward(clear_no_need_grad=True)
            fake_bod_map_A.forward(clear_no_need_grad=True)

            # Update Discriminator
            solver_netD_A.zero_grad()
            solver_netD_B.zero_grad()
            loss_netD.forward(clear_no_need_grad=True)
            loss_netD.backward(clear_buffer=True)
            if config["train"]["weight_decay"]:
                solver_netD_A.weight_decay(config["train"]["weight_decay"])
                solver_netD_B.weight_decay(config["train"]["weight_decay"])
            solver_netD_A.update()
            solver_netD_B.update()

            # Update Generator
            solver_netG_BA.zero_grad()
            solver_netG_AB.zero_grad()
            # D grads accumulated by the generator pass are discarded here
            # so the D solvers are not polluted for the next iteration.
            solver_netD_A.zero_grad()
            solver_netD_B.zero_grad()
            # Zero the unlinked grads before accumulation, then push the
            # gradients sitting on the unlinked variables back through the
            # generator graphs with backward(grad=None).
            fake_bod_map_B_unlinked.grad.zero()
            fake_bod_map_A_unlinked.grad.zero()
            loss_netG.forward(clear_no_need_grad=True)
            loss_netG.backward(clear_buffer=True)
            fake_bod_map_B.backward(grad=None)
            fake_bod_map_A.backward(grad=None)
            solver_netG_AB.update()
            solver_netG_BA.update()

            # Monitors
            monitor_time.add(i)
            monitors_G.add(i)
            monitors_D.add(i)

            i += 1

        images_to_visualize = [
            real_bod_map_A.d, fake_bod_map_B.d, real_bod_map_B.d
        ]
        if with_cycle_loss:
            images_to_visualize.extend(
                [recon_bod_map_A.d, fake_bod_map_A.d, recon_bod_map_B.d])
        else:
            images_to_visualize.extend([fake_bod_map_A.d])
        visuals = combine_images(images_to_visualize)
        monitor_vis.add(i, visuals)

        if e % config["monitor"]["save_interval"] == 0 or e == epoch - 1:
            # Save parameters of networks
            netG_B2A_save_path = os.path.join(monitor._save_path,
                                              f'netG_transformer_B2A_{e}.h5')
            netG_A2B_save_path = os.path.join(monitor._save_path,
                                              f'netG_transformer_A2B_{e}.h5')
            with nn.parameter_scope('netG_transformer'):
                with nn.parameter_scope('netG_A2B'):
                    nn.save_parameters(netG_A2B_save_path)
                with nn.parameter_scope('netG_B2A'):
                    nn.save_parameters(netG_B2A_save_path)

            netD_A_save_path = os.path.join(monitor._save_path,
                                            f'netD_transformer_A_{e}.h5')
            netD_B_save_path = os.path.join(monitor._save_path,
                                            f'netD_transformer_B_{e}.h5')
            with nn.parameter_scope('netD_transformer'):
                with nn.parameter_scope('netD_A'):
                    nn.save_parameters(netD_A_save_path)
                with nn.parameter_scope('netD_B'):
                    nn.save_parameters(netD_B_save_path)
# --- Example 5 ---
    def call(self, memory, inputs=None):
        r"""Return mel-spectrogram and attention matrix.

        Args:
            memory(nn.Variable): A 3D tensor of shape (T, B, C).
            inputs(nn.Variable, optional): A 3D tensor with shape of
                [B, T/r, n_mels(*r)]. Shifted log melspectrogram of sound files.
                Defaults to None.

        Returns:
            nn.Variable: The synthetic mel-spectrograms of shape
                (B, Ty/r, r*n_mels).
            nn.Variable: The attention matrix of shape
                (B, Tx, Ty).

        References:
            - https://github.com/Kyubyong/tacotron/
        """
        hp = self._hparams
        bz, mel_shape = hp.batch_size, hp.n_mels * hp.r
        encoder_dim = hp.encoder_embedding_dim

        # initialize input tensor (all-zero "go" frame)
        input = F.constant(shape=(bz, 1, mel_shape))

        # initialize hidden states
        context = F.constant(shape=(bz, 1, hp.attention_dim))
        hidden = F.constant(shape=(1, 1, bz, encoder_dim))
        h_gru = [
            F.constant(shape=(1, 1, bz, encoder_dim)),
            F.constant(shape=(1, 1, bz, encoder_dim))
        ]

        outputs, attends = [], []

        # Autoregressive decoding: with teacher forcing (inputs given) the
        # previous ground-truth frame is fed; otherwise the previous output.
        for i in range(hp.n_frames):
            if i > 0:
                input = (outputs[-1] if inputs is None else inputs[:,
                                                                   i - 1:i, :])

            # feed a prenet to the input
            input = prenet(input,
                           layer_sizes=hp.prenet_channels,
                           is_training=self.training,
                           scope='prenet_decoder')  # (bz, 1, C)

            # concat the input and context vector
            input = F.concatenate(input, context)  # (bz, 1, 384)

            with nn.parameter_scope('rnn_attention'):
                # calculate the output
                output, hidden = PF.gru(
                    input.reshape((1, bz, -1)),
                    hidden,
                    training=self.training,
                    bidirectional=False)  # (1, bz, 256), (1, 1, bz, 256)

            # compute the context and attention vectors
            context, attend = Bahdanau_attention(
                F.transpose(hidden[0], (1, 0, 2)),
                memory,
                out_features=hp.attention_dim,
                scope='Bahdanau_attention')  # (bz, 1, 256), (bz, 1, T)

            with nn.parameter_scope('rnn_decoder'):
                # concat RNN output and attention context vector
                with nn.parameter_scope('project_to_decoder'):
                    output = F.concatenate(output,
                                           F.transpose(context, (1, 0, 2)),
                                           axis=2)
                    output = PF.affine(output, encoder_dim,
                                       base_axis=2)  # (1, bz, 256)

                # decoder RNN with residual connection
                for j in range(2):
                    with nn.parameter_scope(f'gru_resisidual_{j}'):
                        out, h_gru[j] = PF.gru(output,
                                               h_gru[j],
                                               training=self.training,
                                               bidirectional=False)
                        output += out  # (1, bz, 256)

                # projector to mels
                with nn.parameter_scope('project_to_mel'):
                    output = F.transpose(output, (1, 0, 2))
                    # (bz, 1, n_mels*r)
                    output = PF.affine(output, mel_shape, base_axis=2)

            outputs.append(output)
            attends.append(attend)

        # Stack per-frame results along the time axis.
        outputs = F.concatenate(*outputs, axis=1)  # (B, T2, C2)
        attends = F.concatenate(*attends, axis=1)  # (B, T2, T1)

        return outputs, attends
# --- Example 6 ---
def train(args):
    """
    Main script.

    Trains a DCGAN on MNIST: builds generator/discriminator graphs,
    alternates generator and discriminator Adam updates, monitors losses
    and generated tiles, and finally exports an .nnp runtime bundle and
    runs a C++ forward check on the generator.
    """

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN

    # Fake path
    z = nn.Variable([args.batch_size, 100, 1, 1])
    fake = generator(z)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    loss_gen = F.mean(F.sigmoid_cross_entropy(
        pred_fake, F.constant(1, pred_fake.shape)))
    # Unlinked copy: discriminator updates must not backprop into G.
    fake_dis = fake.unlinked()
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(F.sigmoid_cross_entropy(
        pred_fake_dis, F.constant(0, pred_fake_dis.shape)))

    # Real path
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(F.sigmoid_cross_entropy(pred_real,
                                               F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries(
        "Discriminator loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    # Data lives in [-0.5, 0.5]; `x + 1 / 2.` is `x + 0.5` -> [0, 1].
    monitor_fake = M.MonitorImageTile(
        "Fake images", monitor, normalize_method=lambda x: x + 1 / 2.)

    data = data_iterator_mnist(args.batch_size, True)
    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("gen"):
                nn.save_parameters(os.path.join(
                    args.model_save_path, "generator_param_%06d.h5" % i))
            with nn.parameter_scope("dis"):
                nn.save_parameters(os.path.join(
                    args.model_save_path, "discriminator_param_%06d.h5" % i))

        # Training forward
        image, _ = data.next()
        x.d = image / 255. - 0.5  # [0, 255] to [-0.5, 0.5]
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    # Export both networks plus executors as an .nnp runtime bundle.
    nnp = os.path.join(
        args.model_save_path, 'dcgan_%06d.nnp' % args.max_iter)
    runtime_contents = {
        'networks': [
            {'name': 'Generator',
             'batch_size': args.batch_size,
             'outputs': {'G': fake},
             'names': {'z': z}},
            {'name': 'Discriminator',
             'batch_size': args.batch_size,
             'outputs': {'D': pred_real},
             'names': {'x': x}}],
        'executors': [
            {'name': 'Generator',
             'network': 'Generator',
             'data': ['z'],
             'output': ['G']},
            {'name': 'Discriminator',
             'network': 'Discriminator',
             'data': ['x'],
             'output': ['D']}]}

    save.save(nnp, runtime_contents)
    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [z.d], [z], fake, nnp, "Generator")
 def _loss_minus(self, dout):
     """Return the element-wise squared distance of `dout` from zero."""
     zero_target = F.constant(0., shape=dout.shape)
     return F.squared_error(dout, zero_target)
# --- Example 8 ---
def cbhg(inputs, K, projections, depth, is_training, scope):
    r"""Returns the 1D Convolution Bank Highwaynet bidirectional
    GRU (CBHG) module.

    The input runs through a bank of K 1D convolutions (kernel sizes
    1..K), max-pooling over time, two projection convolutions, a
    residual connection, a 4-layer HighwayNet and finally a
    bidirectional GRU.

    Args:
        inputs (nn.Variable): NNabla Variable of shape (B, C, T).
        K (int): Maximum kernel size.
        projections (list of int): A list of channels.
        depth (int): A depth. This should be an even number.
        is_training (bool): Whether training mode is activated.
        scope (str): The parameter scope name.

    Returns:
        nn.Variable: Output variable.
    """

    with nn.parameter_scope(scope):
        # Convolution bank: concatenate channels from all 1D convolutions
        # (one conv per kernel size 1..K, each with 128 channels + relu).
        with nn.parameter_scope('conv_bank'):
            conv = partial(conv1d, inputs, channels=128,
                           activation=F.relu, is_training=is_training)
            conv_outputs = [conv(kernel_size=k, scope=f'conv1d_{k}') for k in range(1, K+1)]
            conv_outputs = F.concatenate(*conv_outputs, axis=1)

        # make sure a valid input to max_pooling:
        # pad width (0,0, 0,0, 0,1) pads the time axis by one at the end
        # so that pooling with kernel 2 / stride 1 preserves the length.
        x = F.pad(conv_outputs, (0,)*5+(1,), mode='constant')

        # Maxpooling: reshape is needed because nnabla does not support 1D pooling
        maxpool_output = F.max_pooling(
            x.reshape(x.shape + (1,)),
            kernel=(2, 1), stride=(1, 1)
        ).reshape(conv_outputs.shape)

        # Two projection layers:
        proj1_output = conv1d(
            maxpool_output,
            kernel_size=3,
            channels=projections[0],
            activation=F.relu,
            is_training=is_training,
            scope='proj_1'
        )
        proj2_output = conv1d(
            proj1_output,
            kernel_size=3,
            channels=projections[1],
            activation=None,
            is_training=is_training,
            scope='proj_2'
        )

        # Residual connection:
        # NOTE(review): assumes projections[1] matches the input channel
        # count C so the addition is well-formed — confirm at call sites.
        highway_input = proj2_output + inputs

        assert depth % 2 == 0
        half_depth = depth // 2

        with nn.parameter_scope('highwaynet'):
            # transposing to shape (B, T, C)
            highway_input = F.transpose(highway_input, (0, 2, 1))

            # Handle dimensionality mismatch:
            if highway_input.shape[2] != half_depth:
                highway_input = PF.affine(
                    highway_input, half_depth, base_axis=2,
                    name='adjust_dim'
                )

            # 4-layer HighwayNet:
            for i in range(4):
                highway_input = highwaynet(
                    highway_input, half_depth,
                    scope=f'highway_{i+1}'
                )

        with nn.parameter_scope('rnn_net'):
            # transpose to shape (T, B, C)
            rnn_input = F.transpose(highway_input, (1, 0, 2))
            # F.constant defaults to value 0, so the hidden state starts
            # all-zero; its leading axes presumably encode
            # (num_layers, num_directions) — confirm against PF.gru's
            # expected h0 layout.
            outputs, _ = PF.gru(
                rnn_input,
                F.constant(shape=(2, 2, rnn_input.shape[1], half_depth)),
                training=is_training,
                bidirectional=True
            )  # (T, B, C)

    return outputs
# Example #9
 def SquaredError_Scalor(x, val=1):
     """Element-wise squared error between `x` and the constant `val`.

     Args:
         x (nn.Variable): Input variable.
         val (float): Scalar target broadcast to ``x.shape``.

     Returns:
         nn.Variable: ``(x - val) ** 2`` element-wise.
     """
     target = F.constant(val, x.shape)
     return F.squared_error(x, target)
# Example #10
    def backward_impl(self, inputs, outputs, prop_down, accum):
        """Backward of the convolution backward function (double backward).

        Computes second-order gradients w.r.t. the inputs of the
        convolution backward pass: x0 (input), w0 (weights),
        optionally b0 (bias), and dy (incoming output gradient).

        Args:
            inputs: Variables of the backward graph (layout below).
            outputs: Variables holding the first-order gradients.
            prop_down (list of bool): Whether to propagate to each input.
            accum (list of bool): Whether to accumulate into each grad.
        """
        # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
        # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

        # Args
        # With bias the inputs are (x, w, b, dy); without, (x, w, dy).
        with_bias = True if len(inputs) == 4 else False
        base_axis = self.forward_func.info.args["base_axis"]
        pad = self.forward_func.info.args["pad"]
        stride = self.forward_func.info.args["stride"]
        dilation = self.forward_func.info.args["dilation"]
        group = self.forward_func.info.args["group"]
        channel_last = self.forward_func.info.args["channel_last"]
        # TODO: BHWC
        assert channel_last == False, "`channel_last = False` is only supported now."

        # Inputs
        x0 = inputs[0].data
        w0 = inputs[1].data
        b0 = inputs[2].data if with_bias else None
        dy = inputs[3].data if with_bias else inputs[2].data
        # Outputs
        dx0 = outputs[0].data
        dw0 = outputs[1].data
        db0 = outputs[2].data if with_bias else None
        # Grads of inputs
        g_x0 = inputs[0].grad
        g_w0 = inputs[1].grad
        g_b0 = inputs[2].grad if with_bias else None
        g_dy = inputs[3].grad if with_bias else inputs[2].grad
        # Grads of outputs
        g_dx0 = outputs[0].grad
        g_dw0 = outputs[1].grad
        g_db0 = outputs[2].grad if with_bias else None

        # Computation
        ## w.r.t. x or w.r.t. w
        if prop_down[0] or prop_down[1]:
            # we can re-use the backward of the forward with different inputs:
            # feed the incoming second-order grads (g_dx0, g_dw0) as data
            # and let the forward function's backward fill g_x0 / g_w0.
            inp_x = nn.Variable(x0.shape).apply(data=g_dx0,
                                                grad=g_x0,
                                                need_grad=prop_down[0])
            inp_w = nn.Variable(w0.shape).apply(data=g_dw0,
                                                grad=g_w0,
                                                need_grad=prop_down[1])
            out_y = nn.Variable(dy.shape).apply(grad=dy)
            inputs = [inp_x, inp_w]
            outputs = [out_y]
            if with_bias:
                inp_b = nn.Variable(b0.shape).apply(need_grad=False)
                inputs += [inp_b]
            self.forward_func.backward(inputs, outputs, accum)
        ## w.r.t. b
        # The second derivative w.r.t. the bias is zero, so write zeros
        # (only when not accumulating).
        if with_bias and prop_down[2] and not accum[2]:
            zeros = F.constant(0, b0.shape)
            if not nn.get_auto_forward():
                zeros.forward()
            g_b0.copy_from(zeros.data)
        ## w.r.t. dy
        if (not with_bias and prop_down[2]) or (with_bias and prop_down[3]):
            accum_dy = accum[3] if with_bias else accum[2]
            # Two convolution terms combine the second-order grads of
            # dx and dw back into the gradient w.r.t. dy.
            g_dy_ = F.convolution(g_dx0, w0, None, base_axis, pad, stride, dilation, group, channel_last) \
                + F.convolution(x0, g_dw0, None, base_axis, pad,
                                stride, dilation, group, channel_last)
            if with_bias:
                # Reshape so the bias grad broadcasts over every
                # non-channel axis of g_dy.
                g_db0 = F.reshape(g_db0, [
                    1 if i != base_axis else g_db0.shape[0]
                    for i in range(g_dy.ndim)
                ])
                g_dy_ += g_db0
            if accum_dy:
                g_dy += g_dy_
            else:
                g_dy.copy_from(g_dy_)
# Example #11
def train(args):
    """
    Main script.

    Builds a DCGAN training graph (generator + discriminator) on MNIST,
    optionally resumes from a checkpoint, runs the adversarial training
    loop, and saves parameters plus .nnp deployment archives.

    Args:
        args: Parsed command-line arguments; uses context, device_id,
            type_config, batch_size, learning_rate, weight_decay,
            max_iter, model_save_interval, model_save_path,
            monitor_path and checkpoint.
    """

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN

    # Fake path: z -> G(z) -> D(G(z)). The generator is trained to make
    # the discriminator label fakes as real (1).
    z = nn.Variable([args.batch_size, 100, 1, 1])
    fake = generator(z)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    loss_gen = F.mean(
        F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape)))
    # Unlink G's output so discriminator updates do not backpropagate
    # into the generator. need_grad=True is set by the call itself, so
    # the old post-assignment workaround is no longer needed.
    fake_dis = fake.get_unlinked_variable(need_grad=True)
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(pred_fake_dis,
                                F.constant(0, pred_fake_dis.shape)))

    # Real path: D is trained to label real MNIST images as 1.
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(
        F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())
    start_point = 0

    if args.checkpoint is not None:
        # load weights and solver state info from specified checkpoint files.
        start_point = load_checkpoint(args.checkpoint, {
            "gen": solver_gen,
            "dis": solver_dis
        })

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries("Discriminator loss",
                                       monitor,
                                       interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    monitor_fake = M.MonitorImageTile("Fake images",
                                      monitor,
                                      normalize_method=lambda x: (x + 1) / 2.)

    data = data_iterator_mnist(args.batch_size, True)

    # Save_nnp
    contents = save_nnp({'x': z}, {'y': fake}, args.batch_size)
    save.save(
        os.path.join(args.model_save_path, 'Generator_result_epoch0.nnp'),
        contents)
    contents = save_nnp({'x': x}, {'y': pred_real}, args.batch_size)
    save.save(
        os.path.join(args.model_save_path, 'Discriminator_result_epoch0.nnp'),
        contents)

    # Training loop.
    # Initialize `i` up front: the post-loop parameter saving below would
    # otherwise raise NameError when the loop body never runs (i.e. a
    # checkpoint with start_point >= args.max_iter).
    i = start_point
    for i in range(start_point, args.max_iter):
        if i % args.model_save_interval == 0:
            save_checkpoint(args.model_save_path, i, {
                "gen": solver_gen,
                "dis": solver_dis
            })

        # Training forward
        image, _ = data.next()
        x.d = image / 255. - 0.5  # [0, 255] to [-1, 1]
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    with nn.parameter_scope("gen"):
        nn.save_parameters(
            os.path.join(args.model_save_path, "generator_param_%06d.h5" % i))
    with nn.parameter_scope("dis"):
        nn.save_parameters(
            os.path.join(args.model_save_path,
                         "discriminator_param_%06d.h5" % i))

    # Save_nnp
    contents = save_nnp({'x': z}, {'y': fake}, args.batch_size)
    save.save(os.path.join(args.model_save_path, 'Generator_result.nnp'),
              contents)
    contents = save_nnp({'x': x}, {'y': pred_real}, args.batch_size)
    save.save(os.path.join(args.model_save_path, 'Discriminator_result.nnp'),
              contents)
# Example #12
    def __call__(self,
                 outputs,
                 inputs,
                 grad_outputs=None,
                 persistent_outputs=[],
                 bind_grad_output=False):
        """
        Build the gradient (backward) graph of `outputs` with respect to
        `inputs` and return the gradient variables.

        The logic of this method is almost same as one in visit_function_backward in C++ layer.

        Args:
            outputs: `nn.Variable` or list of them to differentiate.
            inputs: `nn.Variable` or list of them to differentiate w.r.t.
            grad_outputs: Initial output gradients — `None`, scalar,
                `numpy.ndarray`, `nn.NdArray`, `nn.Variable`, or a list
                of those matching `outputs` in length and shapes.
            persistent_outputs (list of bool): Per-output persistent
                flags; an empty list means all True. (The mutable default
                is safe here: the list is read, never mutated in place.)
            bind_grad_output (bool): If True, bind each computed
                gradient's data to the corresponding input's `.grad`.

        Returns:
            list of nn.Variable: One gradient per input; entries that
            receive no gradient become zero-filled constants.
        """
        # TODO: address auto_forward is very slow. It may be python overhead since small diff when BS is large.
        # TODO: address auto_forward consumes lots of memory, need to call v.get_unlinked_variable()?
        # TODO: address auto_forward consumes lots of memory, need to use NdArray as inputs?

        # Check outputs/inputs
        outputs = self._force_list(outputs)
        if not all([isinstance(o, nn.Variable) for o in outputs]):
            raise ValueError("Element of outputs must be `nnabla.Variable`.")
        inputs = self._force_list(inputs)
        if not all([isinstance(i, nn.Variable) for i in inputs]):
            raise ValueError("Element of inputs must be `nnabla.Variable`.")

        # Check grad_outputs
        if grad_outputs is None:
            grad_outputs = [None] * len(outputs)
        elif isinstance(grad_outputs, (int, float, np.ndarray, nn.NdArray)):
            grad_outputs = self._force_list(grad_outputs)
        elif isinstance(grad_outputs, list):
            if len(outputs) != len(grad_outputs):
                raise ValueError(
                    "Length of `grad_outputs` and length of `outputs` must be the same."
                )
            for i in range(len(outputs)):
                o = outputs[i]
                go = grad_outputs[i]
                if not isinstance(go, (type(None), int, float, np.ndarray,
                                       nn.NdArray, nn.Variable)):
                    raise ValueError(
                        "Element of `grad_outputs` must be "
                        "in (`None`, `int`, `float`, `numpy.ndarray`, "
                        "`nnabla.NdArray`, `nnabla.Variable`) or "
                        "list of (`None`, `int`, `float`, `numpy.ndarray`, "
                        "`nnabla.NdArray`, `nnabla.Variable`)\n"
                        "type(grad_outputs[{}] = {}".format(i, type(go)))
                elif isinstance(go, (np.ndarray, nn.NdArray,
                                     nn.Variable)) and go.shape != o.shape:
                    raise ValueError(
                        "Shape of each of outputs and grad_outputs must be same.\n"
                        "output[{}]({}) != grad_output[{}]({})".format(
                            i, o.shape, i, go.shape))

        # Check persistent_outputs
        if len(persistent_outputs) != 0 and len(outputs) != len(
                persistent_outputs):
            raise ValueError(
                "Length of outputs and persistent_outputs "
                "must be the same except for "
                "the case that the length of the persistent_outputs is 0.")

        # Persistent outputs since outputs are basically losses to be monitored
        persistent_outputs = [True] * len(
            outputs) if persistent_outputs == [] else persistent_outputs
        for o, p in zip(outputs, persistent_outputs):
            o.persistent = p

        # Open list of next search candidate
        # Each function gets a stable integer id so the tuples pushed to
        # `open` compare deterministically even when ranks tie.
        ids = {}

        def get_id(func):
            if func not in ids.keys():
                size = len(ids)
                ids[func] = size
                return size
            return ids[func]

        open = set()

        # Map for grad_variables consumed on the gradient graph.
        # End is the special case where d_o = end_f(o) and map[end_f] = {o: [d_o]}
        grad_vars = OrderedDict()  # {F_fwd: {VO_fwd: [VI_bwd]}}

        # Set grad_outputs
        for i in range(len(outputs)):
            o = outputs[i]
            go = grad_outputs[i]
            # Non-None initial gradients are folded in by multiplying the
            # output with a constant variable holding that gradient.
            if go is None:
                output = o
            elif isinstance(go, (int, float)):
                go = nn.Variable(o.shape).apply(d=go, need_grad=False)
                output = o * go
            elif isinstance(go, np.ndarray):
                go = nn.Variable(o.shape).apply(d=go, need_grad=False)
                output = o * go
            elif isinstance(go, nn.NdArray):
                go = nn.Variable(o.shape).apply(data=go, need_grad=False)
                output = o * go
            elif isinstance(go, nn.Variable):
                output = o * go
            func = output.parent
            open.add((-output.rank, get_id(func), func))

            # Connect the graph and its gradient graph
            grad_output = GradEndFunction()(output).apply(need_grad=False)
            grad_vars[func] = OrderedDict({output: [grad_output]})

        # Return grads but
        # replace inputs params with the vars connected with the given graph
        wrt_inputs = self._get_corresponding_vars_on_graph(inputs, outputs)
        grads = [None] * len(wrt_inputs)
        child_nodes = self._get_children(wrt_inputs)
        wrt_inputs = [nn.Variable() if x is None else x for x in wrt_inputs]

        # Expand the graph to its gradient graph
        # Functions are visited in descending rank order (negated rank
        # sorts ascending), i.e. from outputs back toward inputs.
        while len(open) != 0:
            open = sorted(open)  # python set is NOT sorted set.
            rank_func = open.pop(0)  # 0 is necessary
            open = set(open)
            f = rank_func[2]

            if not f.need_grad:
                continue
            # Connect variables on the gradient graph
            grad_outputs = self._connect_on_gradient_graph(grad_vars, f)

            # Check grads w.r.t. inputs
            for inp, grad_out in zip(f.inputs, grad_outputs):
                if inp not in wrt_inputs or inp.need_grad == False or grad_out is None:
                    continue
                idx = wrt_inputs.index(inp)
                if grads[idx] is None:
                    grads[idx] = grad_out
                else:
                    grads[idx] = grads[idx] + grad_out  # accum at leaf
                if bind_grad_output:
                    inp.grad = grads[idx].data

            # Propagate down
            for inp, grad_out in zip(f.inputs, grad_outputs):
                if inp not in child_nodes or not inp.need_grad or grad_out is None:
                    continue
                p_i = inp.parent
                if not p_i:
                    continue
                open.add((-p_i.rank, get_id(p_i), p_i))

        # If the final grads has None, then None becomes zero Variable(s).
        for i in range(len(grads)):
            if grads[i]:
                continue
            grads[i] = F.constant(0, wrt_inputs[i].shape)
        return grads
# Example #13
def capsule_layer(u, num_j=10, out_channels=16, num_routing_iter=3, grad_dynamic_routing=False, fix_parameters=False):
    '''
    Takes PrimaryCapules output and produces DigitsCapsules.

    Args:
        u (nnabla.Variable): A shape of [B, in_capsules, in_channels]
        num_j (int): Number of output capsules.
        out_channels (int): Number of units in each capsule of the output.
        num_routing_iter (int): Dynamic routing iterations.
        grad_dynamic_routing (bool): If False, it doesn't compute gradients of
            dynamic routing coefficients as if they are given as
            hyperparameters.
        fix_parameters (bool): Fix parameters (Set need_grad=False).

    Returns:
        tuple of nn.Variable: ``(u_hat, v)`` where ``u_hat`` holds the
        prediction vectors of shape [B, num_j, num_i, out_channels] and
        ``v`` is the DigitsCapsules output of shape [B, num_j, out_channels].

    '''
    assert num_routing_iter > 0
    batch_size = u.shape[0]
    num_i = u.shape[1]  # 32 * 6 * 6
    in_channels = u.shape[2]

    # Routing u_hat = W u in eq 2.
    # Implementing with broadcast and batch_matmul. Maybe not efficient.

    # Create a parameter tensor
    # Note: Consider num input channels multiplied by num input capsules
    from nnabla.initializer import UniformInitializer, calc_uniform_lim_glorot
    from nnabla.parameter import get_parameter_or_create
    w_init = UniformInitializer(
        calc_uniform_lim_glorot(num_i * in_channels, out_channels))
    w_ij = get_parameter_or_create(
        "W", (1, num_j, num_i, in_channels, out_channels), w_init, not fix_parameters)
    # Tileing w_ij to [batch_size, num_j, num_i, in_channels, out_channels].
    w_ij_tiled = F.broadcast(w_ij, (batch_size,) + w_ij.shape[1:])
    # Tileing u to [batch_size, num_j, num_i, 1, in_channels].
    u = u.reshape((batch_size, 1, num_i, 1, in_channels))
    u_tiled = F.broadcast(u, (batch_size, num_j, num_i, 1, in_channels))
    # Apply batched matrix multiplication:
    # [1, in_channels] * [in_channels, out_channels] --> [1, out_channels]
    # u_hat shape: [batch_size, num_j, num_i, out_channels]
    u_hat = F.batch_matmul(u_tiled, w_ij_tiled).reshape(
        (batch_size, num_j, num_i, out_channels))

    # Dynamic Routing iteration doesn't compute gradients.
    # u_hat only used at the final step of computation of s.
    # F.identity + need_grad=False detaches u_hat for the routing steps.
    u_hat_no_grad = u_hat
    if not grad_dynamic_routing:
        u_hat_no_grad = F.identity(u_hat)
        u_hat_no_grad.need_grad = False

    # Dynamic routing described in Procedure 1.
    # b holds the routing logits; initialized to zero.
    b = F.constant(0, (batch_size, num_j, num_i, 1))
    for r in range(num_routing_iter):
        # u_hat is only used in the last step.
        uh = u_hat_no_grad
        if r == num_routing_iter - 1:
            uh = u_hat

        # 4: Softmax in eq 3
        c = F.softmax(b, axis=1)
        # 5: Left of eq 2. s shape: [B, num_j, out_channels]
        s = F.sum(c * uh, axis=2)
        # 6: eq 1
        v = squash(s, axis=2)
        # The assert above guarantees the loop returns on its final pass.
        if r == num_routing_iter - 1:
            return u_hat, v
        # 7: Update by agreement
        b = b + F.sum(v.reshape((batch_size, num_j, 1, out_channels)) *
                      uh, axis=3, keepdims=True)
# Example #14
    def call(self, memory, decoder_inputs=None):
        r"""Return mel-spectrograms, gate outputs and an attention matrix.

        Runs the autoregressive decoder for ``hp.mel_len`` steps. When
        `decoder_inputs` is given, teacher forcing is used (the shifted
        ground-truth frame feeds each step); otherwise each step consumes
        the decoder's own previous prediction.

        Args:
            memory (nn.Variable): A 3D tensor of shape (B, T, C).
            decoder_inputs (nn.Variable, optional): A 3D tensor with shape of (B, T/r, r*n_mels).
                Shifted log melspectrogram of sound files. Defaults to None.

        Returns:
            nn.Variable: The synthetic mel-spectrograms of shape (B, Ty/r, r*n_mels).
            nn.Variable: The gate outputs of shape (B, Ty).
            nn.Variable: The attention matrix of shape (B, Tx, Ty).
        """
        hp = self._hparams
        mel_shape = hp.n_mels * hp.r

        # initialize decoder states
        # (F.constant defaults to value 0, so these are all-zero tensors)
        decoder_input = F.constant(shape=(hp.batch_size, 1, mel_shape))
        decoder_hidden = F.constant(shape=(1, 1, hp.batch_size,
                                           hp.decoder_rnn_dim))
        decoder_cell = F.constant(shape=(1, 1, hp.batch_size,
                                         hp.decoder_rnn_dim))

        # initialize attention states
        attention_weights = F.constant(shape=(hp.batch_size, 1, hp.text_len))
        attention_weights_cum = F.constant(shape=(hp.batch_size, 1,
                                                  hp.text_len))
        attention_context = F.constant(shape=(hp.batch_size, 1,
                                              hp.encoder_embedding_dim))
        attention_hidden = F.constant(shape=(1, 1, hp.batch_size,
                                             hp.attention_rnn_dim))
        attention_cell = F.constant(shape=(1, 1, hp.batch_size,
                                           hp.attention_rnn_dim))

        # store outputs
        mel_outputs, gate_outputs, alignments = [], [], []

        for i in range(hp.mel_len):
            if i > 0:
                # Teacher forcing when decoder_inputs is provided;
                # otherwise feed back the last predicted frame.
                decoder_input = (mel_outputs[-1] if decoder_inputs is None else
                                 decoder_inputs[:, i - 1:i, :])
                if decoder_inputs is None:
                    # NOTE(review): adds a leading axis to the previous
                    # frame — verify the resulting layout matches what
                    # prenet expects.
                    decoder_input = decoder_input[None, ...]
            # decoder of shape (B, 1, prenet_channels=256)
            decoder_input = prenet(decoder_input,
                                   hp.prenet_channels,
                                   is_training=self.training,
                                   scope='prenet')

            with nn.parameter_scope('attention_rnn'):
                # cell_input of shape (B, 1, prenet_channels[-1] + C=768)
                cell_input = F.concatenate(decoder_input,
                                           attention_context,
                                           axis=2)
                _, attention_hidden, attention_cell = PF.lstm(
                    F.transpose(cell_input, (1, 0, 2)),
                    attention_hidden,
                    attention_cell,
                    training=self.training,
                    name='lstm_attention'
                )  # (1, 1, B, attention_hidden), (1, 1, B, attention_hidden)
                if self.training:
                    attention_hidden = F.dropout(attention_hidden,
                                                 hp.p_attention_dropout)

            with nn.parameter_scope('location_attention'):
                # Current and cumulative attention weights are stacked as
                # the location features for the attention mechanism.
                attention_weights_cat = F.concatenate(attention_weights,
                                                      attention_weights_cum,
                                                      axis=1)
                attention_context, attention_weights = location_sensitive_attention(
                    F.transpose(attention_hidden[0], (1, 0, 2)),
                    memory,
                    attention_weights_cat,
                    attention_location_kernel_size=hp.
                    attention_location_kernel_size,
                    attention_n_filters=hp.attention_location_n_filters,
                    attention_dim=hp.attention_dim,
                    is_training=self.training,
                    scope='ls_attention')
                attention_weights_cum += attention_weights
                alignments.append(attention_weights)

            with nn.parameter_scope('decoder_rnn'):
                # (1, B, attention_rnn_dim + encoder_embedding_dim)
                inp_decoder = F.concatenate(attention_hidden[0],
                                            F.transpose(
                                                attention_context, (1, 0, 2)),
                                            axis=2)
                _, decoder_hidden, decoder_cell = PF.lstm(
                    inp_decoder,
                    decoder_hidden,
                    decoder_cell,
                    training=self.training,
                    name='lstm_decoder')
                if self.training:
                    decoder_hidden = F.dropout(decoder_hidden,
                                               hp.p_decoder_dropout)

            with nn.parameter_scope('projection'):
                proj_input = F.concatenate(
                    decoder_hidden[0, 0],
                    F.reshape(attention_context, (hp.batch_size, -1),
                              inplace=False),
                    axis=1)  # (B, decoder_rnn_dim + encoder_embedding_dim)
                decoder_output = affine_norm(proj_input,
                                             mel_shape,
                                             base_axis=1,
                                             with_bias=True,
                                             w_init_gain='affine',
                                             scope='affine')
                mel_outputs.append(decoder_output)

            with nn.parameter_scope('gate_prediction'):
                gate_prediction = affine_norm(proj_input,
                                              1,
                                              base_axis=1,
                                              with_bias=True,
                                              w_init_gain='sigmoid',
                                              scope='affine')
                gate_outputs.append(gate_prediction)

        # (B, T2, n_mels*r)
        mel_outputs = F.stack(*mel_outputs, axis=1)
        gate_outputs = F.concatenate(*gate_outputs, axis=1)  # (B, T2)
        alignments = F.concatenate(*alignments, axis=1)  # (B, T1, T2)

        return mel_outputs, gate_outputs, alignments
# Example #15
def main(args):
    """
    Semi-supervised CIFAR-10 training with uncertainty regularization.

    Builds three graphs sharing parameters via `cnn_model_003`: a
    supervised branch (uncertainty-weighted cross entropy plus sigma
    regularization), an unsupervised stochastic-regularization branch
    over two augmented views, and a test-mode evaluation branch. Then
    runs the training loop, reporting accuracy once per epoch.

    Args:
        args: Parsed arguments providing device_id, batch_size,
            batch_size_eval, context and lambda_.
    """
    # Settings
    device_id = args.device_id
    batch_size = args.batch_size
    batch_size_eval = args.batch_size_eval
    n_l_train_data = 4000
    n_train_data = 50000
    n_cls = 10
    learning_rate = 1. * 1e-3
    n_epoch = 300
    # Integer division: iter_epoch feeds range() and the "% iter_epoch"
    # check below, which require an int on Python 3 (true division
    # would yield a float and make range(n_iter) raise TypeError).
    iter_epoch = n_train_data // batch_size
    n_iter = n_epoch * iter_epoch
    extension_module = args.context
    lambda_ = args.lambda_

    # Model
    ## supervised
    batch_size, m, h, w = batch_size, 3, 32, 32
    ctx = extension_context(extension_module, device_id=device_id)
    x_l = nn.Variable((batch_size, m, h, w))
    y_l = nn.Variable((batch_size, 1))
    pred, log_var = cnn_model_003(ctx, x_l)
    one = F.constant(1., log_var.shape)
    loss_ce = ce_loss_with_uncertainty(ctx, pred, y_l, log_var)
    reg_sigma = sigma_regularization(ctx, log_var, one)
    loss_supervised = loss_ce + er_loss(ctx, pred) + lambda_ * reg_sigma

    ## stochastic regularization: consistency between two augmented views
    x_u0 = nn.Variable((batch_size, m, h, w))
    x_u1 = nn.Variable((batch_size, m, h, w))
    pred_x_u0, log_var0 = cnn_model_003(ctx, x_u0)
    pred_x_u1, log_var1 = cnn_model_003(ctx, x_u1)
    loss_sr = sr_loss_with_uncertainty(ctx,
                                       pred_x_u0, pred_x_u1, log_var0, log_var1)
    reg_sigma0 = sigma_regularization(ctx, log_var0, one)
    reg_sigma1 = sigma_regularization(ctx, log_var1, one)
    reg_sigmas = sigmas_regularization(ctx, log_var0, log_var1)
    loss_unsupervised = loss_sr + er_loss(ctx, pred_x_u0) + er_loss(ctx, pred_x_u1) \
                        + lambda_ * (reg_sigma0 + reg_sigma1) + lambda_ * reg_sigmas

    ## evaluate (test=True selects inference behavior)
    batch_size_eval, m, h, w = batch_size, 3, 32, 32
    x_eval = nn.Variable((batch_size_eval, m, h, w))
    pred_eval, _ = cnn_model_003(ctx, x_eval, test=True)

    # Solver
    with nn.context_scope(ctx):
        solver = S.Adam(alpha=learning_rate)
        solver.set_parameters(nn.get_parameters())

    # Dataset
    ## separate the labeled subset, then save
    home = os.environ.get("HOME")
    fpath = os.path.join(home, "datasets/cifar10/cifar-10.npz")
    separator = Separator(n_l_train_data)
    separator.separate_then_save(fpath)

    l_train_path = os.path.join(home, "datasets/cifar10/l_cifar-10.npz")
    u_train_path = os.path.join(home, "datasets/cifar10/cifar-10.npz")
    test_path = os.path.join(home, "datasets/cifar10/cifar-10.npz")

    # data reader
    data_reader = Cifar10DataReader(l_train_path, u_train_path, test_path,
                                    batch_size=batch_size,
                                    n_cls=n_cls,
                                    da=True,
                                    shape=True)

    # Training loop
    print("# Training loop")
    epoch = 1
    st = time.time()
    for i in range(n_iter):
        # Get data and set it to the variables
        x_l0_data, x_l1_data, y_l_data = data_reader.get_l_train_batch()
        x_u0_data, x_u1_data, y_u_data = data_reader.get_u_train_batch()

        x_l.d, _, y_l.d = x_l0_data, x_l1_data, y_l_data
        x_u0.d, x_u1.d = x_u0_data, x_u1_data

        # Train: both losses accumulate grads before a single update.
        loss_supervised.forward(clear_no_need_grad=True)
        loss_unsupervised.forward(clear_no_need_grad=True)
        solver.zero_grad()
        loss_supervised.backward(clear_buffer=True)
        loss_unsupervised.backward(clear_buffer=True)
        solver.update()

        # Evaluate once per epoch
        if (i + 1) % iter_epoch == 0:
            # Get data and set it to the variables
            x_data, y_data = data_reader.get_test_batch()

            # Evaluation loop
            ve = 0.
            iter_val = 0
            for k in range(0, len(x_data), batch_size_eval):
                x_eval.d = get_test_data(x_data, k, batch_size_eval)
                label = get_test_data(y_data, k, batch_size_eval)
                pred_eval.forward(clear_buffer=True)
                ve += categorical_error(pred_eval.d, label)
                iter_val += 1
            msg = "Epoch:{},ElapsedTime:{},Acc:{:02f}".format(
                epoch,
                time.time() - st,
                (1. - ve / iter_val) * 100)
            print(msg)
            st = time.time()
            epoch += 1
# Example #16
def train(batch_size, X_train, max_iter):
    """Train a DCGAN-style generator/discriminator pair on flat 784-dim images.

    Alternates one generator update and one discriminator update per
    iteration for ``max_iter`` iterations, saving parameters every 100
    iterations.

    Args:
        batch_size (int): Samples per update.
        X_train (numpy.ndarray): Training images, shape (N, 784).
        max_iter (int): Number of training iterations.
    """
    from nnabla.ext_utils import get_extension_context
    context = "cpu"
    ctx = get_extension_context(context, device_id="0", type_config="float")
    nn.set_default_context(ctx)

    # Generator loss: make the discriminator predict "real" (label 1) on fakes.
    z = nn.Variable([batch_size, 100, 1, 1])
    fake = generator(z)
    fake.persistent = True  # keep buffer so the same forward feeds both losses
    pred_fake = discriminator(fake)
    labels = func.constant(1, pred_fake.shape)
    loss_gen = func.mean(func.sigmoid_cross_entropy(pred_fake, labels))

    # Discriminator loss on fakes (label 0); unlinked so the generator
    # receives no gradient from the discriminator update.
    fake_disc = fake.get_unlinked_variable(need_grad=True)
    pred_fake_disc = discriminator(fake_disc)
    disc_fake_label = func.constant(0, pred_fake_disc.shape)
    loss_disc_fake = func.mean(
        func.sigmoid_cross_entropy(pred_fake_disc, disc_fake_label))

    # Discriminator loss on real images.
    # FIX: real samples are labeled 1 (was 0), and the loss uses real_pred
    # (was pred_fake_disc, which made the real path dead code).
    r = nn.Variable([batch_size, 784])
    real_pred = discriminator(r)
    disc_real_label = func.constant(1, real_pred.shape)
    loss_disc_real = func.mean(
        func.sigmoid_cross_entropy(real_pred, disc_real_label))

    loss_disc = loss_disc_real + loss_disc_fake

    solver_gen = sol.Adam(0.0002, beta1=0.5)
    solver_disc = sol.Adam(0.0002, beta1=0.5)
    # FIX: solvers previously had no parameters registered, so update() was
    # a no-op. Scope names match the save_parameters scopes below.
    with nn.parameter_scope("generator"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("discriminator"):
        solver_disc.set_parameters(nn.get_parameters())

    for i in range(0, max_iter):
        # Sample a random mini-batch of real images and fresh noise.
        index = np.random.randint(0, X_train.shape[0], size=batch_size)
        input_image = X_train[index]

        r.d = input_image
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(0.0001)
        solver_gen.update()

        # Discriminator update.
        solver_disc.zero_grad()
        loss_disc.forward(clear_no_need_grad=True)
        loss_disc.backward(clear_buffer=True)
        solver_disc.weight_decay(0.0001)
        solver_disc.update()

        print(
            "epoch-->[%d]-------loss_generator-->[%f]-------loss_discriminator-->[%f]"
            % (i, loss_gen.d, loss_disc.d))

        # Periodic checkpoints of both networks.
        if i % 100 == 0:
            with nn.parameter_scope("generator"):
                nn.save_parameters(
                    "/home/vaibhav/deep_learning/gan/code/gen_weights/epoch_%d.h5"
                    % i)
            with nn.parameter_scope("discriminator"):
                nn.save_parameters(
                    "/home/vaibhav/deep_learning/gan/code/disc_weights/epoch_%d.h5"
                    % i)
Beispiel #17
0
def main(args):
    """Semi-supervised SVHN training with uncertainty-weighted losses.

    Builds supervised, unsupervised (stochastic-regularization) and
    evaluation graphs sharing one model, trains with a single Adam solver,
    and after each epoch evaluates on the test set, keeping only the best
    checkpoint on disk.
    """
    # Settings
    device_id = args.device_id
    batch_size = args.batch_size
    batch_size_eval = args.batch_size_eval
    n_l_train_data = args.n_label
    n_train_data = 73257
    n_cls = 10
    learning_rate = 1. * 1e-3
    n_epoch = args.epoch
    act = F.relu
    # FIX: use integer division; "/" yields a float in Python 3, which made
    # n_iter a float and range(n_iter) a TypeError.
    iter_epoch = n_train_data // batch_size
    n_iter = n_epoch * iter_epoch
    extension_module = args.context
    lambda_ = args.lambda_

    # Model
    ## supervised
    batch_size, m, h, w = batch_size, 3, 32, 32
    ctx = extension_context(extension_module, device_id=device_id)
    x_l = nn.Variable((batch_size, m, h, w))
    y_l = nn.Variable((batch_size, 1))
    pred, log_var = cnn_model_003(ctx, x_l)
    one = F.constant(1., log_var.shape)
    loss_ce = ce_loss(ctx, pred, y_l)
    reg_sigma = sigma_regularization(ctx, log_var, one)
    loss_supervised = loss_ce + er_loss(ctx, pred) + lambda_ * reg_sigma

    ## stochastic regularization: two augmented views of unlabeled data
    x_u0 = nn.Variable((batch_size, m, h, w))
    x_u1 = nn.Variable((batch_size, m, h, w))
    pred_x_u0, log_var0 = cnn_model_003(ctx, x_u0)
    pred_x_u1, log_var1 = cnn_model_003(ctx, x_u1)
    loss_sr = sr_loss_with_uncertainty(ctx,
                                       pred_x_u0, pred_x_u1, log_var0, log_var1)
    reg_sigma0 = sigma_regularization(ctx, log_var0, one)
    reg_sigma1 = sigma_regularization(ctx, log_var1, one)
    reg_sigmas = sigmas_regularization(ctx, log_var0, log_var1)
    loss_unsupervised = loss_sr + er_loss(ctx, pred_x_u0) + er_loss(ctx, pred_x_u1) \
                        + lambda_ * (reg_sigma0 + reg_sigma1) + lambda_ * reg_sigmas
    ## evaluate
    batch_size_eval, m, h, w = batch_size, 3, 32, 32
    x_eval = nn.Variable((batch_size_eval, m, h, w))
    pred_eval, _ = cnn_model_003(ctx, x_eval, test=True)

    # Solver
    with nn.context_scope(ctx):
        solver = S.Adam(alpha=learning_rate)
        solver.set_parameters(nn.get_parameters())

    # Dataset
    ## separate dataset into labeled/unlabeled splits on disk
    home = os.environ.get("HOME")
    fpath = os.path.join(home, "datasets/svhn/train.mat")
    separator = Separator(n_l_train_data)
    separator.separate_then_save(fpath)

    l_train_path = os.path.join(home, "datasets/svhn/l_train.mat")
    u_train_path = os.path.join(home, "datasets/svhn/u_train.mat")
    test_path = os.path.join(home, "datasets/svhn/test.mat")

    # data reader
    data_reader = SVHNDataReader(l_train_path, u_train_path, test_path,
                                  batch_size=batch_size,
                                  n_cls=n_cls,
                                  da=False,
                                  shape=True)

    # Training loop
    print("# Training loop")
    epoch = 1
    st = time.time()
    acc_prev = 0.
    ve_best = 1.
    save_path_prev = ""
    for i in range(n_iter):
        # Get data and set it to the variables
        x_l0_data, x_l1_data, y_l_data = data_reader.get_l_train_batch()
        x_u0_data, x_u1_data, y_u_data = data_reader.get_u_train_batch()

        x_l.d, _ , y_l.d= x_l0_data, x_l1_data, y_l_data
        x_u0.d, x_u1.d= x_u0_data, x_u1_data

        # Train: accumulate grads of both losses, then one update
        loss_supervised.forward(clear_no_need_grad=True)
        loss_unsupervised.forward(clear_no_need_grad=True)
        solver.zero_grad()
        loss_supervised.backward(clear_buffer=True)
        loss_unsupervised.backward(clear_buffer=True)
        solver.update()

        # Evaluate at the end of each epoch
        if (i + 1) % iter_epoch == 0:
            # Get data and set it to the variables
            x_data, y_data = data_reader.get_test_batch()

            # Evaluation loop
            ve = 0.
            iter_val = 0
            for k in range(0, len(x_data), batch_size_eval):
                x_eval.d = get_test_data(x_data, k, batch_size_eval)
                label = get_test_data(y_data, k, batch_size_eval)
                pred_eval.forward(clear_buffer=True)
                ve += categorical_error(pred_eval.d, label)
                iter_val += 1
            ve /= iter_val
            msg = "Epoch:{},ElapsedTime:{},Acc:{:02f}".format(
                epoch,
                time.time() - st,
                (1. - ve) * 100)
            print(msg)
            # Keep only the single best checkpoint on disk.
            if ve < ve_best:
                if not os.path.exists(args.model_save_path):
                    os.makedirs(args.model_save_path)
                if save_path_prev != "":
                    os.remove(save_path_prev)
                save_path = os.path.join(
                    args.model_save_path, 'params_%06d.h5' % epoch)
                nn.save_parameters(save_path)
                save_path_prev = save_path
                ve_best = ve
            st = time.time()
            epoch += 1
Beispiel #18
0
def train(args):
    """
    Main script: DCGAN training on MNIST.

    Builds the generator/discriminator graphs, alternates generator and
    discriminator Adam updates, monitors losses and generated tiles, and
    periodically saves parameters for both networks.
    """

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN

    # Fake path: generator tries to make the discriminator output "real" (1).
    z = nn.Variable([args.batch_size, 100, 1, 1])
    fake = generator(z)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    loss_gen = F.mean(
        F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape)))
    # Unlinked copy so the discriminator update does not backprop into G.
    fake_dis = fake.unlinked()
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(pred_fake_dis,
                                F.constant(0, pred_fake_dis.shape)))

    # Real path: discriminator should output "real" (1) on data samples.
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(
        F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries("Discriminator loss",
                                       monitor,
                                       interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    # FIX: was `lambda x: x + 1 / 2.` which is x + 0.5 due to operator
    # precedence; (x + 1) / 2. maps [-1, 1] images into [0, 1] for display.
    monitor_fake = M.MonitorImageTile("Fake images",
                                      monitor,
                                      normalize_method=lambda x: (x + 1) / 2.)

    data = data_iterator_mnist(args.batch_size, True)
    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("gen"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "generator_param_%06d.h5" % i))
            with nn.parameter_scope("dis"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "discriminator_param_%06d.h5" % i))

        # Training forward
        image, _ = data.next()
        # FIX: previous expression `image / 255. - 0.5` produced the range
        # [-0.5, 0.5] while the stated intent (and the monitor's un-normalize
        # above) is [-1, 1].
        x.d = image / 255. * 2 - 1.  # [0, 255] to [-1, 1]
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)
    # Final snapshot of both networks.
    with nn.parameter_scope("gen"):
        nn.save_parameters(
            os.path.join(args.model_save_path,
                         "generator_param_%06d.h5" % args.max_iter))
    with nn.parameter_scope("dis"):
        nn.save_parameters(
            os.path.join(args.model_save_path,
                         "discriminator_param_%06d.h5" % args.max_iter))
Beispiel #19
0
def main(args):
    """Semi-supervised CIFAR-10 training with uncertainty-weighted losses.

    Builds supervised, unsupervised (stochastic-regularization) and
    evaluation graphs sharing one model, trains the supervised and
    unsupervised losses with separate Adam solvers over the same parameter
    set, and reports test accuracy once per epoch.
    """
    # Settings
    device_id = args.device_id
    batch_size = args.batch_size
    batch_size_eval = args.batch_size_eval
    n_l_train_data = 4000
    n_train_data = 50000
    n_cls = 10
    learning_rate = 1. * 1e-3
    n_epoch = 300
    act = F.relu
    # FIX: use integer division; "/" yields a float in Python 3, which made
    # n_iter a float, breaking range(n_iter) and the epoch-boundary modulo.
    iter_epoch = n_train_data // batch_size
    n_iter = n_epoch * iter_epoch
    extension_module = args.context

    # Model
    ## supervised
    batch_size, m, h, w = batch_size, 3, 32, 32
    ctx = extension_context(extension_module, device_id=device_id)
    x_l = nn.Variable((batch_size, m, h, w))
    y_l = nn.Variable((batch_size, 1))
    pred, log_var = cnn_model_003(ctx, x_l)
    one = F.constant(1., log_var.shape)
    loss_ce = ce_loss_with_uncertainty(ctx, pred, y_l, log_var)
    reg_sigma = sigma_regularization(ctx, log_var, one)
    loss_supervised = loss_ce + reg_sigma

    ## stochastic regularization: two augmented views of unlabeled data
    x_u0 = nn.Variable((batch_size, m, h, w))
    x_u1 = nn.Variable((batch_size, m, h, w))
    pred_x_u0, log_var0 = cnn_model_003(ctx, x_u0)
    pred_x_u1, log_var1 = cnn_model_003(ctx, x_u1)
    loss_sr = sr_loss_with_uncertainty(ctx,
                                       pred_x_u0, pred_x_u1, log_var0, log_var1)
    loss_er0 = er_loss(ctx, pred_x_u0)
    loss_er1 = er_loss(ctx, pred_x_u1)
    reg_sigma0 = sigma_regularization(ctx, log_var0, one)
    reg_sigma1 = sigma_regularization(ctx, log_var1, one)
    loss_unsupervised = loss_sr + loss_er0 + loss_er1 \
                        + reg_sigma0 + reg_sigma1

    ## evaluate
    batch_size_eval, m, h, w = batch_size, 3, 32, 32
    x_eval = nn.Variable((batch_size_eval, m, h, w))
    pred_eval, _ = cnn_model_003(ctx, x_eval, test=True)

    # Solver: two solvers over the same parameters, one per loss.
    with nn.context_scope(ctx):
        solver_l = S.Adam(alpha=learning_rate)
        solver_l.set_parameters(nn.get_parameters())
        solver_u = S.Adam(alpha=learning_rate)
        solver_u.set_parameters(nn.get_parameters())

    # Dataset
    ## separate dataset into labeled/unlabeled splits on disk
    home = os.environ.get("HOME")
    fpath = os.path.join(home, "datasets/cifar10/cifar-10.npz")
    separator = Separator(n_l_train_data)
    separator.separate_then_save(fpath)

    l_train_path = os.path.join(home, "datasets/cifar10/l_cifar-10.npz")
    u_train_path = os.path.join(home, "datasets/cifar10/cifar-10.npz")
    test_path = os.path.join(home, "datasets/cifar10/cifar-10.npz")

    # data reader
    data_reader = Cifar10DataReader(l_train_path, u_train_path, test_path,
                                  batch_size=batch_size,
                                  n_cls=n_cls,
                                  da=True,
                                  shape=True)

    # Training loop
    print("# Training loop")
    epoch = 1
    st = time.time()
    acc_prev = 0.
    for i in range(n_iter):
        # Get data and set it to the variables
        x_l0_data, x_l1_data, y_l_data = data_reader.get_l_train_batch()
        x_u0_data, x_u1_data, y_u_data = data_reader.get_u_train_batch()

        x_l.d, _ , y_l.d= x_l0_data, x_l1_data, y_l_data
        x_u0.d, x_u1.d= x_u0_data, x_u1_data

        # Train
        ## for supervised loss
        loss_supervised.forward(clear_no_need_grad=True)
        solver_l.zero_grad()
        loss_supervised.backward(clear_buffer=True)
        solver_l.update()
        ## for unsupervised loss
        loss_unsupervised.forward(clear_no_need_grad=True)
        solver_u.zero_grad()
        loss_unsupervised.backward(clear_buffer=True)
        solver_u.update()

        # Evaluate at the end of each epoch
        if (i + 1) % iter_epoch == 0:
            # Get data and set it to the variables
            x_data, y_data = data_reader.get_test_batch()

            # Evaluation loop
            ve = 0.
            iter_val = 0
            for k in range(0, len(x_data), batch_size_eval):
                x_eval.d = get_test_data(x_data, k, batch_size_eval)
                label = get_test_data(y_data, k, batch_size_eval)
                pred_eval.forward(clear_buffer=True)
                ve += categorical_error(pred_eval.d, label)
                iter_val += 1
            msg = "Epoch:{},ElapsedTime:{},Acc:{:02f}".format(
                epoch,
                time.time() - st,
                (1. - ve / iter_val) * 100)
            print(msg)
            st = time.time()
            epoch += 1
Beispiel #20
0
def test_nan_inf_tracer(batch_size, n_class, ext_name, trace_nan, trace_inf):
    nn.clear_parameters()

    ctx = get_extension_context(ext_name)
    nn.set_default_context(ctx)

    x = nn.Variable.from_numpy_array(
        np.random.normal(size=(batch_size, 3, 16, 16)))
    t = nn.Variable.from_numpy_array(
        np.random.randint(low=0, high=n_class, size=(batch_size, 1)))

    y = simple_cnn(x, t, n_class)

    must_be_inf = y / F.constant(0, shape=y.shape)
    must_be_nan = must_be_inf / must_be_inf

    # Refresh all arrays once so as to ensure all grad values are 0.
    must_be_nan.visit(_refresh_inputs_grad)

    nit = NanInfTracer(trace_nan=trace_nan, trace_inf=trace_inf)

    # can be run at any cases without exception.
    with nit.trace():
        y.forward(clear_no_need_grad=True,
                  function_post_hook=nit.forward_post_hook)
        y.backward(clear_buffer=True,
                   function_post_hook=nit.backward_post_hook)

    nit.check()  # this call can also work without exception.

    # check nan
    if trace_nan:
        with pytest.raises(ValueError):
            with nit.trace():
                must_be_nan.forward(clear_buffer=True,
                                    function_post_hook=nit.forward_post_hook)

        with pytest.raises(ValueError):
            with nit.trace():
                must_be_nan.backward(clear_buffer=True,
                                     function_post_hook=nit.backward_post_hook)

        must_be_nan.forward(clear_buffer=True,
                            function_post_hook=nit.forward_post_hook)
        with pytest.raises(ValueError):
            nit.check()

        must_be_nan.backward(clear_buffer=True,
                             function_post_hook=nit.backward_post_hook)

        with pytest.raises(ValueError):
            nit.check()

    # check inf
    if trace_inf:
        with pytest.raises(ValueError):
            with nit.trace():
                must_be_inf.forward(clear_buffer=True,
                                    function_post_hook=nit.forward_post_hook)

        must_be_inf.forward(clear_buffer=True,
                            function_post_hook=nit.forward_post_hook)
        with pytest.raises(ValueError):
            nit.check()