Example #1
def sigma_regularization(ctx, log_var, one):
    with nn.context_scope(ctx):
        h = F.exp(log_var)
        h = F.pow_scalar(h, 0.5)
        h = F.mean(h, axis=1)
        r = F.mean(F.squared_error(h, one))
    return r
Example #2
def vat(x, r, eps, predict, distance):
    """
    Function for calculate LDS Loss, e.g. KL(p(y|x)||KL(p(y|x+n)

    Args:
        x(`~nnabla.Variable`): N-D array 
        r(`~nnabla.Variable`): N-D array of randn/grad
        eps(`~nnabla.Variable`): Scaling factor, xi for power iteration, epsilon for loss 
        predict: pointer of feed-forward-net building function
        distance: pointer of distance function e.g. KL(p(y|x)||KL(p(y|x+n)

    Returns:
        ~nnabla.Variable: LDS loss (KL(p(y|x)||KL(p(y|x+n))
    """
    # Calculate log(p(y|x))
    y = predict(x)

    # Stop backprop along this path.
    y1 = y.unlinked()

    # Calculate log(p(y|x+n))
    y2 = predict(x + eps * r)

    # Calculate kl(p(y|x)||p(y|x+n))
    loss = distance(y1, y2)
    loss = F.mean(loss)

    # Return loss and y; y is returned to avoid duplicated computation.
    return loss, y
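A minimal usage sketch for the vat() above (not part of the original example): the toy predict network, the kl distance helper, and the xi/epsilon values are assumptions chosen to illustrate the usual two-step procedure, i.e. one pass with a small xi to estimate the adversarial direction from the gradient with respect to r, then the LDS loss with the actual epsilon. It assumes the vat() defined above is in scope.

import numpy as np
import nnabla as nn
import nnabla.functions as F
import nnabla.parametric_functions as PF


def predict(x):
    # Toy classifier; any feed-forward network builder works here.
    return PF.affine(x, 10, name="cls")


def kl(y0, y1):
    # KL(p(y|x) || p(y|x+n)) computed from logits, summed over classes.
    p0 = F.softmax(y0, axis=1)
    return F.sum(p0 * (F.log(F.softmax(y0, axis=1)) - F.log(F.softmax(y1, axis=1))), axis=1)


x = nn.Variable((8, 32))
x.d = np.random.randn(*x.shape)
xi, eps_val = 1e-6, 2.0  # illustrative values

# Step 1: estimate the adversarial direction with a small xi.
r = nn.Variable(x.shape, need_grad=True)
r.d = np.random.randn(*x.shape)  # random initial direction
loss_xi, y = vat(x, r, xi, predict, kl)
y.forward()          # p(y|x); its data is shared with the unlinked copy inside the loss graph
loss_xi.forward()
r.grad.zero()
loss_xi.backward()
r_adv = r.g / (np.sqrt((r.g ** 2).sum(axis=1, keepdims=True)) + 1e-12)

# Step 2: LDS loss with the adversarial perturbation scaled by epsilon.
r2 = nn.Variable.from_numpy_array(r_adv.astype(np.float32))
lds, y = vat(x, r2, eps_val, predict, kl)
y.forward()
lds.forward()
print(lds.d)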
Example #3
def test_graph_logreg(seed):
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4], need_grad=True)
    w = nn.Variable([12, 5], need_grad=True)
    b = nn.Variable([5], need_grad=True)
    t = nn.Variable([2, 1])
    x.d = rng.randn(*x.shape)
    w.d = rng.randn(*w.shape)
    b.d = rng.randn(*b.shape)
    t.d = rng.randint(0, 5, size=t.shape)

    nn.set_default_context(nn.Context())

    # Forwardprop by definition
    with nn.auto_forward():
        z = F.affine(x, w, b, 1)
        l = F.softmax_cross_entropy(z, t, 1)
        L = F.mean(l)

    # Backprop
    # Diffs should be initialized since they are always accumulated
    x.g = 0
    w.g = 0
    b.g = 0
    L.backward(clear_buffer=True)
    x.g = rng.randn(*x.shape)

    inputs = [x, w, b]

    from nbla_test_utils import \
        compute_analytical_and_numerical_grad_graph as grads
    agrad, ngrad = grads(L, inputs, 1e-3)
    assert np.allclose(ngrad, agrad, atol=1e-2)
Example #4
def ce_loss_with_uncertainty(ctx, pred, y_l, log_var):
    r = F.randn(0., 1., log_var.shape)
    r = F.pow_scalar(F.exp(log_var), 0.5) * r
    h = pred + r
    with nn.context_scope(ctx):
        loss_ce = F.mean(F.softmax_cross_entropy(h, y_l))
    return loss_ce
Example #5
def kl_divergence(ctx, pred, label, log_var):
    with nn.context_scope(ctx):
        s = F.pow_scalar(F.exp(log_var), 0.5)
        elms = softmax_with_temperature(ctx, label, s) \
               * F.log(F.softmax(pred, axis=1))
        loss = -F.mean(F.sum(elms, axis=1))
    return loss
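The softmax_with_temperature helper used above is not shown in this example. A plausible sketch, assuming it takes logits x and a per-sample temperature t that is broadcastable to x's shape, is:

import nnabla as nn
import nnabla.functions as F


def softmax_with_temperature(ctx, x, t):
    # Hypothetical helper: scale the logits by the temperature, then softmax over classes.
    with nn.context_scope(ctx):
        return F.softmax(x / F.broadcast(t, x.shape), axis=1)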
Example #6
def sigmas_regularization(ctx, log_var0, log_var1):
    with nn.context_scope(ctx):
        h0 = F.exp(log_var0)
        h0 = F.pow_scalar(h0, 0.5)
        h1 = F.exp(log_var1)
        h1 = F.pow_scalar(h1, 0.5)
        r = F.mean(F.squared_error(h0, h1))
    return r
Example #7
def sr_loss_with_uncertainty(ctx, pred0, pred1, log_var0, log_var1):
    #TODO: squared error/absolute error
    s0 = F.exp(log_var0)
    s1 = F.exp(log_var1)
    squared_error = F.squared_error(pred0, pred1)
    with nn.context_scope(ctx):
        loss_sr = F.mean(squared_error * (1 / s0 + 1 / s1) + (s0 / s1 + s1 / s0)) * 0.5
    return loss_sr
Example #8
def test_graph_model(model, seed):
    np.random.seed(313)
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4, 4], need_grad=True)
    t = nn.Variable([2, 1])
    x.d = rng.randn(*x.shape)
    t.d = rng.randint(0, 5, size=t.shape)

    nn.set_default_context(nn.Context())

    # Forwardprop by definition
    nn.clear_parameters()
    if model == "mlp":
        with nn.parameter_scope('fc1'):
            z = PF.affine(x, 3)
        z2 = F.relu(z, inplace=True)
        with nn.parameter_scope('fc2'):
            z3 = PF.affine(z2, 5)
    elif model == "recurrent":
        with nn.parameter_scope('fc1'):
            z = PF.affine(x, 3)
            z2 = F.relu(z, inplace=True)
        h = z2
        for _ in range(2):
            with nn.parameter_scope('fc2'):
                h = PF.affine(h, 3)
                h = F.relu(h, inplace=True)
        with nn.parameter_scope('fc3'):
            z3 = PF.affine(h, 5)
    elif model == "convolution":
        with nn.parameter_scope('conv1'):
            z = PF.convolution(x, 3, (2, 2))
            z2 = F.relu(z, inplace=True)
        with nn.parameter_scope('fc2'):
            z3 = PF.affine(z2, 5)
    else:
        raise ValueError()
    l = F.softmax_cross_entropy(z3, t, 1)
    L = F.mean(l)

    # Forwardprop
    L.forward(clear_no_need_grad=True)

    # Backprop
    # Diffs should be initialized since they are always accumulated
    x.grad.zero()
    L.backward(clear_buffer=True)
    x.g = rng.randn(*x.shape)
    parameters = nn.get_parameters()
    for param in parameters.values():
        param.grad.zero()
    inputs = [x] + list(parameters.values())

    from nbla_test_utils import \
        compute_analytical_and_numerical_grad_graph as grads
    agrad, ngrad = grads(L, inputs, 1e-3)
    assert np.allclose(ngrad, agrad, atol=1.05e-2)
Example #9
def sr_loss_with_uncertainty(ctx, pred0, pred1, log_var0, log_var1):
    var0 = F.exp(log_var0)
    var1 = F.exp(log_var1)
    s0 = F.pow_scalar(var0, 0.5)
    s1 = F.pow_scalar(var1, 0.5)
    squared_error = F.squared_error(pred0, pred1)
    with nn.context_scope(ctx):
        loss = F.log(s1/s0) + (var0/var1 + squared_error/var1) * 0.5
        loss_sr = F.mean(loss)
    return loss_sr
Example #10
def sr_loss_with_uncertainty(ctx, pred0, pred1, log_v0, log_v1, 
                             log_s0, log_s1):
    v0 = F.exp(log_v0)
    v1 = F.exp(log_v1)
    squared_error = F.squared_error(pred0, pred1)
    s0 = F.exp(log_s0)
    s1 = F.exp(log_s1)
    with nn.context_scope(ctx):
        error = squared_error * (1 / v0 + 1 / v1) + (v0 / v1 + v1 / v0) + (s0 / s1 + s1 / s0)
        loss_sr = F.mean(error) * 0.5
    return loss_sr
Example #11
def sr_loss_with_uncertainty_and_coef(ctx, pred0, pred1, log_var0, log_var1):
    c0 = srwu_learned_coef(ctx, log_var0)
    c1 = srwu_learned_coef(ctx, log_var1)
    sc0 = sigmas_learned_coef(ctx, log_var0, log_var1)
    sc1 = sigmas_learned_coef(ctx, log_var1, log_var0)
    c0.need_grad = False
    c1.need_grad = False
    sc0.need_grad = False
    sc1.need_grad = False

    #TODO: squared error/absolute error
    s0 = F.exp(log_var0)
    s1 = F.exp(log_var1)
    squared_error = F.squared_error(pred0, pred1)
    with nn.context_scope(ctx):
        loss_sr = F.mean(
            squared_error * (c0 / s0 + c1 / s1) + (sc0 * s0 / s1 + sc1 * s1 / s0)) * 0.5
    return loss_sr
Example #12
def test_forward_backward():
    batch_size, m, h, w = 4, 3, 32, 32
    extension_module = "cpu"
    device_id = 0
    ctx = extension_context(extension_module, device_id=device_id)

    x_l_data = np.random.randn(batch_size, m, h, w)
    y_l_data = (np.random.rand(batch_size, 1) * 10).astype(np.int32)
    x_l = nn.Variable(x_l_data.shape)
    y_l = nn.Variable(y_l_data.shape)
    x_l.d = x_l_data
    y_l.d = y_l_data
    pred = cnn_model_003(ctx, x_l)
    with nn.context_scope(ctx):
        loss = F.mean(F.softmax_cross_entropy(pred, y_l))

    loss.forward()
    loss.backward()
Example #13
def get_model(args, num_classes, test=False, tiny=False):
    """
    Create computation graph and variables.

    Args:

        tiny: Tiny ImageNet mode if True.
    """
    data_size = 320
    nn_in_size = 224
    if tiny:
        data_size = 64
        nn_in_size = 56
    image = nn.Variable([args.batch_size, 3, data_size, data_size])
    label = nn.Variable([args.batch_size, 1])
    pimage = image_preprocess(image, nn_in_size)
    pred, hidden = model_resnet.resnet_imagenet(
        pimage, num_classes, args.num_layers, args.shortcut_type, test=test, tiny=tiny)
    loss = F.mean(F.softmax_cross_entropy(pred, label))
    Model = namedtuple('Model', ['image', 'label', 'pred', 'loss', 'hidden'])
    return Model(image, label, pred, loss, hidden)
Example #14
def test_graph_clear_buffer(seed):
    np.random.seed(313)
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4, 4])
    t = nn.Variable([2, 1])
    x.d = rng.randn(*x.shape)
    t.d = rng.randint(0, 5, size=t.shape)

    # Network definition
    nn.set_default_context(nn.Context())
    nn.clear_parameters()
    x1 = x + 1
    x2 = x1 - 1
    with nn.parameter_scope('conv1'):
        z = PF.convolution(x2, 3, (2, 2))
        z2 = F.relu(z, inplace=True)
    with nn.parameter_scope('fc2'):
        z3 = PF.affine(z2, 5)
    l = F.softmax_cross_entropy(z3, t, 1)
    L = F.mean(l)

    # Forwardprop
    import tempfile
    import os
    tmpd = tempfile.mkdtemp()
    nn.save_parameters(os.path.join(tmpd, 'parameter.h5'))
    first = False
    for cnng in [False, True]:
        for cb in [False, True]:
            _ = nn.load_parameters(os.path.join(tmpd, 'parameter.h5'))
            for v in nn.get_parameters().values():
                v.grad.zero()
            L.forward(clear_no_need_grad=cnng)
            L.backward(clear_buffer=cb)
            if not first:
                first = True
                g = list(nn.get_parameters().values())[0].g.copy()
            else:
                g2 = list(nn.get_parameters().values())[0].g.copy()
                assert np.all(g == g2)
Example #15
valid_iter = data_iterator_simple(load_valid_func,  # reconstructed call head; these names are assumed
                                  len(x_valid),
                                  batch_size,
                                  shuffle=True,
                                  with_file_cache=False)

x = nn.Variable((batch_size, sentence_length))
t = nn.Variable((batch_size, sentence_length, 1))
h = PF.embed(x, vocab_size, embedding_size)
h = LSTM(h, hidden, return_sequences=True)
h = TimeDistributed(PF.affine)(h, hidden, name='hidden')
y = TimeDistributed(PF.affine)(h, vocab_size, name='output')

mask = F.sum(F.sign(t), axis=2)  # do not predict 'pad'.
entropy = TimeDistributedSoftmaxCrossEntropy(y, t) * mask
count = F.sum(mask, axis=1)
loss = F.mean(F.div2(F.sum(entropy, axis=1), count))

# Create solver.
solver = S.Momentum(1e-2, momentum=0.9)
solver.set_parameters(nn.get_parameters())

# Create monitor.
from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
monitor = Monitor('./tmp-lstmlm')
monitor_perplexity = MonitorSeries('perplexity', monitor, interval=1)
monitor_perplexity_valid = MonitorSeries('perplexity_valid',
                                         monitor,
                                         interval=1)

for epoch in range(max_epoch):
    train_loss_set = []
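The LSTM, TimeDistributed, and TimeDistributedSoftmaxCrossEntropy helpers used in this snippet are defined elsewhere in its source. As a rough sketch of the pattern (an assumption, not necessarily the original helper), a TimeDistributed wrapper can fold the time axis into the batch axis, apply the wrapped function, and unfold again:

import nnabla.functions as F


def TimeDistributed(func):
    def wrapper(x, *args, **kwargs):
        # (batch, time, ...) -> (batch * time, ...)
        batch_size, length = x.shape[0], x.shape[1]
        h = F.reshape(x, (batch_size * length,) + tuple(x.shape[2:]))
        h = func(h, *args, **kwargs)
        # (batch * time, ...) -> (batch, time, ...)
        return F.reshape(h, (batch_size, length) + tuple(h.shape[1:]))
    return wrapper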
Example #16
def sr_loss(ctx, pred0, pred1):
    with nn.context_scope(ctx):
        pred_x_u0 = F.softmax(pred0)
        pred_x_u1 = F.softmax(pred1)
        loss_sr = F.mean(F.squared_error(pred_x_u0, pred_x_u1))
    return loss_sr
Example #17
def train():
    parser, args = get_args()

    # Get context.
    ctx = get_extension_context(args.context, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Initialize DataIterator for MNIST.
    train_source, valid_source, args = data.load_datasources(
        parser, args, rng=RandomState(42))

    train_iter = data_iterator(train_source,
                               args.batch_size,
                               RandomState(args.seed),
                               with_memory_cache=False,
                               with_file_cache=False)

    valid_iter = data_iterator(valid_source,
                               args.batch_size,
                               RandomState(args.seed),
                               with_memory_cache=False,
                               with_file_cache=False)

    scaler_mean, scaler_std = get_statistics(args, train_source)

    max_bin = utils.bandwidth_to_max_bin(train_source.sample_rate, args.nfft,
                                         args.bandwidth)

    unmix = model.OpenUnmix(input_mean=scaler_mean,
                            input_scale=scaler_std,
                            nb_channels=args.nb_channels,
                            hidden_size=args.hidden_size,
                            n_fft=args.nfft,
                            n_hop=args.nhop,
                            max_bin=max_bin,
                            sample_rate=train_source.sample_rate)

    # Create input variables.
    audio_shape = [args.batch_size] + list(train_source._get_data(0)[0].shape)
    mixture_audio = nn.Variable(audio_shape)
    target_audio = nn.Variable(audio_shape)

    vmixture_audio = nn.Variable(audio_shape)
    vtarget_audio = nn.Variable(audio_shape)

    # create train graph
    pred_spec = unmix(mixture_audio, test=False)
    pred_spec.persistent = True

    target_spec = model.Spectrogram(*model.STFT(target_audio,
                                                n_fft=unmix.n_fft,
                                                n_hop=unmix.n_hop),
                                    mono=(unmix.nb_channels == 1))

    loss = F.mean(F.squared_error(pred_spec, target_spec), axis=1)

    # Create Solver.
    solver = S.Adam(args.lr)
    solver.set_parameters(nn.get_parameters())

    # Training loop.
    t = tqdm.trange(1, args.epochs + 1, disable=args.quiet)
    es = utils.EarlyStopping(patience=args.patience)

    for epoch in t:
        # TRAINING
        t.set_description("Training Epoch")
        b = tqdm.trange(0,
                        train_source._size // args.batch_size,
                        disable=args.quiet)
        losses = utils.AverageMeter()
        for batch in b:
            mixture_audio.d, target_audio.d = train_iter.next()
            b.set_description("Training Batch")
            solver.zero_grad()
            loss.forward(clear_no_need_grad=True)
            loss.backward(clear_buffer=True)
            solver.weight_decay(args.weight_decay)
            solver.update()
            losses.update(loss.d.copy().mean())
            b.set_postfix(train_loss=losses.avg)

        # VALIDATION
        vlosses = utils.AverageMeter()
        for batch in range(valid_source._size):
            # Create new validation input variables for every batch
            vmixture_audio.d, vtarget_audio.d = valid_iter.next()
            # create validation graph
            vpred_spec = unmix(vmixture_audio, test=True)
            vpred_spec.persistent = True

            vtarget_spec = model.Spectrogram(*model.STFT(vtarget_audio,
                                                         n_fft=unmix.n_fft,
                                                         n_hop=unmix.n_hop),
                                             mono=(unmix.nb_channels == 1))
            vloss = F.mean(F.squared_error(vpred_spec, vtarget_spec), axis=1)

            vloss.forward(clear_buffer=True)
            vlosses.update(vloss.d.copy().mean())

        t.set_postfix(train_loss=losses.avg, val_loss=vlosses.avg)

        stop = es.step(vlosses.avg)
        is_best = vlosses.avg == es.best

        # save current model
        nn.save_parameters(
            os.path.join(args.output, 'checkpoint_%s.h5' % args.target))

        if is_best:
            best_epoch = epoch
            nn.save_parameters(os.path.join(args.output,
                                            '%s.h5' % args.target))

        if stop:
            print("Apply Early Stopping")
            break
Example #18
def augment(batch, aug_list, p_aug=1.0):

    if isinstance(p_aug, float):
        p_aug = nn.Variable.from_numpy_array(p_aug * np.ones((1,)))

    if "flip" in aug_list:
        rnd = F.rand(shape=[batch.shape[0], ])
        batch_aug = F.random_flip(batch, axes=(2, 3))
        batch = F.where(
            F.greater(F.tile(p_aug, batch.shape[0]), rnd), batch_aug, batch)

    if "lrflip" in aug_list:
        rnd = F.rand(shape=[batch.shape[0], ])
        batch_aug = F.random_flip(batch, axes=(3,))
        batch = F.where(
            F.greater(F.tile(p_aug, batch.shape[0]), rnd), batch_aug, batch)

    if "translation" in aug_list and batch.shape[2] >= 8:
        rnd = F.rand(shape=[batch.shape[0], ])
        # Currently nnabla does not support random_shift with border_mode="noise"
        mask = np.ones((1, 3, batch.shape[2], batch.shape[3]))
        mask[:, :, :, 0] = 0
        mask[:, :, :, -1] = 0
        mask[:, :, 0, :] = 0
        mask[:, :, -1, :] = 0
        batch_int = F.concatenate(
            batch, nn.Variable().from_numpy_array(mask), axis=0)
        batch_int_aug = F.random_shift(batch_int, shifts=(
            batch.shape[2]//8, batch.shape[3]//8), border_mode="nearest")
        batch_aug = F.slice(batch_int_aug, start=(
            0, 0, 0, 0), stop=batch.shape)
        mask_var = F.slice(batch_int_aug, start=(
            batch.shape[0], 0, 0, 0), stop=batch_int_aug.shape)
        batch_aug = batch_aug * F.broadcast(mask_var, batch_aug.shape)
        batch = F.where(
            F.greater(F.tile(p_aug, batch.shape[0]), rnd), batch_aug, batch)

    if "color" in aug_list:
        rnd = F.rand(shape=[batch.shape[0], ])
        rnd_contrast = 1.0 + 0.5 * (2.0 * F.rand(shape=[batch.shape[0], 1, 1, 1]) - 1.0)  # from 0.5 to 1.5
        rnd_brightness = 0.5 * (2.0 * F.rand(shape=[batch.shape[0], 1, 1, 1]) - 1.0)  # from -0.5 to 0.5
        rnd_saturation = 2.0 * F.rand(shape=[batch.shape[0], 1, 1, 1])  # from 0.0 to 2.0
        # Brightness
        batch_aug = batch + rnd_brightness
        # Saturation
        mean_s = F.mean(batch_aug, axis=1, keepdims=True)
        batch_aug = rnd_saturation * (batch_aug - mean_s) + mean_s
        # Contrast
        mean_c = F.mean(batch_aug, axis=(1, 2, 3), keepdims=True)
        batch_aug = rnd_contrast * (batch_aug - mean_c) + mean_c
        batch = F.where(
            F.greater(F.tile(p_aug, batch.shape[0]), rnd), batch_aug, batch)

    if "cutout" in aug_list and batch.shape[2] >= 16:
        batch = F.random_erase(batch, prob=p_aug.d[0], replacements=(0.0, 0.0))

    return batch
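A hypothetical usage of the augment() above (the shapes and the augmentation list are illustrative): the augmentations are ordinary graph functions, so they are re-sampled on every forward pass.

import numpy as np
import nnabla as nn

batch = nn.Variable((16, 3, 32, 32))
batch.d = np.random.rand(16, 3, 32, 32)

augmented = augment(batch, ["flip", "cutout"], p_aug=0.5)
augmented.forward()
print(augmented.d.shape)  # (16, 3, 32, 32)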
Example #19
def sr_loss(ctx, pred0, pred1):
    with nn.context_scope(ctx):
        pred_x_u0 = F.softmax(pred0)
        pred_x_u1 = F.softmax(pred1)
        loss_sr = F.mean(F.squared_error(pred_x_u0, pred_x_u1))
    return loss_sr
Example #20
def recon_loss(ctx, pred, x_l):
    with nn.context_scope(ctx):
        loss_recon = F.mean(F.squared_error(pred, x_l))
    return loss_recon
Example #21
def ce_loss(ctx, pred, y_l):
    with nn.context_scope(ctx):
        loss_ce = F.mean(F.softmax_cross_entropy(pred, y_l))
    return loss_ce
Example #22
    def _build(self):
        # inference graph
        self.infer_obs_t = nn.Variable((1, ) + self.obs_shape)
        with nn.parameter_scope('trainable'):
            infer_dist = policy_network(self.infer_obs_t, self.action_size,
                                        'actor')
        self.infer_act_t, _ = _squash_action(infer_dist)
        self.deterministic_act_t = infer_dist.mean()

        # training graph
        self.obss_t = nn.Variable((self.batch_size, ) + self.obs_shape)
        self.acts_t = nn.Variable((self.batch_size, self.action_size))
        self.rews_tp1 = nn.Variable((self.batch_size, 1))
        self.obss_tp1 = nn.Variable((self.batch_size, ) + self.obs_shape)
        self.ters_tp1 = nn.Variable((self.batch_size, 1))

        with nn.parameter_scope('trainable'):
            dist = policy_network(self.obss_t, self.action_size, 'actor')
            squashed_act_t, log_prob_t = _squash_action(dist)
            v_t = v_network(self.obss_t, 'value')
            q_t1 = q_network(self.obss_t, self.acts_t, 'critic/1')
            q_t2 = q_network(self.obss_t, self.acts_t, 'critic/2')
            q_t1_with_actor = q_network(self.obss_t, squashed_act_t,
                                        'critic/1')
            q_t2_with_actor = q_network(self.obss_t, squashed_act_t,
                                        'critic/2')

        with nn.parameter_scope('target'):
            v_tp1 = v_network(self.obss_tp1, 'value')

        # value loss
        q_t = F.minimum2(q_t1_with_actor, q_t2_with_actor)
        v_target = q_t - log_prob_t
        v_target.need_grad = False
        self.value_loss = 0.5 * F.mean(F.squared_error(v_t, v_target))

        # q function loss
        scaled_rews_tp1 = self.rews_tp1 * self.reward_scale
        q_target = scaled_rews_tp1 + self.gamma * v_tp1 * (1.0 - self.ters_tp1)
        q_target.need_grad = False
        q1_loss = 0.5 * F.mean(F.squared_error(q_t1, q_target))
        q2_loss = 0.5 * F.mean(F.squared_error(q_t2, q_target))
        self.critic_loss = q1_loss + q2_loss

        # policy function loss
        mean_loss = 0.5 * F.mean(dist.mean()**2)
        logstd_loss = 0.5 * F.mean(F.log(dist.stddev())**2)
        policy_reg_loss = self.policy_reg * (mean_loss + logstd_loss)
        self.objective_loss = F.mean(log_prob_t - q_t)
        self.actor_loss = self.objective_loss + policy_reg_loss

        # trainable parameters
        with nn.parameter_scope('trainable'):
            with nn.parameter_scope('value'):
                value_params = nn.get_parameters()
            with nn.parameter_scope('critic'):
                critic_params = nn.get_parameters()
            with nn.parameter_scope('actor'):
                actor_params = nn.get_parameters()
        # target parameters
        with nn.parameter_scope('target/value'):
            target_params = nn.get_parameters()

        # target update
        update_targets = []
        sync_targets = []
        for key, src in value_params.items():
            dst = target_params[key]
            updated_dst = (1.0 - self.tau) * dst + self.tau * src
            update_targets.append(F.assign(dst, updated_dst))
            sync_targets.append(F.assign(dst, src))
        self.update_target_expr = F.sink(*update_targets)
        self.sync_target_expr = F.sink(*sync_targets)

        # setup solvers
        self.value_solver = S.Adam(self.value_lr)
        self.value_solver.set_parameters(value_params)
        self.critic_solver = S.Adam(self.critic_lr)
        self.critic_solver.set_parameters(critic_params)
        self.actor_solver = S.Adam(self.actor_lr)
        self.actor_solver.set_parameters(actor_params)
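The target update above hinges on F.assign, which copies the value of its second argument into the first when the node is executed, and F.sink, which bundles several such operations into a single node that can be forwarded at once. A self-contained sketch of that pattern (parameter names and the tau value are illustrative, not taken from the original class):

import numpy as np
import nnabla as nn
import nnabla.functions as F

tau = 0.005
src = nn.parameter.get_parameter_or_create('trainable/w', (3,), np.ones((3,), dtype=np.float32))
dst = nn.parameter.get_parameter_or_create('target/w', (3,), np.zeros((3,), dtype=np.float32))

soft_update = F.sink(F.assign(dst, (1.0 - tau) * dst + tau * src))
hard_sync = F.sink(F.assign(dst, src))

hard_sync.forward()    # dst now equals src
soft_update.forward()  # dst <- (1 - tau) * dst + tau * src
print(dst.d)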
Example #23
def recon_loss(ctx, pred, x_l):
    with nn.context_scope(ctx):
        loss_recon = F.mean(F.squared_error(pred, x_l))
    return loss_recon
Example #24
def train():
    """
    Main script.

    Steps:

    * Parse command line arguments.
    * Specify a context for computation.
    * Initialize DataIterator for MNIST.
    * Construct a computation graph for training and validation.
    * Initialize a solver and set parameter variables to it.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Compute error rate for validation data (periodically)
      * Get a next minibatch.
      * Execute forwardprop on the training graph.
      * Compute training error
      * Set parameter gradients zero
      * Execute backprop.
      * Solver updates parameters by using gradients computed by backprop.
    """
    args = get_args()

    from numpy.random import seed
    seed(0)

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    if args.net == 'lenet':
        mnist_cnn_prediction = mnist_lenet_prediction
    elif args.net == 'resnet':
        mnist_cnn_prediction = mnist_resnet_prediction
    else:
        raise ValueError("Unknown network type {}".format(args.net))

    # TRAIN
    # Create input variables.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    pred = mnist_cnn_prediction(image, test=False, aug=args.augment_train)
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    vpred = mnist_cnn_prediction(vimage, test=True, aug=args.augment_test)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)

    # Initialize DataIterator for MNIST.
    from numpy.random import RandomState
    data = data_iterator_mnist(args.batch_size, True, rng=RandomState(1223))
    vdata = data_iterator_mnist(args.batch_size, False)
    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                vpred.data.cast(np.float32, ctx)
                ve += categorical_error(vpred.d, vlabel.d)
            monitor_verr.add(i, ve / args.val_iter)
        if i % args.model_save_interval == 0:
            nn.save_parameters(
                os.path.join(args.model_save_path, 'params_%06d.h5' % i))
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        loss.data.cast(np.float32, ctx)
        pred.data.cast(np.float32, ctx)
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    ve = 0.0
    for j in range(args.val_iter):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    monitor_verr.add(i, ve / args.val_iter)

    parameter_file = os.path.join(
        args.model_save_path,
        '{}_params_{:06}.h5'.format(args.net, args.max_iter))
    nn.save_parameters(parameter_file)

    # append F.Softmax to the prediction graph so users see intuitive outputs
    runtime_contents = {
        'networks': [{
            'name': 'Validation',
            'batch_size': args.batch_size,
            'outputs': {
                'y': F.softmax(vpred)
            },
            'names': {
                'x': vimage
            }
        }],
        'executors': [{
            'name': 'Runtime',
            'network': 'Validation',
            'data': ['x'],
            'output': ['y']
        }]
    }
    save.save(
        os.path.join(args.model_save_path, '{}_result.nnp'.format(args.net)),
        runtime_contents)
Example #25
def train(args):
    """
    Main script.
    """

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    margin = 1.0  # Margin for contrastive loss.

    # TRAIN
    # Create input variables.
    image0 = nn.Variable([args.batch_size, 1, 28, 28])
    image1 = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size])
    # Create prediction graph.
    pred = mnist_lenet_siamese(image0, image1, test=False)
    # Create loss function.
    loss = F.mean(contrastive_loss(pred, label, margin))

    # TEST
    # Create input variables.
    vimage0 = nn.Variable([args.batch_size, 1, 28, 28])
    vimage1 = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size])
    # Create prediction graph.
    vpred = mnist_lenet_siamese(vimage0, vimage1, test=True)
    vloss = F.mean(contrastive_loss(vpred, vlabel, margin))

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    start_point = 0
    if args.checkpoint is not None:
        # load weights and solver state info from specified checkpoint file.
        start_point = load_checkpoint(args.checkpoint, solver)

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_vloss = M.MonitorSeries("Test loss", monitor, interval=10)

    # Initialize DataIterator for MNIST.
    rng = np.random.RandomState(313)
    data = siamese_data_iterator(args.batch_size, True, rng)
    vdata = siamese_data_iterator(args.batch_size, False, rng)

    # Training loop.
    for i in range(start_point, args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage0.d, vimage1.d, vlabel.d = vdata.next()
                vloss.forward(clear_buffer=True)
                ve += vloss.d
            monitor_vloss.add(i, ve / args.val_iter)
        if i % args.model_save_interval == 0:
            # save checkpoint file
            save_checkpoint(args.model_save_path, i, solver)
        image0.d, image1.d, label.d = data.next()
        solver.zero_grad()
        # Training forward, backward and update
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        monitor_loss.add(i, loss.d.copy())
        monitor_time.add(i)

    parameter_file = os.path.join(args.model_save_path,
                                  'params_%06d.h5' % args.max_iter)
    nn.save_parameters(parameter_file)
Example #26
def classification_svd():
    args = get_args()

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    mnist_cnn_prediction = mnist_lenet_prediction_slim

    # TRAIN
    reference = "reference"
    slim = "slim"
    rrate = 0.5  # reduction rate
    # Create input variables.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size, 1])
    # Create `reference` and "slim" prediction graph.
    model_load_path = args.model_load_path
    pred = mnist_cnn_prediction(image, scope=slim, rrate=rrate, test=False)
    pred.persistent = True

    # Decompose and set parameters
    decompose_network_and_set_params(model_load_path, reference, slim, rrate)
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create reference prediction graph.
    vpred = mnist_cnn_prediction(vimage, scope=slim, rrate=rrate, test=True)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    with nn.parameter_scope(slim):
        solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)

    # Initialize DataIterator for MNIST.
    data = data_iterator_mnist(args.batch_size, True)
    vdata = data_iterator_mnist(args.batch_size, False)
    best_ve = 1.0
    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            monitor_verr.add(i, ve / args.val_iter)
        if ve < best_ve:
            nn.save_parameters(
                os.path.join(args.model_save_path, 'params_%06d.h5' % i))
            best_ve = ve
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    ve = 0.0
    for j in range(args.val_iter):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    monitor_verr.add(i, ve / args.val_iter)

    parameter_file = os.path.join(args.model_save_path,
                                  'params_{:06}.h5'.format(args.max_iter))
    nn.save_parameters(parameter_file)
Example #27
def sigma_regularization(ctx, log_var, one):
    with nn.context_scope(ctx):
        h = F.exp(log_var)
        h = F.pow_scalar(h, 0.5)
        r = F.mean(F.abs(h - one))
    return r
Example #28
def sr_loss(ctx, pred0, pred1):
    with nn.context_scope(ctx):
        loss_sr = F.mean(F.abs(pred0 - pred1))
    return loss_sr
Example #29
def ce_loss(ctx, pred, y_l):
    with nn.context_scope(ctx):
        loss_ce = F.mean(F.softmax_cross_entropy(pred, y_l))
    return loss_ce
Example #30
def train(args, train_dataset, tokenizer):
    """ Train the model """
    # Load the pretrained model
    nn.load_parameters(args.pretrained_model)
    # Drop final layer for task-specific fine-tuning
    nn.parameter.pop_parameter('affine_seq_class/affine/W')
    nn.parameter.pop_parameter('affine_seq_class/affine/b')

    train_dataloader = data_iterator(
        train_dataset, batch_size=args.train_batch_size)

    global_step = 0
    train_loss = 0.0
    model = BertForSequenceClassification()

    input_ids = nn.Variable((args.train_batch_size, args.max_seq_length))
    attention_mask = nn.Variable((args.train_batch_size, args.max_seq_length))
    token_type_ids = nn.Variable((args.train_batch_size, args.max_seq_length))
    labels = nn.Variable((args.train_batch_size, ))

    input_ids_eval = nn.Variable((args.eval_batch_size, args.max_seq_length))
    attention_mask_eval = nn.Variable(
        (args.eval_batch_size, args.max_seq_length))
    token_type_ids_eval = nn.Variable(
        (args.eval_batch_size, args.max_seq_length))
    labels_eval = nn.Variable((args.eval_batch_size, ))

    activation = F.gelu
    if args.activation == 'relu':
        activation = F.relu
    loss, _, train_error = model(args, input_ids=input_ids, attention_mask=attention_mask,
                                 token_type_ids=token_type_ids, labels=labels,
                                 num_labels=args.num_labels, vocab_size=args.vocab_size,
                                 num_embed_dim=args.num_embed_dim,
                                 num_pos_ids=args.num_position_ids,
                                 num_attention_layers=args.num_attention_layers,
                                 num_attention_embed_dim=args.num_attention_embed_dim,
                                 num_attention_heads=args.num_attention_heads,
                                 num_attention_dim_feedforward=args.num_attention_dim_feedforward,
                                 attention_activation=activation, pool_outmap=args.num_pool_outmap,
                                 embed_dropout_prob=args.embed_dropout,
                                 attention_dropout_prob=args.attention_dropout,
                                 dropout_prob=args.last_dropout, test=False)

    loss.persistent = True
    if args.solver == 'Adam':
        solver = S.Adam(args.learning_rate, eps=args.adam_epsilon)
    else:
        solver = S.AdamW(args.learning_rate, eps=args.adam_epsilon)
    solver.set_parameters(nn.get_parameters())

    monitor = Monitor(args.output_dir)
    monitor_loss = MonitorSeries(
        "Training Loss", monitor, interval=10)
    monitor_eloss = MonitorSeries(
        "Evaluation Loss", monitor, interval=10)
    monitor_train_error = MonitorSeries(
        "Training Error Rate", monitor, interval=10)
    monitor_lr = MonitorSeries(
        "learning Rate", monitor, interval=10)

    total_steps = train_dataloader.size // args.train_batch_size
    var_linear = total_steps * args.num_train_epochs
    var_warmup = total_steps * (args.num_train_epochs - 1)
    for epoch in range(args.num_train_epochs):
        logger.info("Starting Epoch %d out of %d",
                    epoch+1, args.num_train_epochs)
        for it in range(total_steps):
            batch = train_dataloader.next()
            input_ids.d = batch[0]
            attention_mask.d = batch[1]
            token_type_ids.d = batch[2]
            labels.d = batch[3]

            learning_rate_linear = lr_linear(global_step, var_linear)
            learning_rate = args.learning_rate * learning_rate_linear

            if epoch == 0:
                learning_rate = args.learning_rate * (global_step/total_steps)
            if epoch > 0:
                learning_rate_linear = lr_linear(
                    (global_step-total_steps), var_warmup)
                learning_rate = args.learning_rate * learning_rate_linear

            solver.zero_grad()
            nn.forward_all([loss, train_error], clear_no_need_grad=True)
            loss.backward(clear_buffer=True)
            solver.weight_decay(args.weight_decay)
            solver.clip_grad_by_norm(args.max_grad_norm)
            solver.set_learning_rate(learning_rate)
            solver.update()

            monitor_loss.add(
                (train_dataloader.size//args.train_batch_size)*epoch+it,
                loss.d.copy())
            monitor_train_error.add(
                (train_dataloader.size//args.train_batch_size)*epoch+it,
                train_error.d.copy())
            monitor_lr.add(global_step, learning_rate)
            global_step += 1
            train_loss += F.mean(loss.data)

        eval_task_names = (
            "mnli", "mnli-mm") if args.task_name == "mnli" else (args.task_name,)
        eval_outputs_dirs = (args.output_dir, args.output_dir +
                             '-MM') if args.task_name == "mnli" else (args.output_dir,)

        results = {}
        for eval_task, eval_output_dir in zip(eval_task_names, eval_outputs_dirs):
            print(eval_task)
            eval_dataset = BERTDataSource(
                args, tokenizer, evaluate=True, shuffle=False)
            if not os.path.exists(eval_output_dir):
                os.makedirs(eval_output_dir)

            eval_dataloader = data_iterator(
                eval_dataset, batch_size=args.eval_batch_size)
            total_eval_steps = eval_dataloader.size // args.eval_batch_size
            eval_loss = 0.0
            nb_eval_steps = 0
            preds = None
            out_label_ids = None
            tmp_eval_loss, logits, eval_error = model(args, input_ids=input_ids_eval,
                                                      attention_mask=attention_mask_eval,
                                                      token_type_ids=token_type_ids_eval, labels=labels_eval,
                                                      num_labels=args.num_labels, vocab_size=args.vocab_size,
                                                      num_embed_dim=args.num_embed_dim,
                                                      num_pos_ids=args.num_position_ids,
                                                      num_attention_layers=args.num_attention_layers,
                                                      num_attention_embed_dim=args.num_attention_embed_dim,
                                                      num_attention_heads=args.num_attention_heads,
                                                      num_attention_dim_feedforward=args.num_attention_dim_feedforward,
                                                      attention_activation=activation, pool_outmap=args.num_pool_outmap,
                                                      embed_dropout_prob=args.embed_dropout,
                                                      attention_dropout_prob=args.attention_dropout,
                                                      dropout_prob=args.last_dropout, test=True)

            tmp_eval_loss.persistent = True
            eval_loss += F.mean(tmp_eval_loss)
            for it in range(total_eval_steps):
                print(it, "  ", total_eval_steps)
                batch_eval = eval_dataloader.next()
                input_ids_eval.d = batch_eval[0]
                attention_mask_eval.d = batch_eval[1]
                token_type_ids_eval.d = batch_eval[2]
                labels_eval.d = batch_eval[3]
                nb_eval_steps += 1
                eval_loss.forward()
                monitor_eloss.add(it, eval_loss.d.copy())

                if preds is None:
                    preds = logits.d.copy()
                    out_label_ids = labels_eval.d.copy()
                else:
                    preds = np.append(preds, logits.d.copy(), axis=0)

                    out_label_ids = np.append(
                        out_label_ids, labels_eval.d.copy(), axis=0)
            eval_loss = eval_loss.d / nb_eval_steps
            if args.output_mode == "classification":
                preds = np.argmax(preds, axis=1)
            elif args.output_mode == "regression":
                preds = np.squeeze(preds)

            result = compute_metrics(eval_task, preds, out_label_ids)
            results.update(result)

            output_eval_file = os.path.join(
                eval_output_dir, "", "eval_results.txt")
            with open(output_eval_file, "a") as writer:
                logger.info("***** Evaluation results {} *****".format(""))
                for key in sorted(result.keys()):
                    logger.info("%d  %s = %s\n", epoch +
                                1, key, str(result[key]))
                    writer.write("%d %s = %s\n" %
                                 (epoch+1, key, str(result[key])))
                print("results", results)
    return results
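The lr_linear schedule used in the loop above is defined elsewhere in this example's source. A plausible definition, consistent with how it is called (current step, total number of steps, returning a decay factor between 1 and 0), might be:

def lr_linear(current_step, total_steps):
    # Hypothetical linear decay from 1.0 down to 0.0 over total_steps.
    return max(0.0, 1.0 - float(current_step) / float(total_steps))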
Example #31
    def _build(self):
        # inference graph
        self.infer_obs_t = nn.Variable((1, ) + self.obs_shape)
        with nn.parameter_scope('trainable'):
            infer_dist = policy_network(self.infer_obs_t, self.action_size,
                                        'actor')
        self.infer_act_t, _ = _squash_action(infer_dist)
        self.deterministic_act_t = infer_dist.mean()

        # training graph
        self.obss_t = nn.Variable((self.batch_size, ) + self.obs_shape)
        self.acts_t = nn.Variable((self.batch_size, self.action_size))
        self.rews_tp1 = nn.Variable((self.batch_size, 1))
        self.obss_tp1 = nn.Variable((self.batch_size, ) + self.obs_shape)
        self.ters_tp1 = nn.Variable((self.batch_size, 1))

        with nn.parameter_scope('trainable'):
            self.log_temp = get_parameter_or_create('temp', [1, 1],
                                                    ConstantInitializer(0.0))
            dist_t = policy_network(self.obss_t, self.action_size, 'actor')
            dist_tp1 = policy_network(self.obss_tp1, self.action_size, 'actor')
            squashed_act_t, log_prob_t = _squash_action(dist_t)
            squashed_act_tp1, log_prob_tp1 = _squash_action(dist_tp1)
            q1_t = q_network(self.obss_t, self.acts_t, 'critic/1')
            q2_t = q_network(self.obss_t, self.acts_t, 'critic/2')
            q1_t_with_actor = q_network(self.obss_t, squashed_act_t,
                                        'critic/1')
            q2_t_with_actor = q_network(self.obss_t, squashed_act_t,
                                        'critic/2')

        with nn.parameter_scope('target'):
            q1_tp1 = q_network(self.obss_tp1, squashed_act_tp1, 'critic/1')
            q2_tp1 = q_network(self.obss_tp1, squashed_act_tp1, 'critic/2')

        # q function loss
        q_tp1 = F.minimum2(q1_tp1, q2_tp1)
        entropy_tp1 = F.exp(self.log_temp) * log_prob_tp1
        mask = (1.0 - self.ters_tp1)
        q_target = self.rews_tp1 + self.gamma * (q_tp1 - entropy_tp1) * mask
        q_target.need_grad = False
        q1_loss = 0.5 * F.mean(F.squared_error(q1_t, q_target))
        q2_loss = 0.5 * F.mean(F.squared_error(q2_t, q_target))
        self.critic_loss = q1_loss + q2_loss

        # policy function loss
        q_t = F.minimum2(q1_t_with_actor, q2_t_with_actor)
        entropy_t = F.exp(self.log_temp) * log_prob_t
        self.actor_loss = F.mean(entropy_t - q_t)

        # temperature loss
        temp_target = log_prob_t - self.action_size
        temp_target.need_grad = False
        self.temp_loss = -F.mean(F.exp(self.log_temp) * temp_target)

        # trainable parameters
        with nn.parameter_scope('trainable'):
            with nn.parameter_scope('critic'):
                critic_params = nn.get_parameters()
            with nn.parameter_scope('actor'):
                actor_params = nn.get_parameters()
        # target parameters
        with nn.parameter_scope('target/critic'):
            target_params = nn.get_parameters()

        # target update
        update_targets = []
        sync_targets = []
        for key, src in critic_params.items():
            dst = target_params[key]
            updated_dst = (1.0 - self.tau) * dst + self.tau * src
            update_targets.append(F.assign(dst, updated_dst))
            sync_targets.append(F.assign(dst, src))
        self.update_target_expr = F.sink(*update_targets)
        self.sync_target_expr = F.sink(*sync_targets)

        # setup solvers
        self.critic_solver = S.Adam(self.critic_lr)
        self.critic_solver.set_parameters(critic_params)
        self.actor_solver = S.Adam(self.actor_lr)
        self.actor_solver.set_parameters(actor_params)
        self.temp_solver = S.Adam(self.temp_lr)
        self.temp_solver.set_parameters({'temp': self.log_temp})
Example #32
def sr_loss(ctx, pred0, pred1):
    with nn.context_scope(ctx):
        loss_sr = F.mean(F.squared_error(pred0, pred1))
    return loss_sr
Example #33
def cifar10_resnet32_loss(pred, label):
    loss = F.mean(F.softmax_cross_entropy(pred, label))
    return loss
Example #34
def train():
    """
    Naive Multi-Device Training

    NOTE: the communicator exposes low-level interfaces

    * Parse command line arguments.
    * Instantiate a communicator and set parameter variables.
    * Specify contexts for computation.
    * Initialize DataIterator.
    * Construct a computation graph for training and one for validation.
    * Initialize solver and set parameter variables to that.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Compute error rate for validation data (periodically)
      * Get a next minibatch.
      * Execute forwardprop
      * Set parameter gradients zero
      * Execute backprop.
      * AllReduce for gradients
      * Solver updates parameters by using gradients computed by backprop and all reduce.
      * Compute training error
    """
    # Parse args
    args = get_args()
    n_train_samples = 50000
    n_valid_samples = 10000
    bs_valid = args.batch_size

    # Create Communicator and Context
    extension_module = "cudnn"
    ctx = get_extension_context(extension_module, type_config=args.type_config)
    comm = C.MultiProcessDataParalellCommunicator(ctx)
    comm.init()
    n_devices = comm.size
    mpi_rank = comm.rank
    mpi_local_rank = comm.local_rank
    device_id = mpi_local_rank
    ctx.device_id = str(device_id)
    nn.set_default_context(ctx)

    # Model
    rng = np.random.RandomState(313)
    comm_syncbn = comm if args.sync_bn else None
    if args.net == "cifar10_resnet23":
        prediction = functools.partial(resnet23_prediction,
                                       rng=rng,
                                       ncls=10,
                                       nmaps=32,
                                       act=F.relu,
                                       comm=comm_syncbn)
        data_iterator = data_iterator_cifar10
    if args.net == "cifar100_resnet23":
        prediction = functools.partial(resnet23_prediction,
                                       rng=rng,
                                       ncls=100,
                                       nmaps=384,
                                       act=F.elu,
                                       comm=comm_syncbn)
        data_iterator = data_iterator_cifar100

    # Create training graphs
    image_train = nn.Variable((args.batch_size, 3, 32, 32))
    label_train = nn.Variable((args.batch_size, 1))
    pred_train = prediction(image_train, test=False)
    pred_train.persistent = True
    loss_train = (loss_function(pred_train, label_train) /
                  n_devices).apply(persistent=True)
    error_train = F.mean(F.top_n_error(pred_train, label_train,
                                       axis=1)).apply(persistent=True)
    loss_error_train = F.sink(loss_train, error_train)
    input_image_train = {"image": image_train, "label": label_train}

    # Create validation graph
    image_valid = nn.Variable((bs_valid, 3, 32, 32))
    label_valid = nn.Variable((args.batch_size, 1))
    pred_valid = prediction(image_valid, test=True)
    error_valid = F.mean(F.top_n_error(pred_valid, label_valid, axis=1))
    input_image_valid = {"image": image_valid, "label": label_valid}

    # Solvers
    solver = S.Adam()
    solver.set_parameters(nn.get_parameters())
    base_lr = args.learning_rate
    warmup_iter = int(
        1. * n_train_samples / args.batch_size / n_devices) * args.warmup_epoch
    warmup_slope = base_lr * (n_devices - 1) / warmup_iter
    solver.set_learning_rate(base_lr)

    # Create monitor
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=10)
    monitor_verr = MonitorSeries("Validation error", monitor, interval=1)
    monitor_vtime = MonitorTimeElapsed("Validation time", monitor, interval=1)

    # Data Iterator
    rng = np.random.RandomState(device_id)
    _, tdata = data_iterator(args.batch_size, True, rng)
    vsource, vdata = data_iterator(args.batch_size, False)

    # loss_error_train.forward()

    # Training-loop
    ve = nn.Variable()
    for i in range(int(args.max_iter / n_devices)):
        # Validation
        if i % int(n_train_samples / args.batch_size / n_devices) == 0:
            ve_local = 0.
            k = 0
            idx = np.random.permutation(n_valid_samples)
            val_images = vsource.images[idx]
            val_labels = vsource.labels[idx]
            for j in range(int(n_valid_samples / n_devices * mpi_rank),
                           int(n_valid_samples / n_devices * (mpi_rank + 1)),
                           bs_valid):
                image = val_images[j:j + bs_valid]
                label = val_labels[j:j + bs_valid]
                if len(image) != bs_valid:  # note that smaller batch is ignored
                    continue
                input_image_valid["image"].d = image
                input_image_valid["label"].d = label
                error_valid.forward(clear_buffer=True)
                ve_local += error_valid.d.copy()
                k += 1
            ve_local /= k
            ve.d = ve_local
            comm.all_reduce(ve.data, division=True, inplace=True)

            # Save model
            if device_id == 0:
                monitor_verr.add(i * n_devices, ve.d.copy())
                monitor_vtime.add(i * n_devices)
                if i % int(args.model_save_interval / n_devices) == 0:
                    nn.save_parameters(
                        os.path.join(args.model_save_path,
                                     'params_%06d.h5' % i))

        # Forward/Zerograd
        image, label = tdata.next()
        input_image_train["image"].d = image
        input_image_train["label"].d = label
        loss_error_train.forward(clear_no_need_grad=True)
        solver.zero_grad()

        # Backward/AllReduce
        backward_and_all_reduce(
            loss_error_train,
            comm,
            with_all_reduce_callback=args.with_all_reduce_callback)

        # Solvers update
        solver.update()

        # Linear Warmup
        if i <= warmup_iter:
            lr = base_lr + warmup_slope * i
            solver.set_learning_rate(lr)

        if device_id == 0:  # loss and error locally, and elapsed time
            monitor_loss.add(i * n_devices, loss_train.d.copy())
            monitor_err.add(i * n_devices, error_train.d.copy())
            monitor_time.add(i * n_devices)

        # exit(0)

    if device_id == 0:
        nn.save_parameters(
            os.path.join(args.model_save_path,
                         'params_%06d.h5' % (args.max_iter / n_devices)))
Example #35
def train(args):
    """
    Main script.
    """

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    margin = 1.0  # Margin for contrastive loss.

    # TRAIN
    # Create input variables.
    image0 = nn.Variable([args.batch_size, 1, 28, 28])
    image1 = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size])
    # Create prediction graph.
    pred = mnist_lenet_siamese(image0, image1, test=False)
    # Create loss function.
    loss = F.mean(contrastive_loss(pred, label, margin))

    # TEST
    # Create input variables.
    vimage0 = nn.Variable([args.batch_size, 1, 28, 28])
    vimage1 = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size])
    # Create prediction graph.
    vpred = mnist_lenet_siamese(vimage0, vimage1, test=True)
    vloss = F.mean(contrastive_loss(vpred, vlabel, margin))

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_vloss = M.MonitorSeries("Test loss", monitor, interval=10)

    # Initialize DataIterator for MNIST.
    rng = np.random.RandomState(313)
    data = siamese_data_iterator(args.batch_size, True, rng)
    vdata = siamese_data_iterator(args.batch_size, False, rng)
    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage0.d, vimage1.d, vlabel.d = vdata.next()
                vloss.forward(clear_buffer=True)
                ve += vloss.d
            monitor_vloss.add(i, ve / args.val_iter)
        if i % args.model_save_interval == 0:
            nn.save_parameters(os.path.join(
                args.model_save_path, 'params_%06d.h5' % i))
        image0.d, image1.d, label.d = data.next()
        solver.zero_grad()
        # Training forward, backward and update
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        monitor_loss.add(i, loss.d.copy())
        monitor_time.add(i)

    parameter_file = os.path.join(
        args.model_save_path, 'params_%06d.h5' % args.max_iter)
    nn.save_parameters(parameter_file)

    nnp_file = os.path.join(
        args.model_save_path, 'siamese_%06d.nnp' % (args.max_iter))
    runtime_contents = {
        'networks': [
            {'name': 'Validation',
             'batch_size': args.batch_size,
             'outputs': {'y': vpred},
             'names': {'x0': vimage0, 'x1': vimage1}}],
        'executors': [
            {'name': 'Runtime',
             'network': 'Validation',
             'data': ['x0', 'x1'],
             'output': ['y']}]}
    save.save(nnp_file, runtime_contents)

    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [vimage0.d, vimage1.d], [
                      vimage0, vimage1], vpred, nnp_file)
Exemple #36
0
def ce_soft(pred, label):
    elms = - F.softmax(label, axis=1) * F.log(F.softmax(pred, axis=1))
    loss = F.mean(F.sum(elms, axis=1))
    return loss
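
A minimal usage sketch of ce_soft for distillation-style training; the student/teacher names, shapes, and random inputs below are illustrative, not taken from the snippet:

import numpy as np
import nnabla as nn
import nnabla.functions as F

batch_size, n_class = 8, 10
student_logits = nn.Variable((batch_size, n_class))
teacher_logits = nn.Variable((batch_size, n_class))
loss = ce_soft(student_logits, teacher_logits)

student_logits.d = np.random.randn(batch_size, n_class)
teacher_logits.d = np.random.randn(batch_size, n_class)
loss.forward()
print(loss.d)  # scalar soft-target cross entropy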
Exemple #37
0
def loss_function(pred, label):
    loss = F.mean(F.softmax_cross_entropy(pred, label))
    return loss
def train():
    args = get_args()

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(
        args.context, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    if args.net == "cifar10_resnet23_prediction":
        model_prediction = cifar10_resnet23_prediction

    # TRAIN
    maps = 64
    data_iterator = data_iterator_cifar10
    c = 3
    h = w = 32
    n_train = 50000
    n_valid = 10000

    # Create input variables.
    image = nn.Variable([args.batch_size, c, h, w])
    label = nn.Variable([args.batch_size, 1])
    # Create model_prediction graph.
    pred = model_prediction(image, maps=maps, test=False)
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # SSL Regularization
    loss += ssl_regularization(nn.get_parameters(),
                               args.filter_decay, args.channel_decay)

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, c, h, w])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    vpred = model_prediction(vimage, maps=maps, test=True)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=1)

    # Initialize DataIterator
    data = data_iterator(args.batch_size, True)
    vdata = data_iterator(args.batch_size, False)
    best_ve = 1.0
    ve = 1.0
    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(int(n_valid / args.batch_size)):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            ve /= int(n_valid / args.batch_size)
            monitor_verr.add(i, ve)
        if ve < best_ve:
            nn.save_parameters(os.path.join(
                args.model_save_path, 'params_%06d.h5' % i))
            best_ve = ve
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    ve = 0.0
    for j in range(int(n_valid / args.batch_size)):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    ve /= int(n_valid / args.batch_size)
    monitor_verr.add(i, ve)

    parameter_file = os.path.join(
        args.model_save_path, 'params_{:06}.h5'.format(args.max_iter))
    nn.save_parameters(parameter_file)
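
ssl_regularization is not defined in this snippet. Assuming it implements the usual structured-sparsity (group-lasso) penalty over convolution filters and channels, a minimal sketch could look like this:

import nnabla.functions as F

def ssl_regularization(params, filter_decay, channel_decay):
    # Hypothetical sketch: group L2 norms over filters (output channels) and
    # channels (input channels) of every 4-D convolution weight.
    reg = 0
    for name, w in params.items():
        if len(w.shape) != 4:  # only conv weights shaped (O, I, H, W)
            continue
        filter_norm = F.pow_scalar(F.sum(F.pow_scalar(w, 2.0), axis=(1, 2, 3)), 0.5)
        channel_norm = F.pow_scalar(F.sum(F.pow_scalar(w, 2.0), axis=(0, 2, 3)), 0.5)
        reg += filter_decay * F.sum(filter_norm) + channel_decay * F.sum(channel_norm)
    return reg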
Exemple #39
0
def vae(x, shape_z, test=False):
    """
    Function to calculate the ELBO (evidence lower bound) loss.
    This sample uses a Bernoulli decoder (generator).

    Args:
        x(`~nnabla.Variable`): N-D array
        shape_z(tuple of int): size of z
        test(bool): False for training (sample z), True for test (use the mean)

    Returns:
        ~nnabla.Variable: Elbo loss

    """

    #############################################
    # Encoder of 2 fully connected layers       #
    #############################################

    # Normalize input
    xa = x / 256.
    batch_size = x.shape[0]

    # Two fully connected layers; ELU is used instead of the original Softplus.
    h = F.elu(PF.affine(xa, (500, ), name='fc1'))
    h = F.elu(PF.affine(h, (500, ), name='fc2'))

    # The outputs are the parameters of a Gaussian probability density.
    mu = PF.affine(h, shape_z, name='fc_mu')
    logvar = PF.affine(h, shape_z, name='fc_logvar')
    sigma = F.exp(0.5 * logvar)

    # The prior variable and the reparameterization trick
    if not test:
        # training with reparameterization trick
        epsilon = F.randn(mu=0, sigma=1, shape=(batch_size, ) + shape_z)
        z = mu + sigma * epsilon
    else:
        # test without randomness
        z = mu

    #############################################
    # Decoder of 2 fully connected layers       #
    #############################################

    # Two fully connected layers; ELU is used instead of the original Softplus.
    h = F.elu(PF.affine(z, (500, ), name='fc3'))
    h = F.elu(PF.affine(h, (500, ), name='fc4'))

    # The outputs are the per-pixel logits of the Bernoulli probabilities.
    prob = PF.affine(h, (1, 28, 28), name='fc5')

    #############################################
    # Elbo components and loss objective        #
    #############################################

    # Binarized input
    xb = F.greater_equal_scalar(xa, 0.5)

    # E_q(z|x)[log(q(z|x))]
    # omitting constant terms that cancel in the summed loss
    logqz = 0.5 * F.sum(1.0 + logvar, axis=1)

    # E_q(z|x)[log(p(z))]
    # omitting constant terms that cancel in the summed loss
    logpz = 0.5 * F.sum(mu * mu + sigma * sigma, axis=1)

    # E_q(z|x)[log(p(x|z))]
    logpx = F.sum(F.sigmoid_cross_entropy(prob, xb), axis=(1, 2, 3))

    # VAE loss: the negative evidence lower bound
    loss = F.mean(logpx + logpz - logqz)

    return loss
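
A minimal training-step sketch for the vae loss above; the batch size, latent size, and the random input are illustrative stand-ins for a real MNIST data iterator:

import numpy as np
import nnabla as nn
import nnabla.solvers as S

batch_size, shape_z = 64, (50,)
x = nn.Variable((batch_size, 1, 28, 28))
loss = vae(x, shape_z, test=False)

solver = S.Adam(1e-3)
solver.set_parameters(nn.get_parameters())

x.d = np.random.randint(0, 256, size=x.shape)  # stand-in for raw [0, 255] pixels
solver.zero_grad()
loss.forward(clear_no_need_grad=True)
loss.backward(clear_buffer=True)
solver.update()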
Exemple #40
0
def main():

    # Get arguments
    args = get_args()
    data_file = "https://raw.githubusercontent.com/tomsercu/lstm/master/data/ptb.train.txt"
    model_file = args.work_dir + "model.h5"

    # Load Dataset
    itow, wtoi, dataset = load_ptbset(data_file)

    # Computation environment settings
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create data provider
    n_word = len(wtoi)
    n_dim = args.embed_dim
    batchsize = args.batchsize
    half_window = args.half_window_length
    n_negative = args.n_negative_sample

    di = DataIteratorForEmbeddingLearning(
        batchsize=batchsize,
        half_window=half_window,
        n_negative=n_negative,
        dataset=dataset)

    # Create model
    # - Real batch size including context samples and negative samples
    size = batchsize * (1 + n_negative) * (2 * (half_window - 1))

    # Model for learning
    # - input variables
    xl = nn.Variable((size,))  # variable for word
    yl = nn.Variable((size,))  # variable for context

    # Embed layers for word embedding function
    # - f_embed : word index x to get y, the n_dim vector
    # --  for each sample in a minibatch
    hx = PF.embed(xl, n_word, n_dim, name="e1")  # feature vector for word
    hy = PF.embed(yl, n_word, n_dim, name="e1")  # feature vector for context
    hl = F.sum(hx * hy, axis=1)

    # -- Approximated likelihood of context prediction
    # pos: word-context pairs, neg: negative samples
    tl = nn.Variable([size, ], need_grad=False)
    loss = F.sigmoid_cross_entropy(hl, tl)
    loss = F.mean(loss)

    # Model for test of searching similar words
    xr = nn.Variable((1,), need_grad=False)
    hr = PF.embed(xr, n_word, n_dim, name="e1")  # feature vector for test

    # Create solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    monitor = M.Monitor(args.work_dir)
    monitor_loss = M.MonitorSeries(
        "Training loss", monitor, interval=args.monitor_interval)
    monitor_time = M.MonitorTimeElapsed(
        "Training time", monitor, interval=args.monitor_interval)

    # Do training
    max_epoch = args.max_epoch
    for epoch in range(max_epoch):

        # iteration per epoch
        for i in range(di.n_batch):

            # get minibatch
            xi, yi, ti = di.next()

            # learn
            solver.zero_grad()
            xl.d, yl.d, tl.d = xi, yi, ti
            loss.forward(clear_no_need_grad=True)
            loss.backward(clear_buffer=True)
            solver.update()

            # monitor
            itr = epoch * di.n_batch + i
            monitor_loss.add(itr, loss.d)
            monitor_time.add(itr)

    # Save model
    nn.save_parameters(model_file)

    # Evaluate by similarity
    max_check_words = args.max_check_words
    for i in range(max_check_words):

        # prediction
        xr.d = i
        hr.forward(clear_buffer=True)
        h = hr.d

        # similarity calculation
        w = nn.get_parameters()['e1/embed/W'].d
        s = np.sqrt((w * w).sum(1))
        w /= s.reshape((s.shape[0], 1))
        similarity = w.dot(h[0]) / s[i]

        # display similar words for inspection
        output_similar_words(itow, i, similarity)
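
For reference, the similarity block above is (up to the in-place normalization) a cosine similarity between rows of the embedding matrix; an equivalent stand-alone NumPy sketch, with a hypothetical function name, is:

import numpy as np

def most_similar(w, i, topn=5):
    # Cosine similarity of embedding row i against all rows of w.
    wn = w / np.sqrt((w * w).sum(axis=1, keepdims=True))
    sim = wn.dot(wn[i])
    return np.argsort(-sim)[1:topn + 1]  # nearest word indices, excluding i itself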
Exemple #41
0
def train():
    parser = argparse.ArgumentParser()
    parser.add_argument("--train-file", type=str)
    parser.add_argument("--valid-file", type=str)
    parser.add_argument("--num-training-examples", type=int, default=50)
    parser.add_argument("--accum-grad", type=int, default=1)
    parser.add_argument("--valid-interval", type=int, default=200)
    parser.add_argument("--threshold", type=float, default=0.95)
    parser.add_argument("--context", type=str, default="cpu")
    parser.add_argument("--device-id", type=int, default=0)

    args = parser.parse_args()

    from nnabla.ext_utils import get_extension_context
    extension_module = args.context
    ctx = get_extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # prepare data iterators
    tdata = data_iterator(
        BAbI15DataSource(args.train_file,
                         args.num_training_examples,
                         shuffle=True), 1, False, False, False)
    vdata = data_iterator(
        BAbI15DataSource(args.valid_file, 1000, shuffle=True), 1, False, False,
        False)

    # prepare monitors
    monitor = M.Monitor("./bAbI15")
    tloss = M.MonitorSeries("Training Loss", monitor, interval=10)
    terror = M.MonitorSeries("Training Error", monitor, interval=10)
    verror = M.MonitorSeries("Validation Error", monitor, interval=1)

    # prepare solver
    solver = S.Adam()
    solver_initialized = False

    cnt = 0
    while True:
        l = 0.0
        e = 0.0

        solver.zero_grad()
        for _ in range(args.accum_grad):
            # read next data
            x = tdata.next()
            V = x[1][0][0]
            E = x[2][0][0]
            ans = x[3][0][0]

            # construct GGNN
            output = predict(V, E)
            output = F.reshape(output, (1, output.shape[0]))

            # initialize solver
            if not solver_initialized:
                solver.set_parameters(nn.get_parameters())
                solver_initialized = True
                solver.zero_grad()

            # calculate loss/error
            label = nn.Variable((1, 1))
            label.data.data[0, 0] = ans
            output2 = output.unlinked()
            loss = F.mean(F.softmax_cross_entropy(output, label))
            error = F.mean(F.top_n_error(output2, label))
            F.sink(loss, error).forward(clear_no_need_grad=True)
            loss.backward(clear_buffer=True)

            l += loss.data.data
            e += error.data.data

        # dump log
        tloss.add(cnt, l / args.accum_grad)
        terror.add(cnt, e / args.accum_grad)
        l = 0.0
        e = 0.0

        solver.update()

        cnt += 1
        if cnt % args.valid_interval == 0:
            # validation
            validation_error = 0
            correct_example = None
            wrong_example = None
            for _ in range(vdata.size):
                x = vdata.next()
                id2str = x[0][0][0]
                V = x[1][0][0]
                E = x[2][0][0]
                ans = x[3][0][0]

                output = predict(V, E)
                output = F.reshape(output, (1, output.shape[0]))

                # calculate error
                label = nn.Variable((1, 1))
                label.data.data[0, 0] = ans
                error = F.top_n_error(output, label)
                error.forward(clear_no_need_grad=True)

                if error.data.data > 0.5:
                    if wrong_example is None:
                        wrong_example = (id2str, V, E, ans, output.data.data)
                else:
                    if correct_example is None:
                        correct_example = (id2str, V, E, ans, output.data.data)
                validation_error += error.data.data
            validation_error /= vdata.size
            verror.add(cnt, validation_error)
            accuracy = 1 - validation_error
            if accuracy >= args.threshold:

                def show(example):
                    for i, j in example[2]["is"]:
                        print("{} is {}.".format(example[0][i], example[0][j]))
                    for i, j in example[2]["has_fear"]:
                        print("{} are afraid of {}.".format(
                            example[0][i], example[0][j]))
                    i = np.argmax(example[1])
                    print("What is {} afraid of?".format(example[0][i]))
                    i = np.argmax(example[4])
                    print("Expected: {}, Actual: {}".format(
                        example[0][example[3]], example[0][i]))

                if correct_example is not None:
                    show(correct_example)
                if wrong_example is not None:
                    show(wrong_example)

                break
Exemple #42
0
def ce_loss_soft(ctx, pred, target):
    # Cross entropy against soft targets (softmax of `target` logits).
    with nn.context_scope(ctx):
        # TODO: divide or not
        loss = - F.mean(F.sum(F.softmax(target) * F.log(F.softmax(pred)), axis=1))
    return loss
Exemple #43
0
def main():
    conf = get_config()
    extension_module = conf.nnabla_context.context
    ctx = get_extension_context(extension_module,
                                device_id=conf.nnabla_context.device_id)
    comm = CommunicatorWrapper(ctx)
    nn.set_default_context(comm.ctx)
    print("#GPU Count: ", comm.n_procs)

    data_iterator_train = jsi_iterator(conf.batch_size, conf, train=True)
    if conf.scaling_factor == 1:
        d_t = nn.Variable((conf.batch_size, 80, 80, 3), need_grad=True)
        l_t = nn.Variable((conf.batch_size, 80, 80, 3), need_grad=True)

    else:
        d_t = nn.Variable((conf.batch_size, 160 // conf.scaling_factor,
                           160 // conf.scaling_factor, 3),
                          need_grad=True)
        l_t = nn.Variable((conf.batch_size, 160, 160, 3), need_grad=True)

    if comm.n_procs > 1:
        data_iterator_train = data_iterator_train.slice(
            rng=None, num_of_slices=comm.n_procs, slice_pos=comm.rank)

    monitor_path = './nnmonitor' + \
        str(datetime.datetime.now().strftime("%Y%m%d%H%M%S"))

    monitor = Monitor(monitor_path)
    jsi_monitor = setup_monitor(conf, monitor)

    with nn.parameter_scope("jsinet"):
        nn.load_parameters(conf.pre_trained_model)
        net = model(d_t, conf.scaling_factor)
        net.pred.persistent = True
    rec_loss = F.mean(F.squared_error(net.pred, l_t))
    rec_loss.persistent = True
    g_final_loss = rec_loss

    if conf.jsigan:
        net_gan = gan_model(l_t, net.pred, conf)
        d_final_fm_loss = net_gan.d_adv_loss
        d_final_fm_loss.persistent = True
        d_final_detail_loss = net_gan.d_detail_adv_loss
        d_final_detail_loss.persistent = True
        g_final_loss = conf.rec_lambda * rec_loss + conf.adv_lambda * (
            net_gan.g_adv_loss + net_gan.g_detail_adv_loss
        ) + conf.fm_lambda * (net_gan.fm_loss + net_gan.fm_detail_loss)
        g_final_loss.persistent = True

    max_iter = data_iterator_train._size // (conf.batch_size)
    if comm.rank == 0:
        print("max_iter", data_iterator_train._size, max_iter)

    iteration = 0
    if not conf.jsigan:
        start_epoch = 0
        end_epoch = conf.adv_weight_point
        lr = conf.learning_rate * comm.n_procs
    else:
        start_epoch = conf.adv_weight_point
        end_epoch = conf.epoch
        lr = conf.learning_rate * comm.n_procs
        w_d = conf.weight_decay * comm.n_procs

    # Set generator parameters
    with nn.parameter_scope("jsinet"):
        solver_jsinet = S.Adam(alpha=lr, beta1=0.9, beta2=0.999, eps=1e-08)
        solver_jsinet.set_parameters(nn.get_parameters())

    if conf.jsigan:
        solver_disc_fm = S.Adam(alpha=lr, beta1=0.9, beta2=0.999, eps=1e-08)
        solver_disc_detail = S.Adam(alpha=lr,
                                    beta1=0.9,
                                    beta2=0.999,
                                    eps=1e-08)
        with nn.parameter_scope("Discriminator_FM"):
            solver_disc_fm.set_parameters(nn.get_parameters())
        with nn.parameter_scope("Discriminator_Detail"):
            solver_disc_detail.set_parameters(nn.get_parameters())

    for epoch in range(start_epoch, end_epoch):
        for index in range(max_iter):
            d_t.d, l_t.d = data_iterator_train.next()

            if not conf.jsigan:
                # JSI-net -> Generator
                lr_stair_decay_points = [200, 225]
                lr_net = get_learning_rate(lr, iteration,
                                           lr_stair_decay_points,
                                           conf.lr_decreasing_factor)
                g_final_loss.forward(clear_no_need_grad=True)
                solver_jsinet.zero_grad()
                if comm.n_procs > 1:
                    all_reduce_callback = comm.get_all_reduce_callback()
                    g_final_loss.backward(
                        clear_buffer=True,
                        communicator_callbacks=all_reduce_callback)
                else:
                    g_final_loss.backward(clear_buffer=True)
                solver_jsinet.set_learning_rate(lr_net)
                solver_jsinet.update()
            else:
                # GAN part (discriminator + generator)
                lr_gan = lr if epoch < conf.gan_lr_linear_decay_point \
                    else lr * (end_epoch - epoch) / (end_epoch - conf.gan_lr_linear_decay_point)
                lr_gan = lr_gan * conf.gan_ratio

                net.pred.need_grad = False

                # Discriminator_FM
                solver_disc_fm.zero_grad()
                d_final_fm_loss.forward(clear_no_need_grad=True)
                if comm.n_procs > 1:
                    all_reduce_callback = comm.get_all_reduce_callback()
                    d_final_fm_loss.backward(
                        clear_buffer=True,
                        communicator_callbacks=all_reduce_callback)
                else:
                    d_final_fm_loss.backward(clear_buffer=True)
                solver_disc_fm.set_learning_rate(lr_gan)
                solver_disc_fm.weight_decay(w_d)
                solver_disc_fm.update()

                # Discriminator_Detail
                solver_disc_detail.zero_grad()
                d_final_detail_loss.forward(clear_no_need_grad=True)
                if comm.n_procs > 1:
                    all_reduce_callback = comm.get_all_reduce_callback()
                    d_final_detail_loss.backward(
                        clear_buffer=True,
                        communicator_callbacks=all_reduce_callback)
                else:
                    d_final_detail_loss.backward(clear_buffer=True)
                solver_disc_detail.set_learning_rate(lr_gan)
                solver_disc_detail.weight_decay(w_d)
                solver_disc_detail.update()

                # Generator
                net.pred.need_grad = True
                solver_jsinet.zero_grad()
                g_final_loss.forward(clear_no_need_grad=True)
                if comm.n_procs > 1:
                    all_reduce_callback = comm.get_all_reduce_callback()
                    g_final_loss.backward(
                        clear_buffer=True,
                        communicator_callbacks=all_reduce_callback)
                else:
                    g_final_loss.backward(clear_buffer=True)
                solver_jsinet.set_learning_rate(lr_gan)
                solver_jsinet.update()

            iteration += 1
            if comm.rank == 0:
                train_psnr = compute_psnr(net.pred.d, l_t.d, 1.)
                jsi_monitor['psnr'].add(iteration, train_psnr)
                jsi_monitor['rec_loss'].add(iteration, rec_loss.d.copy())
                jsi_monitor['time'].add(iteration)

            if comm.rank == 0:
                if conf.jsigan:
                    jsi_monitor['g_final_loss'].add(iteration,
                                                    g_final_loss.d.copy())
                    jsi_monitor['g_adv_loss'].add(iteration,
                                                  net_gan.g_adv_loss.d.copy())
                    jsi_monitor['g_detail_adv_loss'].add(
                        iteration, net_gan.g_detail_adv_loss.d.copy())
                    jsi_monitor['d_final_fm_loss'].add(
                        iteration, d_final_fm_loss.d.copy())
                    jsi_monitor['d_final_detail_loss'].add(
                        iteration, d_final_detail_loss.d.copy())
                    jsi_monitor['fm_loss'].add(iteration,
                                               net_gan.fm_loss.d.copy())
                    jsi_monitor['fm_detail_loss'].add(
                        iteration, net_gan.fm_detail_loss.d.copy())
                    jsi_monitor['lr'].add(iteration, lr_gan)

        if comm.rank == 0:
            if not os.path.exists(conf.output_dir):
                os.makedirs(conf.output_dir)
            with nn.parameter_scope("jsinet"):
                nn.save_parameters(
                    os.path.join(conf.output_dir,
                                 "model_param_%04d.h5" % epoch))
Exemple #44
0
def sigma_regularization(ctx, log_var, one):
    with nn.context_scope(ctx):
        h = F.exp(log_var)
        h = F.pow_scalar(h, 0.5)
        r = F.mean(F.squared_error(h, one))
    return r
Exemple #45
0
def cifar10_resnet23_loss(pred, label):
    loss = F.mean(F.softmax_cross_entropy(pred, label))
    return loss
Exemple #46
0
def sr_loss(ctx, pred0, pred1):
    with nn.context_scope(ctx):
        loss_sr = F.mean(F.squared_error(pred0, pred1))
    return loss_sr
Exemple #47
0
train_data_iter = data_iterator_simple(load_train_func,
                                       len(x_train),
                                       batch_size,
                                       shuffle=True,
                                       with_file_cache=False)
valid_data_iter = data_iterator_simple(load_valid_func,
                                       len(x_valid),
                                       batch_size,
                                       shuffle=True,
                                       with_file_cache=False)

x = nn.Variable([batch_size, window_size * 2])
with nn.parameter_scope('W_in'):
    h = PF.embed(x, vocab_size, embedding_size)
h = F.mean(h, axis=1)
h = expand_dims(h, axis=-1)  # (batch_size, embedding_size, 1)
t = nn.Variable([batch_size, 1])
t_neg = nn.Variable([batch_size, k])
with nn.parameter_scope('W_out'):
    _t = PF.embed(t, vocab_size,
                  embedding_size)  # (batch_size, 1, embedding_size)
    _t_neg = PF.embed(t_neg, vocab_size,
                      embedding_size)  # (batch_size, k, embedding_size)

t_score = F.sigmoid(F.reshape(F.batch_matmul(_t, h), shape=(batch_size, 1)))
t_neg_score = F.sigmoid(
    F.reshape(F.batch_matmul(_t_neg, h), shape=(batch_size, k)))

t_loss = F.binary_cross_entropy(t_score, F.constant(1, shape=(batch_size, 1)))
t_neg_loss = F.binary_cross_entropy(t_neg_score,
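
The snippet is cut off above. A plausible completion of the negative-sampling objective, assuming negative contexts are labeled 0 and both terms are reduced to a single scalar loss, would be:

t_neg_loss = F.binary_cross_entropy(
    t_neg_score, F.constant(0, shape=(batch_size, k)))
loss = F.mean(t_loss + F.sum(t_neg_loss, axis=1, keepdims=True))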
Exemple #48
0
def train(args):
    """
    Main script.
    """

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN

    # Fake path
    x1 = nn.Variable([args.batch_size, 1, 28, 28])

    #z = nn.Variable([args.batch_size, VEC_SIZE, 1, 1])
    #z = vectorizer(x1,maxh = 1024)
    #fake = generator(z,maxh= 1024)
    z = vectorizer(x1)
    fake = generator(z)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    loss_gen = F.mean(
        F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape)))
    loss_vec = F.mean(F.squared_error(fake, x1))
    fake_dis = fake.unlinked()
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(pred_fake_dis,
                                F.constant(0, pred_fake_dis.shape)))

    # Real path
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(
        F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    solver_vec = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("vec"):
        solver_vec.set_parameters(nn.get_parameters())
    with nn.parameter_scope("gen"):
        solver_vec.set_parameters(nn.get_parameters())
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries("Discriminator loss",
                                       monitor,
                                       interval=10)
    monitor_loss_vec = M.MonitorSeries("Vectorizer loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    monitor_fake = M.MonitorImageTile("Fake images",
                                      monitor,
                                      normalize_method=lambda x: x + 1 / 2.)
    monitor_vec1 = M.MonitorImageTile("vec images1",
                                      monitor,
                                      normalize_method=lambda x: x + 1 / 2.)
    monitor_vec2 = M.MonitorImageTile("vec images2",
                                      monitor,
                                      normalize_method=lambda x: x + 1 / 2.)

    #data = data_iterator_mnist(args.batch_size, True)
    data = iterator.simple_data_iterator(load_kanji_data(), args.batch_size,
                                         True)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("gen"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "generator_param_%06d.h5" % i))
            with nn.parameter_scope("dis"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "discriminator_param_%06d.h5" % i))

        # Training forward
        image, _ = data.next()

        x1.d = image / 255. - 0.5
        # Vectorizer (reconstruction) update.
        solver_vec.zero_grad()
        loss_vec.forward(clear_no_need_grad=True)
        loss_vec.backward(clear_buffer=True)
        solver_vec.weight_decay(args.weight_decay)
        solver_vec.update()
        monitor_vec1.add(i, fake)
        monitor_vec2.add(i, x1)
        monitor_loss_vec.add(i, loss_vec.d.copy())

        x.d = image / 255. - 0.5  # [0, 255] to [-0.5, 0.5]
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    with nn.parameter_scope("gen"):
        nn.save_parameters(
            os.path.join(args.model_save_path, "generator_param_%06d.h5" % i))
    with nn.parameter_scope("dis"):
        nn.save_parameters(
            os.path.join(args.model_save_path,
                         "discriminator_param_%06d.h5" % i))
Exemple #49
0
    def __init__(self,
                 solver,
                 tinput=None,
                 tlabel=None,
                 tpred=None,
                 tdata=None,
                 vinput=None,
                 vlabel=None,
                 vpred=None,
                 vdata=None,
                 monitor_path=None,
                 model_save_path=None,
                 max_epoch=1,
                 iter_per_epoch=None,
                 val_iter=None):
        # Monitors
        monitor = Monitor(monitor_path)
        monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
        monitor_vloss = MonitorSeries("Valid loss", monitor, interval=1)
        monitor_time = MonitorTimeElapsed("Training time",
                                          monitor,
                                          interval=10)

        # Loss
        tpred = tpred.apply(persistent=True)
        tloss = F.mean(F.squared_error(tpred, tlabel))
        vpred = vpred.apply(persistent=True)
        vloss = F.mean(F.squared_error(vpred, vlabel))

        # Updater
        def tdata_feeder():
            tinput.d, tlabel.d = tdata.next()

        def update_callback_on_finish(i):
            monitor_loss.add(i, tloss.d)
            monitor_time.add(i)

        def forward_callback_on_finish(i):
            # Not defined in the original snippet; a no-op keeps the
            # Updater call below runnable.
            pass

        updater = Updater(
            solver,
            tloss,
            data_feeder=tdata_feeder,
            forward_callback_on_finish=forward_callback_on_finish,
            update_callback_on_finish=update_callback_on_finish)

        # Evaluator
        def vdata_feeder():
            vinput.d, vlabel.d = vdata.next()

        def vloss_callback_on_finish(i, v):
            monitor_vloss.add(i, v)

        val_iter = val_iter if val_iter is not None else vdata.size // vdata.batch_size
        evaluator = Evaluator(vloss,
                              data_feeder=vdata_feeder,
                              val_iter=val_iter,
                              callback_on_finish=vloss_callback_on_finish)

        # Trainer
        iter_per_epoch = iter_per_epoch if iter_per_epoch is not None \
            else tdata.size // tdata.batch_size
        self.trainer = Trainer(updater,
                               evaluator,
                               model_save_path,
                               max_epoch=max_epoch,
                               iter_per_epoch=iter_per_epoch)
Exemple #50
0
def main():
    """
    Main script.

    Steps:
    * Get and set context.
    * Load Dataset
    * Initialize DataIterator.
    * Create Networks
    *   Net for Labeled Data
    *   Net for Unlabeled Data
    *   Net for Test Data
    * Create Solver.
    * Training Loop.
    *   Test
    *   Training
    *     by Labeled Data
    *       Calculate Cross Entropy Loss 
    *     by Unlabeled Data
    *       Estimate Adversarial Direction
    *       Calculate LDS Loss
    """

    args = get_args()

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    shape_x = (1, 28, 28)
    n_h = args.n_units
    n_y = args.n_class

    # Load MNist Dataset
    from mnist_data import MnistDataSource
    with MnistDataSource(train=True) as d:
        x_t = d.images
        t_t = d.labels
    with MnistDataSource(train=False) as d:
        x_v = d.images
        t_v = d.labels
    x_t = np.array(x_t / 256.0).astype(np.float32)
    x_t, t_t = x_t[:args.n_train], t_t[:args.n_train]
    x_v, t_v = x_v[:args.n_valid], t_v[:args.n_valid]

    # Create Semi-supervised Datasets
    x_l, t_l, x_u, _ = split_dataset(x_t, t_t, args.n_labeled, args.n_class)
    x_u = np.r_[x_l, x_u]
    x_v = np.array(x_v / 256.0).astype(np.float32)

    # Create DataIterators for datasets of labeled, unlabeled and validation
    di_l = DataIterator(args.batchsize_l, [x_l, t_l])
    di_u = DataIterator(args.batchsize_u, [x_u])
    di_v = DataIterator(args.batchsize_v, [x_v, t_v])

    # Create networks
    # feed-forward-net building function
    def forward(x, test=False):
        return mlp_net(x, n_h, n_y, test)

    # Net for learning labeled data
    xl = nn.Variable((args.batchsize_l,) + shape_x, need_grad=False)
    hl = forward(xl, test=False)
    tl = nn.Variable((args.batchsize_l, 1), need_grad=False)
    loss_l = F.mean(F.softmax_cross_entropy(hl, tl))

    # Net for learning unlabeled data
    xu = nn.Variable((args.batchsize_u,) + shape_x, need_grad=False)
    r = nn.Variable((args.batchsize_u,) + shape_x, need_grad=True)
    eps = nn.Variable((args.batchsize_u,) + shape_x, need_grad=False)
    loss_u, yu = vat(xu, r, eps, forward, distance)

    # Net for evaluating validation data
    xv = nn.Variable((args.batchsize_v,) + shape_x, need_grad=False)
    hv = forward(xv, test=True)
    tv = nn.Variable((args.batchsize_v, 1), need_grad=False)

    # Create solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Monitor training and validation stats.
    import nnabla.monitor as M
    monitor = M.Monitor(args.model_save_path)
    monitor_verr = M.MonitorSeries("Test error", monitor, interval=240)
    monitor_time = M.MonitorTimeElapsed("Elapsed time", monitor, interval=240)

    # Training Loop.
    t0 = time.time()

    for i in range(args.max_iter):

        # Validation Test
        if i % args.val_interval == 0:
            n_error = calc_validation_error(
                di_v, xv, tv, hv, args.val_iter)
            monitor_verr.add(i, n_error)

        #################################
        ## Training by Labeled Data #####
        #################################

        # input minibatch of labeled data into variables
        xl.d, tl.d = di_l.next()

        # initialize gradients
        solver.zero_grad()

        # forward, backward and update
        loss_l.forward(clear_no_need_grad=True)
        loss_l.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        #################################
        ## Training by Unlabeled Data ###
        #################################

        # input minibatch of unlabeled data into variables
        xu.d, = di_u.next()

        ##### Calculate Adversarial Noise #####

        # Sample random noise
        n = np.random.normal(size=xu.shape).astype(np.float32)

        # Normalize the noise vector and feed it into the variable
        r.d = get_direction(n)

        # Set xi, the power-method scaling parameter.
        eps.data.fill(args.xi_for_vat)

        # Calculate y without noise, only once.
        yu.forward(clear_buffer=True)

        # Do power method iteration
        for k in range(args.n_iter_for_power_method):
            # Initialize gradient to receive value
            r.grad.zero()

            # forward, backward, without update
            loss_u.forward(clear_no_need_grad=True)
            loss_u.backward(clear_buffer=True)

            # Normalize the gradient vector and feed it into the variable
            r.d = get_direction(r.g)

        ##### Calculate loss for unlabeled data #####

        # Clear remaining gradients
        solver.zero_grad()

        # Set epsilon, the adversarial noise scaling parameter.
        eps.data.fill(args.eps_for_vat)

        # forward, backward and update
        loss_u.forward(clear_no_need_grad=True)
        loss_u.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        ##### Learning rate update #####
        if i % args.iter_per_epoch == 0:
            solver.set_learning_rate(
                solver.learning_rate() * args.learning_rate_decay)
        monitor_time.add(i)

    # Evaluate the final model by the error rate with validation dataset
    valid_error = calc_validation_error(di_v, xv, tv, hv, args.val_iter)
    monitor_verr.add(i, valid_error)
    monitor_time.add(i)

    # Save the model.
    nnp_file = os.path.join(
        args.model_save_path, 'vat_%06d.nnp' % args.max_iter)
    runtime_contents = {
        'networks': [
            {'name': 'Validation',
             'batch_size': args.batchsize_v,
             'outputs': {'y': hv},
             'names': {'x': xv}}],
        'executors': [
            {'name': 'Runtime',
             'network': 'Validation',
             'data': ['x'],
             'output': ['y']}]}
    save.save(nnp_file, runtime_contents)

    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [xv.d], [xv], hv, nnp_file)
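
The distance function passed to vat above is not defined in this snippet; a minimal sketch of a per-sample KL divergence between the two softmax outputs, assuming both arguments are logits, could be:

import nnabla.functions as F

def distance(y0, y1):
    # Hypothetical sketch: KL(softmax(y0) || softmax(y1)) for each sample.
    p0 = F.softmax(y0, axis=1)
    return F.sum(p0 * (F.log(p0) - F.log(F.softmax(y1, axis=1))), axis=1)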
Exemple #51
0
def train(args):
    """
    Main script.
    """

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN

    # Fake path
    z = nn.Variable([args.batch_size, 100, 1, 1])
    fake = generator(z)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    loss_gen = F.mean(F.sigmoid_cross_entropy(
        pred_fake, F.constant(1, pred_fake.shape)))
    fake_dis = fake.unlinked()
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(F.sigmoid_cross_entropy(
        pred_fake_dis, F.constant(0, pred_fake_dis.shape)))

    # Real path
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(F.sigmoid_cross_entropy(pred_real,
                                               F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries(
        "Discriminator loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    monitor_fake = M.MonitorImageTile(
        "Fake images", monitor, normalize_method=lambda x: x + 1 / 2.)

    data = data_iterator_mnist(args.batch_size, True)
    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("gen"):
                nn.save_parameters(os.path.join(
                    args.model_save_path, "generator_param_%06d.h5" % i))
            with nn.parameter_scope("dis"):
                nn.save_parameters(os.path.join(
                    args.model_save_path, "discriminator_param_%06d.h5" % i))

        # Training forward
        image, _ = data.next()
        x.d = image / 255. - 0.5  # [0, 255] to [-0.5, 0.5]
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    nnp = os.path.join(
        args.model_save_path, 'dcgan_%06d.nnp' % args.max_iter)
    runtime_contents = {
        'networks': [
            {'name': 'Generator',
             'batch_size': args.batch_size,
             'outputs': {'G': fake},
             'names': {'z': z}},
            {'name': 'Discriminator',
             'batch_size': args.batch_size,
             'outputs': {'D': pred_real},
             'names': {'x': x}}],
        'executors': [
            {'name': 'Generator',
             'network': 'Generator',
             'data': ['z'],
             'output': ['G']},
            {'name': 'Discriminator',
             'network': 'Discriminator',
             'data': ['x'],
             'output': ['D']}]}

    save.save(nnp, runtime_contents)
    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [z.d], [z], fake, nnp, "Generator")
Exemple #52
0
def kl_divergence(ctx, pred, label):
    # Cross entropy between softmax(label) and softmax(pred); this equals the
    # KL divergence up to a term that is constant with respect to `pred`.
    with nn.context_scope(ctx):
        elms = F.softmax(label, axis=1) * F.log(F.softmax(pred, axis=1))
        loss = -F.mean(F.sum(elms, axis=1))
    return loss
Exemple #53
0
def train():
    """
    Main script.

    Steps:

    * Parse command line arguments.
    * Specify a context for computation.
    * Initialize DataIterator for MNIST.
    * Construct a computation graph for training and validation.
    * Initialize a solver and set parameter variables to it.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Compute error rate for validation data (periodically)
      * Get a next minibatch.
      * Set parameter gradients zero
      * Execute forwardprop on the training graph.
      * Execute backprop.
      * Solver updates parameters by using gradients computed by backprop.
      * Compute training error
    """
    args = get_args(monitor_path='tmp.monitor.bnn')

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Initialize DataIterator for MNIST.
    data = data_iterator_mnist(args.batch_size, True)
    vdata = data_iterator_mnist(args.batch_size, False)

    # Create CNN network for both training and testing.
    mnist_cnn_prediction = mnist_inq_lenet_prediction
    if args.net == 'inq':
        mnist_cnn_prediction = mnist_inq_lenet_prediction
    elif args.net == 'inq_resnet':
        mnist_cnn_prediction = mnist_inq_resnet_prediction

    # TRAIN
    # Create input variables.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    pred = mnist_cnn_prediction(image / 255, test=False)
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    vpred = mnist_cnn_prediction(vimage / 255, test=True)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = M.MonitorSeries("Training error", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = M.MonitorSeries("Test error", monitor, interval=10)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            monitor_verr.add(i, ve / args.val_iter)
        if i % args.model_save_interval == 0:
            nn.save_parameters(os.path.join(
                args.model_save_path, 'params_%06d.h5' % i))
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        # Training backward & update
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        # Monitor
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    parameter_file = os.path.join(
        args.model_save_path, 'params_%06d.h5' % args.max_iter)
    nn.save_parameters(parameter_file)
Exemple #54
0
def test_recording_to_training(ctx, func_name, seed, precision_mode, graph_ref,
                               graph_act):
    from .graph_converter_test_utils import structure_tester, value_tester

    cfg = QATConfig()
    cfg.bn_folding = True
    cfg.bn_self_folding = True
    cfg.channel_last = False
    cfg.precision_mode = precision_mode
    cfg.skip_inputs_layers = []
    cfg.skip_outputs_layers = []

    # Random number
    np.random.seed(seed)
    rng = np.random.RandomState(seed)

    # Graph
    with nn.context_scope(ctx):
        x_data = rng.randn(batch_size, 3, 32, 32)
        gt_label = nn.Variable((batch_size, 1))
        x = nn.Variable((batch_size, 3, 32, 32))

        y_tgt = graph_act(x, test=False, w_bias=True)
        loss = F.mean(F.softmax_cross_entropy(y_tgt, gt_label))
        solver = S.Adam(0.001)
        solver.set_parameters(nn.get_parameters(grad_only=True))
        # train the float32 network
        for i in range(100):
            input_data = np.random.random((batch_size, 3, 32, 32))
            input_label = np.random.randint(0, 10, size=(batch_size, 1))
            gt_label.d = input_label
            x.d = input_data
            loss.forward()
            loss.backward()
            solver.update()

        # BN folding & BN self folding
        modifiers = []
        if cfg.bn_folding:
            modifiers.append(
                GC.BatchNormalizationFoldingModifier(
                    opposite=False, channel_last=cfg.channel_last))
            modifiers.append(
                GC.BatchNormalizationFoldingModifier(
                    opposite=True, channel_last=cfg.channel_last))
        # Go through BN self folding
        if cfg.bn_self_folding:
            modifiers.append(GC.BatchNormalizationSelfFoldingModifier())
        if len(modifiers) > 0:
            y_tgt_without_bn = GC.GraphConverter(modifiers).convert(y_tgt)
            y_tgt.rewire_on(y_tgt_without_bn)

        # convert to recording
        funcrankrecorder = FunctionsRankRecorder()
        y_tgt.visit(funcrankrecorder)
        modifiers = [
            GC.QuantizeNonQNNToRecordingModifier(
                funcrankrecorder.functions_ranks, config=cfg)
        ]
        y_act_rec = GC.GraphConverter(modifiers).convert(y_tgt)
        y_tgt.rewire_on(y_act_rec)
        y_tgt.need_grad = False
        # solver.clear_parameters()
        solver.set_parameters(nn.get_parameters(grad_only=True))
        for i in range(100):
            input_data = np.random.random((batch_size, 3, 32, 32))
            input_label = np.random.randint(0, 10, size=(batch_size, 1))
            gt_label.d = input_label
            x.d = input_data
            loss.forward()
            loss.backward()
            solver.update()

        # Remove recorder
        modifiers = []
        modifiers.append(
            GC.RemoveFunctionModifier(rm_funcs=[
                cfg.recorder_activation().name(),
                cfg.recorder_weight().name()
            ]))
        y_tgt = GC.GraphConverter(modifiers).convert(y_tgt)

        # Collect functions rank
        funcrankrecorder = FunctionsRankRecorder()
        y_tgt.visit(funcrankrecorder)

        # convert to training
        modifiers = [
            GC.QuantizeRecordingToTrainingModifier(
                funcrankrecorder.functions_ranks, config=cfg)
        ]
        y_act = GC.GraphConverter(modifiers).convert(y_tgt)
        y_act.forward()
        # Ref Graph
        y_ref = graph_ref(x, cfg, test=True)

        # Test
        structure_tester(y_ref, y_act)
Exemple #55
0
def recon_loss(x, y):
    return F.mean(F.absolute_error(x, y))
Exemple #56
0
def train():
    args = get_args()

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    mnist_cnn_prediction = mnist_lenet_prediction

    # TRAIN
    reference = "reference"
    # Create input variables.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size, 1])
    # Create `reference` prediction graph.
    pred = mnist_cnn_prediction(image, scope=reference, test=False)
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create reference prediction graph.
    vpred = mnist_cnn_prediction(vimage, scope=reference, test=True)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)

    # Initialize DataIterator for MNIST.
    data = data_iterator_mnist(args.batch_size, True)
    vdata = data_iterator_mnist(args.batch_size, False)
    best_ve = 1.0
    ve = 1.0
    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            ve /= args.val_iter
            monitor_verr.add(i, ve)
        if ve < best_ve:
            nn.save_parameters(
                os.path.join(args.model_save_path, 'params_%06d.h5' % i))
            best_ve = ve
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    ve = 0.0
    for j in range(args.val_iter):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    monitor_verr.add(i, ve / args.val_iter)

    parameter_file = os.path.join(args.model_save_path,
                                  'params_{:06}.h5'.format(args.max_iter))
    nn.save_parameters(parameter_file)
Exemple #57
0
def train():
    """
    Main script.

    Steps:

    * Parse command line arguments.
    * Specify a context for computation.
    * Initialize DataIterator for MNIST.
    * Construct a computation graph for training and validation.
    * Initialize a solver and set parameter variables to it.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Compute error rate for validation data (periodically)
      * Get a next minibatch.
      * Execute forwardprop on the training graph.
      * Compute training error
      * Set parameter gradients zero
      * Execute backprop.
      * Solver updates parameters by using gradients computed by backprop.
    """
    args = get_args()

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    mnist_cnn_prediction = mnist_lenet_prediction
    if args.net == 'resnet':
        mnist_cnn_prediction = mnist_resnet_prediction

    # TRAIN
    # Create input variables.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    pred = mnist_cnn_prediction(image, test=False)
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    vpred = mnist_cnn_prediction(vimage, test=True)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)

    # Initialize DataIterator for MNIST.
    data = data_iterator_mnist(args.batch_size, True)
    vdata = data_iterator_mnist(args.batch_size, False)
    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            monitor_verr.add(i, ve / args.val_iter)
        if i % args.model_save_interval == 0:
            nn.save_parameters(os.path.join(
                args.model_save_path, 'params_%06d.h5' % i))
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    ve = 0.0
    for j in range(args.val_iter):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    monitor_verr.add(i, ve / args.val_iter)

    parameter_file = os.path.join(
        args.model_save_path, '{}_params_{:06}.h5'.format(args.net, args.max_iter))
    nn.save_parameters(parameter_file)