Example #1
    def test_clear_input_if_no_need_grad_inplace1(self):
        x1 = nn.Variable([1, 5], need_grad=True)

        xx1 = F.identity(x1)
        y1 = F.add_scalar(xx1, inplace=True)
        y2 = F.add_scalar(y1)

        answer = []
        answer.append([False])
        answer.append([False])
        answer.append([False])

        y2.forward(clear_no_need_grad=True)
        self.check_input_data_clear_called_flags(answer)
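These clear-flag tests check which input buffers forward(clear_no_need_grad=True) is allowed to release: data that is not needed for the backward pass can be cleared, while inplace or still-referenced buffers must survive. A minimal sketch of the same check without the test harness, assuming nnabla is imported as nn / F as in these examples and that NdArray exposes the clear_called flag used in Example #13 below:

import nnabla as nn
import nnabla.functions as F

x = nn.Variable([1, 5], need_grad=True)
xx = F.identity(x)
y = F.add_scalar(xx, 1.0)           # add_scalar does not need its input for backward
y.forward(clear_no_need_grad=True)
# Inspect whether the intermediate buffer was actually released.
print(xx.data.clear_called)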
Example #2
def weight_normalization_backward(inputs, dim=0, eps=1e-12):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      dim (int): Axis along which the weight is normalized.
      eps (float): Small constant added to the squared norm for numerical stability.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    w = inputs[1]
    g = inputs[2]
    g_shape = g.shape
    dim += w.ndim*(dim < 0)  # map a negative dim to the equivalent non-negative axis

    # Create inverted norm of w
    sum_axes = list(filter(lambda x: x != dim, range(w.ndim)))
    w_pow = F.pow_scalar(w, 2.0)
    w_sum = F.sum(w_pow, sum_axes, True)
    w_add = F.add_scalar(w_sum, eps)
    w_norm_inv = F.pow_scalar(w_add, -0.5)

    dyw_sum = F.sum(dy * w, sum_axes, True)

    # w.r.t. dw
    g = g.reshape([s if i == dim else 1 for i, s in enumerate(w.shape)])
    dw = (dy - dyw_sum * (w_norm_inv ** 2) * w) * g * w_norm_inv

    # w.r.t. dg
    dg = dyw_sum * w_norm_inv
    dg = dg.reshape(g_shape)

    return dw, dg
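For reference, the forward computation whose gradients are derived above is weight normalization: w_wn = g * w / sqrt(sum(w^2 over every axis except dim) + eps). A sketch of that forward pass using the same primitives (the helper name is made up for illustration):

import nnabla.functions as F

def weight_normalization_forward(w, g, dim=0, eps=1e-12):
    dim += w.ndim * (dim < 0)
    sum_axes = [a for a in range(w.ndim) if a != dim]
    # inverted norm of w over all axes except dim
    w_norm_inv = F.pow_scalar(
        F.add_scalar(F.sum(F.pow_scalar(w, 2.0), sum_axes, True), eps), -0.5)
    # broadcast g along every axis except dim
    g = F.reshape(g, [s if i == dim else 1 for i, s in enumerate(w.shape)])
    return g * w * w_norm_inv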
Example #3
def test_graph_connection_with_setitem(indices):
    import nnabla.functions as F
    x = np.arange(8 * 7).reshape((8, 7))
    x = nn.Variable.from_numpy_array(x, need_grad=True)
    u = np.arange(-1, -7, -1).reshape(3, 2)
    u = nn.Variable.from_numpy_array(u, need_grad=True)
    y = F.mul_scalar(x, 1)
    y[indices] = u
    z = F.add_scalar(y, 0)
    z.forward()
    # '+' signs only to preserve visual alignment through autopep8
    assert_allclose(z.d, np.array([[+0, +1, +2, +3, +4, +5, +6],
                                   [+7, +8, +9, 10, 11, 12, 13],
                                   [14, 15, 16, -1, -2, 19, 20],
                                   [21, 22, 23, -3, -4, 26, 27],
                                   [28, 29, 30, -5, -6, 33, 34],
                                   [35, 36, 37, 38, 39, 40, 41],
                                   [42, 43, 44, 45, 46, 47, 48],
                                   [49, 50, 51, 52, 53, 54, 55]]))
    x.grad.zero()
    u.grad.zero()
    z.backward(np.arange(1, 1 + 8 * 7).reshape(8, 7))
    assert_allclose(x.g, np.array([[+1, +2, +3, +4, +5, +6, +7],
                                   [+8, +9, 10, 11, 12, 13, 14],
                                   [15, 16, 17, +0, +0, 20, 21],
                                   [22, 23, 24, +0, +0, 27, 28],
                                   [29, 30, 31, +0, +0, 34, 35],
                                   [36, 37, 38, 39, 40, 41, 42],
                                   [43, 44, 45, 46, 47, 48, 49],
                                   [50, 51, 52, 53, 54, 55, 56]]))
    assert_allclose(u.g, np.array([[18, 19],
                                   [25, 26],
                                   [32, 33]]))
Example #4
    def test_clear_input_if_no_need_grad_branch0(self):
        x1 = nn.Variable([1, 5], need_grad=True)
        x2 = nn.Variable([1, 5], need_grad=True)

        xx1 = F.identity(x1)
        y1 = F.add_scalar(xx1)  # (1)
        y2 = F.add_scalar(xx1)  # (2)
        y3 = F.add2(y1, y2)  # (3)

        answer = []
        answer.append([False])
        answer.append([False])  # (1) does not clear xx1
        answer.append([True])  # (2) clears xx1
        answer.append([True, True])

        y3.forward(clear_no_need_grad=True)
        self.check_input_data_clear_called_flags(answer)
Example #5
    def test_clear_output_grad_inplace(self):
        x1 = nn.Variable([1], need_grad=True)

        xx1 = F.identity(x1)
        y1 = F.add_scalar(xx1, inplace=True)
        y2 = F.add_scalar(y1)

        answer_grad = []
        answer_grad.append([True])  # y2
        answer_grad.append([True])  # y1
        answer_grad.append([True])  # xx1

        y2.forward(clear_no_need_grad=True)
        clear_called_flag_recorder.deactivate_clear_called_flag_recorder()
        clear_called_flag_recorder.activate_clear_called_flag_recorder()
        y2.backward(clear_buffer=True)

        self.check_grad_cleared_flags(answer_grad)
Example #6
    def test_clear_input_if_no_need_grad_branch2(self):
        x1 = nn.Variable([1, 5], need_grad=True)

        xx1 = F.identity(x1)
        y1 = F.add_scalar(xx1)
        y2 = F.add_scalar(y1, inplace=True)
        z1 = F.add_scalar(xx1)
        z2 = F.add_scalar(z1)
        y3 = F.add2(y2, z2)

        answer = []
        answer.append([False])
        answer.append([False])
        answer.append([False])
        answer.append([True])
        answer.append([True])
        answer.append([False, True])

        y3.forward(clear_no_need_grad=True)
        self.check_input_data_clear_called_flags(answer)
Example #7
    def test_clear_output_grad_prohibit_clear_input(self):
        x1 = nn.Variable([1], need_grad=True)

        xx1 = F.identity(x1)
        y1 = F.add_scalar(xx1)
        y2 = F.add_scalar(xx1)
        y3 = F.sink(y1, y2)

        answer_grad = []
        answer_grad.append([True])  # y3
        answer_grad.append([False])  # y2
        answer_grad.append([False])  # y1
        answer_grad.append([True])  # xx1

        y3.forward(clear_no_need_grad=True)
        clear_called_flag_recorder.deactivate_clear_called_flag_recorder()
        clear_called_flag_recorder.activate_clear_called_flag_recorder()
        y3.backward(clear_buffer=True)

        self.check_grad_cleared_flags(answer_grad)
Example #8
    def test_clear_output_grad_persistent(self):
        x1 = nn.Variable([1], need_grad=True)

        xx1 = F.identity(x1)
        y1 = F.add_scalar(xx1)
        y2 = F.add_scalar(y1)

        xx1.persistent = True
        y2.persistent = True

        answer_grad = []
        answer_grad.append([False])  # y2
        answer_grad.append([True])  # y1
        answer_grad.append([False])  # xx1

        y2.forward(clear_no_need_grad=True)
        clear_called_flag_recorder.deactivate_clear_called_flag_recorder()
        clear_called_flag_recorder.activate_clear_called_flag_recorder()
        y2.backward(clear_buffer=True)

        self.check_grad_cleared_flags(answer_grad)
Example #9
    def __call__(self, gen_rgb_out):

        out = conv_layer(gen_rgb_out, inmaps=3,
                         outmaps=self.channels[0], kernel_size=1, name_scope='Discriminator/Convinitial')

        inmaps = self.channels[0]
        for i in range(1, len(self.resolutions)):
            res = out.shape[2]
            outmaps = self.channels[i]
            out = res_block(out, res=res, outmaps=outmaps, inmaps=inmaps)
            inmaps = outmaps

        N, C, H, W = out.shape
        group = min(N, self.stddev_group)
        stddev_mean = F.reshape(
            out, (group, -1, self.stddev_feat, C // self.stddev_feat, H, W), inplace=False)

        # mean = F.mean(stddev_mean, axis=0, keepdims=True)
        mean = F.mul_scalar(F.sum(stddev_mean, axis=0, keepdims=True),
                            1.0/stddev_mean.shape[0], inplace=False)

        stddev_mean = F.mean(F.pow_scalar(F.sub2(stddev_mean, F.broadcast(
            mean, stddev_mean.shape)), 2.), axis=0, keepdims=False)
        stddev_mean = F.pow_scalar(F.add_scalar(
            stddev_mean, 1e-8, inplace=False), 0.5, inplace=False)

        stddev_mean = F.mean(stddev_mean, axis=[2, 3, 4], keepdims=True)
        stddev_mean = F.reshape(
            stddev_mean, stddev_mean.shape[:2]+stddev_mean.shape[3:], inplace=False)

        out = F.concatenate(out, F.tile(stddev_mean, (group, 1, H, W)), axis=1)

        out = conv_layer(out, inmaps=out.shape[1], outmaps=self.channels[-1],
                         kernel_size=3, name_scope='Discriminator/Convfinal')

        out = F.reshape(out, (N, -1), inplace=False)

        # Linear Layers
        lrmul = 1
        scale = 1/(out.shape[1]**0.5)*lrmul
        W, bias = weight_init_fn(
            (out.shape[-1], self.channels[-1]), weight_var='Discriminator/final_linear_1/affine')
        out = F.affine(out, W*scale, bias*lrmul)
        out = F.mul_scalar(F.leaky_relu(
            out, alpha=0.2, inplace=False), np.sqrt(2), inplace=False)

        scale = 1/(out.shape[1]**0.5)*lrmul
        W, bias = weight_init_fn(
            (out.shape[-1], 1), weight_var='Discriminator/final_linear_2/affine')
        out = F.affine(out, W*scale, bias*lrmul)

        return out
Example #10
def get_sample_and_feedback(args, data_dict):
    """
        Let the controller predict one architecture and test its performance to get feedback.
        Here the feedback is validation accuracy and will be reused to train the controller. 
    """

    skip_weight = args.skip_weight
    entropy_weight = args.entropy_weight
    bl_dec = args.baseline_decay

    arc_seq, log_probs, entropys, skip_penaltys = sample_from_controller(args)

    sample_arch = list()
    for arc in arc_seq:
        sample_arch.extend(arc.tolist())

    show_arch(sample_arch)

    sample_entropy = entropys
    sample_log_prob = log_probs

    nn.set_auto_forward(False)
    val_acc = CNN_run(args, sample_arch, data_dict)  # Execute Evaluation Only
    nn.set_auto_forward(True)

    print("Accuracy on Validation: {:.2f} %\n".format(100 * val_acc))

    reward = val_acc  # use validation accuracy as reward

    if entropy_weight is not None:
        reward = F.add_scalar(F.mul_scalar(sample_entropy, entropy_weight),
                              reward).d

    sample_log_prob = F.mul_scalar(sample_log_prob, (1 / args.num_candidate))

    if args.use_variance_reduction:
        baseline = 0.0
        # variance reduction
        baseline = baseline - ((1 - bl_dec) * (baseline - reward))
        reward = reward - baseline

    loss = F.mul_scalar(sample_log_prob, (-1) * reward)

    if skip_weight is not None:
        adding_penalty = F.mul_scalar(skip_penaltys, skip_weight)
        loss = F.add2(loss, adding_penalty)

    return loss, val_acc, sample_arch
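The loss built here is a REINFORCE-style objective: the reward (validation accuracy, optionally raised by an entropy bonus through F.add_scalar) scales the negative log-probability of the sampled architecture. A toy numeric sketch of the same arithmetic in imperative (NdArray) mode; all values are made up:

import numpy as np
import nnabla as nn
import nnabla.functions as F

log_prob = nn.NdArray.from_numpy_array(np.array([-2.3], dtype=np.float32))
entropy = nn.NdArray.from_numpy_array(np.array([0.9], dtype=np.float32))
val_acc = 0.71          # reward returned by the evaluation run
entropy_weight = 0.0001

# reward = val_acc + entropy_weight * entropy
reward = F.add_scalar(F.mul_scalar(entropy, entropy_weight), val_acc)
# loss = -log_prob * reward
loss = F.mul_scalar(log_prob, (-1) * reward.data.item())
print(loss.data)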
Example #11
def vgg16(x):
    # Input:x -> 3,300,300

    # VGG11/MulScalar
    h = F.mul_scalar(x, 0.01735)
    # VGG11/AddScalar
    h = F.add_scalar(h, -1.99)
    # VGG11/Convolution -> 64,300,300
    h = PF.convolution(h, 64, (3, 3), (1, 1), name='Convolution')
    # VGG11/ReLU
    h = F.relu(h, True)
    # VGG11/MaxPooling -> 64,150,150
    h = F.max_pooling(h, (2, 2), (2, 2))
    # VGG11/Convolution_3 -> 128,150,150
    h = PF.convolution(h, 128, (3, 3), (1, 1), name='Convolution_3')
    # VGG11/ReLU_3
    h = F.relu(h, True)
    # VGG11/MaxPooling_2 -> 128,75,75
    h = F.max_pooling(h, (2, 2), (2, 2))
    # VGG11/Convolution_5 -> 256,75,75
    h = PF.convolution(h, 256, (3, 3), (1, 1), name='Convolution_5')
    # VGG11/ReLU_5
    h = F.relu(h, True)
    # VGG11/Convolution_6
    h = PF.convolution(h, 256, (3, 3), (1, 1), name='Convolution_6')
    # VGG11/ReLU_6
    h = F.relu(h, True)
    # VGG11/MaxPooling_3 -> 256,38,38
    h = F.max_pooling(h, (2, 2), (2, 2), True, (1, 1))
    # VGG11/Convolution_8 -> 512,38,38
    h = PF.convolution(h, 512, (3, 3), (1, 1), name='Convolution_8')
    # VGG11/ReLU_8
    h = F.relu(h, True)
    # VGG11/Convolution_9
    h = PF.convolution(h, 512, (3, 3), (1, 1), name='Convolution_9')
    # VGG11/ReLU_9
    h = F.relu(h, True)
    # # VGG11/MaxPooling_4 -> 512,19,19
    # h = F.max_pooling(h, (2,2), (2,2))
    # # VGG11/Convolution_11
    # h = PF.convolution(h, 512, (3,3), (1,1), name='Convolution_11')
    # # VGG11/ReLU_11
    # h = F.relu(h, True)
    # # VGG11/Convolution_12
    # h = PF.convolution(h, 512, (3,3), (1,1), name='Convolution_12')
    # # VGG11/ReLU_12
    # h = F.relu(h, True)
    return h
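A quick usage sketch of the backbone above, assuming it sits in the same module so vgg16 can resolve F and PF; per the layer comments, a 3x300x300 input yields a 512x38x38 feature map:

import nnabla as nn
import nnabla.functions as F
import nnabla.parametric_functions as PF

x = nn.Variable((1, 3, 300, 300))
h = vgg16(x)
print(h.shape)   # expected (1, 512, 38, 38) according to the layer comments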
Example #12
    def test_clear_input_if_no_need_grad2(self):
        x1 = nn.Variable([1, 5], need_grad=True)

        xx1 = F.identity(x1)  # (1)
        y1 = F.tanh(xx1)  # (2)
        y2 = F.add_scalar(y1)  # (3)

        answer = []
        answer.append([False])
        answer.append([True])
        answer.append([False])
        # y1 must not be cleared after (3) because y1 is required for the backward of (2).

        y2.forward(clear_no_need_grad=True)

        self.check_input_data_clear_called_flags(answer)
Example #13
    def test_clear_output_grad_argument(self, grad):
        x1 = nn.Variable([1], need_grad=True)

        xx1 = F.identity(x1)
        y1 = F.add_scalar(xx1)

        answer_grad = []
        if grad is None or isinstance(grad, nn.NdArray):
            answer_grad.append([False])  # y1
        else:
            answer_grad.append([True])  # y1
        answer_grad.append([True])  # xx1

        y1.forward(clear_no_need_grad=True)
        clear_called_flag_recorder.deactivate_clear_called_flag_recorder()
        clear_called_flag_recorder.activate_clear_called_flag_recorder()
        y1.backward(clear_buffer=True, grad=grad)

        self.check_grad_cleared_flags(answer_grad)
        assert y1.grad.clear_called == False
Example #14
    def __sub__(self, other):
        """
        Element-wise subtraction.
        Implements the subtraction operator expression ``A - B``, together with :func:`~nnabla.variable.__rsub__` .
        When a scalar is specified for ``other``, this function performs an
        element-wise operation for all elements in ``self``.

        Args:
            other (float or ~nnabla.Variable): Internally calling
                :func:`~nnabla.functions.sub2` or
                :func:`~nnabla.functions.add_scalar` according to the
                type.

        Returns: :class:`nnabla.Variable`

        """
        import nnabla.functions as F
        if isinstance(other, Variable):
            return F.sub2(self, other)
        return F.add_scalar(self, -other)
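In other words, a - b on two Variables builds an F.sub2 node, while a - 2.5 builds F.add_scalar(a, -2.5). A minimal check of both dispatch paths:

import numpy as np
import nnabla as nn

a = nn.Variable.from_numpy_array(np.array([3.0, 4.0], dtype=np.float32))
b = nn.Variable.from_numpy_array(np.array([1.0, 2.0], dtype=np.float32))

c = a - b      # F.sub2
d = a - 2.5    # F.add_scalar(a, -2.5)
c.forward()
d.forward()
print(c.d)     # [2. 2.]
print(d.d)     # [0.5 1.5]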
Example #15
def get_sample_and_feedback(args, data_dict):
    """
        Let the controller predict one architecture and test its performance to get feedback.
        Here the feedback is validation accuracy and will be reused to train the controller. 
    """

    entropy_weight = args.entropy_weight
    bl_dec = args.baseline_decay

    both_archs, log_probs, entropys = sample_from_controller(args)

    sample_entropy = entropys
    sample_log_prob = log_probs

    show_arch(both_archs)

    nn.set_auto_forward(False)
    val_acc = CNN_run(args, both_archs, data_dict)
    nn.set_auto_forward(True)

    print("Accuracy on Validation: {:.2f} %\n".format(100 * val_acc))

    reward = val_acc

    if entropy_weight is not None:
        reward = F.add_scalar(F.mul_scalar(sample_entropy, entropy_weight),
                              reward).d

    sample_log_prob = F.mul_scalar(sample_log_prob, (1 / args.num_candidate))

    if args.use_variance_reduction:
        baseline = 0.0
        # variance reduction
        baseline = baseline - ((1 - bl_dec) * (baseline - reward))
        reward = reward - baseline

    loss = F.mul_scalar(sample_log_prob, (-1) * reward)

    return loss, val_acc, both_archs
Example #16
def test_imperative_i1_o1():
    import nnabla.functions as F
    x = nn.NdArray([2, 3, 4])
    x.fill(1)
    x1 = F.add_scalar(x, 1)
    assert np.allclose(x1.data, 2)
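The same call behaves differently on the two array types: on nn.NdArray it executes immediately (imperative mode), while on nn.Variable it only builds a graph node that is evaluated by forward(). A small side-by-side sketch:

import nnabla as nn
import nnabla.functions as F

# imperative: computed at call time
a = nn.NdArray([2, 3, 4])
a.fill(1)
print(F.add_scalar(a, 1).data.mean())   # 2.0

# graph mode: computed on forward()
v = nn.Variable([2, 3, 4])
v.data.fill(1)
y = F.add_scalar(v, 1)
y.forward()
print(y.d.mean())                       # 2.0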
Example #17
    def __call__(self,
                 batch_size,
                 style_noises,
                 truncation_psi=1.0,
                 return_latent=False,
                 mixing_layer_index=None,
                 dlatent_avg_beta=0.995):

        with nn.parameter_scope(self.global_scope):
            # normalize noise inputs
            for i in range(len(style_noises)):
                style_noises[i] = F.div2(
                    style_noises[i],
                    F.pow_scalar(F.add_scalar(F.mean(style_noises[i]**2.,
                                                     axis=1,
                                                     keepdims=True),
                                              1e-8,
                                              inplace=False),
                                 0.5,
                                 inplace=False))

            # get latent code
            w = [
                mapping_network(style_noises[0],
                                outmaps=self.mapping_network_dim,
                                num_layers=self.mapping_network_num_layers)
            ]
            w += [
                mapping_network(style_noises[1],
                                outmaps=self.mapping_network_dim,
                                num_layers=self.mapping_network_num_layers)
            ]

            dlatent_avg = nn.parameter.get_parameter_or_create(
                name="dlatent_avg", shape=(1, 512))

            # Moving average update of dlatent_avg
            batch_avg = F.mean((w[0] + w[1]) * 0.5, axis=0, keepdims=True)
            update_op = F.assign(
                dlatent_avg, lerp(batch_avg, dlatent_avg, dlatent_avg_beta))
            update_op.name = 'dlatent_avg_update'
            # Adding 0 * update_op ties the assign into the graph so it runs on forward.
            dlatent_avg = F.identity(dlatent_avg) + 0 * update_op

            # truncation trick
            w = [lerp(dlatent_avg, _, truncation_psi) for _ in w]

            # generate output from generator
            constant_bc = nn.parameter.get_parameter_or_create(
                name="G_synthesis/4x4/Const/const",
                shape=(1, 512, 4, 4),
                initializer=np.random.randn(1, 512, 4, 4).astype(np.float32))
            constant_bc = F.broadcast(constant_bc,
                                      (batch_size, ) + constant_bc.shape[1:])

            if mixing_layer_index is None:
                mixing_layer_index_var = F.randint(1,
                                                   len(self.resolutions) * 2,
                                                   (1, ))
            else:
                mixing_layer_index_var = F.constant(val=mixing_layer_index,
                                                    shape=(1, ))
            mixing_switch_var = F.clip_by_value(
                F.arange(0,
                         len(self.resolutions) * 2) - mixing_layer_index_var,
                0, 1)
            mixing_switch_var_re = F.reshape(
                mixing_switch_var, (1, mixing_switch_var.shape[0], 1),
                inplace=False)
            w0 = F.reshape(w[0], (batch_size, 1, w[0].shape[1]), inplace=False)
            w1 = F.reshape(w[1], (batch_size, 1, w[0].shape[1]), inplace=False)
            w_mixed = w0 * mixing_switch_var_re + \
                w1 * (1 - mixing_switch_var_re)

            rgb_output = self.synthesis(w_mixed, constant_bc)

            if return_latent:
                return rgb_output, w_mixed
            else:
                return rgb_output
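The normalization applied to style_noises at the top of this example divides each latent by the root mean square of its own elements, z / sqrt(mean(z^2, axis=1) + 1e-8), before the mapping network. The same expression pulled out into a small helper for readability (the helper name is made up):

import nnabla.functions as F

def normalize_latent(z, eps=1e-8):
    # z / sqrt(mean(z^2, axis=1) + eps), denominator broadcast over axis 1
    ms = F.mean(F.pow_scalar(z, 2.0), axis=1, keepdims=True)
    return F.div2(z, F.pow_scalar(F.add_scalar(ms, eps, inplace=False), 0.5, inplace=False))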
Example #18
def generate_attribute_direction(args, attribute_prediction_model):

    if not os.path.isfile(os.path.join(args.weights_path, 'gen_params.h5')):
        os.makedirs(args.weights_path, exist_ok=True)
        print(
            "Downloading the pretrained tf-converted weights. Please wait...")
        url = "https://nnabla.org/pretrained-models/nnabla-examples/GANs/stylegan2/styleGAN2_G_params.h5"
        from nnabla.utils.data_source_loader import download
        download(url, os.path.join(args.weights_path, 'gen_params.h5'), False)

    nn.load_parameters(os.path.join(args.weights_path, 'gen_params.h5'))
    print('Loaded pretrained weights from tensorflow!')

    nn.load_parameters(args.classifier_weight_path)
    print(f'Loaded {args.classifier_weight_path}')

    batches = [
        args.batch_size for _ in range(args.num_images // args.batch_size)
    ]
    if args.num_images % args.batch_size != 0:
        batches.append(args.num_images -
                       (args.num_images // args.batch_size) * args.batch_size)

    w_plus, w_minus = 0.0, 0.0
    w_plus_count, w_minus_count = 0.0, 0.0
    pbar = trange(len(batches))
    for i in pbar:
        batch_size = batches[i]
        z = [F.randn(shape=(batch_size, 512)).data]

        z = [z[0], z[0]]

        for i in range(len(z)):
            z[i] = F.div2(
                z[i],
                F.pow_scalar(F.add_scalar(
                    F.mean(z[i]**2., axis=1, keepdims=True), 1e-8),
                             0.5,
                             inplace=True))

        # get latent code
        w = [mapping_network(z[0], outmaps=512, num_layers=8)]
        w += [mapping_network(z[1], outmaps=512, num_layers=8)]

        # truncation trick
        dlatent_avg = nn.parameter.get_parameter_or_create(name="dlatent_avg",
                                                           shape=(1, 512))
        w = [lerp(dlatent_avg, _, 0.7) for _ in w]

        constant_bc = nn.parameter.get_parameter_or_create(
            name="G_synthesis/4x4/Const/const", shape=(1, 512, 4, 4))
        constant_bc = F.broadcast(constant_bc,
                                  (batch_size, ) + constant_bc.shape[1:])

        gen = synthesis(w, constant_bc, noise_seed=100, mix_after=7)

        classifier_score = F.softmax(attribute_prediction_model(gen, True))
        confidence, class_pred = F.max(classifier_score,
                                       axis=1,
                                       with_index=True,
                                       keepdims=True)

        w_plus += np.sum(w[0].data * (class_pred.data == 0) *
                         (confidence.data > 0.65),
                         axis=0,
                         keepdims=True)
        w_minus += np.sum(w[0].data * (class_pred.data == 1) *
                          (confidence.data > 0.65),
                          axis=0,
                          keepdims=True)

        w_plus_count += np.sum(
            (class_pred.data == 0) * (confidence.data > 0.65))
        w_minus_count += np.sum(
            (class_pred.data == 1) * (confidence.data > 0.65))

        pbar.set_description(f'{w_plus_count} {w_minus_count}')

    # save attribute direction
    attribute_variation_direction = (w_plus / w_plus_count) - (w_minus /
                                                               w_minus_count)
    print(w_plus_count, w_minus_count)
    np.save(f'{args.classifier_weight_path.split("/")[0]}/direction.npy',
            attribute_variation_direction)
Example #19
def generate_data(args):

    if not os.path.isfile(os.path.join(args.weights_path, 'gen_params.h5')):
        os.makedirs(args.weights_path, exist_ok=True)
        print(
            "Downloading the pretrained tf-converted weights. Please wait...")
        url = "https://nnabla.org/pretrained-models/nnabla-examples/GANs/stylegan2/styleGAN2_G_params.h5"
        from nnabla.utils.data_source_loader import download
        download(url, os.path.join(args.weights_path, 'gen_params.h5'), False)

    nn.load_parameters(os.path.join(args.weights_path, 'gen_params.h5'))
    print('Loaded pretrained weights from tensorflow!')

    os.makedirs(args.save_image_path, exist_ok=True)

    batches = [
        args.batch_size for _ in range(args.num_images // args.batch_size)
    ]
    if args.num_images % args.batch_size != 0:
        batches.append(args.num_images -
                       (args.num_images // args.batch_size) * args.batch_size)

    for idx, batch_size in enumerate(batches):
        z = [
            F.randn(shape=(batch_size, 512)).data,
            F.randn(shape=(batch_size, 512)).data
        ]

        for i in range(len(z)):
            z[i] = F.div2(
                z[i],
                F.pow_scalar(F.add_scalar(
                    F.mean(z[i]**2., axis=1, keepdims=True), 1e-8),
                             0.5,
                             inplace=True))

        # get latent code
        w = [mapping_network(z[0], outmaps=512, num_layers=8)]
        w += [mapping_network(z[1], outmaps=512, num_layers=8)]

        # truncation trick
        dlatent_avg = nn.parameter.get_parameter_or_create(name="dlatent_avg",
                                                           shape=(1, 512))
        w = [lerp(dlatent_avg, _, 0.7) for _ in w]

        # Load direction
        if not args.face_morph:
            attr_delta = nn.NdArray.from_numpy_array(
                np.load(args.attr_delta_path))
            attr_delta = F.reshape(attr_delta[0], (1, -1))
            w_plus = [w[0] + args.coeff * attr_delta, w[1]]
            w_minus = [w[0] - args.coeff * attr_delta, w[1]]
        else:
            w_plus = [w[0], w[0]]  # content
            w_minus = [w[1], w[1]]  # style

        constant_bc = nn.parameter.get_parameter_or_create(
            name="G_synthesis/4x4/Const/const", shape=(1, 512, 4, 4))
        constant_bc = F.broadcast(constant_bc,
                                  (batch_size, ) + constant_bc.shape[1:])

        gen_plus = synthesis(w_plus, constant_bc, noise_seed=100, mix_after=8)
        gen_minus = synthesis(w_minus,
                              constant_bc,
                              noise_seed=100,
                              mix_after=8)
        gen = synthesis(w, constant_bc, noise_seed=100, mix_after=8)

        image_plus = convert_images_to_uint8(gen_plus, drange=[-1, 1])
        image_minus = convert_images_to_uint8(gen_minus, drange=[-1, 1])
        image = convert_images_to_uint8(gen, drange=[-1, 1])

        for j in range(batch_size):
            filepath = os.path.join(args.save_image_path,
                                    f'image_{idx*batch_size+j}')
            imsave(f'{filepath}_o.png', image_plus[j], channel_first=True)
            imsave(f'{filepath}_y.png', image_minus[j], channel_first=True)
            imsave(f'{filepath}.png', image[j], channel_first=True)
            print(f"Genetated. Saved {filepath}")
Example #21
def CNN_run(args, ops, arch_dict):
    """
        Based on the given model architecture,
        construct CNN and execute training.
        input:
            args: arguments set by user.
            ops: operations used in the network.
            arch_dict: a dictionary containing architecture information.
    """

    data_iterator = data_iterator_cifar10
    tdata = data_iterator(args.batch_size, True)
    vdata = data_iterator(args.batch_size, False)

    # CIFAR10 statistics: mean and standard deviation
    CIFAR_MEAN = np.reshape([0.49139968, 0.48215827, 0.44653124], (1, 3, 1, 1))
    CIFAR_STD = np.reshape([0.24703233, 0.24348505, 0.26158768], (1, 3, 1, 1))

    channels, image_height, image_width = 3, 32, 32
    batch_size = args.batch_size
    initial_model_lr = args.model_lr

    one_epoch = tdata.size // batch_size
    max_iter = args.epoch * one_epoch
    val_iter = 10000 // batch_size

    # Create monitor.
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=100)
    monitor_err = MonitorSeries("Training error", monitor, interval=100)
    monitor_vloss = MonitorSeries("Test loss", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=100)

    # prepare variables and graph used for test
    image_valid = nn.Variable(
        (batch_size, channels, image_height, image_width))
    label_valid = nn.Variable((batch_size, 1))
    input_image_valid = {"image": image_valid, "label": label_valid}
    pred_valid, _ = construct_networks(args,
                                       ops,
                                       arch_dict,
                                       image_valid,
                                       test=True)
    loss_valid = loss_function(pred_valid, label_valid)

    # set dropout rate in advance
    nn.parameter.get_parameter_or_create("drop_rate",
                                         shape=(1, 1, 1, 1),
                                         need_grad=False)
    initial_drop_rate = nn.Variable((1, 1, 1, 1)).apply(d=args.dropout_rate)
    nn.parameter.set_parameter("drop_rate", initial_drop_rate)

    # prepare variables and graph used for training
    image_train = nn.Variable(
        (batch_size, channels, image_height, image_width))
    label_train = nn.Variable((batch_size, 1))
    input_image_train = {"image": image_train, "label": label_train}
    pred_train, aux_logits = construct_networks(args,
                                                ops,
                                                arch_dict,
                                                image_train,
                                                test=False)
    loss_train = loss_function(pred_train, label_train, aux_logits,
                               args.auxiliary_weight)

    # prepare solvers
    model_params_dict = nn.get_parameters()
    solver_model = S.Momentum(initial_model_lr)
    solver_model.set_parameters(model_params_dict,
                                reset=False,
                                retain_state=True)

    # Training-loop
    for curr_epoch in range(args.epoch):
        print("epoch {}".format(curr_epoch))

        curr_dropout_rate = F.add_scalar(
            F.mul_scalar(initial_drop_rate, (curr_epoch / args.epoch)), 1e-8)
        nn.parameter.set_parameter("drop_rate", curr_dropout_rate)

        for i in range(one_epoch):
            image, label = tdata.next()
            image = image / 255.0
            image = (image - CIFAR_MEAN) / CIFAR_STD
            if args.cutout:
                image = cutout(image, args)
            input_image_train["image"].d = image
            input_image_train["label"].d = label
            loss_train.forward(clear_no_need_grad=True)

            e = categorical_error(pred_train.d, input_image_train["label"].d)
            monitor_loss.add(one_epoch * curr_epoch + i, loss_train.d.copy())
            monitor_err.add(one_epoch * curr_epoch + i, e)

            if args.lr_control_model:
                new_lr = learning_rate_scheduler(one_epoch * curr_epoch + i,
                                                 max_iter, initial_model_lr, 0)
                solver_model.set_learning_rate(new_lr)

            solver_model.zero_grad()
            loss_train.backward(clear_buffer=True)

            if args.with_grad_clip_model:
                for k, v in model_params_dict.items():
                    v.grad.copy_from(
                        F.clip_by_norm(v.grad, args.grad_clip_value_model))

            # update parameters
            solver_model.weight_decay(args.weight_decay_model)
            solver_model.update()

            if (one_epoch * curr_epoch + i) % args.model_save_interval == 0:
                nn.save_parameters(
                    os.path.join(
                        args.model_save_path,
                        'params_{}.h5'.format(one_epoch * curr_epoch + i)))

        # Validation during training.
        ve = 0.
        vloss = 0.
        for j in range(val_iter):
            image, label = vdata.next()
            image = image / 255.0
            image = (image - CIFAR_MEAN) / CIFAR_STD
            input_image_valid["image"].d = image
            input_image_valid["label"].d = label
            loss_valid.forward(clear_no_need_grad=True)
            vloss += loss_valid.d.copy()
            ve += categorical_error(pred_valid.d.copy(), label)
        ve /= val_iter
        vloss /= val_iter
        monitor_vloss.add(one_epoch * curr_epoch + i, vloss)
        monitor_verr.add(one_epoch * curr_epoch + i, ve)

    return
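The dropout schedule above grows linearly with training progress, curr_rate = initial_rate * (curr_epoch / total_epochs) + 1e-8, built from F.mul_scalar and F.add_scalar so the "drop_rate" parameter can be replaced each epoch. A toy numeric check of that arithmetic in imperative mode:

import numpy as np
import nnabla as nn
import nnabla.functions as F

initial_rate = nn.NdArray.from_numpy_array(np.full((1, 1, 1, 1), 0.4, dtype=np.float32))
total_epochs = 100
for curr_epoch in (0, 50, 99):
    curr = F.add_scalar(F.mul_scalar(initial_rate, curr_epoch / total_epochs), 1e-8)
    print(curr_epoch, curr.data.item())   # ~0.0, 0.2, 0.396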
Example #22
    def __call__(self, gen_rgb_out, patch_switch=False, index=0):

        out = conv_layer(gen_rgb_out,
                         inmaps=3,
                         outmaps=self.channels[0],
                         kernel_size=1,
                         name_scope='Discriminator/Convinitial')

        inmaps = self.channels[0]
        out_list = [out]
        for i in range(1, len(self.resolutions)):
            res = out.shape[2]
            outmaps = self.channels[i]
            out = res_block(out, res=res, outmaps=outmaps, inmaps=inmaps)
            inmaps = outmaps
            out_list.append(out)

        if patch_switch:
            GV_class = GetVariablesOnGraph(out)
            GF_class = GetFunctionFromInput(out, func_type_list=['LeakyReLU'])
            feature_dict = OrderedDict()
            for key in GV_class.coef_dict_on_graph:
                if ('res_block' in key and '/W' in key) and ('Conv1' in key or
                                                             'Conv2' in key):
                    feature_var = GF_class.functions[key][0].outputs[
                        0].function_references[0].outputs[0]
                    if feature_var.shape[2:] in ((32, 32), (16, 16)):
                        feature_dict[key] = GF_class.functions[key][0].outputs[
                            0].function_references[0].outputs[0]

        N, C, H, W = out.shape
        group = min(N, self.stddev_group)
        stddev_mean = F.reshape(
            out, (group, -1, self.stddev_feat, C // self.stddev_feat, H, W),
            inplace=False)

        mean = F.mul_scalar(F.sum(stddev_mean, axis=0, keepdims=True),
                            1.0 / stddev_mean.shape[0],
                            inplace=False)

        stddev_mean = F.mean(F.pow_scalar(
            F.sub2(stddev_mean, F.broadcast(mean, stddev_mean.shape)), 2.),
                             axis=0,
                             keepdims=False)
        stddev_mean = F.pow_scalar(F.add_scalar(stddev_mean,
                                                1e-8,
                                                inplace=False),
                                   0.5,
                                   inplace=False)

        stddev_mean = F.mean(stddev_mean, axis=[2, 3, 4], keepdims=True)
        stddev_mean = F.reshape(stddev_mean,
                                stddev_mean.shape[:2] + stddev_mean.shape[3:],
                                inplace=False)

        out = F.concatenate(out, F.tile(stddev_mean, (group, 1, H, W)), axis=1)

        out = conv_layer(out,
                         inmaps=out.shape[1],
                         outmaps=self.channels[-1],
                         kernel_size=3,
                         name_scope='Discriminator/Convfinal')

        out = F.reshape(out, (N, -1), inplace=False)

        # Linear Layers
        lrmul = 1
        scale = 1 / (out.shape[1]**0.5) * lrmul
        W, bias = weight_init_fn(
            (out.shape[-1], self.channels[-1]),
            weight_var='Discriminator/final_linear_1/affine')
        out = F.affine(out, W * scale, bias * lrmul)
        out = F.mul_scalar(F.leaky_relu(out, alpha=0.2, inplace=False),
                           np.sqrt(2),
                           inplace=False)

        scale = 1 / (out.shape[1]**0.5) * lrmul
        W, bias = weight_init_fn(
            (out.shape[-1], 1),
            weight_var='Discriminator/final_linear_2/affine')
        out = F.affine(out, W * scale, bias * lrmul)

        if patch_switch:
            return out, list(feature_dict.values())[index]
        else:
            return out