Esempio n. 1
0
def test_max_with_index(seed, ctx, func_name, inshape, axis, keepdims):
    """Check ``F.max`` values and indices against NumPy references.

    Two index-returning modes are exercised: ``with_index=True`` (values and
    indices together) and ``only_index=True`` (indices alone).
    """
    rng = np.random.RandomState(seed)
    x = nn.Variable.from_numpy_array(rng.randn(*inshape).astype(np.float32))

    def reference_index(like):
        # Keep the sizes of the axes that are not listed in `axis`, collapse
        # everything else into one trailing axis, and take the argmax there.
        kept = [size for dim, size in enumerate(x.d.shape) if dim not in axis]
        flattened = x.d.reshape(*(kept + [-1]))
        return flattened.argmax(-1).reshape(like.shape)

    # Values and indices returned together.
    with nn.context_scope(ctx), nn.auto_forward(True):
        val, idx = F.max(x, axis, keepdims, with_index=True)
    assert_allclose(val.d, np.amax(x.d, axis, keepdims=keepdims))
    assert np.all(idx.d == reference_index(idx.d))

    # Indices only.
    with nn.context_scope(ctx), nn.auto_forward(True):
        idx = F.max(x, axis, keepdims, only_index=True)
    assert np.all(idx.d == reference_index(idx.d))
Esempio n. 2
0
    def chamfer_hausdorff_oneside_dists(X0, X1):
        """Compute one-sided Chamfer and Hausdorff distances from X0 to X1.

        For every row of ``X0`` the Euclidean distance to its nearest row of
        ``X1`` is found block by block (``sub_batch_size`` rows at a time, taken
        from the enclosing scope) so the full pairwise matrix is never held.

        Args:
            X0: array-like of points, sliced along axis 0.
            X1: array-like of points, sliced along axis 0.

        Returns:
            Tuple ``(ocd, ohd)``: the mean and the maximum of the
            nearest-neighbour distances, as NumPy data.
        """
        b0 = X0.shape[0]
        b1 = X1.shape[0]

        sum_ = 0
        max_ = nn.NdArray.from_numpy_array(np.array(-np.inf))
        n = 0
        for i in tqdm.tqdm(range(0, b0, sub_batch_size),
                           desc="cdist-outer-loop"):
            x0 = nn.NdArray.from_numpy_array(X0[i:i + sub_batch_size])
            norm_x0 = F.sum(x0**2.0, axis=1, keepdims=True)
            # Running nearest-neighbour distance for each row of this block.
            min_ = nn.NdArray.from_numpy_array(np.ones(x0.shape[0]) * np.inf)
            for j in tqdm.tqdm(range(0, b1, sub_batch_size),
                               desc="cdist-inner-loop"):
                x1 = nn.NdArray.from_numpy_array(X1[j:j + sub_batch_size])
                # block pwd: |a|^2 + |b|^2 - 2 a.b
                norm_x1 = F.transpose(F.sum(x1**2.0, axis=1, keepdims=True),
                                      (1, 0))
                x1_T = F.transpose(x1, (1, 0))
                x01 = F.affine(x0, x1_T)
                # Rounding can push the squared distance slightly below zero
                # for (near-)identical points; clamp so the root is not NaN.
                sq_dists = F.maximum_scalar(norm_x0 + norm_x1 - 2.0 * x01, 0.0)
                bpwd = sq_dists**0.5
                # block min
                min_ = F.minimum2(min_, F.min(bpwd, axis=1))
            # sum/max over cols
            sum_ += F.sum(min_)
            # Count the rows of this X0 block directly rather than through the
            # inner-loop variable, which is unbound when X1 is empty.
            n += x0.shape[0]
            max_ = F.maximum2(max_, F.max(min_))
        ocd = sum_.data / n
        ohd = max_.data
        return ocd, ohd
Esempio n. 3
0
    def build_train_graph(self, batch):
        """Build the Q-learning training graph and keep handles to it.

        Args:
            batch: tuple ``(obs, action, reward, terminal, newobs)``; only the
                ``.shape`` of each element is used, to size the placeholders.

        Side effects:
            Sets ``self.solver`` (Adam over the parameters under
            ``self.name_q``), ``self.v`` (named tuple of graph variables),
            calls ``self.sync_models()`` and sets ``self.built = True``.
        """
        self.solver = S.Adam(self.learning_rate)

        obs, action, reward, terminal, newobs = batch
        # One placeholder per batch element, shaped like the sample.
        s, a, r, t, snext = (
            nn.Variable(sample.shape)
            for sample in (obs, action, reward, terminal, newobs))

        # Online network: its parameters are the ones the solver updates.
        with nn.parameter_scope(self.name_q):
            q = self.q_builder(s, self.num_actions, test=False)
            self.solver.set_parameters(nn.get_parameters())

        # Next-state network, built in test mode and excluded from gradients.
        with nn.parameter_scope(self.name_qnext):
            qnext = self.q_builder(snext, self.num_actions, test=True)
        qnext.need_grad = False

        # Clip rewards to [-clip_reward, clip_reward].
        lower_bounded = F.maximum_scalar(r, -self.clip_reward)
        clipped_r = F.minimum_scalar(lower_bounded, self.clip_reward)

        # Q-value of the action actually taken, selected with a one-hot mask.
        action_mask = F.one_hot(
            F.reshape(a, (-1, 1), inplace=False), (q.shape[1],))
        q_a = F.sum(q * action_mask, axis=1)

        not_done = 1 - t
        target = clipped_r + self.gamma * not_done * F.max(qnext, axis=1)
        loss = F.mean(F.huber_loss(q_a, target))

        Variables = namedtuple(
            'Variables', ['s', 'a', 'r', 't', 'snext', 'q', 'loss'])
        self.v = Variables(s, a, r, t, snext, q, loss)
        self.sync_models()
        self.built = True
Esempio n. 4
0
    def random_generate(self, num_images, path):
        """Generate images from random latent codes and save them under ``path``.

        Two images are written: ``generate_uniform.png`` from codebook indices
        drawn with ``F.randint``, and ``generate_pixelcnn.png`` from indices
        sampled position by position from ``self.prior`` conditioned on random
        one-hot class labels.

        Args:
            num_images: number of latent grids to sample.
            path: directory receiving the two PNG files.
        """
        grid_shape = [num_images] + self.latent_shape

        def embed_grid(flat_indices):
            # Look up codebook vectors, restore the latent grid layout and
            # move the embedding axis to position 1.
            vectors = F.embed(flat_indices,
                              self.base_model.vq.embedding_weight)
            return F.transpose(
                vectors.reshape(grid_shape + [vectors.shape[-1]]),
                (0, 3, 1, 2))

        # Generate from the uniform prior of the base model
        indices = F.randint(low=0, high=self.num_embedding, shape=grid_shape)
        indices = F.reshape(indices, (-1, ), inplace=True)
        img_gen_uniform_prior = self.base_model(embed_grid(indices),
                                                quantized_as_input=True,
                                                test=True)

        # Generate images using pixelcnn prior
        indices = nn.Variable.from_numpy_array(np.zeros(shape=grid_shape))
        labels = F.one_hot(
            F.randint(low=0, high=self.num_classes, shape=(num_images, 1)),
            shape=(self.num_classes, ))

        # Sample from pixelcnn - pixel by pixel
        import torch  # Numpy behavior is different and not giving correct output
        for i in range(self.latent_shape[0]):
            for j in range(self.latent_shape[1]):
                indices_sample = self.prior(
                    embed_grid(indices.reshape((-1, ))), labels)
                # Reshape the prior output to the grid layout plus a trailing
                # axis, then keep the entries for position (i, j).
                per_position = F.reshape(
                    indices_sample,
                    indices.shape + (indices_sample.shape[-1], ),
                    inplace=True)
                indices_prob = F.softmax(per_position[:, i, j])

                drawn = torch.from_numpy(indices_prob.d).multinomial(1)
                indices[:, i, j] = drawn.squeeze().numpy()

        print(indices.d)
        img_gen_pixelcnn_prior = self.base_model(
            embed_grid(indices.reshape((-1, ))),
            quantized_as_input=True,
            test=True)

        self.save_image(img_gen_uniform_prior,
                        os.path.join(path, 'generate_uniform.png'))
        self.save_image(img_gen_pixelcnn_prior,
                        os.path.join(path, 'generate_pixelcnn.png'))

        print('Random labels generated for pixelcnn prior:',
              list(F.max(labels, axis=1, only_index=True).d))
Esempio n. 5
0
    def _build(self):
        """Build the inference graph, the training graph and the solver.

        Creates placeholders for a single observation (inference) and for a
        batch of transitions (training), builds the multi-head Q-functions in
        the ``'q_func'`` and ``'target'`` scopes, defines a weighted Huber
        loss summed over heads, and registers the ``'q_func'`` parameters
        with an RMSprop solver.
        """
        # infer variable
        self.infer_obs_t = nn.Variable((1, 4, 84, 84))
        # inference output
        self.infer_qs_t = self.q_function(self.infer_obs_t, self.num_actions,
                                          self.num_heads, 'q_func')
        # Single handle that groups the head outputs.
        self.infer_all = F.sink(*self.infer_qs_t)

        # train variables
        self.obss_t = nn.Variable((self.batch_size, 4, 84, 84))
        self.acts_t = nn.Variable((self.batch_size, 1))
        self.rews_tp1 = nn.Variable((self.batch_size, 1))
        self.obss_tp1 = nn.Variable((self.batch_size, 4, 84, 84))
        self.ters_tp1 = nn.Variable((self.batch_size, 1))
        self.weights = nn.Variable((self.batch_size, self.num_heads))

        # training output
        qs_t = self.q_function(self.obss_t, self.num_actions, self.num_heads,
                               'q_func')
        # Use the same builder as the online network; a bare `q_function`
        # would bypass the one stored on the instance.
        qs_tp1 = self.q_function(self.obss_tp1, self.num_actions,
                                 self.num_heads, 'target')
        # Stack the per-head outputs and swap the first two axes.
        stacked_qs_t = F.transpose(F.stack(*qs_t), [1, 0, 2])
        stacked_qs_tp1 = F.transpose(F.stack(*qs_tp1), [1, 0, 2])

        # select one dimension
        a_one_hot = F.reshape(F.one_hot(self.acts_t, (self.num_actions, )),
                              (-1, 1, self.num_actions))
        # mask output
        q_t_selected = F.sum(stacked_qs_t * a_one_hot, axis=2)
        q_tp1_best = F.max(stacked_qs_tp1, axis=2)
        q_tp1_best.need_grad = False

        # reward clipping
        clipped_rews_tp1 = clip_by_value(self.rews_tp1, -1.0, 1.0)

        # loss calculation
        y = clipped_rews_tp1 + self.gamma * q_tp1_best * (1.0 - self.ters_tp1)
        td = F.huber_loss(q_t_selected, y)
        self.loss = F.mean(F.sum(td * self.weights, axis=1))

        # optimizer
        self.solver = S.RMSprop(self.lr, 0.95, 1e-2)

        # weights and biases
        with nn.parameter_scope('q_func'):
            self.params = nn.get_parameters()
            self.head_params = []
            for i in range(self.num_heads):
                with nn.parameter_scope('head%d' % i):
                    self.head_params.append(nn.get_parameters())
            with nn.parameter_scope('shared'):
                self.shared_params = nn.get_parameters()
        with nn.parameter_scope('target'):
            self.target_params = nn.get_parameters()

        # set q function parameters to solver
        self.solver.set_parameters(self.params)
Esempio n. 6
0
    def forward_impl(self, inputs, outputs):
        """Copy the input to the output and update a decayed maximum.

        ``inputs[0]`` is copied unchanged into ``outputs[0]``. When
        ``self.training`` is true, ``inputs[1]`` is overwritten in place with
        ``decay * M + (1 - decay) * max(x)``.
        """
        src = inputs[0].data
        running = inputs[1].data
        outputs[0].data.copy_from(src)

        if self.training:
            batch_max = F.max(src, keepdims=True)
            blended = self.decay * running + (1 - self.decay) * batch_max
            # Write the blended value back into the state buffer.
            F.identity(blended, outputs=[running])
Esempio n. 7
0
    def forward_impl(self, inputs, outputs):
        """Copy the input to the output and track the running maximum.

        ``inputs[0]`` is copied unchanged into ``outputs[0]``. When
        ``self.training`` is true, ``inputs[1]`` is overwritten in place with
        the element-wise maximum of itself and ``max(x)``.
        """
        src = inputs[0].data
        running_max = inputs[1].data
        outputs[0].data.copy_from(src)

        if self.training:
            batch_max = F.max(src, keepdims=True)
            # Write max(running_max, batch_max) back into the state buffer.
            F.maximum2(running_max, batch_max, outputs=[running_max])
Esempio n. 8
0
def encode_text(text):
    """Encode token ids into a text embedding using the loaded parameters.

    All sizes (widths, context length, vocabulary, layer count) are read from
    the shapes and names of the currently registered parameters.

    Args:
        text: token-id variable; the comment on the embedding lookup gives
            the embedded shape as [batch_size, n_ctx, d_model].

    Returns:
        Variable reshaped to ``(-1, embed_dim)``.
    """
    params = nn.get_parameters()

    embed_dim = params['text_projection'].shape[1]
    context_length = params['positional_embedding'].shape[0]
    vocab_size = params['token_embedding/W'].shape[0]
    transformer_width = params['ln_final/W'].shape[0]
    transformer_heads = transformer_width // 64
    # Count distinct third path components under 'transformer/resblocks'.
    block_ids = {
        key.split('/')[2] for key in params
        if key.startswith('transformer/resblocks')
    }
    transformer_layers = len(block_ids)

    def fetch(name, shape):
        return nn.parameter.get_parameter_or_create(name=name, shape=shape)

    token_embedding = fetch('token_embedding/W',
                            (vocab_size, transformer_width))
    x = F.embed(text, token_embedding)  # [batch_size, n_ctx, d_model]

    positional_embedding = fetch(
        'positional_embedding', (context_length, transformer_width))
    x = x + positional_embedding.reshape(
        (1, context_length, transformer_width))

    x = F.transpose(x, (1, 0, 2))  # NLD -> LND
    x = transformer(x,
                    transformer_width,
                    transformer_layers,
                    transformer_heads,
                    attn_mask=build_attn_mask(context_length))
    x = F.transpose(x, (1, 0, 2))  # LND -> NLD

    affine_shape = (1, 1, transformer_width)
    ln_final_W = fetch('ln_final/W',
                       (transformer_width, )).reshape(affine_shape)
    ln_final_b = fetch('ln_final/b',
                       (transformer_width, )).reshape(affine_shape)
    x = F.layer_normalization(x, ln_final_b, ln_final_W, batch_axis=(0, 1))

    # For each sequence, keep the features at the position of the largest
    # token id. The index must be evaluated now because it is used for
    # NumPy-style indexing below.
    idx = F.max(text, axis=-1, only_index=True)
    idx.forward()
    batch = x.shape[0]
    selected = x[list(range(batch)), idx.d].reshape((1, batch, -1))

    text_projection = fetch(
        'text_projection', (transformer_width, embed_dim)).reshape(
            (1, transformer_width, embed_dim))
    projected = F.batch_matmul(selected, text_projection)

    return projected.reshape((-1, embed_dim))
Esempio n. 9
0
    def ray_march(self, camloc, raydir, t0, t1, N, n_chunks, t_argmin=False):
        """Sample ``N`` points per ray in [t0, t1] and locate SDF sign changes.

        Args:
            camloc: ray origins, reshaped to (BR, 1, 3).
            raydir: ray directions, reshaped to (BR, 1, 3).
            t0, t1: per-ray start/end parameters; ``t0`` is 2-D with BR rows.
            N: number of samples per ray.
            n_chunks: the BR * N points are evaluated by ``self.sdf`` in
                slices of ``(BR * N) // n_chunks`` rows.
            t_argmin: when truthy, return only the ray parameter at which the
                sampled SDF is smallest, shaped (BR, 1).

        Returns:
            ``t_argmin`` mode: a (BR, 1) variable. Otherwise a tuple
            ``(x_hit_rm0, x_hit_rm1, mask_hit)``: the sample preceding the
            selected sign change, that sample advanced by one step along the
            ray, and a (BR, 1) flag that is non-zero where any sign change
            was found.
        """
        # Points computation
        num_rays, _ = t0.shape
        total = num_rays * N
        t_near = F.reshape(t0, (num_rays, 1, 1))
        t_far = F.reshape(t1, (num_rays, 1, 1))
        origins = F.reshape(camloc, (num_rays, 1, 3))
        directions = F.reshape(raydir, (num_rays, 1, 3))
        step = (t_far - t_near) / (N - 1)
        ts = t_near + step * F.reshape(F.arange(0, N), (1, N, 1))
        points = F.reshape(origins + ts * directions, (total, 3))

        # SDF computation, slice by slice
        chunk = total // n_chunks
        sdf_chunks = [
            self.sdf(points[start:start + chunk, :])
            for start in range(0, total, chunk)
        ]
        if n_chunks != 1:
            sdf_flat = F.concatenate(*sdf_chunks, axis=0)
        else:
            sdf_flat = sdf_chunks[0]
        sdf_points = F.reshape(sdf_flat, (num_rays, N, 1))

        # t_argmin computation
        if t_argmin:
            idx_min = F.min(sdf_points, axis=1, keepdims=True, only_index=True)
            t_at_min = F.gather(ts, idx_min, axis=1, batch_dims=1)
            return F.reshape(t_at_min, (num_rays, 1))

        # Intersection check: sample k is >= 0 and sample k + 1 is <= 0.
        points = F.reshape(points, (num_rays, N, 3))
        non_negative = F.greater_equal_scalar(sdf_points[:, :-1, :], 0)
        non_positive_next = F.less_equal_scalar(sdf_points[:, 1:, :], 0)
        mask_hit = non_negative * non_positive_next

        # Strictly decreasing weights make the earliest sign change the
        # largest entry, so the max index selects it.
        weights = F.reshape(F.arange(N, 1, -1), (1, N - 1, 1))
        idx_max = F.max(mask_hit * weights, axis=1, only_index=True)

        x_hit_rm0 = F.reshape(
            F.gather(points[:, :-1, :], idx_max, axis=1, batch_dims=1),
            (num_rays, 3))
        mask_hit = F.reshape(
            F.greater_scalar(F.sum(mask_hit, axis=1), 0), (num_rays, 1))

        # Second point: one step further along the ray.
        step_2d = F.reshape(step, (num_rays, 1))
        directions_2d = F.reshape(directions, (num_rays, 3))
        x_hit_rm1 = x_hit_rm0 + step_2d * directions_2d

        return x_hit_rm0, x_hit_rm1, mask_hit
Esempio n. 10
0
    def visualize_discrete_image(self, var, filename):
        """Save a tiled image of discrete codebook indices to ``filename``.

        Args:
            var: variable with fewer than three dimensions. A 2-D input with
                more than one column is first reduced to the index of the
                largest entry in each row.
            filename: destination passed to ``plt.savefig``.
        """
        assert var.ndim < 3, 'The discrete image should only consist of indices of the codebook vectors'
        needs_argmax = var.ndim == 2 and var.shape[1] > 1
        if needs_argmax:
            var = F.max(var, axis=1, only_index=True)

        # Lay the indices out as single-channel latent grids and divide by
        # the codebook size.
        grid = F.reshape(var, [-1, 1] + self.latent_shape, inplace=True)
        scaled = grid / self.num_embedding

        tiled = nn.monitor.tile_images(scaled.d)
        plt.imshow(tiled, cmap='magma')
        plt.axis('off')
        plt.savefig(filename, bbox_inches='tight')
        plt.close()

        print('Reconstruction saved at {}'.format(filename))
Esempio n. 11
0
def pointer_net(query_embed, query_embed_mask, decoder_states, hidden_dim):
    """Score every query position for every decoder step.

    query_embed: (batch_size, max_query_length, E1)
    query_embed_mask: reshaped to (batch_size, 1, max_query_length)
    decoder_states: (batch_size, max_action_length, E2)

    Returns masked, normalised scores of shape
    (batch_size, max_action_length, max_query_length).
    """

    def identity(x):
        return x

    with nn.parameter_scope("pointer_net"):
        batch_size, max_query_length, _ = query_embed.shape
        _, max_action_length, _ = decoder_states.shape
        pair_shape = (batch_size, max_action_length, max_query_length,
                      hidden_dim)
        score_shape = (batch_size, max_action_length, max_query_length)

        # Project both inputs to hidden_dim with no non-linearity.
        with nn.parameter_scope("layer1_input"):
            query_proj = dense(query_embed,
                               hidden_dim,
                               base_axis=2,
                               activation=identity)
        with nn.parameter_scope("layer1_h"):
            state_proj = dense(decoder_states,
                               hidden_dim,
                               base_axis=2,
                               activation=identity)

        # Broadcast to every (action step, query position) pair.
        query_proj = F.broadcast(
            F.reshape(query_proj,
                      (batch_size, 1, max_query_length, hidden_dim)),
            pair_shape)
        state_proj = F.broadcast(
            F.reshape(state_proj,
                      (batch_size, max_action_length, 1, hidden_dim)),
            pair_shape)
        hidden = F.tanh(query_proj + state_proj)

        with nn.parameter_scope("layer2"):
            # scores: (batch_size, max_action_length, max_query_length, 1)
            scores = dense(hidden, 1, base_axis=3, activation=identity)
        # scores: (batch_size, max_action_length, max_query_length)
        scores = F.reshape(scores, score_shape)

        # Masked softmax over query positions; the row maximum is subtracted
        # before exponentiation.
        exp_scores = F.exp(scores - F.max(scores, axis=2, keepdims=True))
        mask = F.broadcast(
            F.reshape(query_embed_mask, (batch_size, 1, max_query_length)),
            score_shape)
        masked = exp_scores * mask

        return masked / F.sum(masked, axis=2, keepdims=True)
Esempio n. 12
0
def get_preds_fromhm(hm, center=None, scale=None):
    """Obtain (x, y) coordinates given a set of N heatmaps.

    If the center and the scale are provided, the points are also returned in
    the original coordinate frame.

    Arguments:
        hm -- the predicted heatmaps, a 4-D variable of shape [B, N, H, W]
            (axis 2 is indexed by y, axis 3 by x). Its data is read through
            ``.d``, so it must already hold computed values; this presumably
            means the caller runs under ``nn.auto_forward()`` -- TODO confirm.

    Keyword Arguments:
        center {numpy.array} -- the center of the bounding box (default: {None})
        scale {float} -- face scale (default: {None})

    Returns:
        Tuple ``(preds, preds_orig)`` of [B, N, 2] variables. ``preds_orig``
        stays at its ``F.constant`` default unless both ``center`` and
        ``scale`` are given.
    """
    batch, n_points, rows, cols = hm.shape

    idx = F.max(F.reshape(hm, (batch, n_points, rows * cols)),
                axis=2,
                only_index=True)
    # Switch to 1-based flat indices; the coordinates below are 1-based too.
    idx.d += 1
    idx = F.reshape(idx, (batch, n_points, 1))
    preds = F.concatenate(idx, idx, axis=2)

    # Split the flat index into 1-based (x, y). The row index must come from
    # the 0-based flat index divided by the row stride; adding 1 before the
    # division instead puts peaks in the last two columns on the wrong row.
    flat = preds.d[..., 0] - 1
    preds.d[..., 0] = flat % cols + 1
    preds.d[..., 1] = flat // cols + 1

    # Nudge each peak a quarter pixel towards its larger neighbour, skipping
    # peaks on the border where a neighbour is missing.
    hm_data = hm.d
    for i in range(batch):
        for j in range(n_points):
            hm_ = hm_data[i, j]
            pX, pY = int(preds.d[i, j, 0]) - 1, int(preds.d[i, j, 1]) - 1

            if 0 < pX < cols - 1 and 0 < pY < rows - 1:
                preds.d[i, j] += (
                    np.sign(hm_[pY, pX + 1] - hm_[pY, pX - 1]) * .25,
                    np.sign(hm_[pY + 1, pX] - hm_[pY - 1, pX]) * .25,
                )

    preds.d -= .5
    preds_orig = F.constant(shape=preds.shape)
    if center is not None and scale is not None:
        for i in range(batch):
            for j in range(n_points):
                d = transform(list(preds.d[i][j]), center, scale, rows, True)
                preds_orig.d[i, j] = d[0], d[1]

    return preds, preds_orig
Esempio n. 13
0
    def _build(self):
        """Build the inference graph, the training graph and the solver.

        Creates placeholders for one observation (inference) and for a batch
        of transitions (training), builds Q-functions in the ``'q_func'`` and
        ``'target_q_func'`` scopes, exposes the TD error and a weighted Huber
        loss, and registers the ``'q_func'`` parameters with RMSprop.
        """
        obs_shape = (4, 84, 84)

        def batch_var(*tail):
            # Placeholder whose leading axis is the batch size.
            return nn.Variable((self.batch_size, ) + tail)

        # Inference: a single observation through the online network.
        self.infer_obs_t = nn.Variable((1, ) + obs_shape)
        self.infer_q_t = self.q_function(self.infer_obs_t,
                                         self.num_actions,
                                         scope='q_func')

        # Training placeholders.
        self.obss_t = batch_var(*obs_shape)
        self.acts_t = batch_var(1)
        self.rews_tp1 = batch_var(1)
        self.obss_tp1 = batch_var(*obs_shape)
        self.ters_tp1 = batch_var(1)
        self.weights = batch_var(1)

        # Online and target networks on the current / next observations.
        q_t = self.q_function(self.obss_t, self.num_actions, scope='q_func')
        q_tp1 = self.q_function(self.obss_tp1,
                                self.num_actions,
                                scope='target_q_func')

        # Q-value of the taken action, and the best next-state Q-value.
        taken = F.one_hot(self.acts_t, (self.num_actions, ))
        q_t_selected = F.sum(q_t * taken, axis=1, keepdims=True)
        q_tp1_best = F.max(q_tp1, axis=1, keepdims=True)

        # Target, TD error and per-sample weighted Huber loss.
        not_terminal = 1.0 - self.ters_tp1
        y = self.rews_tp1 + self.gamma * q_tp1_best * not_terminal
        self.td = q_t_selected - y
        self.loss = F.sum(F.huber_loss(q_t_selected, y) * self.weights)
        # Single handle that groups the TD error and the loss.
        self.loss_sink = F.sink(self.td, self.loss)

        self.solver = S.RMSprop(self.lr, 0.95, 1e-2)

        # Collect the parameters of each network by scope.
        with nn.parameter_scope('q_func'):
            self.params = nn.get_parameters()
        with nn.parameter_scope('target_q_func'):
            self.target_params = nn.get_parameters()

        # Only the online network is optimised.
        self.solver.set_parameters(self.params)
Esempio n. 14
0
def network_size_activations():
    """
    Returns total number of activations
    and size in KBytes (NNabla variable using `max` or `sum` operator)

    Every registered parameter whose name contains "Asize" contributes its
    value to the activation count and ``bitwidth * size / 8 / 1024`` KBytes.
    The bitwidth depends on ``cfg.a_quantize``: a fixed ``cfg.a_bitwidth``,
    a learned ``n`` parameter, a value derived from the learned step
    size / dynamic range, or 32 when quantization is disabled.

    Raises:
        ValueError: if ``cfg.a_quantize`` or ``cfg.target_activation_type``
            is not one of the supported values.
    """
    kbytes = []
    num_activations = 0

    # get all parameters
    ps = nn.get_parameters(grad_only=False)
    for p in ps:
        if "Asize" in p:
            print(f"{p}\t{ps[p].d}")

            num_activations += ps[p].d

            if cfg.a_quantize is not None:
                if cfg.a_quantize in ['fp_relu', 'pow2_relu']:
                    # fixed quantization
                    n = nn.Variable((), need_grad=False)
                    n.d = cfg.a_bitwidth
                elif cfg.a_quantize in [
                        'parametric_fp_relu', 'parametric_fp_b_xmax_relu',
                        'parametric_fp_d_b_relu',
                        'parametric_pow2_b_xmax_relu',
                        'parametric_pow2_b_xmin_relu'
                ]:
                    # parametric quantization
                    s = p.replace(
                        "/Asize", "/Aquant/" +
                        cfg.a_quantize.replace("_relu", "") + "/n")
                    n = F.round(
                        clip_scalar(ps[s], cfg.a_bitwidth_min,
                                    cfg.a_bitwidth_max))
                elif cfg.a_quantize in ['parametric_fp_d_xmax_relu']:
                    # these quantization methods do not have n, so we need to compute it!
                    # parametric quantization
                    quant_scope = ("/Aquant/" +
                                   cfg.a_quantize.replace("_relu", ""))
                    d = ps[p.replace("/Asize", quant_scope + "/d")]
                    xmax = ps[p.replace("/Asize", quant_scope + "/xmax")]

                    # ensure that stepsize is in specified range and a power of two
                    d_q = quantize_pow2(
                        clip_scalar(d, cfg.a_stepsize_min, cfg.a_stepsize_max))

                    # ensure that dynamic range is in specified range
                    xmax = clip_scalar(xmax, cfg.a_xmax_min, cfg.a_xmax_max)

                    # compute real `xmax`
                    xmax = F.round(xmax / d_q) * d_q

                    n = F.maximum_scalar(F.ceil(log2(xmax / d_q + 1.0)),
                                         cfg.a_bitwidth_min)
                elif cfg.a_quantize in ['parametric_pow2_xmin_xmax_relu']:
                    # these quantization methods do not have n, so we need to compute it!
                    # parametric quantization
                    quant_scope = ("/Aquant/" +
                                   cfg.a_quantize.replace("_relu", ""))
                    xmin = ps[p.replace("/Asize", quant_scope + "/xmin")]
                    xmax = ps[p.replace("/Asize", quant_scope + "/xmax")]

                    # ensure that dynamic ranges are in specified range and a power-of-two
                    xmin = quantize_pow2(
                        clip_scalar(xmin, cfg.a_xmin_min, cfg.a_xmin_max))
                    xmax = quantize_pow2(
                        clip_scalar(xmax, cfg.a_xmax_min, cfg.a_xmax_max))

                    # use ceil rounding
                    n = F.maximum_scalar(
                        F.ceil(log2(log2(xmax / xmin) + 1.) + 1.),
                        cfg.a_bitwidth_min)
                else:
                    raise ValueError("Unknown quantization method {}".format(
                        cfg.a_quantize))
            else:
                # float precision
                n = nn.Variable((), need_grad=False)
                n.d = 32.

            # bits -> bytes -> KBytes, kept as a 1-element variable so the
            # per-layer sizes can be concatenated below.
            kbytes.append(
                F.reshape(n * ps[p].d / 8. / 1024., (1, ), inplace=False))

    if cfg.target_activation_type == 'max':
        _kbytes = F.max(F.concatenate(*kbytes))
    elif cfg.target_activation_type == 'sum':
        _kbytes = F.sum(F.concatenate(*kbytes))
    else:
        # Fail with a clear message instead of an unbound-variable error on
        # the return statement.
        raise ValueError("Unknown target activation type {}".format(
            cfg.target_activation_type))
    return num_activations, _kbytes
Esempio n. 15
0
def nonlocal_net(B_lab_map,
                 relu_layers,
                 temperature=0.001 * 5,
                 detach_flag=False,
                 WTA_scale_weight=1,
                 feature_noise=0):
    """Warp reference values onto the target via feature correlation.

    Features for image A come from ``relu_layers[0:4]`` and for image B from
    ``relu_layers[4:8]``. Their centered, L2-normalised projections are
    correlated; a temperature softmax over the correlation mixes the
    4x-downsampled ``B_lab_map`` into the layout of A.

    Args:
        B_lab_map: 4-D reference map (batch, channel, height, width).
        relu_layers: sequence of eight feature maps, indexed as above.
        temperature: divisor applied to the correlation before the softmax.
        detach_flag, WTA_scale_weight, feature_noise: accepted but not used
            by this implementation.

    Returns:
        Tuple ``(y, similarity_map)``, both upsampled by 4 with
        nearest-neighbour interpolation: the warped reference values and the
        per-position maximum correlation.
    """
    batch_size, channel = B_lab_map.shape[0], B_lab_map.shape[1]
    image_height, image_width = B_lab_map.shape[2], B_lab_map.shape[3]
    feature_height = int(image_height / 4)
    feature_width = int(image_width / 4)

    feature_channel = 64
    inter_channels = 256

    # Per-level feature extraction, A first then B at each level.
    A_feature2_1, B_feature2_1 = (layer2_1(relu_layers[0]),
                                  layer2_1(relu_layers[4]))
    A_feature3_1, B_feature3_1 = (layer3_1(relu_layers[1]),
                                  layer3_1(relu_layers[5]))
    A_feature4_1, B_feature4_1 = (layer4_1(relu_layers[2]),
                                  layer4_1(relu_layers[6]))
    A_feature5_1, B_feature5_1 = (layer5_1(relu_layers[3]),
                                  layer5_1(relu_layers[7]))

    # Pad the deepest level when its spatial size differs from level 2.
    spatial_mismatch = (A_feature5_1.shape[2] != A_feature2_1.shape[2]
                        or A_feature5_1.shape[3] != A_feature2_1.shape[3])
    if spatial_mismatch:
        A_feature5_1 = pad_replicate(A_feature5_1)
        B_feature5_1 = pad_replicate(B_feature5_1)

    A_features = layer(
        F.concatenate(A_feature2_1, A_feature3_1, A_feature4_1, A_feature5_1,
                      axis=1),
        feature_channel * 4)
    B_features = layer(
        F.concatenate(B_feature2_1, B_feature3_1, B_feature4_1, B_feature5_1,
                      axis=1),
        feature_channel * 4)

    def unit_projection(features, name):
        # 1x1 convolution, flatten the spatial axes, center over positions,
        # then scale each position to unit L2 norm over channels.
        projected = PF.convolution(
            features, inter_channels, kernel=(1, 1), stride=(1, 1), name=name)
        flat = F.reshape(projected, (batch_size, inter_channels, -1))
        centered = flat - F.mean(flat, axis=2, keepdims=True)
        magnitude = F.norm(centered, p=2, axis=1, keepdims=True) + \
            sys.float_info.epsilon
        return F.div2(centered, magnitude)

    # pairwise cosine similarity
    theta_permute = F.transpose(unit_projection(A_features, 'theta'),
                                (0, 2, 1))
    phi_re = unit_projection(B_features, 'phi')
    # batch x (A positions) x (B positions)
    f = F.batch_matmul(theta_permute, phi_re)

    f_shape = f.shape
    f = F.reshape(f, (1,) + f_shape)
    f_similarity = F.reshape(f, (1,) + f_shape)
    # Best match score for every A position.
    similarity_map = F.reshape(
        F.max(f_similarity, axis=3, keepdims=True),
        (batch_size, 1, feature_height, feature_width))

    # f can be negative
    f_WTA = f / temperature
    lead_shape = f_WTA.shape
    f_WTA = F.reshape(f_WTA, (lead_shape[1], lead_shape[2], lead_shape[3]))
    # softmax along the last axis (B positions)
    f_div_C = F.softmax(f_WTA, axis=2)

    # downsample the reference color
    B_lab = F.average_pooling(B_lab_map, (4, 4))
    B_lab = F.transpose(
        F.reshape(B_lab, (batch_size, channel, -1)), (0, 2, 1))

    # multiply the corr map with color
    y = F.transpose(F.batch_matmul(f_div_C, B_lab), (0, 2, 1))
    y = F.reshape(y, (batch_size, channel, feature_height, feature_width))

    y = F.interpolate(y, scale=(4, 4), mode='nearest', align_corners=False)
    similarity_map = F.interpolate(
        similarity_map, scale=(4, 4), mode='nearest', align_corners=False)

    return y, similarity_map
Esempio n. 16
0
def _cond_att_lstm_gate(suffix, h_in, ctx_vec, par_h, state_size,
                        context_size, inner_w_init):
    """Sum the bias-free projections feeding one gate of ``cond_att_lstm``.

    Creates (or reuses) the parameters ``U<suffix>`` applied to ``h_in``,
    ``C<suffix>`` applied to ``ctx_vec`` and ``P<suffix>`` applied to
    ``par_h`` in the current parameter scope, in that order, and returns
    their sum.
    """
    pre = PF.affine(
        h_in,
        state_size,
        w_init=inner_w_init(state_size, state_size),
        with_bias=False,
        name="U" + suffix)
    pre += PF.affine(
        ctx_vec,
        state_size,
        w_init=inner_w_init(context_size, state_size),
        with_bias=False,
        name="C" + suffix)
    pre += PF.affine(
        par_h,
        state_size,
        w_init=inner_w_init(state_size, state_size),
        with_bias=False,
        name="P" + suffix)
    return pre


def cond_att_lstm(x,
                  parent_index,
                  mask,
                  context,
                  context_mask,
                  state_size,
                  att_hidden_size,
                  initial_state=None,
                  initial_cell=None,
                  hist=None,
                  dropout=0,
                  train=True,
                  w_init=None,
                  inner_w_init=None,
                  b_init=I.ConstantInitializer(0),
                  forget_bias_init=I.ConstantInitializer(1)):
    """
    LSTM whose gates are conditioned on an attention context vector and on
    the hidden state of a "parent" step looked up in the history.

    x: (batch_size, length, input_size)
    parent_index: (batch_size, length)
    mask: (batch_size, length)
    context: (batch_size, context_length, context_size)
    context_mask: (batch_size, context_length)
    state_size: size of the hidden and cell states
    att_hidden_size: size of the hidden layer of the attention scorer
    initial_state: (batch_size, state_size) or None (zeros are used)
    initial_cell: (batch_size, state_size) or None (zeros are used)
    hist: (batch_size, l, state_size) or None (a single zero entry is used)
    dropout: probability of dropping a unit of the per-gate input/recurrent
        masks; 0 disables dropout. Expected to be in [0, 1).
    train: when True and dropout > 0, random masks are sampled; otherwise
        the masks are the constant ``1 - dropout``
    w_init: initializer of the input/attention weights (Glorot-uniform
        when None)
    inner_w_init: callable ``(in_size, out_size) -> initializer`` used for
        the U*/C*/P* weights (``orthogonal`` when None)
    b_init: bias initializer of the input and attention layers
    forget_bias_init: bias initializer of the forget-gate input layer

    Returns a tuple ``(hs, cs, ctx, hist)``: the per-step hidden states,
    cell states and context vectors, each joined along axis 1 by the
    file-level ``concatenate`` helper (presumably giving
    (batch_size, length, ...) -- confirm against that helper), and the
    history extended with every hidden state produced here.
    """
    batch_size, length, input_size = x.shape
    _, context_length, context_size = context.shape

    if w_init is None:
        w_init = I.UniformInitializer(
            I.calc_uniform_lim_glorot(input_size, state_size))
    if inner_w_init is None:
        inner_w_init = orthogonal

    # One multiplicative mask per gate (i, f, c, o), shared by all steps.
    retain_prob = 1.0 - dropout
    z_w = nn.Variable((batch_size, 4, input_size), need_grad=False)
    z_w.d = 1
    z_u = nn.Variable((batch_size, 4, state_size), need_grad=False)
    z_u.d = 1

    if dropout > 0:
        if train:
            # F.dropout takes the *drop* probability and rescales the
            # survivors by 1 / (1 - p); the multiplication by retain_prob
            # below undoes that rescaling, leaving a plain 0/1 mask.
            z_w = F.dropout(z_w, p=dropout)
            z_u = F.dropout(z_u, p=dropout)
        # At inference the masks are simply the constant retain_prob.
        z_w *= retain_prob
        z_u *= retain_prob

    z_w = F.reshape(z_w, (batch_size, 4, 1, input_size))
    z_w = F.broadcast(z_w, (batch_size, 4, length, input_size))
    z_w = F.split(z_w, axis=1)
    z_u = F.split(z_u, axis=1)
    xi = z_w[0] * x
    xf = z_w[1] * x
    xc = z_w[2] * x
    xo = z_w[3] * x

    with nn.parameter_scope("cond_att_lstm"):
        # (batch_size, length, state_size)
        with nn.parameter_scope("lstm"):
            xi = PF.affine(
                xi,
                state_size,
                base_axis=2,
                w_init=w_init,
                b_init=b_init,
                name="Wi")
            xf = PF.affine(
                xf,
                state_size,
                base_axis=2,
                w_init=w_init,
                b_init=forget_bias_init,
                name="Wf")
            xc = PF.affine(
                xc,
                state_size,
                base_axis=2,
                w_init=w_init,
                b_init=b_init,
                name="Wc")
            xo = PF.affine(
                xo,
                state_size,
                base_axis=2,
                w_init=w_init,
                b_init=b_init,
                name="Wo")

        with nn.parameter_scope("context"):
            # context_att_trans: (batch_size, context_length, att_hidden_size)
            context_att_trans = PF.affine(
                context,
                att_hidden_size,
                base_axis=2,
                w_init=w_init,
                b_init=b_init,
                name="layer1_c")

    if initial_state is None:
        h = nn.Variable((batch_size, state_size), need_grad=False)
        h.data.zero()
    else:
        h = initial_state

    if initial_cell is None:
        c = nn.Variable((batch_size, state_size), need_grad=False)
        c.data.zero()
    else:
        c = initial_cell

    if hist is None:
        hist = nn.Variable((batch_size, 1, state_size), need_grad=False)
        hist.data.zero()

    # Per-step slices; each element is (batch_size, state_size)
    xi = split(xi, axis=1)
    xf = split(xf, axis=1)
    xc = split(xc, axis=1)
    xo = split(xo, axis=1)
    mask = F.reshape(mask, [batch_size, length, 1])  # (batch_size, length, 1)
    mask = F.broadcast(mask, [batch_size, length, state_size])
    # (batch_size, state_size)
    mask = split(mask, axis=1)
    # (batch_size, max_action_length)
    parent_index = parent_index + 1  # index == 0 means that parent is root
    # (batch_size)
    parent_index = split(parent_index, axis=1)

    hs = []
    cs = []
    ctx = []

    for x_i, x_f, x_c, x_o, m, p in zip(xi, xf, xc, xo, mask, parent_index):
        h_num = hist.shape[1]
        # NOTE: this scope is not nested in "cond_att_lstm", unlike the
        # scopes above; kept as-is so existing parameter names still match.
        with nn.parameter_scope("context"):
            h_att_trans = PF.affine(
                h,
                att_hidden_size,
                with_bias=False,
                w_init=w_init,
                name="layer1_h")  # (batch_size, att_hidden_size)
            h_att_trans = F.reshape(h_att_trans,
                                    (batch_size, 1, att_hidden_size))
            h_att_trans = F.broadcast(
                h_att_trans, (batch_size, context_length, att_hidden_size))
            att_hidden = F.tanh(context_att_trans + h_att_trans)
            att_raw = PF.affine(
                att_hidden, 1, base_axis=2, w_init=w_init,
                b_init=b_init)  # (batch_size, context_length, 1)
            att_raw = F.reshape(att_raw, (batch_size, context_length))
            # Masked softmax; the max is subtracted before exp for
            # numerical stability.
            ctx_att = F.exp(att_raw - F.max(att_raw, axis=1, keepdims=True))
            ctx_att = ctx_att * context_mask
            ctx_att = ctx_att / F.sum(ctx_att, axis=1, keepdims=True)
            ctx_att = F.reshape(ctx_att, (batch_size, context_length, 1))
            ctx_att = F.broadcast(ctx_att,
                                  (batch_size, context_length, context_size))
            ctx_vec = F.sum(
                context * ctx_att, axis=1)  # (batch_size, context_size)

        # parent_history: pick hist[:, p] through a one-hot matmul
        p = F.reshape(p, (batch_size, 1))
        p = F.one_hot(p, (h_num, ))
        p = F.reshape(p, (batch_size, 1, h_num))
        par_h = F.batch_matmul(p, hist)  # [batch_size, 1, state_size]
        par_h = F.reshape(par_h, (batch_size, state_size))

        with nn.parameter_scope("lstm"):
            i_t = F.sigmoid(x_i + _cond_att_lstm_gate(
                "i", z_u[0] * h, ctx_vec, par_h, state_size, context_size,
                inner_w_init))
            f_t = F.sigmoid(x_f + _cond_att_lstm_gate(
                "f", z_u[1] * h, ctx_vec, par_h, state_size, context_size,
                inner_w_init))
            c_pre = _cond_att_lstm_gate(
                "c", z_u[2] * h, ctx_vec, par_h, state_size, context_size,
                inner_w_init)
            c_t = f_t * c + i_t * F.tanh(x_c + c_pre)
            o_t = F.sigmoid(x_o + _cond_att_lstm_gate(
                "o", z_u[3] * h, ctx_vec, par_h, state_size, context_size,
                inner_w_init))
            h_t = o_t * F.tanh(c_t)

            # Where the mask is 0 the previous state is carried over.
            h_t = (1 - m) * h + m * h_t
            c_t = (1 - m) * c + m * c_t
            h = h_t
            c = c_t
            h_t = F.reshape(h_t, (batch_size, 1, state_size), inplace=False)
            c_t = F.reshape(c_t, (batch_size, 1, state_size), inplace=False)
            ctx_vec = F.reshape(
                ctx_vec, (batch_size, 1, context_size), inplace=False)
            hs.append(h_t)
            cs.append(c_t)
            ctx.append(ctx_vec)

            hist = F.concatenate(
                hist, h_t, axis=1)  # (batch_size, h_num + 1, state_size)

    return concatenate(
        *hs, axis=1), concatenate(
            *cs, axis=1), concatenate(
                *ctx, axis=1), hist
Esempio n. 17
0
    def _build(self):
        """Create the inference graph, the training graph, the loss and the solver.

        Attributes set: ``infer_obs_t``, ``infer_q_t``, ``infer_probs_t``,
        ``infer_t``, ``obss_t``, ``acts_t``, ``rews_tp1``, ``obss_tp1``,
        ``ters_tp1``, ``loss``, ``solver``, ``params`` and ``target_params``.
        Only the parameters under the 'q_func' scope are handed to the solver.
        """
        def q_net(obs, scope):
            # Same network call for every graph; only input and scope vary.
            return self.q_function(obs, self.num_actions, self.min_v,
                                   self.max_v, self.num_bins, scope)

        def append_axis(v):
            # Add a trailing axis of size one.
            return F.reshape(v, v.shape + (1, ))

        def to_column(v):
            return F.reshape(v, (-1, 1))

        def bin_mask(bin_index):
            # One-hot encode the bin indices and regroup them per sample.
            encoded = F.one_hot(to_column(bin_index), (self.num_bins, ))
            return F.reshape(encoded, (-1, self.num_bins, self.num_bins))

        # ---- inference graph (one observation at a time) ----
        single_obs = nn.Variable((1, 4, 84, 84))
        self.infer_obs_t = single_obs
        self.infer_q_t, self.infer_probs_t, _ = q_net(single_obs, 'q_func')
        self.infer_t = F.sink(self.infer_q_t, self.infer_probs_t)

        # ---- training inputs ----
        self.obss_t = nn.Variable((self.batch_size, 4, 84, 84))
        self.acts_t = nn.Variable((self.batch_size, 1))
        self.rews_tp1 = nn.Variable((self.batch_size, 1))
        self.obss_tp1 = nn.Variable((self.batch_size, 4, 84, 84))
        self.ters_tp1 = nn.Variable((self.batch_size, 1))

        # ---- training outputs (online network, then target network) ----
        q_t, probs_t, dists = q_net(self.obss_t, 'q_func')
        q_tp1, probs_tp1, _ = q_net(self.obss_tp1, 'target_q_func')

        # Distribution of the action that was actually taken.
        taken_one_hot = append_axis(
            F.one_hot(self.acts_t, (self.num_actions, )))
        probs_t_selected = F.max(probs_t * taken_one_hot, axis=1)

        # Distribution of the action with the largest next-step value.
        _, best_actions = F.max(q_tp1, axis=1, keepdims=True, with_index=True)
        best_one_hot = append_axis(
            F.one_hot(best_actions, (self.num_actions, )))
        probs_tp1_best = F.max(probs_tp1 * best_one_hot, axis=1)

        # Rewards are clipped to [-1, 1].
        clipped_rews_tp1 = clip_by_value(self.rews_tp1, -1.0, 1.0)

        # Terminal transitions contribute no future value.
        disc_q_tp1 = F.reshape(dists, (1, -1)) * (1.0 - self.ters_tp1)
        t_z = clip_by_value(clipped_rews_tp1 + self.gamma * disc_q_tp1,
                            self.min_v, self.max_v)

        # Fractional bin position of every target value, together with the
        # neighbouring lower and upper bins.
        b = (t_z - self.min_v) / ((self.max_v - self.min_v) /
                                  (self.num_bins - 1))
        lower = F.floor(b)
        lower_mask = bin_mask(lower)
        upper = F.ceil(b)
        upper_mask = bin_mask(upper)

        # Split each probability between the two neighbouring bins.
        mass_lower = append_axis(probs_tp1_best * (1 - (b - lower)))
        mass_upper = append_axis(probs_tp1_best * (b - lower))
        m = F.sum(mass_lower * lower_mask + mass_upper * upper_mask, axis=1)
        m.need_grad = False

        # Cross entropy between m and the selected distribution; 1e-10 keeps
        # the logarithm finite.
        log_selected = F.log(probs_t_selected + 1e-10)
        self.loss = -F.mean(F.sum(m * log_selected, axis=1))

        # optimizer
        self.solver = S.RMSprop(self.lr, 0.95, 1e-2)

        # weights and biases of both networks
        with nn.parameter_scope('q_func'):
            self.params = nn.get_parameters()
        with nn.parameter_scope('target_q_func'):
            self.target_params = nn.get_parameters()

        # Only the online network is trained by the solver.
        self.solver.set_parameters(self.params)
Esempio n. 18
0
def generate_attribute_direction(args, attribute_prediction_model):
    """Estimate a latent direction separating two predicted classes.

    Loads the generator weights (downloading them to ``args.weights_path``
    when 'gen_params.h5' is missing) and the classifier weights from
    ``args.classifier_weight_path``, generates ``args.num_images`` images in
    batches of ``args.batch_size``, classifies them with
    ``attribute_prediction_model`` and accumulates the latent codes of the
    samples predicted as class 0 / class 1 with confidence above 0.65.
    The difference of the two per-class means is saved as 'direction.npy'
    under the first '/'-separated component of
    ``args.classifier_weight_path``.
    """
    gen_params_path = os.path.join(args.weights_path, 'gen_params.h5')

    if not os.path.isfile(gen_params_path):
        os.makedirs(args.weights_path, exist_ok=True)
        print(
            "Downloading the pretrained tf-converted weights. Please wait...")
        url = "https://nnabla.org/pretrained-models/nnabla-examples/GANs/stylegan2/styleGAN2_G_params.h5"
        from nnabla.utils.data_source_loader import download
        download(url, gen_params_path, False)

    nn.load_parameters(gen_params_path)
    print('Loaded pretrained weights from tensorflow!')

    nn.load_parameters(args.classifier_weight_path)
    print(f'Loaded {args.classifier_weight_path}')

    # Full batches first, then one smaller batch for the remainder, if any.
    num_full, remainder = divmod(args.num_images, args.batch_size)
    batches = [args.batch_size] * num_full
    if remainder != 0:
        batches.append(remainder)

    def normalize(latent):
        # Divide by the root mean square over axis 1 (1e-8 avoids a zero
        # divisor).
        return F.div2(
            latent,
            F.pow_scalar(F.add_scalar(
                F.mean(latent**2., axis=1, keepdims=True), 1e-8),
                         0.5,
                         inplace=True))

    min_confidence = 0.65
    w_plus, w_minus = 0.0, 0.0
    w_plus_count, w_minus_count = 0.0, 0.0
    pbar = trange(len(batches))
    for step in pbar:
        batch_size = batches[step]
        noise = F.randn(shape=(batch_size, 512)).data

        # The same noise feeds both latent inputs.
        z = [normalize(latent) for latent in [noise, noise]]

        # get latent code
        w = [mapping_network(latent, outmaps=512, num_layers=8)
             for latent in z]

        # truncation trick
        dlatent_avg = nn.parameter.get_parameter_or_create(name="dlatent_avg",
                                                           shape=(1, 512))
        w = [lerp(dlatent_avg, code, 0.7) for code in w]

        constant_bc = nn.parameter.get_parameter_or_create(
            name="G_synthesis/4x4/Const/const", shape=(1, 512, 4, 4))
        constant_bc = F.broadcast(constant_bc,
                                  (batch_size, ) + constant_bc.shape[1:])

        gen = synthesis(w, constant_bc, noise_seed=100, mix_after=7)

        classifier_score = F.softmax(attribute_prediction_model(gen, True))
        confidence, class_pred = F.max(classifier_score,
                                       axis=1,
                                       with_index=True,
                                       keepdims=True)

        codes = w[0].data
        labels = class_pred.data
        scores = confidence.data

        # Sum the latent codes of confidently classified samples per class.
        w_plus += np.sum(codes * (labels == 0) * (scores > min_confidence),
                         axis=0,
                         keepdims=True)
        w_minus += np.sum(codes * (labels == 1) * (scores > min_confidence),
                          axis=0,
                          keepdims=True)

        # Count how many samples entered each sum.
        w_plus_count += np.sum((labels == 0) * (scores > min_confidence))
        w_minus_count += np.sum((labels == 1) * (scores > min_confidence))

        pbar.set_description(f'{w_plus_count} {w_minus_count}')

    # save attribute direction: difference between the two class means
    mean_plus = w_plus / w_plus_count
    mean_minus = w_minus / w_minus_count
    attribute_variation_direction = mean_plus - mean_minus
    print(w_plus_count, w_minus_count)
    np.save(f'{args.classifier_weight_path.split("/")[0]}/direction.npy',
            attribute_variation_direction)