コード例 #1
0
    def test_basic_ops_value(self):
        """Regression-check backend element-wise ops against pinned checksums.

        Two float 8x8 variables (`x`, `y`) and two boolean 8x8 variables
        (`z`, `w`) are drawn from a fixed NumPy seed.  Each backend op is
        evaluated with `K.eval`, its output is summed with `np.sum` and, for
        real-valued results, scaled and rounded before being compared with a
        hard-coded expected value (presumably recorded from a reference run).
        """
        # Fixed seed so the pinned checksums below are reproducible.
        np.random.seed(12082518)
        x = K.variable(np.random.randn(8, 8))
        y = K.variable(np.random.randn(8, 8))
        # Use the builtin `bool`: the `np.bool` alias was deprecated in
        # NumPy 1.20 and removed in 1.24 (AttributeError); on older NumPy it
        # was the very same object as `bool`, so behaviour is unchanged.
        z = K.variable(np.random.randint(0, 2, size=(8, 8)), dtype=bool)
        w = K.variable(np.random.randint(0, 2, size=(8, 8)), dtype=bool)

        # ====== activations and element-wise math on `x` ====== #
        self.assertEqual(round(np.sum(K.eval(K.relu(x, alpha=0.12))) * 10000),
                         276733)
        self.assertEqual(round(np.sum(K.eval(K.elu(x, alpha=0.12))) * 10000),
                         289202)
        self.assertEqual(np.sum(K.eval(K.softmax(x))), 8.0)
        self.assertEqual(round(np.sum(K.eval(K.softplus(x))) * 10000), 554564)
        self.assertEqual(round(np.sum(K.eval(K.softsign(x))) * 100000), 211582)
        self.assertEqual(round(np.sum(K.eval(K.sigmoid(x))) * 10000), 330427)
        self.assertEqual(round(np.sum(K.eval(K.hard_sigmoid(x))) * 10000),
                         330836)
        self.assertEqual(round(np.sum(K.eval(K.tanh(x))) * 100000), 290165)
        self.assertEqual(round(np.sum(K.eval(K.square(x))) * 10000), 744492)
        self.assertEqual(round(np.sum(K.eval(K.sqrt(x))) * 10000), 300212)
        self.assertEqual(round(np.sum(K.eval(K.abs(x))) * 10000), 559979)
        self.assertEqual(np.sum(K.eval(K.sign(x))), 6.0)
        self.assertEqual(round(np.sum(K.eval(K.inv(x))) * 1000), 495838)
        self.assertEqual(round(np.sum(K.eval(K.exp(x))) * 1000), 122062)
        self.assertEqual(round(np.sum(K.eval(K.log(K.abs(x)))) * 10000),
                         -344491)
        self.assertEqual(np.sum(K.eval(K.round(x))), 5.0)
        self.assertEqual(round(np.sum(K.eval(K.pow(x, 8))) * 100), 398153)
        self.assertEqual(
            round(np.sum(K.eval(K.clip(x, -0.12, 0.12))) * 1000000), 620529)
        # TODO: pygpu (libgpuarray) still not support diag
        # self.assertEqual(round(np.sum(K.eval(K.diag(x))) * 100000), 325289)
        self.assertEqual(np.sum(K.eval(K.eye(12, 8))), 8.0)

        # ====== comparisons and selection ====== #
        # Boolean inputs for (in)equality, float inputs for ordering.
        self.assertEqual(np.sum(K.eval(K.eq(z, w))), 38)
        self.assertEqual(np.sum(K.eval(K.neq(z, w))), 26)
        self.assertEqual(np.sum(K.eval(K.gt(x, y))), 33)
        self.assertEqual(np.sum(K.eval(K.ge(x, y))), 33)
        self.assertEqual(np.sum(K.eval(K.lt(x, y))), 31)
        self.assertEqual(np.sum(K.eval(K.le(x, y))), 31)
        self.assertEqual(round(np.sum(K.eval(K.switch(z, x, y))) * 100000),
                         139884)
コード例 #2
0
    def score(self,
              query,
              key=None,
              scale=1,
              window_width=None,
              q_proj=None,
              target_proj=None):
        r"""Compute the attention scores between `query` and `key`.

        The scoring rule (location, additive, dot-product, cosine or general)
        and the attention position (global, local-monotonic or
        local-predictive) are selected by the flags contained in `self`.

        Arguments:
          query: Query (or target sequence) tensor of shape
            `[batch_size, Tq, dim]`, or `[num_heads, batch_size, Tq, dim]`
            for multi-heads attention.
          key: Key (or source sequence) tensor of shape
            `[batch_size, Tv, dim]`, or `[num_heads, batch_size, Tv, dim]`
            for multi-heads attention. Only `ScoreLocation` works without it.
          scale: a single scalar, or a tensor of shape `[dim]`, multiplied
            into the scores; `1/sqrt(dim)` is the value suggested in
            (Vaswani et al. 2017).
          window_width : `None`, `Integer` or `Float` ([0, 1]). Total number
            of frames of one local-attention window
            (i.e. `left + 1 + right`). An `int` is a fixed number of frames,
            a `float` below 1 is a fraction of the source length `Tv`,
            and `None` means `Tq`.
          q_proj : `Dense` (fully connected) layer applied to the query
            - for `ScoreLocation`, it must have `1` hidden unit
            - for `ScoreGeneral`, it must have `dim` hidden units
          target_proj : `Dense`, used by predictive local attention; it is
            applied to the flattened target sequence (i.e. the query) to
            predict the position on the source sequence (i.e. the key).
            Its output dimension must be 1 and it must return a logit.

        Returns:
          Tensor of shape `[num_heads, batch_size, Tq, Tv]`, or
          `[num_heads, batch_size, Tq, 1]` if `ScoreLocation`

        Raises:
          NotImplementedError: `ScoreLocation` combined with local attention,
            or no supported scoring flag is found in `self`.
          ValueError: `key` is missing for a scoring mode that needs it.
        """
        # ====== fold the heads axis into the batch axis ====== #
        num_heads = _get_num_heads(query)
        multi_head = num_heads > 0
        if multi_head:
            query = bk.reshape(query, [-1] + list(query.shape[2:]))
            if key is not None:
                key = bk.reshape(key, [-1] + list(key.shape[2:]))
        Tq = query.shape[1]
        Tv = key.shape[1] if key is not None else Tq
        # scale has shape `[]` or `[dim]`
        scale = bk.array(scale, dtype=query.dtype)
        # ====== resolve the window width to a number of frames ====== #
        if window_width is None:
            n_frames = Tq
        elif window_width < 1:
            # fractional width is relative to the source length
            n_frames = window_width * Tv
        else:
            n_frames = window_width
        n_frames = int(n_frames)
        # ====== location-based scoring (query only) ====== #
        if AttentionMechanism.ScoreLocation in self:
            if PosLocalM in self or PosLocalP in self:
                raise NotImplementedError(
                    "ScoreLocation only support Global attention, but given: %s"
                    % str(self))
            # a `[dim]` scale is collapsed to its mean for this mode
            mean_scale = bk.reduce_mean(scale)
            # [batch_size * num_heads, Tq, 1]
            scores = mean_scale * q_proj(query)
            assert scores.shape[-1] == 1, \
              " q_proj must have only 1 hidden unit, but given %d" % scores.shape[-1]
        # ====== every other scoring mode compares query with key ====== #
        else:
            if key is None:
                raise ValueError(
                    "key must be provided for attention type: %s" % str(self))
            # ------ attention position (local or global) ------ #
            if PosLocalM in self:
                # keep only the last `n_frames` source frames
                key = key[:, -n_frames:]
            elif PosLocalP in self:
                flat_query = bk.reshape(query, ([0], -1))
                pt = bk.sigmoid(target_proj(flat_query))
                assert pt.shape[-1] == 1, (
                    "target_proj must project the query [., Tq * dim] to [., 1], i.e. "
                    "predicting the attention position on source sequence using "
                    "knowledge from target sequence.")
                # predicted centre on the source, `[batch_size * num_heads, 1]`
                pt = Tv * pt
                # Gaussian window around the centre, Eq (10) (Luong et al. 2015)
                # `[batch_size * num_heads, Tv]`
                positions = bk.arange(Tv, dtype=pt.dtype)
                squared_dist = bk.square(positions - pt)
                two_var = 2 * bk.square(n_frames / 2)
                gauss_est = bk.exp(-squared_dist / two_var)
                # `[batch_size * num_heads, 1, Tv]`
                gauss_est = bk.expand_dims(gauss_est, axis=1)
            # ------ scoring rule ------ #
            if AttentionMechanism.ScoreAdditive in self:
                # additive (concat): broadcast
                # [batch_size * num_heads, Tq, 1, dim] against
                # [batch_size * num_heads, 1, Tv, dim]
                q_exp = bk.expand_dims(query, axis=2)
                k_exp = bk.expand_dims(key, axis=1)
                combined = bk.tanh(q_exp + k_exp)
                # [batch_size * num_heads, Tq, Tv]
                scores = bk.reduce_sum(scale * combined, axis=-1)
            elif AttentionMechanism.ScoreDotProd in self:
                # dot-product: the query is scaled before the matmul so that
                # a `[dim]` scale broadcasts (case scale_tied=False)
                scaled_query = scale * query
                key_t = bk.swapaxes(key, 1, 2)
                scores = bk.matmul(scaled_query, key_t)
            elif AttentionMechanism.ScoreCosine in self:
                # cosine: same broadcasting layout as the additive mode
                q_exp = bk.expand_dims(query, axis=2)
                k_exp = bk.expand_dims(key, axis=1)
                norm_prod = bk.norm(q_exp, p=2) * bk.norm(k_exp, p=2)
                # [batch_size * num_heads, Tq, Tv, dim]
                similarity = (q_exp * k_exp) / norm_prod
                scores = bk.reduce_sum(scale * similarity,
                                       axis=-1,
                                       keepdims=False)
            elif AttentionMechanism.ScoreGeneral in self:
                # general: project the query only, then dot-product
                query = q_proj(query)
                assert query.shape[-1] == key.shape[-1], \
                  " q_proj must have %d hidden units, but given %d units" % \
                    (key.shape[-1], query.shape[-1])
                scaled_query = scale * query
                key_t = bk.swapaxes(key, 1, 2)
                scores = bk.matmul(scaled_query, key_t)
            else:
                raise NotImplementedError(
                    "No support for attention_type='%s'" % str(self))
            # ------ weight the scores by the predicted window ------ #
            if PosLocalP in self:
                scores = scores * gauss_est
        # ====== restore the leading heads axis ====== #
        if multi_head:
            head_shape = [num_heads, -1] + list(scores.shape[1:])
            scores = bk.reshape(scores, shape=head_shape)
        return scores
コード例 #3
0
ファイル: models_variational.py プロジェクト: imito/odin
def convolutional_vae(X, saved_states, **kwargs):
    """ Build a convolutional variational auto-encoder with `ed.KLqp`.

    Parameters
    ----------
    X : placeholder
        input placeholder, its first (batch) dimension must be known.
        It is cast to float32 before encoding; a newly created encoder
        reshapes it to `(-1, 28, 28, 1)`.
    saved_states : None or (f_inference, f_generative)
        previously created encoder and decoder to reuse; if None, both
        networks are created from scratch.
    **kwargs
        n : int (default: 10)
            number of latent dimensions

    Return
    ------
    (samples, z, qz), (f_inference, f_generative)
        samples : sigmoid of the decoder logits
        z : the prior `Normal_pz` of shape `(batch_size, n)`
        qz : the variational posterior `Normal_qz` produced by the encoder

    States
    ------
    [f_inference (encoder), f_generative (decoder)]

    Raises
    ------
    ValueError
        if the batch dimension of `X` is None

    """
    n = kwargs.get('n', 10)
    batch_size = K.get_shape(X)[0]
    if batch_size is None:
        raise ValueError("You must specify batch_size dimension for the input placeholder.")
    # ====== init ====== #
    if saved_states is None:
        # Encoder
        f_inference = N.Sequence([
            N.Reshape(shape=(-1, 28, 28, 1)),
            N.Conv(num_filters=32, filter_size=3, strides=1, pad='valid',
                   b_init=init_ops.constant_initializer(0.), activation=K.elu),
            N.Conv(num_filters=64, filter_size=5, strides=2, pad='same',
                   b_init=init_ops.constant_initializer(0.), activation=K.elu),

            N.Dropout(level=0.1),
            N.Flatten(outdim=2),

            # 2 * n outputs: the first n are `mu`, the last n feed `sigma`
            N.Dense(num_units=n * 2, b_init=None),
            N.BatchNorm(axes=0)
        ], debug=True, name='Encoder')
        # Decoder
        f_generative = N.Sequence([
            N.Dimshuffle(pattern=(0, 'x', 'x', 1)),
            N.TransposeConv(num_filters=64, filter_size=3, strides=1, pad='valid',
                            b_init=init_ops.constant_initializer(0.), activation=K.elu),
            N.TransposeConv(num_filters=32, filter_size=5, strides=2, pad='same',
                            b_init=init_ops.constant_initializer(0.), activation=K.elu),
            N.TransposeConv(num_filters=1, filter_size=13, strides=3, pad='valid',
                            b_init=None),
            N.BatchNorm(activation=K.linear),

            N.Flatten(outdim=3)
        ], debug=True, name="Decoder")
    else:
        f_inference, f_generative = saved_states
    # ====== Perform ====== #
    # Encoder
    y_encoder = f_inference(K.cast(X, 'float32'))
    mu = y_encoder[:, :n]
    # softplus keeps the standard deviation positive
    sigma = K.softplus(y_encoder[:, n:])
    qz = Normal(mu=mu, sigma=sigma, name='Normal_qz')
    # Decoder
    z = Normal(mu=K.zeros(shape=(batch_size, n)),
               sigma=K.ones(shape=(batch_size, n)), name="Normal_pz")
    logits = f_generative(z)
    X_reconstruct = Bernoulli(logits=logits)
    # inference
    params = f_inference.parameters + f_generative.parameters
    inference = ed.KLqp(latent_vars={z: qz}, data={X_reconstruct: X})
    # ====== get cost for training ====== #
    # Bind p(x, z) and q(z | x) to the same placeholder for x.
    if K.is_training():
        import tensorflow as tf
        inference.initialize()
        # The optimizer is driven manually on `inference.loss`.  The former
        # alternative (handing the optimizer to `inference.initialize` and
        # stepping with `inference.update`) sat in an unreachable
        # `if True: ... else:` branch and has been removed.
        optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
        updates = optimizer.apply_gradients(
            optimizer.compute_gradients(inference.loss, var_list=params))
        init = tf.global_variables_initializer()
        init.run()
        # NOTE(review): `f_train` is built but never returned or stored, so
        # callers cannot reach it -- confirm whether it should be exposed.
        f_train = K.function(X, inference.loss, updates)
    samples = K.sigmoid(logits)
    return (samples, z, qz), (f_inference, f_generative)
コード例 #4
0
def convolutional_vae(X, saved_states, **kwargs):
    """ Build a convolutional variational auto-encoder with `ed.KLqp`.

    Parameters
    ----------
    X : placeholder
        input placeholder, its first (batch) dimension must be known.
        It is cast to float32 before encoding; a newly created encoder
        reshapes it to `(-1, 28, 28, 1)`.
    saved_states : None or (f_inference, f_generative)
        previously created encoder and decoder to reuse; if None, both
        networks are created from scratch.
    **kwargs
        n : int (default: 10)
            number of latent dimensions

    Return
    ------
    (samples, z, qz), (f_inference, f_generative)
        samples : sigmoid of the decoder logits
        z : the prior `Normal_pz` of shape `(batch_size, n)`
        qz : the variational posterior `Normal_qz` produced by the encoder

    States
    ------
    [f_inference (encoder), f_generative (decoder)]

    Raises
    ------
    ValueError
        if the batch dimension of `X` is None

    """
    n = kwargs.get('n', 10)
    batch_size = K.get_shape(X)[0]
    if batch_size is None:
        raise ValueError(
            "You must specify batch_size dimension for the input placeholder.")
    # ====== init ====== #
    if saved_states is None:
        # Encoder
        f_inference = N.Sequence([
            N.Reshape(shape=(-1, 28, 28, 1)),
            N.Conv(num_filters=32,
                   filter_size=3,
                   strides=1,
                   pad='valid',
                   b_init=init_ops.constant_initializer(0.),
                   activation=K.elu),
            N.Conv(num_filters=64,
                   filter_size=5,
                   strides=2,
                   pad='same',
                   b_init=init_ops.constant_initializer(0.),
                   activation=K.elu),
            N.Dropout(level=0.1),
            N.Flatten(outdim=2),
            # 2 * n outputs: the first n are `mu`, the last n feed `sigma`
            N.Dense(num_units=n * 2, b_init=None),
            N.BatchNorm(axes=0)
        ],
                                 debug=True,
                                 name='Encoder')
        # Decoder
        f_generative = N.Sequence([
            N.Dimshuffle(pattern=(0, 'x', 'x', 1)),
            N.TransposeConv(num_filters=64,
                            filter_size=3,
                            strides=1,
                            pad='valid',
                            b_init=init_ops.constant_initializer(0.),
                            activation=K.elu),
            N.TransposeConv(num_filters=32,
                            filter_size=5,
                            strides=2,
                            pad='same',
                            b_init=init_ops.constant_initializer(0.),
                            activation=K.elu),
            N.TransposeConv(num_filters=1,
                            filter_size=13,
                            strides=3,
                            pad='valid',
                            b_init=None),
            N.BatchNorm(activation=K.linear),
            N.Flatten(outdim=3)
        ],
                                  debug=True,
                                  name="Decoder")
    else:
        f_inference, f_generative = saved_states
    # ====== Perform ====== #
    # Encoder
    y_encoder = f_inference(K.cast(X, 'float32'))
    mu = y_encoder[:, :n]
    # softplus keeps the standard deviation positive
    sigma = K.softplus(y_encoder[:, n:])
    qz = Normal(mu=mu, sigma=sigma, name='Normal_qz')
    # Decoder
    z = Normal(mu=K.zeros(shape=(batch_size, n)),
               sigma=K.ones(shape=(batch_size, n)),
               name="Normal_pz")
    logits = f_generative(z)
    X_reconstruct = Bernoulli(logits=logits)
    # inference
    params = f_inference.parameters + f_generative.parameters
    inference = ed.KLqp(latent_vars={z: qz}, data={X_reconstruct: X})
    # ====== get cost for training ====== #
    # Bind p(x, z) and q(z | x) to the same placeholder for x.
    if K.is_training():
        import tensorflow as tf
        inference.initialize()
        # The optimizer is driven manually on `inference.loss`.  The former
        # alternative (handing the optimizer to `inference.initialize` and
        # stepping with `inference.update`) sat in an unreachable
        # `if True: ... else:` branch and has been removed.
        optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
        updates = optimizer.apply_gradients(
            optimizer.compute_gradients(inference.loss, var_list=params))
        init = tf.global_variables_initializer()
        init.run()
        # NOTE(review): `f_train` is built but never returned or stored, so
        # callers cannot reach it -- confirm whether it should be exposed.
        f_train = K.function(X, inference.loss, updates)
    samples = K.sigmoid(logits)
    return (samples, z, qz), (f_inference, f_generative)