Example #1
def simple_context(X, mask):

    desc, head = X[:, :parameters.max_len_desc, :], X[:, parameters.max_len_desc:, :]

    head_activations, head_words = head[:, :, :parameters.activation_rnn_size], head[:, :, parameters.activation_rnn_size:]
    desc_activations, desc_words = desc[:, :, :parameters.activation_rnn_size], desc[:, :, parameters.activation_rnn_size:]

    activation_energies = K.batch_dot(head_activations,
                                      desc_activations,
                                      axes=(2, 2))

    activation_energies = activation_energies + -1e20 * K.expand_dims(
        1. - K.cast(mask[:, :parameters.max_len_desc], 'float32'), 1)

    activation_energies = K.reshape(activation_energies,
                                    (-1, parameters.max_len_desc))
    activation_weights = K.softmax(activation_energies)
    activation_weights = K.reshape(
        activation_weights,
        (-1, parameters.max_len_head, parameters.max_len_desc))

    desc_avg_word = K.batch_dot(activation_weights, desc_words, axes=(2, 1))
    return K.concatenate((desc_avg_word, head_words))
Example #2
def simple_context(X, mask):
    """
    Simple context calculation layer logic
    X = (batch_size, time_steps, units)
    time_steps are nothing but number of words in our case.
    """
    # separate heading and desc
    desc, head = X[:, :parameters.max_len_desc, :], X[:, parameters.max_len_desc:, :]
    # separate activation and context part
    head_activations, head_words = head[:, :, :parameters.activation_rnn_size], head[:, :, parameters.activation_rnn_size:]
    desc_activations, desc_words = desc[:, :, :parameters.activation_rnn_size], desc[:, :, parameters.activation_rnn_size:]

    # p = (batch_size, length_desc_words, rnn_units)
    # q = (batch_size, length_headline_words, rnn_units)
    # K.batch_dot(q, p, axes=(2, 2)) = (batch_size, length_headline_words, length_desc_words)
    activation_energies = K.batch_dot(head_activations, desc_activations, axes=(2, 2))

    # make sure we don't use description words that are masked out
    activation_energies = activation_energies + -1e20 * K.expand_dims(1. - K.cast(mask[:, :parameters.max_len_desc], 'float32'), 1)

    # for every head word compute weights for every desc word
    activation_energies = K.reshape(activation_energies, (-1, parameters.max_len_desc))
    activation_weights = K.softmax(activation_energies)
    activation_weights = K.reshape(activation_weights, (-1, parameters.max_len_head, parameters.max_len_desc))

    # for every head word compute weighted average of desc words
    desc_avg_word = K.batch_dot(activation_weights, desc_words, axes=(2, 1))
    return K.concatenate((desc_avg_word, head_words))
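Both versions above hinge on the same contraction, so here is a minimal, self-contained shape check (the sizes are made up for illustration and assume the tf.keras backend used throughout these examples) showing what K.batch_dot(..., axes=(2, 2)) produces:

import numpy as np
from tensorflow.keras import backend as K

batch, len_head, len_desc, rnn_units = 2, 4, 6, 8
head_activations = K.constant(np.random.rand(batch, len_head, rnn_units))
desc_activations = K.constant(np.random.rand(batch, len_desc, rnn_units))

# Contract the last axis of both tensors:
# (batch, len_head, rnn_units) x (batch, len_desc, rnn_units) -> (batch, len_head, len_desc)
energies = K.batch_dot(head_activations, desc_activations, axes=(2, 2))
print(K.int_shape(energies))  # (2, 4, 6)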
Example #3
def pairwise_attention_dot(x1,
                           x2,
                           x1_mask=None,
                           x2_mask=None,
                           return_score=False,
                           scope_name='pairwise_attention_dot',
                           reuse=tf.AUTO_REUSE):
    '''
    :param x1: [N, S1, d]
    :param x2: [N, S2, d]
    :param x1_mask: [N, S1], 1 as valid position
    :param x2_mask: [N, S2], 1 as valid position
    :return: x1_att [N, S1, d] and x2_att [N, S2, d]; also the attention
        scores alpha1, alpha2, alpha if return_score is True
    '''
    with tf.variable_scope(scope_name, reuse=reuse):
        alpha = tf.matmul(x1, tf.transpose(x2, perm=[0, 2, 1]))  # [N, S1, S2]
        alpha1 = alpha
        if x2_mask is not None:
            alpha1 = add_mask(alpha, x2_mask, expand_axis=(1, ))

        alpha2 = alpha
        if x1_mask is not None:
            alpha2 = add_mask(alpha, x1_mask, expand_axis=(2, ))

        alpha1 = tf.nn.softmax(alpha1, axis=2)
        alpha2 = tf.nn.softmax(alpha2, axis=1)
        x1_att = K.batch_dot(alpha1, x2, axes=[2, 1])
        x2_att = K.batch_dot(alpha2, x1, axes=[1, 1])
        if return_score:
            return x1_att, x2_att, alpha1, alpha2, alpha
        return x1_att, x2_att
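The add_mask helper called above is not shown in this snippet; a hypothetical sketch of such an additive mask (a large negative bias on padded positions before softmax, consistent with the -1e20 trick in Examples #1 and #2) could look like this:

import tensorflow as tf

def add_mask(logits, mask, expand_axis=(), big_neg=-1e20):
    """logits: [..., S1, S2]; mask: 1 for valid positions, 0 for padding. Hypothetical helper."""
    mask = tf.cast(mask, logits.dtype)
    for axis in expand_axis:
        mask = tf.expand_dims(mask, axis=axis)
    # Padded positions get a huge negative score, so softmax assigns them ~0 weight.
    return logits + big_neg * (1.0 - mask)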
Example #4
    def call(self, inputs, mask=None):
        # output = softmax(score)
        k, q = inputs
        if len(q.shape) == 2:
            q = K.expand_dims(q, axis=1)
        # k: (?, K_LEN, EMBED_DIM,)
        # q: (?, Q_LEN, EMBED_DIM,)
        # score: (?, Q_LEN, K_LEN,)
        if self.score_function == 'scaled_dot_product':
            kt = K.permute_dimensions(k, (0, 2, 1))
            qkt = K.batch_dot(q, kt)
            score = qkt / self.EMBED_DIM
        elif self.score_function == 'mlp':
            kq = K.concatenate([k, q], axis=1)
            kqw2 = K.tanh(K.dot(kq, self.W2))
            score = K.permute_dimensions(K.dot(self.W1, kqw2), (1, 0, 2))
        elif self.score_function == 'bi_linear':
            qw = K.dot(q, self.W)
            kt = K.permute_dimensions(k, (0, 2, 1))
            score = K.batch_dot(qw, kt)
        else:
            raise RuntimeError('invalid score_function')
        score = K.softmax(score)
        # if mask is not None:
        #     score *= K.cast(mask[0], K.floatx())
        # output: (?, Q_LEN, EMBED_DIM,)
        output = K.batch_dot(score, k)

        return output
Example #5
    def call(self, u_vecs, **kwargs):
        if self.share_weights:
            u_hat_vecs = K.conv1d(u_vecs, self.W)
        else:
            u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

        batch_size = K.shape(u_vecs)[0]
        input_num_capsule = K.shape(u_vecs)[1]
        u_hat_vecs = K.reshape(u_hat_vecs,
                               (batch_size, input_num_capsule,
                                self.num_capsule, self.dim_capsule))
        u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))
        # final u_hat_vecs.shape = [None, num_capsule, input_num_capsule, dim_capsule]

        b = K.zeros_like(u_hat_vecs[:, :, :, 0])  # shape = [None, num_capsule, input_num_capsule]
        for i in range(self.routings):
            c = softmax(b, 1)
            o = K.batch_dot(c, u_hat_vecs, [2, 2])
            if K.backend() == 'theano':
                o = K.sum(o, axis=1)
            if i < self.routings - 1:
                o = K.l2_normalize(o, -1)
                b = K.batch_dot(o, u_hat_vecs, [2, 3])
                if K.backend() == 'theano':
                    b = K.sum(b, axis=1)

        return self.activation(o)
Example #6
    def call(self, x, mask=None):
        features_dim = self.features_dim
        step_dim = self.step_dim
        #xw = K.reshape(K.dot(x[0], K.reshape(self.W, (features_dim, features_dim))), (-1, features_dim))
        #yavg=K.reshape(K.mean(K.mean(x[1], axis=1, keepdims=True),axis=0, keepdims=True), (features_dim,-1))
        xw1 = K.dot(x[0], K.reshape(self.W1, (features_dim, features_dim)))
        xw2 = K.dot(x[1], K.reshape(self.W2, (features_dim, features_dim)))
        xw1t = K.permute_dimensions(xw1, [0, 2, 1])
        xw2t = K.permute_dimensions(xw2, [0, 2, 1])
        xw11 = K.batch_dot(xw1, xw1t) / (step_dim**0.5)
        xw12 = K.batch_dot(xw1, xw2t) / (step_dim**0.5)

        s11 = self.ll * K.softmax(xw11)
        s12 = (1 - self.ll) * K.softmax(xw12)

        eij = s11 + s12
        print(eij.get_shape())
        V = x[0] * K.mean(eij, axis=2, keepdims=True)
        if self.get_alpha:
            return eij
        else:
            if self.get_sequence:
                return V
            else:
                return K.sum(V, axis=1)
Example #7
    def call(self, inputs, mask=None, training=None):
        inputs, relatives, memories, bias_context, bias_relative = inputs
        full = K.concatenate([memories, inputs], axis=1)      # (batch, prev_len + seq_len, units)
        w_q = K.dot(inputs, self.kernel_q)                    # (batch, seq_len, units)
        w_kv = K.dot(full, self.kernel_kv)                    # (batch, prev_len + seq_len, units * 2)
        w_r = K.dot(relatives, self.kernel_r)                 # (batch, prev_len + seq_len, units)
        if self.use_bias:
            w_q = K.bias_add(w_q, self.bias_q)
            w_kv = K.bias_add(w_kv, self.bias_kv)
            w_r = K.bias_add(w_r, self.bias_r)
        if self.activation is not None:
            w_q = self.activation(w_q)
            w_kv = self.activation(w_kv)
            w_r = self.activation(w_r)

        w_k = w_kv[:, :, :self.units]                         # (batch, prev_len + seq_len, units)
        w_v = w_kv[:, :, self.units:]                         # (batch, prev_len + seq_len, units)

        w_qc = K.bias_add(w_q, bias_context)
        w_qc = self._reshape_to_batches(w_qc)                 # (batch * n_head, seq_len, units_head)
        w_k = self._reshape_to_batches(w_k)                   # (batch * n_head, prev_len + seq_len, units_head)
        a_context = K.batch_dot(w_qc, w_k, axes=2)            # (batch * n_head, seq_len, prev_len + seq_len)

        w_qr = K.bias_add(w_q, bias_relative)
        w_qr = self._reshape_to_batches(w_qr)                 # (batch * n_head, seq_len, units_head)
        w_r = self._reshape_to_batches(w_r)                   # (batch * n_head, prev_len + seq_len, units_head)
        a_relative = K.batch_dot(w_qr, w_r, axes=2)           # (batch * n_head, seq_len, prev_len + seq_len)
        a_relative = self._relative_shift(a_relative)         # (batch * n_head, seq_len, prev_len + seq_len)

        att = (a_context + a_relative) / K.sqrt(K.constant(self.units_head, dtype=K.floatx()))
        exp = K.exp(att - K.max(att, axis=-1, keepdims=True))

        q_len, k_len = K.shape(w_q)[1], K.shape(w_k)[1]
        indices = K.expand_dims(K.arange(0, k_len), axis=0)
        upper = K.expand_dims(K.arange(k_len - q_len, k_len), axis=-1)
        exp *= K.expand_dims(K.cast(indices <= upper, K.floatx()), axis=0)
        if mask is not None and mask[0] is not None:
            mask = K.cast(mask[0], K.floatx())
            mask = K.concatenate([K.ones_like(memories[:, :, 0]), mask], axis=1)
            exp *= K.expand_dims(self._reshape_mask(mask), axis=1)

        att = exp / K.sum(exp, axis=-1, keepdims=True)
        if self.att_drop_layer is not None:
            att = self.att_drop_layer(att, training=training)
        w_v = self._reshape_to_batches(w_v)                   # (batch * n_head, prev_len + seq_len, units_head)
        w_o = K.batch_dot(att, w_v)                           # (batch * n_head, seq_len, units_head)

        w_o = self._reshape_from_batches(w_o)                 # (batch, seq_len, units)
        w_o = K.dot(w_o, self.kernel_o)                       # (batch, seq_len, units)
        if self.use_bias:
            w_o = K.bias_add(w_o, self.bias_o)
        if self.activation is not None:
            w_o = self.activation(w_o)

        # Add shape information to tensor when using `tf.keras`
        input_shape = K.int_shape(inputs)
        if input_shape[1] is not None:
            w_o = K.reshape(w_o, (-1,) + input_shape[1:])
        return w_o
Example #8
 def __call__(self, q, k, v, mask):
     attn = Lambda(
         lambda x: K.batch_dot(x[0], x[1], axes=[2, 2]) / self.temper)(
             [q, k])
     if mask is not None:
         mmask = Lambda(lambda x: (-1e+10) * (1 - x))(mask)
         attn = Add()([attn, mmask])
     attn = Activation('softmax')(attn)
     attn = self.dropout(attn)
     output = Lambda(lambda x: K.batch_dot(x[0], x[1]))([attn, v])
     return output, attn
Example #9
    def call(self, u_vecs, scores=None):
        # if self.share_weights:
        #     u_hat_vecs = K.conv1d(u_vecs, self.W)
        # else:
        #     u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])
        u_hat_vecs = u_vecs
        batch_size = K.shape(u_vecs)[0]

        input_num_capsule = K.shape(u_vecs)[1]
        if scores is not None:
            scores = K.permute_dimensions(scores, (0, 2, 1))
            u_hat_vecs = u_hat_vecs * scores

        u_hat_vecs = K.reshape(u_hat_vecs,
                               (batch_size, input_num_capsule,
                                self.num_capsule, self.dim_capsule))

        u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))

        b = K.zeros_like(u_hat_vecs[:, :, :, 0])  # shape = [None, num_capsule, input_num_capsule]

        # biases = self.add_weight(name='capsule_kernel',
        #                          shape=(batch_size1, self.num_capsule, self.dim_capsule),
        #                          # shape=self.kernel_size,
        #                          dtype=tf.float32,
        #                          initializer='glorot_uniform',
        #                          trainable=True)
        # biases = tf.get_variable(name='bias',
        # shape=(self.num_capsule, self.dim_capsule), initializer='glorot_uniform',)
        for i in range(self.routings):
            # b = K.permute_dimensions(b, (0, 2, 1))  # shape = [None, input_num_capsule, num_capsule]
            # c = K.softmax(b)
            leak = tf.zeros_like(b, optimize=True)
            leak = tf.reduce_sum(leak, axis=1, keep_dims=True)
            leaky_logits = tf.concat([leak, b], axis=1)
            leaky_routing = tf.nn.softmax(leaky_logits, dim=1)
            c = tf.split(leaky_routing, [1, self.num_capsule], axis=1)[1]

            # c = K.permute_dimensions(c, (0, 2, 1))
            # b = K.permute_dimensions(b, (0, 2, 1))
            o = K.batch_dot(c, u_hat_vecs, [2, 2])  # + self.biases

            outputs = self.activation(o)

            if i < self.routings - 1:
                b = K.batch_dot(outputs, u_hat_vecs, [2, 3])
        # self.c = scores
        return outputs
Example #10
def selfattoptions(args):
    q = args[0]
    k = args[1]
    v = args[2]

    q = tf.expand_dims(q, -1)
    k = tf.expand_dims(k, -1)
    v = tf.expand_dims(v, -1)

    QK = K.batch_dot(q, K.permute_dimensions(k, [0, 2, 1]))
    QK = QK / (20**0.5)
    QK = K.softmax(QK)
    MV = K.batch_dot(QK, v)
    MV = tf.squeeze(MV, -1)
    return MV
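A minimal usage sketch of selfattoptions wrapped in a Lambda layer; the feature size of 20 is only an assumption suggested by the 20**0.5 scaling above:

from tensorflow.keras import layers, Model

q_in = layers.Input(shape=(20,))
k_in = layers.Input(shape=(20,))
v_in = layers.Input(shape=(20,))
att = layers.Lambda(selfattoptions)([q_in, k_in, v_in])  # -> (batch, 20)
model = Model([q_in, k_in, v_in], att)
print(model.output_shape)  # (None, 20)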
Example #11
def gram_matrix(x, norm_by_channels=False):
    '''
    Returns the Gram matrix of the tensor x.
    '''
    if K.ndim(x) == 3:
        features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
        shape = K.shape(x)
        H, W, C = shape[0], shape[1], shape[2]  # x is (H, W, C)
        gram = K.dot(features, K.transpose(features))
    elif K.ndim(x) == 4:
        # Swap from (H, W, C) to (B, C, H, W)
        x = K.permute_dimensions(x, (0, 3, 1, 2))
        shape = K.shape(x)
        B, C, H, W = shape[0], shape[1], shape[2], shape[3]
        # Reshape as a batch of 2D matrices with vectorized channels
        features = K.reshape(x, K.stack([B, C, H * W]))
        # This is a batch of Gram matrices (B, C, C).
        gram = K.batch_dot(features, features, axes=2)
    else:
        raise ValueError(
            'The input tensor should be either a 3d (H, W, C) or 4d (B, H, W, C) tensor.'
        )
    # Normalize the Gram matrix
    if norm_by_channels:
        denominator = C * H * W  # Normalization from Johnson
    else:
        denominator = H * W  # Normalization from Google
    gram = gram / K.cast(denominator, x.dtype)

    return gram
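A quick shape check for the 4-D branch (arbitrary feature-map size, assuming eager execution with the tf.keras backend):

import numpy as np
from tensorflow.keras import backend as K

feats = K.constant(np.random.rand(2, 32, 32, 64))  # (B, H, W, C)
g = gram_matrix(feats, norm_by_channels=True)
print(K.int_shape(g))  # (2, 64, 64)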
Example #12
 def _merge_function(self, inputs):
     base_layer_utils.no_ragged_support(inputs, self.name)
     if len(inputs) != 2:
         raise ValueError(
             'A `Dot` layer should be called on exactly 2 inputs')
     x1 = inputs[0]
     x2 = inputs[1]
     if isinstance(self.axes, int):
         if self.axes < 0:
             axes = [
                 self.axes % backend.ndim(x1), self.axes % backend.ndim(x2)
             ]
         else:
             axes = [self.axes] * 2
     else:
         axes = []
         for i in range(len(self.axes)):
             if self.axes[i] < 0:
                 axes.append(self.axes[i] % backend.ndim(inputs[i]))
             else:
                 axes.append(self.axes[i])
     if self.normalize:
         x1 = nn.l2_normalize(x1, axis=axes[0])
         x2 = nn.l2_normalize(x2, axis=axes[1])
     output = backend.batch_dot(x1, x2, axes)
     return output
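This _merge_function backs the public Dot merge layer; a minimal usage sketch of that layer (input shapes are arbitrary):

import numpy as np
import tensorflow as tf

x1 = tf.constant(np.random.rand(4, 5, 10), dtype=tf.float32)
x2 = tf.constant(np.random.rand(4, 7, 10), dtype=tf.float32)

# Contracts axis 2 of both inputs, equivalent to K.batch_dot(x1, x2, axes=(2, 2));
# passing normalize=True would l2-normalize along those axes first (cosine similarity).
out = tf.keras.layers.Dot(axes=(2, 2))([x1, x2])
print(out.shape)  # (4, 5, 7)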
Example #13
    def call(self, x, mask=None):
        '''
        shape = (batch_size, new_time_step, filters)
        x_cont = Tensor("layer_dropout_5/cond/Identity:0", shape=(None, None, 128), dtype=float32)
        x_ques = Tensor("layer_dropout_11/cond/Identity:0", shape=(None, None, 128), dtype=float32)
        c_mask = Tensor("batch_slice_4/Slice:0", shape=(None, None), dtype=bool)
        q_mask = Tensor("batch_slice_5/Slice:0", shape=(None, None), dtype=bool)
        '''
        x_cont, x_ques, c_mask, q_mask = x
        # get similarity matrix S
        # K.dot(x_cont, self.W0) shape change: [batch_size, time_step, dim] * [dim, 1] = [batch_size, time_step, 1]
        subres0 = K.tile(K.dot(x_cont, self.W0), [1, 1, self.q_maxlen])
        subres1 = K.tile(
            K.permute_dimensions(K.dot(x_ques, self.W1), pattern=(0, 2, 1)),
            [1, self.c_maxlen, 1])
        subres2 = K.batch_dot(x_cont * self.W2,
                              K.permute_dimensions(x_ques, pattern=(0, 2, 1)))
        S = subres0 + subres1 + subres2
        S += self.bias
        q_mask = tf.expand_dims(q_mask, 1)
        # softmax is over the last dimension by default, i.e. axis=-1
        S_ = tf.nn.softmax(self.mask_logits(S, q_mask))
        c_mask = tf.expand_dims(c_mask, 2)
        S_T = K.permute_dimensions(
            tf.nn.softmax(self.mask_logits(S, c_mask), axis=1), (0, 2, 1))
        c2q = tf.matmul(S_, x_ques)
        q2c = tf.matmul(tf.matmul(S_, S_T), x_cont)
        result = K.concatenate([x_cont, c2q, x_cont * c2q, x_cont * q2c],
                               axis=-1)

        return result
Example #14
 def call(self, x, mask=None):
     energy = self.activation(K.dot(x, self.W0) + self.b0)
     #energy=self.activation(K.dot(energy, self.W) + self.b)
     energy = K.dot(energy, self.W) + self.b
     energy = K.reshape(energy, (-1, self.input_length))
     energy = K.softmax(energy)
     xx = K.batch_dot(energy, x, axes=(1, 1))
     out = K.concatenate([xx, energy])  # avoid shadowing the built-in all()
     return out
Example #15
    def call(self, inputs):
        X = inputs[0]
        A = inputs[1]

        A_t = A + self.I
        D_t = tf.linalg.diag(tf.pow(K.sum(A_t, axis=2), -0.5))
        A_t = K.batch_dot(K.batch_dot(D_t, A_t), D_t)

        X_p = tf.tensordot(K.batch_dot(A_t, X), self.W, axes=[[-1], [0]])

        if self.activation is not None:
            X_p = self.activation(X_p)

        if self.output_adjacency:
            outputs = [X_p, A]
        else:
            outputs = X_p

        return outputs
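The adjacency preprocessing above is the symmetric GCN normalization D^(-1/2) (A + I) D^(-1/2); a small NumPy sketch for a single toy graph (the layer does the same per sample via K.batch_dot):

import numpy as np

A = np.array([[0., 1., 0.],
              [1., 0., 1.],
              [0., 1., 0.]])            # 3-node path graph, values arbitrary
A_t = A + np.eye(3)                     # add self-loops
D_inv_sqrt = np.diag(A_t.sum(axis=1) ** -0.5)
A_hat = D_inv_sqrt @ A_t @ D_inv_sqrt   # symmetrically normalized adjacency
print(A_hat.round(3))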
Example #16
    def fallback_metric(self, y_true, y_pred):
        #grab the most confident prediction
        predictions = K.max(y_pred, axis=-1)

        #fill a tensor with our threshold_value
        threshold_tensor = tf.fill(tf.shape(predictions), self.threshold)

        #Are we confident in our prediction?
        threshold_high = predictions > threshold_tensor
        threshold_high = tf.cast(threshold_high, tf.int32)

        #Do we have low confidence in our prediction?
        threshold_low = predictions <= threshold_tensor
        threshold_low = tf.cast(threshold_low, tf.int32)

        idx_true = K.argmax(y_true, -1)
        idx_pred = K.argmax(y_pred, -1)

        #For our confident predictions, compare the top prediction to the label of the true value
        high_correct = math_ops.equal(idx_true, idx_pred)
        high_correct = tf.cast(high_correct, tf.int32)

        #For our less confident predictions, grab the top 2 most confident predictions
        _, max_pred = tf.math.top_k(y_pred, k=2)

        #Gather the lineages of those top 2 predictions using the transpose of the hierarchy's adjacency matrix because the adjacency only points from ancestor to descendant
        lineages = tf.gather(K.transpose(self.hierarchy.A), max_pred)
        lineages = K.cast(lineages, tf.int32)

        #Grab the first two columns of this matrix
        fallback = tf.bitwise.bitwise_and(lineages[:, 0], lineages[:, 1])

        #Gather the lineage of the true value
        actual = tf.gather(K.transpose(self.hierarchy.A), K.argmax(y_true))
        actual = K.cast(actual, tf.int32)

        #Multiply the two together
        overlap_score = K.batch_dot(fallback, actual)

        #Are either of the top 2 predictions in the lineage of the true value? If so, overlap_score should be >1 and we count the result as correct
        low_correct = overlap_score > 1
        low_correct = tf.cast(low_correct, tf.int32)
        low_correct = tf.squeeze(low_correct)

        #results for the high confidence predictions
        high_accuracy = tf.math.multiply(threshold_high, high_correct)

        #results for the low confidence predictions
        low_accuracy = tf.math.multiply(threshold_low, low_correct)

        # total accuracy vector
        correct = high_accuracy + low_accuracy

        #return batch accuracy value
        return K.mean(K.cast(correct, tf.float32))
Example #17
def _outer_product(x):
    '''Calculate outer-products of two tensors.

    Args:
        x: a list of two tensors.
        Assume that each tensor has shape = (size_minibatch, total_pixels, size_filter)

    Returns:
        Outer-products of two tensors.
    '''
    return keras_backend.batch_dot(x[0], x[1], axes=[1, 1]) / x[0].get_shape().as_list()[1]
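A shape sketch for the bilinear-pooling outer product above; the spatial and filter sizes are made up, and keras_backend is the alias used in the snippet:

import numpy as np
from tensorflow.keras import backend as keras_backend

f1 = keras_backend.constant(np.random.rand(2, 49, 256))  # (minibatch, pixels, filters_1)
f2 = keras_backend.constant(np.random.rand(2, 49, 512))  # (minibatch, pixels, filters_2)
out = _outer_product([f1, f2])
print(keras_backend.int_shape(out))  # (2, 256, 512)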
Example #18
    def call(self, x, mask=None):
        features_dim = self.features_dim
        step_dim = self.step_dim

        t1 = x[:, 0, :]
        t1 = K.expand_dims(t1, 1)
        # t1 = K.tile(t1, [1, step_dim, 1])
        print(t1)
        eij = K.batch_dot(x, t1, (2, 2))  #(?,500,1)
        # eij = K.tile(eij, [1, 1, features_dim])
        print(eij)
        a = K.exp(eij)
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        print(a)
        weighted_input = x * a
        temp = K.sum(weighted_input, axis=1)
        temp = K.expand_dims(temp, 1)
        temp = K.tile(temp, [1, 1, features_dim])
        print(temp)
        alltemp = temp

        for i in range(1, step_dim):
            t1 = x[:, i, :]
            t1 = K.expand_dims(t1, 1)
            # t1 = K.tile(t1, [1, 2, 1])
            eij = K.batch_dot(x, t1, (2, 2))
            # eij = K.tile(eij, [1, 1, features_dim])
            a = K.exp(eij)
            a /= K.cast(
                K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
            weighted_input = x * a
            temp = K.sum(weighted_input, axis=1)
            temp = K.expand_dims(temp, 1)
            temp = K.tile(temp, [1, 1, features_dim])
            alltemp = keras.layers.concatenate([alltemp, temp], 1)

        temp = keras.layers.concatenate([x, alltemp])
        return temp
Example #19
    def call(self, inputs, **kwargs):
        batch_size, input_len, _ = inputs.shape
        q = K.expand_dims(K.dot(inputs, self.Wq), 2)
        k = K.expand_dims(K.dot(inputs, self.Wk), 1)
        h = tf.tanh(q + k + self.bh)

        e = K.dot(h, self.Wv) + self.ba
        # e = K.reshape(e, shape=(batch_size, input_len, input_len))
        e = tf.reshape(e, shape=(batch_size, input_len, input_len))
        e = K.exp(e - K.max(e, axis=-1, keepdims=True))
        s = K.sum(e, axis=-1, keepdims=True)
        a = e / (s + K.epsilon())
        v = K.batch_dot(a, inputs)
        return v
Example #20
def loss_function_rate(y_true, y_pred):
    # y_true is actually the concatenation of the perfect CSI, h_real and the SNR, sigma^{-2}.
    # y_pred holds the phases of the obtained analog precoder v_RF.' (the transpose is due to the NN).
    h_real = tf.slice(
        y_true, [0, 0],
        [-1, Nt])  # the real partition of the complex-valued h_real.
    h_imag = tf.slice(
        y_true, [0, Nt],
        [-1, Nt])  # the imaginary partition of the complex-valued h_real.
    signal_power = tf.slice(y_true, [0, Nt * 2], [-1, 1])  # sigma^{-2}
    phase_vrf = tf.transpose(
        y_pred)  # the NN output is 1*Nt, but actual vrf is Nt*1.
    # transfer the y_pred (the phases) into exact complex v_RF (the lambda layer in the letter)
    v_real = tf.cos(phase_vrf)
    v_imag = tf.sin(phase_vrf)
    # compute the value of norm(hv_RF)^2
    # backend.batch_dot only computes the diagonal elements that are actually required, which reduces complexity.
    hvrf_2 = tf.pow(backend.batch_dot(h_real, v_real) - backend.batch_dot(h_imag, v_imag), 2) + \
        tf.pow(backend.batch_dot(h_real, v_imag) + backend.batch_dot(h_imag, v_real), 2)
    # compute the spectral efficiency with real CSI
    rate = tf.log(1 + hvrf_2 / Nt * signal_power) / tf.log(2.0)
    # since the NN is trained to minimize the objective, we minimize the negative rate.
    return -rate
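The real/imaginary decomposition used for norm(h v_RF)^2 can be sanity-checked per sample against a direct complex-valued computation (Nt here is arbitrary):

import numpy as np

Nt = 8
h = np.random.randn(Nt) + 1j * np.random.randn(Nt)
phase = np.random.uniform(0, 2 * np.pi, Nt)
v = np.cos(phase) + 1j * np.sin(phase)          # unit-modulus analog precoder

direct = np.abs(h @ v) ** 2
decomposed = (h.real @ v.real - h.imag @ v.imag) ** 2 + \
             (h.real @ v.imag + h.imag @ v.real) ** 2
print(np.isclose(direct, decomposed))           # True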
Example #21
 def gram_matrix(self, X):
     """グラム行列の算出"""
     X_sw = K.permute_dimensions(
         X, (0, 3, 2, 1)
     )  # swap axes
     s = K.shape(X_sw)
     new_shape = (s[0], s[1], s[2]*s[3])
     X_rs = K.reshape(X_sw, new_shape)
     X_rs_t = K.permute_dimensions(
         X_rs, (0, 2, 1)
     )  # transpose the matrix
     dot = K.batch_dot(X_rs, X_rs_t)  # compute the dot product
     norm = K.prod(K.cast(s[1:], 'float32'))
     return dot/norm
Example #22
 def call(self, x, **kwargs):
     # If only Q_seq, K_seq, V_seq are passed in, no mask is applied
     # If Q_seq, K_seq, V_seq, Q_len, V_len are all passed in, the padded positions are masked
     if len(x) == 3:
         Q_seq, K_seq, V_seq = x
         Q_len, V_len = None, None
     elif len(x) == 5:
         Q_seq, K_seq, V_seq, Q_len, V_len = x
     else:
         Q_seq, K_seq, V_seq = x
         Q_len, V_len = None, None
     # apply linear transformations to Q, K and V
     Q_seq = K.dot(Q_seq, self.WQ)
     Q_seq = K.reshape(
         Q_seq, (-1, K.shape(Q_seq)[1], self.nb_head, self.size_per_head))
     Q_seq = K.permute_dimensions(Q_seq, (0, 2, 1, 3))
     K_seq = K.dot(K_seq, self.WK)
     K_seq = K.reshape(
         K_seq, (-1, K.shape(K_seq)[1], self.nb_head, self.size_per_head))
     K_seq = K.permute_dimensions(K_seq, (0, 2, 1, 3))
     V_seq = K.dot(V_seq, self.WV)
     V_seq = K.reshape(
         V_seq, (-1, K.shape(V_seq)[1], self.nb_head, self.size_per_head))
     V_seq = K.permute_dimensions(V_seq, (0, 2, 1, 3))
     # compute the dot products, then mask, then softmax
     A = K.batch_dot(Q_seq, K_seq, axes=[3, 3]) / self.size_per_head**0.5
     A = K.permute_dimensions(A, (0, 3, 2, 1))
     A = self.Mask(A, V_len, 'add')
     A = K.permute_dimensions(A, (0, 3, 2, 1))
     A = K.softmax(A)
     # compute the output and apply the mask
     O_seq = K.batch_dot(A, V_seq, axes=[3, 2])
     O_seq = K.permute_dimensions(O_seq, (0, 2, 1, 3))
     O_seq = K.reshape(O_seq, (-1, K.shape(O_seq)[1], self.output_dim))
     O_seq = self.Mask(O_seq, Q_len, 'mul')
     return O_seq
Example #23
def gram_matrix(X):
    # swap axes => batch, channel, height, width
    axis_replaced_X = K.permute_dimensions(X, (0, 3, 2, 1))
    replaced_shape = K.shape(axis_replaced_X)
    # shape for taking the dot product of the feature maps (height and width flattened into one axis)
    dot_shape = (replaced_shape[0], replaced_shape[1],
                 replaced_shape[2] * replaced_shape[3])
    # matrix actually used to compute the dot product
    dot_X = K.reshape(axis_replaced_X, dot_shape)
    # transposed matrix
    dot_X_t = K.permute_dimensions(dot_X, (0, 2, 1))
    # matrix dot product
    dot = K.batch_dot(dot_X, dot_X_t)
    norm = K.prod(K.cast(replaced_shape[1:], 'float32'))
    return dot / norm
Example #24
    def call(self, inputs):

        # inputs_trans = (batch_size, the number of filters, sentence_length)
        inputs_trans = tf.transpose(inputs, [0, 2, 1])

        # at = (batch_size, the number of classes, sentence_length)
        at = tf.matmul(self.Wa, inputs_trans)

        # Softmax
        at = K.exp(at - K.max(at, axis=-1, keepdims=True))
        at = at / K.sum(at, axis=-1, keepdims=True)

        # weighted sum
        # v = (batch_size, the number of classes, the number of filters)
        v = K.batch_dot(at, inputs)

        return v
Example #25
        def loss(y_true, y_pred):
            y_true = K.squeeze(y_true, axis=-1)
            # Squeeze y_pred, i.e. remove the last axis, which has dim 1
            y_pred_squeezed = K.squeeze(y_pred, axis=-1)

            """
            Reconstructing x_org
            """
            # reversed_wnorm = Lambda(lambda x: )
            # reversed_wnorm = dict(map(reversed, wnorm.items()))
            # x_org = Lambda(lambda x: [tf.reshape(tf.where(tf.equal(wnorm, word)), [-1])[0] for sent in x for word in sent])(y_true)
            # x_org =
            # x_org = [reversed_wnorm.get(word) for sent in y_true for word in sent]
            x_org = raw_input
            print(raw_input.shape)
            x_temp = Lambda(lambda x: tf.cast(tf.reshape(x, [-1, ]), dtype=tf.int32))(x_org)

            K.print_tensor(K.shape(y_pred_squeezed), message='y_pred_squeezed are ')
            print(f'Inside decoder....After reshape of x_norm is {y_pred_squeezed.shape}')

            # Calc prob logits
            print(type(y_pred_squeezed))
            print(type(wnorm))
            print(f'wnorm shape is {wnorm.shape}')
            prob_logits = K.batch_dot(y_pred_squeezed, wnorm, axes=[2, 1])
            prob = Lambda(lambda x: tf.nn.log_softmax(x * 100, axis=-1, name='prob_lambda'))(prob_logits)
            print(f'Prob shape is {prob.shape}')
            prob = Lambda(lambda x: tf.reshape(x, [-1, n_words]))(prob)
            # prob = K.reshape(prob, [-1, wnorm.shape[0]])
            print(f'Prob reshaped is {prob.shape}')

            """
            Get prob of all the words
            """
            idx = Lambda(lambda x: tf.range(K.shape(x)[0], K.shape(x)[1]))(y_pred_squeezed)
            all_idx = K.transpose(K.stack([idx, x_temp]))
            all_prob = Lambda(lambda prob_idx_list: tf.gather_nd(prob_idx_list[0], prob_idx_list[1]))([prob, all_idx])

            K.print_tensor(K.shape(all_prob), message='all_prob shape is: ')
            recons_loss = Lambda(lambda x: -tf.reduce_mean(x))(all_prob)

            # K.print_tensor(loss, message='Loss is: ')
            # weighted_recons_loss = loss_weight * recons_loss

            return recons_loss
Example #26
 def compute_win(self, y_true, y_pred, to_numpy=False):
     if self.N > self.num_leaves:
         # if there are more total nodes in the hierarchy than leaf nodes (should always be the case,
         # but allowed to work either way) then pad with a zero for each non-leaf node in the taxonomy
         y_true = self._pad(y_true)
         y_pred = self._pad(y_pred)
     # propagate the probabilities (algo 1)
     propagated_probabilities = K.dot(self.A, K.transpose(y_pred))
     # find the index from the actual label
     win_idx = self.select_correct_idx(y_true)
     # find the mask associated with that label
     win_mask = tf.gather(self.W, win_idx)
     # win is q . w (algo 2)
     win = K.batch_dot(win_mask, K.transpose(propagated_probabilities))
     # win is in [0.5,1], remap to [0,1]:
     remapped = 2 * (win - 0.5)
     if to_numpy:
         remapped = K.reshape(remapped, []).numpy()
     return remapped
Example #27
 def call(self, inputs, mask=None):
     x, u = inputs
     if u is None:
         u = self.add_weight(name="u_{:s}".format(self.name),
                             shape=(self.ATTENTION_SIZE, ),
                             initializer="glorot_normal",
                             trainable=True)
     # u: (?, ATTENTION_SIZE,)
     # x: (?, MAX_TIMESTEPS, EMBED_SIZE)
     # ut: (?, MAX_TIMESTEPS, ATTENTION_SIZE)
     ut = K.tanh(K.dot(x, self.W) + self.b)
     # at: (?, MAX_TIMESTEPS,)
     at = K.batch_dot(ut, u)
     at = K.softmax(at)
     if mask is not None:
         at *= K.cast(mask, K.floatx())
     # ot: (?, MAX_TIMESTEPS, EMBED_SIZE,)
     atx = K.expand_dims(at, axis=-1)
     ot = atx * x
     # output: (?, EMBED_SIZE,)
     output = K.sum(ot, axis=1)
     return output
Example #28
    def call(self, x):

        Wx_b = K.dot(x, self.w) + self.b
        a = tf.nn.softmax(Wx_b)

        rows = []

        for k in range(self.k_centers):
            error = x - self.c[:, k]

            row = K.batch_dot(a[:, :, k], error)
            row = tf.nn.l2_normalize(row, dim=1)
            rows.append(row)

        output = tf.stack(rows)
        output = tf.transpose(output, perm=[1, 0, 2])
        output = tf.reshape(
            output,
            [tf.shape(output)[0],
             tf.shape(output)[1] * tf.shape(output)[2]])

        return output
Example #29
 def _merge_function(self, inputs):
   if len(inputs) != 2:
     raise ValueError('A `Dot` layer should be called ' 'on exactly 2 inputs')
   x1 = inputs[0]
   x2 = inputs[1]
   if isinstance(self.axes, int):
     if self.axes < 0:
       axes = [self.axes % K.ndim(x1), self.axes % K.ndim(x2)]
     else:
       axes = [self.axes] * 2
   else:
     axes = []
     for i in range(len(self.axes)):
       if self.axes[i] < 0:
         axes.append(self.axes[i] % K.ndim(inputs[i]))
       else:
         axes.append(self.axes[i])
   if self.normalize:
     x1 = nn.l2_normalize(x1, axis=axes[0])
     x2 = nn.l2_normalize(x2, axis=axes[1])
   output = K.batch_dot(x1, x2, axes)
   return output
Example #30
    def call(self, inputs):
        batch_size = K.shape(inputs)[0]
        num_rows = K.int_shape(inputs)[1]
        num_cols = K.int_shape(inputs)[2]
        num_channels = K.int_shape(inputs)[3]
        n = num_rows * num_cols
        X = K.reshape(inputs, (batch_size, num_channels, n))
        factor = K.cast(1 / n, K.floatx())
        I_hat = factor * (K.eye(n) - factor * K.ones((n, n)))
        I_hat = K.tile(
            K.expand_dims(I_hat, axis=0),
            (batch_size, 1, 1))  # One identity matrix per sample in batch
        Sigma = K.batch_dot(K.batch_dot(X, I_hat),
                            K.permute_dimensions(X, (0, 2, 1)))

        # Pre-normalization
        trace = K.sum(K.sum(K.eye(num_channels) * Sigma, axis=1,
                            keepdims=True),
                      axis=2,
                      keepdims=True)
        A = Sigma / trace

        # Newton-Schulz Iteration
        Y = A
        Z = K.eye(num_channels)
        Z = K.tile(K.expand_dims(Z, axis=0), (batch_size, 1, 1))
        I3 = 3 * K.eye(num_channels)
        I3 = K.tile(K.expand_dims(I3, axis=0), (batch_size, 1, 1))
        for i in range(self.num_iter):
            Y = 0.5 * K.batch_dot(Y, I3 - K.batch_dot(Z, Y))
            Z = 0.5 * K.batch_dot(I3 - K.batch_dot(Z, Y), Z)

        # Post-compensation
        C = K.sqrt(trace) * Y

        # Extract upper triangular matrix as vector
        ones = K.ones((num_channels, num_channels))
        mask = tf.matrix_band_part(ones, 0,
                                   -1)  # Upper triangular matrix of 0s and 1s
        mask = K.cast(mask, 'bool')  # Convert integer mask to boolean mask
        triuvec = tf.boolean_mask(
            C, mask, axis=1)  # Apply mask to 2nd and 3rd dimension
        triuvec.set_shape((None, num_channels * (num_channels + 1) // 2))  # Set correct shape manually

        return triuvec
Example #31
    def _attention_layer(self, memory_plus_inputs, ws):
        from_length = self.num_memory_slots + 1
        to_length = self.num_memory_slots + 1

        q_bias, k_bias, v_bias = array_ops.split(ws["attention_bias"], 3, axis=0)

        # [B, F, N, H]
        query_layer = K.dot(
            memory_plus_inputs, ws["attention_kernel"][:, :self.units])
        query_layer = K.bias_add(query_layer, q_bias)
        query_layer = array_ops.reshape(
            query_layer,
            [-1, from_length, self.num_attention_heads, self.size_per_head])
        # [B, N, F, H]
        query_layer1 = array_ops.transpose(query_layer, perm=[0, 2, 1, 3])
        # [B*N, F, H]
        query_layer = array_ops.reshape(
            query_layer1, shape=[-1, from_length, self.size_per_head])

        # [B, T, N, H]
        key_layer = K.dot(
            memory_plus_inputs, ws["attention_kernel"][:, self.units:self.units * 2])
        key_layer = K.bias_add(key_layer, k_bias)
        key_layer = array_ops.reshape(
            key_layer,
            [-1, to_length, self.num_attention_heads, self.size_per_head])
        # [B, N, T, H]
        key_layer = array_ops.transpose(key_layer, perm=[0, 2, 1, 3])
        # [B*N, T, H]
        key_layer = array_ops.reshape(
            key_layer, shape=[-1, to_length, self.size_per_head])

        # [B, T, N, H]
        value_layer = K.dot(
            memory_plus_inputs, ws["attention_kernel"][:, self.units * 2:self.units * 3])
        value_layer = K.bias_add(value_layer, v_bias)
        value_layer = array_ops.reshape(
            value_layer,
            [-1, to_length, self.num_attention_heads, self.size_per_head])
        # [B, N, T, H]
        value_layer = array_ops.transpose(value_layer, perm=[0, 2, 1, 3])
        # [B*N, T, H]
        value_layer = array_ops.reshape(
            value_layer, shape=[-1, to_length, self.size_per_head])

        # [B*N, F, T]
        attention_scores = K.batch_dot(query_layer, key_layer, axes=[2, 2])

        if self.use_relative_position:
            # [F+T-1, N*H]
            r = K.dot(self.rel_table, ws["rel_kernel"])
            # [F+T-1, N, H]
            r = array_ops.reshape(
                r, [-1, self.num_attention_heads, self.size_per_head])
            # [B, N, F, F+T-1]
            bd = tf.einsum("bnfh,lnh->bnfl", query_layer1, r)
            # [B*N, F, F+T-1]
            bd = array_ops.reshape(
                bd, [-1, from_length, from_length + to_length - 1])
            # [B*N, F, T]
            bd = tf.einsum("bfl,ftl->bft", bd, self.pos_table)
            # [B*N, F, T]
            attention_scores += bd

        # [B*N, F, T]
        attention_scores = attention_scores / K.cast(self.size_per_head, tf.float32)

        # [B*N, F, T]
        attention_probs = K.softmax(attention_scores)

        # [B*N, F, H]
        context_layer = K.batch_dot(attention_probs, value_layer, axes=[2, 1])

        # [B, N, F, H]
        context_layer = array_ops.reshape(
            context_layer,
            [-1, self.num_attention_heads, from_length, self.size_per_head])

        # [B, F, N, H]
        context_layer = array_ops.transpose(context_layer, perm=[0, 2, 1, 3])

        # [B, F, N*H]
        context_layer = array_ops.reshape(
            context_layer,
            [-1, from_length, self.num_attention_heads * self.size_per_head])

        return context_layer