Code Example #1
    def call(self, x):
        def hw_flatten(x):
            return K.reshape(x,
                             shape=[
                                 K.shape(x)[0],
                                 K.shape(x)[1] * K.shape(x)[2],
                                 K.shape(x)[3]
                             ])

        f = K.conv2d(x, kernel=self.kernel_f, strides=(1, 1),
                     padding='same')  # [bs, h, w, c']
        g = K.conv2d(x, kernel=self.kernel_g, strides=(1, 1),
                     padding='same')  # [bs, h, w, c']
        h = K.conv2d(x, kernel=self.kernel_h, strides=(1, 1),
                     padding='same')  # [bs, h, w, c]

        s = K.batch_dot(hw_flatten(g),
                        K.permute_dimensions(hw_flatten(f),
                                             (0, 2, 1)))  # [bs, N, N]

        beta = K.softmax(s, axis=-1)  # attention map

        o = K.batch_dot(beta, hw_flatten(h))  # [bs, N, C]

        o = K.reshape(o, shape=K.shape(x))  # [bs, h, w, C]
        x = self.gamma * o + x

        return x
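As a supplement (not part of the original project), a minimal standalone sketch of the core operation above: flattening the spatial grid and using K.batch_dot to form the N x N attention map. Sizes are hypothetical; tf and K are the usual TensorFlow / tf.keras.backend imports.

import tensorflow as tf
from tensorflow.keras import backend as K

bs, h, w, c = 2, 4, 4, 8                                         # hypothetical sizes
x = tf.random.normal((bs, h, w, c))
flat = K.reshape(x, (bs, h * w, c))                              # [bs, N, c]
s = K.batch_dot(flat, K.permute_dimensions(flat, (0, 2, 1)))     # [bs, N, N]
beta = K.softmax(s, axis=-1)                                     # attention map
o = K.batch_dot(beta, flat)                                      # [bs, N, c]
print(o.shape)                                                   # (2, 16, 8)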
Code Example #2
File: layers.py  Project: jharrang/fenwicks
def MultiHeadsAttModel(l=8 * 8, d=512, dv=64, dout=512, nv=8):
    v1 = tf.keras.layers.Input(shape=(l, d))
    q1 = tf.keras.layers.Input(shape=(l, d))
    k1 = tf.keras.layers.Input(shape=(l, d))

    v2 = tf.keras.layers.Dense(d, activation="relu")(v1)
    q2 = tf.keras.layers.Dense(d, activation="relu")(q1)
    k2 = tf.keras.layers.Dense(d, activation="relu")(k1)

    v = tf.keras.layers.Reshape([l, nv, dv])(v2)
    q = tf.keras.layers.Reshape([l, nv, dv])(q2)
    k = tf.keras.layers.Reshape([l, nv, dv])(k2)

    att = tf.keras.layers.Lambda(
        lambda x: K.batch_dot(x[0], x[1], axes=[-1, -1]) / np.sqrt(dv),
        output_shape=(l, nv, nv))([q, k])
    att = tf.keras.layers.Lambda(lambda x: K.softmax(x),
                                 output_shape=(l, nv, nv))(att)
    out = tf.keras.layers.Lambda(
        lambda x: K.batch_dot(x[0], x[1], axes=[4, 3]),
        output_shape=(l, nv, dv))([att, v])
    out = tf.keras.layers.Reshape([l, d])(out)
    out = tf.keras.layers.Add()([out, q1])
    out = tf.keras.layers.Dense(dout, activation="relu")(out)
    return tf.keras.models.Model(inputs=[q1, k1, v1], outputs=out)
Code Example #3
    def call(self, x):
        f = K.conv2d(x, kernel=self.kernel_f, strides=(1, 1),
                     padding='same')  # [bs, h, w, c']
        g = K.conv2d(x, kernel=self.kernel_g, strides=(1, 1),
                     padding='same')  # [bs, h, w, c']
        h = K.conv2d(x, kernel=self.kernel_h, strides=(1, 1),
                     padding='same')  # [bs, h, w, c']

        f_ = K.permute_dimensions(self._hw_flatten(f),
                                  (0, 2, 1))  # [bs, 3c', N]
        s = K.batch_dot(self._hw_flatten(g), f_)  # [bs, N, N]
        beta = K.softmax(s, axis=-1)  # attention map

        double_attn = K.batch_dot(f_, self._hw_flatten(x))  # [bs, 3c', 3c]
        double_attn = K.softmax(double_attn, axis=1)

        h_tmp, shape_tmp = self._hw_flatten(h,
                                            return_shape=True)  # [bs, N, 3c']
        o_tmp = K.batch_dot(beta, h_tmp)  # [bs, N, 3c']
        o = K.batch_dot(o_tmp, double_attn)  # [bs, N, 3c]
        o = self._hw_recover(o, shape_tmp)  # [bs, h, w, C]

        x = self.gamma * o + x

        return x
Code Example #4
 def call(self, inputs, mask=None, **kwargs):
     if len(inputs) == 4:
         query, key, value, prev = inputs
         mask = mask[1]
     else:
         query = key = value = inputs[0]
         prev = inputs[1]
         mask = mask[0]
     feature_dim = K.shape(query)[-1]
     e = K.batch_dot(query, key, axes=2) / K.sqrt(
         K.cast(feature_dim, dtype=K.floatx()))
     new_prev = e = e + prev
     if self.history_only:
         query_len, key_len = K.shape(query)[1], K.shape(key)[1]
         indices = K.expand_dims(K.arange(0, key_len), axis=0)
         upper = K.expand_dims(K.arange(0, query_len), axis=-1)
         e -= 10000.0 * K.expand_dims(K.cast(indices > upper, K.floatx()),
                                      axis=0)
     if mask is not None:
         e -= 10000.0 * (1.0 -
                         K.cast(K.expand_dims(mask, axis=-2), K.floatx()))
     self.intensity = e
     e = K.exp(e - K.max(e, axis=-1, keepdims=True))
     self.attention = e / K.sum(e, axis=-1, keepdims=True)
     v = K.batch_dot(self.attention, value)
     output = [v, new_prev]
     if self.return_attention:
         output.append(self.attention)
     return output
Code Example #5
 def call(self,
          inputs: tensorflow.Tensor,
          mask: Optional[tensorflow.Tensor] = None,
          **kwargs) -> tensorflow.Tensor:
     if isinstance(inputs, list):
         query, key, value = inputs
     else:
         query = key = value = inputs
     if isinstance(mask, list):
         mask = mask[1]
     feature_dim = K.shape(query)[-1]
     e = K.batch_dot(query, key, axes=2) / K.sqrt(
         K.cast(feature_dim, dtype=K.floatx()))
     e = K.exp(e - K.max(e, axis=-1, keepdims=True))
     if self.history_only:
         query_len, key_len = K.shape(query)[1], K.shape(key)[1]
         indices = K.tile(K.expand_dims(K.arange(key_len), axis=0),
                          [query_len, 1])
         upper = K.expand_dims(K.arange(key_len), axis=-1)
         e *= K.expand_dims(K.cast(indices <= upper, K.floatx()), axis=0)
     if mask is not None:
         e *= K.cast(K.expand_dims(mask, axis=-2), K.floatx())
     a = e / (K.sum(e, axis=-1, keepdims=True) + K.epsilon())
     v = K.batch_dot(a, value)
     if self.return_attention:
         return [v, a]
     return v
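A standalone sketch (hypothetical sequence length) of the history_only masking trick used above: it builds a lower-triangular matrix of ones that zeroes attention to future positions.

import tensorflow as tf
from tensorflow.keras import backend as K

key_len = 5                                                                 # hypothetical length
indices = K.tile(K.expand_dims(K.arange(key_len), axis=0), [key_len, 1])    # column index per row
upper = K.expand_dims(K.arange(key_len), axis=-1)                           # row index
causal = K.cast(indices <= upper, K.floatx())                               # lower-triangular ones
print(causal.numpy())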
Code Example #6
    def call(self, inputs, masks, n_head):
        q, k, v = inputs
        q = self.reshape_to_attention_shape(q, n_head)
        k = self.reshape_to_attention_shape(k, n_head)
        v = self.reshape_to_attention_shape(v, n_head)

        # every mask is the same
        mask = masks[0]

        emb_dim = K.shape(q)[-1]

        # [N * n_head, max_len, max_len]
        scores = K.batch_dot(q, k, axes=2) / K.sqrt(K.cast(emb_dim, K.floatx()))

        # softmax, step 1: exponentiate, subtracting the row max for numerical stability
        scores = K.exp(scores - K.max(scores, axis=-1, keepdims=True))

        if mask is not None:
            mask = self.reshape_mask(mask, n_head)
            # [N * n_head, max_len, max_len] * [N * n_head, 1, max_len]
            scores *= mask

        # softmax, step 2: normalize so each row sums to 1
        scores /= (K.sum(scores, axis=-1, keepdims=True) + K.epsilon())

        # [N * n_head, max_len, emb_dim]
        y = K.batch_dot(scores, v)
        return y
Code Example #7
def symmetric_cross_entropy(y_actual, y_pred, A=-6, alpha=0.1, beta=1):
    '''Define the symmetric cross entropy that will be used for training '''
    q = K.one_hot(K.cast(y_actual, 'uint8'), 10)  # 200 or 10
    custom_loss = -alpha * K.mean(
        K.batch_dot(q, K.maximum(K.log(y_pred + 1e-15), A))) - beta * K.mean(
            K.batch_dot(K.maximum(K.log(q + 1e-15), A), y_pred))
    return custom_loss
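A hedged usage sketch for the loss above, assuming 10 classes and random predictions; it only shows how the function is called, not a training setup.

import tensorflow as tf
from tensorflow.keras import backend as K

y_true = tf.constant([3, 1, 7])                        # integer class labels
y_pred = tf.nn.softmax(tf.random.normal((3, 10)))      # predicted class probabilities
loss = symmetric_cross_entropy(y_true, y_pred)
print(float(loss))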
Code Example #8
File: model_defination.py  Project: sunnycia/DFHSal
def pam(x):
    gamma = K.variable(np.array([0]), dtype='float32', name='gamma')

    # channel = 2048
    # spatial_size = height = width = 7
    batch, height, width, channel = x.get_shape().as_list()
    assert height == width, "height and width not equal."

    proj_query = Conv2D(height, 1, padding='same', strides=1)(x)
    proj_query = Reshape((height * width, height))(proj_query)
    # print(proj_query.get_shape());exit()
    proj_query = K.permute_dimensions(proj_query, (0, 2, 1))

    proj_key = Conv2D(height, 1, padding='same', strides=1)(x)
    proj_key = Reshape((height * width, height))(proj_key)

    proj_value = Conv2D(channel, 1, padding='same', strides=1)(x)
    proj_value = Reshape((height * width, channel))(proj_value)
    energy = K.batch_dot(proj_key, proj_query)
    attention = K.softmax(energy)
    attention = K.permute_dimensions(attention, (0, 2, 1))
    out = K.batch_dot(attention, proj_value)

    out = Reshape((height, width, channel))(out)
    # out = Add()([Multiply()([gamma,out]), x])
    out = x + gamma * out

    return out
Code Example #9
    def __call__(self, q, k, v, mask, idx):
        """Applies scaled dot product attention.

    Args:
      q: Queries
      k: Keys
      v: Values
      mask: Masking if required -- sets softmax to very large value

    Returns:
      Tuple of (layer outputs, attention weights)
    """
        temper = tf.sqrt(tf.cast(tf.shape(k)[-1], dtype='float32'))
        attn = Lambda(lambda x: K.batch_dot(x[0], x[1], axes=[2, 2]) / temper,
                      name=f"ScaledDotProdAttenLambda{idx}")(
                          [q, k])  # shape=(batch, q, k)
        if mask is not None:
            mmask = Lambda(lambda x: (-1e+9) * (1. - K.cast(x, 'float32')),
                           name=f"ScaledDotProdAttenLambdaMask{idx}")(
                               mask)  # setting to infinity
            attn = Add(name=f'SDPA_ADD_{idx}')([attn, mmask])
        attn = self.activation(attn)
        attn = self.dropout(attn)
        output = Lambda(lambda x: K.batch_dot(x[0], x[1]),
                        name=f"ScaledDotProdAttenOutput{idx}")([attn, v])
        return output, attn
Code Example #10
    def call(self, inputs):
        if self.share_weights:
            u_hat_vectors = K.conv1d(inputs, self.W)
        else:
            u_hat_vectors = K.local_conv1d(inputs, self.W, [1], [1])

        # u_hat_vectors : The spatially transformed input vectors (with local_conv_1d)

        batch_size = K.shape(inputs)[0]
        input_num_capsule = K.shape(inputs)[1]
        u_hat_vectors = K.reshape(u_hat_vectors,
                                  (batch_size, input_num_capsule,
                                   self.num_capsule, self.dim_capsule))

        u_hat_vectors = K.permute_dimensions(u_hat_vectors, (0, 2, 1, 3))
        routing_weights = K.zeros_like(u_hat_vectors[:, :, :, 0])

        for i in range(self.routings):
            capsule_weights = K.softmax(routing_weights, 1)
            outputs = K.batch_dot(capsule_weights, u_hat_vectors, [2, 2])
            if K.ndim(outputs) == 4:
                outputs = K.sum(outputs, axis=1)
            if i < self.routings - 1:
                outputs = K.l2_normalize(outputs, -1)
                routing_weights = K.batch_dot(outputs, u_hat_vectors, [2, 3])
                if K.ndim(routing_weights) == 4:
                    routing_weights = K.sum(routing_weights, axis=1)

        return self.activation(outputs)
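A quick shape check (a sketch with hypothetical sizes) for the routing contraction above: with the TF 2.x Keras backend, K.batch_dot of the rank-3 coupling coefficients against the rank-4 u_hat_vectors gives a rank-4 result, which is why the code guards on K.ndim(outputs) == 4.

import tensorflow as tf
from tensorflow.keras import backend as K

batch, num_capsule, input_num_capsule, dim_capsule = 2, 10, 6, 16    # hypothetical sizes
c = tf.random.normal((batch, num_capsule, input_num_capsule))
u_hat = tf.random.normal((batch, num_capsule, input_num_capsule, dim_capsule))
out = K.batch_dot(c, u_hat, [2, 2])
print(out.shape)   # (2, 10, 10, 16) on the TF2 backend, hence the ndim == 4 guard above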
Code Example #11
    def call(self, x, training=False):
        fea_map, fea_vec = self.backbone(x, training=training)
        if self.region_attn:
            cls_fea_map_ori = call_layers(self.conv_bn_relu_list[1], fea_vec, training)
            cls_fea_map, HxW = flatten_hw(cls_fea_map_ori)

            attr_fea_map_i = call_layers(self.conv_bn_relu_list[0], fea_vec, training)
            attr_pool_i = call_layers(self.pool_bn_relu_dropout, attr_fea_map_i, training)
            attr_pool_i = tf.expand_dims(attr_pool_i, -1)  # (n, hidden_dim, 1)

            # TODO: `fea_map` -> `cls_fea_map`
            fea_map_ = K.permute_dimensions(cls_fea_map, (0, 2, 1))  # (n, hidden_dim, HxW)
            # fea_map_, HxW = flatten_hw(fea_vec)   # (n, HxW, fea_dim)
            # fea_map_ = K.permute_dimensions(fea_map_, (0, 2, 1))  # (n, fea_dim, HxW)
            attn_cls = K.batch_dot(cls_fea_map, attr_pool_i)  # (n, HxW, 1)
            region_attn_map = self.sigmoid(attn_cls)
            region_attn_map /= tf.cast(HxW, tf.float32)
            region_fea = K.batch_dot(fea_map_, region_attn_map)  # (n, hidden_dim, 1)
            fea_vec = tf.squeeze(region_fea, -1)  # (n, hidden_dim)
            fea_vec = self.region_bn(fea_vec, training=training)
        else:
            if self.add_linear:
                fea_vec = self.fc(fea_vec)  # (n, embedding_dim)
                fea_vec = tf.nn.relu(fea_vec)
            fea_vec = self.pool(fea_vec)
        
        return fea_map, fea_vec
Code Example #12
def attention_k(q_w_q, k_w_k, v_w_v, mask=None, dropout=None):
    """

    Parameters
    ----------
    q_w_q: (batch size, num heads, num tokens in sentence, d_model / d_k), (5, 2, 4, 6)
    k_w_k
    v_w_v
    mask: (5, 1, 1, 4)
    dropout: dropout layer, not dropout rate

    Returns
    -------

    """
    def masked_fill(x, mask, target_mask_val, filled_value=-1e9):
        # keep scores where mask differs from target_mask_val; fill masked positions with a large negative value
        mask_on = K.cast(K.equal(mask, target_mask_val), K.floatx())
        return x * (1.0 - mask_on) + mask_on * filled_value

    d_k = q_w_q.shape.as_list()[-1]
    scores = K.batch_dot(q_w_q, k_w_k, axes=[3, 3]) / math.sqrt(
        d_k)  # (5, 2, 4, 4)
    if mask is not None:
        scores = masked_fill(scores, mask, 0, -1e9)
    p_attn = K.softmax(scores)
    if dropout is not None:
        p_attn = dropout(p_attn)
    return K.batch_dot(p_attn, v_w_v, axes=[3, 2]), p_attn
Code Example #13
 def call(self, x):
     #         print("in call!")
     #         print("x =", x)
     if len(x) == 3:
         Q_seq, K_seq, V_seq = x
         Q_len, V_len = None, None
     elif len(x) == 5:
         Q_seq, K_seq, V_seq, Q_len, V_len = x
     Q_seq = K.dot(Q_seq, self.WQ)
     Q_seq = K.reshape(
         Q_seq, (-1, K.shape(Q_seq)[1], self.nb_head, self.size_per_head))
     #         print("Q_seq1 =", Q_seq)
     Q_seq = K.permute_dimensions(Q_seq, (0, 2, 1, 3))
     #         print("Q_seq2 =", Q_seq)
     K_seq = K.dot(K_seq, self.WK)
     K_seq = K.reshape(
         K_seq, (-1, K.shape(K_seq)[1], self.nb_head, self.size_per_head))
     K_seq = K.permute_dimensions(K_seq, (0, 2, 1, 3))
     V_seq = K.dot(V_seq, self.WV)
     V_seq = K.reshape(
         V_seq, (-1, K.shape(V_seq)[1], self.nb_head, self.size_per_head))
     V_seq = K.permute_dimensions(V_seq, (0, 2, 1, 3))
     A = K.batch_dot(Q_seq, K_seq, axes=[3, 3]) / self.size_per_head**0.5
     A = K.permute_dimensions(A, (0, 3, 2, 1))
     A = self.Mask(A, V_len, 'add')
     A = K.permute_dimensions(A, (0, 3, 2, 1))
     A = K.softmax(A)
     O_seq = K.batch_dot(A, V_seq, axes=[3, 2])
     O_seq = K.permute_dimensions(O_seq, (0, 2, 1, 3))
     O_seq = K.reshape(O_seq, (-1, K.shape(O_seq)[1], self.output_dim))
     O_seq = self.Mask(O_seq, Q_len, 'mul')
     #         print("\n\n\n\n", O_seq)
     return O_seq
Code Example #14
    def call(self, u_vecs):
        if self.share_weights:
            u_hat_vecs = K.conv1d(u_vecs, self.W)
        else:
            u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

        batch_size = K.shape(u_vecs)[0]
        input_num_capsule = K.shape(u_vecs)[1]
        u_hat_vecs = K.reshape(u_hat_vecs,
                               (batch_size, input_num_capsule,
                                self.num_capsule, self.dim_capsule))
        u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))
        # final u_hat_vecs.shape = [None, num_capsule, input_num_capsule, dim_capsule]

        b = K.zeros_like(
            u_hat_vecs[:, :, :,
                       0])  # shape = [None, num_capsule, input_num_capsule]
        for i in range(self.routings):
            b = K.permute_dimensions(
                b, (0, 2, 1))  # shape = [None, input_num_capsule, num_capsule]
            c = K.softmax(b)
            c = K.permute_dimensions(c, (0, 2, 1))
            b = K.permute_dimensions(b, (0, 2, 1))
            outputs = self.activation(K.batch_dot(c, u_hat_vecs, [2, 2]))
            if i < self.routings - 1:
                b = K.batch_dot(outputs, u_hat_vecs, [2, 3])

        return outputs
Code Example #15
    def _control_circuit(self, psi, action):
        """
        Args:
            psi (Tensor([batch_size,N], c64)): batch of states
            action (dict, 'alpha' : Tensor([batch_size,2], tf.float32),
                          'beta'  : Tensor([batch_size,2], tf.float32),
                          'phi'   : Tensor([batch_size,1], tf.float32),
                          'theta' : Tensor([batch_size,1], tf.float32))

        Returns: see parent class docs

        """
        # extract parameters
        alpha = hf.vec_to_complex(action['alpha'])
        beta = hf.vec_to_complex(action['beta'])
        phi = action['phi']
        Rotation = self.rotate(action['theta'])

        Kraus = {}
        T = {'a': self.translate(alpha), 'b': self.translate(beta / 2.0)}
        Kraus[0] = 1 / 2 * (tf.linalg.adjoint(T['b']) +
                            self.phase(phi) * T['b'])
        Kraus[1] = 1 / 2 * (tf.linalg.adjoint(T['b']) -
                            self.phase(phi) * T['b'])

        psi = self.simulate(psi, self.t_feedback)
        psi = batch_dot(T['a'], psi)
        psi_cached = batch_dot(Rotation, psi)
        psi = self.simulate(psi_cached, self.t_round + self.t_idle)
        psi_final, msmt = measurement(psi, Kraus)

        return psi_final, psi_cached, msmt
Code Example #16
    def call(self, inputs, training=None):
        input_shape = K.int_shape(inputs)

        reduction_axes = list(range(0, len(input_shape)))
        if self.axis is not None:
            del reduction_axes[self.axis]
        del reduction_axes[0]

        # Put axis last
        inputs = K.permute_dimensions(
            inputs, tuple([0] + reduction_axes + [self.axis]))

        # Collapse all other dims into dim 1
        cinp = K.reshape(inputs,
                         (K.shape(inputs)[0], -1, input_shape[self.axis]))
        n_reduced = K.shape(cinp)[1]

        # Calculate dot product
        pure_gram = K.batch_dot(cinp, cinp, 1)
        scaled_gram = pure_gram / K.cast(
            2 * n_reduced * input_shape[self.axis], 'float32')

        return scaled_gram
        #return K.sqrt(scaled_gram)

        # Calculate covariance
        means = K.mean(cinp, [1], keepdims=True)
        mean_mat = K.batch_dot(means, means, 1)
        cov = scaled_gram - mean_mat

        return cov
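The example above returns the scaled Gram matrix early, so the covariance block after the first return is unreachable. A minimal sketch (hypothetical sizes) of the Gram computation it performs: K.batch_dot contracting the position axis of the flattened features yields a channel-by-channel Gram matrix.

import tensorflow as tf
from tensorflow.keras import backend as K

feats = tf.random.normal((2, 7 * 7, 64))     # (batch, positions, channels), hypothetical sizes
gram = K.batch_dot(feats, feats, 1)          # contract the position axis -> (2, 64, 64)
print(gram.shape)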
Code Example #17
 def scaled_dot_product_attention(
         inputs,      
         mask=None, 
         return_attention=False, 
         history_only=False
     ):
     
     query, key, value, query_group_ids, key_group_ids = inputs
     
     if isinstance(mask, list):
         mask = mask[1]
     
     feature_dim = K.shape(query)[-1]
     e = K.batch_dot(query, key, axes=2) / K.sqrt(K.cast(feature_dim, dtype=K.floatx()))
     
     group_mask = tf.equal(query_group_ids[:, :, None], key_group_ids[:, None, :])
     e -= (1.0 - tf.cast(group_mask, tf.float32)) * 1e9
     
     if history_only:
         query_len, key_len = K.shape(query)[1], K.shape(key)[1]
         ones = tf.ones((query_len, key_len))
         e -= (ones - tf.matrix_band_part(ones, -1, 0)) * 1e9
     
     if mask is not None:
         e -= (1.0 - K.cast(K.expand_dims(mask, axis=-2), K.floatx())) * 1e9
 
     a = tf.keras.activations.softmax(e)
     v = K.batch_dot(a, value, axes=[2, 1])
 
     if return_attention:
         return [v, a]
     return v
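Note that tf.matrix_band_part is the TF1 name; in TF 2.x the same op is tf.linalg.band_part. A tiny sketch (hypothetical lengths) of the causal penalty the example builds:

import tensorflow as tf

query_len = key_len = 4                                              # hypothetical lengths
ones = tf.ones((query_len, key_len))
causal_penalty = (ones - tf.linalg.band_part(ones, -1, 0)) * 1e9     # band_part keeps the lower triangle
print(causal_penalty.numpy())                                        # 1e9 above the diagonal, 0 on/below it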
Code Example #18
File: layers.py  Project: yujun001/hub-recsys
    def call(self, inputs):
        if self._masking:
            assert len(
                inputs
            ) == 4, "inputs should be set [queries, keys, values, masks]."
            queries, keys, values, masks = inputs
        else:
            assert len(
                inputs) == 3, "inputs should be set [queries, keys, values]."
            queries, keys, values = inputs

        if K.dtype(queries) != 'float32': queries = K.cast(queries, 'float32')
        if K.dtype(keys) != 'float32': keys = K.cast(keys, 'float32')
        if K.dtype(values) != 'float32': values = K.cast(values, 'float32')

        matmul = K.batch_dot(queries, tf.transpose(keys, [0, 2, 1]))  # MatMul
        scaled_matmul = matmul / int(queries.shape[-1])**0.5  # Scale
        if self._masking:
            scaled_matmul = self.mask(scaled_matmul, masks)  # Mask(opt.)

        if self._future:
            scaled_matmul = self.future_mask(scaled_matmul)

        softmax_out = K.softmax(scaled_matmul)  # SoftMax
        # Dropout
        out = K.dropout(softmax_out, self._dropout_rate)

        outputs = K.batch_dot(out, values)

        return outputs
Code Example #19
def attention(x_inner, x_outer, n_factor, dropout):
    x_Q = L.Conv1D(
        n_factor,
        1,
        activation='linear',
        kernel_initializer='glorot_uniform',
        bias_initializer='glorot_uniform',
    )(x_inner)
    x_K = L.Conv1D(
        n_factor,
        1,
        activation='linear',
        kernel_initializer='glorot_uniform',
        bias_initializer='glorot_uniform',
    )(x_outer)
    x_V = L.Conv1D(
        n_factor,
        1,
        activation='linear',
        kernel_initializer='glorot_uniform',
        bias_initializer='glorot_uniform',
    )(x_outer)
    x_KT = L.Permute((2, 1))(x_K)
    res = L.Lambda(lambda c: K.batch_dot(c[0], c[1]) / np.sqrt(n_factor))(
        [x_Q, x_KT])
    #     res = tf.expand_dims(res, axis = 3)
    #     res = L.Conv2D(16, 3, 1, padding = "same", activation = "relu")(res)
    #     res = L.Conv2D(1, 3, 1, padding = "same", activation = "relu")(res)
    #     res = tf.squeeze(res, axis = 3)
    att = L.Lambda(lambda c: K.softmax(c, axis=-1))(res)
    att = L.Lambda(lambda c: K.batch_dot(c[0], c[1]))([att, x_V])
    return att
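A minimal usage sketch for the attention function above, assuming the imports the snippet implies (L = tensorflow.keras.layers, K = tensorflow.keras.backend, np = NumPy) and a hypothetical input sequence of length 10 with 32 features:

import numpy as np
import tensorflow as tf
from tensorflow.keras import layers as L
from tensorflow.keras import backend as K

seq = L.Input(shape=(10, 32))
att_out = attention(seq, seq, n_factor=16, dropout=0.1)   # self-attention over the sequence
model = tf.keras.Model(seq, att_out)
model.summary()                                           # output shape: (None, 10, 16)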
Code Example #20
    def call(self, u_ves):
        print(self.W_kernel.shape)
        print("*****", u_ves.shape)
        u_ves = tf.transpose(u_ves, perm=[0, 2, 1])

        print("*****", u_ves.shape)
        u_hat_vecs = K.conv1d(u_ves, self.W_kernel)
        print("*****", u_hat_vecs.shape)
        batch_size = tf.shape(u_ves)[0]
        input_num_capsule = tf.shape(u_ves)[1]
        u_hat_vecs = tf.reshape(u_hat_vecs,
                                (batch_size, input_num_capsule,
                                 self.out_num_capsule, self.out_dim_capusle))
        u_hat_vecs = tf.transpose(
            u_hat_vecs, perm=[0, 2, 1, 3]
        )  # final shape = [None, out_num_capsule, input_num_capsule, out_dim_capsule]

        # Dynamic routing
        b = tf.zeros_like(
            u_hat_vecs[:, :, :,
                       0])  # shape = [None, out_num_capsule, input_num_capsule]
        for i in range(self.routings):
            c = softmax(b, 1)
            output = K.batch_dot(c, u_hat_vecs, [2, 2])
            output = self.activation(output)
            if i < self.routings - 1:
                #                 o = tf.nn.l2_normalize(o,-1)
                b = b + K.batch_dot(output, u_hat_vecs, [2, 3])
        pose = output
        print("pose is :", pose.shape)
        return pose
Code Example #21
File: CNN_capsule.py  Project: leakyH/mia
    def call(self, inputs, **kwargs):
        """Following the routing algorithm from Hinton's paper,
        but replace b = b + <u,v> with b = <u,v>.
        This change can improve the feature representation of the capsule.
        However, you can replace
            b = K.batch_dot(outputs, hat_inputs, [2, 3])
        with
            b += K.batch_dot(outputs, hat_inputs, [2, 3])
        to get standard routing.
        """

        if self.share_weights:
            hat_inputs = K.conv1d(inputs, self.kernel)
        else:
            hat_inputs = K.local_conv1d(inputs, self.kernel, [1], [1])

        batch_size = K.shape(inputs)[0]
        input_num_capsule = K.shape(inputs)[1]
        hat_inputs = K.reshape(hat_inputs,
                               (batch_size, input_num_capsule,
                                self.num_capsule, self.dim_capsule))
        hat_inputs = K.permute_dimensions(hat_inputs, (0, 2, 1, 3))

        b = K.zeros_like(hat_inputs[:, :, :, 0])
        print(self.routings)
        for i in range(self.routings):
            c = K.softmax(b, 1)
            o = self.activation(K.batch_dot(c, hat_inputs, [2, 2]))
            if i < self.routings - 1:
                b = K.batch_dot(o, hat_inputs, [2, 3])
                if K.backend() == 'theano':
                    o = K.sum(o, axis=1)
        return o
Code Example #22
def cfam_module(input, classes=6, channel=128, channel1=64):
    input_shape = input.get_shape().as_list()
    _, H, W, _ = input_shape
    N = classes
    C = channel
    C1 = channel1
    x = Conv2D(C, 3, padding='same', use_bias=False)(input)
    x1 = Conv2D(C1, 1, padding='same', use_bias=False)(x)
    x1 = tf.transpose(K.reshape(x1, (-1, H * W, C1)), (0, 2, 1))
    p = Conv2D(N, 1, padding='same', use_bias=False)(x)
    p1 = Activation('softmax')(p)
    p1 = K.reshape(p1, (-1, H * W, N))
    A = K.batch_dot(x1, p1)
    A = Activation('softmax')(A)
    p1 = tf.transpose(p1, (0, 2, 1))
    x2 = K.batch_dot(A, p1)
    x2 = K.reshape(tf.transpose(x2, (0, 2, 1)), (-1, H, W, C1))
    x2 = Conv2D(C, (1, 1), padding='same', use_bias=False)(x2)
    x2 = BatchNormalization(epsilon=1e-3)(x2)
    x2 = Activation('relu')(x2)
    x3 = Concatenate()([x2, x])
    y = Conv2D(C, (1, 1), padding='same', use_bias=False)(x3)
    y = BatchNormalization(epsilon=1e-3)(y)
    y = Activation('relu')(y)

    return y
Code Example #23
    def call(self, x, **kwargs):
        assert isinstance(x, list)
        inp_a, inp_b = x

        outp_a = K.l2_normalize(inp_a, -1)
        outp_b = K.l2_normalize(inp_b, -1)
        alpha = K.batch_dot(outp_b, outp_a, axes=[2, 2])
        alpha = K.l2_normalize(alpha, 1)
        alpha = K.one_hot(K.argmax(alpha, 1), K.int_shape(inp_a)[1])
        hmax = K.batch_dot(alpha, outp_b, axes=[1, 1])
        kcon = K.eye(K.int_shape(inp_a)[1], dtype='float32')

        m = []
        for i in range(self.output_dim):
            outp_a = inp_a * self.W[i]
            outp_hmax = hmax * self.W[i]
            outp_a = K.l2_normalize(outp_a, -1)
            outp_hmax = K.l2_normalize(outp_hmax, -1)
            outp = K.batch_dot(outp_hmax, outp_a, axes=[2, 2])
            outp = K.sum(outp * kcon, -1, keepdims=True)
            m.append(outp)
        if self.output_dim > 1:
            persp = K.concatenate(m, 2)
        else:
            persp = m[0]
        return [persp, persp]
Code Example #24
 def call(self, x):
     # If only Q_seq, K_seq, V_seq are passed in, no mask is applied
     # If Q_seq, K_seq, V_seq, Q_len, V_len are all passed in, the extra (padding) positions are masked
     if len(x) == 3:
         Q_seq, K_seq, V_seq = x
         Q_len, V_len = None, None
     elif len(x) == 5:
         Q_seq, K_seq, V_seq, Q_len, V_len = x
     # Apply linear transformations to Q, K and V
     Q_seq = K.dot(Q_seq, self.WQ)
     Q_seq = K.reshape(Q_seq, (-1, K.shape(Q_seq)[1], self.nb_head, self.head_dim))
     Q_seq = K.permute_dimensions(Q_seq, (0, 2, 1, 3))
     K_seq = K.dot(K_seq, self.WK)
     K_seq = K.reshape(K_seq, (-1, K.shape(K_seq)[1], self.nb_head, self.head_dim))
     K_seq = K.permute_dimensions(K_seq, (0, 2, 1, 3))
     V_seq = K.dot(V_seq, self.WV)
     V_seq = K.reshape(V_seq, (-1, K.shape(V_seq)[1], self.nb_head, self.head_dim))
     V_seq = K.permute_dimensions(V_seq, (0, 2, 1, 3))
     # Compute the dot products, then mask, then softmax
     A = K.batch_dot(Q_seq, K_seq, axes=[3, 3]) / self.head_dim ** 0.5
     A = K.permute_dimensions(A, (0, 3, 2, 1))
     A = self.Mask(A, V_len, 'add')
     A = K.permute_dimensions(A, (0, 3, 2, 1))
     A = K.softmax(A)
     # Produce the output and apply the mask
     O_seq = K.batch_dot(A, V_seq, axes=[3, 2])
     O_seq = K.permute_dimensions(O_seq, (0, 2, 1, 3))
     O_seq = K.reshape(O_seq, (-1, K.shape(O_seq)[1], self.dim))
     O_seq = self.Mask(O_seq, Q_len, 'mul')
     return O_seq
Code Example #25
def mpgm_loss(target, prediction, l_A=1., l_E=1., l_F=1.):
    """
    Loss function using max-pooling graph matching as described in the GraphVAE paper.
    Let's see if backprop works. Args are the same as above.
    """
    A, E, F = target
    A_hat, E_hat, F_hat = prediction
    n = A.shape[1]
    k = A_hat.shape[1]
    mpgm = MPGM()
    X = tf.cast(mpgm.call(A, A_hat, E, E_hat, F, F_hat), dtype=tf.float64)

    # now comes the loss part from the paper:
    A_t = tf.transpose(X, perm=[0, 2, 1]) @ A @ X  # shape (bs,k,n)
    E_hat_t = tf.transpose(batch_dot(batch_dot(X, E_hat, axes=(-1, 1)),
                                     X,
                                     axes=(-2, 1)),
                           perm=[0, 1, 3, 2])
    F_hat_t = tf.matmul(X, F_hat)
    # To avoid inf or nan errors we add the smallest possible value to all elements.
    A_hat_4log = add_e7(A_hat)

    term_1 = (1 / k) * tf.math.reduce_sum(
        diag_part(A_t) * tf.math.log(diag_part(A_hat_4log)), [1],
        keepdims=True)

    term_2 = tf.reduce_sum(
        (tf.ones_like(diag_part(A_t)) - diag_part(A_t)) *
        (tf.ones_like(diag_part(A_hat)) - tf.math.log(diag_part(A_hat_4log))),
        [1],
        keepdims=True)

    # TODO unsure if (1/(k*(1-k))) or ((1-k)/k) ??? Also the second sum in the paper is confusing. I am going to interpret it as matrix multiplication and sum over all elements.
    b = diag_part(A_t)
    term_31 = set_diag(A_t, tf.zeros_like(diag_part(A_t))) * set_diag(
        tf.math.log(A_hat_4log), tf.zeros_like(diag_part(A_hat)))
    term_31 = replace_nan(term_31)  # You know why!

    term_32 = tf.ones_like(A_t) - set_diag(A_t, tf.zeros_like(
        diag_part(A_t))) * tf.math.log(
            tf.ones_like(A_t) -
            set_diag(A_hat_4log, tf.zeros_like(diag_part(A_hat))))
    term_32 = replace_nan(term_32)
    term_3 = (1 / k * (1 - k)) * tf.expand_dims(
        tf.math.reduce_sum(term_31 + term_32, [1, 2]), -1)
    log_p_A = term_1 + term_2 + term_3

    # Man so many confusions: is the log over one or both Fs???
    F = tf.cast(F, dtype=tf.float64)
    A = tf.cast(A, dtype=tf.float64)
    E = tf.cast(E, dtype=tf.float64)
    log_p_F = (1 / n) * tf.math.log(
        tf.expand_dims(tf.math.reduce_sum(add_e7(F * F_hat_t), [1, 2]), -1))

    log_p_E = tf.math.log(
        tf.expand_dims((1 / (tf.norm(A, ord='fro', axis=[-2, -1]) - n)) *
                       tf.math.reduce_sum(add_e7(E * E_hat_t), [1, 2, 3]), -1))

    log_p = -l_A * log_p_A - l_F * log_p_F - l_E * log_p_E
    return log_p
Code Example #26
def acf_module(coarse_input, feature_map):
    input_shape = coarse_input.get_shape().as_list()
    _, H, W, N = input_shape

    coarse = tf.transpose(K.reshape(coarse_input, (-1, H * W, N)), (0, 2, 1))
    C = 64
    x = Conv2D(C, (1, 1),
               padding='same',
               use_bias=False,
               activation=None,
               name='feature_map_conv1')(feature_map)
    x = BatchNormalization(name='feature_map_conv1_BN')(x)
    x = Activation(tf.nn.relu)(x)
    x = Dropout(0.1)(x)
    x = K.reshape(x, (-1, H * W, C))

    x = K.batch_dot(coarse, x)
    x = tf.subtract(K.max(x, axis=-1, keepdims=True), x)
    x = tf.nn.softmax(x, axis=-1)

    x = tf.transpose(x, (0, 2, 1))
    x = K.batch_dot(x, coarse)

    x = tf.transpose(x, (0, 2, 1))
    x = K.reshape(x, (-1, H, W, C))

    x = Conv2D(C, (1, 1),
               padding='same',
               use_bias=False,
               activation=None,
               name='feature_map_conv2')(x)

    return x
Code Example #27
    def call(self, x, mask=None):
        q, k, v = x
        d_k = q.shape.as_list()[2]

        # in pure tensorflow:
        # weights = tf.matmul(x_batch, tf.transpose(y_batch, perm=[0, 2, 1]))
        # normalized_weights = tf.nn.softmax(weights/scaling)
        # output = tf.matmul(normalized_weights, x_batch)

        weights = K.batch_dot(q, k, axes=[2, 2])

        if mask is not None:
            # add mask weights
            if isinstance(mask, (list, tuple)):
                if len(mask) > 0:
                    raise ValueError(
                        "mask can only be a Tensor or a list of length 1 containing a tensor."
                    )

                mask = mask[0]

            weights += -1e10 * (1 - mask)

        normalized_weights = K.softmax(weights / np.sqrt(d_k))
        output = K.batch_dot(normalized_weights, v)

        if self._return_attention:
            return [output, normalized_weights]
        else:
            return output
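As the comments in the example above indicate, K.batch_dot with axes=[2, 2] on rank-3 tensors matches the pure-TensorFlow matmul-with-transpose formulation. A small sketch (hypothetical shapes) verifying the equivalence:

import tensorflow as tf
from tensorflow.keras import backend as K

q = tf.random.normal((2, 5, 8))    # (batch, query positions, d_k)
k = tf.random.normal((2, 7, 8))    # (batch, key positions, d_k)
v = tf.random.normal((2, 7, 8))

w_keras = K.batch_dot(q, k, axes=[2, 2])                    # (2, 5, 7)
w_tf = tf.matmul(q, tf.transpose(k, perm=[0, 2, 1]))        # same values
print(bool(tf.reduce_all(tf.abs(w_keras - w_tf) < 1e-5)))   # True

out = K.batch_dot(tf.nn.softmax(w_keras / tf.sqrt(8.0)), v)  # (2, 5, 8)
print(out.shape)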
Code Example #28
 def call(self, x):
     
     if len(x) == 3:  # unpack the incoming Q_seq, K_seq, V_seq
         Q_seq, K_seq, V_seq = x
         Q_len, V_len = None, None
     elif len(x) == 5:  # Q_len, V_len give the lengths used for masking
         Q_seq, K_seq, V_seq, Q_len, V_len = x
         
     print("Q_seq------------------",Q_seq)
     # Apply linear transformations to Q, K and V: nb_head projections, each of size_per_head dimensions
     Q_seq = K.dot(Q_seq, self.WQ)  # queries
     Q_seq = K.reshape(Q_seq, (-1, K.shape(Q_seq)[1], self.nb_head, self.size_per_head))
     Q_seq = K.permute_dimensions(Q_seq, (0, 2, 1, 3))  # like a transpose: reorders the dimensions, shape=(4,)
     K_seq = K.dot(K_seq, self.WK)  # keys
     K_seq = K.reshape(K_seq, (-1, K.shape(K_seq)[1], self.nb_head, self.size_per_head))
     K_seq = K.permute_dimensions(K_seq, (0, 2, 1, 3))  # shape=(4,)
     V_seq = K.dot(V_seq, self.WV)  # values
     V_seq = K.reshape(V_seq, (-1, K.shape(V_seq)[1], self.nb_head, self.size_per_head))
     V_seq = K.permute_dimensions(V_seq, (0, 2, 1, 3))
     # Compute the dot products, then mask, then softmax
     A = K.batch_dot(Q_seq, K_seq, axes=[3, 3]) / self.size_per_head**0.5  # attention_11/Shape_12:0, shape=(5,)
     ######## The line above raises an error:
     ######## ValueError: Dimension must be 5 but is 4 for 'attention_11/transpose_7'
     ##### In TF1, A had rank 4 (shape=(4,)); in TF2 it becomes rank 5 (shape=(5,))
     A = K.permute_dimensions(A, (0,3,2,1))
     A = self.Mask(A, V_len, 'add')
     A = K.permute_dimensions(A, (0,3,2,1))    
     A = K.softmax(A)
     # Produce the output and apply the mask
     O_seq = K.batch_dot(A, V_seq, axes=[3,2])
     O_seq = K.permute_dimensions(O_seq, (0,2,1,3))
     O_seq = K.reshape(O_seq, (-1, K.shape(O_seq)[1], self.output_dim))
     O_seq = self.Mask(O_seq, Q_len, 'mul')
     
     return O_seq
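A standalone sketch (hypothetical sizes) reproducing the shape issue documented in the comments above: with the TF 2.x Keras backend, K.batch_dot on two rank-4 tensors with axes=[3, 3] treats only axis 0 as a batch axis, so the result gains a rank.

import tensorflow as tf
from tensorflow.keras import backend as K

q = tf.random.normal((2, 4, 7, 8))    # (batch, heads, seq, size_per_head), hypothetical sizes
k = tf.random.normal((2, 4, 7, 8))
a = K.batch_dot(q, k, axes=[3, 3])
print(a.shape)    # (2, 4, 7, 4, 7) -- rank 5, unlike the rank-4 result older Keras produced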
Code Example #29
    def call(self, x):
        Q_seq, K_seq, V_seq = x
        Q_len, V_len = None, None
        print("build attention")

        Q_seq = K.dot(Q_seq, self.WQ)
        Q_seq = K.reshape(Q_seq, (-1, K.shape(Q_seq)[1], self.nb_head, self.size_per_head))
        Q_seq = K.permute_dimensions(Q_seq, (0, 2, 1, 3))

        K_seq = K.dot(K_seq, self.WK)
        K_seq = K.reshape(K_seq, (-1, K.shape(K_seq)[1], self.nb_head, self.size_per_head))
        K_seq = K.permute_dimensions(K_seq, (0, 2, 1, 3))

        V_seq = K.dot(V_seq, self.WV)
        V_seq = K.reshape(V_seq, (-1, K.shape(V_seq)[1], self.nb_head, self.size_per_head))
        V_seq = K.permute_dimensions(V_seq, (0, 2, 1, 3))

        A = K.batch_dot(Q_seq, K_seq, axes=[3, 3]) / self.size_per_head ** 0.5
        A = K.permute_dimensions(A, (0, 3, 2, 1))
        A = self.Mask(A, V_len, "add")
        A = K.permute_dimensions(A, (0, 3, 2, 1))
        A = K.softmax(A)

        O_seq = K.batch_dot(A, V_seq, axes=[3, 2])
        O_seq = K.permute_dimensions(O_seq, (0, 2, 1, 3))
        O_seq = K.reshape(O_seq, (-1, K.shape(O_seq)[1], self.output_dim))
        O_seq = self.Mask(O_seq, Q_len, "mul")

        return O_seq
Code Example #30
 def attention(self, q, k, v, training=None) -> KTensor:
     ndim = K.cast(K.shape(q)[-1], dtype=K.floatx())
     product = K.batch_dot(q, k, axes=(2, 2))
     weights = K.softmax(product / K.sqrt(ndim))
     if self.regularise:
         self.add_regularisation(weights)
     weights_dropout = ops.apply_dropout(self.dropout, weights, training)
     return K.batch_dot(weights_dropout, v)