Example #1
        def normalize_func(mean_batch, variance_batch):
            mean_batch = K.reshape(mean_batch, broadcast_shape)
            variance_batch = K.reshape(variance_batch, broadcast_shape)

            mean_weights = K.softmax(self.mean_weights, axis=0)
            variance_weights = K.softmax(self.variance_weights, axis=0)

            mean = (mean_weights[0] * mean_instance +
                    mean_weights[1] * mean_layer +
                    mean_weights[2] * mean_batch)

            variance = (variance_weights[0] * variance_instance +
                        variance_weights[1] * variance_layer +
                        variance_weights[2] * variance_batch)

            outputs = (inputs - mean) / (K.sqrt(variance + self.epsilon))

            if self.scale:
                broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
                outputs = outputs * broadcast_gamma

            if self.center:
                broadcast_beta = K.reshape(self.beta, broadcast_shape)
                outputs = outputs + broadcast_beta

            return outputs
Example #2
    def call(self, x, mask=None):
        features_dim = self.features_dim
        step_dim = self.step_dim
        #xw = K.reshape(K.dot(x[0], K.reshape(self.W, (features_dim, features_dim))), (-1, features_dim))
        #yavg=K.reshape(K.mean(K.mean(x[1], axis=1, keepdims=True),axis=0, keepdims=True), (features_dim,-1))
        xw1 = K.dot(x[0], K.reshape(self.W1, (features_dim, features_dim)))
        xw2 = K.dot(x[1], K.reshape(self.W2, (features_dim, features_dim)))
        xw1t = K.permute_dimensions(xw1, [0, 2, 1])
        xw2t = K.permute_dimensions(xw2, [0, 2, 1])
        xw11 = K.batch_dot(xw1, xw1t) / (step_dim**0.5)
        xw12 = K.batch_dot(xw1, xw2t) / (step_dim**0.5)

        s11 = self.ll * K.softmax(xw11)
        s12 = (1 - self.ll) * K.softmax(xw12)

        eij = s11 + s12
        print(eij.get_shape())
        V = x[0] * K.mean(eij, axis=2, keepdims=True)
        if self.get_alpha:
            return eij
        else:
            if self.get_sequence:
                return V
            else:
                return K.sum(V, axis=1)
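
Every snippet on this page reduces to the same primitive: a softmax over attention scores. A minimal, self-contained illustration of that primitive (TF 2.x Keras backend; the toy shapes are assumptions, not taken from any example above):

import numpy as np
from tensorflow.keras import backend as K

scores = K.constant(np.random.randn(2, 4, 6))   # (batch, query_len, key_len)
weights = K.softmax(scores, axis=-1)            # normalize over the key axis
print(K.eval(K.sum(weights, axis=-1)))          # every row sums to ~1.0
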
Example #3
def modified_kd_targets_from_logits(train_logits, test_logits, temp=1):
    # create soft targets from loaded logits
    if temp <= 0:
        temp = 1
    train_logits_t = train_logits / temp
    test_logits_t = test_logits / temp
    Y_train_soft = K.softmax(train_logits_t)
    Y_test_soft = K.softmax(test_logits_t)
    sess = K.get_session()
    Y_train_soft = sess.run(Y_train_soft)
    Y_test_soft = sess.run(Y_test_soft)
    return Y_train_soft, Y_test_soft
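
K.get_session() only exists on the TF 1.x graph backend. A hedged TF 2.x sketch of the same soft-target computation, relying on eager execution (the function name is an assumption, not from the source):

import tensorflow as tf

def kd_soft_targets(train_logits, test_logits, temp=1.0):
    # Softmax at a raised temperature, evaluated eagerly instead of through a session.
    temp = temp if temp > 0 else 1.0
    y_train_soft = tf.nn.softmax(train_logits / temp, axis=-1).numpy()
    y_test_soft = tf.nn.softmax(test_logits / temp, axis=-1).numpy()
    return y_train_soft, y_test_soft
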
Example #4
def modified_kd_targets_from_logits(Y_train, Y_test, train_logits, test_logits,
                                    temp):
    # create soft targets from loaded logits
    train_logits_t = train_logits / temp
    test_logits_t = test_logits / temp
    Y_train_soft = K.softmax(train_logits_t)
    Y_test_soft = K.softmax(test_logits_t)
    sess = K.get_session()
    Y_train_soft = sess.run(Y_train_soft)
    Y_test_soft = sess.run(Y_test_soft)
    # concatenate hard and soft targets to create the knowledge distillation targets
    Y_train_new = np.concatenate([Y_train, Y_train_soft], axis=1)
    Y_test_new = np.concatenate([Y_test, Y_test_soft], axis=1)
    return Y_train_new, Y_test_new
Example #5
    def call(self, inputs, mask=None):
        if mask is not None:
            # Keras masks are 1.0 for positions to keep; push the masked
            # positions to a large negative value before the softmax.
            adder = (1.0 - math_ops.cast(mask, inputs.dtype)) * (
                _large_compatible_negative(inputs.dtype))

            inputs += adder
        if isinstance(self.axis, (tuple, list)):
            if len(self.axis) > 1:
                return math_ops.exp(inputs - math_ops.reduce_logsumexp(
                    inputs, axis=self.axis, keepdims=True))
            else:
                return K.softmax(inputs, axis=self.axis[0])
        return K.softmax(inputs, axis=self.axis)
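
The exp(x - logsumexp(x)) branch is what lets this layer normalize over several axes at once; for a single axis it coincides with an ordinary softmax. A toy check (TF 2.x assumed):

import tensorflow as tf

x = tf.random.normal((2, 3, 5))
via_logsumexp = tf.exp(x - tf.reduce_logsumexp(x, axis=-1, keepdims=True))
via_softmax = tf.nn.softmax(x, axis=-1)
print(tf.reduce_max(tf.abs(via_logsumexp - via_softmax)).numpy())  # ~0.0
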
Example #6
        def energy_step(inputs, states):

            assert_msg = "States must be a list. However states {} is of type {}".format(
                states, type(states))

            assert isinstance(states, list) or isinstance(states,
                                                          tuple), assert_msg

            en_seq_len, en_hidden = encoder_out_seq.shape[
                1], encoder_out_seq.shape[2]
            de_hidden = inputs.shape[-1]

            reshaped_enc_outputs = K.reshape(encoder_out_seq, (-1, en_hidden))
            W_a_dot_s = K.reshape(K.dot(reshaped_enc_outputs, self.W_a),
                                  (-1, en_seq_len, en_hidden))
            if verbose:
                print('wa.s > ', W_a_dot_s.shape)

            U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a),
                                      1)  # (batch_size, 1, latent_dim)
            if verbose:
                print('Ua.h > ', U_a_dot_h.shape)

            reshaped_Ws_plus_Uh = K.tanh(
                K.reshape(W_a_dot_s + U_a_dot_h, (-1, en_hidden)))
            if verbose:
                print('Ws+Uh > ', reshaped_Ws_plus_Uh.shape)

            e_i = K.reshape(K.dot(reshaped_Ws_plus_Uh, self.V_a),
                            (-1, en_seq_len))
            e_i = K.softmax(e_i)
            if verbose:
                print('ei > ', e_i.shape)

            return e_i, [e_i]
Example #7
        def energy_step(inputs, states):
            """ Step function for computing energy for a single decoder state
            inputs: (batchsize * 1 * de_in_dim)
            states: (batchsize * 1 * de_latent_dim)
            """
            """ Some parameters required for shaping tensors"""
            en_seq_len, en_hidden = encoder_out_seq.shape[
                1], encoder_out_seq.shape[2]
            de_hidden = inputs.shape[-1]
            """ Computing S.Wa where S=[s0, s1, ..., si]"""
            # <= batch size * en_seq_len * latent_dim
            W_a_dot_s = K.dot(encoder_out_seq, self.W_a)
            """ Computing hj.Ua """
            U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a),
                                      1)  # <= batch_size, 1, latent_dim
            """ tanh(S.Wa + hj.Ua) """
            # <= batch_size*en_seq_len, latent_dim
            Ws_plus_Uh = K.tanh(W_a_dot_s + U_a_dot_h)
            """ softmax(va.tanh(S.Wa + hj.Ua)) """
            # <= batch_size, en_seq_len
            e_i = K.squeeze(K.dot(Ws_plus_Uh, self.V_a), axis=-1)
            # <= batch_size, en_seq_len
            e_i = K.softmax(e_i)

            return e_i, [e_i]
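
In the attention layers these energy_step variants come from, the step function is typically driven with K.rnn over the decoder sequence. A hedged, self-contained sketch of that wiring; the toy shapes and the dot-product toy_energy_step (standing in for the learned W_a / U_a / V_a) are illustrative assumptions:

import numpy as np
from tensorflow.keras import backend as K

encoder_out_seq = K.constant(np.random.randn(2, 6, 8))   # (batch, en_seq_len, en_hidden)
decoder_out_seq = K.constant(np.random.randn(2, 4, 8))   # (batch, de_seq_len, de_hidden)

def toy_energy_step(inputs, states):
    # inputs: (batch, de_hidden) for one decoder step.
    scores = K.sum(encoder_out_seq * K.expand_dims(inputs, 1), axis=-1)  # (batch, en_seq_len)
    e_i = K.softmax(scores)
    return e_i, [e_i]

fake_state = K.sum(K.zeros_like(encoder_out_seq), axis=-1)        # (batch, en_seq_len)
_, e_outputs, _ = K.rnn(toy_energy_step, decoder_out_seq, [fake_state])
print(K.int_shape(e_outputs))     # (2, 4, 6): one attention row per decoder step
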
Example #8
def simple_context(X, mask):
    """
    Simple context calculation layer logic
    X = (batch_size, time_steps, units)
    time_steps are nothing but number of words in our case.
    """
    # segregate heading and desc
    desc, head = X[:, :parameters.max_len_desc, :], X[:, parameters.max_len_desc:, :]
    # segregate activation and context part
    head_activations, head_words = head[:, :, :parameters.activation_rnn_size], head[:, :, parameters.activation_rnn_size:]
    desc_activations, desc_words = desc[:, :, :parameters.activation_rnn_size], desc[:, :, parameters.activation_rnn_size:]

    # p = (batch_size, length_desc_words, rnn_units)
    # q = (batch_size, length_headline_words, rnn_units)
    # K.batch_dot(p, q) = (batch_size, length_desc_words, length_headline_words)
    activation_energies = K.batch_dot(head_activations, desc_activations, axes=(2, 2))

    # make sure we don't use description words that are masked out
    activation_energies = activation_energies + -1e20 * K.expand_dims(1. - K.cast(mask[:, :parameters.max_len_desc], 'float32'), 1)

    # for every head word compute weights for every desc word
    activation_energies = K.reshape(activation_energies, (-1, parameters.max_len_desc))
    activation_weights = K.softmax(activation_energies)
    activation_weights = K.reshape(activation_weights, (-1, parameters.max_len_head, parameters.max_len_desc))

    # for every head word compute weighted average of desc words
    desc_avg_word = K.batch_dot(activation_weights, desc_words, axes=(2, 1))
    return K.concatenate((desc_avg_word, head_words))
Example #9
        def energy_step(decode_outs, states):  # decode_outs(batch,dim)

            # decoder_seq [N, 30, 512]; 30 is the sequence (string) length
            en_seq_len, en_hidden = encoder_out_seq.shape[
                1], encoder_out_seq.shape[2]  # 30, 512
            de_hidden = decode_outs.shape[-1]
            #  W * h_j
            reshaped_enc_outputs = K.reshape(
                encoder_out_seq, (-1, en_hidden))  #[b,64,512]=> [b*64,512]

            # W_a[512x512],reshaped_enc_outputs[b*64,512] => [b*64,512] => [b,64,512]
            W_a_dot_s = K.reshape(K.dot(reshaped_enc_outputs, self.W_a),
                                  (-1, en_seq_len, en_hidden))

            # U * S_{t-1}: decode_outs [b,512] x U_a [512,512] => [b,512] => [b,1,512]
            U_a_dot_h = K.expand_dims(K.dot(decode_outs, self.U_a),
                                      axis=1)  # <= batch_size, 1, latent_dim

            # Subtle detail: this effectively replicates the decoder output across the
            # 64 encoder time steps and adds it to the encoder output [b, 64, 512].

            # tanh(W * h_j + U * S_{t-1} + b): [b,64,512] => [b*64,512]
            reshaped_Ws_plus_Uh = K.tanh(
                K.reshape(W_a_dot_s + U_a_dot_h, (-1, en_hidden)))

            # V * tanh ( W * h_j + U * S_t-1 + b ), [b*64,512]*[512,1] => [b*64,1] => [b,64]
            e_i = K.reshape(K.dot(reshaped_Ws_plus_Uh, self.V_a),
                            (-1, en_seq_len))

            e_i = K.softmax(e_i)

            return e_i, [e_i]
Example #10
    def call(self, inputs, mask=None):
        # output = softmax(score)
        k, q = inputs
        if len(q.shape) == 2:
            q = K.expand_dims(q, axis=1)
        # k: (?, K_LEN, EMBED_DIM,)
        # q: (?, Q_LEN, EMBED_DIM,)
        # score: (?, Q_LEN, K_LEN,)
        if self.score_function == 'scaled_dot_product':
            kt = K.permute_dimensions(k, (0, 2, 1))
            qkt = K.batch_dot(q, kt)
            score = qkt / self.EMBED_DIM
        elif self.score_function == 'mlp':
            kq = K.concatenate([k, q], axis=1)
            kqw2 = K.tanh(K.dot(kq, self.W2))
            score = K.permute_dimensions(K.dot(self.W1, kqw2), (1, 0, 2))
        elif self.score_function == 'bi_linear':
            qw = K.dot(q, self.W)
            kt = K.permute_dimensions(k, (0, 2, 1))
            score = K.batch_dot(qw, kt)
        else:
            raise RuntimeError('invalid score_function')
        score = K.softmax(score)
        # if mask is not None:
        #     score *= K.cast(mask[0], K.floatx())
        # output: (?, Q_LEN, EMBED_DIM,)
        output = K.batch_dot(score, k)

        return output
Example #11
    def call(self, inputs, **kwargs):
        inputs = inputs if isinstance(inputs, list) else [inputs]

        if len(inputs) < 1 or len(inputs) > 2:
            raise ValueError("AttentionLayer expect one or two inputs.")

        actual_input = inputs[0]
        mask = inputs[1] if len(inputs) > 1 else None
        if mask is not None and not (
            ((len(mask.shape) == 3 and mask.shape[2] == 1)
             or len(mask.shape) == 2) and mask.shape[1] == self.input_length):
            raise ValueError(
                "`mask` should be of shape (batch, input_length) or (batch, input_length, 1) "
                "when calling an AttentionLayer.")

        assert actual_input.shape[-1] == self.attention_param.shape[0]

        # (batch, input_length, input_dim) * (input_dim, 1) ==> (batch, input_length, 1)
        attention_weights = K.dot(actual_input, self.attention_param)

        if mask is not None:
            if len(mask.shape) == 2:
                mask = K.expand_dims(mask, axis=2)  # (batch, input_length, 1)
            mask = K.log(mask)
            attention_weights += mask

        attention_weights = K.softmax(attention_weights,
                                      axis=1)  # (batch, input_length, 1)
        result = K.sum(
            actual_input * attention_weights,
            axis=1)  # (batch, input_dim)  [multiplication uses broadcast]
        return result, attention_weights
Example #12
        def energy_step(inputs, states):
            """ Step function for computing energy for a single decoder state """

            # input: (batch_size, latent_dim)
            assert_msg = "States must be a list. However states {} is of type {}".format(
                states, type(states))
            assert isinstance(states, list) or isinstance(states,
                                                          tuple), assert_msg
            """ Computing sj.Ua """
            # (batch_size, 1, d3)
            U_a_dot_s = K.expand_dims(K.dot(inputs, self.U_a), 1)
            if verbose:
                print('Ua.h>', K.int_shape(U_a_dot_s))
            """ tanh(h.Wa + s.Ua) """
            # (batch_size, h1*h2*...*hn, d3) = (batch_size, h1*h2*...*hn, d3) + (batch_size, 1, d3)
            Wh_plus_Us = K.tanh(W_hi + U_a_dot_s)
            # (batch_size, d3, h1*h2*...*hn)
            Wh_plus_Us = K.permute_dimensions(Wh_plus_Us, (0, 2, 1))
            if verbose:
                print('Wh+Us>', K.int_shape(Wh_plus_Us))
            """ softmax(va.tanh(S.Wa + hj.Ua)) """
            # (1, batch_size, h1*h2*...*hn) = (1, d3) . (batch_size, d3, h1*h2*...*hn)
            Wh_plus_Us_dot_Va = K.dot(self.V_a, Wh_plus_Us)
            # (batch_size, h1*h2*...*hn)
            e_i = K.squeeze(Wh_plus_Us_dot_Va, 0)
            e_i = K.softmax(e_i)

            if verbose:
                print('ei>', K.int_shape(e_i))

            # (batch_size, h1*h2*...*hn)
            return e_i, states
Example #13
def simple_context(X, mask):

    desc, head = X[:, :parameters.max_len_desc, :], X[:, parameters.max_len_desc:, :]

    head_activations, head_words = (head[:, :, :parameters.activation_rnn_size],
                                    head[:, :, parameters.activation_rnn_size:])
    desc_activations, desc_words = (desc[:, :, :parameters.activation_rnn_size],
                                    desc[:, :, parameters.activation_rnn_size:])

    activation_energies = K.batch_dot(head_activations,
                                      desc_activations,
                                      axes=(2, 2))

    activation_energies = activation_energies + -1e20 * K.expand_dims(
        1. - K.cast(mask[:, :parameters.max_len_desc], 'float32'), 1)

    activation_energies = K.reshape(activation_energies,
                                    (-1, parameters.max_len_desc))
    activation_weights = K.softmax(activation_energies)
    activation_weights = K.reshape(
        activation_weights,
        (-1, parameters.max_len_head, parameters.max_len_desc))

    desc_avg_word = K.batch_dot(activation_weights, desc_words, axes=(2, 1))
    return K.concatenate((desc_avg_word, head_words))
Example #14
    def call(self, inputs):
        X = inputs[0]  # Node features (N x F)
        A = inputs[1]  # Adjacency matrix (N x N)

        outputs = []
        for head in range(self.attn_heads):
            kernel = self.kernels[head]  # W in the paper (F x F')
            attention_kernel = self.attn_kernels[
                head]  # Attention kernel a in the paper (2F' x 1)

            # Compute inputs to attention network
            features = K.dot(X, kernel)  # (N x F')

            # Compute feature combinations
            # Note: [[a_1], [a_2]]^T [[Wh_i], [Wh_j]] = [a_1]^T [Wh_i] + [a_2]^T [Wh_j]
            attn_for_self = K.dot(
                features, attention_kernel[0])  # (N x 1), [a_1]^T [Wh_i]
            attn_for_neighs = K.dot(
                features, attention_kernel[1])  # (N x 1), [a_2]^T [Wh_j]

            # Attention head a(Wh_i, Wh_j) = a^T [[Wh_i], [Wh_j]]
            dense = attn_for_self + K.transpose(
                attn_for_neighs)  # (N x N) via broadcasting

            # Add nonlinearity
            dense = LeakyReLU(alpha=0.2)(dense)

            # Mask values before activation (Vaswani et al., 2017)
            mask = -10e9 * (1.0 - A)
            dense += mask

            # Apply softmax to get attention coefficients
            dense = K.softmax(dense)  # (N x N)

            # Apply dropout to features and attention coefficients
            dropout_attn = Dropout(self.dropout_rate)(dense)  # (N x N)
            dropout_feat = Dropout(self.dropout_rate)(features)  # (N x F')

            # Linear combination with neighbors' features
            node_features = K.dot(dropout_attn, dropout_feat)  # (N x F')

            if self.use_bias:
                node_features = K.bias_add(node_features, self.biases[head])

            if self.attn_heads_reduction == "concat":
                # If "concat", compute the activation here (Eq. 5)
                node_features = self.activation(node_features)

            # Add output of attention head to final output
            outputs.append(node_features)

        # Aggregate the heads' output according to the reduction method
        if self.attn_heads_reduction == "concat":
            output = K.concatenate(outputs)  # (N x KF')
        else:
            output = K.mean(K.stack(outputs), axis=0)  # (N x F')

        output = self.activation(output)
        return output
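
The additive mask above (dense += -10e9 * (1.0 - A)) is what restricts each softmax row to a node's neighbours. A toy check of that trick in isolation (TF 2.x assumed; the values are illustrative):

import tensorflow as tf

scores = tf.constant([[1.0, 2.0, 3.0]])
adjacency = tf.constant([[1.0, 0.0, 1.0]])        # node 1 is not a neighbour
masked = scores + -10e9 * (1.0 - adjacency)
print(tf.nn.softmax(masked).numpy())              # weight on node 1 is ~0
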
Example #15
 def call(self, x, mask=None):
     energy = self.activation(K.dot(x, self.W0) + self.b0)
     #energy=self.activation(K.dot(energy, self.W) + self.b)
     energy = K.dot(energy, self.W) + self.b
     energy = K.reshape(energy, (-1, self.input_length))
     energy = K.softmax(energy)
     xx = K.batch_dot(energy, x, axes=(1, 1))
     all = K.concatenate([xx, energy])
     return all
Example #16
 def call(self, inputs, mask=None):
    
     x = K.permute_dimensions(inputs, (0, 2, 1))
    
     a = K.softmax(K.tanh(K.dot(x, self.W)))
     a = K.permute_dimensions(a, (0, 2, 1))
     outputs = a * inputs
     outputs = K.sum(outputs, axis=1)
     return outputs
Example #17
  def call(self, inputs, mask=None):
    if mask is not None:
      # Since mask is 1.0 for positions we want to keep and 0.0 for
      # masked positions, this operation will create a tensor which is 0.0 for
      # positions we want to attend and -1e9 for masked positions.
      adder = (1.0 - math_ops.cast(mask, inputs.dtype)) * (
          _large_compatible_negative(inputs.dtype))

      # Since we are adding it to the raw scores before the softmax, this is
      # effectively the same as removing these entirely.
      inputs += adder
    if isinstance(self.axis, (tuple, list)):
      if len(self.axis) > 1:
        return math_ops.exp(inputs - math_ops.reduce_logsumexp(
            inputs, axis=self.axis, keepdims=True))
      else:
        return K.softmax(inputs, axis=self.axis[0])
    return K.softmax(inputs, axis=self.axis)
Example #18
 def softmax(x, axis=1):
     ndim = K.ndim(x)
     if ndim == 2:
         return K.softmax(x)
     elif ndim > 2:
         e = K.exp(x - K.max(x, axis=axis, keepdims=True))
         s = K.sum(e, axis=axis, keepdims=True)
         return e / s
     else:
         raise ValueError('Cannot apply softmax to a tensor that is 1D')
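
This helper comes from the days when K.softmax had no axis argument; on current backends the manual exp/sum route and K.softmax(x, axis=...) agree. A toy check (TF 2.x assumed):

import numpy as np
from tensorflow.keras import backend as K

x = K.constant(np.random.randn(2, 5, 3))
e = K.exp(x - K.max(x, axis=1, keepdims=True))
manual = e / K.sum(e, axis=1, keepdims=True)
print(K.eval(K.max(K.abs(manual - K.softmax(x, axis=1)))))  # ~0.0
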
Example #19
 def call(self, inputs, training=None, mask=None):
     q = inputs[0]
     k = inputs[1]
     v = inputs[2]
     qkTensor = K.math_ops.matmul(q, k, transpose_b=True)
     scaleTensor = K.math_ops.multiply(K.stop_gradient(1. / K.math_ops.sqrt(self.dk)), qkTensor)
     softMaxTensor = K.softmax(scaleTensor)
     drT = self.dropout(softMaxTensor, training=training)
     vTensor = K.math_ops.matmul(drT, v)
     return vTensor
Example #20
    def loss(y_true, y_pred):

        loss_val = -1 * K.sum(
            K.log(K.softmax(y_pred[:, :-1])) * y_true[:, :-1], axis=-1)

        return K.mean(
            K.switch(
                K.equal(task, 1005), loss_weights[task] * loss_val,
                K.switch(K.equal(y_true[:, -1], task), loss_val,
                         loss_weights[task] * loss_val)))
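
K.log(K.softmax(...)) can underflow for confident logits. A hedged, numerically safer way to write the same per-sample term, assuming y_pred[:, :-1] holds raw logits exactly as in the loss above (drop-in for the loss_val line):

        loss_val = K.categorical_crossentropy(y_true[:, :-1], y_pred[:, :-1],
                                              from_logits=True)
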
Example #21
 def call(self, inputs, mask=None):
     # inputs.shape = (batch_size, time_steps, seq_len)
     x = K.permute_dimensions(inputs, (0, 2, 1))
     # x.shape = (batch_size, seq_len, time_steps)
     # general
     a = K.softmax(K.tanh(K.dot(x, self.W)))
     a = K.permute_dimensions(a, (0, 2, 1))
     outputs = a * inputs
     outputs = K.sum(outputs, axis=1)
     return outputs
Example #22
    def call(self, inputs, **kwargs):
        query, values, keys = inputs

        hidden_with_time_axis = K.expand_dims(query, 1)
        score = self.attention_variable(
            K.tanh(keys + self.query_layer(hidden_with_time_axis))
        )  # TODO Mask option for score with infinity
        alignment = K.softmax(score, axis=1)
        attention = alignment * values
        alignment = K.squeeze(alignment, axis=2)
        attention = K.sum(attention, axis=1)

        return attention, alignment
Example #23
def convert_logits_to_soft_targets(temp, teacher_train_logits,
                                   teacher_test_logits, Y_train, Y_test):
    # softmax at raised temperature
    train_logits_T = teacher_train_logits / temp
    test_logits_T = teacher_test_logits / temp
    Y_train_soft = K.softmax(train_logits_T)
    Y_test_soft = K.softmax(test_logits_T)
    sess = K.get_session()
    Y_train_soft = sess.run(Y_train_soft)
    Y_test_soft = sess.run(Y_test_soft)

    # # TODO remove if negative test feedback!
    # Y_train_soft, Y_test_soft = normalizeStudentSoftTargets(Y_train_soft, Y_test_soft)
    # for i in range(0, len(Y_train_soft)):
    #     Y_train_soft[i] = (1 / find_largest_value(Y_train_soft[i])) * Y_train_soft[i]
    # for i in range(0, len(Y_test_soft)):
    #     Y_test_soft[i] = (1 / find_largest_value(Y_test_soft[i])) * Y_test_soft[i]

    # Concatenate so that this becomes a (num_classes + num_classes) dimensional vector
    Y_train_new = np.concatenate([Y_train, Y_train_soft], axis=1)
    Y_test_new = np.concatenate([Y_test, Y_test_soft], axis=1)
    return Y_train_new, Y_test_new
Example #24
def FCN(input_shape):

    vgg16_model = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)

    # Sq_net = squeezenet(float(input_shape))
    fire8 = extract_layer_from_model(vgg16_model, layer_name='block4_pool')

    pool8 = MaxPooling2D((3, 3), strides=(2, 2), name='pool8')(fire8.output)

    fc1 = Conv2D(64, (6, 6), strides=(1, 1), padding='same', name='fc1')(pool8)

    fc1 = Dropout(rate=0.5)(fc1)

    if SEPERATE_CONFIDENCE:
        fc2 = Conv2D(4, (1, 1), strides=(1, 1), padding='same', activation='relu', name='fc2')(fc1)
        rgb = K.l2_normalize(fc2[:, :, :, 0:3], axis=3)
        w, h = map(int, fc2.get_shape()[1:3])

        # Spatial softmax over the confidence channel; use K.reshape (not np.reshape)
        # so the operation stays inside the graph.
        confidence = fc2[:, :, :, 3:4]
        confidence = K.reshape(confidence, (-1, w * h))
        confidence = K.softmax(confidence)
        confidence = K.reshape(confidence, (-1, w, h, 1))

        fc2 = rgb * confidence

    else:
        fc2 = Conv2D(3, (1, 1), strides=(1, 1), padding='same', name='fc2')(fc1)

    fc2 = Activation('relu')(fc2)

    fc2 = Conv2D(3, (15, 15), padding='valid', name='fc_pooling')(fc2)

    def norm(fc2):
        fc2_norm = K.l2_normalize(fc2, axis=3)
        illum_est = K.tf.reduce_sum(fc2_norm, axis=(1, 2))
        illum_est = K.l2_normalize(illum_est)

        return illum_est

    # illum_est = Dense(3)(fc2)

    illum_est = Lambda(norm)(fc2)

    FCN_model = Model(inputs=vgg16_model.input, outputs=illum_est, name='FC4')

    return FCN_model
Example #25
def selfattoptions(args):
    q = args[0]
    k = args[1]
    v = args[2]

    q = tf.expand_dims(q, -1)
    k = tf.expand_dims(k, -1)
    v = tf.expand_dims(v, -1)

    QK = K.batch_dot(q, K.permute_dimensions(k, [0, 2, 1]))
    QK = QK / (20**0.5)
    QK = K.softmax(QK)
    MV = K.batch_dot(QK, v)
    MV = tf.squeeze(MV, -1)
    return MV
Example #26
        def energy_step(inputs, states):
            """ Step function for computing energy for a single decoder state """

            assert_msg = "States must be a list. However states {} is of type {}".format(
                states, type(states))
            assert isinstance(states, list) or isinstance(states,
                                                          tuple), assert_msg
            """ Some parameters required for shaping tensors"""
            en_seq_len, en_hidden = encoder_out_seq.shape[
                1], encoder_out_seq.shape[2]
            de_hidden = inputs.shape[-1]
            """ Computing S.Wa where S=[s0, s1, ..., si]"""
            # <= batch_size*en_seq_len, latent_dim
            reshaped_enc_outputs = K.reshape(encoder_out_seq, (-1, en_hidden))
            # <= batch_size*en_seq_len, latent_dim
            W_a_dot_s = K.reshape(K.dot(reshaped_enc_outputs, self.W_a),
                                  (-1, en_seq_len, en_hidden))
            if verbose:
                print('wa.s>', W_a_dot_s.shape)
            """ Computing hj.Ua """
            U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a),
                                      1)  # <= batch_size, 1, latent_dim
            if verbose:
                print('Ua.h >', U_a_dot_h.shape)
                print('U_a >', self.U_a.shape)
                print('inputs.shape >', inputs.shape)
            """ tanh(S.Wa + hj.Ua) """
            # <= batch_size*en_seq_len, latent_dim
            reshaped_Ws_plus_Uh = K.tanh(
                K.reshape(W_a_dot_s + U_a_dot_h, (-1, en_hidden)))
            if verbose:
                print('Ws+Uh>', reshaped_Ws_plus_Uh.shape)
            """ softmax(va.tanh(S.Wa + hj.Ua)) """
            # <= batch_size, en_seq_len
            e_i = K.reshape(K.dot(reshaped_Ws_plus_Uh, K.tanh(self.V_a)),
                            (-1, en_seq_len))
            # <= batch_size, en_seq_len
            e_i = K.softmax(e_i)

            if verbose:
                print('ei>', e_i.shape)
                K.print_tensor(reshaped_Ws_plus_Uh,
                               message='reshaped_Ws_plus_Uh')
                K.print_tensor(self.V_a, message='V_a')
                K.print_tensor(e_i, message='e_i')

            return e_i, [e_i]
Example #27
 def _body(i, logits, activations):
     """Routing while loop."""
     # route: [batch, input_dim, output_dim, ...]
     # route = tf.nn.softmax(logits, dim=-1)
     route = K.softmax(logits)
     preactivate_unrolled = route * votes_trans
     preact_trans = tf.transpose(preactivate_unrolled, r_t_shape)
     preactivate = tf.reduce_sum(preact_trans, axis=1) + biases
     activation = _squash(preactivate)
     activations = activations.write(i, activation)
     act_3d = K.expand_dims(activation, 1)
     tile_shape = np.ones(num_dims, dtype=np.int32).tolist()
     tile_shape[1] = input_dim
     act_replicated = tf.tile(act_3d, tile_shape)
     distances = tf.reduce_sum(votes * act_replicated, axis=-1)
     logits += distances
     return (i + 1, logits, activations)
Example #28
    def call(self, x):

        row = []
        col = []

        # Pair the features two by two
        for r, c in combinations(x, 2):  # [field * (field - 1)] / 2
            row.append(r)
            col.append(c)

        p = K.concatenate(
            row,
            axis=1)  # [batch_size, [field * (field - 1)] / 2, embedding_size]
        q = K.concatenate(col, axis=1)

        inner_product = p * q  # element-wise product
        # Add non-linearity / activation
        attention_tmp = K.relu(
            K.bias_add(K.dot(inner_product, self.attention_W),
                       self.attention_b))
        # [batch_size, [field * (field - 1)] / 2, embedding_size] * [embedding_size, attention_units]  = > [batch_size, [field * (field - 1)] / 2, attention_units]

        # Context vector
        attention_tmp_dot = K.dot(
            attention_tmp,
            self.projection_h)  # [batch_size, [field * (field - 1)] / 2, 1]

        # The softmax is taken per sample, over all of that sample's feature pairs
        attention_weight = K.softmax(
            attention_tmp_dot, axis=1
        )  # equivalent to K.exp(attention_tmp_dot) / K.sum(K.exp(attention_tmp_dot), axis=1, keepdims=True)
        # [batch_size, [field * (field - 1)] / 2, 1]

        # Weight the inner products by the attention scores
        attention_output = K.sum(inner_product * attention_weight,
                                 axis=1)  # [batch_size, embedding_size]

        # Apply dropout
        attention_output = K.dropout(
            attention_output,
            self.dropout_rate)  # [batch_size, embedding_size]

        # Equivalent to a Dense layer
        afm_out = K.dot(attention_output, self.projection_p)  # [batch_size, 1]

        return afm_out
Example #29
    def call(self, inputs, mask=None):
        '''
        :param inputs: a list of tensor of length not larger than 2, or a memory tensor of size BxTXD1.
        If a list, the first entry is memory, and the second one is query tensor of size BxD2 if any
        :param mask: the masking entry will be directly discarded
        :return: a tensor of size BxD1, weighted summing along the sequence dimension
        '''
        if isinstance(inputs, list) and len(inputs) == 2:
            memory, query = inputs
            if self.method is None:
                return memory[:, -1, :]
            elif self.method == 'cba':
                hidden = K.dot(memory, self.Wh) + K.expand_dims(K.dot(query, self.Wq), 1)
                hidden = K.tanh(hidden)
                s = K.squeeze(K.dot(hidden, self.v), -1)
            elif self.method == 'ga':
                s = K.sum(K.expand_dims(K.dot(query, self.Wq), 1) * memory, axis=-1)
            else:
                s = K.squeeze(K.dot(memory, self.v), -1)
            if mask is not None:
                mask = mask[0]
        else:
            if isinstance(inputs, list):
                if len(inputs) != 1:
                    raise ValueError('inputs length should not be larger than 2')
                memory = inputs[0]
            else:
                memory = inputs
            if self.method is None:
                return memory[:, -1, :]
            elif self.method == 'cba':
                hidden = K.dot(memory, self.Wh)
                hidden = K.tanh(hidden)
                s = K.squeeze(K.dot(hidden, self.v), -1)
            elif self.method == 'ga':
                raise ValueError('general attention needs the second input')
            else:
                s = K.squeeze(K.dot(memory, self.v), -1)

        s = K.softmax(s)
        if mask is not None:
            s *= K.cast(mask, dtype='float32')
            sum_by_time = K.sum(s, axis=-1, keepdims=True)
            s = s / (sum_by_time + K.epsilon())
        return K.sum(memory * K.expand_dims(s), axis=1)
Example #30
    def call(self, x, mask=None):
        '''
        i_emb:     [Batch_size, Hidden_units]
        hist_emb:        [Batch_size, max_len, Hidden_units]
        hist_len: [Batch_size]
        '''
        assert len(x) == 3

        i_emb, hist_emb, hist_len = x[0], x[1], x[2]
        hidden_units = K.int_shape(hist_emb)[-1]
        max_len = tf.shape(hist_emb)[1]

        i_emb = tf.tile(i_emb,
                        [1, max_len])  # (batch_size, max_len * hidden_units)
        i_emb = tf.reshape(
            i_emb,
            [-1, max_len, hidden_units])  # (batch_size, max_len, hidden_units)
        concat = K.concatenate(
            [i_emb, hist_emb, i_emb - hist_emb, i_emb * hist_emb],
            axis=2)  # (batch_size, max_len, hidden_units * 4)

        for i in range(len(self.attention_hidden_units)):
            activation = None if i == 2 else self.attention_activation
            outputs = keras.layers.Dense(self.attention_hidden_units[i],
                                         activation=activation)(concat)
            concat = outputs

        outputs = tf.reshape(outputs,
                             [-1, 1, max_len])  # (batch_size, 1, max_len)

        if self.supports_masking:
            mask = tf.sequence_mask(hist_len,
                                    max_len)  # (batch_size, 1, max_len)
            padding = tf.ones_like(outputs) * (-1e12)
            outputs = tf.where(mask, outputs, padding)

        # Scale the outputs
        outputs = outputs / (hidden_units**0.5)
        outputs = K.softmax(outputs)

        outputs = tf.matmul(outputs, hist_emb)  # (batch_size, 1, hidden_units)

        outputs = tf.squeeze(outputs)  # (batch_size, hidden_units)

        return outputs
 def call(self, inputs):
   return K.softmax(inputs, axis=self.axis)
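
The final snippet is essentially what tf.keras.layers.Softmax provides out of the box; a quick usage sketch (TF 2.x assumed):

import tensorflow as tf

layer = tf.keras.layers.Softmax(axis=-1)
print(layer(tf.constant([[1.0, 2.0, 3.0]])).numpy())   # [[0.09003057 0.24472848 0.66524094]]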