def loss(y_true, y_pred):
    # Soft targets: the old model's logits softened with temperature `temp`.
    y_soft = K.softmax(old_logits / temp)

    # Distil only the logits of the previously seen classes.
    logits_pred = new_logits[:, :old_classes]
    y_pred_soft = K.softmax(logits_pred / temp)

    # Hard-label loss on the new data plus the L-weighted distillation loss.
    # (old_logits, new_logits, temp, L, old_classes, sparselogloss and logloss
    # come from the enclosing scope.)
    return sparselogloss(y_true, y_pred) + L * logloss(y_soft, y_pred_soft)
Example #2
    def call(self, x):
        f = K.conv2d(x, kernel=self.kernel_f, strides=(1, 1),
                     padding='same')  # [bs, h, w, c']
        g = K.conv2d(x, kernel=self.kernel_g, strides=(1, 1),
                     padding='same')  # [bs, h, w, c']
        h = K.conv2d(x, kernel=self.kernel_h, strides=(1, 1),
                     padding='same')  # [bs, h, w, c']

        f_ = K.permute_dimensions(self._hw_flatten(f),
                                  (0, 2, 1))  # [bs, 3c', N]
        s = K.batch_dot(self._hw_flatten(g), f_)  # [bs, N, N]
        beta = K.softmax(s, axis=-1)  # attention map

        double_attn = K.batch_dot(f_, self._hw_flatten(x))  # [bs, 3c', 3c]
        double_attn = K.softmax(double_attn, axis=1)

        h_tmp, shape_tmp = self._hw_flatten(h,
                                            return_shape=True)  # [bs, N, 3c']
        o_tmp = K.batch_dot(beta, h_tmp)  # [bs, N, 3c']
        o = K.batch_dot(o_tmp, double_attn)  # [bs, N, 3c]
        o = self._hw_recover(o, shape_tmp)  # [bs, h, w, C]

        x = self.gamma * o + x

        return x
Example #3
 def __call__(self, x):
     regularization = 0.
     if self.l1:
         regularization += self.l1 * K.sum(K.softmax(x))
     if self.l2:
         regularization += self.l2 * K.sum(K.square(K.softmax(x)))
     return regularization
Example #4
        def normalize_func(mean_batch, variance_batch):
            mean_batch = K.reshape(mean_batch, broadcast_shape)
            variance_batch = K.reshape(variance_batch, broadcast_shape)

            mean_weights = K.softmax(self.mean_weights, axis=0)
            variance_weights = K.softmax(self.variance_weights, axis=0)

            mean = (mean_weights[0] * mean_instance +
                    mean_weights[1] * mean_layer +
                    mean_weights[2] * mean_batch)

            variance = (variance_weights[0] * variance_instance +
                        variance_weights[1] * variance_layer +
                        variance_weights[2] * variance_batch)

            outputs = (inputs - mean) / (K.sqrt(variance + self.epsilon))

            if self.scale:
                broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
                outputs = outputs * broadcast_gamma

            if self.center:
                broadcast_beta = K.reshape(self.beta, broadcast_shape)
                outputs = outputs + broadcast_beta

            return outputs
Example #5
    def call(self, x):
        # Input is a 3-D or 4-D Tensor
        ndim = K.ndim(x)
        if ndim == 4:
            dims = K.int_shape(x)
            x = K.reshape(x, (-1, dims[1] * dims[2], 1, self.D))
        elif ndim != 3:
            raise ValueError(
                'Encoding input should have shape BxNxD or BxHxWxD')

        # Residual vectors
        R = x - self.codes
        ''' OLD WAY
        _x_i = K.repeat_elements(x, self.K, 1)
        _c_k = K.tile(self.codes, (n, 1))
        R = K.reshape(_x_i - _c_k, (-1, n, self.K, self.D))
        '''

        # Assignment weights, optional dropout
        if self.dropout_rate is not None:
            W_ik = K.softmax(
                scaledL2(R, K.dropout(self.scale, self.dropout_rate)))
        else:
            W_ik = K.softmax(scaledL2(R, self.scale))

        # Aggregation
        E = tf.einsum('bik,bikd->bkd', W_ik, R)

        # Normalize encoding vectors
        if self.l2_normalize:
            E = tf.nn.l2_normalize(E, axis=-1)

        E = tf.layers.Flatten()(E)

        return E
Example #6
    def fit(self, X, Y=None, val_X=None, val_Y=None, num_epochs=300, batch_size=None, start_temp=10.0,
            min_temp=0.1, tryout_limit=1, class_weight=None):
        if Y is None:
            Y = X
        assert len(X) == len(Y)
        validation_data = None
        if val_X is not None and val_Y is not None:
            assert len(val_X) == len(val_Y)
            validation_data = (val_X, val_Y)

        if batch_size is None:
            batch_size = max(len(X) // 256, 16)

        steps_per_epoch = (len(X) + batch_size - 1) // batch_size

        for i in range(tryout_limit):

            K.set_learning_phase(1)

            inputs = layers.Input(shape=X.shape[1:])

            alpha = np.exp(np.log(min_temp / start_temp) / (num_epochs * steps_per_epoch))

            self.concrete_select = ConcreteSelect(self.K, start_temp, min_temp, alpha, name='concrete_select')

            selected_features = self.concrete_select(inputs)

            outputs = self.output_function(selected_features)

            self.model = models.Model(inputs, outputs)

            self.model.compile(
                loss=LinearSVC.loss_function(loss_function, class_weight),
                optimizer=optimizer_class(lr=initial_lr),
                metrics=[LinearSVC.accuracy]
            )

            self.model.summary()

            stopper_callback = StopperCallback()

            hist = self.model.fit(X, Y, batch_size, num_epochs, verbose=0, callbacks=[stopper_callback],
                                  validation_data=validation_data)  # , validation_freq = 10)

            if K.get_value(
                    K.mean(K.max(K.softmax(self.concrete_select.logits, axis=-1)))) >= stopper_callback.mean_max_target:
                break

            num_epochs *= 2

        self.probabilities = K.get_value(K.softmax(self.model.get_layer('concrete_select').logits))
        self.indices = K.get_value(K.argmax(self.model.get_layer('concrete_select').logits))

        return self
def gumbel_softmax(x, tau, from_logits=False, straight_through=False):
    # ref: https://arxiv.org/abs/1611.01144
    eps = 1e-20
    u = K.random_uniform(K.shape(x), eps, 1 - eps)
    if not from_logits:
        x = K.log(K.maximum(eps, x))
    y = x - K.log(-K.log(u))
    if tau > 0:
        if straight_through:
            return combine_value_gradient(hardmax(y),
                                          K.softmax(y / tau, axis=-1))
        else:
            return K.softmax(y / tau, axis=-1)
    else:
        return hardmax(y)
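A minimal usage sketch (my addition, not from the example above; it assumes `gumbel_softmax` and the Keras backend `K` are already in scope): with `tau > 0` and the default `from_logits=False`, the helper returns a soft, differentiable sample whose entries sum to 1 along the last axis.

from tensorflow.keras import backend as K

probs = K.constant([[0.1, 0.2, 0.7]])    # class probabilities for one example
sample = gumbel_softmax(probs, tau=0.5)  # soft, differentiable one-hot-like sample
print(K.eval(K.sum(sample, axis=-1)))    # approximately [1.]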
Example #8
def MultiHeadAttention(l=8 * 8, d=512, dv=64, dim_out=512, nv=8):
    """
    Args:
        l: number of blocks in feature map
        d: dimension of the block
        dv: dimension of linear space to be projected
        nv: number of project for each block
    """
    value_vector_1 = Input(shape=(l, d))
    query_vector_1 = Input(shape=(l, d))
    key_vector_1 = Input(shape=(l, d))

    value_vector_2 = Dense(dv * nv, activation="relu")(value_vector_1)
    query_vector_2 = Dense(dv * nv, activation="relu")(query_vector_1)
    key_vector_2 = Dense(dv * nv, activation="relu")(key_vector_1)

    value = Reshape([l, nv, dv])(value_vector_2)
    query = Reshape([l, nv, dv])(query_vector_2)
    key = Reshape([l, nv, dv])(key_vector_2)

    attention = tf.einsum('baik,baij->bakj', query, key) / np.sqrt(dv)
    attention = Lambda(lambda x: K.softmax(x),
                       output_shape=(l, nv, nv))(attention)
    output = tf.einsum('bajk,baik->baji', attention, value)
    output = Reshape([l, d])(output)

    output = Add()([output, query_vector_1])
    output = Dense(dim_out, activation='relu')(output)

    return Model(inputs=[query_vector_1, key_vector_1, value_vector_1],
                 outputs=output)
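A hedged usage sketch, not part of the original snippet: it assumes the imports used by `MultiHeadAttention` above are in scope and a reasonably recent `tf.keras` that wraps the raw `tf.einsum` calls into layers automatically; the input shapes follow the docstring defaults.

import numpy as np

mha = MultiHeadAttention(l=8 * 8, d=512, dv=64, dim_out=512, nv=8)
q = np.random.rand(2, 8 * 8, 512).astype("float32")  # batch of 2, l=64 blocks, d=512
out = mha.predict([q, q, q])                          # [query, key, value]
print(out.shape)                                      # expected: (2, 64, 512)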
Example #9
 def test_step(inp, tar):
     outputs = 0
     for j in range(args.eva_iter):
         current_batch = net(inp)
         outputs = outputs + K.softmax(current_batch, axis=1)
     outputs = outputs / args.eva_iter
     test_accuracy(tar, outputs)
Example #10
    def call(self, inputs, **kwargs):
        inputs = inputs if isinstance(inputs, list) else [inputs]

        if len(inputs) < 1 or len(inputs) > 2:
            raise ValueError("AttentionLayerWithBatchNormalization expect one or two inputs.")

        actual_input = inputs[0]
        mask = inputs[1] if len(inputs) > 1 else None
        if mask is not None and not (((len(mask.shape) == 3 and mask.shape[2] == 1) or len(mask.shape) == 2)
                                     and mask.shape[1] == self.input_length):
            raise ValueError("`mask` should be of shape (batch, input_length) or (batch, input_length, 1) "
                             "when calling an AttentionLayerWithBatchNormalization.")

        assert actual_input.shape[-1] == self.attention_param.shape[0]

        # (batch, input_length, input_dim) * (input_dim, 1) ==> (batch, input_length, 1)
        attention_weights = K.dot(actual_input, self.attention_param)

        if mask is not None:
            if len(mask.shape) == 2:
                mask = K.expand_dims(mask, axis=2)  # (batch, input_length, 1)
            mask = K.log(mask)
            attention_weights += mask

        # batch normalization
        attention_weights = BatchNormalization()(attention_weights)

        attention_weights = K.softmax(attention_weights, axis=1)  # (batch, input_length, 1)
        result = K.sum(actual_input * attention_weights, axis=1)  # (batch, input_length)  [multiplication uses broadcast]
        return result
    def step(self, x, states):
        # x.shape = (1, 512, 30, 40)
        # states: list of tensors with shape (1, 512, 30, 40)
        h_tm1 = states[0]
        c_tm1 = states[1]

        # Attention energies and normalized attention map over the feature map.
        e = self.V_a(K.tanh(self.W_a(h_tm1) + self.U_a(x)))  # e.shape (1, 1, 30, 40)
        a = K.reshape(K.softmax(K.batch_flatten(e)),
                      (x.shape[0], 1, x.shape[2], x.shape[3]))  # a.shape (1, 1, 30, 40)
        x_tilde = x * K.repeat_elements(a, x.shape[1], 1)  # x_tilde.shape (1, 512, 30, 40)

        x_i = self.W_i(x_tilde)
        x_f = self.W_f(x_tilde)
        x_c = self.W_c(x_tilde)
        x_o = self.W_o(x_tilde)

        i = self.inner_activation(x_i + self.U_i(h_tm1))
        f = self.inner_activation(x_f + self.U_f(h_tm1))
        c = f * c_tm1 + i * self.activation(x_c + self.U_c(h_tm1))
        o = self.inner_activation(x_o + self.U_o(h_tm1))

        h = o * self.activation(c)
        return h, [h, c]
Example #12
 def kl_divergence(self, other):
     self._check_other(other)
     p_self = K.softmax(self.logits)
     logp_self = log_softmax_tf(self.logits)
     logp_other = log_softmax_tf(other.logits)
     kl_div = tf.einsum('ij,ij->i', p_self, logp_self - logp_other)
     return self._rename(kl_div, 'kl_divergence')
Example #13
    def step(self, x, states):
        h = states[0]
        # states[1] necessary?

        # comes from the constants
        X_static = states[-2]
        # equals K.dot(static_x, self._W1) + self._b2 with X.shape=[bs, L, static_input_dim]
        total_x_static_prod = states[-1]

        # expand dims to add the vector which is only valid for this time step
        # to total_x_prod which is valid for all time steps
        hw = K.expand_dims(K.dot(h, self._W2), 1)
        additive_atn = total_x_static_prod + hw
        attention = K.softmax(K.dot(additive_atn, self._V), axis=1)
        static_x_weighted = K.sum(attention * X_static, [1])

        x = K.dot(K.concatenate([x, static_x_weighted], 1),
                  self._W3) + self._b3

        h, new_states = self.layer.cell.call(x, states[:-2])

        # append attention to the states to "smuggle" it out of the RNN wrapper
        attention = K.squeeze(attention, -1)
        h = K.concatenate([h, attention])

        return h, new_states
Example #14
 def get_monitor_value(self, logs):
     monitor_value = K.get_value(
         K.mean(
             K.max(K.softmax(
                 self.model.get_layer('concrete_select').logits),
                   axis=-1)))
     return monitor_value
Example #15
    def call(self, inputs):
        if self._masking:
            assert len(
                inputs
            ) == 4, "inputs should be set [queries, keys, values, masks]."
            queries, keys, values, masks = inputs
        else:
            assert len(
                inputs) == 3, "inputs should be set [queries, keys, values]."
            queries, keys, values = inputs

        if K.dtype(queries) != 'float32': queries = K.cast(queries, 'float32')
        if K.dtype(keys) != 'float32': keys = K.cast(keys, 'float32')
        if K.dtype(values) != 'float32': values = K.cast(values, 'float32')

        matmul = K.batch_dot(queries, tf.transpose(keys, [0, 2, 1]))  # MatMul
        scaled_matmul = matmul / int(queries.shape[-1])**0.5  # Scale
        if self._masking:
            scaled_matmul = self.mask(scaled_matmul, masks)  # Mask(opt.)

        if self._future:
            scaled_matmul = self.future_mask(scaled_matmul)

        softmax_out = K.softmax(scaled_matmul)  # SoftMax
        # Dropout
        out = K.dropout(softmax_out, self._dropout_rate)

        outputs = K.batch_dot(out, values)

        return outputs
Example #16
def MultiHeadsAttModel(l=8 * 8, d=512, dv=64, dout=512, nv=8):

    v1 = Input(shape=(l, d))
    q1 = Input(shape=(l, d))
    k1 = Input(shape=(l, d))

    v2 = Dense(dv * nv, activation="relu")(v1)
    q2 = Dense(dv * nv, activation="relu")(q1)
    k2 = Dense(dv * nv, activation="relu")(k1)

    v = Reshape([l, nv, dv])(v2)
    q = Reshape([l, nv, dv])(q2)
    k = Reshape([l, nv, dv])(k2)
    att = tf.einsum('baik,baij->bakj', q, k) / np.sqrt(dv)
    #att = Lambda(lambda x: K.batch_dot(x[0],x[1] ,axes=[-1,-1]) / np.sqrt(dv),output_shape=(l, nv, nv))([q,k])# l, nv, nv
    #att = tf.einsum('', q, k)
    att = Lambda(lambda x: K.softmax(x), output_shape=(l, nv, nv))(att)
    out = tf.einsum('bajk,baik->baji', att, v)
    #out = Lambda(lambda x: K.batch_dot(x[0], x[1],axes=[2,2]),  output_shape=(l, nv, dv))([att, v])
    out = Reshape([l, d])(out)

    out = Add()([out, q1])

    out = Dense(dout, activation="relu")(out)

    return Model(inputs=[q1, k1, v1], outputs=out)
Example #17
    def call(self, x):
        Q_seq, K_seq, V_seq = x
        Q_len, V_len = None, None
        print("build attention")

        Q_seq = K.dot(Q_seq, self.WQ)
        Q_seq = K.reshape(Q_seq, (-1, K.shape(Q_seq)[1], self.nb_head, self.size_per_head))
        Q_seq = K.permute_dimensions(Q_seq, (0, 2, 1, 3))

        K_seq = K.dot(K_seq, self.WK)
        K_seq = K.reshape(K_seq, (-1, K.shape(K_seq)[1], self.nb_head, self.size_per_head))
        K_seq = K.permute_dimensions(K_seq, (0, 2, 1, 3))

        V_seq = K.dot(V_seq, self.WV)
        V_seq = K.reshape(V_seq, (-1, K.shape(V_seq)[1], self.nb_head, self.size_per_head))
        V_seq = K.permute_dimensions(V_seq, (0, 2, 1, 3))

        A = K.batch_dot(Q_seq, K_seq, axes=[3, 3]) / self.size_per_head ** 0.5
        A = K.permute_dimensions(A, (0, 3, 2, 1))
        A = self.Mask(A, V_len, "add")
        A = K.permute_dimensions(A, (0, 3, 2, 1))
        A = K.softmax(A)

        O_seq = K.batch_dot(A, V_seq, axes=[3, 2])
        O_seq = K.permute_dimensions(O_seq, (0, 2, 1, 3))
        O_seq = K.reshape(O_seq, (-1, K.shape(O_seq)[1], self.output_dim))
        O_seq = self.Mask(O_seq, Q_len, "mul")

        return O_seq
Example #18
def masked_softmax(vector, mask):
    """
    `K.softmax(vector)` does not work if some elements of `vector` should be masked.  This performs
    a softmax on just the non-masked portions of `vector` (passing None in for the mask is also
    acceptable; you'll just get a regular softmax).

    We assume that both `vector` and `mask` (if given) have shape (batch_size, vector_dim).

    In the case that the input vector is completely masked, this function returns an array
    of ``0.0``. This behavior may cause ``NaN`` if this is used as the last layer of a model
    that uses categorical cross-entropy loss.
    """
    # We calculate masked softmax in a numerically stable fashion, as done
    # in https://github.com/rkadlec/asreader/blob/master/asreader/custombricks/softmax_mask_bricks.py
    if mask is not None:
        # Here we get normalized log probabilities for
        # enhanced numerical stability.
        mask = K.cast(mask, "float32")
        input_masked = mask * vector
        shifted = mask * (input_masked -
                          K.max(input_masked, axis=1, keepdims=True))
        # We add epsilon to avoid numerical instability when
        # the sum in the log yields 0.
        normalization_constant = K.log(
            K.sum(mask * K.exp(shifted), axis=1, keepdims=True) + K.epsilon())
        normalized_log_probabilities = mask * (shifted -
                                               normalization_constant)
        unmasked_probabilities = K.exp(normalized_log_probabilities)
        return switch(mask, unmasked_probabilities,
                      K.zeros_like(unmasked_probabilities))
    else:
        # There is no mask, so we use the provided ``K.softmax`` function.
        return K.softmax(vector)
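For comparison, here is a small hedged sketch (my illustration, not from the source above) of the more common additive-mask shortcut: push masked logits toward a large negative value before a regular `K.softmax`, so masked positions get probability zero and the remaining entries renormalize. Unlike `masked_softmax`, a fully masked row would come out uniform rather than all zeros with this shortcut.

from tensorflow.keras import backend as K

vector = K.constant([[2.0, 1.0, 3.0]])
mask = K.constant([[1.0, 1.0, 0.0]])          # last position is masked out
masked_logits = vector + (1.0 - mask) * -1e9  # drive masked entries toward -inf
print(K.eval(K.softmax(masked_logits)))       # roughly [[0.731, 0.269, 0.0]]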
def yolo2_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters."""
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                    [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                    [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[::-1], K.dtype(feats))
    #box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(grid_shape[::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.softmax(feats[..., 5:])

    if calc_loss:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
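A hedged decoding sketch (my addition; the anchor values and feature-map shape are made up, and `yolo2_head` plus its imports are assumed to be in scope): decode a random 13x13 feature map with 5 anchors and 20 classes and check the shape of the decoded class probabilities.

import numpy as np
from tensorflow.keras import backend as K

anchors = np.array([[10, 13], [16, 30], [33, 23], [30, 61], [62, 45]], dtype="float32")
feats = K.constant(np.random.rand(1, 13, 13, 5 * (20 + 5)).astype("float32"))
box_xy, box_wh, box_conf, box_cls = yolo2_head(feats, anchors, 20, np.array([416, 416]))
print(box_cls.shape)  # expected: (1, 13, 13, 5, 20)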
Example #20
    def call(self, x, mask=None):
        q, k, v = x
        d_k = q.shape.as_list()[2]

        # in pure tensorflow:
        # weights = tf.matmul(x_batch, tf.transpose(y_batch, perm=[0, 2, 1]))
        # normalized_weights = tf.nn.softmax(weights/scaling)
        # output = tf.matmul(normalized_weights, x_batch)

        weights = K.batch_dot(q, k, axes=[2, 2])

        if mask is not None:
            # add mask weights
            if isinstance(mask, (list, tuple)):
                if len(mask) != 1:
                    raise ValueError(
                        "mask can only be a Tensor or a list of length 1 containing a tensor."
                    )

                mask = mask[0]

            weights += -1e10 * (1 - mask)

        normalized_weights = K.softmax(weights / np.sqrt(d_k))
        output = K.batch_dot(normalized_weights, v)

        if self._return_attention:
            return [output, normalized_weights]
        else:
            return output
Example #21
    def call(self, inputs, **kwargs):
        """Following the routing algorithm from Hinton's paper,
        but replacing b = b + <u,v> with b = <u,v>.
        This change can improve the feature representation of the capsule.
        However, you can replace
            b = K.batch_dot(outputs, hat_inputs, [2, 3])
        with
            b += K.batch_dot(outputs, hat_inputs, [2, 3])
        to get standard routing.
        """

        if self.share_weights:
            hat_inputs = K.conv1d(inputs, self.kernel)
        else:
            hat_inputs = K.local_conv1d(inputs, self.kernel, [1], [1])

        batch_size = K.shape(inputs)[0]
        input_num_capsule = K.shape(inputs)[1]
        hat_inputs = K.reshape(hat_inputs,
                               (batch_size, input_num_capsule,
                                self.num_capsule, self.dim_capsule))
        hat_inputs = K.permute_dimensions(hat_inputs, (0, 2, 1, 3))

        b = K.zeros_like(hat_inputs[:, :, :, 0])
        print(self.routings)
        for i in range(self.routings):
            c = K.softmax(b, 1)
            o = self.activation(K.batch_dot(c, hat_inputs, [2, 2]))
            if i < self.routings - 1:
                b = K.batch_dot(o, hat_inputs, [2, 3])
                if K.backend() == 'theano':
                    o = K.sum(o, axis=1)
        return o
    def call(self, u_vecs):
        if self.share_weights:
            u_hat_vecs = K.conv1d(u_vecs, self.W)
        else:
            u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

        batch_size = K.shape(u_vecs)[0]
        input_num_capsule = K.shape(u_vecs)[1]
        u_hat_vecs = K.reshape(u_hat_vecs,
                               (batch_size, input_num_capsule,
                                self.num_capsule, self.dim_capsule))
        u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))
        # final u_hat_vecs.shape = [None, num_capsule, input_num_capsule, dim_capsule]

        b = K.zeros_like(
            u_hat_vecs[:, :, :,
                       0])  # shape = [None, num_capsule, input_num_capsule]
        for i in range(self.routings):
            b = K.permute_dimensions(
                b, (0, 2, 1))  # shape = [None, input_num_capsule, num_capsule]
            c = K.softmax(b)
            c = K.permute_dimensions(c, (0, 2, 1))
            b = K.permute_dimensions(b, (0, 2, 1))
            outputs = self.activation(K.batch_dot(c, u_hat_vecs, [2, 2]))
            if i < self.routings - 1:
                b = K.batch_dot(outputs, u_hat_vecs, [2, 3])

        return outputs
def accuracy_mod(y_true, y_pred):
  # Squeeze the shape to (None, ) from (None, 1) as we want to apply operations directly on y_true
  if K.ndim(y_true) == K.ndim(y_pred):
      y_true = K.squeeze(y_true, -1)

  # Normalize the y_pred values first and then take the arg at which we have a maximum value (This is the predicted label)
  y_pred = K.softmax(y_pred, axis = -1)
  y_pred = K.argmax(y_pred, axis = -1)

  # Ground-truth labels can contain -1 entries that should not count towards accuracy, so we filter them out
  defa = K.constant([0], dtype=tf.float32)
  #Creating a boolean tensor for labels greater or equal to 0
  is_valid = K.greater_equal(y_true, defa)
  #Get the corresponding indices
  indices = tf.where(is_valid)

  #Gather the results of y_true and y_pred at the indices we calculated above
  fil_y_true = K.gather(y_true, K.reshape(indices, [-1])) 
  fil_y_pred = K.gather(y_pred, K.reshape(indices, [-1]))
  # K.print_tensor(res, message='res = ')
  # K.print_tensor(comp, message='comp = ')

  fil_y_true = K.cast(fil_y_true, K.floatx())
  fil_y_pred = K.cast(fil_y_pred, K.floatx())

  #pdb.set_trace()
  return K.cast(K.equal(fil_y_true, fil_y_pred), K.floatx())
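A quick hedged check of the -1 filtering described in the comments (my addition; it assumes `accuracy_mod` above and its `tensorflow`/Keras backend imports are in scope): the middle sample carries a label of -1 and is excluded, so only the two valid positions contribute to the returned accuracy vector.

from tensorflow.keras import backend as K

y_true = K.constant([[1.0], [-1.0], [0.0]])                # the -1 label is ignored
y_pred = K.constant([[0.1, 2.0], [3.0, 0.1], [2.0, 0.2]])  # two-class logits
print(K.eval(accuracy_mod(y_true, y_pred)))                # -> [1. 1.]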
Example #24
    def call(self, inputs, **kwargs):
        assert isinstance(inputs, list) and len(inputs) == 3
        first, second, features = inputs[0], inputs[1], inputs[2]
        if not self.from_logits:
            first = K.clip(first, 1e-10, 1.0)
            second = K.clip(second, 1e-10, 1.0)
            first_, second_ = K.log(first), K.log(second)
        else:
            first_, second_ = first, second
        # embedded_features.shape = (M, T, 1)
        if self.use_intermediate_layer:
            features = K.dot(features, self.first_kernel)
            features = K.bias_add(features,
                                  self.first_bias,
                                  data_format="channels_last")
            features = self.intermediate_activation(features)
        embedded_features = K.dot(features, self.features_kernel)
        embedded_features = K.bias_add(embedded_features,
                                       self.features_bias,
                                       data_format="channels_last")
        if self.use_dimension_bias:
            tiling_shape = [1] * (K.ndim(first) - 1) + [K.shape(first)[-1]]
            embedded_features = K.tile(embedded_features, tiling_shape)
            embedded_features = K.bias_add(embedded_features,
                                           self.dimensions_bias,
                                           data_format="channels_last")
        sigma = K.sigmoid(embedded_features)

        result = weighted_sum(first_, second_, sigma, self.first_threshold,
                              self.second_threshold)
        probs = K.softmax(result)
        if self.return_logits:
            return [probs, result]
        return probs
def attention(x_inner, x_outer, n_factor, dropout):
    x_Q = L.Conv1D(
        n_factor,
        1,
        activation='linear',
        kernel_initializer='glorot_uniform',
        bias_initializer='glorot_uniform',
    )(x_inner)
    x_K = L.Conv1D(
        n_factor,
        1,
        activation='linear',
        kernel_initializer='glorot_uniform',
        bias_initializer='glorot_uniform',
    )(x_outer)
    x_V = L.Conv1D(
        n_factor,
        1,
        activation='linear',
        kernel_initializer='glorot_uniform',
        bias_initializer='glorot_uniform',
    )(x_outer)
    x_KT = L.Permute((2, 1))(x_K)
    res = L.Lambda(lambda c: K.batch_dot(c[0], c[1]) / np.sqrt(n_factor))(
        [x_Q, x_KT])
    #     res = tf.expand_dims(res, axis = 3)
    #     res = L.Conv2D(16, 3, 1, padding = "same", activation = "relu")(res)
    #     res = L.Conv2D(1, 3, 1, padding = "same", activation = "relu")(res)
    #     res = tf.squeeze(res, axis = 3)
    att = L.Lambda(lambda c: K.softmax(c, axis=-1))(res)
    att = L.Lambda(lambda c: K.batch_dot(c[0], c[1]))([att, x_V])
    return att
Example #26
 def call(self, x):
     # If only Q_seq, K_seq, V_seq are passed in, no masking is applied.
     # If Q_seq, K_seq, V_seq, Q_len, V_len are all passed in, the padding beyond Q_len/V_len is masked.
     if len(x) == 3:
         Q_seq, K_seq, V_seq = x
         Q_len, V_len = None, None
     elif len(x) == 5:
         Q_seq, K_seq, V_seq, Q_len, V_len = x
     # Apply linear projections to Q, K and V
     Q_seq = K.dot(Q_seq, self.WQ)
     Q_seq = K.reshape(Q_seq, (-1, K.shape(Q_seq)[1], self.nb_head, self.head_dim))
     Q_seq = K.permute_dimensions(Q_seq, (0, 2, 1, 3))
     K_seq = K.dot(K_seq, self.WK)
     K_seq = K.reshape(K_seq, (-1, K.shape(K_seq)[1], self.nb_head, self.head_dim))
     K_seq = K.permute_dimensions(K_seq, (0, 2, 1, 3))
     V_seq = K.dot(V_seq, self.WV)
     V_seq = K.reshape(V_seq, (-1, K.shape(V_seq)[1], self.nb_head, self.head_dim))
     V_seq = K.permute_dimensions(V_seq, (0, 2, 1, 3))
     # Compute dot-product scores, then mask, then softmax
     A = K.batch_dot(Q_seq, K_seq, axes=[3, 3]) / self.head_dim ** 0.5
     A = K.permute_dimensions(A, (0, 3, 2, 1))
     A = self.Mask(A, V_len, 'add')
     A = K.permute_dimensions(A, (0, 3, 2, 1))
     A = K.softmax(A)
     # Compute the output and apply the mask
     O_seq = K.batch_dot(A, V_seq, axes=[3, 2])
     O_seq = K.permute_dimensions(O_seq, (0, 2, 1, 3))
     O_seq = K.reshape(O_seq, (-1, K.shape(O_seq)[1], self.dim))
     O_seq = self.Mask(O_seq, Q_len, 'mul')
     return O_seq
        def energy_step(decode_outs, states):  # decode_outs(batch,dim)
            decode_outs = _p(decode_outs,
                             "energy_step:decode_outs computing the energy function"
                             )  # decode_outs: [1, 20]
            # decoder_seq [N, 30, 512]; 30 is the sequence length
            en_seq_len, en_hidden = encoder_out_seq.shape[
                1], encoder_out_seq.shape[2]  # 30, 512
            de_hidden = decode_outs.shape[-1]
            #  W * h_j
            reshaped_enc_outputs = K.reshape(
                encoder_out_seq, (-1, en_hidden))  #[b,64,512]=> [b*64,512]
            _p(reshaped_enc_outputs, "reshaped_enc_outputs")

            # W_a[512x512],reshaped_enc_outputs[b*64,512] => [b*64,512] => [b,64,512]
            W_a_dot_s = K.reshape(K.dot(reshaped_enc_outputs, self.W_a),
                                  (-1, en_seq_len, en_hidden))
            # U * S_t - 1,decode_outs[b,512],U_a[512,512] => [b,512]    => [b,1,512]
            U_a_dot_h = K.expand_dims(K.dot(decode_outs, self.U_a),
                                      axis=1)  # <= batch_size, 1, latent_dim

            # Tricky detail: the decoder output is repeated across the 64 time steps and added to the encoder outputs [64, 512].

            # tanh ( W * h_j + U * S_t-1 + b ),[b,64,512] = [b*64,512]
            reshaped_Ws_plus_Uh = K.tanh(
                K.reshape(W_a_dot_s + U_a_dot_h, (-1, en_hidden)))
            # V * tanh ( W * h_j + U * S_t-1 + b ), [b*64,512]*[512,1] => [b*64,1] => [b,64]
            e_i = K.reshape(K.dot(reshaped_Ws_plus_Uh, self.V_a),
                            (-1, en_seq_len))
            # softmax(e_tj)
            e_i = K.softmax(e_i)
            e_i = _p(e_i, "energy_step:e_i")
            return e_i, [e_i]
Example #28
 def call(self, x):
     
     if len(x) == 3:  # unpack the incoming Q_seq, K_seq, V_seq
         Q_seq, K_seq, V_seq = x
         Q_len, V_len = None, None
     elif len(x) == 5:  # Q_len, V_len are the mask lengths
         Q_seq, K_seq, V_seq, Q_len, V_len = x

     print("Q_seq------------------", Q_seq)
     # Apply linear projections to Q, K and V: nb_head projections, each of size_per_head dimensions
     Q_seq = K.dot(Q_seq, self.WQ)  # queries
     Q_seq = K.reshape(Q_seq, (-1, K.shape(Q_seq)[1], self.nb_head, self.size_per_head))
     Q_seq = K.permute_dimensions(Q_seq, (0, 2, 1, 3))  # transpose-like reordering of the axes, shape=(4,)
     K_seq = K.dot(K_seq, self.WK)  # keys
     K_seq = K.reshape(K_seq, (-1, K.shape(K_seq)[1], self.nb_head, self.size_per_head))
     K_seq = K.permute_dimensions(K_seq, (0, 2, 1, 3))  # shape=(4,)
     V_seq = K.dot(V_seq, self.WV)  # values
     V_seq = K.reshape(V_seq, (-1, K.shape(V_seq)[1], self.nb_head, self.size_per_head))
     V_seq = K.permute_dimensions(V_seq, (0, 2, 1, 3))
     # Compute dot-product scores, then mask, then softmax
     A = K.batch_dot(Q_seq, K_seq, axes=[3, 3]) / self.size_per_head ** 0.5  # attention_11/Shape_12:0, shape=(5,)
     # NOTE: the line above raised
     # ValueError: Dimension must be 5 but is 4 for 'attention_11/transpose_7'
     # In TF1, A has shape=(4,); in TF2 it becomes (5,)
     A = K.permute_dimensions(A, (0,3,2,1))
     A = self.Mask(A, V_len, 'add')
     A = K.permute_dimensions(A, (0,3,2,1))    
     A = K.softmax(A)
     # Compute the output and apply the mask
     O_seq = K.batch_dot(A, V_seq, axes=[3,2])
     O_seq = K.permute_dimensions(O_seq, (0,2,1,3))
     O_seq = K.reshape(O_seq, (-1, K.shape(O_seq)[1], self.output_dim))
     O_seq = self.Mask(O_seq, Q_len, 'mul')
     
     return O_seq
    def call(self, x):
        # soft-assignment.
        s = K.conv2d(x, self.kernel, padding='same') + self.bias
        print('s.shape=', s.shape)
        a = K.softmax(s)
        self.amap = K.argmax(a, -1)
        # print 'amap.shape', self.amap.shape

        # Dims used hereafter: batch, H, W, desc_coeff, cluster
        a = K.expand_dims(a, -2)
        # print 'a.shape=',a.shape

        # Core
        v = K.expand_dims(x, -1) + self.C
        # print 'v.shape', v.shape
        v = a * v
        # print 'v.shape', v.shape
        v = K.sum(v, axis=[1, 2])
        # print 'v.shape', v.shape
        v = K.permute_dimensions(v, pattern=[0, 2, 1])
        # print 'v.shape', v.shape
        #v.shape = None x K x D

        # Normalize v (Intra Normalization)
        v = K.l2_normalize(v, axis=-1)
        v = K.batch_flatten(v)
        v = K.l2_normalize(v, axis=-1)

        # return [v, self.amap]
        return v
Example #30
    def rpn_loss_regr_fixed_num(y_true, y_pred):
        
        shape = K.shape(y_true)
        
        true_reshaped = K.reshape(y_true, (C.BATCH_SIZE, 7, 7, 5, 25))
        pred_reshaped = K.reshape(y_pred, (C.BATCH_SIZE, 7, 7, 5, 25))
        
        mask = true_reshaped[:,:,:,:,4]
            
#         class_mask = K.reshape(K.repeat_elements(mask,20,3), (C.BATCH_SIZE,7,7,5,20))
#         coord_mask = K.reshape(K.repeat_elements(mask,4,3), (C.BATCH_SIZE,7,7,5,4))
#         object_mask = mask
#         no_object_mask = 1 - mask
        
        class_loss = 10 * (1 - K.categorical_crossentropy(true_reshaped[:,:,:,:,5:],K.softmax(pred_reshaped[:,:,:,:,5:])))
        
        object_square = K.square(1 - K.sigmoid(pred_reshaped[:,:,:,:,4]))
        object_loss = object_lambda * K.sum(object_square)
        
        no_object_square = K.square(0 - K.sigmoid(pred_reshaped[:,:,:,:,4]))
        no_object_loss = object_lambda * K.sum(no_object_square)
        
        coord_square = K.square(true_reshaped[:,:,:,:,:4] - pred_reshaped[:,:,:,:,:4])
        coord_loss = coord_lambda * K.sum(coord_square)
       
        return (class_loss + object_loss + no_object_loss + coord_loss)