def sub_mean(x):
    # scale pixel values to [0, 1], then centre by subtracting the mean
    x = x / 255
    x = x - backend.mean(x)
    return x
def contrastive_loss(label, ED):
    margin = 1
    # note: the images are scaled between 0 and 1
    return K.mean((1 - label) * 0.5 * K.square(ED) +
                  label * 0.5 * K.square(K.maximum(margin - ED, 0)))
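`ED` here is the Euclidean distance between the two embeddings of a Siamese pair; the snippet does not show how it is computed. One common way to produce it (an assumption, not taken from this source) is:

def euclidean_distance(vects):
    a, b = vects
    # keepdims gives shape (batch, 1), matching the per-pair label tensor
    return K.sqrt(K.maximum(K.sum(K.square(a - b), axis=1, keepdims=True), K.epsilon()))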
def quantile_loss(q, y_true, y_pred):
    err = (y_true - y_pred)
    return K.mean(K.maximum(q*err, (q-1)*err), axis=-1)
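Because Keras loss functions receive only `(y_true, y_pred)`, the quantile `q` is usually bound at compile time, e.g. with a lambda. A minimal sketch for the 0.9 quantile (`model` is an illustrative name):

model.compile(optimizer='adam',
              loss=lambda y_true, y_pred: quantile_loss(0.9, y_true, y_pred))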
Example #4
 def l2_loss(y_true: NDArray, y_pred: NDArray):
     error = y_true - y_pred
     sqr_error = K.square(error)
     sum_sqr_error = K.sum(sqr_error, axis=(1, 2, 3))
     l2_loss = K.mean(sum_sqr_error, axis=0)
     # `weight` is a scaling factor defined outside this function (e.g. at module level)
     return l2_loss * weight
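Because `weight` is not an argument, the function only works if that name exists in an enclosing scope. A slightly more robust pattern (a sketch, not from this source) closes over an explicit weight:

def make_weighted_l2(weight: float):
    def weighted_l2(y_true, y_pred):
        sum_sqr_error = K.sum(K.square(y_true - y_pred), axis=(1, 2, 3))
        return weight * K.mean(sum_sqr_error, axis=0)
    return weighted_l2

# model.compile(loss=make_weighted_l2(0.5), optimizer='adam')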
Example #5
def sparse_crossentropy_masked(y_true, y_pred, pad_idx = 0):
    y_true_masked = tf.boolean_mask(y_true, tf.not_equal(y_true, pad_idx))
    y_pred_masked = tf.boolean_mask(y_pred, tf.not_equal(y_true, pad_idx))
    return K.mean(K.sparse_categorical_crossentropy(y_true_masked, y_pred_masked))
 def call(self, y_true, y_pred, tau=0.1):
     error = y_true - y_pred
     return kb.mean(kb.maximum(tau * error, (tau - 1) * error), axis=-1)
Example #7
def logloss(y_true, y_pred):
    # P_MIN and P_MAX are module-level clipping bounds that keep log() finite
    y_pred = tf.clip_by_value(y_pred, P_MIN, P_MAX)
    return -backend.mean(y_true * backend.log(y_pred) +
                         (1 - y_true) * backend.log(1 - y_pred))
    def call(self, inputs):
        """
        Creates the layer as a Keras graph.

        Note that the inputs are tensors with a batch dimension of 1:
        Keras requires this batch dimension, and for full-batch methods
        we only have a single "batch".

        There are three inputs required: the node features, the output
        indices (the nodes that are to be selected in the final layer),
        and the graph adjacency matrix.

        Notes:
            This does not add self loops to the adjacency matrix.
            The output indices are only used when ``final_layer=True``

        Args:
            inputs (list): list of inputs with 3 items:
            node features (size 1 x N x F),
            output indices (size 1 x M),
            graph adjacency matrix (size N x N),
            where N is the number of nodes in the graph,
                  F is the dimensionality of node features
                  M is the number of output nodes
        """
        X = inputs[0]  # Node features (1 x N x F)
        out_indices = inputs[1]  # output indices (1 x M)
        A = inputs[2]  # Adjacency matrix (N x N)
        N = K.int_shape(A)[-1]

        batch_dim, n_nodes, _ = K.int_shape(X)
        if batch_dim != 1:
            raise ValueError(
                "Currently full-batch methods only support a batch dimension of one"
            )

        else:
            # Remove singleton batch dimension
            X = K.squeeze(X, 0)
            out_indices = K.squeeze(out_indices, 0)

        outputs = []
        for head in range(self.attn_heads):
            kernel = self.kernels[head]  # W in the paper (F x F')
            attention_kernel = self.attn_kernels[
                head]  # Attention kernel a in the paper (2F' x 1)

            # Compute inputs to attention network
            features = K.dot(X, kernel)  # (N x F')

            # Compute feature combinations
            # Note: [[a_1], [a_2]]^T [[Wh_i], [Wh_j]] = [a_1]^T [Wh_i] + [a_2]^T [Wh_j]
            attn_for_self = K.dot(
                features, attention_kernel[0])  # (N x 1), [a_1]^T [Wh_i]
            attn_for_neighs = K.dot(
                features, attention_kernel[1])  # (N x 1), [a_2]^T [Wh_j]

            # Attention head a(Wh_i, Wh_j) = a^T [[Wh_i], [Wh_j]]
            dense = attn_for_self + K.transpose(
                attn_for_neighs)  # (N x N) via broadcasting

            # Add nonlinearity
            dense = LeakyReLU(alpha=0.2)(dense)

            # Mask values before activation (Vaswani et al., 2017)
            # YT: this only works for 'binary' A, not for 'weighted' A!
            # YT: if A does not have self-loops, the node itself will be masked, so A should have self-loops
            # YT: this is ensured by setting the diagonal elements of A tensor to 1 above
            if not self.saliency_map_support:
                mask = -10e9 * (1.0 - A)
                dense += mask
                dense = K.softmax(dense)  # (N x N), Eq. 3 of the paper

            else:
                # dense = dense - tf.reduce_max(dense)
                # GAT with support for saliency calculations
                W = (self.delta * A
                     ) * K.exp(dense - K.max(dense, axis=1, keepdims=True)) * (
                         1 - self.non_exist_edge) + self.non_exist_edge * (
                             A + self.delta * (tf.ones((N, N)) - A) + tf.eye(N)
                         ) * K.exp(dense - K.max(dense, axis=1, keepdims=True))
                dense = W / K.sum(W, axis=1, keepdims=True)

            # Apply dropout to features and attention coefficients
            dropout_feat = Dropout(self.in_dropout_rate)(features)  # (N x F')
            dropout_attn = Dropout(self.attn_dropout_rate)(dense)  # (N x N)

            # Linear combination with neighbors' features [YT: see Eq. 4]
            node_features = K.dot(dropout_attn, dropout_feat)  # (N x F')

            if self.use_bias:
                node_features = K.bias_add(node_features, self.biases[head])

            # Add output of attention head to final output
            outputs.append(node_features)

        # Aggregate the heads' output according to the reduction method
        if self.attn_heads_reduction == "concat":
            output = K.concatenate(outputs)  # (N x KF')
        else:
            output = K.mean(K.stack(outputs), axis=0)  # (N x F')

        # Nonlinear activation function
        output = self.activation(output)

        # On the final layer we gather the nodes referenced by the indices
        if self.final_layer:
            output = K.gather(output, out_indices)

        # Add batch dimension back if we removed it
        if batch_dim == 1:
            output = K.expand_dims(output, 0)

        return output
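The `attn_for_self + K.transpose(attn_for_neighs)` step above relies on broadcasting an (N x 1) column against a (1 x N) row to obtain all pairwise sums at once. A standalone sketch of that trick with toy numbers (not part of the layer):

import tensorflow as tf

a_self = tf.constant([[1.0], [2.0], [3.0]])      # (N x 1), one score per node i
a_neigh = tf.constant([[10.0], [20.0], [30.0]])  # (N x 1), one score per node j
logits = a_self + tf.transpose(a_neigh)          # (N x N); logits[i, j] = a_self[i] + a_neigh[j]
print(logits.numpy())
# [[11. 21. 31.]
#  [12. 22. 32.]
#  [13. 23. 33.]]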
Example #9
 def wasserstein(self, y_true, y_pred):
     return K.mean(y_true * y_pred, axis=-1)
Example #10
def mae_loss(y_true, y_pred):
    # percentage_MAE is a module-level weighting factor
    global percentage_MAE
    return percentage_MAE * K.mean(mean_absolute_error(y_true, y_pred))
def squared_area_between(y_true, y_pred):
    return K.mean(
        K.square(K.cumsum(y_true, axis=-1) - K.cumsum(y_pred, axis=-1)))
def area_between(y_true, y_pred):
    return K.mean(K.abs(K.cumsum(y_true, axis=-1) - K.cumsum(y_pred, axis=-1)))
Example #13
def rmse(a, b):
    return K.sqrt(K.mean(K.square(a - b)))
Example #14
 def weightedMSE(self, y_true, y_pred):
     y_true = K.cast(y_true, y_pred.dtype)
     loss = K.mean(K.square(y_true - y_pred) * K.maximum(y_pred, y_true),
                   axis=(-1))
     return loss
Example #15
def class_loss_cls(y_true, y_pred):
    # lambda_cls_class is a module-level weighting factor for the classification loss
    return lambda_cls_class * K.mean(
        categorical_crossentropy(y_true[0, :, :], y_pred[0, :, :]))
Example #16
    def __call__(self,
                 loss,
                 seed_input,
                 penultimate_layer=-1,
                 seek_penultimate_conv_layer=True,
                 activation_modifier=lambda cam: K.relu(cam),
                 normalize_gradient=True,
                 expand_cam=True):
        """Generate a gradient based class activation map (CAM) by using positive gradient of
            penultimate_layer with respect to loss.

            For details on Grad-CAM, see the paper:
            [Grad-CAM: Why did you say that? Visual Explanations from Deep Networks via
            Gradient-based Localization](https://arxiv.org/pdf/1610.02391v1.pdf).

        # Arguments
            loss: A loss function. If the model has multiple outputs, you can use a different
                loss on each output by passing a list of losses.
            seed_input: An N-dim Numpy array. If the model has multiple inputs,
                you have to pass a list of N-dim Numpy arrays.
            penultimate_layer: An integer index or a tf.keras.layers.Layer object.
            seek_penultimate_conv_layer: True to seek the penultimate layer that is a subtype of
                the `keras.layers.convolutional.Conv` class.
                If False, the penultimate layer is the one selected by the `penultimate_layer` index.
            normalize_gradient: True to normalize gradients.
            activation_modifier: A function applied to the raw CAM values (defaults to ReLU).
            expand_cam: True to resize the cam to the same size as the input image.
                Note: even if the model has multiple inputs, this function returns only one cam
                value; when `expand_cam` is True, one cam image is generated per model input.
        # Returns
            The heatmap image, or a list of heatmap images, indicating the `seed_input` regions
                whose change would most contribute to the loss value.
        # Raises
            ValueError: In case of invalid arguments for `loss`, or `penultimate_layer`.
        """
        # Preparing
        losses = self._get_losses_for_multiple_outputs(loss)
        seed_inputs = self._get_seed_inputs_for_multiple_inputs(seed_input)
        penultimate_output_tensor = self._find_penultimate_output(
            penultimate_layer, seek_penultimate_conv_layer)
        # Processing gradcam
        model = tf.keras.Model(inputs=self.model.inputs,
                               outputs=self.model.outputs +
                               [penultimate_output_tensor])
        with tf.GradientTape() as tape:
            tape.watch(seed_inputs)
            outputs = model(seed_inputs)
            outputs, penultimate_output = outputs[:-1], outputs[-1]
            loss_values = [loss(y) for y, loss in zip(outputs, losses)]
        grads = tape.gradient(loss_values, penultimate_output)
        if normalize_gradient:
            grads = K.l2_normalize(grads)
        weights = K.mean(grads,
                         axis=tuple(range(grads.ndim)[1:-1]),
                         keepdims=True)
        cam = np.sum(penultimate_output * weights, axis=-1)
        if activation_modifier is not None:
            cam = activation_modifier(cam)

        if not expand_cam:
            return cam

        # Visualizing
        cam = self._zoom_for_visualizing(seed_inputs, cam)
        if len(self.model.inputs) == 1 and not isinstance(seed_input, list):
            cam = cam[0]
        return cam
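Assuming this `__call__` belongs to a Gradcam-style visualizer class (as in tf-keras-vis), usage might look like the sketch below; `Gradcam`, `model`, `images`, and `class_index` are illustrative names, not defined in the snippet above:

def score(output):
    return output[:, class_index]  # score of the target class for each sample

gradcam = Gradcam(model)           # wrap the trained Keras model
cam = gradcam(score, images)       # one heatmap per image when expand_cam=True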
Example #17
def root_mean_square(x, axis=None, keepdims=False):
    """均方根,相当于模长的变体
    """
    return K.sqrt(K.mean(K.square(x), axis=axis, keepdims=keepdims))
    def call(self, inputs, **kwargs):
        """
        Creates the layer as a Keras graph

        Notes:
            This does not add self loops to the adjacency matrix.
            The output indices are only used when `final_layer=True`

        Args:
            inputs (list): list of inputs with 3 items:
            node features (size b x N x F),
            output indices (size b x M),
            sparse graph adjacency matrix (size N x N),
            where N is the number of nodes in the graph,
                  F is the dimensionality of node features
                  M is the number of output nodes
        """
        X = inputs[0]  # Node features (1 x N x F)
        out_indices = inputs[1]  # output indices (1 x M)
        A_sparse = inputs[2]  # Adjacency matrix (1 x N x N)

        if not isinstance(A_sparse, tf.SparseTensor):
            raise TypeError("A is not sparse")

        # Get undirected graph edges (E x 2)
        A_indices = A_sparse.indices

        batch_dim, n_nodes, _ = K.int_shape(X)
        if batch_dim != 1:
            raise ValueError(
                "Currently full-batch methods only support a batch dimension of one"
            )
        else:
            # Remove singleton batch dimension
            out_indices = K.squeeze(out_indices, 0)
            X = K.squeeze(X, 0)

        outputs = []
        for head in range(self.attn_heads):
            kernel = self.kernels[head]  # W in the paper (F x F')
            attention_kernel = self.attn_kernels[
                head]  # Attention kernel a in the paper (2F' x 1)

            # Compute inputs to attention network
            features = K.dot(X, kernel)  # (N x F')

            # Compute feature combinations
            # Note: [[a_1], [a_2]]^T [[Wh_i], [Wh_j]] = [a_1]^T [Wh_i] + [a_2]^T [Wh_j]
            attn_for_self = K.dot(
                features, attention_kernel[0])  # (N x 1), [a_1]^T [Wh_i]
            attn_for_neighs = K.dot(
                features, attention_kernel[1])  # (N x 1), [a_2]^T [Wh_j]

            # Create sparse attention vector (All non-zero values of the matrix)
            sparse_attn_self = tf.gather(K.reshape(attn_for_self, [-1]),
                                         A_indices[:, 0],
                                         axis=0)
            sparse_attn_neighs = tf.gather(K.reshape(attn_for_neighs, [-1]),
                                           A_indices[:, 1],
                                           axis=0)
            attn_values = sparse_attn_self + sparse_attn_neighs

            # Add nonlinearity
            attn_values = LeakyReLU(alpha=0.2)(attn_values)

            # Apply dropout to features and attention coefficients
            dropout_feat = Dropout(self.in_dropout_rate)(features)  # (N x F')
            dropout_attn = Dropout(self.attn_dropout_rate)(
                attn_values)  # one value per edge (E,)

            # Convert to sparse matrix
            sparse_attn = tf.sparse.SparseTensor(
                A_indices, values=dropout_attn, dense_shape=[n_nodes, n_nodes])

            # Apply softmax to get attention coefficients
            sparse_attn = tf.sparse.softmax(
                sparse_attn)  # (N x N), Eq. 3 of the paper

            # Linear combination with neighbors' features [YT: see Eq. 4]
            node_features = tf.sparse.sparse_dense_matmul(
                sparse_attn, dropout_feat)  # (N x F')

            if self.use_bias:
                node_features = K.bias_add(node_features, self.biases[head])

            # Add output of attention head to final output
            outputs.append(node_features)

        # Aggregate the heads' output according to the reduction method
        if self.attn_heads_reduction == "concat":
            output = K.concatenate(outputs)  # (N x KF')
        else:
            output = K.mean(K.stack(outputs), axis=0)  # (N x F')

        output = self.activation(output)

        # On the final layer we gather the nodes referenced by the indices
        if self.final_layer:
            output = K.gather(output, out_indices)

        # Add batch dimension back if we removed it
        if batch_dim == 1:
            output = K.expand_dims(output, 0)
        return output
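The per-edge gather above produces one attention logit per edge, and `tf.sparse.softmax` then normalizes those logits over each row of the adjacency pattern. A small self-contained sketch of that normalization on a toy 3-node graph (not part of the layer):

import tensorflow as tf

# Edge list (row, col) of a toy graph and one attention logit per edge
edge_indices = tf.constant([[0, 0], [0, 1], [1, 1], [2, 0], [2, 2]], dtype=tf.int64)
edge_logits = tf.constant([1.0, 2.0, 0.5, 3.0, 3.0])

attn = tf.sparse.SparseTensor(edge_indices, edge_logits, dense_shape=[3, 3])
attn = tf.sparse.softmax(attn)            # softmax over the non-zero entries of each row
print(tf.sparse.to_dense(attn).numpy())   # the non-zeros in each row now sum to 1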
def r2_score(y_true, y_pred):
    SS_res = K.sum(K.square(y_true - y_pred))
    SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
    return (1 - SS_res / (SS_tot + K.epsilon()))
Example #20
def acc(y_true, y_pred):
    # max_seq_len is the fixed sequence length, defined outside this function
    return K.mean(K.all(K.equal(tf.cast(K.reshape(y_true, (-1, max_seq_len)), tf.int64), K.argmax(y_pred, axis=-1)), axis=-1))
def pTLossTF(y_true, y_pred):
    y_t = K.cast(y_true < 80, K.dtype(y_true)) * y_true + K.cast(
        y_true >= 80, K.dtype(y_true)) * K.cast(
            y_true < 250, K.dtype(y_true)) * y_true * 2.4 + K.cast(
                y_true >= 160, K.dtype(y_true)) * 10
    return K.mean(y_t * K.pow((y_pred - y_true) / y_true, 2)) / 250
Example #22
 def call(self, x):
     mean = K.mean(x=x, axis=-1, keepdims=True)
     std = K.std(x=x, axis=-1, keepdims=True)
     return self.a_2 * (x - mean) / (std + self.eps) + self.b_2
def metr(y_true, y_pred):
    '''custom keras metric to monitor real data'''
    return K.mean(K.square(K.exp(y_pred) - K.exp(y_true)))
Example #24
    def policy_loss_with_metrics(self, Adv, A=None):
        """

        This method constructs the policy loss as a scalar-valued Tensor,
        together with a dictionary of metrics (also scalars).

        This method may be overridden to construct a custom policy loss and/or
        to change the accompanying metrics.

        Parameters
        ----------
        Adv : 1d Tensor, shape: [batch_size]

            A batch of advantages.

        A : nd Tensor, shape: [batch_size, ...]

            A batch of actions taken under the behavior policy. For some
            choices of policy loss, e.g. ``update_strategy='sac'`` this input
            is ignored.

        Returns
        -------
        loss, metrics : (Tensor, dict of Tensors)

            The policy loss along with some metrics, which is a dict of type
            ``{name <str>: metric <Tensor>}``. The loss and each of the metrics
            (dict values) are scalar Tensors, i.e. Tensors with ``ndim=0``.

            The ``loss`` is passed to a keras Model using
            ``train_model.add_loss(loss)``. Similarly, each metric in the
            metric dict is passed to the model using
            ``train_model.add_metric(metric, name=name, aggregation='mean')``.


        """
        if K.ndim(Adv) == 2:
            check_tensor(Adv, axis_size=1, axis=1)
            Adv = K.squeeze(Adv, axis=1)
        check_tensor(Adv, ndim=1)

        if self.update_strategy == 'vanilla':
            assert A is not None

            log_pi = self.dist.log_proba(A)
            check_tensor(log_pi, same_as=Adv)

            entropy = K.mean(self.dist.entropy())

            # flip sign to get loss from objective
            loss = -K.mean(Adv * log_pi) + self.entropy_beta * entropy

            # no metrics related to behavior_dist since it's not used in loss
            metrics = {'policy/entropy': entropy}

        elif self.update_strategy == 'ppo':
            assert A is not None

            log_pi = self.dist.log_proba(A)
            log_pi_old = K.stop_gradient(self.target_dist.log_proba(A))
            check_tensor(log_pi, same_as=Adv)
            check_tensor(log_pi_old, same_as=Adv)

            eps = self.ppo_clip_eps
            ratio = K.exp(log_pi - log_pi_old)
            ratio_clip = K.clip(ratio, 1 - eps, 1 + eps)
            check_tensor(ratio, same_as=Adv)
            check_tensor(ratio_clip, same_as=Adv)

            clip_objective = K.mean(K.minimum(Adv * ratio, Adv * ratio_clip))
            entropy = K.mean(self.dist.entropy())
            kl_div = K.mean(self.target_dist.kl_divergence(self.dist))

            # flip sign to get loss from objective
            loss = -(clip_objective + self.entropy_beta * entropy)
            metrics = {'policy/entropy': entropy, 'policy/kl_div': kl_div}

        elif self.update_strategy == 'sac':
            self.logger.debug("using update_strategy 'sac'")
            loss = -K.mean(Adv)
            metrics = {'policy/entropy': K.mean(self.dist.entropy())}

        elif self.update_strategy == 'cross_entropy':
            raise NotImplementedError('cross_entropy')

        else:
            raise ValueError(
                "unknown update_strategy '{}'".format(self.update_strategy))

        # rename
        check_tensor(loss, ndim=0)
        loss = tf.identity(loss, name='policy/loss')

        return loss, metrics
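As the docstring notes, the returned loss and metrics are meant to be attached to a Keras training model. A minimal sketch of that wiring inside the same agent class (`train_model` stands in for whatever model the surrounding code builds):

loss, metrics = self.policy_loss_with_metrics(Adv, A)
train_model.add_loss(loss)
for name, metric in metrics.items():
    train_model.add_metric(metric, name=name, aggregation='mean')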
Example #25
    def loss_gt_(y_true, y_pred):
        intersection = K.sum(K.abs(y_true * y_pred), axis=[-3, -2, -1])
        # `e` is a small smoothing constant from the enclosing scope (avoids division by zero)
        dn = K.sum(K.square(y_true) + K.square(y_pred), axis=[-3, -2, -1]) + e

        return -K.mean(2 * intersection / dn, axis=[0, 1])
Example #26
def l1_loss(y_true: NDArray, y_pred: NDArray):
    error = y_true - y_pred
    error = K.abs(error)
    sum_error = K.sum(error, axis=(1, 2, 3))
    l1_loss = K.mean(sum_error, axis=0)
    return l1_loss
Example #27
def dice_coefficient(y_true, y_pred):
    intersection = K.sum(K.abs(y_true * y_pred), axis=[-3, -2, -1])
    dn = K.sum(K.square(y_true) + K.square(y_pred), axis=[-3, -2, -1]) + 1e-8
    return K.mean(2 * intersection / dn, axis=[0, 1])
Example #28
 def wasserstein(self, y_true, y_pred):
     return -K.mean(y_true * y_pred)
Example #29
 def vae_reconstruction_loss(y_true, y_predict):
     reconstruction_loss_factor = 1000
     reconstruction_loss = K.mean(K.square(y_true - y_predict),
                                  axis=[1, 2, 3])
     return reconstruction_loss_factor * reconstruction_loss
 def rmse(y_true, y_pred):
     import tensorflow.keras.backend as K
     return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))
 def critic_PPO2_loss(self, y_true, y_pred):
     value_loss = K.mean((y_true - y_pred) ** 2) # standard PPO loss
     return value_loss