Beispiel #1
0
def ca_loss(y_true, y_pred, background_class_index):
    """Average the predictions on unlabeled entries of pixels that have a label.

    `y_true` is indexed as a rank-4 tensor whose last axis holds the classes;
    its entries are expected to be -1 (no ground truth), 0 or 1.

    Args:
        y_true: Ground-truth tensor, indexed as `[:, :, :, class]`.
        y_pred: Predictions; flattened and indexed with the flat positions
            derived from `y_true`.
        background_class_index: Channel to overwrite with -1 before the
            candidates are searched, or None to leave `y_true` untouched.

    Returns:
        The Python float 0. when `y_true` contains no -1 at all, otherwise the
        mean of the selected predictions (0 when nothing is selected, because
        `divide_no_nan` is used).
    """
    unlabeled_positions = tf.where(tf.equal(y_true, -1))
    if tf.size(unlabeled_positions) == 0:
        return 0.  # no unknown classes
    if background_class_index is not None:
        # Rebuild y_true with the background channel replaced by -1.
        channels_before = y_true[:, :, :, :background_class_index]
        background_channel = y_true[:, :, :, background_class_index:
                                    background_class_index + 1]
        unlabeled_background = tf.fill(tf.shape(background_channel), -1.)
        channels_after = y_true[:, :, :, background_class_index + 1:]
        y_true = tf.concat(
            [channels_before, unlabeled_background, channels_after], axis=-1)
    # Maximum over the class axis is 1 wherever some class is marked; clamping
    # at 0 turns the all-unlabeled case (-1) into 0.
    has_label = tf.math.maximum(tf.reduce_max(y_true, axis=[3]), 0)
    # Restore the class axis and repeat the flag once per class so that it
    # lines up element-wise with y_true / y_pred.
    keepdims_shape = tf.concat([tf.shape(y_true)[:3], [1]], axis=0)
    has_label = tf.reshape(has_label, keepdims_shape)
    class_count = tf.size(y_true[0, 0, 0, :])
    has_label = tf.repeat(has_label, class_count, axis=3)
    # Row 0: label-availability flag (0 or 1); row 1: y_true (-1, 0 or 1).
    paired = tf.stack([K.flatten(has_label), K.flatten(y_true)])
    # The column-wise product is -1 only for the pair (1, -1): the pixel has a
    # label somewhere, but this particular entry has no ground truth.
    is_candidate = tf.math.equal(tf.math.reduce_prod(paired, axis=0), -1)
    candidate_indices = tf.where(is_candidate)
    # Pick the predictions sitting at the candidate positions.
    candidate_predictions = tf.gather(K.flatten(y_pred), candidate_indices)
    prediction_sum = tf.reduce_sum(candidate_predictions)
    prediction_count = tf.cast(tf.size(candidate_predictions),
                               dtype=tf.float32)
    return tf.math.divide_no_nan(prediction_sum, prediction_count)
Beispiel #2
0
def dice_coef(y_true, y_pred):
    """Return the Dice coefficient of the two tensors, smoothed by 1.

    Both inputs are flattened first, so the score is computed over every
    element at once: (2 * sum(t * p) + 1) / (sum(t) + sum(p) + 1).
    """
    smoothing = 1.
    truth = K.flatten(y_true)
    prediction = K.flatten(y_pred)
    overlap = K.sum(truth * prediction)
    numerator = 2. * overlap + smoothing
    denominator = K.sum(truth) + K.sum(prediction) + smoothing
    return numerator / denominator
Beispiel #3
0
def dsc(y_true, y_pred):
    """Dice similarity coefficient over the flattened inputs (smoothing 1)."""
    smoothing = 1.
    flat_truth = K.flatten(y_true)
    flat_guess = K.flatten(y_pred)
    overlap = K.sum(flat_truth * flat_guess)
    # The smoothing term is added to both parts of the fraction.
    numerator = 2. * overlap + smoothing
    denominator = K.sum(flat_truth) + K.sum(flat_guess) + smoothing
    return numerator / denominator
Beispiel #4
0
        def training_phase():
            """Normalize with batch statistics and register moving-average updates.

            Reads `mean_instance`, `temp`, `inputs`, `reduction_axes`,
            `normalize_func` and `self` from the enclosing scope.
            """
            # Batch statistics: average over axis 0; variance as E[x^2] - E[x]^2
            # (presumably `temp` holds the second moment -- confirm at definition).
            batch_mean = K.mean(mean_instance, axis=0, keepdims=True)
            batch_variance = (K.mean(temp, axis=0, keepdims=True) -
                              K.square(batch_mean))

            flat_mean = K.flatten(batch_mean)
            flat_variance = K.flatten(batch_variance)

            if K.backend() != 'cntk':
                axis_sizes = [K.shape(inputs)[axis] for axis in reduction_axes]
                sample_size = K.cast(K.prod(axis_sizes),
                                     dtype=K.dtype(inputs))

                # sample variance - unbiased estimator of population variance
                unbiased_scale = sample_size / (sample_size -
                                                (1.0 + self.epsilon))
                flat_variance = flat_variance * unbiased_scale

            mean_update = K.moving_average_update(self.moving_mean, flat_mean,
                                                  self.momentum)
            variance_update = K.moving_average_update(self.moving_variance,
                                                      flat_variance,
                                                      self.momentum)
            self.add_update([mean_update, variance_update], inputs)

            # The un-flattened, uncorrected batch statistics are used for the
            # actual normalization.
            return normalize_func(batch_mean, batch_variance)
Beispiel #5
0
 def vae_loss(x, x_mean):
     """VAE objective: scaled reconstruction cross-entropy plus KL term.

     `input_shape`, `z_mean` and `z_log_var` come from the enclosing scope.
     """
     flat_input = flatten(x)
     flat_reconstruction = flatten(x_mean)
     # Cross-entropy scaled by the first entry of `input_shape`.
     reconstruction_term = input_shape[0] * binary_crossentropy(
         flat_input, flat_reconstruction)
     # KL divergence between N(z_mean, exp(z_log_var)) and a unit Gaussian,
     # averaged over the last axis.
     kl_inner = 1 + z_log_var - square(z_mean) - exp(z_log_var)
     kl_term = -0.5 * mean(kl_inner, axis=-1)
     return reconstruction_term + kl_term
Beispiel #6
0
def tversky(y_true, y_pred, smooth=K.epsilon()):
    """Tversky index over the flattened inputs.

    False negatives are weighted by 0.7 and false positives by 0.3; `smooth`
    is added to numerator and denominator.
    """
    truth = K.flatten(y_true)
    prediction = K.flatten(y_pred)
    tp = K.sum(truth * prediction)
    fn = K.sum(truth * (1-prediction))
    fp = K.sum((1-truth)*prediction)
    fn_weight = 0.7
    numerator = tp + smooth
    denominator = tp + fn_weight*fn + (1-fn_weight)*fp + smooth
    return numerator/denominator
def jaccard_index(y_true, y_pred):
    """Smoothed Jaccard index (intersection over union) of the flattened inputs."""
    smoothing = 1.
    truth = backend.flatten(y_true)
    prediction = backend.flatten(y_pred)
    overlap = backend.sum(truth * prediction)
    numerator = overlap + smoothing
    # Union = |truth| + |prediction| - |overlap|, smoothed like the numerator.
    union = (backend.sum(truth) + backend.sum(prediction) - overlap +
             smoothing)
    return numerator / union
def dice_coeff(y_true, y_pred):
    """Dice coefficient of the flattened inputs with a smoothing term of 1."""
    smoothing = 1.
    truth = backend.flatten(y_true)
    prediction = backend.flatten(y_pred)
    overlap = backend.sum(truth * prediction)
    numerator = 2. * overlap + smoothing
    denominator = (tf.reduce_sum(truth) + tf.reduce_sum(prediction) +
                   smoothing)
    return numerator / denominator
Beispiel #9
0
 def vae_loss(self, x, z_decoded):
     """Mean of reconstruction cross-entropy plus a weighted KL term.

     `z_mean`, `z_log_var` and `metrics` are taken from the enclosing scope.
     """
     from tensorflow.python.keras import backend as K 
     flat_input = K.flatten(x)
     flat_decoded = K.flatten(z_decoded)
     # Reconstruction loss
     reconstruction = metrics.binary_crossentropy(flat_input, flat_decoded)
     # KL divergence, weighted by 5e-4 and averaged over the last axis
     kl_inner = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
     kl_divergence = -5e-4 * K.mean(kl_inner, axis=-1)
     combined = reconstruction + kl_divergence
     return K.mean(combined)
Beispiel #10
0
def dice_loss(y_true, y_pred, smooth=1):
    """Return the negated, smoothed Dice coefficient.

    # Arguments:
        y_true: A tensor of the same shape as `y_pred`.
        y_pred: A tensor resulting from a softmax
        smooth: Value added to numerator and denominator.
    """
    truth = K.flatten(y_true)
    prediction = K.flatten(y_pred)
    overlap = K.sum(truth * prediction)
    numerator = 2. * overlap + smooth
    denominator = K.sum(truth) + K.sum(prediction) + smooth
    dice = numerator / denominator
    # Negated so that a better overlap gives a smaller value.
    return - (dice)
Beispiel #11
0
 def dice_coef(self, y_true, y_pred):
     """Dice coefficient with explicit handling of empty masks.

     The result is selected with `K.switch` instead of Python `if`
     statements, so the function also works when the inputs are symbolic
     tensors (a Python `if` on such a tensor cannot be evaluated while the
     graph is being built).

     Args:
         y_true: Ground-truth tensor.
         y_pred: Prediction tensor with the same number of elements.

     Returns:
         A scalar tensor:
           * 1 when both inputs sum to zero,
           * 1 / (sum(y_true) + sum(y_pred) + 1) when there is no overlap,
           * 2 * overlap / (sum(y_true) + sum(y_pred)) otherwise.
     """
     y_true = K.flatten(y_true)
     y_pred = K.flatten(y_pred)
     intersection = K.sum(y_true * y_pred)
     denominator = K.sum(y_true) + K.sum(y_pred)

     one = K.ones_like(denominator)
     both_empty = K.equal(denominator, 0)
     no_overlap = K.equal(intersection, 0)

     # Never divide by zero, even in the branch that is not selected.
     safe_denominator = K.switch(both_empty, one, denominator)
     regular_score = (2.0 * intersection) / safe_denominator
     no_overlap_score = one / (denominator + 1)

     return K.switch(both_empty, one,
                     K.switch(no_overlap, no_overlap_score, regular_score))
Beispiel #12
0
def dice_coef(y_true, y_pred):
    """Dice coefficient over all elements, stabilised by a smoothing term."""
    flat_truth = K.flatten(y_true)
    flat_prediction = K.flatten(y_pred)

    # Keeps the ratio defined when both inputs are all zeros.
    smoothing = 1

    overlap = K.sum(flat_truth * flat_prediction)
    numerator = 2. * overlap + smoothing
    denominator = K.sum(flat_truth) + K.sum(flat_prediction) + smoothing
    return numerator / denominator
Beispiel #13
0
def dice_loss(y_true, y_pred):
    """Dice loss using squared sums in the denominator.

    Computes 1 - (2 * sum(t * p) + 1) / (sum(t * t) + sum(p * p) + 1) over the
    flattened inputs.
    """
    smoothing = 1.
    truth = k.flatten(y_true)
    prediction = k.flatten(y_pred)
    overlap = k.sum(truth * prediction)
    truth_energy = k.sum(truth * truth)
    prediction_energy = k.sum(prediction * prediction)
    numerator = 2. * overlap + smoothing
    denominator = truth_energy + prediction_energy + smoothing
    return 1 - numerator / denominator
Beispiel #14
0
        def vae_loss(y_true, y_pred):
            """VAE loss: mean reconstruction cross-entropy plus mean KL term.

            `vae_z_mean` and `vae_z_log_var` are read from the enclosing scope.

            Args:
                y_true: Target tensor.
                y_pred: Reconstructed tensor (probabilities).

            Returns:
                Scalar tensor `mean(reconstruction) + mean(kl)`.
            """
            # The Keras backend signature is binary_crossentropy(target, output):
            # the ground truth goes first, the prediction second.
            r_loss = K.binary_crossentropy(y_true, y_pred)

            kl_loss = -0.5 * K.mean(1 + vae_z_log_var - K.square(vae_z_mean) -
                                    K.exp(vae_z_log_var),
                                    axis=-1)

            # Averaging the two terms separately equals the mean of their
            # broadcast sum, but does not depend on the per-element loss and the
            # per-sample KL vector having broadcast-compatible shapes.
            return K.mean(r_loss) + K.mean(kl_loss)
Beispiel #15
0
    def loss(y_true, y_pred):
        """Return 1 - F-beta computed from soft precision and recall.

        `beta` is read from the enclosing scope.
        """
        beta_squared = beta*beta
        eps = 1e-6

        prediction = K.flatten(y_pred)
        truth = K.flatten(y_true)

        overlap = K.sum(prediction*truth)
        precision = overlap / (K.sum(prediction) + eps)
        recall = overlap / (K.sum(truth) + eps)

        # F-beta = (1 + beta^2) * P * R / (beta^2 * P + R), eps-stabilised.
        weight = 1.0+beta_squared
        ratio = (precision * recall) / (((beta_squared * precision) + recall) + eps)
        f_beta = weight * ratio

        return 1.0 - f_beta
Beispiel #16
0
def tp_score(y_true, y_pred, threshold=0.1):
    """Count true positives: truth is set and prediction exceeds `threshold`.

    Returns an int32 scalar tensor.
    """
    # One boolean column per condition.
    truth_column = K.cast(K.expand_dims(K.flatten(y_true)), 'bool')
    above_threshold = K.greater(y_pred, K.constant(threshold))
    positive_column = K.cast(K.expand_dims(K.flatten(above_threshold)),
                             'bool')
    # Constant all-True column; it does not affect the conjunction below.
    ones_column = K.cast(K.ones_like(K.expand_dims(K.flatten(y_pred))),
                         'bool')

    tp_3d = K.concatenate([truth_column, positive_column, ones_column],
                          axis=1)

    # A row counts when every column in it is True.
    is_true_positive = K.all(tp_3d, axis=1)
    return K.sum(K.cast(is_true_positive, 'int32'))
Beispiel #17
0
def dice_loss(y_true, y_pred, smooth=1):
    """Negated Dice coefficient between an output tensor and a target tensor.

    Args:
        y_true: A tensor of the same shape as y_pred.
        y_pred: A tensor resulting from a softmax
        smooth: Term added to both numerator and denominator.

    Returns:
        tensor: `-(2 * overlap + smooth) / (sum(y_true) + sum(y_pred) + smooth)`.
    """
    flat_target = K.flatten(y_true)
    flat_output = K.flatten(y_pred)
    overlap = K.sum(flat_target * flat_output)
    numerator = 2. * overlap + smooth
    denominator = K.sum(flat_target) + K.sum(flat_output) + smooth
    return - (numerator / denominator)
Beispiel #18
0
def dice_coefficient(y_true, y_pred):
    """
    Similarity statistic for two samples, used here on binary segmentations.

    Args:
        y_true: the true segmentation (anything `K.flatten` accepts)
        y_pred: the predicted segmentation (same number of elements)

    Returns:
        The smoothed Dice score
        (2 * overlap + 1) / (sum(y_true) + sum(y_pred) + 1).
    """
    truth = K.flatten(y_true)
    prediction = K.flatten(y_pred)
    overlap = K.sum(truth * prediction)
    smoothing = 1.0
    numerator = 2 * overlap + smoothing
    denominator = K.sum(truth) + K.sum(prediction) + smoothing
    return numerator / denominator
Beispiel #19
0
 def build(self, input_shape):
     """Use the weights in `self.custom_params` if given, else build normally.

     With custom parameters the kernel, recurrent kernel and (optionally) bias
     are taken from the dict and the cell is marked as built; otherwise the
     call is delegated to `keras_layers.GRUCell.build`.
     """
     if not self.custom_params:
         keras_layers.GRUCell.build(self, input_shape)
         return

     self.kernel = self.custom_params["kernel"]
     self.recurrent_kernel = self.custom_params["recurrent_kernel"]
     if self.use_bias:
         self.bias = self.custom_params["bias"]
         if self.reset_after:
             # Row 0 is used as input bias, row 1 as recurrent bias.
             self.input_bias = K.flatten(self.bias[0])
             self.recurrent_bias = K.flatten(self.bias[1])
         else:
             self.input_bias, self.recurrent_bias = self.bias, None
     else:
         self.bias = None
     self.built = True
Beispiel #20
0
def dice_loss_single_channel(y_true, y_pred):
    """Dice loss restricted to labeled entries (those where y_true != -1).

    Returns -1. when the masked overlap is zero, otherwise 1 - dice.

    NOTE(review): `smooth` is not defined in this function; presumably it is a
    module-level constant -- confirm.
    NOTE(review): the `if` below compares a tensor in Python; confirm this is
    only used where that comparison can be evaluated (e.g. eager execution).
    """
    flat_truth = K.flatten(y_true)
    flat_prediction = K.flatten(y_pred)
    # True wherever a ground-truth label exists.
    is_labeled = tf.not_equal(flat_truth, -1)
    # Drop the unlabeled entries from both tensors; the normalisation is
    # implicit because only labeled values remain.
    labeled_truth = tf.boolean_mask(flat_truth, is_labeled)
    labeled_prediction = tf.boolean_mask(flat_prediction, is_labeled)
    overlap = K.sum(labeled_truth * labeled_prediction)
    if overlap == 0:
        return -1.
    numerator = 2. * overlap + smooth
    denominator = K.sum(labeled_truth) + K.sum(labeled_prediction) + smooth
    dice = numerator / denominator
    return 1 - dice
Beispiel #21
0
def fn_score(y_true, y_pred, threshold=0.1):
    """Count false negatives: truth is set but prediction is not above `threshold`.

    Returns an int32 scalar tensor.
    """
    truth_column = K.cast(K.expand_dims(K.flatten(y_true)), 'bool')
    # 1.0 where the prediction exceeds the threshold, else 0.0 ...
    predicted_positive = K.cast(K.greater(y_pred, K.constant(threshold)),
                                'float')
    # ... and |value - 1| flips that into "predicted negative".
    predicted_negative = K.abs(predicted_positive - K.ones_like(y_pred))
    negative_column = K.cast(K.expand_dims(K.flatten(predicted_negative)),
                             'bool')
    # Constant all-True column; it does not affect the conjunction below.
    ones_column = K.cast(K.ones_like(K.expand_dims(K.flatten(y_pred))),
                         'bool')

    fn_3d = K.concatenate([truth_column, negative_column, ones_column],
                          axis=1)

    is_false_negative = K.all(fn_3d, axis=1)
    return K.sum(K.cast(is_false_negative, 'int32'))
    def __init__(self, model, layer_name, index_feature1, index_feature2):
        """Build a function returning the loss and its gradient for two features.

        The loss is the mean squared element-wise product of two feature maps
        of the layer called `layer_name`, evaluated on non-border positions.

        Args:
            model: Keras model whose input is optimised.
            layer_name: Name of the layer whose output is inspected.
            index_feature1: Index of the first feature on the last axis.
            index_feature2: Index of the second feature on the last axis.

        Raises:
            ValueError: If no layer of `model` is called `layer_name`.
            NotImplementedError: If the image data format is 'channels_first'.
        """
        self.model = model
        self.layer_name = layer_name
        self.index_feature1 = index_feature1
        self.index_feature2 = index_feature2
        dream = model.input
        # Make sure the requested layer exists before building anything.
        layers_all = [layer.name for layer in model.layers]
        if layer_name not in layers_all:
            raise ValueError('Layer ' + layer_name + ' not found in model.')

        # Define the loss.
        loss = K.variable(0.)
        for layer_local in model.layers:
            if layer_local.name != layer_name:
                continue
            x = layer_local.output

            if K.image_data_format() == 'channels_first':
                raise NotImplementedError(
                    'channels_first data format is not supported.')

            # We avoid border artifacts by only involving non-border pixels
            # in the loss.
            x_index_feature1 = x[:, 2:-2, 2:-2, index_feature1]
            x_index_feature2 = x[:, 2:-2, 2:-2, index_feature2]
            x_index_feature1_flatten = K.flatten(x_index_feature1)
            x_index_feature2_flatten = K.flatten(x_index_feature2)
            mean_squared_product = tf.reduce_mean(K.square(
                tf.multiply(x_index_feature1_flatten,
                            x_index_feature2_flatten)),
                                                  axis=0)

            loss = loss + mean_squared_product

        # Compute the gradients of the loss wrt the dream (the model input).
        grads = K.gradients(loss, dream)[0]
        # Normalize gradients by their mean absolute value (at least epsilon).
        grads /= K.maximum(K.mean(K.abs(grads)), K.epsilon())

        # Set up function to retrieve the value
        # of the loss and gradients given an input image.
        outputs = [loss, grads]
        self.fetch_loss_and_grads = K.function([dream], outputs)
Beispiel #23
0
    def call(self, inputs, mask=None, training=None):
        """Assemble the flattened board tensor from the five input tensors.

        `inputs` is unpacked into (input_x, input_mi, input_m, input_n,
        input_cnt). `mask` and `training` are accepted but not used here.
        Returns a tensor reshaped to `[-1, 17 * 5 * 2]`.
        """
        input_x, input_mi, input_m, input_n, input_cnt = inputs
        input_x = tf.cast(input_x, dtype=tf.int32)
        input_mi = tf.cast(input_mi, dtype=tf.int32)
        input_cnt = K.flatten(input_cnt)

        # shape (5,35,17)  -- example shape noted by the original author
        em_p = self.p_layer(input_x)
        em_v = self.v_layer(input_x)
        em_x = get_em_x(input_x, st_size=self.style_size)  # x*17
        em_m = get_em_m(input_x, st_size=self.style_size)  # 0~16

        def pxvm_of(values):
            # Every get_pxvm call shares all arguments except the second one.
            return get_pxvm(em_p,
                            values,
                            input_mi,
                            input_n,
                            input_cnt,
                            max_seq=self.seq_size)

        # pxvm (out_p is computed exactly as before, although nothing below
        # reads it)
        out_p = pxvm_of(em_p)
        out_v = pxvm_of(em_v)
        out_x = pxvm_of(em_x)
        out_m = pxvm_of(em_m)

        out_v, out_x, out_m = (tf.expand_dims(tensor, -1)
                               for tensor in (out_v, out_x, out_m))
        out_xvm = tf.concat([out_x, out_v, out_m], -1)
        # m ranges over 0~16, st_size > 16
        # shape (5,17,5,2)
        out_board = get_board(out_xvm,
                              input_mi,
                              input_cnt,
                              st_size=17,
                              board_len=self.board_size)

        # Keep the x and v channels, drop m
        keep_begin = [0, 0, 0, 0]
        keep_size = [-1, -1, -1, 2]
        out_board = tf.slice(out_board, keep_begin, keep_size)

        return tf.reshape(out_board, [-1, 17 * 5 * 2])
Beispiel #24
0
def sparse_accuracy_ignoring_last_label(y_true, y_pred):
    """Pixel accuracy that ignores entries carrying the extra "void" label.

    Labels are one-hot encoded with `nb_classes + 1` classes; the last class
    marks entries to ignore.

    Args:
        y_true: Integer class labels (any shape; flattened internally), where
            the value `nb_classes` marks an ignored entry.
        y_pred: Class scores with the classes on the last axis.

    Returns:
        Scalar tensor: correctly classified legal entries divided by the
        number of legal entries (NaN if there are none).
    """
    nb_classes = K.int_shape(y_pred)[-1]
    y_pred = K.reshape(y_pred, (-1, nb_classes))

    # tf.cast replaces the deprecated tf.to_int32 / tf.to_float helpers, which
    # no longer exist in TensorFlow 2.
    y_true = K.one_hot(tf.cast(K.flatten(y_true), tf.int32),
                       nb_classes + 1)
    unpacked = tf.unstack(y_true, axis=-1)
    # The last one-hot column flags the ignored entries.
    legal_labels = ~tf.cast(unpacked[-1], tf.bool)
    y_true = tf.stack(unpacked[:-1], axis=-1)

    matches = K.equal(K.argmax(y_true, axis=-1), K.argmax(y_pred, axis=-1))
    correct = legal_labels & matches
    return (K.sum(tf.cast(correct, tf.float32)) /
            K.sum(tf.cast(legal_labels, tf.float32)))
Beispiel #25
0
def softmax_sparse_crossentropy_ignoring_last_label(y_true, y_pred):
    """Softmax cross-entropy in which the extra "void" label contributes zero.

    Labels are one-hot encoded with one class more than `y_pred` has and the
    last column is dropped, so ignored entries become all-zero rows.

    Args:
        y_true: Integer class labels (flattened internally); the value equal
            to the number of classes marks an ignored entry.
        y_pred: Logits with the classes on the last axis.

    Returns:
        Scalar tensor: the cross-entropy averaged over all entries (ignored
        entries are included in the average with a value of zero).
    """
    y_pred = K.reshape(y_pred, (-1, K.int_shape(y_pred)[-1]))
    log_softmax = tf.nn.log_softmax(y_pred)

    # tf.cast replaces the deprecated tf.to_int32 helper, which no longer
    # exists in TensorFlow 2.
    y_true = K.one_hot(tf.cast(K.flatten(y_true), tf.int32),
                       K.int_shape(y_pred)[-1]+1)
    unpacked = tf.unstack(y_true, axis=-1)
    y_true = tf.stack(unpacked[:-1], axis=-1)

    cross_entropy = -K.sum(y_true * log_softmax, axis=1)
    cross_entropy_mean = K.mean(cross_entropy)

    return cross_entropy_mean
Beispiel #26
0
def dice_coef(y_true, y_pred):
    ''' Dice Coefficient
    Project: BraTs   Author: cv-lee   File: unet.py    License: MIT License
    Args:
        y_true: Ground Truth Heatmap (Label), indexed as [:, :, :, class]
        y_pred: Prediction Heatmap, indexed as [:, :, :, class]
    Returns:
        Mean smoothed Dice score over the evaluated classes.
    '''

    class_num = 1
    # Smoothing term: the SAME value is added to numerator and denominator so
    # that the score stays within [0, 1]; two empty masks score 1.
    smooth = K.constant(1e-6)

    for class_now in range(class_num):

        # Flatten y_pred and y_true for the current class into vectors
        y_true_f = K.flatten(y_true[:, :, :, class_now])
        y_pred_f = K.flatten(y_pred[:, :, :, class_now])

        # Soft count of positions where both truth and prediction are positive
        intersection = K.sum(y_true_f * y_pred_f)
        # Sum of positives in truth plus positives in prediction
        union = K.sum(y_true_f) + K.sum(y_pred_f)
        # Smoothed Dice score for this class
        num = (K.constant(2) * intersection + smooth)
        den = (union + smooth)
        loss = num / den

        if class_now == 0:
            total_loss = loss
        else:
            total_loss = total_loss + loss

    total_loss = total_loss / class_num

    return total_loss
def cudnn_gru(inputs, init_h, kernel, recurrent_kernel, bias, time_major):
    """Run a GRU through the CuDNN kernel (GPU only).

    Returns `(last_output, outputs, h, runtime)`; `outputs` is transposed back
    to batch-major when `time_major` is False.
    """
    batch_major = not time_major
    if batch_major:
        # The CuDNN op consumes time-major input.
        inputs = array_ops.transpose(inputs, perm=(1, 0, 2))
    init_h = array_ops.expand_dims(init_h, axis=0)

    canonical_weights = (array_ops.split(kernel, 3, axis=1) +
                         array_ops.split(recurrent_kernel, 3, axis=1))
    # Note that the bias was initialized as shape (2, 3 * units), flat it into
    # (6 * units)
    canonical_biases = array_ops.split(K.flatten(bias), 6)
    # Canonical gate order is [z, r, h] (update, reset, output) whereas CuDNN
    # expects [r, z, h]; the first two entries of each group of three are
    # swapped for kernel, recurrent_kernel, input_bias and recurrent_bias.
    cudnn_order = (1, 0, 2, 4, 3, 5)
    weights = [canonical_weights[i] for i in cudnn_order]
    biases = [canonical_biases[i] for i in cudnn_order]

    params = _canonical_to_params(weights=weights,
                                  biases=biases,
                                  shape=constant_op.constant([-1]),
                                  transpose_weights=True)

    outputs, h, _, _ = gen_cudnn_rnn_ops.cudnn_rnn(inputs,
                                                   input_h=init_h,
                                                   input_c=0,
                                                   params=params,
                                                   is_training=True,
                                                   rnn_mode='gru')
    # Taken while `outputs` is still time-major, i.e. the last time step.
    last_output = outputs[-1]
    if batch_major:
        outputs = array_ops.transpose(outputs, perm=[1, 0, 2])
    final_state = h[0]
    return last_output, outputs, final_state, _runtime('cudnn')
Beispiel #28
0
    def call(self, y_true, y_pred):
        """Invokes the `Loss` instance.

        Computes a sparse softmax cross-entropy and scales it by a modulation
        factor derived from `self.gamma` and the incoming predictions.

        Args:
            y_true: Ground truth values (cast to int64 class indices below).
            y_pred: The predicted values; treated as logits when
                `self.from_logits` is true, otherwise clipped and converted
                to log-probabilities.

        Returns:
            Loss values in the form of a Tensor

        Raises:
            ValueError: If the rank of `y_pred` is unknown and `axis` is not
                -1 (cannot happen while `axis` is fixed to -1 below).
        """
        gamma = self.gamma
        from_logits = self.from_logits
        # The class axis is fixed to the last one in this implementation.
        axis = -1

        y_true = tf.cast(y_true, y_pred.dtype)
        y_true = ops.convert_to_tensor_v2(y_true)
        y_pred = ops.convert_to_tensor_v2(y_pred)

        # Keep a handle on the predictions as passed in; `y_pred` itself may be
        # replaced by logits / log-probabilities below.
        probs = y_pred

        # Reformat y_pred shapes
        # If y_pred is the direct output of a graph-mode Softmax op (and not a
        # Keras-history tensor), use the op's input as logits instead.
        if (not from_logits and
                not isinstance(y_pred,
                               (ops.EagerTensor, variables_module.Variable))
                and y_pred.op.type == 'Softmax') and not hasattr(
                    y_pred, '_keras_history'):
            assert len(y_pred.op.inputs) == 1
            y_pred = y_pred.op.inputs[0]
            from_logits = True

        # Clip y_pred to a minimum and maximum value
        # and take the log so it can be fed to the logits-based op below.
        if not from_logits:
            epsilon_ = constant_op.constant(K.epsilon(),
                                            y_pred.dtype.base_dtype)
            y_pred = clip_ops.clip_by_value(y_pred, epsilon_, 1 - epsilon_)
            y_pred = math_ops.log(y_pred)

        # Get dimensions of predictions tensor
        if isinstance(y_pred.shape, (tuple, list)):
            output_rank = len(y_pred.shape)
        else:
            output_rank = y_pred.shape.ndims
        if output_rank is not None:
            # Normalise the (possibly negative) axis; move it last if needed.
            axis %= output_rank
            if axis != output_rank - 1:
                permutation = list(
                    itertools.chain(range(axis), range(axis + 1, output_rank),
                                    [axis]))
                y_pred = array_ops.transpose(y_pred, perm=permutation)
        elif axis != -1:
            raise ValueError(
                'Cannot compute sparse categorical crossentropy with `axis={}` on an '
                'output tensor with unknown rank'.format(axis))

        # Reformat y_true shape and data type.
        y_true = cast(y_true, 'int64')

        output_shape = array_ops.shape_v2(y_pred)
        target_rank = y_true.shape.ndims

        # Flatten labels / predictions when the label rank is not exactly one
        # less than the prediction rank.
        update_shape = (target_rank is not None and output_rank is not None
                        and target_rank != output_rank - 1)
        if update_shape:
            y_true = flatten(y_true)
            y_pred = array_ops.reshape(y_pred, [-1, output_shape[-1]])

        # Calculate cross-entropy loss
        # Symbolic tensors are handled inside the graph returned by get_graph().
        if py_any(_is_symbolic_tensor(v) for v in [y_true, y_pred]):
            with get_graph().as_default():
                loss = nn.sparse_softmax_cross_entropy_with_logits_v2(
                    labels=y_true, logits=y_pred)
        else:
            loss = nn.sparse_softmax_cross_entropy_with_logits_v2(
                labels=y_true, logits=y_pred)

        # Undo the flattening for predictions of rank 3 or more.
        if update_shape and output_rank >= 3:
            loss = array_ops.reshape(loss, output_shape[:-1])

        # Calculate focal modulation to be applied
        gamma = tf.convert_to_tensor(gamma, dtype=tf.dtypes.float32)
        scalar_gamma = gamma.shape.rank == 0

        y_true_rank = y_true.shape.rank
        # A non-scalar gamma is indexed by the labels.
        if not scalar_gamma:
            gamma = tf.gather(gamma, y_true, axis=0, batch_dims=y_true_rank)

        # NOTE(review): the modulation is built from the mean of `probs` over
        # axis 1 and then gathered with the labels, rather than from the
        # probability of the true class -- confirm this is intended.
        focal_modulation = K.pow(1 - tf.math.reduce_mean(probs, axis=1), gamma)
        focal_modulation = tf.gather(focal_modulation,
                                     y_true,
                                     axis=0,
                                     batch_dims=y_true_rank)

        loss = focal_modulation * loss

        return loss
Beispiel #29
0
def dice_coef(y_true, y_pred):
    """Smoothed Dice coefficient; ref: https://arxiv.org/pdf/1606.04797v1.pdf"""
    target = K.flatten(y_true)
    output = K.flatten(y_pred)
    overlap = K.sum(target * output)
    numerator = 2. * overlap + 1
    denominator = K.sum(target) + K.sum(output) + 1
    return numerator / denominator
Beispiel #30
0
def dice_coefficient(y_true, y_pred, smooth=1.):
    """Dice coefficient of the flattened inputs.

    `smooth` is added to numerator and denominator. `flatten` and `sum` are
    whatever names the surrounding module has in scope.
    """
    truth = flatten(y_true)
    prediction = flatten(y_pred)
    overlap = sum(truth * prediction)
    numerator = 2. * overlap + smooth
    denominator = sum(truth) + sum(prediction) + smooth
    return numerator / denominator
Beispiel #31
0
def cudnn_gru(inputs, init_h, kernel, recurrent_kernel, bias, mask, time_major,
              go_backwards):
  """Run a GRU through the CuDNN v3 kernel (GPU only), with optional masking.

  Returns `(last_output, outputs, h, runtime)`; `outputs` is transposed back to
  batch-major when `time_major` is False.
  """
  batch_major = not time_major
  if batch_major:
    # The CuDNN op consumes time-major input.
    inputs = array_ops.transpose(inputs, perm=(1, 0, 2))
  init_h = array_ops.expand_dims(init_h, axis=0)

  canonical_weights = (array_ops.split(kernel, 3, axis=1) +
                       array_ops.split(recurrent_kernel, 3, axis=1))
  # Note that the bias was initialized as shape (2, 3 * units), flat it into
  # (6 * units)
  canonical_biases = array_ops.split(K.flatten(bias), 6)
  # Canonical gate order is [z, r, h] (update, reset, output) whereas CuDNN
  # expects [r, z, h]; the first two entries of each group of three are swapped
  # for kernel, recurrent_kernel, input_bias and recurrent_bias.
  cudnn_order = (1, 0, 2, 4, 3, 5)
  weights = [canonical_weights[i] for i in cudnn_order]
  biases = [canonical_biases[i] for i in cudnn_order]

  params = _canonical_to_params(
      weights=weights,
      biases=biases,
      shape=constant_op.constant([-1]),
      transpose_weights=True)

  if mask is None:
    # Fill the array with shape [batch] with value of max timesteps.
    sequence_length = array_ops.fill([array_ops.shape(inputs)[1]],
                                     array_ops.shape(inputs)[0])
  else:
    sequence_length = calculate_sequence_by_mask(mask, time_major)
  if go_backwards:
    inputs = array_ops.reverse_sequence_v2(inputs, sequence_length, seq_axis=0,
                                           batch_axis=1)

  outputs, h, _, _, _ = gen_cudnn_rnn_ops.cudnn_rnnv3(
      inputs,
      input_h=init_h,
      input_c=0,
      params=params,
      is_training=True,
      rnn_mode='gru',
      sequence_lengths=sequence_length)
  # Taken while `outputs` is still time-major, i.e. the last time step.
  last_output = outputs[-1]
  if batch_major:
    outputs = array_ops.transpose(outputs, perm=[1, 0, 2])
  h = h[0]

  # With variable-length input the cudnn kernel writes zeros past the end of
  # each sequence, whereas the default keras behavior carries the previous
  # output forward, so that with return_sequence=False the caller gets the
  # final effective output rather than zeros at the last timestep. To mimic
  # keras, the final h state is returned as last_output; it is numerically the
  # same as the output.
  if mask is not None:
    last_output = h

  return last_output, outputs, h, _runtime('cudnn')