Example #1
    def call(self, inputs, **kwargs):
        inputs_shape = K.shape(inputs)

        mask = K.cast(K.squeeze(K.any(K.not_equal(inputs, 0.),
                                      axis=(-2, -1),
                                      keepdims=True),
                                axis=-1),
                      dtype=inputs.dtype)

        inputs_to_lstm = K.reshape(inputs,
                                   (-1, inputs.shape[-2], inputs.shape[-1]))

        inputs_embed = super(InferenceSpeakerEmbedding,
                             self).call(inputs_to_lstm)

        inputs_embed = K.reshape(
            inputs_embed,
            (inputs_shape[0], inputs_shape[1], inputs_embed.shape[-1]))

        inputs_embed = inputs_embed * mask

        n = K.sum(mask, axis=1)

        inputs_embed = K.sum(inputs_embed, axis=1) / n

        return inputs_embed
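The masked mean-pooling idiom above is easier to see on a toy tensor. Below is a minimal, self-contained sketch of the same pattern, with hypothetical shapes and a simple frame-mean standing in for the `InferenceSpeakerEmbedding` LSTM:

import numpy as np
from tensorflow.keras import backend as K

# Toy batch: 2 samples x 3 utterance slots x 4 frames x 2 features.
inputs = np.zeros((2, 3, 4, 2), dtype="float32")
inputs[0, :2] = 1.0  # sample 0: two valid utterance slots
inputs[1, :1] = 2.0  # sample 1: one valid utterance slot
x = K.constant(inputs)

# A slot is valid if any frame/feature is non-zero: [batch, slots, 1]
mask = K.cast(K.squeeze(K.any(K.not_equal(x, 0.), axis=(-2, -1), keepdims=True),
                        axis=-1),
              dtype=x.dtype)

embed = K.mean(x, axis=2)          # stand-in embedding: [batch, slots, 2]
embed = embed * mask               # zero out the empty slots
n = K.sum(mask, axis=1)            # valid slots per sample: [batch, 1]
pooled = K.sum(embed, axis=1) / n  # masked mean over slots: [batch, 2]
print(K.eval(pooled))              # [[1. 1.], [2. 2.]]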
Example #2
 def call(self, inputs):
   boolean_mask = K.any(
       math_ops.not_equal(inputs, self.mask_value), axis=-1, keepdims=True)
   outputs = inputs * math_ops.cast(boolean_mask, inputs.dtype)
   # Compute the mask and outputs simultaneously.
   outputs._keras_mask = array_ops.squeeze(boolean_mask, axis=-1)  # pylint: disable=protected-access
   return outputs
Example #3
    def generate_val_set(self):
        """
        Generates the actual dataset. It uses all the functions defined above to read images from disk and create crops.
        :return: tf.data.Dataset
        """
        parse_path_func = lambda x, y: self.parse_path(x, y)
        process_label_func = lambda x, y: self.process_label(x, y)
        resize_func = lambda x, y: self.resize_and_norm(x, y)
        crops_func = lambda x, y: self.crop_img_and_serve(x, y)
        filter_func = lambda x, y: K.equal(K.any(y), False)

        batch_size = self.batch_size

        n_el = len(list(self.val_id_ep_dict.keys()))
        ids = []
        labels = []
        for k, v in self.val_id_ep_dict.items():
            ids.append(os.path.join(self.train_images_folder, k))
            labels.append(v)
        id_tensor = K.constant(ids, dtype=tf.string, shape=([n_el]))
        label_tensor = K.constant(labels, dtype=tf.string, shape=(n_el, 4))
        return (tf.data.Dataset.from_tensor_slices((id_tensor, label_tensor))
                .shuffle(buffer_size=n_el)
                .map(parse_path_func, num_parallel_calls=AUTOTUNE)
                .map(process_label_func, num_parallel_calls=AUTOTUNE)  # process the label
                .map(resize_func, num_parallel_calls=AUTOTUNE)  # resize and normalize the image
                .map(crops_func, num_parallel_calls=AUTOTUNE)  # create crops of image to enlarge output
                .flat_map(
            lambda x, y: tf.data.Dataset.from_tensor_slices((x, y)))  # serve each crop as its own dataset element
                .filter(filter_func)
                .batch(batch_size)  # defined batch_size
                .prefetch(AUTOTUNE)  # number of batches to prefetch
                .repeat()  # repeats the dataset when it is finished
                )
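The `filter_func` above keeps only elements whose label tensor contains no non-zero entry (i.e. `K.any(y)` is False). A minimal sketch of that predicate on toy data, with the image-parsing steps omitted:

import tensorflow as tf
from tensorflow.keras import backend as K

labels = tf.constant([[0., 0.], [1., 0.], [0., 0.]])
ds = tf.data.Dataset.from_tensor_slices((tf.range(3), labels))

# Keep elements whose label is all-zero, mirroring filter_func above.
ds = ds.filter(lambda x, y: K.equal(K.any(y), False))

for x, y in ds:
    print(x.numpy(), y.numpy())  # elements 0 and 2 survive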
Example #4
    def loss_function(target_subtoken, y_pred):
        # predictions are probabilities; use log probabilities for speed and numerical stability

        print("Model objective: y_pred.shape: {}".format(y_pred.shape))
        # I_C = indicator: does the target subtoken appear in the input token? - TODO probably not ok, debug using TF eager
        I_C = K.expand_dims(
            K.cast(K.any(K.equal(input_code_subtoken,
                                 K.cast(target_subtoken, 'int32')),
                         axis=-1),
                   dtype='float32'), -1)
        print("Model objective: I_C.shape: {}".format(I_C.shape))
        # I_C shape = [batch_size, token, max_char_len, 1]
        # TODO should I add a penalty if no subtokens appear in the input? Yes
        probability_correct_copy = K.log(copy_probability) + K.log(
            K.sum(I_C * copy_weights) + mu)
        print("Model objective: probability_correct_copy.shape: {}".format(
            probability_correct_copy.shape))

        # penalise the model when cnn-attention predicts unknown
        # but the value can be predicted from the copy mechanism.
        mask_unknown = K.cast(K.equal(target_subtoken, unknown_id),
                              dtype='float32') * mu

        probability_target_token = K.sum(
            K.log(1 - copy_probability) + K.log(y_pred) + mask_unknown, -1,
            True)
        print("Model objective: probability_target_token.shape: {}".format(
            probability_target_token.shape))

        loss = K.logsumexp(
            [probability_correct_copy, probability_target_token])
        return K.mean(loss)
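The closing `logsumexp` marginalises over the two generation routes (copy vs. vocabulary) in log space. A toy sketch of that combination with hypothetical probabilities:

import numpy as np
from tensorflow.keras import backend as K

# log-probabilities of the two routes for three hypothetical targets
log_p_copy = K.constant(np.log([0.2, 0.05, 0.6]).astype("float32"))
log_p_vocab = K.constant(np.log([0.3, 0.50, 0.1]).astype("float32"))

# logsumexp over the stacked routes = log(P_copy + P_vocab), computed stably
total = K.logsumexp(K.stack([log_p_copy, log_p_vocab]), axis=0)
print(K.eval(K.exp(total)))  # ~[0.5, 0.55, 0.7]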
Example #5
 def _fd_conditional(y_true, y_pred):
     # if there are no mask annotations, return 0; else, compute the fdl loss
     return tf.cond(
         K.any(K.equal(K.shape(y_true), 0)),
         lambda: K.cast_to_floatx(0.0),
         lambda: _fd_batch(y_true, y_pred,
                           iou_threshold=self.fdl_iou_threshold,
                           parallel_iterations=self.parallel_iterations))
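The `tf.cond` guard returns a constant zero whenever `y_true` has an empty dimension, so the expensive branch never sees an empty tensor. A toy version with a stand-in loss in place of `_fd_batch`:

import tensorflow as tf
from tensorflow.keras import backend as K

def guarded_loss(y_true, y_pred):
    return tf.cond(
        K.any(K.equal(K.shape(y_true), 0)),         # any dimension empty?
        lambda: K.cast_to_floatx(0.0),
        lambda: K.mean(K.square(y_true - y_pred)))  # stand-in loss

empty = tf.zeros((0, 4))
full = tf.ones((2, 4))
print(guarded_loss(empty, empty).numpy())      # 0.0
print(guarded_loss(full, 0.5 * full).numpy())  # 0.25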
Example #6
def diaggreater3(x):
    # x = K.print_tensor(x, 'input: ')
    x1 = diaggreater3_on_axis(x, 0, 1, 2)
    x2 = diaggreater3_on_axis(x, 1, 0, 2)
    x3 = diaggreater3_on_axis(x, 2, 0, 1)
    x = K.stack([x1, x2, x3])
    # x = tf.Print(x, [x], summarize=64, message='diagonals:   ')
    return K.any(x)
Example #7
def greater3(x):
    # x = K.print_tensor(x, 'input: ')
    x1 = greater3_on_axis(x, 1)
    x2 = greater3_on_axis(x, 2)
    x3 = greater3_on_axis(x, 3)
    x = K.stack([x1, x2, x3])
    # x = K.print_tensor(x, 'output: ')
    return K.any(x)
Example #8
    def call(self, inputs, **kwargs):
        pair1, pair2 = inputs

        pair1_shape, pair2_shape = K.shape(pair1), K.shape(pair2)

        pair1_mask = K.cast(K.squeeze(K.any(K.not_equal(pair1, 0.),
                                            axis=(-2, -1),
                                            keepdims=True),
                                      axis=-1),
                            dtype=pair1.dtype)
        pair2_mask = K.cast(K.squeeze(K.any(K.not_equal(pair2, 0.),
                                            axis=(-2, -1),
                                            keepdims=True),
                                      axis=-1),
                            dtype=pair2.dtype)

        pair1_to_lstm = K.reshape(pair1,
                                  (-1, pair1.shape[-2], pair1.shape[-1]))
        pair2_to_lstm = K.reshape(pair2,
                                  (-1, pair2.shape[-2], pair2.shape[-1]))

        batch = K.concatenate([pair1_to_lstm, pair2_to_lstm], axis=0)

        embedded = super(TestSpeakerEmbedding, self).call(batch)

        pair1_embed = embedded[:K.shape(pair1_to_lstm)[0]]
        pair2_embed = embedded[K.shape(pair1_to_lstm)[0]:]

        pair1_embed = K.reshape(pair1_embed,
                                (pair1_shape[0], pair1_shape[1], -1))
        pair2_embed = K.reshape(pair2_embed,
                                (pair2_shape[0], pair2_shape[1], -1))

        pair1_embed = pair1_embed * pair1_mask
        pair2_embed = pair2_embed * pair2_mask

        pair1_n = K.sum(pair1_mask, axis=1)
        pair2_n = K.sum(pair2_mask, axis=1)

        pair1_embed = K.sum(pair1_embed, axis=1) / pair1_n
        pair2_embed = K.sum(pair2_embed, axis=1) / pair2_n

        return pair1_embed, pair2_embed
Example #9
def antidiaggreater3(x):
    x1 = x * tf.constant(ANTIDIAG23)
    x1 = K.sum(x1, axis=2)
    x1 = K.sum(x1, axis=2)
    x1 = K.greater(x1, 3)
    x1 = K.any(x1)

    x2 = x * tf.constant(ANTIDIAG13)
    x2 = K.sum(x2, axis=2)
    x2 = K.sum(x2, axis=2)
    x2 = K.greater(x2, 3)
    x2 = K.any(x2)

    x3 = x * tf.constant(ANTIDIAG12)
    x3 = K.sum(x3, axis=2)
    x3 = K.sum(x3, axis=2)
    x3 = K.greater(x3, 3)
    x3 = K.any(x3)

    x = K.stack([x1, x2, x3])
    return K.any(x)
Example #10
def fulldiaggreater3(x):
    xs = []
    for filt in [FULLDIAG1, FULLDIAG2, FULLDIAG3, FULLDIAG4]:
        x1 = x * tf.constant(filt)
        x1 = K.sum(x1, axis=1)
        x1 = K.sum(x1, axis=1)
        x1 = K.sum(x1, axis=1)
        x1 = K.greater(x1, 3)
        xs.append(x1)

    x = K.stack(xs)
    return K.any(x)
Example #11
def diaggreater3_on_axis(x, i, j, k):
    assert j < k
    # x = tf.Print(x, [x], summarize=64, message='initial x:          ')
    diag = tf.diag(np.array([1, 1, 1, 1], dtype=np.int32))
    # x = tf.Print(x, [diag], summarize=64, message='diagonal:           ')
    diag = K.stack([diag, diag, diag, diag], axis=i)
    # x = tf.Print(x, [diag], summarize=64, message='diagonal:           ')
    x = x * diag
    # x = tf.Print(x, [x], summarize=64, message='x * diag:           ')
    x = K.sum(x, axis=k + 1)
    # x = tf.Print(x, [x], summarize=64, message='x after first sum:  ')
    x = K.sum(x, axis=j + 1)
    # x = tf.Print(x, [x], summarize=64, message='x after second sum: ')
    x = K.greater(x, 3)
    x = K.any(x)
    return x
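`diaggreater3_on_axis` builds a 0/1 diagonal filter, stacks four copies of it along axis `i`, multiplies it into the board, and then sums out axes `k` and `j` before thresholding. A sketch of just the filter construction, using `tf.linalg.diag` (the TF2 name for the deprecated `tf.diag` above):

import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K

diag = tf.linalg.diag(np.ones(4, dtype=np.int32))  # 4x4 identity as a mask
filt = K.stack([diag] * 4, axis=0)                 # [4, 4, 4]: one diagonal per slice
print(K.eval(K.sum(filt)))                         # 16: four 4-cell diagonals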
Example #12
    def call(self, x, mask=None):
        '''Using `mask` you can access the mask passed from the previous layer.'''
        # x [batch_size, seq_len, embedding_size]
        if self.supports_masking:
            # mask [batch_size, seq_len]
            if mask is None:
                # first test for non-zero values, then OR-reduce to get each sequence's valid positions
                mask = K.any(K.not_equal(x, 0), -1)  # [batch_size, seq_len]
                mask = K.cast(mask, K.floatx())
                return K.sum(x, axis=1) / K.sum(mask, axis=1, keepdims=True)

            if mask is not None:
                mask = K.cast(mask, K.floatx())
                # [batch_size, embedding_size, seq_len]
                mask = K.repeat(mask, x.shape[-1].value)
                # [batch_size, seq_len, embedding_size]
                mask = tf.transpose(mask, [0, 2, 1])
                x = x * mask
                return K.sum(x, axis=1) / K.sum(mask, axis=1)
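When a `[batch, seq_len]` mask is passed in, the code above expands it to the embedding dimension with `K.repeat` plus a transpose. A toy sketch of that expansion with hypothetical sizes:

import tensorflow as tf
from tensorflow.keras import backend as K

mask = K.constant([[1., 1., 0.]])     # [batch=1, seq_len=3]
mask = K.repeat(mask, 4)              # [1, 4, 3]: embedding_size copies
mask = tf.transpose(mask, [0, 2, 1])  # [1, 3, 4]: aligned with x
print(K.eval(mask)[0])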
Example #13
 def compute_mask(self, inputs, mask=None):
     # cases need to call the layer.compute_mask when input_mask is None:
     # Masking layer and Embedding layer with mask_zero
     input_shape = K.int_shape(inputs)
     if input_shape[0]:
         # batch size matters, we currently do not handle mask explicitly
         return mask
     inner_mask = mask
     if inner_mask is not None:
         inner_mask_shape = self._get_shape_tuple((-1, ), mask, 2)
         inner_mask = K.reshape(inner_mask, inner_mask_shape)
     inner_input_shape = self._get_shape_tuple((-1, ), inputs, 2)
     inner_inputs = array_ops.reshape(inputs, inner_input_shape)
     output_mask = self.layer.compute_mask(inner_inputs, inner_mask)
     if output_mask is None:
         if mask is None:
             return None
         # input_mask is not None, and output_mask is None:
         # we should return a not-None mask
         output_mask = mask
         for _ in range(2, len(K.int_shape(mask))):
             output_mask = K.any(output_mask, axis=-1)
     else:
         # output_mask is not None. We need to reshape it
         input_length = input_shape[1]
         if not input_length:
             input_length = K.shape(inputs)[1]
         output_mask_int_shape = K.int_shape(output_mask)
         if output_mask_int_shape is None:
             # if the output_mask does not have a static shape,
             # its shape must be the same as mask's
             if mask is not None:
                 output_mask_int_shape = K.int_shape(mask)
             else:
                 output_mask_int_shape = K.compute_output_shape(
                     input_shape)[:-1]
         output_mask_shape = self._get_shape_tuple(
             (-1, input_length), output_mask, 1, output_mask_int_shape[1:])
         output_mask = K.reshape(output_mask, output_mask_shape)
     return output_mask
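The loop near the end collapses any extra mask dimensions with repeated `K.any`, so e.g. a `[batch, timesteps, d1, d2]` mask becomes `[batch, timesteps]`. A toy sketch of that reduction:

import tensorflow as tf
from tensorflow.keras import backend as K

mask = tf.constant([[[[True, False], [False, False]]]])  # [1, 1, 2, 2]
out = mask
for _ in range(2, len(K.int_shape(mask))):  # two reductions for a rank-4 mask
    out = K.any(out, axis=-1)
print(K.int_shape(out), K.eval(out))  # (1, 1) [[ True]]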
Example #14
    def compute_mask(self, inputs, mask=None):
        """Computes an output mask tensor for Embedding layer.

    This is based on the inputs, mask, and the inner layer.
    If batch size is specified:
    Simply return the input `mask`. (An rnn-based implementation with
    more than one rnn inputs is required but not supported in tf.keras yet.)
    Otherwise we call `compute_mask` of the inner layer at each time step.
    If the output mask at each time step is not `None`:
    (E.g., inner layer is Masking or RNN)
    Concatenate all of them and return the concatenation.
    If the output mask at each time step is `None` and the input mask is not
    `None`:(E.g., inner layer is Dense)
    Reduce the input_mask to 2 dimensions and return it.
    Otherwise (both the output mask and the input mask are `None`):
    (E.g., `mask` is not used at all)
    Return `None`.

    Arguments:
      inputs: Tensor with shape [batch size, timesteps, ...] indicating the
          input to TimeDistributed. If static shape information is available for
          "batch size", `mask` is returned unmodified.
      mask: Either None (indicating no masking) or a Tensor indicating the
          input mask for TimeDistributed. The shape can be static or dynamic.

    Returns:
      Either None (no masking), or a [batch size, timesteps, ...] Tensor with
      an output mask for the TimeDistributed layer with the shape beyond the
      second dimension being the value of the input mask shape(if the computed
      output mask is none), an output mask with the shape beyond the first
      dimension being the value of the mask shape(if mask is not None) or
      output mask with the shape beyond the first dimension being the
      value of the computed output shape.

    """
        # cases need to call the layer.compute_mask when input_mask is None:
        # Masking layer and Embedding layer with mask_zero
        input_shape = K.int_shape(inputs)
        if input_shape[0]:
            # batch size matters, we currently do not handle mask explicitly
            return mask
        inner_mask = mask
        if inner_mask is not None:
            inner_mask_shape = self._get_shape_tuple((-1, ), mask, 2)
            inner_mask = K.reshape(inner_mask, inner_mask_shape)
        input_uid = generic_utils.object_list_uid(inputs)
        inner_inputs = self._input_map.get(input_uid, inputs)
        output_mask = self.layer.compute_mask(inner_inputs, inner_mask)
        if output_mask is None:
            if mask is None:
                return None
            # input_mask is not None, and output_mask is None:
            # we should return a not-None mask
            output_mask = mask
            for _ in range(2, len(K.int_shape(mask))):
                output_mask = K.any(output_mask, axis=-1)
        else:
            # output_mask is not None. We need to reshape it
            input_length = input_shape[1]
            if not input_length:
                input_length = K.shape(inputs)[1]
            output_mask_int_shape = K.int_shape(output_mask)
            if output_mask_int_shape is None:
                # if the output_mask does not have a static shape,
                # its shape must be the same as mask's
                if mask is not None:
                    output_mask_int_shape = K.int_shape(mask)
                else:
                    output_mask_int_shape = K.compute_output_shape(
                        input_shape)[:-1]
            output_mask_shape = self._get_shape_tuple(
                (-1, input_length), output_mask, 1, output_mask_int_shape[1:])
            output_mask = K.reshape(output_mask, output_mask_shape)
        return output_mask
Example #15
 def call(self, inputs):
   boolean_mask = K.any(
       math_ops.not_equal(inputs, self.mask_value), axis=-1, keepdims=True)
   return inputs * math_ops.cast(boolean_mask, inputs.dtype)
Example #16
 def compute_mask(self, inputs, mask=None):
   return K.any(math_ops.not_equal(inputs, self.mask_value), axis=-1)
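A quick check of this `compute_mask` pattern on a toy input, with the public `tf.not_equal` standing in for the internal `math_ops.not_equal`:

import tensorflow as tf
from tensorflow.keras import backend as K

mask_value = 0.
inputs = tf.constant([[[0., 0.], [1., 2.], [0., 3.]]])  # [1, 3, 2]

# a timestep is kept if any of its features differs from mask_value
mask = K.any(tf.not_equal(inputs, mask_value), axis=-1)
print(K.eval(mask))  # [[False  True  True]]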
Example #17
def greater3_on_axis(x, axis):
    x = K.sum(x, axis=axis)
    x = K.greater(x, 3)
    x = K.any(x)
    return x
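`greater3_on_axis` asks: does any line along `axis` sum to more than 3, i.e. a complete line of four? A toy check using the function defined above, with a hypothetical 0/1 encoding of one player's pieces on a batched 4x4x4 board:

import numpy as np
from tensorflow.keras import backend as K

board = np.zeros((1, 4, 4, 4), dtype="int32")
board[0, 0, :, 2] = 1  # a complete line of four along axis 2

x = K.constant(board, dtype="int32")
print(K.eval(greater3_on_axis(x, 2)))  # True: that line sums to 4
print(K.eval(greater3_on_axis(x, 1)))  # False: no full line along axis 1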
Example #18
    def compute_mask(self, inputs, mask=None):
        """Computes an output mask tensor for Embedding layer.

    This is based on the inputs, mask, and the inner layer.
    If batch size is specified:
    Simply return the input `mask`. (An rnn-based implementation with
    more than one rnn inputs is required but not supported in tf.keras yet.)
    Otherwise we call `compute_mask` of the inner layer at each time step.
    If the output mask at each time step is not `None`:
    (E.g., inner layer is Masking or RNN)
    Concatenate all of them and return the concatenation.
    If the output mask at each time step is `None` and the input mask is not
    `None`:(E.g., inner layer is Dense)
    Reduce the input_mask to 2 dimensions and return it.
    Otherwise (both the output mask and the input mask are `None`):
    (E.g., `mask` is not used at all)
    Return `None`.

    Args:
      inputs: Tensor with shape [batch size, timesteps, ...] indicating the
        input to TimeDistributed. If static shape information is available for
        "batch size", `mask` is returned unmodified.
      mask: Either None (indicating no masking) or a Tensor indicating the
        input mask for TimeDistributed. The shape can be static or dynamic.

    Returns:
      Either None (no masking), or a [batch size, timesteps, ...] Tensor with
      an output mask for the TimeDistributed layer with the shape beyond the
      second dimension being the value of the input mask shape(if the computed
      output mask is none), an output mask with the shape beyond the first
      dimension being the value of the mask shape(if mask is not None) or
      output mask with the shape beyond the first dimension being the
      value of the computed output shape.

    """
        # cases need to call the layer.compute_mask when input_mask is None:
        # Masking layer and Embedding layer with mask_zero
        input_shape = nest.map_structure(
            lambda x: tensor_shape.TensorShape(backend.int_shape(x)), inputs)
        input_shape = tf_utils.convert_shapes(input_shape, to_tuples=False)
        batch_size = tf_utils.convert_shapes(input_shape)
        batch_size = nest.flatten(batch_size)[0]
        is_ragged_input = nest.map_structure(
            lambda x: isinstance(x, ragged_tensor.RaggedTensor), inputs)
        is_ragged_input = generic_utils.to_list(nest.flatten(is_ragged_input))
        if batch_size and not self._always_use_reshape or any(is_ragged_input):
            # batch size matters, we currently do not handle mask explicitly, or if
            # the layer always uses reshape approach, or the input is a ragged tensor.
            return mask
        inner_mask = mask
        if inner_mask is not None:
            inner_mask_shape = self._get_shape_tuple((-1, ), mask, 2)
            inner_mask = backend.reshape(inner_mask, inner_mask_shape)
        inner_input_shape = nest.map_structure(
            lambda tensor: self._get_shape_tuple((-1, ), tensor, 2), inputs)
        inner_inputs = nest.map_structure_up_to(inputs, array_ops.reshape,
                                                inputs, inner_input_shape)
        output_mask = self.layer.compute_mask(inner_inputs, inner_mask)
        if output_mask is None:
            if mask is None:
                return None
            # input_mask is not None, and output_mask is None:
            # we should return a not-None mask
            output_mask = mask
            for _ in range(2, len(backend.int_shape(mask))):
                output_mask = backend.any(output_mask, axis=-1)
        else:
            # output_mask is not None. We need to reshape it
            input_length = tf_utils.convert_shapes(input_shape)
            input_length = nest.flatten(input_length)[1]
            if not input_length:
                input_length = nest.map_structure(
                    lambda x: backend.shape(x)[1], inputs)
                input_length = nest.flatten(input_length)[0]
            output_mask_int_shape = backend.int_shape(output_mask)
            if output_mask_int_shape is None:
                # if the output_mask does not have a static shape,
                # its shape must be the same as mask's
                if mask is not None:
                    output_mask_int_shape = backend.int_shape(mask)
                else:
                    input_shape = generic_utils.to_list(
                        nest.flatten(input_shape))[0]
                    output_mask_int_shape = backend.compute_output_shape(
                        input_shape)[:-1]
            output_mask_shape = self._get_shape_tuple(
                (-1, input_length), output_mask, 1, output_mask_int_shape[1:])
            output_mask = backend.reshape(output_mask, output_mask_shape)
        return output_mask
Example #19
    def mask_loss(self, y_true, y_pred):
        def _mask(y_true, y_pred, iou_threshold=0.5, mask_size=(28, 28)):
            # split up the different predicted blobs
            boxes = y_pred[:, :, :4]
            masks = y_pred[:, :, 4:]

            # split up the different blobs
            annotations = y_true[:, :, :5]
            width = K.cast(y_true[0, 0, 5], dtype='int32')
            height = K.cast(y_true[0, 0, 6], dtype='int32')
            masks_target = y_true[:, :, 7:]

            # reshape the masks back to their original size
            masks_target = K.reshape(masks_target,
                                     (K.shape(masks_target)[0] *
                                      K.shape(masks_target)[1], height, width))
            masks = K.reshape(masks, (K.shape(masks)[0] * K.shape(masks)[1],
                                      mask_size[0], mask_size[1], -1))

            # batch size > 1 fix
            boxes = K.reshape(boxes, (-1, K.shape(boxes)[2]))
            annotations = K.reshape(annotations, (-1, K.shape(annotations)[2]))

            # compute overlap of boxes with annotations
            iou = overlap(boxes, annotations)
            argmax_overlaps_inds = K.argmax(iou, axis=1)
            max_iou = K.max(iou, axis=1)

            # filter those with IoU > 0.5
            indices = tf.where(K.greater_equal(max_iou, iou_threshold))
            boxes = tf.gather_nd(boxes, indices)
            masks = tf.gather_nd(masks, indices)
            argmax_overlaps_inds = tf.gather_nd(argmax_overlaps_inds, indices)
            argmax_overlaps_inds = K.cast(argmax_overlaps_inds, 'int32')
            labels = K.gather(annotations[:, 4], argmax_overlaps_inds)
            labels = K.cast(labels, 'int32')

            # make normalized boxes
            x1 = boxes[:, 0]
            y1 = boxes[:, 1]
            x2 = boxes[:, 2]
            y2 = boxes[:, 3]
            boxes = K.stack([
                y1 / (K.cast(height, dtype=K.floatx()) - 1),
                x1 / (K.cast(width, dtype=K.floatx()) - 1),
                (y2 - 1) / (K.cast(height, dtype=K.floatx()) - 1),
                (x2 - 1) / (K.cast(width, dtype=K.floatx()) - 1),
            ],
                            axis=1)

            # crop and resize masks_target
            # append a fake channel dimension
            masks_target = K.expand_dims(masks_target, axis=3)
            masks_target = tf.image.crop_and_resize(masks_target, boxes,
                                                    argmax_overlaps_inds,
                                                    mask_size)

            # remove fake channel dimension
            masks_target = masks_target[:, :, :, 0]

            # gather the predicted masks using the annotation label
            masks = tf.transpose(masks, (0, 3, 1, 2))
            label_indices = K.stack([tf.range(K.shape(labels)[0]), labels],
                                    axis=1)
            masks = tf.gather_nd(masks, label_indices)

            # compute mask loss
            mask_loss = K.binary_crossentropy(masks_target, masks)
            normalizer = K.shape(masks)[0] * K.shape(masks)[1] * K.shape(
                masks)[2]
            normalizer = K.maximum(K.cast(normalizer, K.floatx()), 1)
            mask_loss = K.sum(mask_loss) / normalizer

            return mask_loss

        # if there are no mask annotations, return 0; else, compute the mask loss
        return tf.cond(
            K.any(K.equal(K.shape(y_true), 0)), lambda: K.cast_to_floatx(0.0),
            lambda: _mask(y_true,
                          y_pred,
                          iou_threshold=self.iou_threshold,
                          mask_size=self.mask_size))
Example #20
def is_winning(x):
    return K.any(K.stack([greater3(x), diaggreater3(x), fulldiaggreater3(x)]))
Example #21
    def __call__(self,
                 *args,
                 inputs=None,
                 initial_state=None,
                 constants=None,
                 mask=None,
                 **kwargs):

        inputs, initial_state, constants = _standardize_args(
            inputs, initial_state, constants, self._num_constants)
        # We allow different shapes of input, even None. It doesn't really matter
        # because ultimately the input will be ignored except for the first step.
        # Nevertheless, we expand the input to have a timesteps dimension. This
        # is done simply for parent class calculations of output size, etc.

        # Allow None as an input. We will create an array of zeros of appropriate shape.
        if inputs is None:
            if initial_state is not None:
                # If LSTM then state might be a list.
                _state = initial_state[0] if isinstance(
                    initial_state, list) else initial_state
                batch_size = _state.shape[:-1]
                inputs = K.zeros_like(_state[..., 0][..., tf.newaxis])
                # inputs = 0 * _state[..., 0][..., tf.newaxis]  # Assume dim=1 input
            else:
                # Neither inputs nor initial_state provided. This likely only happens
                # when building/testing the layer.
                inputs = tf.zeros(
                    (self.timesteps, 1, 1)) if self.time_major else tf.zeros(
                        (1, self.timesteps, 1))

        # Allow 2D input, here reshape to 3D input
        if len(K.int_shape(inputs)) < 3:
            if self.time_major:
                inputs = inputs[tf.newaxis, ...]
            else:
                inputs = inputs[..., tf.newaxis, :]

        time_ax_ix, batch_ax_ix = (0, 1) if self.time_major else (-2, 0)
        input_shape = K.int_shape(inputs)
        input_timesteps = input_shape[time_ax_ix]

        if mask is not None and K.any(~mask):
            mask = nest.flatten(mask)[0]
            # We assume the mask has a time dimension and require it to be the
            # same size as the input (it doesn't make sense to use it otherwise).
            mask_shape = K.int_shape(mask)
            # If the mask only has 1 item in the batch dim then tile it
            if mask_shape[batch_ax_ix] == 1 and input_shape[batch_ax_ix] > 1:
                if self.time_major:
                    bcast_or = tf.zeros((1, input_shape[batch_ax_ix], 1),
                                        dtype=tf.bool)
                else:
                    bcast_or = tf.zeros((input_shape[batch_ax_ix], 1, 1),
                                        dtype=tf.bool)
                mask = tf.math.logical_or(mask, bcast_or)
            if mask_shape[time_ax_ix] == input_timesteps:
                # Prepare slice parameters
                # For head (kept)
                h_sl_begin = [0 for _ in input_shape]
                h_sl_sz = [-1 for _ in input_shape]
                h_sl_sz[batch_ax_ix] = 1
                # For tail (replaced)
                t_sl_begin = [0 for _ in input_shape]
                t_sl_sz = [-1 for _ in input_shape]
                t_sl_sz[batch_ax_ix] = 1
                # Collect input replacements in list
                new_inputs = []
                for batch_ix in range(input_shape[batch_ax_ix]):
                    samp_mask = mask[
                        ...,
                        batch_ix, :] if self.time_major else mask[batch_ix]
                    if K.any(~samp_mask):
                        h_sl_begin[batch_ax_ix] = batch_ix
                        t_sl_begin[batch_ax_ix] = batch_ix
                        first_bad = tf.where(~samp_mask)[0, 0]
                        h_sl_sz[time_ax_ix] = first_bad  # sz is 1-based
                        t_sl_begin[time_ax_ix] = first_bad
                        head = tf.slice(inputs, h_sl_begin, h_sl_sz)
                        tail = tf.slice(inputs, t_sl_begin, t_sl_sz)
                        if self.tile_input:
                            tile_samp = head[-1] if self.time_major else head[
                                ..., -1, :]
                        else:
                            tile_samp = tf.zeros((1, input_shape[-1]))
                        new_row = tf.concat(
                            (head, tile_samp * K.ones_like(tail)),
                            axis=time_ax_ix)
                        new_inputs.append(new_row)
                inputs = tf.concat(new_inputs, axis=batch_ax_ix)

        # Fill/trim input time dimension to be self.timesteps
        if input_timesteps > self.timesteps:
            # Trim excess, if any
            inputs = inputs[:self.timesteps,
                            ...] if self.time_major else inputs[
                                ..., :self.timesteps, :]
        elif input_timesteps < self.timesteps:
            # Take the last timestep as our starting point for the padding data
            pad_sample = inputs[-1] if self.time_major else inputs[..., -1, :]
            if not self.tile_input:
                # zero out padding data if we aren't tiling
                pad_sample = K.zeros_like(pad_sample)
                # pad_sample = 0 * pad_sample
            # Add the time axis back to our pad_sample
            pad_sample = pad_sample[tf.newaxis,
                                    ...] if self.time_major else pad_sample[
                                        ..., tf.newaxis, :]
            # How many more timesteps do we need?
            pad_timestamps = self.timesteps - K.int_shape(inputs)[time_ax_ix]
            # Tile pad_data using a broadcast-add. Does this same line work for both time_major and batch-major?
            pad_data = pad_sample + tf.zeros((pad_timestamps, 1))
            inputs = tf.concat((inputs, pad_data), axis=time_ax_ix)

        if not self._built_with_input:
            self.build_with_input(inputs,
                                  *args,
                                  initial_state=initial_state,
                                  constants=constants,
                                  mask=mask,
                                  **kwargs)

        return super().__call__(inputs,
                                initial_state=initial_state,
                                constants=constants,
                                **kwargs)