Code example #1
    def provide_groundtruth(self, groundtruth_text, scope=None):
        with tf.name_scope(scope, 'ProvideGroundtruth', [groundtruth_text]):
            batch_size = shape_utils.combined_static_and_dynamic_shape(
                groundtruth_text)[0]
            if self._reverse:
                groundtruth_text = ops.string_reverse(groundtruth_text)
            text_labels, text_lengths = self._label_map.text_to_labels(
                groundtruth_text,
                pad_value=self.end_label,
                return_lengths=True)
            start_labels = tf.fill([batch_size, 1],
                                   tf.constant(self.start_label, tf.int64))
            end_labels = tf.fill([batch_size, 1],
                                 tf.constant(self.end_label, tf.int64))
            if not self._sync:
                # Non-synchronized decoding: the start label is fed twice and
                # is also predicted as the first target, so lengths grow by 2.
                decoder_inputs = tf.concat(
                    [start_labels, start_labels, text_labels], axis=1)
                decoder_targets = tf.concat(
                    [start_labels, text_labels, end_labels], axis=1)
                decoder_lengths = text_lengths + 2
            else:
                # Synchronized decoding: one start label prepended to the
                # inputs, one end label appended to the targets.
                decoder_inputs = tf.concat([start_labels, text_labels], axis=1)
                decoder_targets = tf.concat([text_labels, end_labels], axis=1)
                decoder_lengths = text_lengths + 1

            # Truncate sequences to the maximum number of decoder steps.
            decoder_inputs = decoder_inputs[:, :self._max_num_steps]
            decoder_targets = decoder_targets[:, :self._max_num_steps]
            decoder_lengths = tf.minimum(decoder_lengths, self._max_num_steps)

            self._groundtruth_dict['decoder_inputs'] = decoder_inputs
            self._groundtruth_dict['decoder_targets'] = decoder_targets
            self._groundtruth_dict['decoder_lengths'] = decoder_lengths
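
For intuition, here is a small standalone NumPy sketch (not part of the original code; the label ids start=0, end=1 and text labels [2, 3] are invented) showing how the two alignments differ for one sample:

import numpy as np

# Invented label ids for illustration: start=0, end=1, "AB" -> [2, 3].
start_labels = np.array([[0]])
end_labels = np.array([[1]])
text_labels = np.array([[2, 3]])

# Synchronized: the decoder emits one target per input step.
sync_inputs = np.concatenate([start_labels, text_labels], axis=1)    # [[0 2 3]]
sync_targets = np.concatenate([text_labels, end_labels], axis=1)     # [[2 3 1]]

# Non-synchronized: the start label is fed twice and is also the first
# target, so both sequences are one step longer.
nonsync_inputs = np.concatenate(
    [start_labels, start_labels, text_labels], axis=1)               # [[0 0 2 3]]
nonsync_targets = np.concatenate(
    [start_labels, text_labels, end_labels], axis=1)                 # [[0 2 3 1]]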
Code example #2
 def _aggregate_recognition_results(self,
                                    text_list,
                                    scores_list,
                                    scope=None):
     """Aggregate recognition results by picking up ones with highest scores.
 Args
   text_list: a list of tensors with shape [batch_size]
   scores_list: a list of tensors with shape [batch_size]
 """
     with tf.variable_scope(scope, 'AggregateRecognitionResults',
                            (text_list + scores_list)):
         stacked_text = tf.stack(text_list, axis=1)
         stacked_scores = tf.stack(scores_list, axis=1)
         argmax_scores = tf.argmax(stacked_scores, axis=1)
         batch_size = shape_utils.combined_static_and_dynamic_shape(
             stacked_text)[0]
         indices = tf.stack(
             [tf.range(batch_size, dtype=tf.int64), argmax_scores], axis=1)
         aggregated_text = tf.gather_nd(stacked_text, indices)
         aggregated_scores = tf.gather_nd(stacked_scores, indices)
         recognition_dict = {
             'text': aggregated_text,
             'scores': aggregated_scores
         }
     return recognition_dict
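
The argmax-and-gather pattern is easier to see on concrete values. This standalone NumPy sketch (all values invented) performs the same per-sample selection; indexing with two index arrays is NumPy's analogue of tf.gather_nd with the stacked [row, argmax] indices:

import numpy as np

# Two recognition branches, batch of two samples (invented values).
stacked_text = np.array([['hello', 'helo'], ['wor1d', 'word']])
stacked_scores = np.array([[0.9, 0.4], [0.2, 0.7]])

argmax_scores = stacked_scores.argmax(axis=1)       # [0, 1]
rows = np.arange(stacked_text.shape[0])
print(stacked_text[rows, argmax_scores])            # ['hello' 'word']
print(stacked_scores[rows, argmax_scores])          # [0.9 0.7]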
Code example #3
File: loss.py  Project: zhouscientist/RMB
 def __call__(self, logits, labels, lengths, scope=None):
     """
 Args:
   logits: float32 tensor with shape [batch_size, max_time, num_classes]
   labels: int32 tensor with shape [batch_size, max_time]
   lengths: int32 tensor with shape [batch_size]
 """
     with tf.name_scope(scope, 'SequenceCrossEntropyLoss',
                        [logits, labels, lengths]):
         raw_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
             labels=labels, logits=logits)
         batch_size, max_time = shape_utils.combined_static_and_dynamic_shape(
             labels)
         mask = tf.less(tf.tile([tf.range(max_time)], [batch_size, 1]),
                        tf.expand_dims(lengths, 1),
                        name='mask')
         masked_losses = tf.multiply(
             raw_losses, tf.cast(mask, tf.float32),
             name='masked_losses')  # => [batch_size, max_time]
         row_losses = tf.reduce_sum(masked_losses, 1, name='row_losses')
          if self._sequence_normalize:
              # Normalize each sequence's loss by its own length before
              # summing over the batch.
              row_losses = tf.truediv(row_losses,
                                      tf.cast(tf.maximum(lengths, 1),
                                              tf.float32),
                                      name='seq_normed_losses')
          loss = tf.reduce_sum(row_losses)
         if self._sample_normalize:
             loss = tf.truediv(
                 loss, tf.cast(tf.maximum(batch_size, 1), tf.float32))
         if self._weight:
             loss = loss * self._weight
     return loss
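
The length mask is the heart of this loss: positions past each sequence's true length contribute nothing. A standalone NumPy sketch with invented lengths shows the mask the code above builds:

import numpy as np

batch_size, max_time = 2, 4
lengths = np.array([2, 3])  # invented example lengths
mask = np.tile(np.arange(max_time), (batch_size, 1)) < lengths[:, None]
print(mask.astype(int))
# [[1 1 0 0]
#  [1 1 1 0]]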
Code example #4
 def _localize(self, preprocessed_images):
     k = self._num_control_points
     conv_output = self._convnet.extract_features(preprocessed_images)[-1]
     batch_size = shape_utils.combined_static_and_dynamic_shape(
         conv_output)[0]
     conv_output = tf.reshape(conv_output, [batch_size, -1])
     with arg_scope(self._fc_hyperparams):
         fc1 = fully_connected(conv_output, 512)
         fc2_weights_initializer = tf.zeros_initializer()
         fc2_biases_initializer = tf.constant_initializer(self._init_bias)
         fc2 = fully_connected(0.1 * fc1,
                               2 * k,
                               weights_initializer=fc2_weights_initializer,
                               biases_initializer=fc2_biases_initializer,
                               activation_fn=None,
                               normalizer_fn=None)
         if self._summarize_activations:
             tf.summary.histogram('fc1', fc1)
             tf.summary.histogram('fc2', fc2)
     if self._activation == 'sigmoid':
         ctrl_pts = tf.sigmoid(fc2)
     elif self._activation == 'none':
         ctrl_pts = fc2
     else:
         raise ValueError('Unknown activation: {}'.format(self._activation))
     ctrl_pts = tf.reshape(ctrl_pts, [batch_size, k, 2])
     return ctrl_pts
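
Note the initialization scheme: fc2 starts with zero weights and a constant bias of self._init_bias, so before any training the predicted control points equal the preset bias pattern regardless of the input, while the 0.1 scaling of fc1 keeps early updates small. Starting the localization branch at a known transform like this is a common way to stabilize spatial-transformer training.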
Code example #5
    def extract_features(self, preprocessed_inputs, scope=None):
        with tf.variable_scope(scope, 'FeatureExtractor',
                               [preprocessed_inputs]):
            feature_maps = self._convnet.extract_features(preprocessed_inputs)

        if len(self._brnn_fn_list) > 0:
            feature_sequences_list = []
            for i, feature_map in enumerate(feature_maps):
                shape_assert = tf.Assert(tf.equal(
                    tf.shape(feature_map)[1], 1
                ), [
                    'Feature map height must be 1 if bidirectional RNN is going to be applied.'
                ])
                batch_size, _, _, map_depth = shape_utils.combined_static_and_dynamic_shape(
                    feature_map)
                with tf.control_dependencies([shape_assert]):
                    feature_sequence = tf.reshape(feature_map,
                                                  [batch_size, -1, map_depth])
                for j, brnn_fn in enumerate(self._brnn_fn_list):
                    brnn_object = brnn_fn()
                    feature_sequence = brnn_object.predict(
                        feature_sequence,
                        scope='BidirectionalRnn_Branch_{}_{}'.format(i, j))
                feature_sequences_list.append(feature_sequence)

            feature_maps = [
                tf.expand_dims(fmap, axis=1) for fmap in feature_sequences_list
            ]
        return feature_maps
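
The height-1 assertion exists because the reshape collapses each [batch_size, 1, map_width, map_depth] feature map into a [batch_size, map_width, map_depth] sequence with one time step per image column. If a map were taller than one pixel, pixels from different rows would be silently interleaved into the sequence, so the graph fails fast instead.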
Code example #6
 def postprocess(self, predictions_dict, scope=None):
   with tf.variable_scope(scope, 'Postprocess', list(predictions_dict.values())):
     logits = predictions_dict['logits']
     batch_size, max_time, _ = shape_utils.combined_static_and_dynamic_shape(logits)
     logits_time_major = tf.transpose(logits, [1,0,2])
     sparse_labels, log_prob = tf.nn.ctc_greedy_decoder(
       logits_time_major,
       tf.fill([batch_size], max_time),
       merge_repeated=True
     )
     labels = tf.sparse_tensor_to_dense(sparse_labels[0], default_value=-1)
     text = self._label_map.labels_to_text(labels)
     recognitions_dict = {'text': text}
   return recognitions_dict
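
Two details are worth noting: tf.nn.ctc_greedy_decoder consumes time-major logits, hence the [1,0,2] transpose, and every sample is assigned the full max_time as its sequence length because the logits carry no per-sample lengths at this point. The dense label tensor is padded with -1, which the surrounding label map is presumably built to treat as "no character".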
Code example #7
 def loss(self, predictions_dict, scope=None):
   with tf.variable_scope(scope, 'Loss', list(predictions_dict.values())):
     logits = predictions_dict['logits']
     batch_size, max_time, _ = shape_utils.combined_static_and_dynamic_shape(logits)
     losses = tf.nn.ctc_loss(
       tf.cast(self._groundtruth_dict['text_labels_sparse'], tf.int32),
       predictions_dict['logits'],
       tf.fill([batch_size], max_time),
       preprocess_collapse_repeated=False,
       ctc_merge_repeated=True,
       ignore_longer_outputs_than_inputs=True,
       time_major=False)
     loss = tf.reduce_mean(losses)
   return {'RecognitionLoss': loss}
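
Unlike the greedy decoder above, tf.nn.ctc_loss takes its labels as a tf.SparseTensor (hence text_labels_sparse), and time_major=False lets the batch-major logits pass through unchanged. ignore_longer_outputs_than_inputs=True keeps training from aborting on samples whose label sequence is longer than the available time steps; such samples effectively contribute no loss.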
Code example #8
    def predict(self, feature_maps, scope=None):
        if not isinstance(feature_maps, (list, tuple)):
            raise ValueError('`feature_maps` must be a list or a tuple')

        with tf.variable_scope(scope, 'Predict', feature_maps):
            batch_size = shape_utils.combined_static_and_dynamic_shape(
                feature_maps[0])[0]
            decoder_cell = self._build_decoder_cell(feature_maps)
            decoder = self._build_decoder(decoder_cell, batch_size)

            outputs, _, output_lengths = seq2seq.dynamic_decode(
                decoder=decoder,
                output_time_major=False,
                impute_finished=False,
                maximum_iterations=self._max_num_steps)
            # Apply the RNN regularizer to the decoder's kernel weights only.
            filter_weights = lambda variables: [
                x for x in variables if x.op.name.endswith('kernel')
            ]
            tf.contrib.layers.apply_regularization(
                self._rnn_regularizer,
                filter_weights(decoder_cell.trainable_weights))

            outputs_dict = None
            if self._is_training:
                assert isinstance(outputs, seq2seq.BasicDecoderOutput)
                outputs_dict = {
                    'labels': outputs.sample_id,
                    'logits': outputs.rnn_output,
                }
            else:
                assert isinstance(outputs,
                                  seq2seq.FinalBeamSearchDecoderOutput)
                prediction_labels = outputs.predicted_ids[:, :, 0]
                prediction_lengths = output_lengths[:, 0]
                prediction_scores = tf.gather_nd(
                    outputs.beam_search_decoder_output.scores[:, :, 0],
                    tf.stack([tf.range(batch_size), prediction_lengths - 1],
                             axis=1))
                outputs_dict = {
                    'labels': prediction_labels,
                    'scores': prediction_scores,
                    'lengths': prediction_lengths
                }
        return outputs_dict
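
The two branches mirror how decoding differs between phases: during training, outputs come from a teacher-forced seq2seq.BasicDecoder, so the per-step logits are kept for the cross-entropy loss; at inference a beam-search decoder is assumed, [:, :, 0] slices out the top-ranked beam, and each sample's score is gathered at its final decoding step.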
Code example #9
def tile_activation_maps_rows_cols(maps, num_rows, num_cols):
    """
  Args:
    maps: [batch_size, map_height, map_width, map_depth]
  Return:
    tiled_map: [batch_size, tiled_height, tiled_width]
  """
    batch_size, map_height, map_width, map_depth = \
      shape_utils.combined_static_and_dynamic_shape(maps)

    # Pad the channel dimension with zeros when there are fewer than
    # num_rows * num_cols maps; otherwise keep only the first num_maps channels.
    num_maps = num_rows * num_cols
    padded_map = tf.cond(tf.greater(num_maps, map_depth),
                         true_fn=lambda: tf.pad(
                             maps, [[0, 0], [0, 0], [0, 0],
                                    [0, tf.maximum(num_maps - map_depth, 0)]]),
                         false_fn=lambda: maps[:, :, :, :num_maps])

    # reshape to [batch_size, map_height, map_width, num_rows, num_cols]
    reshaped_map = tf.reshape(
        padded_map, [batch_size, map_height, map_width, num_rows, num_cols])

    # unstack and concat along widths
    width_concated_maps = tf.concat(
        tf.unstack(
            reshaped_map, axis=4
        ),  # => list of [batch_size, map_height, map_width, num_rows]
        axis=2)  # => [batch_size, map_height, map_width * num_cols, num_rows]

    tiled_map = tf.concat(
        tf.unstack(
            width_concated_maps, axis=3
        ),  # => list of [batch_size, map_height, map_width * num_cols]
        axis=1)  # => [batch_size, map_height * num_rows, map_width * num_cols]

    tiled_map = tf.expand_dims(tiled_map, axis=3)

    return tiled_map
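
The unstack-and-concat pair is just a grid rearrangement. A standalone NumPy sketch with tiny invented shapes traces the same two steps:

import numpy as np

batch_size, h, w = 1, 2, 2
num_rows, num_cols = 2, 3
maps = np.arange(batch_size * h * w * num_rows * num_cols).reshape(
    batch_size, h, w, num_rows * num_cols)

reshaped = maps.reshape(batch_size, h, w, num_rows, num_cols)
# Concatenate the num_cols slices along the width axis...
wide = np.concatenate([reshaped[..., c] for c in range(num_cols)], axis=2)
# ...then the num_rows slices along the height axis.
tiled = np.concatenate([wide[..., r] for r in range(num_rows)], axis=1)
print(tiled.shape)  # (1, 4, 6) == (batch_size, h * num_rows, w * num_cols)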
Code example #10
def tile_activation_maps_max_dimensions(maps, max_height, max_width):
    batch_size, map_height, map_width, map_depth = \
      shape_utils.combined_static_and_dynamic_shape(maps)
    num_rows = max_height // map_height
    num_cols = max_width // map_width
    return tile_activation_maps_rows_cols(maps, num_rows, num_cols)
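
The integer divisions guarantee that the tiled output never exceeds max_height by max_width; when the input has more than num_rows * num_cols channels, the excess maps are dropped by the truncation branch of tile_activation_maps_rows_cols.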
Code example #11
    def _batch_sample(self, images, batch_sampling_grid):
        """
    Args:
      images: tensor of any time with shape [batch_size, image_h, image_w, depth]
      batch_sampling_grid; float32 tensor with shape [batch_size, num_sampling_pts, 2]
    """
        if images.dtype != tf.float32:
            raise ValueError('image must be of type tf.float32')
        batch_G = batch_sampling_grid
        batch_size, image_h, image_w, _ = shape_utils.combined_static_and_dynamic_shape(
            images)
        n = shape_utils.combined_static_and_dynamic_shape(
            batch_sampling_grid)[1]

        batch_Gx = image_w * batch_G[:, :, 0]
        batch_Gy = image_h * batch_G[:, :, 1]
        batch_Gx = tf.clip_by_value(batch_Gx, 0., image_w - 2)
        batch_Gy = tf.clip_by_value(batch_Gy, 0., image_h - 2)

        # Integer corner coordinates around each sampling point; each of
        # batch_Gx0, batch_Gx1, batch_Gy0, batch_Gy1 has shape [batch_size, n].
        batch_Gx0 = tf.cast(tf.floor(batch_Gx), tf.int32)
        batch_Gx1 = batch_Gx0 + 1
        batch_Gy0 = tf.cast(tf.floor(batch_Gy), tf.int32)
        batch_Gy1 = batch_Gy0 + 1

        def _get_pixels(images, batch_x, batch_y, batch_indices):
            indices = tf.stack([batch_indices, batch_y, batch_x],
                               axis=2)  # => [B, n, 3]
            pixels = tf.gather_nd(images, indices)
            return pixels

        batch_indices = tf.tile(tf.expand_dims(tf.range(batch_size), 1),
                                [1, n])  # => [B, n]
        batch_I00 = _get_pixels(images, batch_Gx0, batch_Gy0, batch_indices)
        batch_I01 = _get_pixels(images, batch_Gx0, batch_Gy1, batch_indices)
        batch_I10 = _get_pixels(images, batch_Gx1, batch_Gy0, batch_indices)
        batch_I11 = _get_pixels(images, batch_Gx1, batch_Gy1,
                                batch_indices)  # => [B, n, d]

        batch_Gx0 = tf.to_float(batch_Gx0)
        batch_Gx1 = tf.to_float(batch_Gx1)
        batch_Gy0 = tf.to_float(batch_Gy0)
        batch_Gy1 = tf.to_float(batch_Gy1)

        batch_w00 = (batch_Gx1 - batch_Gx) * (batch_Gy1 - batch_Gy)
        batch_w01 = (batch_Gx1 - batch_Gx) * (batch_Gy - batch_Gy0)
        batch_w10 = (batch_Gx - batch_Gx0) * (batch_Gy1 - batch_Gy)
        batch_w11 = (batch_Gx - batch_Gx0) * (batch_Gy - batch_Gy0
                                              )  # => [B, n]

        batch_pixels = tf.add_n([
            tf.expand_dims(batch_w00, axis=2) * batch_I00,
            tf.expand_dims(batch_w01, axis=2) * batch_I01,
            tf.expand_dims(batch_w10, axis=2) * batch_I10,
            tf.expand_dims(batch_w11, axis=2) * batch_I11,
        ])

        output_h, output_w = self._output_image_size
        output_maps = tf.reshape(batch_pixels,
                                 [batch_size, output_h, output_w, -1])
        output_maps = tf.cast(output_maps, dtype=images.dtype)

        if self._summarize_activations:
            tf.summary.image('InputImage1', images[:2], max_outputs=2)
            tf.summary.image('InputImage2', images[-2:], max_outputs=2)
            tf.summary.image('RectifiedImage1', output_maps[:2], max_outputs=2)
            tf.summary.image('RectifiedImage2',
                             output_maps[-2:],
                             max_outputs=2)

        return output_maps
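
The four weights implement standard bilinear interpolation: each sampled pixel is an area-weighted blend of its four integer-grid neighbors, and the earlier clipping to image_w - 2 and image_h - 2 keeps the +1 neighbors inside the image. A minimal pure-Python version for a single point (not from the original code) makes the weighting explicit:

def bilinear(I00, I01, I10, I11, x, y, x0, y0):
    # I00 = image[y0, x0], I01 = image[y1, x0],
    # I10 = image[y0, x1], I11 = image[y1, x1], where x1 = x0 + 1, y1 = y0 + 1.
    x1, y1 = x0 + 1, y0 + 1
    return ((x1 - x) * (y1 - y) * I00 + (x1 - x) * (y - y0) * I01 +
            (x - x0) * (y1 - y) * I10 + (x - x0) * (y - y0) * I11)

print(bilinear(0.0, 0.0, 1.0, 1.0, x=0.5, y=0.5, x0=0, y0=0))  # 0.5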