def ctc_beam_search(inputs, sequence_length, beam_width=100,
                    top_paths=1, merge_repeated=True):
    decoded_ixs, decoded_vals, decoded_shapes, log_probabilities = (
        gen_ctc_ops._ctc_beam_search_decoder(
            inputs, sequence_length, beam_width=beam_width, top_paths=top_paths,
            merge_repeated=merge_repeated))
    return (ops.SparseTensor(decoded_ixs[0], decoded_vals[0], decoded_shapes[0]),
            log_probabilities)
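A minimal usage sketch for the wrapper above (the placeholder shapes are assumptions; TF1-style graph mode):

import tensorflow as tf

# Assumed toy shapes: 10 time steps, batch of 2, 5 classes (incl. the blank).
logits = tf.placeholder(tf.float32, shape=[10, 2, 5])
seq_len = tf.placeholder(tf.int32, shape=[2])
# Only the best path is returned, as a single SparseTensor.
best_path, log_probs = ctc_beam_search(logits, seq_len, beam_width=50)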
Example #2
def ctc_beam_search_decoder(inputs,
                            sequence_length,
                            beam_width=100,
                            top_paths=1,
                            merge_repeated=True):
    """Performs beam search decoding on the logits given in input.

  **Note** The `ctc_greedy_decoder` is a special case of the
  `ctc_beam_search_decoder` with `top_paths=1` (but that decoder is faster
  for this special case).

  If `merge_repeated` is `True`, merge repeated classes in the output beams.
  This means that if consecutive entries in a beam are the same,
  only the first of these is emitted.  That is, when the top path
  is `A B B B B`, the return value is:

    * `A B` if `merge_repeated = True`.
    * `A B B B B` if `merge_repeated = False`.

  Args:
    inputs: 3-D `float` `Tensor`, size
      `[max_time x batch_size x num_classes]`.  The logits.
    sequence_length: 1-D `int32` vector containing sequence lengths,
      having size `[batch_size]`.
    beam_width: An int scalar >= 0 (beam search beam width).
    top_paths: An int scalar >= 0, <= beam_width (controls output size).
    merge_repeated: Boolean.  Default: True.

  Returns:
    A tuple `(decoded, log_probabilities)` where
    decoded: A list of length top_paths, where `decoded[j]`
      is a `SparseTensor` containing the decoded outputs:
      `decoded[j].indices`: Indices matrix `(total_decoded_outputs[j] x 2)`
        The rows store: [batch, time].
      `decoded[j].values`: Values vector, size `(total_decoded_outputs[j])`.
        The vector stores the decoded classes for beam j.
      `decoded[j].shape`: Shape vector, size `(2)`.
        The shape values are: `[batch_size, max_decoded_length[j]]`.
    log_probability: A `float` matrix `(batch_size x top_paths)` containing
        sequence log-probabilities.
  """

    decoded_ixs, decoded_vals, decoded_shapes, log_probabilities = (
        gen_ctc_ops._ctc_beam_search_decoder(inputs,
                                             sequence_length,
                                             beam_width=beam_width,
                                             top_paths=top_paths,
                                             merge_repeated=merge_repeated))

    return ([
        ops.SparseTensor(ix, val, shape)
        for (ix, val, shape) in zip(decoded_ixs, decoded_vals, decoded_shapes)
    ], log_probabilities)
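A short call sketch for the multi-path variant (the placeholder setup is an assumption):

import tensorflow as tf

logits = tf.placeholder(tf.float32, shape=[10, 2, 5])  # [max_time, batch, classes]
seq_len = tf.placeholder(tf.int32, shape=[2])
decoded, log_probs = ctc_beam_search_decoder(logits, seq_len,
                                             beam_width=100, top_paths=3)
# `decoded` is a list of 3 SparseTensors, ordered from most to least probable.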
Example #3
def ctc_beam_search_decoder(inputs, sequence_length, beam_width=100,
                            top_paths=1, merge_repeated=True):
  """Performs beam search decoding on the logits given in input.

  **Note** The `ctc_greedy_decoder` is a special case of the
  `ctc_beam_search_decoder` with `top_paths=1` and `beam_width=1` (but
  that decoder is faster for this special case).

  If `merge_repeated` is `True`, merge repeated classes in the output beams.
  This means that if consecutive entries in a beam are the same,
  only the first of these is emitted.  That is, when the top path
  is `A B B B B`, the return value is:

    * `A B` if `merge_repeated = True`.
    * `A B B B B` if `merge_repeated = False`.

  Args:
    inputs: 3-D `float` `Tensor`, size
      `[max_time x batch_size x num_classes]`.  The logits.
    sequence_length: 1-D `int32` vector containing sequence lengths,
      having size `[batch_size]`.
    beam_width: An int scalar >= 0 (beam search beam width).
    top_paths: An int scalar >= 0, <= beam_width (controls output size).
    merge_repeated: Boolean.  Default: True.

  Returns:
    A tuple `(decoded, log_probabilities)` where
    decoded: A list of length top_paths, where `decoded[j]`
      is a `SparseTensor` containing the decoded outputs:
      `decoded[j].indices`: Indices matrix `(total_decoded_outputs[j] x 2)`
        The rows store: [batch, time].
      `decoded[j].values`: Values vector, size `(total_decoded_outputs[j])`.
        The vector stores the decoded classes for beam j.
      `decoded[j].shape`: Shape vector, size `(2)`.
        The shape values are: `[batch_size, max_decoded_length[j]]`.
    log_probabilities: A `float` matrix `(batch_size x top_paths)` containing
        sequence log-probabilities.
  """

  decoded_ixs, decoded_vals, decoded_shapes, log_probabilities = (
      gen_ctc_ops._ctc_beam_search_decoder(
          inputs, sequence_length, beam_width=beam_width, top_paths=top_paths,
          merge_repeated=merge_repeated))

  return (
      [sparse_tensor.SparseTensor(ix, val, shape) for (ix, val, shape)
       in zip(decoded_ixs, decoded_vals, decoded_shapes)],
      log_probabilities)
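The decoded paths come back as `SparseTensor`s; a short sketch (toy shapes assumed) of densifying them with the TF1 API for inspection:

import tensorflow as tf

logits = tf.placeholder(tf.float32, shape=[10, 2, 5])
seq_len = tf.placeholder(tf.int32, shape=[2])
decoded, log_probs = ctc_beam_search_decoder(logits, seq_len, top_paths=2)
# Densify each path; -1 marks padding where rows have different lengths.
dense_paths = [tf.sparse_tensor_to_dense(d, default_value=-1) for d in decoded]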
Example #4
with tf.name_scope("encodeing"):
    batch_s = tf.shape(y_conv)[0]
    logits = tf.reshape(y_conv, [batch_s, -1, n_classes])
    print logits
    logits = tf.transpose(logits, (1, 0, 2))
    loss = ctc_ops.ctc_loss(logits, y, seq_len)
    cost = tf.reduce_mean(loss)
    print logits
    optimizer = tf.train.MomentumOptimizer(initial_learning_rate,
                                           0.9).minimize(cost)
    #decoded_ixs, decoded_vals, decoded_shapes,
    decoded_ixs, decoded_vals, decoded_shapes, log_probabilities = (
        gen_ctc_ops._ctc_beam_search_decoder(logits,
                                             seq_len,
                                             beam_width=100,
                                             top_paths=1,
                                             merge_repeated=True))
    print type(decoded_ixs)
    ler = tf.reduce_mean(tf.edit_distance(tf.cast(decoded_ixs, tf.int32), y))
'''
with tf.name_scope("loss"):
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y_conv, y))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    
with tf.name_scope("accuracy"):
    correct_prediction = tf.equal(tf.arg_max(y_conv,1), tf.arg_max(y,1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction,"float"))
'''

# Launch the graph
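The launch code itself is cut off here; what follows is a minimal sketch of a typical TF1 training step under this graph (the `x`, `batch_inputs`, `batch_labels`, and `batch_seq_len` names are hypothetical):

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # `y` must be fed as a tf.SparseTensorValue for ctc_loss / edit_distance;
    # `x` is the (omitted) input placeholder feeding y_conv.
    _, batch_cost, batch_ler = sess.run(
        [optimizer, cost, ler],
        feed_dict={x: batch_inputs, y: batch_labels, seq_len: batch_seq_len})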