def ctc_beam_search(inputs, sequence_length, beam_width=100, top_paths=1,
                    merge_repeated=True):
  decoded_ixs, decoded_vals, decoded_shapes, log_probabilities = (
      gen_ctc_ops._ctc_beam_search_decoder(
          inputs, sequence_length, beam_width=beam_width, top_paths=top_paths,
          merge_repeated=merge_repeated))
  return (ops.SparseTensor(decoded_ixs[0], decoded_vals[0], decoded_shapes[0]),
          log_probabilities)
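# A minimal usage sketch for the decoder above, under stated assumptions:
# the shapes and logits below are hypothetical (random), and the call goes
# through the supported public wrapper tf.nn.ctc_beam_search_decoder rather
# than the internal gen_ctc_ops module.
import numpy as np
import tensorflow as tf

max_time, batch_size, num_classes = 50, 2, 11  # 10 labels + 1 CTC blank
logits_in = tf.placeholder(tf.float32, [max_time, batch_size, num_classes])
lengths_in = tf.placeholder(tf.int32, [batch_size])

decoded, log_prob = tf.nn.ctc_beam_search_decoder(
    logits_in, lengths_in, beam_width=100, top_paths=1, merge_repeated=True)
# decoded[0] is a SparseTensor; densify it for inspection.
dense = tf.sparse_tensor_to_dense(decoded[0], default_value=-1)

with tf.Session() as sess:
    print(sess.run(dense, feed_dict={
        logits_in: np.random.randn(max_time, batch_size, num_classes),
        lengths_in: [max_time] * batch_size}))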
def ctc_beam_search_decoder(inputs, sequence_length, beam_width=100,
                            top_paths=1, merge_repeated=True):
  """Performs beam search decoding on the logits given in input.

  **Note** The `ctc_greedy_decoder` is a special case of the
  `ctc_beam_search_decoder` with `top_paths=1` and `beam_width=1` (but
  that decoder is faster for this special case).

  If `merge_repeated` is `True`, merge repeated classes in the output beams.
  This means that if consecutive entries in a beam are the same, only
  the first of these is emitted.  That is, when the top path
  is `A B B B B`, the return value is:

    * `A B` if `merge_repeated = True`.
    * `A B B B B` if `merge_repeated = False`.

  Args:
    inputs: 3-D `float` `Tensor`, size
      `[max_time x batch_size x num_classes]`.  The logits.
    sequence_length: 1-D `int32` vector containing sequence lengths,
      having size `[batch_size]`.
    beam_width: An int scalar >= 0 (beam search beam width).
    top_paths: An int scalar >= 0, <= beam_width (controls output size).
    merge_repeated: Boolean.  Default: True.

  Returns:
    A tuple `(decoded, log_probabilities)` where

    decoded: A list of length top_paths, where `decoded[j]`
      is a `SparseTensor` containing the decoded outputs:

      `decoded[j].indices`: Indices matrix `(total_decoded_outputs[j] x 2)`.
        The rows store: [batch, time].

      `decoded[j].values`: Values vector, size `(total_decoded_outputs[j])`.
        The vector stores the decoded classes for beam j.

      `decoded[j].shape`: Shape vector, size `(2)`.
        The shape values are: `[batch_size, max_decoded_length[j]]`.

    log_probability: A `float` matrix `(batch_size x top_paths)` containing
        sequence log-probabilities.
  """
  decoded_ixs, decoded_vals, decoded_shapes, log_probabilities = (
      gen_ctc_ops._ctc_beam_search_decoder(
          inputs, sequence_length, beam_width=beam_width, top_paths=top_paths,
          merge_repeated=merge_repeated))
  return (
      [sparse_tensor.SparseTensor(ix, val, shape) for (ix, val, shape)
       in zip(decoded_ixs, decoded_vals, decoded_shapes)],
      log_probabilities)
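# To make the merge_repeated note in the docstring concrete, a small sketch
# (the class indices are hypothetical): this hand-built logits tensor makes
# A B B B B the dominant path, decoded once with each setting.
import numpy as np
import tensorflow as tf

# One batch item, 5 timesteps, 3 classes: A=0, B=1, blank=2 (last class).
path = [0, 1, 1, 1, 1]  # A B B B B
logits_np = np.full((5, 1, 3), -5.0, dtype=np.float32)
for t, c in enumerate(path):
    logits_np[t, 0, c] = 5.0  # make the desired class dominate each step

logits_t = tf.constant(logits_np)
lens_t = tf.constant([5], dtype=tf.int32)
merged, _ = tf.nn.ctc_beam_search_decoder(logits_t, lens_t,
                                          merge_repeated=True)
unmerged, _ = tf.nn.ctc_beam_search_decoder(logits_t, lens_t,
                                            merge_repeated=False)

with tf.Session() as sess:
    print(sess.run(tf.sparse_tensor_to_dense(merged[0])))    # [[0 1]] -> A B
    print(sess.run(tf.sparse_tensor_to_dense(unmerged[0])))  # [[0 1 1 1 1]]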
with tf.name_scope("encodeing"): batch_s = tf.shape(y_conv)[0] logits = tf.reshape(y_conv, [batch_s, -1, n_classes]) print logits logits = tf.transpose(logits, (1, 0, 2)) loss = ctc_ops.ctc_loss(logits, y, seq_len) cost = tf.reduce_mean(loss) print logits optimizer = tf.train.MomentumOptimizer(initial_learning_rate, 0.9).minimize(cost) #decoded_ixs, decoded_vals, decoded_shapes, decoded_ixs, decoded_vals, decoded_shapes, log_probabilities = ( gen_ctc_ops._ctc_beam_search_decoder(logits, seq_len, beam_width=100, top_paths=1, merge_repeated=True)) print type(decoded_ixs) ler = tf.reduce_mean(tf.edit_distance(tf.cast(decoded_ixs, tf.int32), y)) ''' with tf.name_scope("loss"): cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y_conv, y)) optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) with tf.name_scope("accuracy"): correct_prediction = tf.equal(tf.arg_max(y_conv,1), tf.arg_max(y,1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction,"float")) ''' # Launch the graph