def ctc_beam_search_decoder(probs_seq, vocabulary, beam_size, cutoff_prob=1.0, cutoff_top_n=40, ext_scoring_func=None): """Wrapper for the CTC Beam Search Decoder. :param probs_seq: 2-D list of probability distributions over each time step, with each element being a list of normalized probabilities over vocabulary and blank. :type probs_seq: 2-D list :param vocabulary: Vocabulary list. :type vocabulary: list :param beam_size: Width for beam search. :type beam_size: int :param cutoff_prob: Cutoff probability in pruning, default 1.0, no pruning. :type cutoff_prob: float :param cutoff_top_n: Cutoff number in pruning, only top cutoff_top_n characters with highest probs in vocabulary will be used in beam search, default 40. :type cutoff_top_n: int :param ext_scoring_func: External scoring function for partially decoded sentence, e.g. word count or language model. :type external_scoring_func: callable :return: List of tuples of log probability and sentence as decoding results, in descending order of the probability. :rtype: list """ beam_results = decoders_deprecated.ctc_beam_search_decoder( probs_seq.tolist(), beam_size, vocabulary, cutoff_prob, cutoff_top_n, ext_scoring_func) beam_results = [(res[0], res[1].decode('utf-8')) for res in beam_results] return beam_results
def test_beam_search_decoder_2(self): beam_result = decoder.ctc_beam_search_decoder( probs_seq=self.probs_seq2, beam_size=self.beam_size, vocabulary=self.vocab_list) self.assertEqual(beam_result[0][1], self.beam_search_result[1])