def test_beam_decoders(self):
    '''
    Test on random data that the custom decoder outputs the same transcript
    as the standard TF beam search decoder.
    '''
    seq = np.random.uniform(size=self.seq.shape).astype(np.float32)
    logits = tf.constant(seq)
    seq_len = tf.constant([self.seq.shape[0]])

    beam_search_decoded = tf.nn.ctc_beam_search_decoder(logits, seq_len,
        beam_width=self.beam_width,
        top_paths=1,
        merge_repeated=False)


    with tf.Session() as sess:
      res_beam = sess.run(beam_search_decoded)
    decoded_beam, prob_beam = res_beam
    prob1 = prob_beam[0][0]
    decoded_text1 = ''.join([self.vocab[c] for c in decoded_beam[0].values])

    res = ctc_beam_search_decoder(softmax(seq.squeeze()), self.vocab[:-1],
                                  beam_size=self.beam_width)
    prob2, decoded_text2 = res[0]

    if tf.__version__ >= '1.11':
      # works for newer versions only (with CTC decoder fix)
      self.assertTrue( abs(prob1 - prob2) < self.tol )
    self.assertTrue( prob2 < 0 )

    self.assertTrue( decoded_text1 == decoded_text2 )
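The custom decoder above consumes per-frame probabilities, so the test feeds it softmax(seq.squeeze()) while the TF op takes the raw logits. The softmax helper itself is not shown in the snippet; a minimal NumPy version it is presumably equivalent to (the name and axis convention are assumptions):

import numpy as np

def softmax(logits, axis=-1):
    # subtract the per-frame max before exponentiating for numerical stability
    shifted = logits - np.max(logits, axis=axis, keepdims=True)
    exp = np.exp(shifted)
    return exp / np.sum(exp, axis=axis, keepdims=True)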
Example #2
    def perform_beam_search(self, probs: np.ndarray, lm: bool = False):
        decoded = ctc_beam_search_decoder(
            probs_seq=probs,
            vocabulary=self.text_featurizer.vocab_array,
            beam_size=self.text_featurizer.decoder_config["beam_width"],
            ext_scoring_func=self.text_featurizer.scorer if lm else None)
        decoded = decoded[0][-1]

        return tf.convert_to_tensor(decoded, dtype=tf.string)
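The decoder returns its candidates sorted best-first as (score, transcript) pairs, so decoded[0][-1] above keeps just the transcript of the top beam. A tiny illustration of that indexing (the candidate values here are made up):

# hypothetical best-first decoder output: (score, transcript) pairs
candidates = [(-4.08, 'ten seconds'), (-5.91, 'then seconds')]
best_transcript = candidates[0][-1]  # -> 'ten seconds'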
Example #3
    def predict_lm(self, inputs, lm, beam_size: int = 100, **kwargs):
        """
        Transcribe inputs using Beam Search + LM, will return list of strings.
        This method will not able to utilise batch decoding, instead will do loop to decode for each elements.

        Parameters
        ----------
        input: List[np.array]
            List[np.array] or List[malaya_speech.model.frame.Frame].
        lm: ctc_decoders.Scorer
            Returned from `malaya_speech.stt.language_model()`.
        beam_size: int, optional (default=100)
            beam size for beam decoder.
        

        Returns
        -------
        result: List[str]
        """
        try:
            from ctc_decoders import ctc_beam_search_decoder
        except ImportError:
            raise ModuleNotFoundError(
                'ctc_decoders not installed. Please install it by `pip install ctc-decoders` and try again.'
            )

        inputs = [
            input.array if isinstance(input, Frame) else input
            for input in inputs
        ]

        padded, lens = sequence_1d(inputs, return_len=True)
        logits, seq_lens = self._sess.run(
            [self._softmax, self._seq_lens],
            feed_dict={
                self._X: padded,
                self._X_len: lens
            },
        )
        logits = np.transpose(logits, axes=(1, 0, 2))
        results = []
        for i in range(len(logits)):
            d = ctc_beam_search_decoder(
                logits[i][:seq_lens[i]],
                self._vocab,
                beam_size,
                ext_scoring_func=lm,
                **kwargs,
            )
            results.append(d[0][1])
        return results
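Note how the loop above decodes one utterance at a time and trims each logit matrix to its true length (logits[i][:seq_lens[i]]) so padded frames never reach the decoder. A standalone call with the same positional signature, using a toy vocabulary and random probabilities (only a sketch; it assumes the ctc-decoders package is installed and that the blank symbol occupies the last probability column, as in the other examples):

import numpy as np
from ctc_decoders import ctc_beam_search_decoder

vocab = ['a', 'b', 'c', ' ']                        # blank is implicit, last column
probs = np.random.uniform(size=(20, len(vocab) + 1)).astype(np.float32)
probs /= probs.sum(axis=1, keepdims=True)           # each frame must be a distribution

candidates = ctc_beam_search_decoder(probs, vocab, 25)
best_score, best_transcript = candidates[0]         # candidates are sorted best-first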
Example #4
    def _perform_beam_search(
        self,
        probs: np.ndarray,
        lm: bool = False,
    ):
        from ctc_decoders import ctc_beam_search_decoder

        decoded = ctc_beam_search_decoder(
            probs_seq=probs,
            vocabulary=self.text_featurizer.non_blank_tokens,
            beam_size=self.text_featurizer.decoder_config.beam_width,
            ext_scoring_func=self.text_featurizer.scorer if lm else None,
        )
        decoded = decoded[0][-1]

        return tf.convert_to_tensor(decoded, dtype=tf.string)
Example #5
  def test_decoders(self):
    '''
    Test all CTC decoders on a sample transcript ('ten seconds').
    Standard TF decoders should output 'then seconds'.
    Custom CTC decoder with LM rescoring should yield 'ten seconds'.
    '''
    logits = tf.constant(self.seq)
    seq_len = tf.constant([self.seq.shape[0]])

    greedy_decoded = tf.nn.ctc_greedy_decoder(logits, seq_len, 
        merge_repeated=True)

    beam_search_decoded = tf.nn.ctc_beam_search_decoder(logits, seq_len, 
        beam_width=self.beam_width, 
        top_paths=1, 
        merge_repeated=False)

    with tf.Session() as sess:
      res_greedy, res_beam = sess.run([greedy_decoded, 
          beam_search_decoded])

    decoded_greedy, prob_greedy = res_greedy
    decoded_text = ''.join([self.vocab[c] for c in decoded_greedy[0].values])
    self.assertTrue( abs(7079.117 + prob_greedy[0][0]) < self.tol )
    self.assertTrue( decoded_text == 'then seconds' )

    decoded_beam, prob_beam = res_beam
    decoded_text = ''.join([self.vocab[c] for c in decoded_beam[0].values])
    if tf.__version__ >= '1.11':
      # works for newer versions only (with CTC decoder fix)
      self.assertTrue( abs(1.1842 + prob_beam[0][0]) < self.tol )
    self.assertTrue( decoded_text == 'then seconds' )

    scorer = Scorer(alpha=2.0, beta=0.5,
        model_path='ctc_decoder_with_lm/ctc-test-lm.binary', 
        vocabulary=self.vocab[:-1])
    res = ctc_beam_search_decoder(softmax(self.seq.squeeze()), self.vocab[:-1],
                                  beam_size=self.beam_width,
                                  ext_scoring_func=scorer)
    res_prob, decoded_text = res[0]
    self.assertTrue( abs(4.0845 + res_prob) < self.tol )
    self.assertTrue( decoded_text == self.label )
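The only difference between the plain beam search above and the LM-rescored one is the ext_scoring_func hook: the Scorer nudges each candidate's score toward word sequences the n-gram model likes, which is what flips 'then seconds' back to 'ten seconds'. The usual DeepSpeech-style combination, sketched here as a plain function (an illustration of the idea, not necessarily the Scorer's exact internals):

def rescore(log_p_ctc, log_p_lm, num_words, alpha=2.0, beta=0.5):
    # acoustic/CTC score plus a weighted language-model score (alpha)
    # and a per-word insertion bonus (beta), mirroring Scorer(alpha=2.0, beta=0.5)
    return log_p_ctc + alpha * log_p_lm + beta * num_words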
Example #6
        0.04139363,
    ],
    [
        0.15882358,
        0.1235788,
        0.23376776,
        0.20510435,
        0.00279306,
        0.05294827,
        0.22298418,
    ],
]
greedy_result = ["ac'bdc", "b'da"]
beam_search_result = ['acdc', "b'a"]

ctc_greedy_decoder(np.array(probs_seq1), vocab_list) == greedy_result[0]

ctc_greedy_decoder(np.array(probs_seq2), vocab_list) == greedy_result[1]

ctc_beam_search_decoder(
    probs_seq=np.array(probs_seq1),
    beam_size=beam_size,
    vocabulary=vocab_list,
)

ctc_beam_search_decoder(
    probs_seq=np.array(probs_seq2),
    beam_size=beam_size,
    vocabulary=vocab_list,
)
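
For comparison with the expected greedy_result strings, greedy CTC decoding simply takes the argmax symbol per frame, collapses consecutive repeats, and drops the blank. A self-contained NumPy sketch of that rule (again assuming the blank is the last index, as in the examples above):

import numpy as np

def greedy_ctc_decode(probs_seq, vocabulary):
    # argmax per frame, collapse consecutive repeats, then remove blanks
    blank_id = len(vocabulary)                  # blank assumed to be the last index
    best_path = np.argmax(probs_seq, axis=1)
    collapsed = [idx for i, idx in enumerate(best_path)
                 if i == 0 or idx != best_path[i - 1]]
    return ''.join(vocabulary[idx] for idx in collapsed if idx != blank_id)

# intended to mirror ctc_greedy_decoder(np.array(probs_seq2), vocab_list) above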