Beispiel #1
0
 def featurize(self, audio_clip, overwrite=False):
     """ Build the spectrogram features for one audio clip (described by the
     original author as the log of its Fourier Transform).

     The work is delegated to ``spectrogram_from_file``, configured with this
     instance's ``step``, ``window`` and ``max_freq`` settings.

     Params:
         audio_clip(str): Path to the audio clip
         overwrite(bool): Forwarded unchanged to ``spectrogram_from_file``
     Returns:
         Whatever ``spectrogram_from_file`` returns for the clip.
     """
     spectrogram_options = {
         "step": self.step,
         "window": self.window,
         "max_freq": self.max_freq,
         "overwrite": overwrite,
     }
     return spectrogram_from_file(audio_clip, **spectrogram_options)
Beispiel #2
0
 def featurize(self, audio_clip, overwrite=False):
     """ Turn an audio clip into spectrogram features (per the original
     description: the log of its Fourier Transform).

     Thin wrapper that calls ``spectrogram_from_file`` with the ``step``,
     ``window`` and ``max_freq`` values stored on this instance.

     Params:
         audio_clip(str): Path to the audio clip
         overwrite(bool): Passed straight through to
             ``spectrogram_from_file``
     Returns:
         The result of ``spectrogram_from_file`` for the clip.
     """
     features = spectrogram_from_file(
         audio_clip,
         step=self.step,
         window=self.window,
         max_freq=self.max_freq,
         overwrite=overwrite,
     )
     return features
Beispiel #3
0
 def featurize(self,
               audio_clip,
               overwrite=False,
               save_feature_as_csvfile=False,
               noise_percent=0.4,
               seq_length=-1):
     """ Build the spectrogram features for one audio clip (described by the
     original author as the log of its Fourier Transform).

     The instance supplies ``step``, ``window`` and ``max_freq``; every other
     option is forwarded unchanged to ``spectrogram_from_file``.

     Params:
         audio_clip(str): Path to the audio clip
         overwrite(bool): Forwarded to ``spectrogram_from_file``
         save_feature_as_csvfile(bool): Forwarded to
             ``spectrogram_from_file``
         noise_percent(float): Forwarded to ``spectrogram_from_file``
         seq_length(int): Forwarded to ``spectrogram_from_file``
     Returns:
         Whatever ``spectrogram_from_file`` returns for the clip.
     """
     # Settings owned by this featurizer instance.
     spectrogram_options = {
         "step": self.step,
         "window": self.window,
         "max_freq": self.max_freq,
     }
     # Per-call options supplied by the caller.
     spectrogram_options.update(
         overwrite=overwrite,
         save_feature_as_csvfile=save_feature_as_csvfile,
         noise_percent=noise_percent,
         seq_length=seq_length,
     )
     return spectrogram_from_file(audio_clip, **spectrogram_options)
Beispiel #4
0
 def getTrans(self, wav_file):
     """ Run the model on one wav file and log its greedy CTC decoding.

     The spectrogram of ``wav_file`` is normalized with
     ``self.datagen.normalize``, zero-padded into slot 0 of a fixed-length
     batch, passed through ``self.forward`` and decoded with
     ``ctc_greedy_decode``. The decoded result is only logged.

     Params:
         wav_file: Audio clip handed to ``spectrogram_from_file``
             (presumably a file path -- confirm against callers)
     Returns:
         The current value of ``self.eval_metric.placeholder``.
     Raises:
         ValueError: If the clip produces more frames than the fixed padded
             length, so it cannot fit into the input batch.
     """
     # Fixed padded sequence length of the input batch. The original code
     # also computed an (unused) bucket index from self.buckets; it had no
     # effect on the result and has been removed.
     bucket_key = 1600

     feats = spectrogram_from_file(wav_file, noise_percent=0)
     feats = self.datagen.normalize(feats)

     num_frames = feats.shape[0]
     if num_frames > bucket_key:
         # Previously this surfaced as an opaque NumPy broadcasting error
         # on the slice assignment below; fail with the same exception
         # type but a message that names the actual problem.
         raise ValueError(
             "audio clip has %d frames, which exceeds the fixed input "
             "length of %d" % (num_frames, bucket_key))

     # Only the first batch slot carries data; the rest stays zero padding.
     batch = np.zeros((self.batch_size, bucket_key, feats.shape[1]))
     batch[0, :num_frames, :] = feats

     st = time.time()
     probs = self.forward(mx.nd.array(batch))
     from stt_metric import ctc_greedy_decode
     decoded = ctc_greedy_decode(probs, self.labelUtil.byList)
     self.log.info("forward cost %.2f, %s" % (time.time() - st, decoded))

     # NOTE(review): the metric update that used to run here is disabled,
     # so this timing is effectively zero. The log line (including its
     # original spelling) is kept so existing log consumers are unaffected.
     st = time.time()
     self.log.info("upate metric cost %.2f" % (time.time() - st))

     # NOTE(review): nothing in this method refreshes the metric, so the
     # value returned is whatever an earlier update left behind -- confirm
     # whether callers actually want the decoded text instead.
     return self.eval_metric.placeholder
Beispiel #5
0
    def getTrans(self, wav_file):
        """ Transcribe one wav file with greedy and beam-search CTC decoding.

        The spectrogram of ``wav_file`` is padded to the smallest entry of
        ``self.buckets`` that can hold it, normalized, run through
        ``self.model`` and decoded twice: once with ``ctc_greedy_decode`` and
        once with ``ctc_beam_decode`` (beam size 5, scored by
        ``self.scorer``). Timings and results of each stage are logged.

        Params:
            wav_file: Audio clip handed to ``spectrogram_from_file``
                (presumably a file path -- confirm against callers)
        Returns:
            str: ``"greedy:\\n<greedy result>\\nbeam:\\n<beam results>"`` with
            the beam results joined by newlines.
        Raises:
            IndexError: If the clip has more frames than the largest entry
                of ``self.buckets``.
        """
        feats = spectrogram_from_file(wav_file, noise_percent=0)

        # Pick the smallest bucket that fits the clip.
        # Assumes self.buckets is sorted ascending, as bisect requires.
        num_frames = len(feats)
        bucket_idx = bisect.bisect_left(self.buckets, num_frames)
        if bucket_idx >= len(self.buckets):
            # bisect_left returns len(buckets) when every bucket is too
            # small; indexing with it used to raise a bare IndexError.
            # Keep the exception type but explain what went wrong.
            raise IndexError(
                "audio clip has %d frames, which exceeds the largest "
                "available bucket" % num_frames)
        bucket_key = self.buckets[bucket_idx]

        feats = self.datagen.normalize(feats)

        # Only the first batch slot carries data; the rest stays zero padding.
        batch = np.zeros((self.batch_size, bucket_key, feats.shape[1]))
        batch[0, :feats.shape[0], :] = feats
        init_state_arrays = [mx.nd.zeros(x[1]) for x in self.init_states]

        model_loaded = self.model

        provide_data = [
            ('data', (self.batch_size, bucket_key, self.width * self.height))
        ] + self.init_states
        data_batch = mx.io.DataBatch([mx.nd.array(batch)] + init_state_arrays,
                                     label=None,
                                     bucket_key=bucket_key,
                                     provide_data=provide_data,
                                     provide_label=None)

        st = time.time()
        model_loaded.forward(data_batch, is_train=False)
        probs = model_loaded.get_outputs()[0].asnumpy()
        log.info("forward cost %.3f" % (time.time() - st))

        st = time.time()
        greedy_text = ctc_greedy_decode(probs, self.labelUtil.byList)
        log.info("greedy decode cost %.3f, result is:\n%s" %
                 (time.time() - st, greedy_text))

        beam_size = 5
        from stt_metric import ctc_beam_decode
        st = time.time()
        results = ctc_beam_decode(scorer=self.scorer,
                                  beam_size=beam_size,
                                  vocab=self.labelUtil.byList,
                                  probs=probs)
        # Join once and reuse for both the log line and the return value.
        beam_text = "\n".join(results)
        log.info("beam decode cost %.3f, result is:\n%s" %
                 (time.time() - st, beam_text))
        return "greedy:\n" + greedy_text + "\nbeam:\n" + beam_text