def featurize(self, audio_clip, overwrite=False):
    """Compute the log Fourier-transform features of one audio clip.

    Thin wrapper around ``spectrogram_from_file`` that supplies this
    object's ``step``, ``window`` and ``max_freq`` settings.

    Params:
        audio_clip (str): Path to the audio clip
        overwrite (bool): Forwarded unchanged to ``spectrogram_from_file``
    Returns:
        Whatever ``spectrogram_from_file`` produces for the clip
    """
    spectrogram_options = {
        "step": self.step,
        "window": self.window,
        "max_freq": self.max_freq,
        "overwrite": overwrite,
    }
    return spectrogram_from_file(audio_clip, **spectrogram_options)
def featurize(self, audio_clip, overwrite=False, save_feature_as_csvfile=False,
              noise_percent=0.4, seq_length=-1):
    """Compute the log Fourier-transform features of one audio clip.

    Delegates to ``spectrogram_from_file``, combining this object's
    ``step``, ``window`` and ``max_freq`` settings with the per-call
    options below. The per-call options are passed through untouched;
    their exact meaning is defined by ``spectrogram_from_file``.

    Params:
        audio_clip (str): Path to the audio clip
        overwrite (bool): Forwarded as ``overwrite``
        save_feature_as_csvfile (bool): Forwarded as
            ``save_feature_as_csvfile``
        noise_percent (float): Forwarded as ``noise_percent``
        seq_length (int): Forwarded as ``seq_length``
    Returns:
        Whatever ``spectrogram_from_file`` produces for the clip
    """
    # Settings owned by this featurizer instance.
    instance_settings = dict(
        step=self.step,
        window=self.window,
        max_freq=self.max_freq,
    )
    # Options chosen by the caller for this particular clip.
    call_options = dict(
        overwrite=overwrite,
        save_feature_as_csvfile=save_feature_as_csvfile,
        noise_percent=noise_percent,
        seq_length=seq_length,
    )
    return spectrogram_from_file(audio_clip, **instance_settings, **call_options)
def getTrans(self, wav_file):
    """Run the model on one wav file and log its greedy CTC decoding.

    The clip's spectrogram (computed with ``noise_percent=0``) is
    normalized with ``self.datagen.normalize``, zero-padded into slot 0 of
    a ``(self.batch_size, 1600, feature_dim)`` array, pushed through
    ``self.forward`` and decoded with ``ctc_greedy_decode``. The decoded
    result is written to ``self.log``.

    Params:
        wav_file: Path of the wav file, handed to ``spectrogram_from_file``
    Returns:
        ``self.eval_metric.placeholder``.
        NOTE(review): nothing in this method updates the metric, so the
        decoded text is only logged, not returned -- confirm callers
        expect that.
    Raises:
        ValueError: if the normalized spectrogram has more rows than the
            fixed padded length of 1600.
    """
    # Local import kept function-scoped as in the original, but hoisted so
    # that the import cost is not counted in the "forward cost" timing.
    from stt_metric import ctc_greedy_decode

    # Fixed padded length fed to the model.
    padded_len = 1600

    feats = spectrogram_from_file(wav_file, noise_percent=0)
    feats = self.datagen.normalize(feats)

    # Without this guard the slice assignment below fails with an opaque
    # NumPy broadcast ValueError; raise the same exception type with a
    # message that names the actual problem.
    if feats.shape[0] > padded_len:
        raise ValueError(
            "audio clip %r yields %d feature rows, more than the supported "
            "maximum of %d" % (wav_file, feats.shape[0], padded_len))

    # Only the first batch slot carries data; the rest stays zero.
    batch = np.zeros((self.batch_size, padded_len, feats.shape[1]))
    batch[0, :feats.shape[0], :] = feats

    started = time.time()
    probs = self.forward(mx.nd.array(batch))
    decoded = ctc_greedy_decode(probs, self.labelUtil.byList)
    self.log.info("forward cost %.2f, %s" % (time.time() - started, decoded))

    # NOTE(review): no metric update happens between these two timestamps,
    # so this always logs ~0. The message text is kept verbatim in case log
    # consumers match on it.
    started = time.time()
    self.log.info("upate metric cost %.2f" % (time.time() - started))
    return self.eval_metric.placeholder
def getTrans(self, wav_file, beam_size=5):
    """Transcribe one wav file with greedy and beam-search CTC decoding.

    The clip's spectrogram (computed with ``noise_percent=0``) is assigned
    to the smallest entry of ``self.buckets`` that is not shorter than the
    clip, normalized with ``self.datagen.normalize``, zero-padded into
    slot 0 of a batch and run through ``self.model``. The output
    probabilities are decoded twice: once with ``ctc_greedy_decode`` and
    once with ``ctc_beam_decode`` using ``self.scorer``. Timings and
    results are written to the module-level ``log``.

    Params:
        wav_file: Path of the wav file, handed to ``spectrogram_from_file``
        beam_size (int): Beam width passed to ``ctc_beam_decode``
            (defaults to 5, the previously hard-coded value)
    Returns:
        str: ``"greedy:\\n<greedy result>\\nbeam:\\n<beam results>"`` with
        the beam results joined by newlines.
    Raises:
        IndexError: if the clip is longer than every entry of
            ``self.buckets``.
    """
    # Function-scoped import as in the original; done first so a missing
    # module fails before the forward pass.
    from stt_metric import ctc_beam_decode

    feats = spectrogram_from_file(wav_file, noise_percent=0)

    # bisect_left gives the index of the first bucket >= len(feats); it
    # equals len(self.buckets) when the clip exceeds the largest bucket.
    # bisect relies on self.buckets being sorted ascending.
    bucket_idx = bisect.bisect_left(self.buckets, len(feats))
    if bucket_idx >= len(self.buckets):
        # Same exception type the bare index lookup would raise, but with
        # a message that explains the cause.
        raise IndexError(
            "audio clip %r yields %d feature rows, longer than the largest "
            "bucket" % (wav_file, len(feats)))
    bucket_key = self.buckets[bucket_idx]

    feats = self.datagen.normalize(feats)
    # Only the first batch slot carries data; the rest stays zero.
    batch = np.zeros((self.batch_size, bucket_key, feats.shape[1]))
    batch[0, :feats.shape[0], :] = feats

    # Each init state contributes a zero array of its declared shape
    # (second item of the entry).
    init_state_arrays = [mx.nd.zeros(state[1]) for state in self.init_states]
    model = self.model
    provide_data = [
        ('data', (self.batch_size, bucket_key, self.width * self.height))
    ] + self.init_states
    data_batch = mx.io.DataBatch(
        [mx.nd.array(batch)] + init_state_arrays,
        label=None,
        bucket_key=bucket_key,
        provide_data=provide_data,
        provide_label=None)

    started = time.time()
    model.forward(data_batch, is_train=False)
    probs = model.get_outputs()[0].asnumpy()
    log.info("forward cost %.3f" % (time.time() - started))

    started = time.time()
    greedy_text = ctc_greedy_decode(probs, self.labelUtil.byList)
    log.info("greedy decode cost %.3f, result is:\n%s"
             % (time.time() - started, greedy_text))

    started = time.time()
    results = ctc_beam_decode(scorer=self.scorer,
                              beam_size=beam_size,
                              vocab=self.labelUtil.byList,
                              probs=probs)
    # Join once and reuse for both the log line and the return value.
    beam_text = "\n".join(results)
    log.info("beam decode cost %.3f, result is:\n%s"
             % (time.time() - started, beam_text))

    return "greedy:\n" + greedy_text + "\nbeam:\n" + beam_text