def perform(self, input_file_path, speech_length, nonspeech_length):
    """
    Run the VAD pipeline on an audio file and check the interval counts.

    The path is resolved with ``get_abs_path``, MFCCs are computed,
    the VAD is executed, and then the number of detected speech and
    nonspeech intervals is compared against the expected values.

    :param input_file_path: path of the audio file, as accepted by ``get_abs_path``
    :param speech_length: expected number of items in ``vad.speech``
    :param nonspeech_length: expected number of items in ``vad.nonspeech``
    """
    absolute_path = get_abs_path(input_file_path)
    detector = VAD(absolute_path)

    # MFCCs are computed before running the detector, in this order
    detector.compute_mfcc()
    detector.compute_vad()

    speech_count = len(detector.speech)
    self.assertEqual(speech_count, speech_length)

    nonspeech_count = len(detector.nonspeech)
    self.assertEqual(nonspeech_count, nonspeech_length)
def main():
    """
    Entry point.

    Reads the command line from ``sys.argv``:

    * ``sys.argv[1]``: path of the input audio file
    * ``sys.argv[2]``: mode, one of ``speech``, ``nonspeech``, ``both``
    * ``sys.argv[3]``: path of the output file to create
    * a trailing ``-v`` makes the logger tee its messages

    The audio file is converted into a temporary ``.wav`` file, MFCCs
    are extracted, the VAD is run, and the resulting intervals are
    written to the output file, one per line, as tab-separated values
    with three decimal digits (plus a ``speech``/``nonspeech`` label
    in ``both`` mode).

    If there are fewer than three arguments or the mode is not
    recognized, ``usage()`` is called and the function returns
    without creating any file.
    """
    if len(sys.argv) < 4:
        usage()
        return

    audio_file_path = sys.argv[1]
    mode = sys.argv[2]
    output_file_path = sys.argv[3]
    verbose = (sys.argv[-1] == "-v")

    # Validate the mode BEFORE creating the temporary file,
    # otherwise an invalid mode would leave the temporary file behind.
    if mode not in ("speech", "nonspeech", "both"):
        usage()
        return

    # NOTE: print is written as a parenthesized single-argument call,
    # which behaves identically under the Python 2 print statement.
    if not gf.can_run_c_extension():
        print("[WARN] Unable to load Python C Extensions")
        print("[WARN] Running the slower pure Python code")
        print("[WARN] See the README file for directions to compile the Python C Extensions")

    logger = Logger(tee=verbose)

    tmp_handler, tmp_file_path = tempfile.mkstemp(
        suffix=".wav",
        dir=gf.custom_tmp_dir()
    )
    try:
        print("[INFO] Converting audio file to mono...")
        converter = FFMPEGWrapper(logger=logger)
        converter.convert(audio_file_path, tmp_file_path)
        print("[INFO] Converting audio file to mono... done")

        vad = VAD(tmp_file_path, logger=logger)

        print("[INFO] Extracting MFCCs...")
        vad.compute_mfcc()
        print("[INFO] Extracting MFCCs... done")

        print("[INFO] Executing VAD...")
        vad.compute_vad()
        print("[INFO] Executing VAD... done")
    finally:
        # Run the cleanup even if conversion or VAD raised, so the
        # temporary file does not outlive a failed run.
        # NOTE(review): cleanup() is defined elsewhere; presumably it
        # closes the handler and removes the file -- confirm.
        print("[INFO] Cleaning up...")
        cleanup(tmp_handler, tmp_file_path)
        print("[INFO] Cleaning up... done")

    # Human-readable description used in the progress messages
    label = "speech and nonspeech" if mode == "both" else mode
    print("[INFO] Creating %s file..." % label)

    if mode == "both":
        # Tag each interval with its kind, then sort so that speech and
        # nonspeech intervals are interleaved by their begin time.
        tagged = [[x[0], x[1], "speech"] for x in vad.speech]
        tagged += [[x[0], x[1], "nonspeech"] for x in vad.nonspeech]
        lines = [
            "%.3f\t%.3f\t%s\n" % (interval[0], interval[1], interval[2])
            for interval in sorted(tagged)
        ]
    else:
        intervals = vad.speech if mode == "speech" else vad.nonspeech
        lines = [
            "%.3f\t%.3f\n" % (interval[0], interval[1])
            for interval in intervals
        ]

    # The context manager closes the file even if a write fails
    with open(output_file_path, "w") as output_file:
        output_file.writelines(lines)

    print("[INFO] Creating %s file... done" % label)
    print("[INFO] Created file %s" % output_file_path)