Exemple #1
0
 def perform(self, input_file_path, speech_length, nonspeech_length):
     """
     Run the VAD pipeline on a file and check the result sizes.

     The path is resolved through ``get_abs_path``, then MFCCs and
     the VAD are computed, in that order.

     :param input_file_path: path of the input file, as accepted
                             by ``get_abs_path``
     :param speech_length: expected ``len()`` of ``vad.speech``
     :param nonspeech_length: expected ``len()`` of ``vad.nonspeech``
     """
     absolute_path = get_abs_path(input_file_path)
     detector = VAD(absolute_path)
     detector.compute_mfcc()
     detector.compute_vad()

     # The speech count is checked first; the nonspeech result is
     # only read once that assertion has passed.
     speech_count = len(detector.speech)
     self.assertEqual(speech_count, speech_length)
     nonspeech_count = len(detector.nonspeech)
     self.assertEqual(nonspeech_count, nonspeech_length)
Exemple #2
0
 def perform(self, input_file_path, speech_length, nonspeech_length):
     """
     Compute the VAD for the given file and assert how many speech
     and nonspeech entries it produced.

     :param input_file_path: input file path, resolved with
                             ``get_abs_path`` before use
     :param speech_length: number of entries expected in ``speech``
     :param nonspeech_length: number of entries expected in ``nonspeech``
     """
     analyzer = VAD(
         get_abs_path(input_file_path)
     )
     # MFCCs are computed before the VAD, as in the original order.
     analyzer.compute_mfcc()
     analyzer.compute_vad()

     found = len(analyzer.speech)
     self.assertEqual(found, speech_length)
     found = len(analyzer.nonspeech)
     self.assertEqual(found, nonspeech_length)
Exemple #3
0
def main():
    """
    Entry point.

    Reads its arguments from ``sys.argv``::

        AUDIO_FILE MODE OUTPUT_FILE [-v]

    * ``AUDIO_FILE``: path of the audio file to analyze
    * ``MODE``: one of ``speech``, ``nonspeech``, ``both``
    * ``OUTPUT_FILE``: path of the text file to create
    * ``-v``: if it is the last argument, the logger is created
      with ``tee=True``

    The audio file is converted with ``FFMPEGWrapper`` into a temporary
    ``.wav`` file, on which MFCCs and the VAD are computed.
    The output file has one line per interval: the first two values of
    the interval, formatted with three decimals and separated by a tab.
    In ``both`` mode the intervals are sorted and each line has a third
    column, ``speech`` or ``nonspeech``.

    If there are too few arguments or the mode is unknown, ``usage()``
    is called and nothing else is done.
    """
    # NOTE: print(...) with a single argument produces the same output
    # whether it is parsed as a Python 2 statement or a Python 3 call.
    if len(sys.argv) < 4:
        usage()
        return
    audio_file_path = sys.argv[1]
    mode = sys.argv[2]
    output_file_path = sys.argv[3]
    verbose = (sys.argv[-1] == "-v")

    # Validate the mode BEFORE creating the temporary file, so that
    # a wrong invocation does not leave a stray file behind.
    if mode not in ("speech", "nonspeech", "both"):
        usage()
        return

    if not gf.can_run_c_extension():
        print("[WARN] Unable to load Python C Extensions")
        print("[WARN] Running the slower pure Python code")
        print("[WARN] See the README file for directions to compile the Python C Extensions")

    logger = Logger(tee=verbose)

    tmp_handler, tmp_file_path = tempfile.mkstemp(
        suffix=".wav",
        dir=gf.custom_tmp_dir()
    )
    try:
        print("[INFO] Converting audio file to mono...")
        converter = FFMPEGWrapper(logger=logger)
        converter.convert(audio_file_path, tmp_file_path)
        print("[INFO] Converting audio file to mono... done")

        vad = VAD(tmp_file_path, logger=logger)
        print("[INFO] Extracting MFCCs...")
        vad.compute_mfcc()
        print("[INFO] Extracting MFCCs... done")
        print("[INFO] Executing VAD...")
        vad.compute_vad()
        print("[INFO] Executing VAD... done")
    finally:
        # Always release the temporary file, even if the conversion
        # or the VAD computation raised an exception.
        print("[INFO] Cleaning up...")
        cleanup(tmp_handler, tmp_file_path)
        print("[INFO] Cleaning up... done")

    # Text used in the progress messages for the selected mode.
    description = "speech and nonspeech" if mode == "both" else mode
    print("[INFO] Creating %s file..." % description)
    # The context manager closes the output file even if a write fails.
    with open(output_file_path, "w") as output_file:
        if mode == "both":
            # Label each interval, then sort speech and nonspeech together.
            speech = [[x[0], x[1], "speech"] for x in vad.speech]
            nonspeech = [[x[0], x[1], "nonspeech"] for x in vad.nonspeech]
            for interval in sorted(speech + nonspeech):
                output_file.write("%.3f\t%.3f\t%s\n" % (
                    interval[0],
                    interval[1],
                    interval[2]
                ))
        else:
            intervals = vad.speech if mode == "speech" else vad.nonspeech
            for interval in intervals:
                output_file.write("%.3f\t%.3f\n" % (interval[0], interval[1]))
    print("[INFO] Creating %s file... done" % description)

    print("[INFO] Created file %s" % output_file_path)