def _extract_mfcc(self, file_path=None, file_format=None, audio_file=None):
    """
    Build the MFCC representation of the given audio source.

    The three arguments are forwarded unchanged to ``AudioFileMFCC``,
    together with this object's ``rconf`` and ``logger``.
    If the ``mmn`` attribute of the runtime configuration is truthy,
    VAD is also run on the new object, using the four ``MFCC_MASK_*``
    values read from the runtime configuration.

    :param file_path: passed to ``AudioFileMFCC`` as ``file_path``
    :param file_format: passed to ``AudioFileMFCC`` as ``file_format``
    :param audio_file: passed to ``AudioFileMFCC`` as ``audio_file``
    :rtype: :class:`~aeneas.audiofilemfcc.AudioFileMFCC`
    """
    mfcc = AudioFileMFCC(
        file_path=file_path,
        file_format=file_format,
        audio_file=audio_file,
        rconf=self.rconf,
        logger=self.logger
    )

    # nothing else to do unless masking is requested by the configuration
    if not self.rconf.mmn:
        return mfcc

    self.log(u"Running VAD inside _extract_mfcc...")
    energy_threshold = self.rconf[RuntimeConfiguration.MFCC_MASK_LOG_ENERGY_THRESHOLD]
    nonspeech_length = self.rconf[RuntimeConfiguration.MFCC_MASK_MIN_NONSPEECH_LENGTH]
    pad_before = self.rconf[RuntimeConfiguration.MFCC_MASK_EXTEND_SPEECH_INTERVAL_BEFORE]
    pad_after = self.rconf[RuntimeConfiguration.MFCC_MASK_EXTEND_SPEECH_INTERVAL_AFTER]
    mfcc.run_vad(
        log_energy_threshold=energy_threshold,
        min_nonspeech_length=nonspeech_length,
        extend_before=pad_before,
        extend_after=pad_after
    )
    self.log(u"Running VAD inside _extract_mfcc... done")
    return mfcc
def perform_command(self):
    """
    Run VAD on an audio file and write out the requested intervals.

    The positional arguments are read from ``self.actual_arguments``:
    the audio file path, the mode (``speech``, ``nonspeech`` or ``both``)
    and, optionally, the output file path.
    When ``-i`` / ``--index`` is given, ``time=False`` is passed
    to ``intervals()`` and to ``write_to_file()``.

    The value of ``print_help()`` is returned when the arguments are
    missing or the mode is not recognized; ``ERROR_EXIT_CODE`` is
    returned when a file check fails or the audio cannot be read.

    :rtype: int
    """
    args = self.actual_arguments
    if len(args) < 2:
        return self.print_help()

    audio_file_path, mode = args[0], args[1]
    if mode not in (u"speech", u"nonspeech", u"both"):
        return self.print_help()

    output_file_path = args[2] if len(args) >= 3 else None
    output_time = not self.has_option([u"-i", u"--index"])

    self.check_c_extensions("cmfcc")
    if not self.check_input_file(audio_file_path):
        return self.ERROR_EXIT_CODE
    if output_file_path is not None:
        if not self.check_output_file(output_file_path):
            return self.ERROR_EXIT_CODE

    self.print_info(u"Reading audio...")
    try:
        audio_file_mfcc = AudioFileMFCC(audio_file_path, rconf=self.rconf, logger=self.logger)
    except AudioFileConverterError:
        self.print_error(u"Unable to call the ffmpeg executable '%s'" % (self.rconf[RuntimeConfiguration.FFMPEG_PATH]))
        self.print_error(u"Make sure the path to ffmpeg is correct")
        return self.ERROR_EXIT_CODE
    except (AudioFileUnsupportedFormatError, AudioFileNotInitializedError):
        self.print_error(u"Cannot read file '%s'" % (audio_file_path))
        self.print_error(u"Check that its format is supported by ffmpeg")
        return self.ERROR_EXIT_CODE
    except Exception as exc:
        self.print_error(u"An unexpected error occurred while reading the audio file:")
        self.print_error(u"%s" % exc)
        return self.ERROR_EXIT_CODE
    self.print_info(u"Reading audio... done")

    self.print_info(u"Executing VAD...")
    audio_file_mfcc.run_vad()
    self.print_info(u"Executing VAD... done")

    # both lists are always computed, regardless of the selected mode
    speech = audio_file_mfcc.intervals(speech=True, time=output_time)
    nonspeech = audio_file_mfcc.intervals(speech=False, time=output_time)
    if mode == u"both":
        # label each interval, then merge the two lists in sorted order
        selected = [[x[0], x[1], u"speech"] for x in speech]
        selected.extend([x[0], x[1], u"nonspeech"] for x in nonspeech)
        selected.sort()
    elif mode == u"speech":
        selected = speech
    else:
        # mode was validated above, so it can only be u"nonspeech" here
        selected = nonspeech

    rows = [tuple(row) for row in selected]
    self.write_to_file(output_file_path, rows, output_time)
    return self.NO_ERROR_EXIT_CODE
def _extract_mfcc(self, file_path=None, file_format=None, audio_file=None):
    """
    Create an ``AudioFileMFCC`` object for the given audio source.

    ``file_path``, ``file_format`` and ``audio_file`` are handed over
    as they are to the ``AudioFileMFCC`` constructor, along with
    ``self.rconf`` and ``self.logger``.
    When ``self.rconf.mmn`` is truthy, ``run_vad()`` is additionally
    called on the result, with the four ``MFCC_MASK_*`` settings
    looked up in ``self.rconf``.

    :param file_path: passed to ``AudioFileMFCC`` as ``file_path``
    :param file_format: passed to ``AudioFileMFCC`` as ``file_format``
    :param audio_file: passed to ``AudioFileMFCC`` as ``audio_file``
    :rtype: :class:`~aeneas.audiofilemfcc.AudioFileMFCC`
    """
    extracted = AudioFileMFCC(
        file_path=file_path,
        file_format=file_format,
        audio_file=audio_file,
        rconf=self.rconf,
        logger=self.logger
    )

    # early exit: VAD is run only when the configuration asks for it
    if not self.rconf.mmn:
        return extracted

    self.log(u"Running VAD inside _extract_mfcc...")
    threshold = self.rconf[RuntimeConfiguration.MFCC_MASK_LOG_ENERGY_THRESHOLD]
    min_length = self.rconf[RuntimeConfiguration.MFCC_MASK_MIN_NONSPEECH_LENGTH]
    before = self.rconf[RuntimeConfiguration.MFCC_MASK_EXTEND_SPEECH_INTERVAL_BEFORE]
    after = self.rconf[RuntimeConfiguration.MFCC_MASK_EXTEND_SPEECH_INTERVAL_AFTER]
    extracted.run_vad(
        log_energy_threshold=threshold,
        min_nonspeech_length=min_length,
        extend_before=before,
        extend_after=after
    )
    self.log(u"Running VAD inside _extract_mfcc... done")
    return extracted
def perform_command(self):
    """
    Detect the audio interval matching the given text and print it.

    The positional arguments are read from ``self.actual_arguments``:
    the text format, the text, the language and the audio file path.
    When the text format is ``list`` the text argument is used as a
    string; for any other allowed format it is treated as an input
    file path and checked with ``check_input_file()``.

    The value of ``print_help()`` is returned when there are fewer
    than four arguments or the text format is not recognized;
    ``ERROR_EXIT_CODE`` is returned when a check fails, when no text
    fragments are found, or when the audio file cannot be read.

    :rtype: int
    """
    args = self.actual_arguments
    if len(args) < 4:
        return self.print_help()

    text_format = gf.safe_unicode(args[0])
    is_list = (text_format == u"list")
    if (not is_list) and (text_format not in TextFileFormat.ALLOWED_VALUES):
        return self.print_help()
    if is_list:
        text = gf.safe_unicode(args[1])
    else:
        text = args[1]
        if not self.check_input_file(text):
            return self.ERROR_EXIT_CODE

    l1_id_regex = self.has_option_with_value(u"--l1-id-regex")
    l2_id_regex = self.has_option_with_value(u"--l2-id-regex")
    l3_id_regex = self.has_option_with_value(u"--l3-id-regex")
    id_regex = self.has_option_with_value(u"--id-regex")
    class_regex = self.has_option_with_value(u"--class-regex")
    sort = self.has_option_with_value(u"--sort")
    parameters = {
        gc.PPN_TASK_IS_TEXT_MUNPARSED_L1_ID_REGEX: l1_id_regex,
        gc.PPN_TASK_IS_TEXT_MUNPARSED_L2_ID_REGEX: l2_id_regex,
        gc.PPN_TASK_IS_TEXT_MUNPARSED_L3_ID_REGEX: l3_id_regex,
        gc.PPN_JOB_IS_TEXT_UNPARSED_ID_REGEX: id_regex,
        gc.PPN_JOB_IS_TEXT_UNPARSED_CLASS_REGEX: class_regex,
        gc.PPN_JOB_IS_TEXT_UNPARSED_ID_SORT: sort
    }

    # munparsed needs all three level regexes
    if text_format == TextFileFormat.MUNPARSED:
        if any(regex is None for regex in (l1_id_regex, l2_id_regex, l3_id_regex)):
            self.print_error(u"You must specify --l1-id-regex and --l2-id-regex and --l3-id-regex for munparsed format")
            return self.ERROR_EXIT_CODE
    # unparsed needs at least one of the two regexes
    if text_format == TextFileFormat.UNPARSED:
        if (id_regex is None) and (class_regex is None):
            self.print_error(u"You must specify --id-regex and/or --class-regex for unparsed format")
            return self.ERROR_EXIT_CODE

    language = gf.safe_unicode(args[2])
    audio_file_path = args[3]
    if not self.check_input_file(audio_file_path):
        return self.ERROR_EXIT_CODE

    text_file = self.get_text_file(text_format, text, parameters)
    if text_file is None:
        self.print_error(u"Unable to build a TextFile from the given parameters")
        return self.ERROR_EXIT_CODE
    if len(text_file) == 0:
        self.print_error(u"No text fragments found")
        return self.ERROR_EXIT_CODE
    text_file.set_language(language)
    self.print_info(u"Read input text with %d fragments" % (len(text_file)))

    self.print_info(u"Reading audio...")
    try:
        audio_file_mfcc = AudioFileMFCC(audio_file_path, rconf=self.rconf, logger=self.logger)
    except AudioFileConverterError:
        self.print_error(u"Unable to call the ffmpeg executable '%s'" % (self.rconf[RuntimeConfiguration.FFMPEG_PATH]))
        self.print_error(u"Make sure the path to ffmpeg is correct")
        return self.ERROR_EXIT_CODE
    except (AudioFileUnsupportedFormatError, AudioFileNotInitializedError):
        self.print_error(u"Cannot read file '%s'" % (audio_file_path))
        self.print_error(u"Check that its format is supported by ffmpeg")
        return self.ERROR_EXIT_CODE
    except Exception as exc:
        self.print_error(u"An unexpected error occurred while reading the audio file:")
        self.print_error(u"%s" % exc)
        return self.ERROR_EXIT_CODE
    self.print_info(u"Reading audio... done")

    self.print_info(u"Running VAD...")
    audio_file_mfcc.run_vad()
    self.print_info(u"Running VAD... done")

    # optional bounds, read in the order: min head, max head, min tail, max tail
    bound_options = (u"--min-head", u"--max-head", u"--min-tail", u"--max-tail")
    min_head, max_head, min_tail, max_tail = [
        gf.safe_float(self.has_option_with_value(option), None)
        for option in bound_options
    ]

    self.print_info(u"Detecting audio interval...")
    start_detector = SD(audio_file_mfcc, text_file, rconf=self.rconf, logger=self.logger)
    start, end = start_detector.detect_interval(min_head, max_head, min_tail, max_tail)
    self.print_info(u"Detecting audio interval... done")

    self.print_result(audio_file_mfcc.audio_length, start, end)
    return self.NO_ERROR_EXIT_CODE
def perform(self, input_file_path, speech_length, nonspeech_length):
    """
    Run VAD on the given audio file and assert that the number of
    speech and nonspeech intervals match the expected values.

    :param input_file_path: path of the audio file, resolved with
                            ``gf.absolute_path()`` against this file
    :param speech_length: expected number of speech intervals
    :param nonspeech_length: expected number of nonspeech intervals
    """
    audio_path = gf.absolute_path(input_file_path, __file__)
    mfcc = AudioFileMFCC(audio_path)
    mfcc.run_vad()
    # speech is checked first, then nonspeech
    for is_speech, expected in ((True, speech_length), (False, nonspeech_length)):
        self.assertEqual(len(mfcc.intervals(speech=is_speech)), expected)
def perform(self, input_file_path, speech_length, nonspeech_length):
    """
    Check the number of intervals found by VAD in the given audio file.

    The file path is resolved with ``gf.absolute_path()`` against
    this file, VAD is run on the resulting ``AudioFileMFCC`` object,
    and the speech and nonspeech interval counts are asserted.

    :param input_file_path: path of the audio file to analyze
    :param speech_length: expected number of speech intervals
    :param nonspeech_length: expected number of nonspeech intervals
    """
    resolved_path = gf.absolute_path(input_file_path, __file__)
    analyzed = AudioFileMFCC(resolved_path)
    analyzed.run_vad()

    speech_intervals = analyzed.intervals(speech=True)
    self.assertEqual(len(speech_intervals), speech_length)

    nonspeech_intervals = analyzed.intervals(speech=False)
    self.assertEqual(len(nonspeech_intervals), nonspeech_length)
def perform_command(self):
    """
    Execute VAD on an audio file and output the selected intervals.

    ``self.actual_arguments`` must hold the audio file path and the
    mode (``speech``, ``nonspeech`` or ``both``); a third, optional
    argument is the output file path.
    The ``-i`` / ``--index`` option turns off the ``time`` flag
    passed to ``intervals()`` and to ``write_to_file()``.

    Whatever ``print_help()`` returns is returned on missing
    arguments or on an unknown mode; ``ERROR_EXIT_CODE`` is returned
    if a file check fails or the audio cannot be read.

    :rtype: int
    """
    arguments = self.actual_arguments
    if len(arguments) < 2:
        return self.print_help()

    audio_file_path = arguments[0]
    mode = arguments[1]
    if mode not in (u"speech", u"nonspeech", u"both"):
        return self.print_help()

    output_file_path = None
    if len(arguments) >= 3:
        output_file_path = arguments[2]
    output_time = not self.has_option([u"-i", u"--index"])

    self.check_c_extensions("cmfcc")
    if not self.check_input_file(audio_file_path):
        return self.ERROR_EXIT_CODE
    has_output = output_file_path is not None
    if has_output and not self.check_output_file(output_file_path):
        return self.ERROR_EXIT_CODE

    self.print_info(u"Reading audio...")
    try:
        audio_file_mfcc = AudioFileMFCC(audio_file_path, rconf=self.rconf, logger=self.logger)
    except AudioFileConverterError:
        self.print_error(u"Unable to call the ffmpeg executable '%s'" % (self.rconf[RuntimeConfiguration.FFMPEG_PATH]))
        self.print_error(u"Make sure the path to ffmpeg is correct")
        return self.ERROR_EXIT_CODE
    except (AudioFileUnsupportedFormatError, AudioFileNotInitializedError):
        self.print_error(u"Cannot read file '%s'" % (audio_file_path))
        self.print_error(u"Check that its format is supported by ffmpeg")
        return self.ERROR_EXIT_CODE
    except Exception as exc:
        self.print_error(u"An unexpected error occurred while reading the audio file:")
        self.print_error(u"%s" % exc)
        return self.ERROR_EXIT_CODE
    self.print_info(u"Reading audio... done")

    self.print_info(u"Executing VAD...")
    audio_file_mfcc.run_vad()
    self.print_info(u"Executing VAD... done")

    # the two interval lists are always requested, whatever the mode
    speech = audio_file_mfcc.intervals(speech=True, time=output_time)
    nonspeech = audio_file_mfcc.intervals(speech=False, time=output_time)
    if mode == u"both":
        # tag every interval with its kind and merge them in sorted order
        chosen = [[x[0], x[1], u"speech"] for x in speech]
        chosen.extend([x[0], x[1], u"nonspeech"] for x in nonspeech)
        chosen.sort()
    elif mode == u"speech":
        chosen = speech
    else:
        # only u"nonspeech" is left, since mode was validated above
        chosen = nonspeech

    as_tuples = [tuple(item) for item in chosen]
    self.write_to_file(output_file_path, as_tuples, output_time)
    return self.NO_ERROR_EXIT_CODE
def perform_command(self):
    """
    Detect the audio interval corresponding to the given text
    and print the result.

    ``self.actual_arguments`` must hold, in this order: the text
    format, the text, the language and the audio file path.
    With the ``list`` format the text argument is taken as a string;
    with any other allowed format it is an input file path, verified
    with ``check_input_file()``.

    Whatever ``print_help()`` returns is returned when fewer than
    four arguments are given or the text format is unknown;
    ``ERROR_EXIT_CODE`` is returned if a check fails, if the text has
    no fragments, or if the audio file cannot be read.

    :rtype: int
    """
    arguments = self.actual_arguments
    if len(arguments) < 4:
        return self.print_help()

    text_format = gf.safe_unicode(arguments[0])
    from_list = (text_format == u"list")
    if (not from_list) and (text_format not in TextFileFormat.ALLOWED_VALUES):
        return self.print_help()
    if from_list:
        text = gf.safe_unicode(arguments[1])
    else:
        text = arguments[1]
        if not self.check_input_file(text):
            return self.ERROR_EXIT_CODE

    l1_id_regex = self.has_option_with_value(u"--l1-id-regex")
    l2_id_regex = self.has_option_with_value(u"--l2-id-regex")
    l3_id_regex = self.has_option_with_value(u"--l3-id-regex")
    id_regex = self.has_option_with_value(u"--id-regex")
    class_regex = self.has_option_with_value(u"--class-regex")
    sort = self.has_option_with_value(u"--sort")
    parameters = {
        gc.PPN_TASK_IS_TEXT_MUNPARSED_L1_ID_REGEX: l1_id_regex,
        gc.PPN_TASK_IS_TEXT_MUNPARSED_L2_ID_REGEX: l2_id_regex,
        gc.PPN_TASK_IS_TEXT_MUNPARSED_L3_ID_REGEX: l3_id_regex,
        gc.PPN_TASK_IS_TEXT_UNPARSED_CLASS_REGEX: class_regex,
        gc.PPN_TASK_IS_TEXT_UNPARSED_ID_REGEX: id_regex,
        gc.PPN_TASK_IS_TEXT_UNPARSED_ID_SORT: sort,
    }

    # the munparsed format requires all three level regexes
    if text_format == TextFileFormat.MUNPARSED:
        if any(regex is None for regex in (l1_id_regex, l2_id_regex, l3_id_regex)):
            self.print_error(u"You must specify --l1-id-regex and --l2-id-regex and --l3-id-regex for munparsed format")
            return self.ERROR_EXIT_CODE
    # the unparsed format requires at least one of the two regexes
    if text_format == TextFileFormat.UNPARSED:
        if (id_regex is None) and (class_regex is None):
            self.print_error(u"You must specify --id-regex and/or --class-regex for unparsed format")
            return self.ERROR_EXIT_CODE

    language = gf.safe_unicode(arguments[2])
    audio_file_path = arguments[3]
    if not self.check_input_file(audio_file_path):
        return self.ERROR_EXIT_CODE

    text_file = self.get_text_file(text_format, text, parameters)
    if text_file is None:
        self.print_error(u"Unable to build a TextFile from the given parameters")
        return self.ERROR_EXIT_CODE
    if len(text_file) == 0:
        self.print_error(u"No text fragments found")
        return self.ERROR_EXIT_CODE
    text_file.set_language(language)
    self.print_info(u"Read input text with %d fragments" % (len(text_file)))

    self.print_info(u"Reading audio...")
    try:
        audio_file_mfcc = AudioFileMFCC(audio_file_path, rconf=self.rconf, logger=self.logger)
    except AudioFileConverterError:
        self.print_error(u"Unable to call the ffmpeg executable '%s'" % (self.rconf[RuntimeConfiguration.FFMPEG_PATH]))
        self.print_error(u"Make sure the path to ffmpeg is correct")
        return self.ERROR_EXIT_CODE
    except (AudioFileUnsupportedFormatError, AudioFileNotInitializedError):
        self.print_error(u"Cannot read file '%s'" % (audio_file_path))
        self.print_error(u"Check that its format is supported by ffmpeg")
        return self.ERROR_EXIT_CODE
    except Exception as exc:
        self.print_error(u"An unexpected error occurred while reading the audio file:")
        self.print_error(u"%s" % exc)
        return self.ERROR_EXIT_CODE
    self.print_info(u"Reading audio... done")

    self.print_info(u"Running VAD...")
    audio_file_mfcc.run_vad()
    self.print_info(u"Running VAD... done")

    # optional bounds, looked up as: min head, max head, min tail, max tail
    option_names = (u"--min-head", u"--max-head", u"--min-tail", u"--max-tail")
    min_head, max_head, min_tail, max_tail = [
        gf.safe_float(self.has_option_with_value(name), None)
        for name in option_names
    ]

    self.print_info(u"Detecting audio interval...")
    start_detector = SD(audio_file_mfcc, text_file, rconf=self.rconf, logger=self.logger)
    start, end = start_detector.detect_interval(min_head, max_head, min_tail, max_tail)
    self.print_info(u"Detecting audio interval... done")

    self.print_result(audio_file_mfcc.audio_length, start, end)
    return self.NO_ERROR_EXIT_CODE