def _convert(self):
    """
    Convert the entire audio file into a ``wav`` file.

    (Head/tail will be cut off later.)

    Return a triple:

    1. a success bool flag
    2. handler of the generated wave file
    3. path of the generated wave file

    On failure, the handler and the path obtained so far
    (possibly ``None``) are returned together with ``False``.
    """
    self._log("Converting real audio to wav")
    succeeded = False
    wav_handler, wav_path = None, None
    try:
        self._log("Creating an output tempfile")
        tmp_dir = gf.custom_tmp_dir()
        wav_handler, wav_path = tempfile.mkstemp(suffix=".wav", dir=tmp_dir)
        self._log("Creating a FFMPEGWrapper")
        wrapper = FFMPEGWrapper(logger=self.logger)
        self._log("Converting...")
        wrapper.convert(
            input_file_path=self.task.audio_file_path_absolute,
            output_file_path=wav_path
        )
        self._log("Converting... done")
        self._log("Converting real audio to wav: succeeded")
        succeeded = True
    except Exception as exc:
        # any failure is reported through the flag, not re-raised
        self._log("Converting real audio to wav: failed")
        self._log(["Message: %s", str(exc)])
    return (succeeded, wav_handler, wav_path)
def perform_command(self):
    """
    Convert the file given as first argument into the file
    given as second argument, by means of ``FFMPEGWrapper``,
    and return the appropriate exit code.

    :rtype: int
    """
    if len(self.actual_arguments) < 2:
        return self.print_help()
    source_path, target_path = self.actual_arguments[0], self.actual_arguments[1]

    # the input file is checked first; the output file only if the input is fine
    if not (self.check_input_file(source_path) and self.check_output_file(target_path)):
        return self.ERROR_EXIT_CODE

    try:
        FFMPEGWrapper(rconf=self.rconf, logger=self.logger).convert(source_path, target_path)
        self.print_success(u"Converted '%s' into '%s'" % (source_path, target_path))
        return self.NO_ERROR_EXIT_CODE
    except FFMPEGPathError:
        ffmpeg_path = self.rconf[RuntimeConfiguration.FFMPEG_PATH]
        self.print_error(u"Unable to call the ffmpeg executable '%s'" % (ffmpeg_path))
        self.print_error(u"Make sure the path to ffmpeg is correct")
    except OSError:
        self.print_error(u"Cannot convert file '%s' into '%s'" % (source_path, target_path))
        self.print_error(u"Make sure the input file has a format supported by ffmpeg")

    # reached only after one of the two handled errors
    return self.ERROR_EXIT_CODE
def _convert(self):
    """
    Convert the audio file into a ``wav`` file.

    The head length and the process length read from the task
    configuration are passed to the converter.

    Return a triple:

    1. a success bool flag
    2. handler of the generated wave file
    3. path of the generated wave file

    On failure, the handler and the path obtained so far
    (possibly ``None``) are returned together with ``False``,
    and the error message is logged.
    """
    self._log("Converting real audio to wav")
    handler = None
    path = None
    try:
        self._log("Creating an output tempfile")
        handler, path = tempfile.mkstemp(
            suffix=".wav",
            dir=gf.custom_tmp_dir()
        )
        self._log("Creating a FFMPEGWrapper")
        ffmpeg = FFMPEGWrapper(logger=self.logger)
        self._log("Converting...")
        ffmpeg.convert(
            input_file_path=self.task.audio_file_path_absolute,
            output_file_path=path,
            head_length=self.task.configuration.is_audio_file_head_length,
            process_length=self.task.configuration.is_audio_file_process_length
        )
        self._log("Converting... done")
        self._log("Converting real audio to wav: succeeded")
        return (True, handler, path)
    except Exception as e:
        # "except Exception" (instead of a bare "except") lets
        # KeyboardInterrupt and SystemExit propagate;
        # the reason of the failure is logged to ease debugging
        self._log("Converting real audio to wav: failed")
        self._log(["Message: %s", str(e)])
        return (False, handler, path)
def perform_command(self):
    """
    Run the conversion of the first argument (input file)
    into the second argument (output file) through ``FFMPEGWrapper``,
    and return the appropriate exit code.

    :rtype: int
    """
    arguments = self.actual_arguments
    if len(arguments) < 2:
        return self.print_help()
    in_path = arguments[0]
    out_path = arguments[1]

    # stop at the first failed check: input first, then output
    paths_ok = self.check_input_file(in_path) and self.check_output_file(out_path)
    if not paths_ok:
        return self.ERROR_EXIT_CODE

    try:
        wrapper = FFMPEGWrapper(rconf=self.rconf, logger=self.logger)
        wrapper.convert(in_path, out_path)
        self.print_success(u"Converted '%s' into '%s'" % (in_path, out_path))
        return self.NO_ERROR_EXIT_CODE
    except FFMPEGPathError:
        self.print_error(
            u"Unable to call the ffmpeg executable '%s'" % (self.rconf[RuntimeConfiguration.FFMPEG_PATH])
        )
        self.print_error(u"Make sure the path to ffmpeg is correct")
    except OSError:
        self.print_error(u"Cannot convert file '%s' into '%s'" % (in_path, out_path))
        self.print_error(u"Make sure the input file has a format supported by ffmpeg")

    # one of the two handled errors occurred
    return self.ERROR_EXIT_CODE
def _convert(self):
    """
    Convert the entire audio file into a ``wav`` file.

    (Head/tail will be cut off later.)

    Return a triple:

    1. a success bool flag
    2. handler of the generated wave file
    3. path of the generated wave file

    If anything goes wrong, the flag is ``False`` and
    handler/path hold whatever was obtained before the failure
    (possibly ``None``).
    """
    self._log("Converting real audio to wav")
    ok = False
    out_handler = None
    out_path = None
    try:
        self._log("Creating an output tempfile")
        out_handler, out_path = tempfile.mkstemp(
            suffix=".wav",
            dir=gf.custom_tmp_dir()
        )
        self._log("Creating a FFMPEGWrapper")
        converter = FFMPEGWrapper(logger=self.logger)
        self._log("Converting...")
        converter.convert(
            input_file_path=self.task.audio_file_path_absolute,
            output_file_path=out_path
        )
        self._log("Converting... done")
        self._log("Converting real audio to wav: succeeded")
        ok = True
    except Exception as exc:
        # failures are signalled through the returned flag
        self._log("Converting real audio to wav: failed")
        self._log(["Message: %s", str(exc)])
    return (ok, out_handler, out_path)
def test_cannotload(self):
    """
    Converting an input file that does not exist must raise ``OSError``.
    """
    input_file_path = get_abs_path("res/this_file_does_not_exist.mp3")
    handler, output_file_path = tempfile.mkstemp(suffix=".wav")
    try:
        converter = FFMPEGWrapper()
        with self.assertRaises(OSError):
            converter.convert(input_file_path, output_file_path)
    finally:
        # release the temporary file even if the assertion fails
        os.close(handler)
        os.remove(output_file_path)
def main():
    """
    Entry point: convert the file given as first command line argument
    into the file given as second command line argument.
    """
    if len(sys.argv) < 3:
        usage()
        return
    input_file_path, output_file_path = sys.argv[1:3]
    FFMPEGWrapper().convert(input_file_path, output_file_path)
    print("[INFO] Converted '%s' into '%s'" % (input_file_path, output_file_path))
def convert(self, input_file_path):
    """
    Convert the given input file into ``audio.wav`` inside
    a temporary directory, and assert that the converter
    returns the path of the output file.

    The temporary directory is always removed.

    :param input_file_path: the path of the input file,
                            resolved through ``get_abs_path``
    """
    output_path = tempfile.mkdtemp()
    try:
        output_file_path = os.path.join(output_path, "audio.wav")
        converter = FFMPEGWrapper()
        result = converter.convert(get_abs_path(input_file_path), output_file_path)
        self.assertEqual(result, output_file_path)
    finally:
        # do not leave the directory behind if the conversion
        # raises or the assertion fails
        delete_directory(output_path)
def step3():
    """
    Check whether ``ffmpeg`` can be called, by converting
    a sample MP3 file into a temporary WAVE file.

    Return ``True`` if the converter returns a true value,
    ``False`` if it returns a false value or raises.

    :rtype: bool
    """
    on_info("Test 3/6 (ffmpeg)...")
    succeeded = False
    try:
        on_info(" Trying to call ffmpeg...")
        from aeneas.ffmpegwrapper import FFMPEGWrapper
        input_file_path = get_abs_path("aeneas/tests/res/container/job/assets/p001.mp3")
        handler, output_file_path = tempfile.mkstemp(suffix=".wav")
        try:
            converter = FFMPEGWrapper()
            result = converter.convert(input_file_path, output_file_path)
        finally:
            # always release the temporary file, even if convert() raises
            os.close(handler)
            os.remove(output_file_path)
        succeeded = bool(result)
    except Exception:
        # any failure (import, conversion, cleanup) is reported below;
        # KeyboardInterrupt and SystemExit are deliberately not caught
        succeeded = False
    if succeeded:
        on_info(" Trying to call ffmpeg... succeeded.")
        return True
    on_error(" Unable to call ffmpeg.")
    on_error(" Please make sure you have ffmpeg installed correctly and that it is in your $PATH.")
    return False
def check_ffmpeg(cls):
    """
    Check whether ``ffmpeg`` can be called, by converting
    a sample MP3 file into a temporary WAVE file.

    Return ``True`` on failure and ``False`` on success.

    :rtype: bool
    """
    try:
        from aeneas.ffmpegwrapper import FFMPEGWrapper
        input_file_path = gf.absolute_path(u"tools/res/audio.mp3", __file__)
        handler, output_file_path = gf.tmp_file(suffix=u".wav")
        try:
            converter = FFMPEGWrapper()
            result = converter.convert(input_file_path, output_file_path)
        finally:
            # always release the temporary file, even if convert() raises
            gf.delete_file(handler, output_file_path)
        if result:
            gf.print_success(u"ffmpeg OK")
            return False
    except Exception:
        # any failure is reported by the messages below;
        # KeyboardInterrupt and SystemExit are deliberately not caught
        pass
    gf.print_error(u"ffmpeg ERROR")
    gf.print_info(u" Please make sure you have ffmpeg installed correctly")
    gf.print_info(u" and that its path is in your PATH environment variable")
    return True
def test_convert(self):
    """
    Converting a sample MP3 file must return the path of the output file.
    """
    input_file_path = get_abs_path("res/container/job/assets/p001.mp3")
    handler, output_file_path = tempfile.mkstemp(suffix=".wav")
    try:
        converter = FFMPEGWrapper()
        result = converter.convert(input_file_path, output_file_path)
        self.assertEqual(result, output_file_path)
    finally:
        # release the temporary file even if the conversion
        # raises or the assertion fails
        os.close(handler)
        os.remove(output_file_path)
def convert(self, input_file_path, ofp=None, runtime_configuration=None):
    """
    Convert the given input file and assert that the converter
    returns the path of the output file.

    If ``ofp`` is ``None``, the output file is ``audio.wav``
    inside a temporary directory, which is removed afterwards.
    Otherwise, the output file is ``ofp``: it is deleted
    if the conversion raises ``OSError``, and left in place
    on success.

    :param input_file_path: the path of the input file,
                            relative to this file
    :param ofp: the path of the output file, or ``None``
    :param runtime_configuration: passed to ``FFMPEGWrapper`` as ``rconf``
    :raises: OSError: re-raised after cleaning up, if the conversion fails
    """
    # output_path stays None when the caller provides ofp,
    # so that the cleanup code never touches an unbound name
    output_path = None
    if ofp is None:
        output_path = gf.tmp_directory()
        output_file_path = os.path.join(output_path, "audio.wav")
    else:
        output_file_path = ofp
    try:
        converter = FFMPEGWrapper(rconf=runtime_configuration)
        result = converter.convert(
            gf.absolute_path(input_file_path, __file__),
            output_file_path
        )
        self.assertEqual(result, output_file_path)
        if output_path is not None:
            gf.delete_directory(output_path)
    except OSError:
        if output_path is not None:
            gf.delete_directory(output_path)
        else:
            gf.delete_file(None, ofp)
        # bare raise keeps the original traceback
        raise
def convert(self, input_file_path, ofp=None, runtime_configuration=None):
    """
    Convert the given input file and assert that the converter
    returns the path of the output file.

    If ``ofp`` is ``None``, the output file is ``audio.wav``
    inside a temporary directory, which is removed afterwards.
    Otherwise, the output file is ``ofp``: it is deleted
    if the conversion raises ``OSError``, and left in place
    on success.

    :param input_file_path: the path of the input file,
                            relative to this file
    :param ofp: the path of the output file, or ``None``
    :param runtime_configuration: passed to ``FFMPEGWrapper`` as ``rconf``
    :raises: OSError: re-raised after cleaning up, if the conversion fails
    """
    # only set when this helper creates the temporary directory itself;
    # previously the name was unbound whenever ofp was given
    output_path = None
    if ofp is None:
        output_path = gf.tmp_directory()
        output_file_path = os.path.join(output_path, "audio.wav")
    else:
        output_file_path = ofp
    try:
        converter = FFMPEGWrapper(rconf=runtime_configuration)
        result = converter.convert(
            gf.absolute_path(input_file_path, __file__),
            output_file_path
        )
        self.assertEqual(result, output_file_path)
        if output_path is not None:
            gf.delete_directory(output_path)
    except OSError:
        if output_path is not None:
            gf.delete_directory(output_path)
        else:
            gf.delete_file(None, ofp)
        # bare raise keeps the original traceback
        raise
def step3():
    """
    Check whether ``ffmpeg`` can be called, by converting
    a sample MP3 file into a temporary WAVE file.

    Return ``True`` if the converter returns a true value,
    ``False`` if it returns a false value or raises.

    :rtype: bool
    """
    on_info("Test 3/6 (ffmpeg)...")
    succeeded = False
    try:
        on_info(" Trying to call ffmpeg...")
        from aeneas.ffmpegwrapper import FFMPEGWrapper
        input_file_path = get_abs_path("aeneas/tests/res/container/job/assets/p001.mp3")
        handler, output_file_path = tempfile.mkstemp(suffix=".wav")
        try:
            converter = FFMPEGWrapper()
            result = converter.convert(input_file_path, output_file_path)
        finally:
            # always release the temporary file, even if convert() raises
            os.close(handler)
            os.remove(output_file_path)
        succeeded = bool(result)
    except Exception:
        # any failure (import, conversion, cleanup) is reported below;
        # KeyboardInterrupt and SystemExit are deliberately not caught
        succeeded = False
    if succeeded:
        on_info(" Trying to call ffmpeg... succeeded.")
        return True
    on_error(" Unable to call ffmpeg.")
    on_error(" Please make sure you have ffmpeg installed correctly and that it is in your $PATH.")
    return False
def check_ffmpeg(cls):
    """
    Check whether ``ffmpeg`` can be called, by converting
    a sample MP3 file into a temporary WAVE file.

    Return ``True`` on failure and ``False`` on success.

    :rtype: bool
    """
    try:
        from aeneas.ffmpegwrapper import FFMPEGWrapper
        input_file_path = gf.absolute_path(u"tools/res/audio.mp3", __file__)
        handler, output_file_path = gf.tmp_file(suffix=u".wav")
        try:
            converter = FFMPEGWrapper()
            result = converter.convert(input_file_path, output_file_path)
        finally:
            # always release the temporary file, even if convert() raises
            gf.delete_file(handler, output_file_path)
        if result:
            gf.print_success(u"ffmpeg OK")
            return False
    except Exception:
        # any failure is reported by the messages below;
        # KeyboardInterrupt and SystemExit are deliberately not caught
        pass
    gf.print_error(u"ffmpeg ERROR")
    gf.print_info(u" Please make sure you have ffmpeg installed correctly")
    gf.print_info(u" and that its path is in your PATH environment variable")
    return True
def main():
    """
    Entry point: detect the audio interval containing the given text.

    Command line arguments, as read from ``sys.argv``:

    1. the language
    2. the text file path (or a ``|``-separated list of fragments,
       when the text format is ``list``)
    3. the text format
    4. zero or more options: a verbose flag, or ``key=value`` pairs
    5. the audio file path (always the last argument)

    The audio file is converted into a temporary WAVE file,
    which is removed before returning, even on failure.
    """
    if len(sys.argv) < 5:
        usage()
        return
    language = sys.argv[1]
    text_file_path = sys.argv[2]
    text_format = sys.argv[3]
    audio_file_path = sys.argv[-1]
    verbose = False
    parameters = {}

    # options live between the text format and the audio file path
    for argument in sys.argv[4:-1]:
        args = argument.split("=")
        if len(args) == 1:
            # only switch verbosity on: a later unrelated bare argument
            # must not silently switch it off again
            if args[0] in ["v", "-v", "verbose", "--verbose"]:
                verbose = True
        if len(args) == 2:
            key, value = args
            if key == "id_regex":
                parameters[gc.PPN_JOB_IS_TEXT_UNPARSED_ID_REGEX] = value
            elif key == "class_regex":
                parameters[gc.PPN_JOB_IS_TEXT_UNPARSED_CLASS_REGEX] = value
            elif key == "sort":
                parameters[gc.PPN_JOB_IS_TEXT_UNPARSED_ID_SORT] = value
            elif key == "min_head_length":
                parameters["min_head_length"] = float(value)
            elif key == "max_head_length":
                parameters["max_head_length"] = float(value)
            elif key == "min_tail_length":
                # was stored under "min_head_length" by mistake,
                # overriding the head value and ignoring the tail one
                parameters["min_tail_length"] = float(value)
            elif key == "max_tail_length":
                parameters["max_tail_length"] = float(value)

    if not gf.can_run_c_extension():
        print("[WARN] Unable to load Python C Extensions")
        print("[WARN] Running the slower pure Python code")
        print("[WARN] See the README file for directions to compile the Python C Extensions")

    logger = Logger(tee=verbose)

    print("[INFO] Reading audio...")
    tmp_handler, tmp_file_path = tempfile.mkstemp(
        suffix=".wav",
        dir=gf.custom_tmp_dir()
    )
    try:
        converter = FFMPEGWrapper(logger=logger)
        converter.convert(audio_file_path, tmp_file_path)
        audio_file = AudioFile(tmp_file_path)
        print("[INFO] Reading audio... done")

        print("[INFO] Reading text...")
        if text_format == "list":
            text_file = TextFile()
            text_file.read_from_list(text_file_path.split("|"))
        else:
            text_file = TextFile(text_file_path, text_format, parameters)
        text_file.set_language(language)
        print("[INFO] Reading text... done")

        print("[INFO] Detecting audio interval...")
        sd = SD(audio_file, text_file, logger=logger)
        # command line values override the defaults from gc
        min_head_length = parameters.get("min_head_length", gc.SD_MIN_HEAD_LENGTH)
        max_head_length = parameters.get("max_head_length", gc.SD_MAX_HEAD_LENGTH)
        min_tail_length = parameters.get("min_tail_length", gc.SD_MIN_TAIL_LENGTH)
        max_tail_length = parameters.get("max_tail_length", gc.SD_MAX_TAIL_LENGTH)
        start, end = sd.detect_interval(
            min_head_length,
            max_head_length,
            min_tail_length,
            max_tail_length
        )
        zero = 0
        audio_len = audio_file.audio_length
        head_len = start
        text_len = end - start
        tail_len = audio_len - end
        print("[INFO] Detecting audio interval... done")

        # report the three intervals, first as numbers...
        print("[INFO] ")
        print("[INFO] Head: %.3f %.3f (%.3f)" % (zero, start, head_len))
        print("[INFO] Text: %.3f %.3f (%.3f)" % (start, end, text_len))
        print("[INFO] Tail: %.3f %.3f (%.3f)" % (end, audio_len, tail_len))
        print("[INFO] ")

        # ...then formatted by gf.time_to_hhmmssmmm
        zero_h = gf.time_to_hhmmssmmm(0)
        start_h = gf.time_to_hhmmssmmm(start)
        end_h = gf.time_to_hhmmssmmm(end)
        audio_len_h = gf.time_to_hhmmssmmm(audio_len)
        head_len_h = gf.time_to_hhmmssmmm(head_len)
        text_len_h = gf.time_to_hhmmssmmm(text_len)
        tail_len_h = gf.time_to_hhmmssmmm(tail_len)
        print("[INFO] Head: %s %s (%s)" % (zero_h, start_h, head_len_h))
        print("[INFO] Text: %s %s (%s)" % (start_h, end_h, text_len_h))
        print("[INFO] Tail: %s %s (%s)" % (end_h, audio_len_h, tail_len_h))
    finally:
        # remove the temporary WAVE file even if something failed
        cleanup(tmp_handler, tmp_file_path)
def main():
    """
    Entry point: detect the audio interval containing the given text.

    Command line arguments, as read from ``sys.argv``:

    1. the language
    2. the text file path (or a ``|``-separated list of fragments,
       when the text format is ``list``)
    3. the text format
    4. zero or more options: a verbose flag, or ``key=value`` pairs
    5. the audio file path (always the last argument)

    The audio file is converted into a temporary WAVE file,
    which is removed before returning, even on failure.
    """
    if len(sys.argv) < 5:
        usage()
        return
    language = sys.argv[1]
    text_file_path = sys.argv[2]
    text_format = sys.argv[3]
    audio_file_path = sys.argv[-1]
    verbose = False
    parameters = {}

    # options live between the text format and the audio file path
    for argument in sys.argv[4:-1]:
        args = argument.split("=")
        if len(args) == 1:
            # only switch verbosity on: a later unrelated bare argument
            # must not silently switch it off again
            if args[0] in ["v", "-v", "verbose", "--verbose"]:
                verbose = True
        if len(args) == 2:
            key, value = args
            if key == "id_regex":
                parameters[gc.PPN_JOB_IS_TEXT_UNPARSED_ID_REGEX] = value
            elif key == "class_regex":
                parameters[gc.PPN_JOB_IS_TEXT_UNPARSED_CLASS_REGEX] = value
            elif key == "sort":
                parameters[gc.PPN_JOB_IS_TEXT_UNPARSED_ID_SORT] = value
            elif key == "min_head_length":
                parameters["min_head_length"] = float(value)
            elif key == "max_head_length":
                parameters["max_head_length"] = float(value)
            elif key == "min_tail_length":
                # was stored under "min_head_length" by mistake,
                # overriding the head value and ignoring the tail one
                parameters["min_tail_length"] = float(value)
            elif key == "max_tail_length":
                parameters["max_tail_length"] = float(value)

    if not gf.can_run_c_extension():
        print("[WARN] Unable to load Python C Extensions")
        print("[WARN] Running the slower pure Python code")
        print("[WARN] See the README file for directions to compile the Python C Extensions")

    logger = Logger(tee=verbose)

    print("[INFO] Reading audio...")
    tmp_handler, tmp_file_path = tempfile.mkstemp(suffix=".wav", dir=gf.custom_tmp_dir())
    try:
        converter = FFMPEGWrapper(logger=logger)
        converter.convert(audio_file_path, tmp_file_path)
        audio_file = AudioFile(tmp_file_path)
        print("[INFO] Reading audio... done")

        print("[INFO] Reading text...")
        if text_format == "list":
            text_file = TextFile()
            text_file.read_from_list(text_file_path.split("|"))
        else:
            text_file = TextFile(text_file_path, text_format, parameters)
        text_file.set_language(language)
        print("[INFO] Reading text... done")

        print("[INFO] Detecting audio interval...")
        sd = SD(audio_file, text_file, logger=logger)
        # command line values override the defaults from gc
        min_head_length = parameters.get("min_head_length", gc.SD_MIN_HEAD_LENGTH)
        max_head_length = parameters.get("max_head_length", gc.SD_MAX_HEAD_LENGTH)
        min_tail_length = parameters.get("min_tail_length", gc.SD_MIN_TAIL_LENGTH)
        max_tail_length = parameters.get("max_tail_length", gc.SD_MAX_TAIL_LENGTH)
        start, end = sd.detect_interval(min_head_length, max_head_length, min_tail_length, max_tail_length)
        zero = 0
        audio_len = audio_file.audio_length
        head_len = start
        text_len = end - start
        tail_len = audio_len - end
        print("[INFO] Detecting audio interval... done")

        # report the three intervals, first as numbers...
        print("[INFO] ")
        print("[INFO] Head: %.3f %.3f (%.3f)" % (zero, start, head_len))
        print("[INFO] Text: %.3f %.3f (%.3f)" % (start, end, text_len))
        print("[INFO] Tail: %.3f %.3f (%.3f)" % (end, audio_len, tail_len))
        print("[INFO] ")

        # ...then formatted by gf.time_to_hhmmssmmm
        zero_h = gf.time_to_hhmmssmmm(0)
        start_h = gf.time_to_hhmmssmmm(start)
        end_h = gf.time_to_hhmmssmmm(end)
        audio_len_h = gf.time_to_hhmmssmmm(audio_len)
        head_len_h = gf.time_to_hhmmssmmm(head_len)
        text_len_h = gf.time_to_hhmmssmmm(text_len)
        tail_len_h = gf.time_to_hhmmssmmm(tail_len)
        print("[INFO] Head: %s %s (%s)" % (zero_h, start_h, head_len_h))
        print("[INFO] Text: %s %s (%s)" % (start_h, end_h, text_len_h))
        print("[INFO] Tail: %s %s (%s)" % (end_h, audio_len_h, tail_len_h))
    finally:
        # remove the temporary WAVE file even if something failed
        cleanup(tmp_handler, tmp_file_path)
def main():
    """
    Check, in four steps, that the dependencies of aeneas are met:

    1. the ``aeneas`` package can be imported
    2. ``ffprobe`` can be called
    3. ``ffmpeg`` can be called
    4. ``espeak`` can be called

    The function reports the first failing step and returns
    immediately; if all the steps succeed, a final message is printed.
    """
    on_info("Test 1/4...")
    try:
        on_info("Trying to import package aeneas...")
        # imported only to verify that the package can be imported
        import aeneas
        on_info("Trying to import package aeneas... succeeded.")
    except ImportError:
        on_error("Unable to import package aeneas.")
        on_error("Check that you have installed the following Python (2.7.x) packages:")
        on_error("1. BeautifulSoup")
        on_error("2. numpy")
        on_error("3. scikits")
        return

    on_info("Test 2/4...")
    try:
        on_info("Trying to call ffprobe...")
        from aeneas.ffprobewrapper import FFPROBEWrapper
        file_path = get_abs_path("aeneas/tests/res/container/job/assets/p001.mp3")
        prober = FFPROBEWrapper()
        # the result is irrelevant: only the fact that the call succeeds matters
        prober.read_properties(file_path)
        on_info("Trying to call ffprobe... succeeded.")
    except Exception:
        on_error("Unable to call ffprobe.")
        on_error("Please make sure you have ffprobe installed correctly and that it is in your $PATH.")
        return

    on_info("Test 3/4...")
    try:
        on_info("Trying to call ffmpeg...")
        from aeneas.ffmpegwrapper import FFMPEGWrapper
        input_file_path = get_abs_path("aeneas/tests/res/container/job/assets/p001.mp3")
        handler, output_file_path = tempfile.mkstemp(suffix=".wav")
        try:
            converter = FFMPEGWrapper()
            result = converter.convert(input_file_path, output_file_path)
        finally:
            # always release the temporary file, even if convert() raises
            os.close(handler)
            os.remove(output_file_path)
        if not result:
            on_error("Unable to call ffmpeg.")
            on_error("Please make sure you have ffmpeg installed correctly and that it is in your $PATH.")
            return
        on_info("Trying to call ffmpeg... succeeded.")
    except Exception:
        on_error("Unable to call ffmpeg.")
        on_error("Please make sure you have ffmpeg installed correctly and that it is in your $PATH.")
        return

    on_info("Test 4/4...")
    try:
        on_info("Trying to call espeak...")
        from aeneas.espeakwrapper import ESPEAKWrapper
        from aeneas.language import Language
        text = u"From fairest creatures we desire increase,"
        language = Language.EN
        handler, output_file_path = tempfile.mkstemp(suffix=".wav")
        try:
            espeak = ESPEAKWrapper()
            result = espeak.synthesize(text, language, output_file_path)
        finally:
            # always release the temporary file, even if synthesize() raises
            os.close(handler)
            os.remove(output_file_path)
        if not result:
            on_error("Unable to call espeak.")
            on_error("Please make sure you have espeak installed correctly and that it is in your $PATH.")
            return
        on_info("Trying to call espeak... succeeded.")
    except Exception:
        on_error("Unable to call espeak.")
        on_error("Please make sure you have espeak installed correctly and that it is in your $PATH.")
        return

    on_info("Congratulations, all dependencies are met.")
    on_info("Enjoy running aeneas!")
def read_samples_from_file(self):
    """
    Load the audio samples from file into memory.

    If ``self.is_mono_wave`` is ``False``,
    the file will be first converted
    to a temporary PCM16 mono WAVE file.
    Audio data will be read from this temporary file,
    which will be then deleted from disk immediately,
    even if reading the samples fails.

    If ``self.is_mono_wave`` is ``True``,
    the audio data will be read directly
    from the given file,
    which will not be deleted from disk.

    :raises: :class:`~aeneas.audiofile.AudioFileConverterError`: if the path to the ``ffmpeg`` executable cannot be called
    :raises: :class:`~aeneas.audiofile.AudioFileUnsupportedFormatError`: if the audio file has a format not supported
    :raises: OSError: if the audio file cannot be read
    """
    self.log(u"Loading audio data...")

    # check the file can be read
    if not gf.file_can_be_read(self.file_path):
        self.log_exc(u"File '%s' cannot be read" % (self.file_path), None, True, OSError)

    # convert file to PCM16 mono WAVE
    if self.is_mono_wave:
        self.log(u"is_mono_wave=True => reading self.file_path directly")
        tmp_handler = None
        tmp_file_path = self.file_path
    else:
        self.log(u"is_mono_wave=False => converting self.file_path")
        tmp_handler, tmp_file_path = gf.tmp_file(suffix=u".wav", root=self.rconf[RuntimeConfiguration.TMP_PATH])
        self.log([u"Temporary PCM16 mono WAVE file: '%s'", tmp_file_path])
        try:
            self.log(u"Converting audio file to mono...")
            converter = FFMPEGWrapper(rconf=self.rconf, logger=self.logger)
            converter.convert(self.file_path, tmp_file_path)
            self.log(u"Converting audio file to mono... done")
        except FFMPEGPathError:
            gf.delete_file(tmp_handler, tmp_file_path)
            self.log_exc(u"Unable to call ffmpeg executable", None, True, AudioFileConverterError)
        except OSError:
            gf.delete_file(tmp_handler, tmp_file_path)
            self.log_exc(u"Audio file format not supported by ffmpeg", None, True, AudioFileUnsupportedFormatError)

    # TODO allow calling C extension cwave to read samples faster
    try:
        self.audio_format = "pcm16"
        self.audio_channels = 1
        self.audio_sample_rate, self.__samples = scipywavread(tmp_file_path)
        # scipy reads a sample as an int16_t, that is, a number in [-32768, 32767]
        # so we convert it to a float64 in [-1, 1]
        self.__samples = self.__samples.astype("float64") / 32768
        self.__samples_capacity = len(self.__samples)
        self.__samples_length = self.__samples_capacity
        self._update_length()
    except ValueError:
        # NOTE(review): log_exc is expected to raise here (see :raises: above)
        self.log_exc(u"Audio format not supported by scipywavread", None, True, AudioFileUnsupportedFormatError)
    finally:
        # delete the temporary file also when reading the samples failed,
        # otherwise it would be left on disk
        if not self.is_mono_wave:
            gf.delete_file(tmp_handler, tmp_file_path)
            self.log([u"Deleted temporary PCM16 mono WAVE file: '%s'", tmp_file_path])

    self._update_length()
    self.log([u"Sample length: %.3f", self.audio_length])
    self.log([u"Sample rate: %d", self.audio_sample_rate])
    self.log([u"Audio format: %s", self.audio_format])
    self.log([u"Audio channels: %d", self.audio_channels])
    self.log(u"Loading audio data... done")
def read_samples_from_file(self):
    """
    Load the audio samples from file into memory.

    If ``self.file_format`` is ``None``,
    or if safety checks are enabled and it is not
    ``("pcm_s16le", 1, self.rconf.sample_rate)``,
    the file will be first converted
    to a temporary PCM16 mono WAVE file.
    Audio data will be read from this temporary file,
    which will be then deleted from disk immediately,
    even if reading the samples fails.

    Otherwise,
    the audio data will be read directly
    from the given file,
    which will not be deleted from disk.

    :raises: :class:`~aeneas.audiofile.AudioFileConverterError`: if the path to the ``ffmpeg`` executable cannot be called
    :raises: :class:`~aeneas.audiofile.AudioFileUnsupportedFormatError`: if the audio file has a format not supported
    :raises: OSError: if the audio file cannot be read
    """
    self.log(u"Loading audio data...")

    # check the file can be read
    if not gf.file_can_be_read(self.file_path):
        self.log_exc(u"File '%s' cannot be read" % (self.file_path), None, True, OSError)

    # determine if we need to convert the audio file
    convert_audio_file = (
        (self.file_format is None) or
        (
            (self.rconf.safety_checks) and
            (self.file_format != ("pcm_s16le", 1, self.rconf.sample_rate))
        )
    )

    # convert the audio file if needed
    if convert_audio_file:
        # convert file to PCM16 mono WAVE with correct sample rate
        self.log(u"self.file_format is None or not good => converting self.file_path")
        tmp_handler, tmp_file_path = gf.tmp_file(suffix=u".wav", root=self.rconf[RuntimeConfiguration.TMP_PATH])
        self.log([u"Temporary PCM16 mono WAVE file: '%s'", tmp_file_path])
        try:
            self.log(u"Converting audio file to mono...")
            converter = FFMPEGWrapper(rconf=self.rconf, logger=self.logger)
            converter.convert(self.file_path, tmp_file_path)
            self.file_format = ("pcm_s16le", 1, self.rconf.sample_rate)
            self.log(u"Converting audio file to mono... done")
        except FFMPEGPathError:
            gf.delete_file(tmp_handler, tmp_file_path)
            self.log_exc(u"Unable to call ffmpeg executable", None, True, AudioFileConverterError)
        except OSError:
            gf.delete_file(tmp_handler, tmp_file_path)
            self.log_exc(u"Audio file format not supported by ffmpeg", None, True, AudioFileUnsupportedFormatError)
    else:
        # read the file directly
        if self.rconf.safety_checks:
            self.log(u"self.file_format is good => reading self.file_path directly")
        else:
            self.log_warn(u"Safety checks disabled => reading self.file_path directly")
        tmp_handler = None
        tmp_file_path = self.file_path

    # TODO allow calling C extension cwave to read samples faster
    try:
        self.audio_format = "pcm16"
        self.audio_channels = 1
        self.audio_sample_rate, self.__samples = scipywavread(tmp_file_path)
        # scipy reads a sample as an int16_t, that is, a number in [-32768, 32767]
        # so we convert it to a float64 in [-1, 1]
        self.__samples = self.__samples.astype("float64") / 32768
        self.__samples_capacity = len(self.__samples)
        self.__samples_length = self.__samples_capacity
        self._update_length()
    except ValueError:
        # NOTE(review): log_exc is expected to raise here (see :raises: above)
        self.log_exc(u"Audio format not supported by scipywavread", None, True, AudioFileUnsupportedFormatError)
    finally:
        # if we converted the audio file, delete the temporary converted
        # audio file, also when reading the samples failed
        if convert_audio_file:
            gf.delete_file(tmp_handler, tmp_file_path)
            self.log([u"Deleted temporary audio file: '%s'", tmp_file_path])

    self._update_length()
    self.log([u"Sample length: %.3f", self.audio_length])
    self.log([u"Sample rate: %d", self.audio_sample_rate])
    self.log([u"Audio format: %s", self.audio_format])
    self.log([u"Audio channels: %d", self.audio_channels])
    self.log(u"Loading audio data... done")
def main():
    """
    Entry point: run the VAD on an audio file and write
    the detected intervals to the output file.

    Command line arguments, as read from ``sys.argv``:

    1. the audio file path
    2. the mode: ``speech``, ``nonspeech``, or ``both``
    3. the output file path
    4. optionally ``-v`` as last argument, to enable verbose logging

    The audio file is converted into a temporary WAVE file,
    which is removed once the VAD has been computed, even on failure.
    """
    if len(sys.argv) < 4:
        usage()
        return
    audio_file_path = sys.argv[1]
    mode = sys.argv[2]
    output_file_path = sys.argv[3]
    verbose = (sys.argv[-1] == "-v")

    # validate the mode BEFORE creating the temporary file,
    # so that an invalid mode does not leave a file behind
    if mode not in ["speech", "nonspeech", "both"]:
        usage()
        return

    if not gf.can_run_c_extension():
        print("[WARN] Unable to load Python C Extensions")
        print("[WARN] Running the slower pure Python code")
        print("[WARN] See the README file for directions to compile the Python C Extensions")

    logger = Logger(tee=verbose)

    tmp_handler, tmp_file_path = tempfile.mkstemp(
        suffix=".wav",
        dir=gf.custom_tmp_dir()
    )
    try:
        print("[INFO] Converting audio file to mono...")
        converter = FFMPEGWrapper(logger=logger)
        converter.convert(audio_file_path, tmp_file_path)
        print("[INFO] Converting audio file to mono... done")

        vad = VAD(tmp_file_path, logger=logger)
        print("[INFO] Extracting MFCCs...")
        vad.compute_mfcc()
        print("[INFO] Extracting MFCCs... done")
        print("[INFO] Executing VAD...")
        vad.compute_vad()
        print("[INFO] Executing VAD... done")
    finally:
        # remove the temporary WAVE file even if something failed
        print("[INFO] Cleaning up...")
        cleanup(tmp_handler, tmp_file_path)
        print("[INFO] Cleaning up... done")

    if mode == "speech":
        print("[INFO] Creating speech file...")
        with open(output_file_path, "w") as output_file:
            for interval in vad.speech:
                output_file.write("%.3f\t%.3f\n" % (interval[0], interval[1]))
        print("[INFO] Creating speech file... done")

    if mode == "nonspeech":
        print("[INFO] Creating nonspeech file...")
        with open(output_file_path, "w") as output_file:
            for interval in vad.nonspeech:
                output_file.write("%.3f\t%.3f\n" % (interval[0], interval[1]))
        print("[INFO] Creating nonspeech file... done")

    if mode == "both":
        print("[INFO] Creating speech and nonspeech file...")
        speech = [[x[0], x[1], "speech"] for x in vad.speech]
        nonspeech = [[x[0], x[1], "nonspeech"] for x in vad.nonspeech]
        # merge the two lists, ordered by interval
        both = sorted(speech + nonspeech)
        with open(output_file_path, "w") as output_file:
            for interval in both:
                output_file.write("%.3f\t%.3f\t%s\n" % (
                    interval[0],
                    interval[1],
                    interval[2]
                ))
        print("[INFO] Creating speech and nonspeech file... done")

    print("[INFO] Created file %s" % output_file_path)