def _read_vtt(self, input_file):
    """
    Read from WebVTT file.

    Each cue is expected as::

        <numeric identifier>
        <begin> --> <end>
        <text line(s)>

    and one ``SyncMapFragment`` is appended to ``self`` per cue.
    """
    lines = input_file.readlines()
    total = len(lines)
    # ignore the first line containing "WEBVTT" and the following blank line
    cursor = 2
    while cursor < total:
        current = lines[cursor].strip()
        if current:
            # the cue identifier must be an integer;
            # it is normalized to the "fNNNNNN" form
            identifier = "f" + str(int(current)).zfill(6)
            cursor += 1
            if cursor < total:
                current = lines[cursor].strip()
                timings = current.split(" --> ")
                if len(timings) == 2:
                    begin = gf.time_from_hhmmssmmm(timings[0])
                    end = gf.time_from_hhmmssmmm(timings[1])
                    # collect the non-blank text lines of this cue,
                    # stopping at the first blank line or at EOF
                    cue_lines = []
                    while (cursor + 1 < total) and current:
                        cursor += 1
                        current = lines[cursor].strip()
                        if current:
                            cue_lines.append(current)
                    # should never happen, but just in case...
                    if not cue_lines:
                        cue_lines = [""]
                    text_fragment = TextFragment(identifier=identifier, lines=cue_lines)
                    self.append(SyncMapFragment(text_fragment, begin, end))
        cursor += 1
def _read_smil(self, input_file):
    """
    Read from SMIL file.

    Limitations:
    1. parses only <par> elements, in order
    2. timings must have hh:mm:ss.mmm or ss.mmm format (autodetected)
    3. both clipBegin and clipEnd attributes of <audio> must be populated
    """
    SMIL_NS = "{http://www.w3.org/ns/SMIL}"
    root = etree.fromstring(input_file.read().encode("utf-8"))
    for par in root.iter(SMIL_NS + "par"):
        for child in par:
            if child.tag == (SMIL_NS + "text"):
                # the fragment identifier is the URL fragment of @src
                identifier = gf.split_url(child.get("src"))[1]
            elif child.tag == (SMIL_NS + "audio"):
                # autodetect the timing format: try hh:mm:ss.mmm first,
                # then fall back to ss.mmm
                begin = gf.time_from_hhmmssmmm(child.get("clipBegin"))
                if begin is None:
                    begin = gf.time_from_ssmmm(child.get("clipBegin"))
                end = gf.time_from_hhmmssmmm(child.get("clipEnd"))
                if end is None:
                    end = gf.time_from_ssmmm(child.get("clipEnd"))
                # TODO read text from additional text_file?
                text_fragment = TextFragment(identifier=identifier, lines=[u""])
                self.append(SyncMapFragment(text_fragment, begin, end))
def _read_srt(self, input_file):
    """
    Read from SRT file.

    Each subtitle block is expected as::

        <numeric identifier>
        <begin> --> <end>       (with "," as decimal separator)
        <text line(s)>

    and one ``SyncMapFragment`` is appended to ``self`` per block.
    """
    lines = input_file.readlines()
    total = len(lines)
    cursor = 0
    while cursor < total:
        current = lines[cursor].strip()
        if current:
            # the subtitle identifier must be an integer;
            # it is normalized to the "fNNNNNN" form
            identifier = "f" + str(int(current)).zfill(6)
            cursor += 1
            if cursor < total:
                current = lines[cursor].strip()
                timings = current.split(" --> ")
                if len(timings) == 2:
                    # SRT uses "," as the decimal separator in timings
                    begin = gf.time_from_hhmmssmmm(timings[0], decimal_separator=",")
                    end = gf.time_from_hhmmssmmm(timings[1], decimal_separator=",")
                    # collect the non-blank text lines of this subtitle,
                    # stopping at the first blank line or at EOF
                    fragment_text = []
                    while (cursor + 1 < total) and current:
                        cursor += 1
                        current = lines[cursor].strip()
                        if current:
                            fragment_text.append(current)
                    # should never happen, but just in case...
                    if not fragment_text:
                        fragment_text = [""]
                    text_fragment = TextFragment(identifier=identifier, lines=fragment_text)
                    self.append(SyncMapFragment(text_fragment, begin, end))
        cursor += 1
def parse(self, input_text, syncmap):
    """
    Read from SMIL file.

    Limitations:
    1. parses only ``<par>`` elements, in order
    2. timings must have ``hh:mm:ss.mmm`` or ``ss.mmm`` format (autodetected)
    3. both ``clipBegin`` and ``clipEnd`` attributes of ``<audio>`` must be populated
    """
    from lxml import etree

    def parse_time(value):
        # autodetect the timing format: hh:mm:ss.mmm if a ":" is
        # present, ss.mmm otherwise
        if ":" in value:
            return gf.time_from_hhmmssmmm(value)
        return gf.time_from_ssmmm(value)

    SMIL_NS = "{http://www.w3.org/ns/SMIL}"
    root = etree.fromstring(gf.safe_bytes(input_text))
    for par in root.iter(SMIL_NS + "par"):
        for child in par:
            if child.tag == (SMIL_NS + "text"):
                # the fragment identifier is the URL fragment of @src
                identifier = gf.safe_unicode(gf.split_url(child.get("src"))[1])
            elif child.tag == (SMIL_NS + "audio"):
                begin = parse_time(child.get("clipBegin"))
                end = parse_time(child.get("clipEnd"))
                # TODO read text from additional text_file?
                self._add_fragment(
                    syncmap=syncmap,
                    identifier=identifier,
                    lines=[u""],
                    begin=begin,
                    end=end
                )
def test_time_from_hhmmssmmm(self):
    """Check gf.time_from_hhmmssmmm on valid, tolerated, and rejected inputs."""
    cases = (
        (None, 0.000),
        ("", 0.000),
        ("23:45.678", 0.000),       # no 2 ":"
        ("3:45.678", 0.000),        # no 2 ":"
        ("45.678", 0.000),          # no 2 ":"
        ("5.678", 0.000),           # no 2 ":"
        ("5", 0.000),               # no 2 ":"
        ("00:00:01", 0.000),        # no "."
        ("1:23:45.678", 5025.678),  # tolerate this (?)
        ("1:2:45.678", 3765.678),   # tolerate this (?)
        ("1:23:4.678", 4984.678),   # tolerate this (?)
        ("1:23:4.", 4984.000),      # tolerate this (?)
        ("00:00:00.000", 0.000),
        ("00:00:12.000", 12.000),
        ("00:00:12.345", 12.345),
        ("00:01:00.000", 60),
        ("00:01:23.000", 83.000),
        ("00:01:23.456", 83.456),
        ("01:00:00.000", 3600.000),
        ("01:00:12.000", 3612.000),
        ("01:00:12.345", 3612.345),
        ("01:23:00.000", 4980.000),
        ("01:23:45.000", 5025.000),
        ("01:23:45.678", 5025.678),
    )
    for raw, expected in cases:
        self.assertEqual(gf.time_from_hhmmssmmm(raw), expected)
def test_time_from_hhmmssmmm(self):
    """Check gf.time_from_hhmmssmmm on valid, tolerated, and rejected inputs."""
    cases = (
        (None, TimeValue("0.000")),
        ("", TimeValue("0.000")),
        ("23:45.678", TimeValue("0.000")),          # no 2 ":"
        ("3:45.678", TimeValue("0.000")),           # no 2 ":"
        ("45.678", TimeValue("0.000")),             # no 2 ":"
        ("5.678", TimeValue("0.000")),              # no 2 ":"
        ("5", TimeValue("0.000")),                  # no 2 ":"
        ("00:00:01", TimeValue("0.000")),           # no "."
        ("1:23:45.678", TimeValue("5025.678")),     # tolerate this (?)
        ("1:2:45.678", TimeValue("3765.678")),      # tolerate this (?)
        ("1:23:4.678", TimeValue("4984.678")),      # tolerate this (?)
        ("1:23:4.", TimeValue("4984.000")),         # tolerate this (?)
        ("00:00:00.000", TimeValue("0.000")),
        ("00:00:12.000", TimeValue("12.000")),
        ("00:00:12.345", TimeValue("12.345")),
        ("00:01:00.000", TimeValue("60")),
        ("00:01:23.000", TimeValue("83.000")),
        ("00:01:23.456", TimeValue("83.456")),
        ("01:00:00.000", TimeValue("3600.000")),
        ("01:00:12.000", TimeValue("3612.000")),
        ("01:00:12.345", TimeValue("3612.345")),
        ("01:23:00.000", TimeValue("4980.000")),
        ("01:23:45.000", TimeValue("5025.000")),
        ("01:23:45.678", TimeValue("5025.678")),
    )
    for raw, expected in cases:
        self.assertEqual(gf.time_from_hhmmssmmm(raw), expected)
def read_properties(self, audio_file_path):
    """
    Read the properties of an audio file
    and return them as a dictionary.

    Example: ::

        d["index"]=0
        d["codec_name"]=mp3
        d["codec_long_name"]=MP3 (MPEG audio layer 3)
        d["profile"]=unknown
        d["codec_type"]=audio
        d["codec_time_base"]=1/44100
        d["codec_tag_string"]=[0][0][0][0]
        d["codec_tag"]=0x0000
        d["sample_fmt"]=s16p
        d["sample_rate"]=44100
        d["channels"]=1
        d["channel_layout"]=mono
        d["bits_per_sample"]=0
        d["id"]=N/A
        d["r_frame_rate"]=0/0
        d["avg_frame_rate"]=0/0
        d["time_base"]=1/14112000
        d["start_pts"]=0
        d["start_time"]=0.000000
        d["duration_ts"]=1545083190
        d["duration"]=109.487188
        d["bit_rate"]=128000
        d["max_bit_rate"]=N/A
        d["bits_per_raw_sample"]=N/A
        d["nb_frames"]=N/A
        d["nb_read_frames"]=N/A
        d["nb_read_packets"]=N/A
        d["DISPOSITION:default"]=0
        d["DISPOSITION:dub"]=0
        d["DISPOSITION:original"]=0
        d["DISPOSITION:comment"]=0
        d["DISPOSITION:lyrics"]=0
        d["DISPOSITION:karaoke"]=0
        d["DISPOSITION:forced"]=0
        d["DISPOSITION:hearing_impaired"]=0
        d["DISPOSITION:visual_impaired"]=0
        d["DISPOSITION:clean_effects"]=0
        d["DISPOSITION:attached_pic"]=0

    :param string audio_file_path: the path of the audio file to analyze
    :rtype: dict
    :raises: TypeError: if ``audio_file_path`` is None
    :raises: OSError: if the file at ``audio_file_path`` cannot be read
    :raises: FFPROBEParsingError: if the call to ``ffprobe`` does not produce any output
    :raises: FFPROBEPathError: if the path to the ``ffprobe`` executable cannot be called
    :raises: FFPROBEUnsupportedFormatError: if the file has a format not supported by ``ffprobe``
    """
    # test if we can read the file at audio_file_path
    if audio_file_path is None:
        self.log_exc(u"The audio file path is None", None, True, TypeError)
    if not gf.file_can_be_read(audio_file_path):
        self.log_exc(u"Input file '%s' cannot be read" % (audio_file_path), None, True, OSError)

    # call ffprobe
    arguments = [self.rconf[RuntimeConfiguration.FFPROBE_PATH]]
    arguments.extend(self.FFPROBE_PARAMETERS)
    arguments.append(audio_file_path)
    self.log([u"Calling with arguments '%s'", arguments])
    try:
        proc = subprocess.Popen(
            arguments,
            stdout=subprocess.PIPE,
            stdin=subprocess.PIPE,
            stderr=subprocess.PIPE
        )
        (stdoutdata, stderrdata) = proc.communicate()
        proc.stdout.close()
        proc.stdin.close()
        proc.stderr.close()
    except OSError as exc:
        self.log_exc(u"Unable to call the '%s' ffprobe executable" % (self.rconf[RuntimeConfiguration.FFPROBE_PATH]), exc, True, FFPROBEPathError)
    self.log(u"Call completed")

    # check there is some output
    # NOTE(review): this tests len(stderrdata), not len(stdoutdata);
    # the stderr fallback below needs stderr content, but confirm that
    # an empty stderr with a non-empty stdout should really be an error
    if (stdoutdata is None) or (len(stderrdata) == 0):
        self.log_exc(u"ffprobe produced no output", None, True, FFPROBEParsingError)

    # decode stdoutdata and stderrdata to Unicode string
    try:
        stdoutdata = gf.safe_unicode(stdoutdata)
        stderrdata = gf.safe_unicode(stderrdata)
    except UnicodeDecodeError as exc:
        self.log_exc(u"Unable to decode ffprobe out/err", exc, True, FFPROBEParsingError)

    # dictionary for the results
    results = {
        self.STDOUT_CHANNELS: None,
        self.STDOUT_CODEC_NAME: None,
        self.STDOUT_DURATION: None,
        self.STDOUT_SAMPLE_RATE: None
    }

    # scan the first audio stream the ffprobe stdout output
    # TODO more robust parsing
    # TODO deal with multiple audio streams
    for line in stdoutdata.splitlines():
        if line == self.STDOUT_END_STREAM:
            self.log(u"Reached end of the stream")
            break
        # accept only lines of the exact "key=value" form
        # (split once instead of twice as before)
        parts = line.split("=")
        if len(parts) == 2:
            key, value = parts
            results[key] = value
            self.log([u"Found property '%s'='%s'", key, value])

    # validate the duration found (if any) in stdout;
    # was a bare "except:", which also swallowed KeyboardInterrupt/SystemExit
    try:
        self.log([u"Duration found in stdout: '%s'", results[self.STDOUT_DURATION]])
        results[self.STDOUT_DURATION] = TimeValue(results[self.STDOUT_DURATION])
        self.log(u"Valid duration")
    except Exception:
        self.log_warn(u"Invalid duration")
        results[self.STDOUT_DURATION] = None

    # try scanning ffprobe stderr output
    # NOTE(review): a match here overrides any duration parsed from
    # stdout above -- presumably intentional, verify against callers
    for line in stderrdata.splitlines():
        match = self.STDERR_DURATION_REGEX.search(line)
        if match is not None:
            self.log([u"Found matching line '%s'", line])
            # the whole line is passed; gf.time_from_hhmmssmmm is expected
            # to extract the timing from it
            results[self.STDOUT_DURATION] = gf.time_from_hhmmssmmm(line)
            self.log([u"Extracted duration '%.3f'", results[self.STDOUT_DURATION]])
            break
    if results[self.STDOUT_DURATION] is None:
        self.log_exc(u"No duration found in stdout or stderr. Unsupported audio file format?", None, True, FFPROBEUnsupportedFormatError)

    # return dictionary
    self.log(u"Returning dict")
    return results
def read_properties(self, audio_file_path):
    """
    Read the properties of an audio file
    and return them as a dictionary.

    Example: ::

        d["index"]=0
        d["codec_name"]=mp3
        d["codec_long_name"]=MP3 (MPEG audio layer 3)
        d["profile"]=unknown
        d["codec_type"]=audio
        d["codec_time_base"]=1/44100
        d["codec_tag_string"]=[0][0][0][0]
        d["codec_tag"]=0x0000
        d["sample_fmt"]=s16p
        d["sample_rate"]=44100
        d["channels"]=1
        d["channel_layout"]=mono
        d["bits_per_sample"]=0
        d["id"]=N/A
        d["r_frame_rate"]=0/0
        d["avg_frame_rate"]=0/0
        d["time_base"]=1/14112000
        d["start_pts"]=0
        d["start_time"]=0.000000
        d["duration_ts"]=1545083190
        d["duration"]=109.487188
        d["bit_rate"]=128000
        d["max_bit_rate"]=N/A
        d["bits_per_raw_sample"]=N/A
        d["nb_frames"]=N/A
        d["nb_read_frames"]=N/A
        d["nb_read_packets"]=N/A
        d["DISPOSITION:default"]=0
        d["DISPOSITION:dub"]=0
        d["DISPOSITION:original"]=0
        d["DISPOSITION:comment"]=0
        d["DISPOSITION:lyrics"]=0
        d["DISPOSITION:karaoke"]=0
        d["DISPOSITION:forced"]=0
        d["DISPOSITION:hearing_impaired"]=0
        d["DISPOSITION:visual_impaired"]=0
        d["DISPOSITION:clean_effects"]=0
        d["DISPOSITION:attached_pic"]=0

    :param string audio_file_path: the path of the audio file to analyze
    :rtype: dict
    :raises: TypeError: if ``audio_file_path`` is None
    :raises: OSError: if the file at ``audio_file_path`` cannot be read
    :raises: FFPROBEParsingError: if the call to ``ffprobe`` does not produce any output
    :raises: FFPROBEPathError: if the path to the ``ffprobe`` executable cannot be called
    :raises: FFPROBEUnsupportedFormatError: if the file has a format not supported by ``ffprobe``
    """
    # test if we can read the file at audio_file_path
    if audio_file_path is None:
        self.log_exc(u"The audio file path is None", None, True, TypeError)
    if not gf.file_can_be_read(audio_file_path):
        self.log_exc(u"Input file '%s' cannot be read" % (audio_file_path), None, True, OSError)

    # call ffprobe
    arguments = [self.rconf[RuntimeConfiguration.FFPROBE_PATH]]
    arguments.extend(self.FFPROBE_PARAMETERS)
    arguments.append(audio_file_path)
    self.log([u"Calling with arguments '%s'", arguments])
    try:
        proc = subprocess.Popen(arguments,
                                stdout=subprocess.PIPE,
                                stdin=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        (stdoutdata, stderrdata) = proc.communicate()
        proc.stdout.close()
        proc.stdin.close()
        proc.stderr.close()
    except OSError as exc:
        self.log_exc(
            u"Unable to call the '%s' ffprobe executable" % (self.rconf[RuntimeConfiguration.FFPROBE_PATH]),
            exc, True, FFPROBEPathError)
    self.log(u"Call completed")

    # check there is some output
    # NOTE(review): this tests len(stderrdata), not len(stdoutdata);
    # the stderr fallback below needs stderr content, but confirm that
    # an empty stderr with a non-empty stdout should really be an error
    if (stdoutdata is None) or (len(stderrdata) == 0):
        self.log_exc(u"ffprobe produced no output", None, True, FFPROBEParsingError)

    # decode stdoutdata and stderrdata to Unicode string
    try:
        stdoutdata = gf.safe_unicode(stdoutdata)
        stderrdata = gf.safe_unicode(stderrdata)
    except UnicodeDecodeError as exc:
        self.log_exc(u"Unable to decode ffprobe out/err", exc, True, FFPROBEParsingError)

    # dictionary for the results
    results = {
        self.STDOUT_CHANNELS: None,
        self.STDOUT_CODEC_NAME: None,
        self.STDOUT_DURATION: None,
        self.STDOUT_SAMPLE_RATE: None
    }

    # scan the first audio stream the ffprobe stdout output
    # TODO more robust parsing
    # TODO deal with multiple audio streams
    for line in stdoutdata.splitlines():
        if line == self.STDOUT_END_STREAM:
            self.log(u"Reached end of the stream")
            break
        # accept only lines of the exact "key=value" form
        # (split once instead of twice as before)
        parts = line.split("=")
        if len(parts) == 2:
            key, value = parts
            results[key] = value
            self.log([u"Found property '%s'='%s'", key, value])

    # validate the duration found (if any) in stdout;
    # was a bare "except:", which also swallowed KeyboardInterrupt/SystemExit
    try:
        self.log([
            u"Duration found in stdout: '%s'",
            results[self.STDOUT_DURATION]
        ])
        results[self.STDOUT_DURATION] = TimeValue(
            results[self.STDOUT_DURATION])
        self.log(u"Valid duration")
    except Exception:
        self.log_warn(u"Invalid duration")
        results[self.STDOUT_DURATION] = None

    # try scanning ffprobe stderr output
    # NOTE(review): a match here overrides any duration parsed from
    # stdout above -- presumably intentional, verify against callers
    for line in stderrdata.splitlines():
        match = self.STDERR_DURATION_REGEX.search(line)
        if match is not None:
            self.log([u"Found matching line '%s'", line])
            # the whole line is passed; gf.time_from_hhmmssmmm is expected
            # to extract the timing from it
            results[self.STDOUT_DURATION] = gf.time_from_hhmmssmmm(line)
            self.log([
                u"Extracted duration '%.3f'",
                results[self.STDOUT_DURATION]
            ])
            break
    if results[self.STDOUT_DURATION] is None:
        self.log_exc(
            u"No duration found in stdout or stderr. Unsupported audio file format?",
            None, True, FFPROBEUnsupportedFormatError)

    # return dictionary
    self.log(u"Returning dict")
    return results