def read_captions(captions, options):
    """Detect the caption format of ``captions`` and parse it.

    One reader of each kind is built with ``read_invalid_positioning``
    taken from ``options``. The readers are probed in the order SCC, SRT,
    SAMI, DFXP, WebVTT and the first one whose ``detect`` accepts the
    content is used to read it.

    Args:
        captions: Caption content handed to each reader's ``detect``/``read``.
        options: Object providing ``read_invalid_positioning``, ``lang`` and
            ``offset``. ``lang`` and ``offset`` are only consulted for SCC.

    Returns:
        Whatever the matching reader's ``read`` returns.

    Raises:
        Exception: If no reader detects the content.
    """
    reader_kwargs = {
        'read_invalid_positioning': options.read_invalid_positioning,
    }

    # Every reader is constructed up front, in a fixed order, before any
    # detection is attempted.
    scc_reader = pycaption.SCCReader(**reader_kwargs)
    fallback_readers = (
        pycaption.SRTReader(**reader_kwargs),
        pycaption.SAMIReader(**reader_kwargs),
        pycaption.DFXPReader(**reader_kwargs),
        pycaption.WebVTTReader(**reader_kwargs),
    )

    if scc_reader.detect(captions):
        # SCC is the only format that receives a language and a time offset.
        scc_args = {}
        if options.lang:
            scc_args['lang'] = options.lang
        scc_args['offset'] = int(options.offset)
        return scc_reader.read(captions, **scc_args)

    for reader in fallback_readers:
        if reader.detect(captions):
            return reader.read(captions)

    raise Exception('No caption format detected :(')
def main(argv):
    """Convert a caption file from one format to another and print it.

    Options parsed from ``argv``:
        -h                  print usage and exit
        -i / --ifile PATH   input caption file
        -f / --sfile TYPE   input format: srt, scc, webvtt or dfxp
        -t / --tfile TYPE   output format: srt, scc, webvtt or dfxp

    Format names are matched case-insensitively. Both formats are validated
    before the input file is opened, so a bad format name is reported even
    when the file is missing or unreadable.

    Args:
        argv: Argument list without the program name.

    Exits with status 2 on a usage error (bad option, no input file) and
    with status 1 when the formats are identical or unsupported.
    """
    usage = 'test.py -i <inputfile> -f <intputType> -t <outputType>'
    supported = ('srt', 'scc', 'webvtt', 'dfxp')

    inputfile = ''
    inputType = ''
    outputType = ''
    try:
        # "h" takes no argument; the long names are registered so the
        # "--ifile"/"--sfile"/"--tfile" branches below are reachable.
        opts, _ = getopt.getopt(
            argv, "hi:f:t:", ["ifile=", "sfile=", "tfile="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-i", "--ifile"):
            inputfile = arg
        elif opt in ("-f", "--sfile"):
            inputType = arg
        elif opt in ("-t", "--tfile"):
            outputType = arg

    # Compare the normalized names so "srt" -> "SRT" is also rejected.
    inputValue = inputType.lower()
    outputValue = outputType.lower()
    if inputValue == outputValue:
        print('Error: input type and output type are same format')
        sys.exit(1)

    if inputValue not in supported:
        print('Error: invalid input type. <srt/scc/webvtt/dfxp> allowed')
        sys.exit(1)
    if outputValue not in supported:
        print('Error: invalid output type. <srt/scc/webvtt/dfxp> allowed')
        sys.exit(1)

    if not inputfile:
        print(usage)
        sys.exit(2)

    with io.open(inputfile) as f:
        str1 = f.read()

    readers = {
        'scc': pycaption.SCCReader,
        'srt': pycaption.SRTReader,
        'dfxp': pycaption.DFXPReader,
        'webvtt': pycaption.WebVTTReader,
    }
    writers = {
        'scc': pycaption.SCCWriter,
        'srt': pycaption.SRTWriter,
        'dfxp': pycaption.DFXPWriter,
        'webvtt': pycaption.WebVTTWriter,
    }

    c = readers[inputValue]().read(str1)
    print(writers[outputValue]().write(c))
def test_captions():
    """Convert a two-cue SRT sample to WebVTT and print the result."""
    # Each SRT cue is a counter line, a timing line and the text, with a
    # blank line between cues; the line breaks are spelled out explicitly
    # so the sample keeps that structure.
    srt_caps = (
        "1\n"
        "00:00:09,209 --> 00:00:12,312\n"
        "This is an example SRT file,which, while extremely short,"
        "is still a valid SRT file.\n"
        "\n"
        "2\n"
        "00:00:19,209 --> 00:00:22,312\n"
        "This is an example SRT file,which, while extremely short,"
        "is still a valid SRT file.\n"
    )

    converter = pycaption.CaptionConverter()
    converter.read(srt_caps, pycaption.SRTReader())
    print(converter.write(pycaption.WebVTTWriter()))
def read_captions(captions, options):
    """Detect the caption format of ``captions`` and parse it.

    Readers are probed in the order SCC, SRT, SAMI, DFXP; the first one
    whose ``detect`` accepts the content is used to read it.

    Args:
        captions: Caption content handed to each reader's ``detect``/``read``.
        options: Object providing ``lang`` and ``offset``; both are only
            consulted when the content is detected as SCC.

    Returns:
        Whatever the matching reader's ``read`` returns.

    Raises:
        Exception: If no reader detects the content.
    """
    scc_reader = pycaption.SCCReader()
    srt_reader = pycaption.SRTReader()
    sami_reader = pycaption.SAMIReader()
    dfxp_reader = pycaption.DFXPReader()

    if scc_reader.detect(captions):
        # Convert the offset the same way whether or not a language is
        # given, so a fractional offset is neither truncated nor rejected
        # only when ``lang`` happens to be set.
        offset = float(options.offset)
        if options.lang:
            return scc_reader.read(captions, lang=options.lang, offset=offset)
        return scc_reader.read(captions, offset=offset)

    for reader in (srt_reader, sami_reader, dfxp_reader):
        if reader.detect(captions):
            return reader.read(captions)

    raise Exception('No caption format detected :(')
def run(self):
    """Convert every file in ``self.input_files`` to ``self.output_type``.

    For each input file the output path is ``self.output_folder`` plus the
    input's base name with its extension replaced by ``self.output_type``.
    An existing output file is skipped unless ``self.overwrite_on`` is set.
    Progress messages are sent through ``self.log_signal.emit``.

    The input is read and rendered completely before the output file is
    opened. A failure while reading or converting therefore never leaves a
    truncated output file, and converting a file onto itself does not wipe
    the content before it has been read.
    """
    for input_file in self.input_files:
        input_file_name = os.path.basename(input_file)
        input_type = os.path.splitext(input_file)[1].lower()[1:]
        output_file = os.path.join(
            self.output_folder,
            os.path.splitext(input_file_name)[0] + '.' + self.output_type)
        output_file_name = os.path.basename(output_file)

        if os.path.exists(output_file) and not self.overwrite_on:
            self.log_signal.emit("{}을 건너뜁니다...".format(input_file_name))
            continue

        self.log_signal.emit("{}을 읽습니다...".format(input_file_name))

        # Detect the encoding from the raw bytes, then re-read as text.
        with open(input_file, 'rb') as file_in:
            encoding = chardet.detect(file_in.read())['encoding']
        with open(input_file, 'r', encoding=encoding) as file_in:
            content = file_in.read()

        # NOTE(review): any extension other than smi/srt leaves ``captions``
        # as None and it is handed to the writer unchanged.
        captions = None
        if input_type == "smi":
            captions = pycaption.SAMIReader().read(content)
        elif input_type == "srt":
            captions = pycaption.SRTReader().read(content)

        if self.output_type == "ats":
            # The ATS writer's result is written through a binary handle,
            # closed by the ``with`` block.
            payload = AtsWriter().write(captions)
            with open(output_file, 'wb') as file_out:
                file_out.write(payload)
        else:
            rendered = None
            if self.output_type == "smi":
                rendered = pycaption.SAMIWriter().write(captions)
            elif self.output_type == "srt":
                rendered = pycaption.SRTWriter().write(captions)
            elif self.output_type == "txt":
                rendered = TextWriter().write(captions)

            # The file is created even when no writer matched the type.
            with open(output_file, 'w',
                      encoding=self.output_encoding) as file_out:
                if rendered is not None:
                    file_out.write(rendered)

        self.log_signal.emit("{}으로 변환했습니다".format(output_file_name))
def read_captions(input_filename):
    """Read an SRT file, working out its text encoding first.

    A UTF-8 byte-order mark selects ``utf-8-sig``; otherwise the encoding
    reported by ``chardet.detect`` for the raw bytes is used.

    Args:
        input_filename: Path of the caption file to read.

    Returns:
        Whatever ``pycaption.SRTReader().read`` returns for the content.

    Raises:
        UnicodeDecodeError: If the file cannot be decoded with the chosen
            encoding (a message naming the file is printed first).
        Exception: If the content is not detected as SRT.
    """
    with open(input_filename, 'rb') as raw_file:
        raw = raw_file.read()

    if raw.startswith(codecs.BOM_UTF8):
        encoding = 'utf-8-sig'
    else:
        encoding = chardet.detect(raw)['encoding']

    try:
        with io.open(input_filename, 'r', encoding=encoding) as text_file:
            captions = text_file.read()
    except UnicodeDecodeError as e:
        # str(e) rather than e.message: exceptions have no ``message``
        # attribute on Python 3, which would hide the real decode error.
        print(str(e) + " from file - " + input_filename)
        raise

    srt_reader = pycaption.SRTReader()
    if srt_reader.detect(captions):
        return srt_reader.read(captions)
    raise Exception('Illegal srt format %s :(' % input_filename)