def to_srt(df, filename):
    """Write the rows of ``df`` to ``filename`` as a UTF-8 SubRip file.

    Each row must provide ``begin``, ``end`` and ``text`` entries; the two
    time values are passed through ``convert_time`` before being used as the
    cue's start and end.

    Cues are numbered 1..N in row order. Previously every cue was created
    with index 0, so the saved file carried ``0`` as the counter of every
    subtitle.
    """
    out = SubRipFile(encoding='utf-8')
    for number, (_, row) in enumerate(df.iterrows(), start=1):
        begin = convert_time(row['begin'])
        end = convert_time(row['end'])
        out.append(SubRipItem(number, begin, end, row['text']))
    out.save(filename)
def add_videos_to_index(subtitle_index, output_file, index):
    """Export per-video metadata to CSV and index every subtitle word.

    For each row of the ``subtitle_index`` CSV (title, filename, id, views,
    type) the matching ``<filename>.en.srt`` file is read, a row including
    the video URL and the ASCII-only subtitle text is written to
    ``output_file``, and every non-stopword of the subtitles is passed to
    ``add_to_index(index, word, url)``.

    Rows that cannot be processed (missing subtitle file, short row, ...) are
    skipped on purpose: the function is best effort per video.

    :param subtitle_index: path of the input CSV listing the videos.
    :param output_file: path of the CSV to write.
    :param index: index object handed to ``add_to_index``.
    :returns: ``index``.
    """
    punctuation = '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
    stopwords_path = '/Users/connormendenhall/Python/DaveDaveFind/DaveDaveFind/data/stopwords.csv'
    # Loaded on first use, inside the per-row ``try`` (exactly where the
    # original read it), and then reused instead of re-reading the CSV for
    # every video. A set gives O(1) membership tests.
    stopwords = None

    # ``with`` guarantees both CSV files are flushed and closed.
    with open(subtitle_index, 'rb') as index_file, open(output_file, 'wt') as info_file:
        vindexReader = csv.reader(index_file)
        vinfoWriter = csv.writer(info_file)
        vinfoWriter.writerow(['title', 'filename', 'id', 'views', 'type', 'url', 'text'])
        for row in vindexReader:
            try:
                filename = row[1] + '.en.srt'
                url = 'http://www.youtube.com/watch?v=' + row[2]
                with open(filename) as subtitle_file:
                    text_ascii = removeNonAscii(subtitle_file.read())
                subtitles = SubRipFile.open(filename)
                vinfoWriter.writerow([row[0], row[1], row[2], row[3], row[4], url, text_ascii])
                if stopwords is None:
                    with open(stopwords_path, 'rb') as f:
                        stopwords = set([''] + [entry[0] for entry in csv.reader(f)])
                for sentence in subtitles:
                    for word in sentence.text.split():
                        word = word.strip(punctuation).lower()
                        if word not in stopwords:
                            add_to_index(index, word, url)
            except Exception:
                # Deliberate best effort: skip this video and carry on.
                # (Narrowed from a bare ``except:`` so Ctrl-C still works.)
                continue
    print("[add_videos_to_index()] Videos added.")
    return index
def test_windows1252(self):
    """A windows-1252 string parses completely and keeps its CRLF line endings."""
    # Close the fixture explicitly instead of leaking the handle.
    with codecs.open(self.windows_path, encoding='windows-1252') as handle:
        srt_string = handle.read()
    srt_file = SubRipFile.from_string(srt_string, encoding='windows-1252', eol='\r\n')
    # assertEqual: the assertEquals alias is deprecated and gone in Python 3.12.
    self.assertEqual(len(srt_file), 1332)
    self.assertEqual(srt_file.eol, '\r\n')
    # Opening the UTF-8 fixture as ASCII must fail loudly.
    self.assertRaises(UnicodeDecodeError, SubRipFile.open, self.utf8_path, encoding='ascii')
def mostrarSubtitulos(self, escena, ruta):
    """Show the current subtitle of ``ruta`` on ``escena`` when it is due.

    Does nothing unless ``self.ok == 1``. ``self.tmp`` is the position of the
    next subtitle to show; when it reaches the end of the file it is reset to
    0 and ``self.ok`` is cleared. While the pygame clock (minus
    ``self.offset``) lies inside the cue's interval the scene is redrawn and
    the text printed once; outside the interval the text is cleared once.

    NOTE(review): block nesting was reconstructed from a source whose
    whitespace had been collapsed -- confirm that the final ``else`` pairs
    with the time-window test.

    :param escena: scene object providing ``draw()``.
    :param ruta: path of the UTF-8 encoded .srt file.
    """
    if self.ok != 1:
        return
    self.escena = escena
    # UTF-8 so that accented characters are displayed correctly.
    subs = SubRipFile.open(ruta, encoding='UTF-8')
    if self.tmp == len(subs):
        # Reached the end of the subtitles: rewind and stop.
        self.tmp = 0
        self.ok = 0
        self.tiempoActual = 0
    if not subs:
        # Empty file: nothing to index (the original raised IndexError here).
        return
    linea = subs[self.tmp]
    # ``ordinal`` is the full timestamp in milliseconds, hours included.
    # The previous hand-rolled sum left out the hours field, so cues after
    # the first hour were compared against the wrong time.
    tics_ini = linea.start.ordinal
    tics_fin = linea.end.ordinal
    ahora = pygame.time.get_ticks() - self.offset
    if tics_ini <= ahora <= tics_fin:
        if self.imprimir == 1:
            self.escena.draw()  # redraw the scene
            self.printTexto(linea.text)  # print the subtitle text
            self.imprimir = 0
            self.tmp = self.tmp + 1
            self.entrar = 1
    else:
        if self.entrar == 1:
            self.printTexto("")
            self.imprimir = 1
            self.entrar = 0
def convert(content, input_format, output_format):
    """
    Translate transcript `content` from `input_format` into `output_format`.

    Input formats: srt, sjson. Output formats: txt, srt, sjson.
    When both formats are equal `content` is returned untouched.
    srt -> sjson is not implemented and raises NotImplementedError.
    """
    assert input_format in ('srt', 'sjson')
    assert output_format in ('txt', 'srt', 'sjson')

    if output_format == input_format:
        return content

    if input_format == 'srt':
        if output_format == 'sjson':
            raise NotImplementedError
        if output_format == 'txt':
            plain = SubRipFile.from_string(content.decode('utf8')).text
            return HTMLParser().unescape(plain)

    if input_format == 'sjson':
        if output_format == 'srt':
            return generate_srt_from_sjson(json.loads(content), speed=1.0)
        if output_format == 'txt':
            lines = json.loads(content)['text']
            return HTMLParser().unescape("\n".join(lines))
def convert(content, input_format, output_format):
    """
    Translate transcript `content` from `input_format` into `output_format`.

    Input formats: srt, sjson. Output formats: txt, srt, sjson.
    When both formats are equal `content` is returned untouched.

    Raises:
        TranscriptsGenerationException: when the srt content cannot be
        parsed during the srt -> sjson conversion.
    """
    assert input_format in ('srt', 'sjson')
    assert output_format in ('txt', 'srt', 'sjson')

    if output_format == input_format:
        return content

    if input_format == 'srt':
        if output_format == 'txt':
            plain = SubRipFile.from_string(content.decode('utf8')).text
            return HTMLParser().unescape(plain)
        if output_format == 'sjson':
            # 'utf-8-sig' drops a leading byte order mark; ERROR_RAISE makes
            # pysrt raise instead of skipping cues it cannot parse.
            decoded = content.decode('utf-8-sig')
            try:
                srt_subs = SubRipFile.from_string(decoded, error_handling=SubRipFile.ERROR_RAISE)
            except Error as ex:  # base exception of pysrt
                raise TranscriptsGenerationException(text_type(ex))
            return json.dumps(generate_sjson_from_srt(srt_subs))

    if input_format == 'sjson':
        if output_format == 'srt':
            return generate_srt_from_sjson(json.loads(content), speed=1.0)
        if output_format == 'txt':
            lines = json.loads(content)['text']
            # Falsy entries (e.g. None) become empty lines.
            cleaned = [entry if entry else '' for entry in lines]
            return HTMLParser().unescape("\n".join(cleaned))
def input_file(self):
    """Return the parsed source file, loading and caching it on first access.

    The encoding is detected from the raw bytes of ``self.arguments.file``
    and normalised with ``self.normalize_encoding``; parse errors are logged
    (``SubRipFile.ERROR_LOG``). The result is kept in ``self._source_file``.
    """
    if hasattr(self, '_source_file'):
        return self._source_file

    with open(self.arguments.file, 'rb') as stream:
        raw = stream.read()
    detected = self.normalize_encoding(detect(raw).get('encoding'))
    self._source_file = SubRipFile.open(
        self.arguments.file,
        encoding=detected,
        error_handling=SubRipFile.ERROR_LOG,
    )
    return self._source_file
def save(self, path):
    """Write the elements to ``path`` as SubRip or as tab-separated text.

    A path ending in ``srt`` produces a SubRip file (requires pysrt); any
    other path produces a TSV with the columns onset, text, duration.

    SubRip cues are numbered 1..N in element order. Previously every cue was
    created with index 0, so all counters in the saved file were ``0``.
    """
    if path.endswith('srt'):
        verify_dependencies(['pysrt'])
        from pysrt import SubRipFile, SubRipItem
        from datetime import time

        out = SubRipFile()
        for number, elem in enumerate(self._elements, start=1):
            start = time(*self._to_tup(elem.onset))
            end = time(*self._to_tup(elem.onset + elem.duration))
            out.append(SubRipItem(number, start, end, elem.text))
        out.save(path)
    else:
        with open(path, 'w') as f:
            f.write('onset\ttext\tduration\n')
            for elem in self._elements:
                f.write('{}\t{}\t{}\n'.format(elem.onset, elem.text, elem.duration))
def test_eol_conversion(self):
    """Saving with ``eol='\\n'`` turns a CRLF file into an LF file."""
    # io.open gives universal-newline text mode (and ``.newlines``) on both
    # Python 2 and 3; the old 'rU' mode was removed in Python 3.11.
    import io

    with io.open(self.windows_path, encoding='windows-1252') as input_file:
        input_file.read()
        self.assertEqual(input_file.newlines, '\r\n')

    srt_file = SubRipFile.open(self.windows_path, encoding='windows-1252')
    srt_file.save(self.temp_path, eol='\n')

    # save() reuses the encoding the file was opened with.
    with io.open(self.temp_path, encoding='windows-1252') as output_file:
        output_file.read()
        self.assertEqual(output_file.newlines, '\n')
def test_eol_conversion(self):
    """Saving with ``eol="\\n"`` turns a CRLF file into an LF file."""
    # io.open gives universal-newline text mode (and ``.newlines``) on both
    # Python 2 and 3; the old "rU" mode was removed in Python 3.11.
    import io

    with io.open(self.windows_path, encoding="windows-1252") as input_file:
        input_file.read()
        self.assertEqual(input_file.newlines, "\r\n")

    srt_file = SubRipFile.open(self.windows_path, encoding="windows-1252")
    srt_file.save(self.temp_path, eol="\n")

    # save() reuses the encoding the file was opened with.
    with io.open(self.temp_path, encoding="windows-1252") as output_file:
        output_file.read()
        self.assertEqual(output_file.newlines, "\n")
def merge_subtitle(sub_a, sub_b, delta, encoding='utf-8'):
    """
    Merge two srt subtitle tracks in different languages into one.

    The two tracks do not share a time axis, so a new merged cue starts
    whenever either track changes; the two languages therefore do not always
    switch at the same moment. This cannot be avoided.
    See https://github.com/byroot/pysrt/issues/17
        https://github.com/byroot/pysrt/issues/15

    :param sub_a: first track, e.g. SubRipFile.open(sub_a_path, encoding=encoding)
    :param sub_b: second track
    :param delta: intervals not longer than this duration are dropped
    :param encoding: accepted for callers; not used by the merge itself
    :return: a SubRipFile with the merged, re-indexed cues
    """
    # Every start/end of both tracks is a boundary of the merged timeline.
    boundaries = sorted(
        [cue.start.ordinal for cue in sub_a]
        + [cue.end.ordinal for cue in sub_a]
        + [cue.start.ordinal for cue in sub_b]
        + [cue.end.ordinal for cue in sub_b]
    )

    merged = SubRipFile()
    pos_a = pos_b = 0
    for lower, upper in zip(boundaries, boundaries[1:]):
        start = SubRipTime.from_ordinal(lower)
        end = SubRipTime.from_ordinal(upper)
        if not (end - start) > delta:
            continue
        text_a, pos_a = find_subtitle(sub_a, start, end, pos_a)
        text_b, pos_b = find_subtitle(sub_b, start, end, pos_b)
        text = join_lines(text_a, text_b)
        if len(text) > 0:
            merged.append(SubRipItem(0, start, end, text))

    merged.clean_indexes()
    return merged
def get_captions(client_name, clip_id):
    """Fetch a clip's captions, save them as .srt and push the file to S3.

    The JSON for ``clip_id`` is requested from
    ``http://<client_name>/JSON.php``. Every item of type ``"text"`` becomes
    an entry of the returned caption list and, when ``get_cap_end`` yields an
    end time, a cue of the .srt file written below
    ``<cwd>/data/granicus/srt/<client_name>/``.

    :returns: ``None`` when the server answers with a bad status line;
        ``([], '')`` when the HTTP status is not 200 or the payload cannot be
        parsed; otherwise ``(captions, s3_url)`` where ``captions`` is a list
        of ``{'time': seconds, 'text': str}`` dicts.
    """
    h = httplib2.Http()
    g_url = 'http://%s/JSON.php?clip_id=%s' % (client_name, clip_id)
    print("Fetching URL: %s" % g_url)
    try:
        response, j = h.request(g_url)
    except httplib.BadStatusLine:
        return None

    if response.get('status') != '200':
        return ([], '')

    dirname = os.getcwd() + "/data/granicus/srt/%s/" % client_name
    filename = dirname + "%s.srt" % clip_id
    subs = SubRipFile()
    captions = []

    try:
        j = json.loads(j, strict=False)[0]
    except ValueError:
        # Some payloads use bare keys: quote them and drop backslashes,
        # then try again.
        ts = re.sub(r'([{,]\s+)([a-z]+)(: ")',
                    lambda s: '%s"%s"%s' % s.groups(),
                    j).replace("\\", "")
        try:
            j = json.loads(ts, strict=False)[0]
        except UnicodeDecodeError:
            ts = unicode(ts, errors='ignore')
            j = json.loads(ts, strict=False)[0]
    except Exception:
        # The original set ``j = False`` here and then crashed with a
        # TypeError when iterating it; report "no captions" instead.
        return ([], '')

    # NOTE(review): nesting reconstructed from a whitespace-collapsed source;
    # ``sub_count`` is assumed to advance once per item of ``j`` because it
    # is handed to get_cap_end() together with ``j`` -- confirm.
    sub_count = 0
    for item in j:
        if item["type"] == "text":
            cap = item["text"]
            offset = round(float(item["time"]), 3)
            captions.append({'time': offset, 'text': cap})
            end = get_cap_end(j, sub_count)
            if end:
                subtitle = SubRipItem(index=sub_count,
                                      start=SubRipTime(seconds=offset),
                                      end=SubRipTime(seconds=end),
                                      text=cap)
                subs.append(subtitle)
        sub_count = sub_count + 1

    try:
        subs.save(path=filename, encoding="utf-8")
    except IOError:
        # Target directory is missing: create it and retry. os.makedirs
        # replaces ``mkdir -p`` run through a shell, which interpolated
        # ``client_name`` into a command line unescaped.
        if not os.path.isdir(dirname):
            os.makedirs(dirname)
        subs.save(path=filename, encoding="utf-8")

    s3_url = push_to_s3(filename, '%s/%s.srt' % (client_name, clip_id))
    return (captions, s3_url)
def save(self, *args, **kwargs):
    """Save the episode, then rebuild its subtitle rows from the .srt file.

    The subtitle file is parsed *before* anything is deleted, and the delete
    plus re-import run inside one transaction. Previously the existing rows
    were deleted first and outside the transaction, so an unreadable file
    left the episode without any subtitles.
    """
    super(Episode, self).save(*args, **kwargs)

    # Parse first: if this raises, the stored subtitles stay untouched.
    subs = SubRipFile.open(self.subtitles.path)

    with transaction.commit_on_success():
        # Replace the old rows and the new ones atomically.
        self.subtitle_set.all().delete()
        for sub in subs:
            self.subtitle_set.create(
                start=sub.start.ordinal,
                end=sub.end.ordinal,
                text=sub.text)
def merge_subtitle(sub_a, sub_b, delta):
    """Merge two subtitle tracks into one track with joined texts.

    All start and end times of both tracks form the boundaries of the merged
    timeline. For each pair of neighbouring boundaries that is longer than
    ``delta`` the texts active in both tracks are looked up with
    ``find_subtitle`` and combined with ``join_lines``; non-empty results
    become cues of the returned, re-indexed SubRipFile.
    """
    boundaries = sorted(
        [cue.start.ordinal for cue in sub_a]
        + [cue.end.ordinal for cue in sub_a]
        + [cue.start.ordinal for cue in sub_b]
        + [cue.end.ordinal for cue in sub_b]
    )

    merged = SubRipFile()
    pos_a = pos_b = 0
    for lower, upper in zip(boundaries, boundaries[1:]):
        start = SubRipTime.from_ordinal(lower)
        end = SubRipTime.from_ordinal(upper)
        if not (end - start) > delta:
            continue
        text_a, pos_a = find_subtitle(sub_a, start, end, pos_a)
        text_b, pos_b = find_subtitle(sub_b, start, end, pos_b)
        text = join_lines(text_a, text_b)
        if len(text) > 0:
            merged.append(SubRipItem(0, start, end, text))

    merged.clean_indexes()
    return merged
def GetSrtCaptions(self):
    """Download the ASR caption track as SRT and parse it with pysrt.

    Requests ``<track_url>?fmt=srt`` with the stored headers and, on HTTP
    200, stores the parsed captions in ``self.srt_captions``.

    Raises:
      Error: The ASR caption track could not be retrieved.
    """
    response_headers, body = self.http.request(
        "%s?fmt=srt" % self.track_url, "GET", headers=self.headers)

    status = response_headers["status"]
    if status != "200":
        raise Error("Received HTTP response %s when requesting %s?fmt=srt." %
                    (status, self.track_url))

    self.srt_captions = SubRipFile.from_string(body)
def generate_subs_from_source(speed_subs, subs_type, subs_filedata, item, language='en'):
    """Build transcripts from a source file and store them for `item`.

    The source subtitles are expected to have speed 1.

    :param speed_subs: dictionary {speed: sub_id, ...}
    :param subs_type: type of the source subs; only "srt" (any case) is accepted.
    :param subs_filedata: unicode, content of the source subs.
    :param item: module object.
    :param language: str, language of the transcript translation.
    :returns: dict with the parallel lists 'start', 'end' (milliseconds) and 'text'.
    :raises TranscriptsGenerationException: unsupported type, parse failure or empty result.
    """
    _ = item.runtime.service(item, "i18n").ugettext

    if subs_type.lower() != 'srt':
        raise TranscriptsGenerationException(_("We support only SubRip (*.srt) transcripts format."))

    try:
        srt_subs_obj = SubRipFile.from_string(subs_filedata)
    except Exception as ex:
        msg = _("Something wrong with SubRip transcripts file during parsing. Inner message is {error_message}").format(
            error_message=ex.message
        )
        raise TranscriptsGenerationException(msg)

    if not srt_subs_obj:
        raise TranscriptsGenerationException(_("Something wrong with SubRip transcripts file during parsing."))

    subs = {
        'start': [cue.start.ordinal for cue in srt_subs_obj],
        'end': [cue.end.ordinal for cue in srt_subs_obj],
        # Multi-line cues are flattened to a single line.
        'text': [cue.text.replace('\n', ' ') for cue in srt_subs_obj],
    }

    for speed, subs_id in speed_subs.iteritems():
        scaled = generate_subs(speed, 1, subs)
        save_subs_to_store(scaled, subs_id, item, language)

    return subs
def get_transcript_format(transcript_content):
    """
    Tell whether the transcript content is SJSON or SRT.

    Content that parses as JSON is SJSON. Otherwise it is parsed as SRT with
    strict error handling (parse problems raise) and counts as SRT when at
    least one cue is found; with zero cues the answer is SJSON.

    Arguments:
        transcript_content (str): Transcript file content.
    """
    try:
        json.loads(transcript_content)
    except ValueError:
        # ERROR_RAISE: pysrt raises if anything goes wrong while parsing.
        parsed = SubRipFile.from_string(transcript_content, error_handling=SubRipFile.ERROR_RAISE)
        looks_like_srt = len(parsed) > 0
    else:
        looks_like_srt = False

    return TranscriptFormat.SRT if looks_like_srt else TranscriptFormat.SJSON
def generate_subs_from_source(speed_subs, subs_type, subs_filedata, item):
    """Build transcripts from a source file and store them for `item`.

    The source subtitles are expected to have speed 1.

    :param speed_subs: dictionary {speed: sub_id, ...}
    :param subs_type: type of the source subs; only "srt" is accepted.
    :param subs_filedata: unicode, content of the source subs.
    :param item: module object.
    :returns: dict with the parallel lists 'start', 'end' (milliseconds) and 'text'.
    :raises TranscriptsGenerationException: unsupported type, parse failure or empty result.
    """
    if subs_type != 'srt':
        raise TranscriptsGenerationException("We support only SubRip (*.srt) transcripts format.")

    try:
        srt_subs_obj = SubRipFile.from_string(subs_filedata)
    except Exception as e:
        raise TranscriptsGenerationException(
            "Something wrong with SubRip transcripts file during parsing. "
            "Inner message is {}".format(e.message)
        )

    if not srt_subs_obj:
        raise TranscriptsGenerationException("Something wrong with SubRip transcripts file during parsing.")

    subs = {
        'start': [cue.start.ordinal for cue in srt_subs_obj],
        'end': [cue.end.ordinal for cue in srt_subs_obj],
        # Multi-line cues are flattened to a single line.
        'text': [cue.text.replace('\n', ' ') for cue in srt_subs_obj],
    }

    for speed, subs_id in speed_subs.iteritems():
        scaled = generate_subs(speed, 1, subs)
        save_subs_to_store(scaled, subs_id, item)

    return subs
def generate_vocap_file(self):
    """Write the subtitles of ``self.srt_file`` in vocap format.

    One line per cue: ``###<start in whole seconds> <text>``. Line breaks
    inside a cue become spaces and a literal ``###`` in the text is rewritten
    to ``#.#.#`` so it cannot be mistaken for a line marker.
    """
    subs = SubRipFile.open(self.path+"/"+self.srt_file, encoding="utf-8")
    # ``with`` closes the output even if writing a cue fails.
    with codecs.open(self.path+"/"+self.vocap_file, "w", "utf-8") as fileobj:
        for sub in subs:
            text = sub.text.replace(u"###", u"#.#.#").replace(u"\n", u" ")
            start = sub.start.seconds + 60*sub.start.minutes + 3600*sub.start.hours
            stamp = unicode(str(start), "utf-8")
            fileobj.write(u"###"+stamp+u" "+text+u"\n")
def __init__(self, filename):
    """Load ``filename`` and fill the Gtk list model with its subtitles.

    The file is opened with pysrt's default encoding first; on a
    UnicodeDecodeError it is opened again as ISO-8859-1. If that also fails
    ``self.model`` is set to ``None``. An IOError is reported and leaves the
    model empty.

    :raises FileNameError: if ``filename`` does not exist.
    """
    self.filename = filename
    self.model = Gtk.ListStore(object, str)
    self.srt_model = []
    if not os.path.exists(filename):
        raise FileNameError(filename)
    try:
        self.srt_model = SubRipFile.open(path=filename)
    except UnicodeDecodeError as unic:
        debug(unic)
        try:
            info("trying ...", "ISO-8859-1")
            # Must be SubRipFile.open(): the bare constructor used before
            # only created an empty container and never read the file, so
            # the fallback always ended up with zero subtitles.
            self.srt_model = SubRipFile.open(path=filename, encoding="iso-8859-1")
        except Exception as excep:
            debug(excep)
            self.model = None
    except IOError as error:
        info("Impossible de lire le fichier de sous titre: error {}".format(error))

    # srt_model is still [] on every failure path, so this is then a no-op.
    for line in self.srt_model:
        self.model.append([line, line.text])
def get_srt_data(source):
    """Create (or reuse) one caption ``Note`` per cue of ``source.srt_data``.

    The note's ``offset`` is the *end* time of the cue in seconds. The start
    offset the previous version also computed was never used and has been
    removed together with the unused ``note, created`` locals.
    """
    captions = SubRipFile.from_string(source.srt_data)
    for c in captions:
        end = c.end.to_time()
        # A cue can't possibly carry more than hours.
        end_offset = end.second + (end.minute * 60) + (end.hour * 60 * 60) + (end.microsecond / 1000000)
        Note.objects.get_or_create(
            text = c.text,
            offset = end_offset,
            user = source.user,
            user_name = source.user.username,
            video = source.video,
            private = False,
            import_source = source,
            import_source_name = source.name,
            source = 'SRT File',
            original_source = 'SRT File',
            source_link = source.url,  # they're probably not going to have one of these...
            type = "caption"
        )
def test_empty_file(self):
    """An empty file parses to zero items even with strict error handling."""
    # Renamed from ``file`` (shadowed the builtin); assertEqual replaces the
    # assertEquals alias removed in Python 3.12.
    srt_file = SubRipFile.open('/dev/null', error_handling=SubRipFile.ERROR_RAISE)
    self.assertEqual(len(srt_file), 0)
def __test_encoding(self, encoding):
    """Open the fixture named ``encoding`` and check its size and first index."""
    srt_file = SubRipFile.open(os.path.join(self.base_path, encoding))
    # assertEqual: the assertEquals alias was removed in Python 3.12.
    self.assertEqual(len(srt_file), 7)
    self.assertEqual(srt_file[0].index, 1)
def test_length(self):
    """The capability tester fixture contains 37 items."""
    path = os.path.join(self.base_path, 'capability_tester.srt')
    # Renamed from ``file`` (shadowed the builtin); assertEqual replaces the
    # assertEquals alias removed in Python 3.12.
    srt_file = SubRipFile.open(path)
    self.assertEqual(len(srt_file), 37)
import sys, pytesseract from pgsreader import PGSReader from imagemaker import make_image from pysrt import SubRipFile, SubRipItem, SubRipTime from tqdm import tqdm supFile = sys.argv[1] pgs = PGSReader(supFile) srtFile = ".".join(supFile.split('.')[:-1])+".srt" with open('myfile.txt', 'w') as fp: pass srt = SubRipFile() # get all DisplaySets that contain an image print("Loading DisplaySets...") allsets = [ds for ds in tqdm(pgs.iter_displaysets())] print(f"Running OCR on {len(allsets)} DisplaySets and building SRT file...") subText = "" subStart = 0 subIndex = 0 for ds in tqdm(allsets): try: if ds.has_image: # get Palette Display Segment pds = ds.pds[0] # get Object Display Segment
def setUp(self):
    """Load the UTF-8 fixture into ``self.file`` before each test."""
    fixture = os.path.join(file_path, 'tests', 'static', 'utf-8.srt')
    self.file = SubRipFile.open(fixture)
def test_utf8(self):
    """The UTF-8 fixture opens fully; the windows-1252 one fails as UTF-8."""
    # assertEqual: the assertEquals alias was removed in Python 3.12.
    self.assertEqual(len(SubRipFile.open(self.utf8_path)), 1332)
    self.assertRaises(UnicodeDecodeError, SubRipFile.open, self.windows_path, encoding="utf_8")
def test_utf8(self):
    """Decoded UTF-8 content parses fully; raw windows-1252 content is rejected."""
    # Read both fixtures through ``with`` so no handle is leaked.
    with codecs.open(self.utf8_path, encoding="utf_8") as handle:
        unicode_content = handle.read()
    # assertEqual: the assertEquals alias was removed in Python 3.12.
    self.assertEqual(len(SubRipFile.from_string(unicode_content)), 1332)

    with open(self.windows_path) as handle:
        windows_content = handle.read()
    self.assertRaises(UnicodeDecodeError, SubRipFile.from_string, windows_content)
def test_utf8(self):
    """Decoded UTF-8 content parses fully; raw windows-1252 content is rejected."""
    # Read both fixtures through ``with`` so no handle is leaked.
    with codecs.open(self.utf8_path, encoding='utf_8') as handle:
        unicode_content = handle.read()
    # assertEqual: the assertEquals alias was removed in Python 3.12.
    self.assertEqual(len(SubRipFile.from_string(unicode_content)), 1332)

    with open(self.windows_path) as handle:
        windows_content = handle.read()
    self.assertRaises(UnicodeDecodeError, SubRipFile.from_string, windows_content)
except getopt.GetoptError, err: print str(err) usage() sys.exit(2) #Settings default values delta = SubRipTime(milliseconds=500) encoding = "utf_8" #- if len(args) <> 3: usage() sys.exit(2) for o, a in opts: if o in ("-d", "--delta"): delta = SubRipTime(milliseconds=int(a)) elif o in ("-e", "--encoding"): encoding = a elif o in ("-h", "--help"): usage() sys.exit() subs_a = SubRipFile.open(args[0], encoding=encoding) subs_b = SubRipFile.open(args[1], encoding=encoding) out = merge_subtitle(subs_a, subs_b, delta) out.save(args[2], encoding=encoding) if __name__ == "__main__": main()
from pysrt import SubRipFile, SubRipTime
# Still to be worked out: how to use SubRipTime, which should make reading
# the timestamps easier.

subs = SubRipFile.open("14Blades.srt", encoding="iso-8859-1")

print("Hay", len(subs), " subtitulos")

linea = subs[0]
print(linea.text)
print("inicio", linea.start.seconds, " segundos.")
print("fin", linea.end.seconds, " segundos.")

linea = subs[1]
print(linea.text)
print("inicio", linea.start.seconds, " segundos.")
print("fin", linea.end.seconds, " segundos.")

linea = subs[14]
print(linea.text)
print("inicio", linea.start.minutes, "minutos con", linea.start.seconds, "segundos.")
# The end line used to print start.minutes, giving a wrong end time whenever
# a cue crosses a minute boundary.
print("fin", linea.end.minutes, "minutos con", linea.end.seconds, "segundos.")

# Equivalent ways of slicing, kept as usage notes:
# part = subs.slice(ends_after=SubRipTime(0, 0, 40))
# part = subs.slice(ends_after=(0, 0, 40))
# part = subs.slice(ends_after={'seconds': 40})
# part.shift(seconds=-2)
# subs.save('other/path.srt', 'utf-8');
def test_blank_lines(self):
    """A stream made only of blank lines yields no items and raises nothing."""
    items = list(
        SubRipFile.stream([u'\n'] * 20, error_handling=SubRipFile.ERROR_RAISE))
    # assertEqual: the assertEquals alias was removed in Python 3.12.
    self.assertEqual(len(items), 0)
def test_single_item(self):
    """The ``text`` of a one-item file is that item's text."""
    srt_file = SubRipFile(
        [SubRipItem(1, {'seconds': 1}, {'seconds': 2}, 'Hello')])
    # assertEqual: the assertEquals alias was removed in Python 3.12.
    self.assertEqual(srt_file.text, 'Hello')
def syncSrts(subs_L1, subs_L2):
    """Sync subs_L1 by subs_L2 timings and return a SubRipFile.

    For every cue of ``subs_L2`` the matching cue of ``subs_L1`` is looked up
    with ``matchSubtitle`` and its text is emitted with the L2 timing; when
    nothing matches, the L2 text itself is used and a "Missing" line is
    printed. L1 cues that were skipped between two matches are appended to
    the previous output cue, prepended to the current one, or emitted as a
    cue of their own (paired with a blank L2 cue).

    Returns a 4-tuple ``(out, dupes, fixed, subs_L2_out)``: the synced L1
    track, the number of times the same L1 cue was matched twice in a row,
    the number of duplicated texts repaired by ``trySplitLine``, and the L2
    track padded to the same length as ``out``.

    NOTE(review): indentation was reconstructed from a whitespace-collapsed
    source -- confirm that ``last_j = j`` belongs to the matched branch.
    """
    out = SubRipFile()
    subs_L2_out = SubRipFile()
    j = 0
    last_j = -1  # position in subs_L1 of the previous match, -1 = none yet
    dupes = 0
    L2_ind = -1  # position of the current cue in subs_L2
    for L2_sub in subs_L2:
        L2_ind = L2_ind + 1
        start = L2_sub.start
        end = L2_sub.end
        # Search from the previous match onwards.
        j = matchSubtitle(subs_L1, start, end, max(last_j, 0))
        L1_sub = subs_L1[j] if (j > -1) else None
        if L1_sub is None:
            text = L2_sub.text
            print("---- Missing: {}: {}".format(
                L2_sub.index, L2_sub.text.replace("\n", "[[NL]]")))
        else:
            text = L1_sub.text
            if j - 1 > last_j and last_j > -1:
                # we skipped a sub in L1_subs
                if isSubMatch(subs_L1[j - 1], subs_L2[L2_ind - 1].start,
                              subs_L2[L2_ind - 1].end):
                    # The skipped cue fits the previous L2 cue: append it there.
                    out[len(out) - 1].text = out[len(out) - 1].text + "\n" + subs_L1[j - 1].text
                elif isSubMatch(subs_L1[j - 1], start, end):
                    # The skipped cue fits the current L2 cue: prepend it here.
                    text = subs_L1[j - 1].text + "\n" + text
                else:
                    # A sub line in L1 does not match any in L2
                    # We add it to synced L1, and add an empty one to subs L2
                    item = SubRipItem(0, subs_L1[j - 1].start,
                                      subs_L1[j - 1].end, subs_L1[j - 1].text)
                    out.append(item)
                    item2 = SubRipItem(0, subs_L1[j - 1].start,
                                       subs_L1[j - 1].end, " ")
                    subs_L2_out.append(item2)
            if j == last_j:
                dupes = dupes + 1
                #print("---- OOPS. {}: {} - {}".format(L2_sub.index, L2_sub.text.replace("\n",""), L1_sub.text.replace("\n","")))
            last_j = j
        item = SubRipItem(0, start, end, text)
        out.append(item)
        item2 = SubRipItem(0, start, end, L2_sub.text)
        subs_L2_out.append(item2)
    out.clean_indexes()
    subs_L2_out.clean_indexes()
    fixed = 0
    # Second pass: neighbouring output cues with identical L1 text but
    # different L2 text are handed to trySplitLine.
    for i in range(1, len(out)):
        sub1 = out[i - 1].text
        sub2 = out[i].text
        if ((sub1 == sub2) and
                (subs_L2_out[i - 1].text != subs_L2_out[i].text)):
            if (trySplitLine(out, i, sub1)):
                fixed = fixed + 1
                # NOTE(review): rebinding the loop variable of a range() loop
                # does not skip the next iteration.
                i = i + 1
            else:
                print("---- Oy. {}: {} not fixed".format(
                    i, sub1.replace("\n", "[[NL]]")))
    return out, dupes, fixed, subs_L2_out
def export_subtitle(source_file_path: str, subs: List[SubRipItem], target_file_path: str, frame_rate: float = 25.0) -> None:
    """Export subtitle in the format determined by the file extension.

    The output format follows the extension of ``source_file_path``. SubRip
    is written directly; TTML is produced by re-timing the cues of the source
    document; every other format is produced by saving a temporary .srt file
    and converting it with the matching ``Utils.srt2*`` helper.

    Arguments:
        source_file_path {string} -- The path to the original subtitle file.
        subs {list} -- A list of SubRipItems.
        target_file_path {string} -- The path to the exported subtitle file.
        frame_rate {float} -- The frame rate for frame-based subtitle formats {default: 25.0}.

    Raises:
        UnsupportedFormatException -- If the source extension is not recognised.
    """
    encoding = Utils.detect_encoding(source_file_path)
    _, file_extension = os.path.splitext(source_file_path.lower())

    def convert_via_temp_srt(converter, **converter_kwargs):
        # Save ``subs`` to a temporary .srt, convert it, always clean up.
        # mkstemp() returns an OPEN descriptor: close it right away, the
        # previous code leaked one descriptor per export. It is also called
        # before the ``try`` so a failing mkstemp() can no longer trigger a
        # NameError on ``path`` in the ``finally`` clause.
        fd, path = tempfile.mkstemp()
        os.close(fd)
        try:
            SubRipFile(subs).save(path, encoding=encoding)
            converter(path, target_file_path, **converter_kwargs)
        finally:
            os.remove(path)

    if file_extension in Subtitle.SUBRIP_EXTENTIONS:
        SubRipFile(subs).save(target_file_path, encoding=encoding)
        Utils.remove_trailing_newlines(target_file_path, encoding)
    elif file_extension in Subtitle.TTML_EXTENSIONS:
        tree = ElementTree.parse(source_file_path)
        tt = tree.getroot()
        cues = (tt.find("tt:body", Subtitle.TT_NS).find("tt:div", Subtitle.TT_NS).findall("tt:p", Subtitle.TT_NS))  # type: ignore
        # TTML timestamps use "." where SubRip uses ",".
        for index, cue in enumerate(cues):
            cue.attrib["begin"] = str(subs[index].start).replace(",", ".")
            cue.attrib["end"] = str(subs[index].end).replace(",", ".")

        # Change single quotes in the XML header to double quotes
        with open(target_file_path, "w", encoding=encoding) as target:
            if "xml_declaration" in inspect.getfullargspec(ElementTree.tostring).kwonlyargs:  # for >= python 3.8
                encoded = ElementTree.tostring(tt, encoding=encoding, method="xml", xml_declaration=True)
            else:
                encoded = ElementTree.tostring(tt, encoding=encoding, method="xml")
            normalised = encoded.decode(encoding) \
                .replace("<?xml version='1.0' encoding='", '<?xml version="1.0" encoding="',) \
                .replace("'?>", '"?>')
            target.write(normalised)
    elif file_extension in Subtitle.WEBVTT_EXTENSIONS:
        convert_via_temp_srt(Utils.srt2vtt)
    elif file_extension in Subtitle.SSA_EXTENTIONS:
        convert_via_temp_srt(Utils.srt2ssa)
    elif file_extension in Subtitle.ADVANCED_SSA_EXTENTIONS:
        convert_via_temp_srt(Utils.srt2ass)
    elif file_extension in Subtitle.MICRODVD_EXTENSIONS:
        convert_via_temp_srt(Utils.srt2microdvd, frame_rate=frame_rate)
    elif file_extension in Subtitle.MPL2_EXTENSIONS:
        convert_via_temp_srt(Utils.srt2mpl2)
    elif file_extension in Subtitle.TMP_EXTENSIONS:
        convert_via_temp_srt(Utils.srt2tmp)
    elif file_extension in Subtitle.SAMI_EXTENSIONS:
        convert_via_temp_srt(Utils.srt2sami)
    elif file_extension in Subtitle.STL_EXTENSIONS:
        # STL is written as SubRip straight to the target; the temporary
        # file the old code created here was never used.
        SubRipFile(subs).save(target_file_path, encoding=encoding)
    else:
        raise UnsupportedFormatException(
            "Unknown subtitle format for file: {}".format(source_file_path)
        )
def save_subs_as_target_format(subs: List[SubRipItem], source_file_path: str, target_file_path: str) -> None:
    """Save SubRipItems with the format determined by the target file extension.

    The text encoding is detected from ``source_file_path``. SubRip is
    written directly; every other format is produced by saving a temporary
    .srt file and converting it with the matching ``Utils.srt2*`` helper.

    Arguments:
        subs {list} -- A list of SubRipItems.
        source_file_path {string} -- The path to the original subtitle file.
        target_file_path {string} -- The path to the output subtitle file.

    Raises:
        UnsupportedFormatException -- If the target extension is not recognised.
    """
    encoding = Utils.detect_encoding(source_file_path)
    _, file_extension = os.path.splitext(target_file_path.lower())

    def convert_via_temp_srt(converter):
        # Save ``subs`` to a temporary .srt, convert it, always clean up.
        # mkstemp() returns an OPEN descriptor: close it right away, the
        # previous code leaked one descriptor per call. It is also called
        # before the ``try`` so a failing mkstemp() can no longer trigger a
        # NameError on ``path`` in the ``finally`` clause.
        fd, path = tempfile.mkstemp()
        os.close(fd)
        try:
            SubRipFile(subs).save(path, encoding=encoding)
            converter(path, target_file_path)
        finally:
            os.remove(path)

    if file_extension in Subtitle.SUBRIP_EXTENTIONS:
        SubRipFile(subs).save(target_file_path, encoding=encoding)
        Utils.remove_trailing_newlines(target_file_path, encoding)
    elif file_extension in Subtitle.TTML_EXTENSIONS:
        convert_via_temp_srt(Utils.srt2ttml)
    elif file_extension in Subtitle.WEBVTT_EXTENSIONS:
        convert_via_temp_srt(Utils.srt2vtt)
    elif file_extension in Subtitle.SSA_EXTENTIONS:
        convert_via_temp_srt(Utils.srt2ssa)
    elif file_extension in Subtitle.ADVANCED_SSA_EXTENTIONS:
        convert_via_temp_srt(Utils.srt2ass)
    elif file_extension in Subtitle.MICRODVD_EXTENSIONS:
        convert_via_temp_srt(Utils.srt2microdvd)
    elif file_extension in Subtitle.MPL2_EXTENSIONS:
        convert_via_temp_srt(Utils.srt2mpl2)
    elif file_extension in Subtitle.TMP_EXTENSIONS:
        convert_via_temp_srt(Utils.srt2tmp)
    elif file_extension in Subtitle.SAMI_EXTENSIONS:
        convert_via_temp_srt(Utils.srt2sami)
    elif file_extension in Subtitle.STL_EXTENSIONS:
        # STL is written as SubRip straight to the target; the temporary
        # file the old code created here was never used.
        SubRipFile(subs).save(target_file_path, encoding=encoding)
    else:
        # The format is chosen from the TARGET extension, so that is the
        # file to name (the message used to show the source path).
        raise UnsupportedFormatException(
            "Unknown subtitle format for file: {}".format(target_file_path)
        )
def main():
    """Parse the command line, set up Hue/DMX devices and serve OSC forever.

    Steps, in order: print the banner, parse the arguments into the module
    globals, create the SubRipFile used for recording, switch on the Philips
    Hue lights (when ``PLAY_HUE``), look for a DMX bricklet (when
    ``PLAY_DMX``), map ``/hue0``..``/hue511`` and ``/dmx0``..``/dmx511`` to
    their handlers and start a threading OSC UDP server. Does not return
    under normal operation (``serve_forever``).

    NOTE(review): block nesting was reconstructed from a source whose
    whitespace had been collapsed -- confirm against the original layout.
    """
    global hue_list, bridge, SRT_FILENAME, HUE_IP_ADDRESS, MAX_BRIGHTNESS
    global DMX_INTERVAL, INTERVAL, TRANSITION_TIME, HUE_IP_ADDRESS, DEBUG, VERBOSE
    global subs, srtFile
    global ipcon, tfIDs, dmx

    # Banner.
    f1 = Figlet(font='standard')
    print(f1.renderText('LushRoom'))
    f2 = Figlet(font='standard')
    print(f2.renderText('OSC live record'))

    # Command line: every option defaults to the current module global.
    parser = argparse.ArgumentParser()
    parser.add_argument("--ip", default="127.0.0.1", help="OSC ip address to listen to")
    parser.add_argument("--port", type=int, default=8000, help="OSC port to listen to")
    parser.add_argument("-s", "--srt", default=SRT_FILENAME, help=".srt file name for lighting events")
    parser.add_argument("-b", "--brightness", default=MAX_BRIGHTNESS, help="maximum brightness")
    parser.add_argument("-i", "--interval", default=INTERVAL, help="sampling interval for Philips Hue events")
    parser.add_argument("-d", "--dmx_interval", default=DMX_INTERVAL, help="sampling interval for DMX events")
    parser.add_argument("-t", "--transition_time", default=TRANSITION_TIME, help="transition time between Philips Hue events")
    parser.add_argument("--hue", default=HUE_IP_ADDRESS, help="Philips Hue bridge IP address")
    args = parser.parse_args()
    print(args)

    # Write the parsed values back to the globals read by the handlers.
    MAX_BRIGHTNESS = int(args.brightness)
    SRT_FILENAME = args.srt
    INTERVAL = float(args.interval)
    DMX_INTERVAL = float(args.dmx_interval)
    TRANSITION_TIME = float(args.transition_time)
    HUE_IP_ADDRESS = args.hue
    # VERBOSE = args.verbose
    # DEBUG = args.debug

    if SRT_FILENAME != "":
        print("Start recording the %s subtitles track for light events." % SRT_FILENAME)
        # Constructor, not SubRipFile.open().
        srtFile = SubRipFile(path=SRT_FILENAME)

    if PLAY_HUE:
        bridge = Bridge(HUE_IP_ADDRESS)
        bridge.connect()
        bridge.get_api()
        lights = bridge.lights
        for l in lights:
            print(l.name)
        # Switch every light on at the configured maximum brightness.
        for l in lights:
            l.on = True
            l.brightness = MAX_BRIGHTNESS
        light_names = bridge.get_light_objects('name')
        print("Light names:", light_names)

    if PLAY_HUE:
        hue_list = hue_build_lookup_table(lights)
    # else:
    #     hue_list = [[0],['1'],[2],[3],[4],[5],[6],[7],[8],[9]]
    print(hue_list)

    if PLAY_DMX:
        ipcon.connect(HOST, PORT)
        # Register Enumerate Callback
        ipcon.register_callback(IPConnection.CALLBACK_ENUMERATE, cb_enumerate)
        # Trigger Enumerate
        ipcon.enumerate()
        # presumably gives cb_enumerate time to fill tfIDs -- TODO confirm
        sleep(2)
        if DEBUG:
            print(tfIDs)
        dmxcount = 0
        for tf in tfIDs:
            # try:
            if True:
                # print(len(tf[0]))
                if len( tf[0] ) <= 3:  # if the device UID is 3 characters it is a bricklet
                    if tf[1] in deviceIDs:
                        if VERBOSE:
                            print(tf[0], tf[1], getIdentifier(tf))
                    if tf[1] == 285:  # DMX Bricklet
                        # Only the first DMX bricklet found is registered.
                        if dmxcount == 0:
                            print( "Registering %s as slave DMX device for capturing DMX frames" % tf[0])
                            dmx = BrickletDMX(tf[0], ipcon)
                            # NOTE(review): the message above says "slave"
                            # but the mode set here is DMX_MODE_MASTER.
                            dmx.set_dmx_mode(dmx.DMX_MODE_MASTER)
                            # channels = int((int(MAX_BRIGHTNESS)/255.0)*ones(512,)*255)
                            # dmx.write_frame([255,255])
                            sleep(1)
                            # channels = int((int(MAX_BRIGHTNESS)/255.0)*zeros(512,)*255)
                            # dmx.write_frame(channels)
                        dmxcount += 1

    disp = dispatcher.Dispatcher()
    # print(dir(dispatcher))
    # One OSC address per index 0..511, for both Hue and DMX; the index is
    # passed to the handler as a string.
    for h in range(512):
        disp.map("/hue%s" % h, play_record_hue, "%s" % h)
    for h in range(512):
        disp.map("/dmx%s" % h, play_record_dmx, "%s" % h)

    server = osc_server.ThreadingOSCUDPServer((args.ip, args.port), disp)
    print("Serving OSC on {}".format(server.server_address))
    signal.signal(signal.SIGINT, signal_handler)
    server.serve_forever()
def test_utf8(self):
    """The UTF-8 fixture opens fully; the windows-1252 one fails with the default encoding."""
    # assertEqual: the assertEquals alias was removed in Python 3.12.
    self.assertEqual(len(SubRipFile.open(self.utf8_path)), 1332)
    self.assertRaises(UnicodeDecodeError, SubRipFile.open, self.windows_path)
#!/usr/bin/python
"""Print the text and the start-time millisecond field of every subtitle.

Usage: pass the path of the .srt file as the first command-line argument.
"""
from pysrt import SubRipFile
import sys

subs = SubRipFile.open(sys.argv[1])
for s in subs:
    # print() with a single argument produces the same output on Python 2
    # and also runs on Python 3, unlike the former print statements.
    print(s.text)
    print(s.start.milliseconds)
def test_compare_from_string_and_from_path(self):
    """Parsing from a path and from the decoded string yields equal items."""
    # Close the fixture explicitly instead of leaking the handle.
    with codecs.open(self.utf8_path, encoding='utf_8') as handle:
        unicode_content = handle.read()
    iterator = izip(SubRipFile.open(self.utf8_path),
                    SubRipFile.from_string(unicode_content))
    for file_item, string_item in iterator:
        # assertEqual: the assertEquals alias is deprecated.
        self.assertEqual(unicode(file_item), unicode(string_item))
def test_save(self):
    """Saving the windows-1252 fixture as UTF-8/LF reproduces the UTF-8 fixture."""
    srt_file = SubRipFile.open(self.windows_path, encoding="windows-1252")
    srt_file.save(self.temp_path, eol="\n", encoding="utf-8")
    try:
        # Both files are closed deterministically.
        with open(self.temp_path, "rb") as saved, open(self.utf8_path, "rb") as expected:
            # assertEqual: the assertEquals alias was removed in Python 3.12.
            self.assertEqual(saved.read(), expected.read())
    finally:
        # Remove the temp file even when the comparison fails.
        os.remove(self.temp_path)
from numpy import array, zeros, array_equal # import pysrt import signal import sys from pysrt import SubRipFile, SubRipItem, SubRipTime from tf_device_ids import deviceIdentifiersList import argparse SRT_FILENAME = "output_dmx.srt" AUDIO_FILENAME = "input.mp4" MAX_BRIGHTNESS = 254 TICK_TIME = 0.05 # seconds srtFile = SubRipFile() tfIDs = [] tfConnect = True prevFrame = zeros(512) prevTime = 0 subs = [] sub_incr = 1 ipcon = IPConnection() # if tfConnect: # tfIDs = []
def setUp(self):
    """Load the UTF-8 fixture into ``self.file`` before each test."""
    fixture = os.path.join(file_path, "tests", "static", "utf-8.srt")
    self.file = SubRipFile.open(fixture)
def test_windows1252(self):
    """The windows-1252 fixture opens fully and reports CRLF line endings."""
    srt_file = SubRipFile.open(self.windows_path, encoding="windows-1252")
    # assertEqual: the assertEquals alias was removed in Python 3.12.
    self.assertEqual(len(srt_file), 1332)
    self.assertEqual(srt_file.eol, "\r\n")
    # Opening the UTF-8 fixture as ASCII must fail loudly.
    self.assertRaises(UnicodeDecodeError, SubRipFile.open, self.utf8_path, encoding="ascii")
def merge_video_subtitle(video_id):
    """
    Convert the Chinese and English VTT subtitles of a video to SRT, then
    merge them into a single SRT subtitle file.

    :param video_id: primary key used to look up the ``Video``
    :return: path of the merged subtitle file, or ``False`` when the
        Chinese or the English subtitle field is empty
    """
    video = Video.objects.get(pk=video_id)

    # Default settings
    delta = SubRipTime(milliseconds=500)
    encoding = "utf_8"

    has_cn = video.subtitle_cn != ''
    has_en = video.subtitle_en != ''
    if not (has_cn and has_en):
        return False

    def download_path(template):
        # Some YouTube titles contain non-ASCII characters, or characters
        # such as "/" that cannot appear in a file name, so Django's
        # get_valid_filename() is used to sanitise the title.
        # Note: the result differs from what youtube-dl's restrictfilenames
        # option produces, so these names may not match the names stored in
        # subtitle_cn / subtitle_en. A "." in the title is still kept.
        file_name = template % (get_valid_filename(video.title),
                                video.video_id)
        return os.path.join(YOUTUBE_DOWNLOAD_DIR, file_name)

    # Convert the VTT subtitles to SRT. The original author notes that the
    # earlier convert_file() approach stopped working, hence this helper.
    subs_cn_srt_path = download_path('%s-%s.cn.srt')
    convert_subtilte_format(srt_file=video.subtitle_cn.path,
                            ass_file=subs_cn_srt_path)

    # For the English subtitle the path is replaced by the converter's
    # return value, exactly as the code has always done.
    en_target_path = download_path('%s-%s.en.srt')
    subs_en_srt_path = convert_subtilte_format(
        srt_file=video.subtitle_en.path, ass_file=en_target_path)

    subs_cn_srt = SubRipFile.open(subs_cn_srt_path, encoding=encoding)
    subs_en_srt = SubRipFile.open(subs_en_srt_path, encoding=encoding)
    merge_subs = merge_subtitle(subs_cn_srt, subs_en_srt, delta)

    merge_subs_path = download_path('%s-%s.zh-Hans.en.srt')
    merge_subs.save(merge_subs_path, encoding=encoding)

    video.subtitle_merge = merge_subs_path
    video.save(update_fields=['subtitle_merge'])
    return merge_subs_path
def test_compare_from_string_and_from_path(self):
    """Check that parsing from a path and from a string give the same items.

    The file at ``self.utf8_path`` is parsed once through
    ``SubRipFile.open`` and once through ``SubRipFile.from_string``; the
    resulting items are compared pairwise by their ``unicode`` rendering.
    """
    # Read the fixture inside a context manager so the handle is closed
    # deterministically instead of being left to the garbage collector.
    with codecs.open(self.utf8_path, encoding="utf_8") as fixture:
        unicode_content = fixture.read()

    from_path = SubRipFile.open(self.utf8_path)
    from_string = SubRipFile.from_string(unicode_content)
    for file_item, string_item in izip(from_path, from_string):
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(unicode(file_item), unicode(string_item))
def main():
    """Match IMDB quotes against subtitle timings and write ``quotes.txt``.

    Works inside the directory given as the first command-line argument.
    Reads ``quotes.htm``, ``subtitles.srt`` and ``project.xml`` from there,
    and writes one ``<relative position>#<quote>`` line per matched quote to
    ``quotes.txt``. Uses ``raw_input`` and print statements, so this is
    Python 2 code.
    """
    os.chdir(sys.argv[1])
    # Blocks until the user presses Enter; the reply itself is discarded.
    raw_input("are the subtitle timings correct?".upper())

    # ##### extract quotes from IMDB html-file ###################################################
    f = open(r"quotes.htm", "r")
    parser = etree.HTMLParser()
    tree = etree.parse(f, parser)
    f.close()
    root = tree.getroot()
    quotes = []
    for div in root.xpath("//div"):
        try:
            # Raises KeyError for a div without a class attribute; the bare
            # except below then skips that div.
            c = div.attrib["class"]
            if c == "sodatext":
                s = etree.tostring(div)
                # Strip the surrounding <div> tags.
                s = re.sub("\<div.*\>\n", "", s)
                s = re.sub("\</div.*\>", "", s)
                #s = re.sub("\<b\>.*\</b\>:\n", "- ", s) # names
                # Turn the bold, linked speaker name into a "name: " prefix.
                s = re.sub("\<b\>\<a.*\"\>", "", s)
                s = re.sub("\</a\>\</b\>:\n", ": ", s)
                # share this quote
                s = re.sub("\<p.*\>.*\</p\>", "", s)
                s = re.sub("\<span.*\>.*\</span\>", "", s)
                s = re.sub("\[.*\]", "", s)  # stage directions
                s = re.sub("\<br/\>", "", s)
                # NOTE(review): as written this replaces a space with a
                # space; the pattern may originally have been a different
                # whitespace character or entity — confirm.
                s = re.sub(" ", " ", s)
                lines = [line.strip() for line in s.split("\n")]
                lines = [line for line in lines if len(line) > 0]
                if len(lines) == 1:
                    # Drop the first character of a single-line quote.
                    lines[0] = lines[0][1:].strip()
                quote = "\n".join(lines)
                # #######
                # Disabled length filter, kept as a bare string statement.
                '''if len(quote) >= QUOTE_MIN_LEN and len(quote) <= QUOTE_MAX_LEN: quotes.append(quote)'''
                quotes.append(quote)
                # #######
        except:
            # Any error while handling a div skips that div silently.
            continue
    # Remove duplicate quotes; the resulting order is arbitrary.
    quotes = list(set(quotes))
    # Punctuation-free, lower-cased copies, index-aligned with `quotes`.
    quotes_clean = [
        re.sub("[%s]+" % re.escape(string.punctuation), "", x) for x in quotes
    ]
    quotes_clean = [x.lower().strip() for x in quotes_clean]
    """for quote in quotes_clean: print quote, "\n" """

    # ##### read subtitles from srt-file ###################################################
    subs = SubRipFile.open('subtitles.srt')
    """for sub in subs: #print sub.from_string() print sub.index #print sub.shift() print sub.start print sub.end print sub.text print "\n" """
    #print dir(subs)
    # Maps str(item.start) of a subtitle to the original (uncleaned) quote.
    timecode_quote = {}
    for item in subs:
        # Normalise the subtitle text the same way as quotes_clean.
        item.text = re.sub("[%s]+" % re.escape(string.punctuation), "",
                           item.text)
        item.text = item.text.lower().strip()
        text = item.text.split("\n")[0]  # first line only
        for i, quote in enumerate(quotes_clean):
            if len(
                    text.split(" ")
            ) >= 3 and text in quote:  # we'll get a lot of false hits with only one word :/
                # Each quote is recorded only once, at its first match.
                if quotes[i] not in timecode_quote.values():
                    timecode_quote[str(item.start)] = quotes[i]

    # ##### ###################################################
    # NOTE(review): `et` here and `etree` above are two different names —
    # presumably two different XML libraries; confirm against the imports.
    tree = et.parse("project.xml")
    movie = tree.getroot()
    fps = float(movie.attrib["fps"])
    frames = float(movie.attrib["frames"])
    # Total duration derived from the frame count and the frame rate.
    seconds = frames / fps
    #print seconds
    """start_frame = float( movie.attrib["start_frame"] ) start_sec = startframe / fps"""

    # sort by timecode
    # Relies on Python 2, where dict.keys() returns a sortable list.
    timecodes = timecode_quote.keys()
    timecodes.sort()
    f = open("quotes.txt", "w")
    for tc in timecodes:
        #print tc
        # Position of the quote as a percentage of the total duration.
        print "%.1f" % (100 * timecode_to_seconds(tc) / seconds) + "%", tc
        print timecode_quote[tc]
        print ""
        # One line per quote: relative position, "#", then the quote with
        # its line breaks replaced by "#".
        f.write("%f#%s\n" % (timecode_to_seconds(tc) / seconds,
                             timecode_quote[tc].replace("\n", "#")))
    f.close()
    print "<<", len(timecodes), "QUOTES >>"
    #raw_input("- done -")
    return
def input_file(self):
    """Return the source subtitle file, opening it on first access.

    The file named by ``self.arguments.file`` is opened with
    ``SubRipFile.ERROR_LOG`` error handling and cached on the instance as
    ``_source_file``; later calls return the cached object.
    """
    if hasattr(self, '_source_file'):
        return self._source_file

    source_path = self.arguments.file
    self._source_file = SubRipFile.open(
        source_path, error_handling=SubRipFile.ERROR_LOG)
    return self._source_file
def setUp(self):
    """Give each test a fresh, empty ``SubRipFile`` as ``self.duck``."""
    empty_file = SubRipFile()
    self.duck = empty_file
def setUp(self):
    """Give each test a fresh, empty ``SubRipFile`` as ``self.file``."""
    empty_file = SubRipFile()
    self.file = empty_file
def test_default_value(self):
    """Check the ``eol`` attribute with and without an explicit value.

    ``self.file.eol`` must equal ``os.linesep``, and a file constructed
    with ``eol='\\r\\n'`` must report that value back.
    """
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(self.file.eol, os.linesep)
    srt_file = SubRipFile(eol='\r\n')
    self.assertEqual(srt_file.eol, '\r\n')
def test_multiple_item(self):
    """Check that ``text`` joins the text of several items with newlines."""
    srt_file = SubRipFile([
        SubRipItem(1, {'seconds': 0}, {'seconds': 3}, 'Hello'),
        SubRipItem(1, {'seconds': 1}, {'seconds': 2}, 'World !'),
    ])
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(srt_file.text, 'Hello\nWorld !')
def export_subtitle(source_file_path, subs, target_file_path, frame_rate=25.0):
    """Export subtitle in the format determined by the file extension.

    The format is chosen from the lower-cased extension of
    ``source_file_path``. SubRip is written directly; TTML is produced by
    re-timing the cues of the source document; every other supported
    format is produced by saving a temporary SubRip file and running the
    matching ``Utils.srt2*`` converter on it.

    Arguments:
        source_file_path {string} -- The path to the original subtitle file.
        subs {list} -- A list of SubRipItems.
        target_file_path {string} -- The path to the exported subtitle file.
        frame_rate {float} -- The frame rate for frame-based subtitle formats {default: 25.0}.

    Raises:
        UnsupportedFormatException -- If the extension matches no known format.
    """

    def convert_via_temp_srt(converter, **converter_kwargs):
        # Save the cues to a temporary SubRip file and convert that file
        # into target_file_path with the given converter.
        # mkstemp() returns an OPEN descriptor; close it straight away so it
        # is not leaked and so the path can be reopened and removed on every
        # platform. It is called before the try block so that `path` is
        # always bound when the cleanup runs.
        fd, path = tempfile.mkstemp()
        os.close(fd)
        try:
            SubRipFile(subs).save(path, encoding="utf8")
            converter(path, target_file_path, **converter_kwargs)
        finally:
            os.remove(path)

    _, file_extension = os.path.splitext(source_file_path.lower())

    if file_extension in Subtitle.SUBRIP_EXTENTIONS:
        SubRipFile(subs).save(target_file_path, encoding="utf8")
        Utils.remove_trailing_newlines(target_file_path)
    elif file_extension in Subtitle.TTML_EXTENSIONS:
        tree = ElementTree.parse(source_file_path)
        tt = tree.getroot()
        cues = (tt.find("tt:body", Subtitle.TT_NS).find(
            "tt:div", Subtitle.TT_NS).findall("tt:p", Subtitle.TT_NS))
        # Copy the timings onto the cues by position; TTML uses "." where
        # the SubRip time string has ",".
        for index, cue in enumerate(cues):
            cue.attrib["begin"] = str(subs[index].start).replace(",", ".")
            cue.attrib["end"] = str(subs[index].end).replace(",", ".")

        # Change single quotes in the XML header to double quotes
        with open(target_file_path, "w", encoding="utf8") as target:
            normalised = (ElementTree.tostring(
                tt, encoding="utf8", method="xml").decode("utf-8").replace(
                    "<?xml version='1.0' encoding='utf8'?>",
                    '<?xml version="1.0" encoding="utf8"?>',
                ))
            target.write(normalised)
    elif file_extension in Subtitle.WEBVTT_EXTENSIONS:
        convert_via_temp_srt(Utils.srt2vtt)
    elif file_extension in Subtitle.SSA_EXTENTIONS:
        convert_via_temp_srt(Utils.srt2ssa)
    elif file_extension in Subtitle.ADVANCED_SSA_EXTENTIONS:
        convert_via_temp_srt(Utils.srt2ass)
    elif file_extension in Subtitle.MICRODVD_EXTENSIONS:
        convert_via_temp_srt(Utils.srt2microdvd, frame_rate=frame_rate)
    elif file_extension in Subtitle.MPL2_EXTENSIONS:
        convert_via_temp_srt(Utils.srt2mpl2)
    elif file_extension in Subtitle.TMP_EXTENSIONS:
        convert_via_temp_srt(Utils.srt2tmp)
    else:
        raise UnsupportedFormatException(
            "Unknown subtitle format for file: {}".format(
                source_file_path))
def onInit( self ):
    """Locate the playing video's subtitle file and build or reuse its search index.

    Looks for the subtitle file next to the playing file, then under
    "special://temp". Hashes the file with MD5 and compares the digest with
    the one stored in the index directory: on a match the existing archive
    is opened read-only, otherwise the archive is rebuilt from the subtitle
    items and the new digest is stored.
    """
    # First candidate: the subtitle name joined onto the playing file's directory.
    filename = os.path.join(os.path.split(xbmc.Player().getPlayingFile())[0], xbmc.Player().getSubtitles())
    if not os.path.exists(filename):
        # Second candidate: the subtitle name under the temp location.
        filename = os.path.join("special://temp", xbmc.Player().getSubtitles())
        if not os.path.exists(filename):
            xbmc.log(__scriptname__ + ": cannot find subtitle file!", xbmc.LOGERROR)
            dialog = xbmcgui.Dialog()
            dialog.ok('SubSeek', 'Sorry, the subtitle file could not be found...')
            xbmc.executebuiltin('XBMC.RunPlugin(plugin://script.xbmc.subtitles/)')
            # NOTE(review): the code below still runs unless exit_script()
            # does not return — confirm.
            self.exit_script()
    # Only files whose name ends in ".srt" are accepted.
    if not xbmc.Player().getSubtitles().split('.')[-1] == "srt":
        xbmc.log(__scriptname__ + ": incompatible subtitles", xbmc.LOGERROR)
        dialog = xbmcgui.Dialog()
        dialog.ok('SubSeek', 'Sorry, the subtitle file is not compatible. Please load a .srt')
        xbmc.executebuiltin('XBMC.RunPlugin(plugin://script.xbmc.subtitles/)')
        self.exit_script()
    xbmc.log(__scriptname__ + ": Subtitle file: " + filename, xbmc.LOGDEBUG)
    hashmatch = False
    pDialog = xbmcgui.DialogProgress()
    pDialog.create('SubSeek', 'Hashing subtitle file...')
    pDialog.update(0)
    # Hash the subtitle file line by line.
    f = open(filename, 'r')
    m = md5py.md5()
    for line in f:
        m.update(line)
    # NOTE: `hash` shadows the builtin of the same name for the rest of
    # this method.
    hash = m.hexdigest()
    f.close()
    xbmc.log(__scriptname__ + ": Subtitle hash is "+hash, xbmc.LOGDEBUG)
    # Compare with the digest stored by a previous run, if there is one.
    if os.path.exists(os.path.join("special://temp","subseek-indexdir","hash.txt")):
        f = open(os.path.join('special://temp', 'subseek-indexdir',"hash.txt"), 'r')
        if f.readline() == hash:
            hashmatch = True
            xbmc.log(__scriptname__ + ": Subtitle hash matches stored database, reusing archive", xbmc.LOGDEBUG)
        else:
            xbmc.log(__scriptname__ + ": Subtitle hash does not match stored database, building new database", xbmc.LOGDEBUG)
        f.close()
    if not hashmatch:
        # Rebuild the index directory from scratch.
        # NOTE(review): rmtree is called without checking that the directory
        # exists; presumably it raises on the very first run — confirm.
        shutil.rmtree(os.path.join("special://temp","subseek-indexdir"))
        os.mkdir(os.path.join("special://temp","subseek-indexdir"))
        self.archive = Nucular.Nucular(os.path.join("special://temp","subseek-indexdir"))
        (self.archive).create()
        pDialog = xbmcgui.DialogProgress()
        pDialog.create('SubSeek', 'Opening Subtitle File...')
        pDialog.update(0)
        subs = SubRipFile.open(filename, encoding='iso-8859-1')
        pDialog.create('SubSeek', 'Populating Database...')
        pDialog.update(0)
        for i in range(len(subs)):
            sub = subs[i]
            # One entry per subtitle item: the text flattened to a single
            # line with <i> tags rewritten as [I]/[/I], and the start time
            # rendered as the time-of-day part of a datetime string.
            D = { "content": sub.text.replace("\n", " ").replace("<i>", "[I]").replace("</i>", "[/I]"),
                  "start": str(datetime.datetime(1,1,1,
                                                 sub.start.hours,
                                                 sub.start.minutes,
                                                 sub.start.seconds,
                                                 sub.start.milliseconds*1000)).split()[1]}
            # Each entry is stored under a fresh random UUID.
            (self.archive).indexDictionary(str(uuid.uuid4()), D)
            pDialog.update(int(math.floor(100*i/len(subs))))
        pDialog.update(100, 'Storing Database...')
        (self.archive).store(lazy=False)
        # Store the digest so the next run can detect an unchanged file.
        f = open(os.path.join('special://temp', 'subseek-indexdir',"hash.txt"), "w")
        f.write(hash)
        f.close()
    else:
        # Digest unchanged: open the existing archive read-only.
        (self.archive) = Nucular.Nucular(os.path.join("special://temp","subseek-indexdir"), readOnly=True)
    pDialog.close()
    pass
def makeL1L2(L1_srt, L2_srt, out_srt, levels, save_sync, out_L1_utf8bom_srt, out_L2_utf8bom_srt,
             show_L2, encoding, L1_color, L1_size, L2_color, L2_size):
    """
    Join the L1_srt and L2_srt subtitles and save one result per level.

    The output path for each level is out_srt with "{{LEVEL}}" replaced by
    the level.
    If save_sync is True, the synced srt files are saved as well.
    If out_L1_utf8bom_srt is not empty, the L1 srt file converted to
    utf8-BOM is kept at that path; the same applies to out_L2_utf8bom_srt
    for the L2 file. Otherwise the converted temporary files are removed.
    L1_color, L1_size, L2_color and L2_size are passed to setSrtTemplates
    to format the subs.
    """

    def keep_or_discard(temp_path, keep_path):
        # Move the temporary utf8-BOM file to keep_path (replacing an
        # existing file there), or delete it when no path was requested.
        if keep_path:
            if os.path.isfile(keep_path):
                os.remove(keep_path)
            os.rename(temp_path, keep_path)
        else:
            os.remove(temp_path)

    log("L1_srt: " + L1_srt)
    log("L2_srt: " + L2_srt)
    log("show_L2: " + show_L2)
    log("encoding: " + encoding)
    log("save_sync: ", save_sync)
    log("levels: ", levels)
    log("L1 color: {}, size: {}.".format(L1_color, L1_size))
    log("L2 color: {}, size: {}.".format(L2_color, L2_size))
    log("out_L1_utf8bom_srt: ", out_L1_utf8bom_srt)
    log("out_L2_utf8bom_srt: ", out_L2_utf8bom_srt)

    setSrtTemplates(L1_color, L1_size, L2_color, L2_size)

    # Decode both inputs and save them as utf8-BOM next to the originals.
    L1_srt_bom = L1_srt + ".utf8bom"
    L2_srt_bom = L2_srt + ".utf8bom"
    makeFileUtf8Bom(L1_srt, L1_srt_bom)
    makeFileUtf8Bom(L2_srt, L2_srt_bom)

    subs_L1_orig = SubRipFile.open(L1_srt_bom)
    subs_L2_orig = SubRipFile.open(L2_srt_bom)
    subs_L1, dupes, fixed, subs_L2 = syncSrts(subs_L1_orig, subs_L2_orig)

    if save_sync:
        out_synced_L1 = L1_srt.replace(".srt", ".synced.srt")
        out_synced_L2 = L2_srt.replace(".srt", ".synced.srt")
        subs_L1.save(out_synced_L1, encoding=encoding)
        subs_L2.save(out_synced_L2, encoding=encoding)
        log("Saved {} and {}. Duplicate lines: {} Fixed: {}".format(
            out_synced_L1, out_synced_L2, dupes, fixed))

    # Per-level output path, output file and hidden-line counter.
    out_srts = {level: out_srt.replace("{{LEVEL}}", level) for level in levels}
    outs = {level: SubRipFile() for level in levels}
    removed_lines = dict.fromkeys(levels, 0)

    for i in range(len(subs_L2)):
        processSub(subs_L1[i], subs_L2[i], levels, outs, removed_lines,
                   show_L2)

    for level in levels:
        if level != "0":
            criteria = level_criterias[level]
        else:
            criteria = 'none'
        summary = "level_criteria: {}. Hidden L1 lines: {} out of {}".format(
            criteria, removed_lines[level], len(subs_L2))
        # The summary is appended to the output as a one-millisecond item.
        level_out = outs[level]
        level_out.append(
            SubRipItem(1, {'milliseconds': 0}, {'milliseconds': 1}, summary))
        level_out.clean_indexes()
        level_out.save(path=out_srts[level], encoding=encoding)
        log("Saved {}. {} ".format(out_srts[level], summary))

    keep_or_discard(L1_srt_bom, out_L1_utf8bom_srt)
    keep_or_discard(L2_srt_bom, out_L2_utf8bom_srt)
def merge_video_subtitle(video_id):
    """
    Convert the Chinese and English VTT subtitles of a video to SRT, then
    merge them into a single SRT subtitle file.

    :param video_id: primary key used to look up the ``Video``
    :return: path of the merged subtitle file, or ``False`` when the
        Chinese or the English subtitle field is empty
    """
    video = Video.objects.get(pk=video_id)

    # Default settings
    delta = SubRipTime(milliseconds=500)
    encoding = "utf_8"

    has_cn = video.subtitle_cn != ''
    has_en = video.subtitle_en != ''
    if not (has_cn and has_en):
        return False

    def download_path(template):
        # Some YouTube titles contain non-ASCII characters, or characters
        # such as "/" that cannot appear in a file name, so Django's
        # get_valid_filename() is used to sanitise the title.
        # Note: the result differs from what youtube-dl's restrictfilenames
        # option produces, so these names may not match the names stored in
        # subtitle_cn / subtitle_en. A "." in the title is still kept.
        file_name = template % (get_valid_filename(video.title),
                                video.video_id)
        return os.path.join(YOUTUBE_DOWNLOAD_DIR, file_name)

    # Convert the VTT subtitles to SRT. The original author notes that the
    # earlier convert_file() approach stopped working, hence this helper.
    subs_cn_srt_path = download_path('%s-%s.cn.srt')
    convert_subtilte_format(srt_file=video.subtitle_cn.path,
                            ass_file=subs_cn_srt_path)

    # For the English subtitle the path is replaced by the converter's
    # return value, exactly as the code has always done.
    en_target_path = download_path('%s-%s.en.srt')
    subs_en_srt_path = convert_subtilte_format(
        srt_file=video.subtitle_en.path, ass_file=en_target_path)

    subs_cn_srt = SubRipFile.open(subs_cn_srt_path, encoding=encoding)
    subs_en_srt = SubRipFile.open(subs_en_srt_path, encoding=encoding)
    merge_subs = merge_subtitle(subs_cn_srt, subs_en_srt, delta)

    merge_subs_path = download_path('%s-%s.zh-Hans.en.srt')
    merge_subs.save(merge_subs_path, encoding=encoding)

    video.subtitle_merge = merge_subs_path
    video.save(update_fields=['subtitle_merge'])
    return merge_subs_path
def handle_tracks(tracks, start, end, fps, srt_filename):
    """Turn parsed lighting tracks into a SubRip file of HUE and DMX commands.

    For every frame index, hue colour payloads are collected into a
    ``HUE<n>(h,s,v,TRANSITION_TIME)`` command string and dmx payloads are
    written into a 512-slot DMX frame. The resulting commands are appended
    to a SubRipFile as timed items, and the file is saved to
    ``srt_filename`` as UTF-8.

    Args:
        tracks: iterable whose items are passed one by one to
            ``handle_track_list``. ``track_list`` is overwritten on each
            iteration, so only the result for the last item is used below.
        start, end: forwarded unchanged to ``handle_track_list``.
        fps: frame rate; converts a frame index to seconds and limits how
            often commands are emitted.
        srt_filename: path given to the SubRipFile and used when saving.
    """
    global XML_FILENAME, HUE_SAMPLING, DMX_SAMPLING, TRANSITION_TIME, DEBUG, VERBOSE
    track_list = []
    for track in tracks:
        track_list = handle_track_list(track, start, end, fps)
        # print(track_list[3][0])
        # try:
        #     print(len(track_list[3]),len(track_list[3][0]),track_list[3][0][1:10],track_list[3][-1][1:10])
        # except:
        #     pass

    # srt_file = open(srt_filename,"w")

    # As indexed below, track_list holds parallel lists: [0] names,
    # [1] types, [2] addresses, [3] one payload list per track.
    # NOTE(review): inferred from usage only — confirm against
    # handle_track_list.
    dmx_frame = zeros(512)
    prev_dmx_frame = zeros(512)
    # Assigned but never read in this function.
    prev_dmx_valid_frame = zeros(512)

    subrip_file = SubRipFile(path=srt_filename)

    print(40 * "-")
    print("Processing frames")
    print(40 * "-")
    # print(track_list[3][1])
    # print(len(track_list[1]))
    if len(track_list[1]) > 0:
        # If there isn't only an audio track
        # print(track_list[1][0])
        # print(track_list[1][0]!="audio")
        # print(len(track_list[1]) != 1 and track_list[1][0]!="audio")
        if (len(track_list[1]) != 1 or track_list[1][0] != "audio"):
            print("Number of lighting events: ", len(track_list[3][0]))
            frame_no = 0
            # The frame count is taken from the first payload list.
            for i in range(len(track_list[3][0])):
                # frame_no = track_list[4][i]
                # frame_no is reset here on every iteration, so the
                # "frame_no += 1" statements further down only affect the
                # remainder of the current iteration.
                frame_no = i
                t = i * (1.0 / float(fps))
                if VERBOSE:
                    print(40 * "-")
                    # print(frame_no,fps)
                    print("Frame %s / time %s seconds" % (frame_no, t))
                    print(40 * "-")
                hue_cmd = ""
                dmx_cmd = ""
                # for the bug, len(of track_list[0]) is greater than
                # len(track_list[3])
                for j in range(len(track_list[0])):
                    # print(track_list[1][j])
                    if track_list[1][j] != "audio":
                        name = track_list[0][j]
                        # NOTE: `type` shadows the builtin inside this function.
                        type = track_list[1][j]
                        addr = track_list[2][j]
                        # print(name,type,addr)
                        # TODO: if frame_no = i as on line 181, the following line fails!
                        # [3][j] is out of range therefore j is the problem
                        try:
                            payload = track_list[3][j][i]
                        except Exception as e:
                            # NOTE(review): only a message is printed, so
                            # `payload` keeps its value from an earlier
                            # track or frame, or is unbound on the very
                            # first failure.
                            print(
                                'ERROR: could not get payload, len(of track_list[0]) is likely greater than \
                                len (track_list[3])')
                        # print(name, type, addr, payload)
                        # Convert Hue payload to hue command
                        if payload != "":
                            if addr[1:4].lower(
                            ) == "hue" and type == "OSCColor/floatarray":
                                if VERBOSE:
                                    print("hue", addr, payload)
                                r, g, b, a = 0, 0, 0, 0
                                try:
                                    # Payload is "r,g,b" or "r,g,b,a"; any
                                    # other length leaves the zeros above.
                                    payload_list = payload.split(",")
                                    # print(payload_list)
                                    if len(payload_list) == 3:
                                        r, g, b = payload_list
                                    elif len(payload_list) == 4:
                                        r, g, b, a = payload_list
                                except Exception as e:
                                    print(e)

                                # Scale the HSV components to the ranges
                                # used in the HUE command.
                                h, s, v = rgb_to_hsv(float(r), float(g),
                                                     float(b))
                                h *= 65535.0
                                s *= 254.0
                                v *= 254.0

                                h = int(h)
                                s = int(s)
                                v = int(v)
                                # print("hue", addr, payload, h,s,v)
                                # Characters after the 4-character address
                                # prefix are the numeric target.
                                n = int(addr[4:])
                                # print("hue", n, h,s,v)
                                # Commands for one frame are joined with ";".
                                if len(hue_cmd) == 0:
                                    hue_cmd += "HUE%s(%s,%s,%s,%s)" % (
                                        n, h, s, v, TRANSITION_TIME)
                                else:
                                    hue_cmd += ";HUE%s(%s,%s,%s,%s)" % (
                                        n, h, s, v, TRANSITION_TIME)
                            # Convert single DMX channel to command
                            elif addr[1:4].lower(
                            ) == "dmx" and type == "OSCValue/float":
                                if VERBOSE:
                                    print("dmx value", addr, payload)
                                n = int(addr[4:])
                                if payload != "":
                                    # The float payload is multiplied by 254.
                                    dmx_frame[int(n)] = int(
                                        float(payload) * 254)
                            # Convert multiple DMX channels to command
                            elif addr[1:4].lower() == "dmx" and (
                                    type == "OSCColor/floatarray"
                                    or type == "OSCValue/standard"):
                                if VERBOSE:
                                    print("dmx colour", addr, payload)
                                n = int(addr[4:])
                                if payload != "":
                                    payload_list = payload.split(",")
                                    # Consecutive slots starting at n.
                                    for channel in payload_list:
                                        dmx_frame[int(n)] = int(
                                            float(channel) * 254)
                                        n += 1

                # Output HUE commands
                # hue_t = frame_no * (1.0/HUE_SAMPLING)
                # Emitted only on frames that are a multiple of fps.
                if frame_no % fps == 0 and hue_cmd != "":
                    item = SubRipItem(frame_no, text=hue_cmd)
                    item.shift(seconds=t)
                    item.end.shift(seconds=1)
                    if VERBOSE:
                        print(item)
                    else:
                        print("h", end="")
                        stdout.flush()
                    subrip_file.append(item)
                    frame_no += 1

                # Output DMX command
                # Trailing zeros are trimmed before the frame is formatted.
                dmx_frame_trimmed = trim_zeros(dmx_frame,
                                               'b').astype('uint8')
                # print("dmx_frame_trimmed before",dmx_frame_trimmed)
                # if len(dmx_frame_trimmed)==0:
                #     dmx_frame_trimmed = zeros(512)
                # print("dmx_frame_trimmed after",dmx_frame_trimmed)

                # Element 0 is dropped by the [1:] slice.
                # NOTE(review): presumably DMX channels are 1-based — confirm.
                dmx_cmd = "DMX1" + str(tuple(dmx_frame_trimmed)[1:]).replace(
                    " ", "")
                if VERBOSE:
                    print('dmx_cmd to be written: ', dmx_cmd)
                # cmd = hue_cmd + ";" + dmx_cmd
                # Emitted when the frame changed or on a multiple of fps.
                if (not array_equal(dmx_frame_trimmed,
                                    prev_dmx_frame)) or (frame_no % fps
                                                         == 0):
                    # if frame_no % fps == 0 and dmx_cmd=="":
                    # if frame_no % fps == 0:
                    # print(dmx_cmd, prev_dmx_frame)

                    # Fix for and empty DMX command
                    # Usually found at the start of a treatment track
                    if dmx_cmd == "DMX1()":
                        # `item` is also bound here but is overwritten on
                        # the next statement.
                        item = dmx_cmd = "DMX1" + str(
                            tuple(zeros(512, dtype=int))[1:]).replace(
                                " ", "")

                    item = SubRipItem(frame_no, text=dmx_cmd)
                    item.shift(seconds=t)
                    item.end.shift(seconds=1.0 / fps)

                    if VERBOSE:
                        print(item)
                    else:
                        print("d", end="")
                        stdout.flush()

                    subrip_file.append(item)
                    frame_no += 1
                prev_dmx_frame = dmx_frame_trimmed
                # print(cmd)
                if VERBOSE:
                    print(40 * "-")
                    # print(track_list[0][j], track_list[1][j], track_list[2][j], track_list[3][j][i])
                    # print(frame)
                    # j = 1
                    # for frame in track:
                    #     print(track_list[0][i] + " " +frame, end = " ")
                    #     j += 1
                    # print()
    encoding = "utf_8"
    subrip_file.save(srt_filename, encoding=encoding)
    print()
def test_shift(self):
    """Check absolute and ratio shifting of a file holding one default item."""
    srt_file = SubRipFile([SubRipItem()])
    srt_file.shift(1, 1, 1, 1)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(srt_file[0].end, (1, 1, 1, 1))
    srt_file.shift(ratio=2)
    self.assertEqual(srt_file[0].end, (2, 2, 2, 2))
def __save_subtitle_by_extension(file_extension: str,
                                 subs: List[SubRipItem],
                                 source_file_path: str,
                                 target_file_path: str,
                                 encoding: str,
                                 frame_rate: Optional[float],
                                 is_exporting: bool = False):
    """Save the cues in the subtitle format selected by ``file_extension``.

    SubRip is written directly. TTML is written either by re-timing the
    cues of the source document (``is_exporting``) or by conversion. Every
    other supported format except STL is produced by saving a temporary
    SubRip file and running the matching ``Utils.srt2*`` converter on it.

    Arguments:
        file_extension {str} -- Extension that selects the output format.
        subs {list} -- A list of SubRipItems.
        source_file_path {str} -- Path of the original subtitle file; parsed
            for TTML export and quoted in the error message.
        target_file_path {str} -- Path the result is written to.
        encoding {str} -- Text encoding used for the files written.
        frame_rate {float} -- Frame rate passed to the MicroDVD converter.
        is_exporting {bool} -- When True, TTML output reuses the source
            document's structure (default: False).

    Raises:
        UnsupportedFormatException -- If the extension matches no known format.
    """

    def convert_via_temp_srt(converter, **converter_kwargs) -> None:
        # Save the cues to a temporary SubRip file and convert that file
        # into target_file_path with the given converter.
        # mkstemp() returns an OPEN descriptor; close it straight away so it
        # is not leaked and so the path can be reopened and removed on every
        # platform. It is called before the try block so that `path` is
        # always bound when the cleanup runs.
        fd, path = tempfile.mkstemp()
        os.close(fd)
        try:
            SubRipFile(subs).save(path, encoding=encoding)
            converter(path, target_file_path, **converter_kwargs)
        finally:
            os.remove(path)

    if file_extension in Subtitle.SUBRIP_EXTENTIONS:
        SubRipFile(subs).save(target_file_path, encoding=encoding)
        Utils.remove_trailing_newlines(target_file_path, encoding)
    elif file_extension in Subtitle.TTML_EXTENSIONS:
        if is_exporting:
            tree = ElementTree.parse(source_file_path)
            tt = tree.getroot()
            cues = (tt.find("tt:body", Subtitle.TT_NS).find(
                "tt:div", Subtitle.TT_NS).findall("tt:p", Subtitle.TT_NS))  # type: ignore
            # Copy the timings onto the cues by position; TTML uses "."
            # where the SubRip time string has ",".
            for index, cue in enumerate(cues):
                cue.attrib["begin"] = str(subs[index].start).replace(
                    ",", ".")
                cue.attrib["end"] = str(subs[index].end).replace(",", ".")

            # Change single quotes in the XML header to double quotes
            with open(target_file_path, "w", encoding=encoding) as target:
                if "xml_declaration" in inspect.getfullargspec(
                        ElementTree.tostring
                ).kwonlyargs:  # for >= python 3.8
                    encoded = ElementTree.tostring(tt,
                                                   encoding=encoding,
                                                   method="xml",
                                                   xml_declaration=True)
                else:
                    encoded = ElementTree.tostring(tt,
                                                   encoding=encoding,
                                                   method="xml")
                normalised = encoded.decode(encoding) \
                    .replace("<?xml version='1.0' encoding='", '<?xml version="1.0" encoding="',) \
                    .replace("'?>", '"?>')
                target.write(normalised)
        else:
            convert_via_temp_srt(Utils.srt2ttml)
    elif file_extension in Subtitle.WEBVTT_EXTENSIONS:
        convert_via_temp_srt(Utils.srt2vtt)
    elif file_extension in Subtitle.SSA_EXTENTIONS:
        convert_via_temp_srt(Utils.srt2ssa)
    elif file_extension in Subtitle.ADVANCED_SSA_EXTENTIONS:
        convert_via_temp_srt(Utils.srt2ass)
    elif file_extension in Subtitle.MICRODVD_EXTENSIONS:
        convert_via_temp_srt(Utils.srt2microdvd, frame_rate=frame_rate)
    elif file_extension in Subtitle.MPL2_EXTENSIONS:
        convert_via_temp_srt(Utils.srt2mpl2)
    elif file_extension in Subtitle.TMP_EXTENSIONS:
        convert_via_temp_srt(Utils.srt2tmp)
    elif file_extension in Subtitle.SAMI_EXTENSIONS:
        convert_via_temp_srt(Utils.srt2sami)
    elif file_extension in Subtitle.STL_EXTENSIONS:
        # The previous code created and deleted a temporary file here
        # without ever using it; the cues have always been saved straight to
        # the target, so the temporary file is dropped.
        # NOTE(review): this writes SubRip content for an STL extension —
        # confirm no conversion step is intended.
        SubRipFile(subs).save(target_file_path, encoding=encoding)
    else:
        raise UnsupportedFormatException(
            "Unknown subtitle format for file: {}".format(
                source_file_path))