def readSrt(self, fname_srt):
    '''
    Parse an SRT subtitle file into a list of subtitle entries.

    input:
        fname_srt - path of the .srt file; its encoding is detected with
                    fileCoding.file_encoding()
    output:
        the list filled through self.__insert(); every entry is a tuple
            (['', ''], start, end, text, styles)
        start/end - stripped halves of the 'start --> end' timing line
        text      - cue text encoded as UTF-8
        styles    - second element returned by self.stylesFromSrtLine()
                    when it is non-empty, otherwise an empty list

    Override blocks ({\\...}) are removed from every line, lines found in
    the 'subReplace' setting are replaced by its 'text' value, and cues
    whose text spans more than two lines are flattened onto one line.
    '''
    subReplace = {}
    if 'subReplace' in self.__STNGS:
        subReplace = self.__STNGS['subReplace']

    override_re = re.compile(r'\{\\[^\}]*\}')
    lines = []

    def emit(timing, text):
        # Store one finished cue; a timing line that does not split into
        # exactly two parts around '-->' makes the cue be dropped.
        parts = timing.split('-->')
        if len(parts) == 2:
            val = (['', ''], parts[0].strip(), parts[1].strip(),
                   text.encode('utf-8'), [])
            self.__insert(lines, val)

    idx = None
    tm = None
    txt = None
    lastLineEmpty = True

    # 'with' guarantees the file is closed even when decoding or parsing fails.
    with open(fname_srt) as fi:
        fCoding = fileCoding.file_encoding(fname_srt)
        for raw in fi:
            line = unicode(raw, fCoding).strip()
            # A correctly decoded BOM is the single character U+FEFF; the
            # three-character form appears when the BOM bytes were decoded
            # with a single-byte codec.  Either one would make the first
            # cue index fail int() below.
            if line.startswith(u'\ufeff'):
                line = line[1:]
            elif len(line) > 3 and line[:3] == u'\xEF\xBB\xBF':
                line = line[3:]
            line = override_re.sub('', line)

            if len(line) == 0:
                lastLineEmpty = True
                continue

            if line in subReplace and 'text' in subReplace[line]:
                line = subReplace[line]['text']

            consumed = False
            if lastLineEmpty:
                # Only a line that follows a blank line (or starts the
                # file) may be a cue index.
                try:
                    x = int(line)
                except (TypeError, ValueError):
                    x = None
                if x is not None and txt is not None:
                    # A new index closes the cue collected so far.
                    emit(tm, txt)
                    idx = x
                    tm = None
                    txt = None
                    consumed = True
                if idx is None and x is not None:
                    # very first index of the file
                    idx = x
                    consumed = True

            if not consumed:
                if tm is None:
                    tm = line
                elif txt is None:
                    txt = line
                else:
                    txt += u'\n%s' % line
            lastLineEmpty = False

    # The last cue has no following index line to trigger its emission.
    if idx is not None and tm is not None and txt is not None:
        emit(tm, txt)

    # join lines in sub if lines > 2
    for i, val in enumerate(lines):
        parts = val[3].split('\n')
        if len(parts) > 2:
            lines[i] = (val[0], val[1], val[2], ' '.join(parts), val[4])

    # Let stylesFromSrtLine() separate inline styling from the text.
    for i, val in enumerate(lines):
        tmp = self.stylesFromSrtLine(unicode(val[3], 'utf-8'))
        if len(tmp[1]):
            lines[i] = (val[0], val[1], val[2], tmp[0].encode('utf-8'), tmp[1])

    return lines
def fileInfo(self, filename):
    '''
    Collect stream information about a media or subtitle file.

    Subtitle files (.ass, .srt, .ttxt, .ssa) are described directly as one
    subtitle stream.  Any other file is probed with fileInfoUsingMediaInfo();
    for Matroska files the 'mkvinfo_trackNumber' parameter is copied over
    from fileInfoUsingMKV() for streams with the same trackID and type, and
    fileInfoUsingFFMPEG() is used when no streams were reported.

    input:
        video file name
    output:
        the cMediaInfo object that was filled in.
        NOTE(review): the layout below is the original author's description
        of the result; confirm how it maps onto cMediaInfo attributes.
        {
            'informer': <informer app>
            'filename': filename
            'streams': [
                [
                    streamType, // 0 - video, 1 - audio, 2 - subs, 3 - image
                    trackID,    // ffmpeg track id ('0:0', '0:1')
                    lang,       // language (3 chars)
                    params = {} // additional params (codec, width, height, dwidth, dheight ...)
                ],
                ...
            ]
            'tags': {
                '<tag_name>': '<value>',
                ...
            }
        }
    '''
    rv = cMediaInfo('error', filename)
    ext = os.path.splitext(filename)[1].lower()

    if ext in ('.ass', '.srt', '.ttxt', '.ssa'):
        rv.informer = 'none'
        track_id = '0%s0' % self.mapStreamSeparatedSymbol(filename)
        params = {
            'codec': ext[1:],
            'encoding': fileCoding.file_encoding(filename),
        }
        rv.stream_add(cStream(2, track_id, None, params))
    else:
        rv = self.fileInfoUsingMediaInfo(filename)
        if isMatroshkaMedia(filename):
            mkv = self.fileInfoUsingMKV(filename)
            # The two probes are only reconciled when they report the same
            # number of streams.
            if len(rv.streams) == len(mkv.streams):
                for stream in rv.streams:
                    for other in mkv.streams:
                        if (stream.trackID == other.trackID
                                and stream.type == other.type
                                and 'mkvinfo_trackNumber' in other.params):
                            stream.params['mkvinfo_trackNumber'] = \
                                other.params['mkvinfo_trackNumber']
        if len(rv.streams) == 0:
            rv = self.fileInfoUsingFFMPEG(filename)

    try:
        rv.general['mediaDuration'] = self.__mediaDuration(filename)
    except Exception:
        # Best effort: the duration is optional, so a failure to obtain it
        # must not discard the stream information gathered above.
        pass

    # Hand the collected information back to the caller, as the documented
    # output requires.
    return rv
def readAss(self, fname_ass):
    '''
    Parse an ASS/SSA subtitle file into a list of subtitle entries.

    input:
        fname_ass - path of the subtitle file; its encoding is detected
                    with fileCoding.file_encoding()
    output:
        the list filled through self.__insert(); every entry is a list
            [[style, name], start, end, text, tags]
        style/name - Style and Name fields of the Dialogue line (a leading
                     '*' is removed from the style)
        start/end  - Start and End fields converted with self.timesrt();
                     end is recomputed when a 'subReplace' item carries a
                     'duration'
        text       - cleaned cue text encoded as UTF-8
        tags       - empty unless the cues use more than one style colour,
                     in which case it receives ('#<colour>',) or the tags
                     returned by self.stylesFromSrtLine()

    Settings read from self.__STNGS:
        'ASSremoveItems' - style names whose cues are skipped
        'subReplace'     - maps a cue text to a dict with 'text' and
                           optional 'style' / 'duration' keys
    '''
    def is_single_word(text):
        # True when the text has no space, full stop or comma.
        return ' ' not in text and '.' not in text and ',' not in text

    def strip_markup(text):
        # Turn ASS line breaks into '\n' and drop override blocks, vector
        # drawing commands, hard spaces and {comments}.
        text = text.replace('\\n', '\\N')
        text = text.replace('\\N', '\n')
        text = re.sub(r'\{\\[^\}]*\}', '', text)
        # e.g. m 0 0 l 0 150 l 250 150 l 250 0
        text = re.sub(r'([lmb](\s\-{0,1}\d+){2,8}\s{0,1}){2,}', '', text)
        # e.g. m 5 0 s 95 0 100 5 100 95 95 100 5 100 0 95 0 5 c
        text = re.sub(r'm\s\-{0,1}\d+\s+\-{0,1}\d+\s+s(\s+\-{0,1}\d+){14}\s+c', '', text)
        text = text.replace('\\h', '')
        # remove from subs comments {xxxx}
        text = re.sub(r'\{[^\}]*\}', '', text)
        return text

    black_list = ()
    if 'ASSremoveItems' in self.__STNGS:
        black_list = self.__STNGS['ASSremoveItems']
    subReplace = {}
    if 'subReplace' in self.__STNGS:
        subReplace = self.__STNGS['subReplace']

    # Compiled once instead of on every line of the styles section.
    style_re = re.compile(r'Style:\s*([^,]+)')
    colour_re = re.compile(r'\&(H[0-9a-fA-F]{2})([0-9a-fA-F]{6})')

    # Section being read: 1 - [Script Info], 2 - styles, 3 - [Events]
    block = 0
    lines = []
    styles = {}
    lastVal = None
    canMergeLines = True

    # 'with' guarantees the file is closed even when parsing fails.
    with open(fname_ass) as fi:
        fCoding = fileCoding.file_encoding(fname_ass)
        for line in fi:
            if block == 2:
                elems = line.split(',')
                m = style_re.match(elems[0])
                if m:
                    sName = m.groups()[0]
                    m = colour_re.match(elems[3])
                    if m:
                        # keep only the six colour digits after the 'Hxx' part
                        styles[sName] = (m.groups()[1],)
                # presumably repairs a Dialogue line that lost its leading
                # 'Di' - TODO confirm where that happens
                if line[:8] == 'alogue: ':
                    line = 'Di%s' % line
                if line[:10] == 'Dialogue: ':
                    block = 3

            if block == 3:
                if line[:8] == 'alogue: ':
                    line = 'Di%s' % line
                if line[:10] == 'Dialogue: ':
                    # elems[0] keeps a stray 'e: <first field>' and is not
                    # used; 1..4 are Start, End, Style, Name and everything
                    # from index 9 on is the text (which may contain commas).
                    elems = line[8:].split(',')
                    linetext = strip_markup(unicode(",".join(elems[9:]), fCoding))

                    blackCheck = True
                    subEnd = self.timesrt(elems[2])
                    key = linetext.strip()
                    if key in subReplace:
                        v = subReplace[key]
                        if ('style' not in v) or v['style'] == elems[3].strip():
                            linetext = v['text']
                            if 'duration' in v:
                                start_int = self.time2int(self.timesrt(elems[1]))
                                subEnd = self.int2time(start_int + v['duration'])
                            # an explicitly replaced cue is never black-listed
                            blackCheck = False

                    if blackCheck:
                        skip = False
                        for style in black_list:
                            if style == unicode(elems[3], 'utf-8'):
                                skip = True
                                break
                        if skip:
                            continue

                    while linetext.find('\n\n') >= 0:
                        linetext = linetext.replace('\n\n', '\n')

                    # Re-join the cue: a single-word line is glued to the
                    # previous one with a space when that one was a single
                    # word too (or when it is the first line).
                    joined = ''
                    canMerge = True
                    for part in linetext.split('\n'):
                        sep = '\n'
                        if is_single_word(part):
                            if canMerge:
                                sep = ' '
                            canMerge = True
                        else:
                            canMerge = False
                        joined = '%s%s%s' % (joined, sep, part)
                    linetext = joined.strip()
                    linetext8 = linetext.encode('utf-8')

                    if len(linetext8) > 0:
                        _style = elems[3].strip()
                        # startswith() is safe for an empty Style field
                        if _style.startswith('*'):
                            _style = _style[1:]
                        _name = elems[4].strip()
                        val = [[_style, _name], self.timesrt(elems[1]), subEnd, linetext8, []]

                        # Single-word cues with the same style and the same
                        # start AND end time as the previous entry are
                        # appended to it instead of being stored separately.
                        if (lastVal is not None and canMergeLines
                                and lastVal[0][0] == val[0][0]
                                and lastVal[1] == val[1]
                                and lastVal[2] == val[2]
                                and is_single_word(linetext)):
                            merged = '%s %s' % (unicode(lastVal[3], 'utf-8'), linetext)
                            lastVal[3] = merged.encode('utf-8')
                        else:
                            self.__insert(lines, val)
                            lastVal = val
                        canMergeLines = is_single_word(linetext)

            # Section headers switch the parser state for the next lines.
            line = line.strip()
            if '[Script Info]' == line:
                block = 1
            if ('[V4+ Styles]' == line) or ('[V4 Styles]' == line):
                block = 2
            if '[Events]' == line:
                block = 3

    # Font tags are only needed when the cues do not all share one style.
    prev_style = None
    needFontTag = False
    for entry in lines:
        style = self.getSubStyle(entry[0], styles)
        if prev_style is not None and prev_style != style:
            needFontTag = True
            break
        prev_style = style

    if needFontTag:
        for entry in lines:
            style = self.getSubStyle(entry[0], styles)
            strLineStyle = self.stylesFromSrtLine(unicode(entry[3], 'utf-8'))
            if style is not None:
                if strLineStyle[1] == []:
                    entry[4].append(("#%s" % style,))
            if strLineStyle[1] != []:
                # inline tags found in the text replace the style colour
                entry[3] = strLineStyle[0].encode('utf-8')
                for s in strLineStyle[1]:
                    entry[4].append(s)

    return lines