Ejemplo n.º 1
0
 def smi(self, subdata):
     """Turn SAMI (.smi) subtitle data into numbered, SRT-style cue text.

     Each ``<SYNC Start=N>`` line whose ``N`` differs from the previous
     start time flushes the caption gathered so far (skipped when it is
     empty or just ``&nbsp;``) as one numbered cue; caption text comes
     from ``<P Class=SVCC>`` lines. Time values are rendered by
     ``timestr``.

     When ``is_py3`` is set the input is decoded as latin1 first and the
     result is returned UTF-8 encoded; otherwise the text is returned
     as built.
     """
     if is_py3:
         subdata = subdata.decode("latin1")

     sync_re = re.compile(r"<SYNC Start=(\d+)>")
     caption_re = re.compile("<P Class=SVCC>(.*)")
     markup_re = re.compile(r'<[^>]+>')

     chunks = []
     counter = 1
     started_at = 0
     caption = None
     for raw_line in StringIO(subdata).readlines():
         line = raw_line.rstrip()

         marker = sync_re.search(line)
         if marker:
             stamp = marker.group(1)
             if int(stamp) != int(started_at) and caption and caption != "&nbsp;":
                 chunks.append("%s\n%s --> %s\n" % (counter, timestr(started_at), timestr(stamp)))
                 body = "%s\n" % markup_re.sub('', caption.replace("<br>", "\n"))
                 # Make sure the cue text is followed by an empty line.
                 if body[len(body) - 2] != "\n":
                     body += "\n"
                 chunks.append(body)
                 counter += 1
             started_at = stamp

         # Caption text may sit on the same line as its SYNC marker, so this
         # check runs for every line, after the marker has been handled.
         found = caption_re.search(line)
         if found:
             caption = found.group(1)

     merged = "".join(chunks)
     without_cr = re.sub(r'\r', '', merged)
     cleaned = re.sub(r'\x96', '-', without_cr).replace('&quot;', '"')
     return cleaned.encode("utf-8") if is_py3 else cleaned
Ejemplo n.º 2
0
    def wrst(self, subdata):
        """Convert the WebVTT text in ``subdata.text`` into SRT text.

        Lines are classified one at a time: the ``WEBVTT`` header is
        dropped, blank lines end the current cue, timing lines are
        rewritten with ``,`` as the fraction separator, a leading number
        outside a cue is kept as the cue number, and anything else is cue
        text with ``<...>`` tags removed.

        If the cue numbered 1 starts at hour 10 or later, ten hours are
        subtracted from that and every following timestamp.

        HTML entities are decoded with ``decode_html_entities``; the result
        is UTF-8 encoded when ``is_py2`` is set.
        """
        blank_re = re.compile(r"^[\r\n]+")
        timing_re = re.compile(r"([\d:\.]+ --> [\d:\.]+)")
        counter_re = re.compile(r"^(\d+)\s")
        stamp_re = re.compile(r'(\d+):(\d+)[.:]([\d\.]+) --> (\d+):(\d+)[.:]([\d\.]+)')

        pieces = []
        shift_hours = False
        next_index = 1
        cue_id = 0
        in_cue = False
        id_written = False
        for line in StringIO(subdata.text).readlines():
            if line.startswith("WEBVTT"):
                continue

            if blank_re.search(line) and next_index > 1:
                in_cue = False
                pieces.append("\n")
                continue

            if timing_re.search(line):
                # Supply a running number when the source gave none.
                if not id_written:
                    pieces.append("%s\n" % next_index)
                stamp = stamp_re.search(line)
                start_hour = int(stamp.group(1))
                end_hour = int(stamp.group(4))
                if int(cue_id) == 1 and start_hour > 9:
                    shift_hours = True
                if shift_hours:
                    start_hour -= 10
                    end_hour -= 10
                pieces.append("%s:%s:%s --> %s:%s:%s\n" % (
                    start_hour,
                    stamp.group(2),
                    stamp.group(3).replace(".", ","),
                    end_hour,
                    stamp.group(5),
                    stamp.group(6).replace(".", ","),
                ))
                in_cue = True
                id_written = False
                next_index += 1
                continue

            counter = counter_re.search(line)
            if counter and not in_cue:
                cue_id = counter.group(1)
                pieces.append("%s\n" % cue_id)
                id_written = True
                continue

            pieces.append(re.sub('<[^>]*>', '', line).strip())
            pieces.append("\n")

        converted = decode_html_entities("".join(pieces))
        return converted.encode("utf-8") if is_py2 else converted
Ejemplo n.º 3
0
 def smi(self, subdata):
     """Convert a SAMI (.smi) subtitle response into SRT-style cue text.

     The body is read as ISO-8859-1 text (via ``subdata.text`` or, when
     ``requests_version`` is below 0x20300, from ``subdata.content``).
     Each ``<SYNC Start=N>`` line whose ``N`` differs from the previous
     start flushes the pending caption (unless empty or ``&nbsp;``) as a
     numbered cue with times rendered by ``timestr``. Tags other than
     those starting with ``i`` / ``/i`` are stripped and HTML entities are
     decoded with ``decode_html_entities``.

     On Python 2 a ``unicode`` result is returned UTF-8 encoded.
     """
     if requests_version < 0x20300:
         payload = subdata.content if is_py2 else subdata.content.decode("latin")
     else:
         subdata.encoding = "ISO-8859-1"
         payload = subdata.text

     sync_re = re.compile(r"<SYNC Start=(\d+)>")
     caption_re = re.compile("<P Class=SVCC>(.*)")
     markup_re = re.compile(r'<(?!\/?i).*?>')

     chunks = []
     counter = 1
     started_at = 0
     caption = None
     for raw_line in StringIO(payload).readlines():
         line = raw_line.rstrip()

         marker = sync_re.search(line)
         if marker:
             stamp = marker.group(1)
             if int(stamp) != int(started_at) and caption and caption != "&nbsp;":
                 chunks.append("%s\n%s --> %s\n" % (counter, timestr(started_at), timestr(stamp)))
                 body = "%s\n" % markup_re.sub('', caption.replace("<br>", "\n"))
                 body = decode_html_entities(body)
                 # Make sure the cue text is followed by an empty line.
                 if body[len(body) - 2] != "\n":
                     body += "\n"
                 chunks.append(body)
                 counter += 1
             started_at = stamp

         # Caption text may share a line with its SYNC marker, so look for it
         # on every line once the marker has been processed.
         found = caption_re.search(line)
         if found:
             caption = found.group(1)

     without_cr = re.sub(r'\r', '', "".join(chunks))
     cleaned = re.sub(r'\x96', '-', without_cr)
     if is_py2 and isinstance(cleaned, unicode):
         return cleaned.encode("utf-8")
     return cleaned
Ejemplo n.º 4
0
 def smi(self, subdata):
     """Build SRT-style cue text from a SAMI (.smi) subtitle response.

     Input text is taken as ISO-8859-1: from ``subdata.text`` normally, or
     from ``subdata.content`` when ``requests_version`` is below 0x20300.
     A ``<SYNC Start=N>`` line with a new ``N`` emits the caption seen so
     far (skipping empty and ``&nbsp;`` captions) as the next numbered
     cue, using ``timestr`` for both time values. Captions are read from
     ``<P Class=SVCC>`` lines, have non-``i`` tags removed and are passed
     through ``decode_html_entities``.

     A ``unicode`` result is UTF-8 encoded when ``is_py2`` is set.
     """
     if requests_version < 0x20300:
         if is_py2:
             source_text = subdata.content
         else:
             source_text = subdata.content.decode("latin")
     else:
         subdata.encoding = "ISO-8859-1"
         source_text = subdata.text

     find_sync = re.compile(r"<SYNC Start=(\d+)>").search
     find_caption = re.compile("<P Class=SVCC>(.*)").search
     strip_tags = re.compile(r'<(?!\/?i).*?>').sub

     output = []
     cue_no = 1
     previous_start = 0
     pending = None
     for entry in StringIO(source_text).readlines():
         entry = entry.rstrip()

         hit = find_sync(entry)
         if hit:
             new_start = hit.group(1)
             if int(new_start) != int(previous_start):
                 if pending and pending != "&nbsp;":
                     header = "%s\n%s --> %s\n" % (cue_no, timestr(previous_start), timestr(new_start))
                     output.append(header)
                     cue_text = "%s\n" % strip_tags('', pending.replace("<br>", "\n"))
                     cue_text = decode_html_entities(cue_text)
                     # Cue text must end with an empty separator line.
                     if cue_text[len(cue_text) - 2] != "\n":
                         cue_text += "\n"
                     output.append(cue_text)
                     cue_no += 1
             previous_start = new_start

         # Checked on every line, after any SYNC marker on it was handled.
         paragraph = find_caption(entry)
         if paragraph:
             pending = paragraph.group(1)

     result = "".join(output)
     result = re.sub(r'\r', '', result)
     result = re.sub(r'\x96', '-', result)
     if is_py2 and isinstance(result, unicode):
         return result.encode("utf-8")
     return result
Ejemplo n.º 5
0
    def wrst(self, subdata):
        """Convert the WebVTT text in ``subdata.text`` into SRT text.

        Lines are classified one at a time: the ``WEBVTT`` header is
        dropped, blank lines end the current cue, timing lines are
        rewritten with ``,`` as the fraction separator, a leading number
        outside a cue is kept as the cue number, and anything else is cue
        text.

        Cue text either has all ``<...>`` tags removed or, when
        ``self.options.convert_subtitle_colors`` is set, has the numeric
        colour tags ``<30>``..``<37>`` turned into ``<font color=...>`` and
        every closing tag turned into ``</font>``.

        If the cue numbered 1 starts at hour 10 or later, ten hours are
        subtracted from that and every following timestamp.

        HTML entities are decoded with ``decode_html_entities``; the result
        is UTF-8 encoded when ``is_py2`` is set.
        """
        # Built once instead of once per text line.
        colors = {
            '30': '#000000',
            '31': '#ff0000',
            '32': '#00ff00',
            '33': '#ffff00',
            '34': '#0000ff',
            '35': '#ff00ff',
            '36': '#00ffff',
            '37': '#ffffff',
        }
        blank_re = re.compile(r"^[\r\n]+")
        timing_re = re.compile(r"([\d:\.]+ --> [\d:\.]+)")
        counter_re = re.compile(r"^(\d+)\s")
        stamp_re = re.compile(
            r'(\d+):(\d+)[.:]([\d\.]+) --> (\d+):(\d+)[.:]([\d\.]+)')
        # Each closing tag is matched on its own. The previous greedy
        # '</.+>' ran from the first "</" to the last ">" on the line and
        # swallowed any text between two closing tags.
        closing_tag_re = re.compile(r'</[^>]+>')
        any_tag_re = re.compile('<[^>]*>')

        parts = []
        subtract = False
        number_b = 1
        number = 0
        block = 0
        subnr = False
        for i in StringIO(subdata.text).readlines():
            if i[:6] == "WEBVTT":
                continue

            if blank_re.search(i) and number_b > 1:
                block = 0
                parts.append("\n")
                continue

            if timing_re.search(i):
                # Supply a running number when the source gave none.
                if not subnr:
                    parts.append("%s\n" % number_b)
                matchx = stamp_re.search(i)
                hour1 = int(matchx.group(1))
                hour2 = int(matchx.group(4))
                if int(number) == 1 and hour1 > 9:
                    subtract = True
                if subtract:
                    hour1 -= 10
                    hour2 -= 10
                parts.append("%s:%s:%s --> %s:%s:%s\n" % (
                    hour1, matchx.group(2), matchx.group(3).replace(".", ","),
                    hour2, matchx.group(5), matchx.group(6).replace(".", ",")))
                block = 1
                subnr = False
                number_b += 1
                continue

            match3 = counter_re.search(i)
            if match3 and block == 0:
                number = match3.group(1)
                parts.append("%s\n" % number)
                subnr = True
                continue

            if self.options.convert_subtitle_colors:
                sub = i
                for tag, color in colors.items():
                    sub = sub.replace('<' + tag + '>',
                                      '<font color="' + color + '">')
                sub = closing_tag_re.sub('</font>', sub)
            else:
                sub = any_tag_re.sub('', i)
            parts.append(sub.strip())
            parts.append("\n")

        srt = decode_html_entities("".join(parts))
        if is_py2:
            return srt.encode("utf-8")
        return srt
Ejemplo n.º 6
0
    def wrst(self, subdata):
        """Convert the WebVTT text in ``subdata.text`` into SRT text.

        When ``self.bom`` is set, the first character of the text and any
        blank lines before the first cue are skipped. The ``WEBVTT`` header
        and lines containing ``X-TIMESTAMP`` are dropped, blank lines end
        the current cue, timing lines are rewritten as
        ``HH:MM:SS,mmm --> HH:MM:SS,mmm`` (hours default to 0 when the
        source has only minutes and seconds), a leading number outside a
        cue is kept as the cue number, and anything else is cue text.

        Cue text either has all ``<...>`` tags removed or, when
        ``self.options.convert_subtitle_colors`` is set, has the numeric
        colour tags ``<30>``..``<37>`` turned into ``<font color=...>`` and
        every closing tag turned into ``</font>``.

        If the cue numbered 1 starts at hour 10 or later, ten hours are
        subtracted from that and every following full-form timestamp.

        HTML entities are decoded with ``decode_html_entities``; the result
        is UTF-8 encoded when ``is_py2`` is set.
        """
        # Built once instead of once per text line.
        colors = {
            '30': '#000000',
            '31': '#ff0000',
            '32': '#00ff00',
            '33': '#ffff00',
            '34': '#0000ff',
            '35': '#ff00ff',
            '36': '#00ffff',
            '37': '#ffffff',
        }
        blank_re = re.compile(r"^[\r\n]+")
        timing_re = re.compile(r"([\d:\.]+ --> [\d:\.]+)")
        counter_re = re.compile(r"^(\d+)\s")
        long_stamp_re = re.compile(
            r'(?P<h1>\d+):(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<h2>\d+):(?P<m2>\d+):(?P<s2>[\d\.]+)')
        short_stamp_re = re.compile(
            r'(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<m2>\d+):(?P<s2>[\d\.]+)')
        # Each closing tag is matched on its own. The previous greedy
        # '</.+>' ran from the first "</" to the last ">" on the line and
        # swallowed any text between two closing tags.
        closing_tag_re = re.compile(r'</[^>]+>')
        any_tag_re = re.compile('<[^>]*>')

        ssubdata = StringIO(subdata.text)
        if self.bom:
            ssubdata.read(1)

        parts = []
        subtract = False
        number_b = 1
        number = 0
        block = 0
        subnr = False
        for i in ssubdata.readlines():
            if i[:6] == "WEBVTT" or "X-TIMESTAMP" in i:
                continue

            if blank_re.search(i):
                if number_b == 1 and self.bom:
                    continue
                if number_b > 1:
                    block = 0
                    parts.append("\n")
                    continue

            if timing_re.search(i):
                # Supply a running number when the source gave none.
                if not subnr:
                    parts.append("%s\n" % number_b)
                matchx = long_stamp_re.search(i)
                if matchx:
                    hour1 = int(matchx.group("h1"))
                    hour2 = int(matchx.group("h2"))
                    if int(number) == 1 and hour1 > 9:
                        subtract = True
                    if subtract:
                        hour1 -= 10
                        hour2 -= 10
                else:
                    # Short form without an hours field (MM:SS.mmm).
                    # NOTE(review): a line matching neither layout leaves
                    # matchx as None and raises AttributeError below.
                    matchx = short_stamp_re.search(i)
                    hour1 = 0
                    hour2 = 0
                parts.append("{0:02d}:{1}:{2} --> {3:02d}:{4}:{5}\n".format(
                    hour1, matchx.group("m1"),
                    matchx.group("s1").replace(".", ","), hour2,
                    matchx.group("m2"),
                    matchx.group("s2").replace(".", ",")))
                block = 1
                subnr = False
                number_b += 1
                continue

            match3 = counter_re.search(i)
            if match3 and block == 0:
                number = match3.group(1)
                parts.append("%s\n" % number)
                subnr = True
                continue

            if self.options.convert_subtitle_colors:
                sub = i
                for tag, color in colors.items():
                    sub = sub.replace('<' + tag + '>',
                                      '<font color="' + color + '">')
                sub = closing_tag_re.sub('</font>', sub)
            else:
                sub = any_tag_re.sub('', i)
            parts.append(sub.strip())
            parts.append("\n")

        srt = decode_html_entities("".join(parts))
        if is_py2:
            return srt.encode("utf-8")
        return srt
Ejemplo n.º 7
0
    def wrst(self, subdata):
        """Convert the WebVTT text in ``subdata.text`` into SRT text.

        When ``self.bom`` is set, the first character of the text and any
        blank lines before the first cue are skipped. The ``WEBVTT`` header
        and lines containing ``X-TIMESTAMP`` are dropped, blank lines end
        the current cue, timing lines are rewritten as
        ``HH:MM:SS,mmm --> HH:MM:SS,mmm`` (hours default to 0 when the
        source has only minutes and seconds), a leading number outside a
        cue is kept as the cue number, and anything else is cue text.

        Cue text either has all ``<...>`` tags removed or, when
        ``self.options.convert_subtitle_colors`` is set, has the numeric
        colour tags ``<30>``..``<37>`` turned into ``<font color=...>`` and
        every closing tag turned into ``</font>``.

        If the cue numbered 1 starts at hour 10 or later, ten hours are
        subtracted from that and every following full-form timestamp.

        HTML entities are decoded with ``decode_html_entities``; the result
        is UTF-8 encoded when ``is_py2`` is set.
        """
        # Built once instead of once per text line.
        colors = {'30': '#000000', '31': '#ff0000', '32': '#00ff00', '33': '#ffff00',
                  '34': '#0000ff', '35': '#ff00ff', '36': '#00ffff', '37': '#ffffff'}
        find_blank = re.compile(r"^[\r\n]+").search
        find_timing = re.compile(r"([\d:\.]+ --> [\d:\.]+)").search
        find_counter = re.compile(r"^(\d+)\s").search
        find_long_stamp = re.compile(
            r'(?P<h1>\d+):(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<h2>\d+):(?P<m2>\d+):(?P<s2>[\d\.]+)').search
        find_short_stamp = re.compile(
            r'(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<m2>\d+):(?P<s2>[\d\.]+)').search
        # Each closing tag is matched on its own. The previous greedy
        # '</.+>' ran from the first "</" to the last ">" on the line and
        # swallowed any text between two closing tags.
        closing_tag_re = re.compile(r'</[^>]+>')
        any_tag_re = re.compile('<[^>]*>')

        ssubdata = StringIO(subdata.text)
        if self.bom:
            ssubdata.read(1)

        parts = []
        subtract = False
        number_b = 1
        number = 0
        block = 0
        subnr = False
        for i in ssubdata.readlines():
            match = find_blank(i)
            if i[:6] == "WEBVTT":
                continue
            if "X-TIMESTAMP" in i:
                continue
            if match and number_b == 1 and self.bom:
                continue

            if match and number_b > 1:
                block = 0
                parts.append("\n")
            elif find_timing(i):
                # Supply a running number when the source gave none.
                if not subnr:
                    parts.append("%s\n" % number_b)
                matchx = find_long_stamp(i)
                if matchx:
                    hour1 = int(matchx.group("h1"))
                    hour2 = int(matchx.group("h2"))
                    if int(number) == 1 and hour1 > 9:
                        subtract = True
                    if subtract:
                        hour1 -= 10
                        hour2 -= 10
                else:
                    # Short form without an hours field (MM:SS.mmm).
                    # NOTE(review): a line matching neither layout leaves
                    # matchx as None and raises AttributeError below.
                    matchx = find_short_stamp(i)
                    hour1 = 0
                    hour2 = 0
                parts.append("{0:02d}:{1}:{2} --> {3:02d}:{4}:{5}\n".format(
                    hour1, matchx.group("m1"), matchx.group("s1").replace(".", ","),
                    hour2, matchx.group("m2"), matchx.group("s2").replace(".", ",")))
                block = 1
                subnr = False
                number_b += 1
            else:
                match3 = find_counter(i)
                if match3 and block == 0:
                    number = match3.group(1)
                    parts.append("%s\n" % number)
                    subnr = True
                else:
                    if self.options.convert_subtitle_colors:
                        sub = i
                        for tag, color in colors.items():
                            sub = sub.replace('<' + tag + '>', '<font color="' + color + '">')
                        sub = closing_tag_re.sub('</font>', sub)
                    else:
                        sub = any_tag_re.sub('', i)
                    parts.append(sub.strip())
                    parts.append("\n")

        srt = decode_html_entities("".join(parts))
        if is_py2:
            return srt.encode("utf-8")
        return srt