Exemple #1
0
    def get(self):
        """Yield the streams found for the current video.

        Yields a single ServiceError and stops when the video is excluded
        or when get_vid() returns nothing. Otherwise the item.json endpoint
        and then the video.json (delivery=hls) endpoint are queried in that
        order; for each response containing a "videos" key, every stream
        returned by get_video() is yielded.
        """
        if self.exclude():
            yield ServiceError("Excluding video")
            return

        vid = self.get_vid()
        if not vid:
            yield ServiceError("Can't find video id")
            return

        def endpoints():
            # Lazy on purpose: each URL is formatted right before its request.
            yield "http://ljsp.lwcdn.com/web/public/item.json?type=video&%s" % decode_html_entities(
                vid)
            yield "http://ljsp.lwcdn.com/web/public/video.json?id={0}&delivery=hls".format(
                decode_html_entities(vid))

        for endpoint in endpoints():
            payload = json.loads(self.http.request("get", endpoint).text)
            if "videos" not in payload:
                continue
            found = self.get_video(payload)
            if not found:
                continue
            for key in list(found.keys()):
                yield found[key]
Exemple #2
0
    def outputfilename(self, data, filename, raw):
        """Build an output path shaped like "<name>[.<episode>]-<hash>-svtplay".

        The hash is the first seven hex digits of the SHA-256 digest of
        data["programVersionId"]. The name and episode parts are derived
        from the page's data-title attribute via self.name() and
        self.seasoninfo(). The result is passed through filenamify() and
        placed in the directory part of *filename* when there is one.

        Returns None when the page has no data-title attribute.
        *raw* is accepted but not used by this method.
        """
        directory = os.path.dirname(filename)

        version_id = data["programVersionId"]
        if not is_py2:
            # hashlib needs bytes on Python 3.
            version_id = version_id.encode("utf-8")
        short_hash = hashlib.sha256(version_id).hexdigest()[:7]

        title_attr = re.search('data-title="([^"]+)"', self.get_urldata())
        if title_attr is None:
            return None

        decoded = decode_html_entities(title_attr.group(1))
        name = self.name(decoded)
        episode = self.seasoninfo(decoded)
        if is_py2:
            name = name.encode("utf8")

        if episode:
            title = "{0}.{1}-{2}-svtplay".format(name, episode, short_hash)
        else:
            title = "{0}-{1}-svtplay".format(name, short_hash)
        title = filenamify(title)

        return os.path.join(directory, title) if directory else title
Exemple #3
0
    def get(self):
        """Yield the HLS streams advertised by the embedded expressen.se player.

        The page is searched for a tvspelare player URL; that player page is
        fetched and its ``window.Player.settings`` JSON is parsed. For each
        of the "iPad" and "hashHls" entries under "streams", the playlist is
        fetched and every stream returned by hlsparse() is yielded.

        Yields a ServiceError when the video is excluded. Logs an error and
        yields nothing when the player URL or the settings JSON is missing.
        """
        data = self.get_urldata()

        if self.exclude():
            yield ServiceError("Excluding video")
            return

        match = re.search('="(http://www.expressen.se/tvspelare[^"]+)"', data)
        if not match:
            log.error("Can't find video id")
            return
        url = decode_html_entities(match.group(1))
        data = self.http.request("get", url)

        match = re.search("window.Player.settings = ({.*});", data.text)
        if not match:
            log.error("Can't find json info.")
            # Stop here: match is None, so match.group(1) below would raise.
            return

        dataj = json.loads(match.group(1))
        if "streams" not in dataj:
            return

        for key in ("iPad", "hashHls"):
            if key not in dataj["streams"]:
                continue
            playlist = dataj["streams"][key]
            streams = hlsparse(
                self.options,
                self.http.request("get", playlist),
                playlist)
            # Skip an empty (or None) parse result instead of failing on .keys().
            if streams:
                for n in list(streams.keys()):
                    yield streams[n]
Exemple #4
0
    def sami(self, subdata):
        """Convert an XML subtitle response into SRT-style text.

        subdata.text is parsed as XML. Every "Subtitle" element under the
        "Font" element starts a new cue built from its SpotNumber, TimeIn
        and TimeOut attributes (times formatted by timecolon()); the text
        of the elements that follow is appended as the cue body after
        decode_html_entities().

        Returns the accumulated subtitle text.
        """
        text = subdata.text
        if is_py2:
            text = text.encode("utf8")
        # Escape ampersands so the XML parser does not reject the document;
        # the escaping is undone on the generated text at the end.
        text = re.sub(r'&', '&amp;', text)
        tree = ET.fromstring(text)
        subt = tree.find("Font")
        subs = ""
        n = 0
        # Element.getiterator() was removed in Python 3.9; iter() is the
        # equivalent that exists on both Python 2.7 and 3.
        for i in subt.iter():
            if i.tag == "Subtitle":
                n = i.attrib["SpotNumber"]
                # Every cue except the first is preceded by a blank line.
                lead = "" if n == "1" else "\n"
                subs += "%s%s\n%s --> %s\n" % (
                    lead, n, timecolon(i.attrib["TimeIn"]), timecolon(i.attrib["TimeOut"]))
            elif int(n) > 0 and i.text:
                # Text is only collected once the first cue header was seen.
                subs += "%s\n" % decode_html_entities(i.text)

        if is_py2:
            subs = subs.encode('utf8')
        subs = re.sub('&amp;', r'&', subs)
        return subs
Exemple #5
0
    def get(self):
        """Yield the HLS streams for the video embedded on the current page.

        The video query string is taken either from an ``embed.jsp?...``
        reference on the page itself, or from the page reached through a
        ``video url-...`` path on www.svd.se. The item.json endpoint is then
        queried and the stream named "auto" is parsed with hlsparse().

        Yields a ServiceError (and nothing else) when the video is excluded,
        when no video id can be found, or when no "auto" stream is listed.
        """
        vid = None
        data = self.get_urldata()

        if self.exclude(self.options):
            yield ServiceError("Excluding video")
            return

        match = re.search(r'video url-([^"]+)', data)
        if not match:
            match = re.search(r'embed.jsp\?([^"]+)"', self.get_urldata())
            if not match:
                yield ServiceError("Can't find video id")
                return
            vid = match.group(1)
        if not vid:
            path = unquote_plus(match.group(1))
            # Use .text, not .content: on Python 3 .content is bytes and
            # searching it with a str pattern raises TypeError.
            data = self.http.request("get", "http://www.svd.se%s" % path).text
            match = re.search(r'embed.jsp\?([^"]+)', data)
            if not match:
                yield ServiceError("Can't find video id")
                return
            vid = match.group(1)

        url = "http://ljsp.lwcdn.com/web/public/item.json?type=video&%s" % decode_html_entities(vid)
        data = self.http.request("get", url).text
        jdata = json.loads(data)
        media = jdata["videos"][0]["media"]

        # The last stream named "auto" wins, as before; hls stays None when
        # there is none, instead of being left unbound.
        hls = None
        for i in media["streams"]:
            if i["name"] == "auto":
                hls = "%s%s" % (media["base"], i["url"])
        if hls is None:
            yield ServiceError("Can't find any HLS stream")
            return

        streams = hlsparse(self.options, self.http.request("get", hls), hls)
        if streams:
            for n in list(streams.keys()):
                yield streams[n]
Exemple #6
0
def get_one_media(stream, options):
    """Resolve an output name, then download subtitles and video for a stream.

    When options.output is unset or names a directory, the page <title> is
    used to derive the file name. The items produced by
    stream.get(options) are sorted into videos (optionally filtered by
    options.preferred) and subtitles. The first subtitle is downloaded when
    options.subtitle is set and output is not "-"; the video chosen by
    select_quality() is downloaded, optionally followed by its thumbnail.

    A UIException raised by the download is re-raised when options.verbose
    is set; otherwise it is logged and the process exits with status 2.
    """
    if not options.output or os.path.isdir(options.output):
        data = stream.get_urldata()
        match = re.search(r"(?i)<title[^>]*>\s*(.*?)\s*</title>", data, re.S)
        if match:
            options.output_auto = True
            title_tag = decode_html_entities(match.group(1))
            if not options.output:
                options.output = filenamify(title_tag)
            else:
                # output is a directory
                options.output = os.path.join(options.output, filenamify(title_tag))

    if platform.system() == "Windows":
        # ugly hack. replace \ with / or add extra \ because c:\test\kalle.flv will add c:_tab_est\kalle.flv
        # options.output can still be None here (no output given and no
        # <title> found), so check it before calling find().
        if options.output and options.output.find("\\") > 0:
            options.output = options.output.replace("\\", "/")

    videos = []
    subs = []
    streams = stream.get(options)
    if streams:
        for i in streams:
            if isinstance(i, VideoRetriever):
                if options.preferred:
                    if options.preferred == i.name():
                        videos.append(i)
                else:
                    videos.append(i)
            if isinstance(i, subtitle):
                subs.append(i)

        if options.subtitle and options.output != "-":
            if subs:
                subs[0].download(copy.copy(options))
            if options.force_subtitle:
                return

        if len(videos) > 0:
            stream = select_quality(options, videos)
            try:
                stream.download()
            except UIException as e:
                if options.verbose:
                    # Bare raise keeps the original traceback.
                    raise
                log.error(e.message)
                sys.exit(2)

            if options.thumbnail:
                if hasattr(stream, "get_thumbnail"):
                    log.info("thumb requested")
                    if options.output != "-":
                        log.info("getting thumbnail")
                        stream.get_thumbnail(options)
            else:
                log.info("no thumb requested")
        else:
            log.error("Can't find any streams for that url")
    else:
        log.error("Can't find any streams for that url")
Exemple #7
0
def filename(options, stream):
    """Normalise options.output, deriving it from the page title if needed.

    A given output name is decoded on Python 2, stripped of quote
    characters and of trailing backslashes. When no output is set, or it
    names an existing directory, the page <title> is run through
    filenamify() and used as the file name (joined to the directory when
    there is one); options.output_auto is set in that case.

    Returns False when stream.get_urldata() reports an error or no data,
    True otherwise.
    """
    if options.output:
        if is_py2:
            codec = "latin1" if platform.system() == "Windows" else "utf-8"
            options.output = options.output.decode(codec)
        options.output = options.output.replace('"', '').replace("'", "").rstrip('\\')

    if not options.output or os.path.isdir(options.output):
        error, data = stream.get_urldata()
        if error:
            log.error("Cant find that page")
            return False
        if data is None:
            return False
        title = re.search(r"(?i)<title[^>]*>\s*(.*?)\s*</title>", data, re.S)
        if title:
            options.output_auto = True
            safe_name = filenamify(decode_html_entities(title.group(1)))
            if options.output:
                # output is a directory
                options.output = os.path.join(options.output, safe_name)
            else:
                options.output = safe_name

    # ugly hack. replace \ with / or add extra \ because c:\test\kalle.flv will add c:_tab_est\kalle.flv
    if platform.system() == "Windows" and options.output and options.output.find("\\") > 0:
        options.output = options.output.replace("\\", "/")
    return True
Exemple #8
0
    def outputfilename(self, data, filename, raw):
        """Return "<name>[.<episode>]-<hash>-svtplay", placed next to *filename*.

        <hash> is the first seven hex characters of the SHA-256 digest of
        data["programVersionId"]. <name> and <episode> come from
        self.name() and self.seasoninfo() applied to the page's decoded
        data-title attribute. The title is sanitised with filenamify() and
        joined to the directory component of *filename* when it has one.

        Returns None if no data-title attribute is found on the page.
        *raw* is not used by this method.
        """
        directory = os.path.dirname(filename)

        program_version = data["programVersionId"]
        if not is_py2:
            # sha256() requires bytes on Python 3.
            program_version = program_version.encode("utf-8")
        digest = hashlib.sha256(program_version).hexdigest()[:7]

        found = re.search('data-title="([^"]+)"', self.get_urldata())
        if found is None:
            return None

        page_title = decode_html_entities(found.group(1))
        name = self.name(page_title)
        episode = self.seasoninfo(page_title)
        if is_py2:
            name = name.encode("utf8")

        if episode:
            title = "{0}.{1}-{2}-svtplay".format(name, episode, digest)
        else:
            title = "{0}-{1}-svtplay".format(name, digest)
        title = filenamify(title)

        if directory:
            return os.path.join(directory, title)
        return title
Exemple #9
0
    def sami(self, subdata):
        """Convert an XML subtitle response into SRT-style text.

        subdata.text is parsed as XML after its ampersands are escaped.
        Every "Subtitle" element under the "Font" element starts a new cue
        built from its SpotNumber, TimeIn and TimeOut attributes (times
        formatted by timecolon()); the text of the elements that follow is
        appended as the cue body after decode_html_entities().

        Returns the accumulated subtitle text with "&amp;" turned back
        into "&".
        """
        text = subdata.text
        if is_py2:
            text = text.encode("utf8")
        # Escape ampersands so the XML parser does not reject the document.
        text = re.sub(r'&', '&amp;', text)
        tree = ET.fromstring(text)
        subt = tree.find("Font")
        subs = ""
        n = 0
        # Element.getiterator() was removed in Python 3.9; iter() is the
        # equivalent that exists on both Python 2.7 and 3.
        for i in subt.iter():
            if i.tag == "Subtitle":
                n = i.attrib["SpotNumber"]
                # Every cue except the first is preceded by a blank line.
                lead = "" if n == "1" else "\n"
                subs += "%s%s\n%s --> %s\n" % (
                    lead, n, timecolon(i.attrib["TimeIn"]),
                    timecolon(i.attrib["TimeOut"]))
            elif int(n) > 0 and i.text:
                # Text is only collected once the first cue header was seen.
                subs += "%s\n" % decode_html_entities(i.text)

        if is_py2:
            subs = subs.encode('utf8')
        subs = re.sub('&amp;', r'&', subs)
        return subs
Exemple #10
0
    def get(self):
        """Yield the HLS streams described by the page's data-player-config.

        The JSON in the data-player-config attribute supplies the video id,
        API base and vendor, and sets self.options.live. For non-live
        videos the asset description is fetched and its
        streamUrls["hls"] playlist is parsed with hlsparse(); nothing is
        yielded for live videos.

        Yields a ServiceError when the video is excluded or the attribute
        is missing.
        """
        data = self.get_urldata()

        if self.exclude():
            yield ServiceError("Excluding video")
            return

        config_attr = re.search('data-player-config="([^"]+)"', data)
        if not config_attr:
            yield ServiceError("Can't find video info")
            return
        config = json.loads(decode_html_entities(config_attr.group(1)))

        video_id = config["playerOptions"]["id"]
        api_base = config["playerOptions"]["api"]
        vendor = config["playerOptions"]["vendor"]
        self.options.live = config["live"]
        if self.options.live:
            return

        asset_url = "{}{}/assets/{}?appName=svp-player".format(
            api_base, vendor, video_id)
        asset = json.loads(self.http.request("get", asset_url).text)

        playlist = asset["streamUrls"]["hls"]
        streams = hlsparse(
            self.options, self.http.request("get", playlist), playlist)
        if not streams:
            return
        for key in list(streams.keys()):
            yield streams[key]
Exemple #11
0
    def get(self):
        """Yield the HLS streams advertised by the embedded expressen.se player.

        The page is searched for an https tvspelare player URL; that player
        page is fetched and its ``window.Player.settings`` JSON is parsed.
        For each of the "iPad" and "hashHls" entries under "streams", the
        playlist is fetched and every stream returned by hlsparse() is
        yielded.

        Yields a ServiceError when the video is excluded. Logs an error and
        yields nothing when the player URL or the settings JSON is missing.
        """
        data = self.get_urldata()

        if self.exclude():
            yield ServiceError("Excluding video")
            return

        match = re.search('="(https://www.expressen.se/tvspelare[^"]+)"', data)
        if not match:
            log.error("Can't find video id")
            return
        url = decode_html_entities(match.group(1))
        data = self.http.request("get", url)

        match = re.search("window.Player.settings = ({.*});", data.text)
        if not match:
            log.error("Can't find json info.")
            # Stop here: match is None, so match.group(1) below would raise.
            return

        dataj = json.loads(match.group(1))
        if "streams" not in dataj:
            return

        for key in ("iPad", "hashHls"):
            if key not in dataj["streams"]:
                continue
            playlist = dataj["streams"][key]
            streams = hlsparse(self.options, self.http.request("get", playlist), playlist)
            # Skip an empty (or None) parse result instead of failing on .keys().
            if streams:
                for n in list(streams.keys()):
                    yield streams[n]
Exemple #12
0
    def get(self):
        """Yield HLS streams for the video configured in data-player-config.

        Reads the id, api and vendor entries from the "playerOptions" of
        the JSON held in the page's data-player-config attribute and
        stores its "live" flag on self.options. Live videos yield nothing;
        otherwise the asset JSON is requested and the playlist at
        streamUrls["hls"] is handed to hlsparse(), whose streams are
        yielded.

        A ServiceError is yielded when the video is excluded or when the
        attribute cannot be found.
        """
        data = self.get_urldata()

        if self.exclude():
            yield ServiceError("Excluding video")
            return

        found = re.search('data-player-config="([^"]+)"', data)
        if not found:
            yield ServiceError("Can't find video info")
            return
        player_config = json.loads(decode_html_entities(found.group(1)))

        videoId = player_config["playerOptions"]["id"]
        apiurl = player_config["playerOptions"]["api"]
        vendor = player_config["playerOptions"]["vendor"]
        self.options.live = player_config["live"]
        if self.options.live:
            return

        dataurl = "{}{}/assets/{}?appName=svp-player".format(apiurl, vendor, videoId)
        asset = json.loads(self.http.request("get", dataurl).text)

        hls_url = asset["streamUrls"]["hls"]
        streams = hlsparse(self.options, self.http.request("get", hls_url), hls_url)
        if not streams:
            return
        for name in list(streams.keys()):
            yield streams[name]
Exemple #13
0
    def get(self):
        """Yield every stream found through the two lwcdn JSON endpoints.

        A ServiceError is yielded (and iteration ends) when the video is
        excluded or get_vid() returns nothing. Otherwise item.json is
        queried first and video.json (delivery=hls) second; whenever a
        response has a "videos" key, the streams returned by get_video()
        are yielded.
        """
        if self.exclude():
            yield ServiceError("Excluding video")
            return

        vid = self.get_vid()
        if not vid:
            yield ServiceError("Can't find video id")
            return

        def candidate_urls():
            # Generated lazily so each URL is built just before it is requested.
            yield "http://ljsp.lwcdn.com/web/public/item.json?type=video&%s" % decode_html_entities(vid)
            yield "http://ljsp.lwcdn.com/web/public/video.json?id={0}&delivery=hls".format(decode_html_entities(vid))

        for url in candidate_urls():
            jdata = json.loads(self.http.request("get", url).text)
            if "videos" not in jdata:
                continue
            streams = self.get_video(jdata)
            if not streams:
                continue
            for name in list(streams.keys()):
                yield streams[name]
Exemple #14
0
    def get(self):
        """Yield the HLS streams for the video on the current page.

        The video id comes from the data-aptomaId attribute (in which case
        data-isLive must also be present and may set self.options.live) or,
        failing that, from the "videoId" entry of the data-player-config
        JSON. For non-live videos the id is replaced by the "videoId" from
        the metadata service. The formats service then lists the HLS
        sources: "level3" is preferred over "akamai" and "csmil" over
        "m3u8". One playlist per entry in "files" is parsed with hlsparse().

        Yields a ServiceError when the video is excluded or the required
        attributes are missing.
        """
        data = self.get_urldata()

        if self.exclude():
            yield ServiceError("Excluding video")
            return

        aptoma = re.search('data-aptomaId="([-0-9a-z]+)"', data)
        if aptoma:
            videoId = aptoma.group(1)
            live_attr = re.search(r'data-isLive="(\w+)"', data)
            if not live_attr:
                yield ServiceError("Can't find live info")
                return
            if live_attr.group(1) == "true":
                self.options.live = True
        else:
            config_attr = re.search('data-player-config="([^"]+)"', data)
            if not config_attr:
                yield ServiceError("Can't find video info")
                return
            videoId = json.loads(decode_html_entities(config_attr.group(1)))["videoId"]

        if not self.options.live:
            dataurl = "http://aftonbladet-play-metadata.cdn.drvideo.aptoma.no/video/%s.json" % videoId
            videoId = json.loads(self.http.request("get", dataurl).text)["videoId"]

        streamsurl = "http://aftonbladet-play-static-ext.cdn.drvideo.aptoma.no/actions/video/?id=%s&formats&callback=" % videoId
        formats = json.loads(self.http.request("get", streamsurl).text)
        hlsstreams = formats["formats"]["hls"]

        cdn = hlsstreams["level3"] if "level3" in hlsstreams.keys() else hlsstreams["akamai"]
        hls = cdn["csmil"][0] if "csmil" in cdn.keys() else cdn["m3u8"][0]
        address = hls["address"]
        path = hls["path"]

        for entry in hls["files"]:
            if "filename" in entry.keys():
                plist = "http://%s/%s/%s/master.m3u8" % (address, path,
                                                         entry["filename"])
            else:
                plist = "http://%s/%s/%s" % (address, path, hls["filename"])

            parsed = hlsparse(self.options, self.http.request("get", plist),
                              plist)
            if not parsed:
                continue
            for key in list(parsed.keys()):
                yield parsed[key]
Exemple #15
0
    def get(self, options):
        """Yield the HLS streams for the video on the current page.

        The video id comes from the data-aptomaId attribute (in which case
        data-isLive must also be present and may set options.live) or,
        failing that, from the "videoId" entry of the data-player-config
        JSON. For non-live videos the id is replaced by the "videoId" from
        the metadata service. The formats service then lists the HLS
        sources: "level3" is preferred over "akamai" and "csmil" over
        "m3u8". One playlist per entry in "files" is parsed with hlsparse().

        Yields a ServiceError when the video is excluded or the required
        attributes are missing.
        """
        data = self.get_urldata()

        if self.exclude(options):
            yield ServiceError("Excluding video")
            return

        aptoma = re.search('data-aptomaId="([-0-9a-z]+)"', data)
        if aptoma:
            videoId = aptoma.group(1)
            live_attr = re.search(r'data-isLive="(\w+)"', data)
            if not live_attr:
                yield ServiceError("Can't find live info")
                return
            if live_attr.group(1) == "true":
                options.live = True
        else:
            config_attr = re.search('data-player-config="([^"]+)"', data)
            if not config_attr:
                yield ServiceError("Can't find video info")
                return
            videoId = json.loads(decode_html_entities(config_attr.group(1)))["videoId"]

        if not options.live:
            dataurl = "http://aftonbladet-play-metadata.cdn.drvideo.aptoma.no/video/%s.json" % videoId
            videoId = json.loads(self.http.request("get", dataurl).text)["videoId"]

        streamsurl = "http://aftonbladet-play-static-ext.cdn.drvideo.aptoma.no/actions/video/?id=%s&formats&callback=" % videoId
        formats = json.loads(self.http.request("get", streamsurl).text)
        hlsstreams = formats["formats"]["hls"]

        cdn = hlsstreams["level3"] if "level3" in hlsstreams.keys() else hlsstreams["akamai"]
        hls = cdn["csmil"][0] if "csmil" in cdn.keys() else cdn["m3u8"][0]
        address = hls["address"]
        path = hls["path"]

        for entry in hls["files"]:
            if "filename" in entry.keys():
                plist = "http://%s/%s/%s/master.m3u8" % (address, path, entry["filename"])
            else:
                plist = "http://%s/%s/%s" % (address, path, hls["filename"])

            parsed = hlsparse(options, self.http.request("get", plist), plist)
            if not parsed:
                continue
            for key in list(parsed.keys()):
                yield parsed[key]
Exemple #16
0
    def find_video_id(self):
        """Return the video id for the current page, or None.

        Tried in order: a double-quoted data-video-id attribute in the
        page, a numeric /video/ or /klipp/ segment in the URL path, and a
        single-quoted data-video-id attribute. Failing those, an id taken
        from a /videoEpisod-.../ path, a data-id attribute or the articleId
        query parameter is probed against svtplay.se as "video" then
        "klipp"; on a 200 response the service URL is switched to that
        page and the lookup starts over. As a last resort, when neither
        the path nor data-id matched, the lookup restarts on an embedded
        //www.svt.se/wd... src URL.
        """
        found = re.search('data-video-id="([^"]+)"', self.get_urldata())
        if found:
            return found.group(1)

        parse = urlparse(self.url)
        query = parse_qs(parse.query)
        for path_pattern in ("/video/([0-9]+)/", "/klipp/([0-9]+)/"):
            found = re.search(path_pattern, parse.path)
            if found:
                return found.group(1)

        found = re.search("data-video-id='([^']+)'", self.get_urldata())
        if found:
            return found.group(1)

        # Match objects are always truthy, so "or" only runs the second
        # search when the first one failed.
        found = (re.search("/videoEpisod-([^/]+)/", parse.path)
                 or re.search(r'data-id="(\d+)-', self.get_urldata()))
        vid = found.group(1) if found else None
        if not vid and "articleId" in query:
            vid = query["articleId"][0]

        if vid:
            vtype = None
            for candidate in ["video", "klipp"]:
                probe = self.http.request(
                    "get", "http://www.svtplay.se/%s/%s/" % (candidate, vid))
                if probe.status_code == 200:
                    vtype = candidate
                    break
            if vtype:
                # Point the service at the resolved page and start over.
                self._url = "http://www.svtplay.se/%s/%s/" % (vtype, vid)
                self._urldata = None
                self.get_urldata()
                return self.find_video_id()

        if not found:
            embedded = re.search(r'src="(//www.svt.se/wd?[^"]+)"',
                                 self.get_urldata())
            if embedded:
                self._urldata = None
                self._url = "http:%s" % decode_html_entities(embedded.group(1))
                self.get_urldata()
                return self.find_video_id()

        return None
Exemple #17
0
    def find_video_id(self):
        """Locate the video id of the current page; return None if not found.

        Lookup order: data-video-id="..." in the page, /video/<n>/ or
        /klipp/<n>/ in the URL path, then data-video-id='...' in the page.
        Otherwise a candidate id is taken from /videoEpisod-<id>/ in the
        path, from data-id="<n>-" in the page or from the articleId query
        parameter, and svtplay.se is requested for it as "video" and then
        "klipp"; the first 200 response makes that page the new URL and the
        lookup is repeated. If neither the path nor data-id matched, an
        embedded src="//www.svt.se/wd..." URL is followed in the same way.
        """
        hit = re.search('data-video-id="([^"]+)"', self.get_urldata())
        if hit:
            return hit.group(1)

        parse = urlparse(self.url)
        query = parse_qs(parse.query)
        for kind in ("video", "klipp"):
            hit = re.search("/%s/([0-9]+)/" % kind, parse.path)
            if hit:
                return hit.group(1)

        hit = re.search("data-video-id='([^']+)'", self.get_urldata())
        if hit:
            return hit.group(1)

        hit = re.search("/videoEpisod-([^/]+)/", parse.path)
        if hit is None:
            hit = re.search(r'data-id="(\d+)-', self.get_urldata())

        vid = hit.group(1) if hit else None
        if not vid and "articleId" in query:
            vid = query["articleId"][0]

        if vid:
            vtype = None
            for kind in ["video", "klipp"]:
                response = self.http.request("get", "http://www.svtplay.se/%s/%s/" % (kind, vid))
                if response.status_code == 200:
                    vtype = kind
                    break
            if vtype:
                # Switch to the resolved page and run the lookup again.
                self._url = "http://www.svtplay.se/%s/%s/" % (vtype, vid)
                self._urldata = None
                self.get_urldata()
                return self.find_video_id()

        if not hit:
            src = re.search(r'src="(//www.svt.se/wd?[^"]+)"', self.get_urldata())
            if src:
                self._urldata = None
                self._url = "http:%s" % decode_html_entities(src.group(1))
                self.get_urldata()
                return self.find_video_id()

        return None
Exemple #18
0
    def wrst(self, subdata):
        """Convert WebVTT-style subtitle text (subdata.text) to SRT-style text.

        The "WEBVTT" line is dropped, timing lines are rewritten with ","
        as the fractional separator, cue numbers are emitted (taken from
        the input when a numeric line precedes the timing line, otherwise
        generated), and markup tags are stripped from text lines. If the
        cue whose input number is 1 starts at an hour above 9, ten hours
        are subtracted from that and all later timestamps.

        Returns the text after decode_html_entities(), UTF-8 encoded on
        Python 2.
        """
        pieces = []
        shift_hours = False
        next_number = 1      # number to emit when the input has none
        last_id = 0          # most recent cue number read from the input
        in_cue = False       # True between a timing line and the next blank line
        id_written = False   # True when the input supplied the cue number

        for line in StringIO(subdata.text).readlines():
            blank = re.search(r"^[\r\n]+", line)
            timing = re.search(r"([\d:\.]+ --> [\d:\.]+)", line)
            numbered = re.search(r"^(\d+)\s", line)

            if line[:6] == "WEBVTT":
                continue

            if blank and next_number > 1:
                in_cue = False
                pieces.append("\n")
            elif timing:
                if not id_written:
                    pieces.append("%s\n" % next_number)
                fields = re.search(r'(\d+):(\d+)[.:]([\d\.]+) --> (\d+):(\d+)[.:]([\d\.]+)', line)
                start_hour = int(fields.group(1))
                end_hour = int(fields.group(4))
                if int(last_id) == 1 and start_hour > 9:
                    shift_hours = True
                if shift_hours:
                    start_hour -= 10
                    end_hour -= 10
                pieces.append("%s:%s:%s --> %s:%s:%s\n" % (
                    start_hour, fields.group(2), fields.group(3).replace(".", ","),
                    end_hour, fields.group(5), fields.group(6).replace(".", ",")))
                in_cue = True
                id_written = False
                next_number += 1
            elif numbered and not in_cue:
                last_id = numbered.group(1)
                pieces.append("%s\n" % last_id)
                id_written = True
            else:
                # Plain text line: drop markup tags and surrounding whitespace.
                pieces.append(re.sub('<[^>]*>', '', line).strip())
                pieces.append("\n")

        srt = decode_html_entities("".join(pieces))
        if is_py2:
            return srt.encode("utf-8")
        return srt
Exemple #19
0
 def smi(self, subdata):
     """Convert a SAMI (<SYNC Start=...>) subtitle response to SRT-style text.

     The response body is read as ISO-8859-1 / latin text. Each
     ``<P Class=SVCC>`` payload is remembered and written out as a cue when
     the next <SYNC> line with a different start time is seen, unless the
     payload is "&nbsp;". Tags other than those starting with "i" or "/i"
     are stripped, "<br>" becomes a newline, carriage returns are removed
     and the \\x96 character is replaced with "-".

     Returns the text, UTF-8 encoded when it is a unicode object on
     Python 2.
     """
     if requests_version >= 0x20300:
         subdata.encoding = "ISO-8859-1"
         raw = subdata.text
     elif is_py2:
         raw = subdata.content
     else:
         raw = subdata.content.decode("latin")

     TAG_RE = re.compile(r'<(?!\/?i).*?>')
     bad_char = re.compile(r'\x96')
     carriage_return = re.compile(r'\r')

     prev_start = 0
     number = 1
     pending = None
     subs = ""
     for line in StringIO(raw).readlines():
         line = line.rstrip()
         sync = re.search(r"<SYNC Start=(\d+)>", line)
         if sync:
             start = sync.group(1)
             if int(start) != int(prev_start) and pending and pending != "&nbsp;":
                 subs += "%s\n%s --> %s\n" % (number, timestr(prev_start),
                                              timestr(start))
                 body = decode_html_entities(
                     "%s\n" % TAG_RE.sub('', pending.replace("<br>", "\n")))
                 # Make sure the cue ends with a blank line.
                 if body[len(body) - 2] != "\n":
                     body += "\n"
                 subs += body
                 number += 1
             prev_start = start
         paragraph = re.search("<P Class=SVCC>(.*)", line)
         if paragraph:
             pending = paragraph.group(1)

     text = bad_char.sub('-', carriage_return.sub('', subs))
     if is_py2 and isinstance(text, unicode):
         return text.encode("utf-8")
     return text
Exemple #20
0
def filename(stream):
    """Fill in stream.options.output from the page title when needed.

    On Python 2 a given output name is first decoded (latin1 on Windows,
    UTF-8 elsewhere). When no output is set, or it names an existing
    directory, the page <title> is passed through filenamify() and used as
    the file name (joined to the directory when there is one);
    stream.options.output_auto is set in that case.

    Returns False when the page data is None, True otherwise.
    """
    if stream.options.output and is_py2:
        codec = "latin1" if platform.system() == "Windows" else "utf-8"
        stream.options.output = stream.options.output.decode(codec)

    needs_title = not stream.options.output or os.path.isdir(stream.options.output)
    if not needs_title:
        return True

    data = ensure_unicode(stream.get_urldata())
    if data is None:
        return False

    title = re.search(r"(?i)<title[^>]*>\s*(.*?)\s*</title>", data, re.S)
    if title:
        stream.options.output_auto = True
        title_tag = decode_html_entities(title.group(1))
        if stream.options.output:
            # output is a directory
            stream.options.output = os.path.join(stream.options.output, filenamify(title_tag))
        else:
            stream.options.output = filenamify(title_tag)

    return True
Exemple #21
0
def filename(stream):
    """Fill in ``stream.options.output``, deriving it from the page title if needed.

    Steps, in order:

    1. A non-empty output is decoded when ``is_py2`` is set, using ``latin1``
       on Windows and ``utf-8`` on every other platform.
    2. If the output is empty or names an existing directory, the page data
       from ``stream.get_urldata()`` is searched for a ``<title>`` element and
       the sanitised title becomes the file name (joined onto the directory
       when one was given).  ``stream.options.output_auto`` is set to ``True``
       in that case.

    Returns ``False`` only when the page data is ``None``; ``True`` otherwise.
    """
    if stream.options.output and is_py2:
        if platform.system() == "Windows":
            encoding = "latin1"
        else:
            encoding = "utf-8"
        stream.options.output = stream.options.output.decode(encoding)

    needs_title = not stream.options.output or os.path.isdir(stream.options.output)
    if needs_title:
        html = ensure_unicode(stream.get_urldata())
        if html is None:
            return False

        title = re.search(r"(?i)<title[^>]*>\s*(.*?)\s*</title>", html, re.S)
        if title:
            stream.options.output_auto = True
            basename = filenamify(decode_html_entities(title.group(1)))
            if stream.options.output:
                # output is a directory
                basename = os.path.join(stream.options.output, basename)
            stream.options.output = basename

    return True
Exemple #22
0
 def smi(self, subdata):
     """Turn a ``<SYNC Start=...>`` / ``<P Class=SVCC>`` subtitle response into SRT-style text.

     ``subdata`` is a response object.  With ``requests_version`` below
     ``0x20300`` its ``content`` is used (decoded as latin when ``is_py2`` is
     not set); otherwise its encoding is forced to ISO-8859-1 and ``text`` is
     used.

     A cue is emitted each time a ``<SYNC>`` line with a new start time is
     reached while a caption other than ``&nbsp;`` is pending; the cue runs
     from the previous start time to the new one.  Carriage returns are
     removed and ``\\x96`` characters become ``-`` in the result.  When
     ``is_py2`` is set and the result is ``unicode`` it is returned UTF-8
     encoded.
     """
     if requests_version >= 0x20300:
         subdata.encoding = "ISO-8859-1"
         raw = subdata.text
     elif is_py2:
         raw = subdata.content
     else:
         raw = subdata.content.decode("latin")

     sync_re = re.compile(r"<SYNC Start=(\d+)>")
     caption_re = re.compile("<P Class=SVCC>(.*)")
     # Strips every tag except <i> and </i>.
     tag_re = re.compile(r'<(?!\/?i).*?>')
     bad_char = re.compile(r'\x96')
     carriage_return = re.compile(r'\r')

     previous_start = 0
     counter = 1
     caption = None
     pieces = []
     for line in StringIO(raw).readlines():
         line = line.rstrip()
         sync = sync_re.search(line)
         if sync:
             start = sync.group(1)
             if int(start) != int(previous_start) and caption and caption != "&nbsp;":
                 pieces.append("%s\n%s --> %s\n" % (counter, timestr(previous_start),
                                                    timestr(start)))
                 body = "%s\n" % tag_re.sub('', caption.replace("<br>", "\n"))
                 body = decode_html_entities(body)
                 # Make sure the cue ends with a blank line.
                 if body[len(body) - 2] != "\n":
                     body += "\n"
                 pieces.append(body)
                 counter += 1
             previous_start = start
         found = caption_re.search(line)
         if found:
             caption = found.group(1)

     cleaned = bad_char.sub('-', carriage_return.sub('', "".join(pieces)))
     if is_py2 and isinstance(cleaned, unicode):
         return cleaned.encode("utf-8")
     return cleaned
Exemple #23
0
    def get(self):
        """Yield the HLS streams of the video embedded in the current page.

        The video id is taken from the page data: either directly from an
        ``embed.jsp?...`` reference, or, when the page has a ``video url-...``
        reference, from the ``embed.jsp?...`` reference found on the
        ``www.svd.se`` page that reference points to.  The id is used to
        request the item JSON, and the stream named ``"auto"`` is handed to
        ``hlsparse``.

        Yields:
            A ``ServiceError`` (followed by the end of iteration) when the
            video is excluded, when no video id can be found, or when the
            item JSON has no stream named ``"auto"``; otherwise every stream
            returned by ``hlsparse``.
        """
        vid = None
        data = self.get_urldata()

        if self.exclude():
            yield ServiceError("Excluding video")
            return

        match = re.search(r'video url-([^"]+)', data)
        if not match:
            match = re.search(r'embed.jsp\?([^"]+)"', self.get_urldata())
            if not match:
                yield ServiceError("Can't find video id")
                return
            vid = match.group(1)
        if not vid:
            path = unquote_plus(match.group(1))
            # Use the decoded body: searching bytes (.content) with a str
            # pattern raises TypeError on Python 3.
            data = self.http.request("get",
                                     "http://www.svd.se%s" % path).text
            match = re.search(r'embed.jsp\?([^"]+)', data)
            if not match:
                yield ServiceError("Can't find video id")
                return
            vid = match.group(1)

        url = "http://ljsp.lwcdn.com/web/public/item.json?type=video&%s" % decode_html_entities(
            vid)
        data = self.http.request("get", url).text
        jdata = json.loads(data)
        media = jdata["videos"][0]["media"]

        # Without this default a response lacking an "auto" stream left
        # ``hls`` unbound and the method died with UnboundLocalError.
        hls = None
        for entry in media["streams"]:
            if entry["name"] == "auto":
                # As before, the last matching entry wins.
                hls = "%s%s" % (media["base"], entry["url"])
        if hls is None:
            yield ServiceError("Can't find the auto stream")
            return

        streams = hlsparse(self.options, self.http.request("get", hls), hls)
        if streams:
            for n in list(streams.keys()):
                yield streams[n]
Exemple #24
0
    def wrst(self, subdata):
        """Convert a WebVTT subtitle response into SubRip (SRT) text.

        Args:
            subdata: Response-like object; only its ``text`` attribute is read.

        Returns:
            The SRT text with HTML entities decoded.  When ``is_py2`` is set
            the text is UTF-8 encoded before being returned.

        Notes:
            * ``WEBVTT`` and ``X-TIMESTAMP`` lines are dropped.
            * Cue timings may be ``hh:mm:ss.mmm`` or ``mm:ss.mmm``; the output
              always has a two-digit hour and ``,`` as decimal separator.
            * If the cue explicitly numbered ``1`` starts at an hour above 9,
              10 hours are subtracted from every later ``hh:mm:ss`` timing.
            * With ``self.options.convert_subtitle_colors`` set, ``<30>`` ..
              ``<37>`` tags become ``<font color=...>`` and every closing tag
              becomes ``</font>``; otherwise all tags are stripped.
        """
        cue_with_hours = re.compile(
            r'(?P<h1>\d+):(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<h2>\d+):(?P<m2>\d+):(?P<s2>[\d\.]+)')
        cue_without_hours = re.compile(
            r'(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<m2>\d+):(?P<s2>[\d\.]+)')
        # Match one closing tag at a time.  The previous greedy '</.+>' ran
        # from the first '</' to the last '>' on the line and swallowed the
        # text between two closing tags.
        closing_tag = re.compile('</[^>]+>')
        colors = {'30': '#000000', '31': '#ff0000', '32': '#00ff00', '33': '#ffff00',
                  '34': '#0000ff', '35': '#ff00ff', '36': '#00ffff', '37': '#ffffff'}

        ssubdata = StringIO(subdata.text)
        srt = ""
        subtract = False
        number_b = 1
        number = 0
        block = 0
        subnr = False
        if self.bom:
            # Skip the first character (presumably the byte order mark).
            ssubdata.read(1)
        for i in ssubdata.readlines():
            match = re.search(r"^[\r\n]+", i)
            match2 = re.search(r"([\d:\.]+ --> [\d:\.]+)", i)
            match3 = re.search(r"^(\d+)\s", i)

            # Resolve the timing before branching.  A line that only loosely
            # looks like a timing (e.g. "1 --> 2") used to crash on a None
            # match; it now falls through to the remaining branches.
            matchx = None
            has_hours = False
            if match2:
                matchx = cue_with_hours.search(i)
                has_hours = matchx is not None
                if matchx is None:
                    matchx = cue_without_hours.search(i)

            if i[:6] == "WEBVTT":
                continue
            elif "X-TIMESTAMP" in i:
                continue
            elif match and number_b == 1 and self.bom:
                continue
            elif match and number_b > 1:
                # Blank line: the current cue is finished.
                block = 0
                srt += "\n"
            elif matchx:
                if not subnr:
                    # The source gave no cue number; use our own counter.
                    srt += "%s\n" % number_b
                hour1 = 0
                hour2 = 0
                if has_hours:
                    hour1 = int(matchx.group("h1"))
                    hour2 = int(matchx.group("h2"))
                    if int(number) == 1 and hour1 > 9:
                        subtract = True
                    if subtract:
                        hour1 -= 10
                        hour2 -= 10
                srt += "{0:02d}:{1}:{2} --> {3:02d}:{4}:{5}\n".format(
                    hour1, matchx.group("m1"), matchx.group("s1").replace(".", ","),
                    hour2, matchx.group("m2"), matchx.group("s2").replace(".", ","))
                block = 1
                subnr = False
                number_b += 1

            elif match3 and block == 0:
                # Explicit cue number supplied by the source.
                number = match3.group(1)
                srt += "%s\n" % number
                subnr = True
            else:
                if self.options.convert_subtitle_colors:
                    sub = i
                    for tag, color in colors.items():
                        sub = re.sub('<' + tag + '>', '<font color="' + color + '">', sub)
                    sub = closing_tag.sub('</font>', sub)
                else:
                    sub = re.sub('<[^>]*>', '', i)
                srt += sub.strip()
                srt += "\n"
        srt = decode_html_entities(srt)
        if is_py2:
            return srt.encode("utf-8")
        return srt
Exemple #25
0
    def wrst(self, subdata):
        """Convert a WebVTT subtitle response into SubRip (SRT) text.

        Args:
            subdata: Response-like object; only its ``text`` attribute is read.

        Returns:
            The SRT text with HTML entities decoded.  When ``is_py2`` is set
            the text is UTF-8 encoded before being returned.

        Notes:
            * Lines starting with ``WEBVTT`` are dropped.
            * Cue timings may be ``hh:mm:ss.mmm`` or ``mm:ss.mmm``; the output
              always has a two-digit hour and ``,`` as decimal separator.
            * If the cue explicitly numbered ``1`` starts at an hour above 9,
              10 hours are subtracted from every later ``hh:mm:ss`` timing.
            * With ``self.options.convert_subtitle_colors`` set, ``<30>`` ..
              ``<37>`` tags become ``<font color=...>`` and every closing tag
              becomes ``</font>``; otherwise all tags are stripped.
        """
        # Strict hh:mm:ss form first, then mm:ss.  The former single pattern
        # accepted "." between minutes and seconds, so "01:02.500" was read as
        # hour 1, minute 02, second "500".
        cue_with_hours = re.compile(
            r'(?P<h1>\d+):(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<h2>\d+):(?P<m2>\d+):(?P<s2>[\d\.]+)')
        cue_without_hours = re.compile(
            r'(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<m2>\d+):(?P<s2>[\d\.]+)')
        # One closing tag per match; the greedy '</.+>' swallowed the text
        # between two closing tags on the same line.
        closing_tag = re.compile('</[^>]+>')
        colors = {
            '30': '#000000',
            '31': '#ff0000',
            '32': '#00ff00',
            '33': '#ffff00',
            '34': '#0000ff',
            '35': '#ff00ff',
            '36': '#00ffff',
            '37': '#ffffff',
        }

        ssubdata = StringIO(subdata.text)
        srt = ""
        subtract = False
        number_b = 1
        number = 0
        block = 0
        subnr = False
        for i in ssubdata.readlines():
            match = re.search(r"^[\r\n]+", i)
            match2 = re.search(r"([\d:\.]+ --> [\d:\.]+)", i)
            match3 = re.search(r"^(\d+)\s", i)

            # Resolve the timing before branching.  A loosely matching line
            # that has no parsable timestamps used to raise AttributeError on
            # a None match; it now falls through to the remaining branches.
            matchx = None
            has_hours = False
            if match2:
                matchx = cue_with_hours.search(i)
                has_hours = matchx is not None
                if matchx is None:
                    matchx = cue_without_hours.search(i)

            if i[:6] == "WEBVTT":
                pass
            elif match and number_b > 1:
                # Blank line: the current cue is finished.
                block = 0
                srt += "\n"
            elif matchx:
                if not subnr:
                    # The source gave no cue number; use our own counter.
                    srt += "%s\n" % number_b
                hour1 = 0
                hour2 = 0
                if has_hours:
                    hour1 = int(matchx.group("h1"))
                    hour2 = int(matchx.group("h2"))
                    if int(number) == 1 and hour1 > 9:
                        subtract = True
                    if subtract:
                        hour1 -= 10
                        hour2 -= 10
                # Two-digit hours, as SRT timestamps are written hh:mm:ss,mmm.
                srt += "{0:02d}:{1}:{2} --> {3:02d}:{4}:{5}\n".format(
                    hour1, matchx.group("m1"), matchx.group("s1").replace(".", ","),
                    hour2, matchx.group("m2"), matchx.group("s2").replace(".", ","))
                block = 1
                subnr = False
                number_b += 1

            elif match3 and block == 0:
                # Explicit cue number supplied by the source.
                number = match3.group(1)
                srt += "%s\n" % number
                subnr = True
            else:
                if self.options.convert_subtitle_colors:
                    sub = i
                    for tag, color in colors.items():
                        sub = re.sub('<' + tag + '>', '<font color="' + color + '">', sub)
                    sub = closing_tag.sub('</font>', sub)
                else:
                    sub = re.sub('<[^>]*>', '', i)

                srt += sub.strip()
                srt += "\n"
        srt = decode_html_entities(srt)
        if is_py2:
            return srt.encode("utf-8")
        return srt
Exemple #26
0
    def wrst(self, subdata):
        """Convert a WebVTT subtitle response into SubRip (SRT) text.

        Args:
            subdata: Response-like object; only its ``text`` attribute is read.

        Returns:
            The SRT text with HTML entities decoded.  When ``is_py2`` is set
            the text is UTF-8 encoded before being returned.

        Notes:
            * ``WEBVTT`` and ``X-TIMESTAMP`` lines are dropped.
            * Cue timings may be ``hh:mm:ss.mmm`` or ``mm:ss.mmm``; the output
              always has a two-digit hour and ``,`` as decimal separator.
            * If the cue explicitly numbered ``1`` starts at an hour above 9,
              10 hours are subtracted from every later ``hh:mm:ss`` timing.
            * With ``self.options.convert_subtitle_colors`` set, ``<30>`` ..
              ``<37>`` tags become ``<font color=...>`` and every closing tag
              becomes ``</font>``; otherwise all tags are stripped.
        """
        cue_with_hours = re.compile(
            r'(?P<h1>\d+):(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<h2>\d+):(?P<m2>\d+):(?P<s2>[\d\.]+)')
        cue_without_hours = re.compile(
            r'(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<m2>\d+):(?P<s2>[\d\.]+)')
        any_tag = re.compile('<[^>]*>')
        # A single pattern covers the eight colour tags <30> .. <37>.
        color_tag = re.compile(r'<(3[0-7])>')
        # Non-greedy by construction: the old '</.+>' matched from the first
        # '</' to the last '>' of the line, deleting text between two
        # closing tags.
        closing_tag = re.compile('</[^>]+>')
        colors = {
            '30': '#000000',
            '31': '#ff0000',
            '32': '#00ff00',
            '33': '#ffff00',
            '34': '#0000ff',
            '35': '#ff00ff',
            '36': '#00ffff',
            '37': '#ffffff',
        }

        def font_tag(found):
            # Opening <font> tag for the colour code captured by color_tag.
            return '<font color="' + colors[found.group(1)] + '">'

        ssubdata = StringIO(subdata.text)
        srt = ""
        subtract = False
        number_b = 1
        number = 0
        block = 0
        subnr = False
        if self.bom:
            # Skip the first character (presumably the byte order mark).
            ssubdata.read(1)
        for i in ssubdata.readlines():
            match = re.search(r"^[\r\n]+", i)
            match2 = re.search(r"([\d:\.]+ --> [\d:\.]+)", i)
            match3 = re.search(r"^(\d+)\s", i)

            # Parse the timing up front.  When the loose check matches but
            # neither strict form does, matchx stays None and the line is
            # handled by the later branches instead of raising
            # AttributeError as it did before.
            matchx = None
            if match2:
                matchx = cue_with_hours.search(i) or cue_without_hours.search(i)

            if i[:6] == "WEBVTT":
                continue
            elif "X-TIMESTAMP" in i:
                continue
            elif match and number_b == 1 and self.bom:
                continue
            elif match and number_b > 1:
                # Blank line: the current cue is finished.
                block = 0
                srt += "\n"
            elif matchx:
                if not subnr:
                    # The source gave no cue number; use our own counter.
                    srt += "%s\n" % number_b
                if "h1" in matchx.groupdict():
                    hour1 = int(matchx.group("h1"))
                    hour2 = int(matchx.group("h2"))
                    if int(number) == 1 and hour1 > 9:
                        subtract = True
                    if subtract:
                        hour1 -= 10
                        hour2 -= 10
                else:
                    # mm:ss.mmm timing: there is no hour component.
                    hour1 = 0
                    hour2 = 0
                srt += "{0:02d}:{1}:{2} --> {3:02d}:{4}:{5}\n".format(
                    hour1, matchx.group("m1"),
                    matchx.group("s1").replace(".", ","), hour2,
                    matchx.group("m2"),
                    matchx.group("s2").replace(".", ","))
                block = 1
                subnr = False
                number_b += 1

            elif match3 and block == 0:
                # Explicit cue number supplied by the source.
                number = match3.group(1)
                srt += "%s\n" % number
                subnr = True
            else:
                if self.options.convert_subtitle_colors:
                    sub = closing_tag.sub('</font>', color_tag.sub(font_tag, i))
                else:
                    sub = any_tag.sub('', i)
                srt += sub.strip()
                srt += "\n"
        srt = decode_html_entities(srt)
        if is_py2:
            return srt.encode("utf-8")
        return srt