예제 #1
0
 def test_parse_html(self):
     """Check parse_html() on a sloppy HTML string and on a non-string input."""
     markup = '<!DOCTYPE html><body>&quot;perfectly&quot;<a>valid<div>HTML'
     root = validate(parse_html(), markup)
     assert root.tag == "html"

     # Passing None instead of a string is expected to raise a ValueError.
     with self.assertRaises(ValueError) as raised:
         validate(parse_html(), None)
     message = str(raised.exception)
     assert message == "Unable to parse HTML: can only parse strings (None)"
예제 #2
0
    def niconico_web_login(self):
        """Obtain a ``user_session`` cookie using the configured plugin options.

        The sources are tried in this order:

        1. the ``user-session`` option, which is stored as a cookie and saved;
        2. an already cached ``user_session`` cookie;
        3. the ``email`` and ``password`` options, which are posted to
           ``self.LOGIN_URL``. The inputs of the returned form are then
           re-submitted, asking the user for the one-time password if the
           form contains an empty ``oneTimePw`` input.

        Returns:
            None. The outcome is reported through log messages and, on
            success, through the saved session cookies.
        """
        user_session = self.get_option("user-session")
        email = self.get_option("email")
        password = self.get_option("password")

        if user_session is not None:
            log.info("Logging in via provided user session cookie")
            self.session.http.cookies.set(
                "user_session",
                user_session,
                path="/",
                domain="nicovideo.jp",
            )
            self.save_cookies()
            return

        if self.session.http.cookies.get("user_session"):
            log.info("Logging in via cached user session cookie")
            return

        if email is None or password is None:
            return

        log.info("Logging in via provided email and password")
        root = self.session.http.post(
            self.LOGIN_URL,
            data={"mail_tel": email, "password": password},
            params=self.LOGIN_URL_PARAMS,
            schema=validate.Schema(validate.parse_html()))

        # Collect every pre-filled input of the response form so that it can
        # be posted back, adding the one-time password when it is requested.
        input_with_value = {}
        for elem in root.xpath(".//input"):
            name = elem.attrib.get("name")
            value = elem.attrib.get("value")
            if value:
                input_with_value[name] = value
                continue

            if elem.attrib.get("id") != "oneTimePw":
                log.debug(f"unknown input: {name}")
                continue

            # The maxlength attribute may be absent or malformed; in that case
            # the length check is skipped instead of crashing on int(None).
            try:
                maxlength = int(elem.attrib.get("maxlength"))
            except (TypeError, ValueError):
                maxlength = None

            # Only the prompt itself can raise FatalPluginError.
            try:
                one_time_pw = self.input_ask("Enter the 6 digit number included in email")
            except FatalPluginError:
                return

            if maxlength is not None and len(one_time_pw) > maxlength:
                log.error("invalid user input")
                return
            input_with_value[name] = one_time_pw

        root = self.session.http.post(
            urljoin("https://account.nicovideo.jp", root.xpath("string(.//form[@action]/@action)")),
            data=input_with_value,
            schema=validate.Schema(validate.parse_html()))
        # NOTE(review): this writes the full cookie values, including the
        # session cookie, to the debug log - confirm that this is intended.
        log.debug(f"Cookies: {self.session.http.cookies.get_dict()}")
        if self.session.http.cookies.get("user_session") is None:
            error = root.xpath("string(//div[@class='formError']/div/text())")
            log.warning(f"Login failed: {error or 'unknown reason'}")
        else:
            log.info("Logged in.")
            self.save_cookies()
예제 #3
0
    def __init__(self, url: str):
        """Initialise the plugin and build its regex and validation schemas."""
        super().__init__(url)

        # Captures the object literal passed to a teliaPlayer(...) call.
        self._json_data_re = re.compile(r'teliaPlayer\((\{.*?\})\);', re.DOTALL)

        # Page HTML -> src of the first ltv.lsm.lv embed iframe, checked as URL.
        embed_iframe_xpath = ".//iframe[contains(@src, 'ltv.lsm.lv/embed')][1]/@src"
        self.main_page_schema = validate.Schema(
            validate.parse_html(),
            validate.xml_xpath_string(embed_iframe_xpath),
            validate.url(),
        )

        # Embed HTML -> JSON in the ":embed-data" attribute of the first
        # <live> element -> nested embed code HTML -> src of its first iframe.
        self.embed_code_schema = validate.Schema(
            validate.parse_html(),
            validate.xml_xpath_string(".//live[1]/@*[name()=':embed-data']"),
            str,
            validate.parse_json(),
            {"source": {"embed_code": str}},
            validate.get(("source", "embed_code")),
            validate.parse_html(),
            validate.xml_xpath_string(".//iframe[@src][1]/@src"),
        )

        def _single_to_double_quotes(text):
            return text.replace("'", '"')

        def _drop_trailing_commas(text):
            return re.sub(r",\s*\}", "}", text, flags=re.DOTALL)

        # Player page text -> "channel" value of the teliaPlayer(...) argument,
        # or None when the regex does not match.
        channel_from_match = validate.all(
            validate.get(1),
            validate.transform(_single_to_double_quotes),
            validate.transform(_drop_trailing_commas),
            validate.parse_json(),
            {"channel": str},
            validate.get("channel"),
        )
        self.player_apicall_schema = validate.Schema(
            validate.transform(self._json_data_re.search),
            validate.any(None, channel_from_match),
        )

        # Sources JSON -> "src" values of the application/x-mpegURL entries.
        hls_sources = validate.all(
            [{"type": str, "src": validate.url()}],
            validate.filter(lambda item: item["type"] == "application/x-mpegURL"),
            validate.map(lambda item: item.get("src")),
        )
        self.sources_schema = validate.Schema(
            validate.parse_json(),
            {"source": {"sources": hls_sources}},
            validate.get(("source", "sources")),
        )
예제 #4
0
    def _get_streams(self):
        """Look up the dacast content id on the page and return its HLS streams.

        Sets ``self.id`` and ``self.title`` from the page. Returns None when no
        id is found or when the access API answers with an error message.
        """
        page_schema = validate.Schema(
            validate.parse_html(),
            validate.union((
                validate.xml_xpath_string(".//script[@class='dacast-video'][@id]/@id"),
                validate.xml_xpath_string(".//head/title[1]/text()"),
            )),
        )
        self.id, self.title = self.session.http.get(self.url, schema=page_schema)
        if not self.id:
            return

        # Ids made of three underscore-separated words use the "dacast" provider.
        provider = "dacast" if re.match(r"\w+_\w+_\w+", self.id) else "universe"

        access_schema = validate.Schema(
            validate.parse_json(),
            validate.any(
                {"error": str},
                {"hls": validate.url()},
            ),
        )
        data = self.session.http.get(
            f"https://playback.dacast.com/content/access?contentId={self.id}&provider={provider}",
            acceptable_status=(200, 400, 403, 404),
            schema=access_schema,
        )

        if data.get("error"):
            log.error(data["error"])
            return

        return HLSStream.parse_variant_playlist(self.session, data["hls"])
예제 #5
0
    def _get_streams(self):
        """Return HLS streams for the live URL found in the page's __NEXT_DATA__.

        Returns None when fetching or validating the page raises PluginError.
        """
        def _collapse_slashes(url):
            return url.replace("https:////", "https://")

        next_data_xpath = ".//script[@type='application/json'][@id='__NEXT_DATA__']/text()"

        try:
            page_props = {
                "type": "live",
                "url": validate.all(
                    str,
                    validate.transform(_collapse_slashes),
                    validate.url(path=validate.endswith(".m3u8")),
                ),
            }
            schema = validate.Schema(
                validate.parse_html(),
                validate.xml_xpath_string(next_data_xpath),
                str,
                validate.parse_json(),
                {"props": {"pageProps": page_props}},
                validate.get(("props", "pageProps", "url")),
            )
            hls = self.session.http.get(self.url, schema=schema)
        except PluginError:
            return

        return HLSStream.parse_variant_playlist(self.session, hls)
예제 #6
0
    def _get_streams(self):
        """Find the first supported embed on the page and return its streams.

        The page is checked, in order, for a dvideo player id, a yt.js script
        with a video id, a YouTube iframe and a "delfi-stream" iframe. Returns
        None when none of them is present.
        """
        root = self.session.http.get(
            self.url, schema=validate.Schema(validate.parse_html()))

        # (XPath query, handler for a non-empty result), in priority order.
        lookups = (
            (
                "string(.//div[@data-provider='dvideo'][@data-id][1]/@data-id)",
                lambda found: self._get_streams_api(str(found)),
            ),
            (
                "string(.//script[contains(@src,'/yt.js')][@data-video]/@data-video)",
                lambda found: self.session.streams(
                    f"https://www.youtube.com/watch?v={found}"),
            ),
            (
                "string(.//iframe[starts-with(@src,'https://www.youtube.com/')][1]/@src)",
                lambda found: self.session.streams(str(found)),
            ),
            (
                "string(.//iframe[@name='delfi-stream'][@src][1]/@src)",
                lambda found: self._get_streams_delfi(str(found)),
            ),
        )

        for query, resolve in lookups:
            found = root.xpath(query)
            if found:
                return resolve(found)
예제 #7
0
    def follow_vk_redirect(self):
        """Rewrite ``self.url`` until it contains a video id.

        First the ``z`` query parameter is tried, then the page's ``og:url``
        meta tag.

        Raises:
            NoStreamsError: if neither step yields a URL with a video id.
        """
        if self._has_video_id():
            return

        # Use the first non-empty "z" query value; its leading path segment
        # replaces the path of the current URL.
        parsed_url = urlparse(self.url)
        for key, value in parse_qsl(parsed_url.query):
            if key != "z" or len(value) == 0:
                continue
            true_path = unquote(value).split("/")[0]
            self.url = f"{parsed_url.scheme}://{parsed_url.netloc}/{true_path}"
            if self._has_video_id():
                return
            break

        # Fall back to the og:url meta tag; a failed lookup keeps the old URL.
        try:
            self.url = self.session.http.get(
                self.url,
                schema=validate.Schema(
                    validate.parse_html(),
                    validate.xml_xpath_string(
                        ".//head/meta[@property='og:url'][@content]/@content"),
                    str,
                ),
            )
        except PluginError:
            pass

        if not self._has_video_id():
            raise NoStreamsError(self.url)
예제 #8
0
    def _get_live_streams(self):
        """Return the live streams, using an embedded YouTube video if present.

        Without a video id on the page, the API at ``self.API_URL`` is queried
        for an info URL, whose JSON response provides the primary HLS URL.
        """
        page_schema = validate.Schema(
            validate.parse_html(),
            validate.xml_xpath_string(".//div[@data-google-src]/@data-video-id"),
        )
        video_id = self.session.http.get(self.url, schema=page_schema)
        if video_id:
            return self.session.streams(
                f"https://www.youtube.com/watch?v={video_id}")

        def _with_https_scheme(url):
            return update_scheme("https://", url)

        info_schema = validate.Schema(
            validate.parse_json(),
            {"url": validate.url()},
            validate.get("url"),
            validate.transform(_with_https_scheme),
        )
        api_url = self.API_URL.format(subdomain=self.match.group("subdomain"))
        info_url = self.session.http.get(api_url, schema=info_schema)

        hls_schema = validate.Schema(
            validate.parse_json(),
            {
                "status": "ok",
                "protocol": "hls",
                "primary": validate.url(),
            },
            validate.get("primary"),
        )
        hls_url = self.session.http.get(info_url, schema=hls_schema)

        return HLSStream.parse_variant_playlist(self.session, hls_url)
예제 #9
0
 def login_csrf(self):
     """Return the value of the first input named ``self.CSRF_NAME`` on the login page."""
     token_xpath = ".//input[@name='{0}'][1]/@value".format(self.CSRF_NAME)
     token_schema = validate.Schema(
         validate.parse_html(),
         validate.xml_xpath_string(token_xpath),
     )
     return self.session.http.get(self.login_url, schema=token_schema)
예제 #10
0
    def _get_streams_delfi(self, src):
        """Yield HLS streams described by the embed script of a delfi iframe.

        The iframe page is searched for a script calling
        ``embedJs.setAttribute('src', ...)``. The fragment of that URL carries
        a ``stream`` parameter holding JSON with a list of ``versions``, each
        of which has an ``hls`` URL.

        Args:
            src: URL of the iframe page.

        Yields:
            The ``(name, stream)`` items of every parsed variant playlist.
            Nothing is yielded when the request or validation fails, or when
            the embed script is not found.
        """
        try:
            data = self.session.http.get(src, schema=validate.Schema(
                validate.parse_html(),
                validate.xml_xpath_string(".//script[contains(text(),'embedJs.setAttribute(')][1]/text()"),
                validate.any(None, validate.all(
                    validate.text,
                    validate.transform(re.compile(r"embedJs\.setAttribute\('src',\s*'(.+?)'").search),
                    validate.any(None, validate.all(
                        validate.get(1),
                        validate.transform(lambda url: parse_qsd(urlparse(url).fragment)),
                        {"stream": validate.text},
                        validate.get("stream"),
                        validate.parse_json(),
                        {"versions": [{
                            "hls": validate.text
                        }]},
                        validate.get("versions")
                    ))
                ))
            ))
        except PluginError:
            log.error("Failed to get streams from iframe")
            return

        # The schema deliberately allows None (no script, or no regex match);
        # iterating over None would raise a TypeError.
        if not data:
            return

        for stream in data:
            src = update_scheme("https://", stream["hls"], force=False)
            for s in HLSStream.parse_variant_playlist(self.session, src).items():
                yield s
예제 #11
0
    def get_wss_api_url(self):
        """Return the websocket API URL from the page's embedded data props.

        The ``frontendId`` value, when present, is added to the URL as the
        ``frontend_id`` query parameter. Returns None when fetching or
        validating the page raises PluginError.
        """
        try:
            site_shape = {
                "relive": {
                    "webSocketUrl": validate.url(scheme="wss"),
                },
                validate.optional("frontendId"): int,
            }
            result = self.session.http.get(
                self.url,
                schema=validate.Schema(
                    validate.parse_html(),
                    validate.xml_find(".//script[@id='embedded-data'][@data-props]"),
                    validate.get("data-props"),
                    validate.parse_json(),
                    {"site": site_shape},
                    validate.get("site"),
                    validate.union_get(("relive", "webSocketUrl"), "frontendId"),
                ),
            )
        except PluginError:
            return None

        wss_api_url, frontend_id = result
        if frontend_id is None:
            return wss_api_url

        return update_qsd(wss_api_url, {"frontend_id": frontend_id})
예제 #12
0
 def _get_streams(self):
     """Return the HLS streams described by the page's inline video data.

     The first script whose text contains ``HLS`` is matched against
     ``self._re_content``; the captured JSON provides the title and the URL
     of the first HLS source of the first video. ``self.title`` is set from
     that data.

     Returns:
         The parsed variant playlist, or None when the script or the regex
         match is missing or no HLS source URL is available.
     """
     data = self.session.http.get(
         self.url,
         schema=validate.Schema(
             validate.parse_html(),
             validate.xml_xpath_string(".//script[contains(text(), 'HLS')]/text()"),
             validate.any(None, validate.all(
                 validate.transform(self._re_content.search),
                 validate.any(None, validate.all(
                     validate.get(1),
                     validate.parse_json(),
                     {str: {"children": {"top": {"model": {"videos": [{
                         "title": str,
                         "sources": validate.all(
                             [{"url": str, "type": str}],
                             validate.filter(lambda p: p["type"].lower() == "hls"),
                             validate.get((0, "url")))
                     }]}}}}},
                     validate.transform(lambda k: next(iter(k.values()))),
                     validate.get(("children", "top", "model", "videos", 0)),
                     validate.union_get("title", "sources")
                 ))
             ))
         )
     )
     # The schema allows None at two levels; unpacking None would raise.
     if not data:
         return

     self.title, hls_url = data
     # Without an HLS source, urljoin() would return the page URL itself.
     if not hls_url:
         return

     return HLSStream.parse_variant_playlist(self.session, urljoin(self.url, hls_url))
예제 #13
0
    def _get_streams(self):
        """Return streams from an embedded YouTube/Dailymotion player or an HLS script.

        A YouTube id found by ``self._re_yt_script`` in a ``youtube.init``
        script takes precedence over a YouTube iframe, which takes precedence
        over a Dailymotion iframe. Otherwise a script containing
        ``/live/hls/`` is searched with ``self._re_live_hls``. Returns None
        when nothing matches.
        """
        root = self.session.http.get(
            self.url, schema=validate.Schema(validate.parse_html()))

        # https://www.ntv.com.tr/canli-yayin/ntv?youtube=true
        yt_iframe = root.xpath(
            "string(.//iframe[contains(@src,'youtube.com')][1]/@src)")
        # https://www.startv.com.tr/canli-yayin
        dm_iframe = root.xpath(
            "string(.//iframe[contains(@src,'dailymotion.com')][1]/@src)")
        # https://www.kralmuzik.com.tr/tv/kral-tv
        # https://www.kralmuzik.com.tr/tv/kral-pop-tv
        yt_script = root.xpath(
            "string(.//script[contains(text(), 'youtube.init')][1]/text())")

        yt_match = self._re_yt_script.search(yt_script) if yt_script else None
        if yt_match:
            yt_iframe = "https://www.youtube.com/watch?v={0}".format(
                yt_match.group(1))

        embed_url = yt_iframe or dm_iframe
        if embed_url:
            return self.session.streams(embed_url)

        # http://eurostartv.com.tr/canli-izle
        dd_script = root.xpath(
            "string(.//script[contains(text(), '/live/hls/')][1]/text())")
        hls_match = self._re_live_hls.search(dd_script) if dd_script else None
        if hls_match:
            return HLSStream.parse_variant_playlist(
                self.session, hls_match.group(1))
예제 #14
0
 def test_failure(self):
     """Check the ValidationError raised when parse_html() is given None."""
     expected = """
         ValidationError:
           Unable to parse HTML: can only parse strings (None)
     """
     with pytest.raises(validate.ValidationError) as exc_info:
         validate.validate(validate.parse_html(), None)
     assert_validationerror(exc_info.value, expected)
예제 #15
0
 def _schema_canonical(self, data):
     """Return the ``video_id`` group matched in the page's canonical link.

     ``data`` is parsed as HTML, the href of the first canonical ``<link>``
     is matched with ``self.matcher`` and the ``video_id`` item is read from
     the result.
     """
     canonical_href = validate.xml_xpath_string(
         ".//link[@rel='canonical'][1]/@href")
     steps = (
         validate.parse_html(),
         canonical_href,
         validate.transform(self.matcher.match),
         validate.get("video_id"),
     )
     return validate.Schema(*steps).validate(data)
예제 #16
0
    def _get_streams(self):
        """Return the first truthy result of the stream finders.

        The finders are tried in a fixed order on the parsed page; when none
        returns a truthy value, the result of the last one is returned.
        """
        root = self.session.http.get(
            self.url, schema=validate.Schema(validate.parse_html()))

        finders = (
            self._streams_brightcove,
            self._streams_dailymotion,
            self._streams_brightcove_js,
            self._streams_audio,
        )
        streams = None
        for finder in finders:
            streams = finder(root)
            if streams:
                break
        return streams
예제 #17
0
    def _get_streams(self):
        """Return the HLS streams of the showroom room referenced on the page.

        The room id is read from the ``share_url`` in an inline script, the
        live status is checked against ``self.LIVE_STATUS``, and the
        ``hls_all`` streaming URL is requested. ``self.title`` is set to the
        room name. Returns None when no room id is found, the room is offline
        or the playlist response has an unexpected content type.
        """
        re_room_id = re.compile(
            r"share_url:\"https:[^?]+?\?room_id=(?P<room_id>\d+)\"")
        room_id_from_script = validate.all(
            validate.transform(re_room_id.search),
            validate.any(None, validate.get("room_id")),
        )
        page_schema = validate.Schema(
            validate.parse_html(),
            validate.xml_xpath_string(
                ".//script[contains(text(),'share_url:\"https:')][1]/text()"),
            validate.any(None, room_id_from_script),
        )
        room_id = self.session.http.get(self.url, schema=page_schema)
        if not room_id:
            return

        info_schema = validate.Schema(
            validate.parse_json(),
            {
                "live_status": int,
                "room_name": str,
            },
            validate.union_get("live_status", "room_name"),
        )
        live_status, self.title = self.session.http.get(
            "https://www.showroom-live.com/api/live/live_info",
            params={"room_id": room_id},
            schema=info_schema,
        )
        if live_status != self.LIVE_STATUS:
            log.info("This stream is currently offline")
            return

        url_schema = validate.Schema(
            validate.parse_json(),
            {
                "streaming_url_list": [{
                    "type": str,
                    "url": validate.url(),
                }],
            },
            validate.get("streaming_url_list"),
            validate.filter(lambda entry: entry["type"] == "hls_all"),
            validate.get((0, "url")),
        )
        url = self.session.http.get(
            "https://www.showroom-live.com/api/live/streaming_url",
            params={
                "room_id": room_id,
                "abr_available": 1,
            },
            schema=url_schema,
        )

        # A non-playlist content type is treated as a restricted stream.
        res = self.session.http.get(url, acceptable_status=(200, 403, 404))
        if res.headers["Content-Type"] != "application/x-mpegURL":
            log.error("This stream is restricted")
            return

        return HLSStream.parse_variant_playlist(self.session, url)
예제 #18
0
    def get_live(self, username):
        """Return the HLS streams of a user's live channel.

        The server netloc is taken from the ``/stream/player.js`` script URL
        on the page, and the channel data from ``self.API_URL_LIVE``. On
        success ``self.author``, ``self.category`` and ``self.title`` are set.

        Args:
            username: Name inserted into ``self.API_URL_LIVE`` and stored as
                the author.

        Returns:
            The parsed variant playlist, or None when the netloc or the
            channel data is missing, or the channel is offline or private.
        """
        netloc = self.session.http.get(self.url, schema=validate.Schema(
            validate.parse_html(),
            validate.xml_xpath_string(".//script[contains(@src,'/stream/player.js')][1]/@src"),
            validate.any(None, validate.transform(lambda src: urlparse(src).netloc))
        ))
        if not netloc:
            log.error("Could not find server netloc")
            return

        channel, multistreams = self.session.http.get(self.API_URL_LIVE.format(username=username), schema=validate.Schema(
            validate.parse_json(),
            {
                "channel": validate.any(None, {
                    "stream_name": str,
                    "title": str,
                    "online": bool,
                    "private": bool,
                    "categories": [{"label": str}],
                }),
                "getMultiStreams": validate.any(None, {
                    "multistream": bool,
                    "streams": [{
                        "name": str,
                        "online": bool,
                    }],
                }),
            },
            validate.union_get("channel", "getMultiStreams")
        ))
        if not channel or not multistreams:
            log.debug("Missing channel or streaming data")
            return

        log.trace(f"netloc={netloc!r}")
        log.trace(f"channel={channel!r}")
        log.trace(f"multistreams={multistreams!r}")

        if not channel["online"]:
            log.error("User is not online")
            return

        if channel["private"]:
            log.info("This is a private stream")
            return

        self.author = username
        # The schema accepts an empty categories list, so guard the [0] access.
        categories = channel["categories"]
        self.category = categories[0]["label"] if categories else None
        self.title = channel["title"]

        hls_url = self.HLS_URL.format(
            netloc=netloc,
            file_name=channel["stream_name"]
        )

        return HLSStream.parse_variant_playlist(self.session, hls_url)
예제 #19
0
    def _get_streams(self):
        """Return the HLS streams of the page's video after a rights check.

        The video JSON is extracted with ``self._re_jsonData`` from the script
        mentioning ``.m3u8``. The rights service is then queried, and a
        ``wmsAuthSign`` token is added to the video URL. Returns None when no
        video data is found or the rights check does not report ``ok``.
        """
        self.session.http.headers.update(
            {"Referer": "https://tviplayer.iol.pt/"})

        video_shape = {
            "id": validate.text,
            "liveType": validate.text,
            "videoType": validate.text,
            "videoUrl": validate.url(path=validate.endswith(".m3u8")),
            validate.optional("channel"): validate.text,
        }
        page_schema = validate.Schema(
            validate.parse_html(),
            validate.xml_xpath_string(
                ".//script[contains(text(),'.m3u8')]/text()"),
            validate.text,
            validate.transform(self._re_jsonData.search),
            validate.any(
                None,
                validate.all(
                    validate.get("json"),
                    validate.parse_json(),
                    video_shape,
                ),
            ),
        )
        data = self.session.http.get(self.url, schema=page_schema)
        if not data:
            return
        log.debug("{0!r}".format(data))

        is_live = (data["liveType"].upper() == "DIRETO"
                   and data["videoType"].upper() == "LIVE")
        geo_path = "live" if is_live else "vod"

        rights_schema = validate.Schema(
            validate.parse_json(),
            {
                "code": validate.text,
                "error": validate.any(None, validate.text),
                "detail": validate.text,
            },
        )
        data_geo = self.session.http.get(
            "https://services.iol.pt/direitos/rights/{0}?id={1}".format(
                geo_path, data['id']),
            acceptable_status=(200, 403),
            schema=rights_schema,
        )
        log.debug("{0!r}".format(data_geo))
        if data_geo["detail"] != "ok":
            log.error("{0}".format(data_geo['detail']))
            return

        # The response text of this endpoint is used as the wmsAuthSign value.
        token = self.session.http.get(
            "https://services.iol.pt/matrix?userId=",
            schema=validate.Schema(validate.text))
        hls_url = update_qsd(data["videoUrl"], {"wmsAuthSign": token})
        return HLSStream.parse_variant_playlist(self.session, hls_url)
예제 #20
0
 def _find_steamid(self, url):
     """Return the ``steamid`` from the page's ``data-broadcast`` JSON, or None.

     None is returned when the ``webui_config`` element has no
     ``data-broadcast`` attribute value.
     """
     steamid_from_json = validate.all(
         str,
         validate.parse_json(),
         {"steamid": str},
         validate.get("steamid"),
     )
     page_schema = validate.Schema(
         validate.parse_html(),
         validate.xml_xpath_string(
             ".//div[@id='webui_config']/@data-broadcast"),
         validate.any(None, steamid_from_json),
     )
     return self.session.http.get(url, schema=page_schema)
예제 #21
0
    def _get_streams(self):
        """Yield the streams listed in the page's player media info.

        The ``data-ctrl-player`` attribute of the page holds JSON with the
        URL of the media info document, resolved against
        ``self._URL_DATA_BASE``. That document maps qualities to stream URLs
        and optionally provides a title, which is stored in ``self.title``.

        Yields:
            The items of the parsed HLS variant playlist when an ``auto``
            quality exists; otherwise one ``(quality, HTTPStream)`` pair per
            quality, with names translated through ``self._QUALITY_MAP``.
            Nothing is yielded when the player data cannot be read.
        """
        try:
            data_url = self.session.http.get(
                self.url,
                schema=validate.Schema(
                    validate.parse_html(),
                    validate.xml_find(".//*[@data-ctrl-player]"),
                    validate.get("data-ctrl-player"),
                    validate.transform(lambda s: s.replace("'", "\"")),
                    validate.parse_json(), {"url": validate.text},
                    validate.get("url")))
        except PluginError:
            return

        data_url = urljoin(self._URL_DATA_BASE, data_url)
        # Format the message up front: passing the value as a logging argument
        # only works when the logger applies str.format() to its arguments.
        log.debug("Player URL: '{0}'".format(data_url))

        self.title, media = self.session.http.get(
            data_url,
            schema=validate.Schema(
                validate.parse_json(name="MEDIAINFO"), {
                    "mc": {
                        validate.optional("_title"):
                        validate.text,
                        "_mediaArray": [
                            validate.all(
                                {
                                    "_mediaStreamArray": [
                                        validate.all(
                                            {
                                                "_quality":
                                                validate.any(
                                                    validate.text, int),
                                                "_stream": [validate.url()],
                                            },
                                            validate.union_get(
                                                "_quality", ("_stream", 0)))
                                    ]
                                }, validate.get("_mediaStreamArray"),
                                validate.transform(dict))
                        ]
                    }
                }, validate.get("mc"),
                validate.union_get("_title", ("_mediaArray", 0))))

        if media.get("auto"):
            for s in HLSStream.parse_variant_playlist(
                    self.session, media.get("auto")).items():
                yield s
        else:
            for quality, stream in media.items():
                yield self._QUALITY_MAP.get(quality, quality), HTTPStream(
                    self.session, stream)
예제 #22
0
    def _get_streams(self):
        """Return HLS streams for the page's first mpegURL video source, if any."""
        source_xpath = ".//video/source[@src][@type='application/x-mpegURL'][1]/@src"
        page_schema = validate.Schema(
            validate.parse_html(),
            validate.xml_xpath_string(source_xpath),
        )
        hls_url = self.session.http.get(self.url, schema=page_schema)
        if hls_url:
            return HLSStream.parse_variant_playlist(self.session, hls_url)
예제 #23
0
    def _get_streams(self):
        """Dispatch to the stream getter matching the page's ``player_type`` input.

        Returns None when the hidden input is missing or has another value.
        """
        root = self.session.http.get(
            self.url, schema=validate.Schema(validate.parse_html()))
        player_type = root.xpath(
            "string(.//input[@type='hidden'][@name='player_type'][1]/@value)")

        handlers = {
            "dwlivestream": self._get_live_streams,
            "video": self._get_vod_streams,
            "audio": self._get_audio_streams,
        }
        handler = handlers.get(player_type)
        if handler is None:
            return None
        return handler(root)
예제 #24
0
    def get_hls_url(self):
        """Return the HLS URL obtained through the page's embedded ovva player.

        Cookies are cleared, the embed iframe is located, the base64-encoded
        player data is decoded to find the balancer URL, and the balancer
        response is split on ``=`` to obtain the ``.m3u8`` URL.

        Returns:
            The HLS URL, or None when the ovva player data cannot be read.

        Raises:
            NoStreamsError: if the page has no embed iframe.
        """
        self.session.http.cookies.clear()

        iframe_schema = validate.Schema(
            validate.parse_html(),
            validate.xml_xpath_string(".//iframe[contains(@src,'embed')]/@src"),
        )
        url_parts = self.session.http.get(url=self.url, schema=iframe_schema)
        if not url_parts:
            raise NoStreamsError("Missing url_parts")

        log.trace(f"url_parts={url_parts}")
        self.session.http.headers.update({"Referer": self.url})

        def _b64_to_text(encoded):
            return b64decode(encoded).decode()

        try:
            url_ovva = self.session.http.get(
                url=urljoin(self.url, url_parts),
                schema=validate.Schema(
                    validate.parse_html(),
                    validate.xml_xpath_string(".//script[@type='text/javascript'][contains(text(),'ovva-player')]/text()"),
                    str,
                    validate.transform(self._re_data.search),
                    validate.get(1),
                    validate.transform(_b64_to_text),
                    validate.parse_json(),
                    {"balancer": validate.url()},
                    validate.get("balancer"),
                ),
            )
        except (PluginError, TypeError) as err:
            log.error(f"ovva-player: {err}")
            return

        log.debug(f"url_ovva={url_ovva}")

        # The response must split on "=" into exactly "302" and an .m3u8 URL.
        balancer_schema = validate.Schema(
            validate.transform(lambda text: text.split("=")),
            ["302", validate.url(path=validate.endswith(".m3u8"))],
            validate.get(1),
        )
        return self.session.http.get(url=url_ovva, schema=balancer_schema)
예제 #25
0
    def get_channels(self):
        """Return a dict mapping ``data-id`` to ``data-code`` for the channel links.

        The links are the ``<a>`` elements with both attributes below any
        element whose class contains ``channel-list``.
        """
        link_xpath = ".//*[contains(@class,'channel-list')]//a[@data-id][@data-code]"
        page_schema = validate.Schema(
            validate.parse_html(),
            validate.xml_xpath(link_xpath),
            [validate.union_get("data-id", "data-code")],
        )
        pairs = self.session.http.get(self.url, schema=page_schema)
        return dict(pairs)
예제 #26
0
    def _get_streams(self):
        """Return the HLS streams of the live room described on the page.

        The room data is read from the ``data-json`` attribute of the
        ``js-live-data`` script. Returns None when the page has no such data,
        the room is not live, or the playlist response has an unexpected
        content type.
        """
        room_shape = {
            "is_live": int,
            "room_id": int,
            validate.optional("room"): {
                "content_region_permission": int,
                "is_free": int,
            },
        }
        page_schema = validate.Schema(
            validate.parse_html(),
            validate.xml_xpath_string(
                ".//script[@id='js-live-data'][@data-json]/@data-json"),
            validate.any(
                None,
                validate.all(validate.parse_json(), room_shape),
            ),
        )
        data = self.session.http.get(self.url, schema=page_schema)
        if not data:  # URL without livestream
            return

        log.debug(f"{data!r}")
        if data["is_live"] != 1:
            log.info("This stream is currently offline")
            return

        url_schema = validate.Schema(
            validate.parse_json(),
            {
                "streaming_url_list": [{
                    "type": str,
                    "url": validate.url(),
                }],
            },
            validate.get("streaming_url_list"),
            validate.filter(lambda entry: entry["type"] == "hls_all"),
            validate.get((0, "url")),
        )
        url = self.session.http.get(
            "https://www.showroom-live.com/api/live/streaming_url",
            params={
                "room_id": data["room_id"],
                "abr_available": 1,
            },
            schema=url_schema,
        )

        # A non-playlist content type is treated as a restricted stream.
        res = self.session.http.get(url, acceptable_status=(200, 403, 404))
        if res.headers["Content-Type"] != "application/x-mpegURL":
            log.error("This stream is restricted")
            return

        return ShowroomHLSStream.parse_variant_playlist(self.session, url)
예제 #27
0
    def _parse_streams(self, res):
        """Yield the streams found in a response.

        Three sources are tried in order: the ``og:video:url`` meta tag (which
        ends the search when it is a DASH manifest or an mp4 file), every
        match of ``self._src_re``, and the inline manifest matched by
        ``self._dash_manifest_re``.

        Args:
            res: Response object whose ``text`` is searched.

        Yields:
            ``(name, stream)`` pairs.
        """
        og_video_schema = validate.Schema(
            validate.parse_html(),
            validate.xml_xpath_string(
                ".//head/meta[@property='og:video:url'][@content][1]/@content"),
        )
        stream_url = og_video_schema.validate(res.text)
        if stream_url:
            if ".mpd" in stream_url:
                for item in DASHStream.parse_manifest(self.session,
                                                      stream_url).items():
                    yield item
                return
            if ".mp4" in stream_url:
                yield "vod", HTTPStream(self.session, stream_url)
                return
        else:
            log.debug("No meta og:video:url")

        for match in self._src_re.finditer(res.text):
            stream_url = match.group("url")
            if "\\/" in stream_url:
                # if the URL is json encoded, decode it
                stream_url = parse_json("\"{}\"".format(stream_url))

            if ".mpd" in stream_url:
                for item in DASHStream.parse_manifest(self.session,
                                                      stream_url).items():
                    yield item
                continue
            if ".mp4" in stream_url:
                yield match.group(1), HTTPStream(self.session, stream_url)
                continue
            log.debug("Non-dash/mp4 stream: {0}".format(stream_url))

        match = self._dash_manifest_re.search(res.text)
        if not match:
            return

        # facebook replaces "<" characters with the substring "\\x3C"
        manifest = match.group("manifest").replace("\\/", "/")
        if is_py3:
            unquoted = unquote_plus(manifest)
            manifest = bytes(unquoted, "utf-8").decode("unicode_escape")
        else:
            manifest = unquote_plus(manifest).decode("string_escape")

        # Ignore unsupported manifests until DASH SegmentBase support is implemented
        if "SegmentBase" in manifest:
            log.error("Skipped DASH manifest with SegmentBase streams")
            return

        for item in DASHStream.parse_manifest(self.session,
                                              manifest).items():
            yield item
예제 #28
0
    def _get_streams(self):
        """Fetch ``self.url`` and return the streams advertised by the page.

        The page is first checked for known restriction messages inside
        ``<script>`` elements; if one is present, the matching error is logged
        and nothing is returned. Otherwise a playlist/manifest URL is looked
        up, first in a script body and then in the ``video-player`` source
        element, and handed to the HLS or DASH parser depending on whether it
        contains ``.m3u8`` or ``.mpd``.

        :return: the result of the HLS/DASH parser, or ``None`` when the
            content is restricted or no usable URL is found
        """
        page_schema = validate.Schema(validate.parse_html())
        root = self.session.http.get(self.url, schema=page_schema)

        # (text searched for in the page scripts, error logged when found)
        restrictions = (
            (
                "This service is not available in your Country",
                "The content is not available in your region",
            ),
            (
                "Silahkan login Menggunakan akun MyIndihome dan berlangganan minipack",
                "The content is not available without a subscription",
            ),
        )
        for needle, errormsg in restrictions:
            needle_xpath = """.//script[contains(text(), '"{0}"')]""".format(needle)
            if validate.Schema(validate.xml_xpath(needle_xpath)).validate(root):
                log.error(errormsg)
                return

        # NOTE: the whitespace inside this literal is intentionally left
        # exactly as it was, so the query string passed on is unchanged.
        script_xpath = """
                        .//script[contains(text(), 'laylist.m3u8') or contains(text(), 'manifest.mpd')][1]/text()
                    """
        quoted_url_re = re.compile(
            r"""(?P<q>['"])(?P<url>https://.*?/(?:[Pp]laylist\.m3u8|manifest\.mpd).+?)(?P=q)"""
        )

        # First choice: a quoted playlist/manifest URL inside a script body.
        url_from_script = validate.all(
            validate.xml_xpath_string(script_xpath),
            validate.text,
            validate.transform(quoted_url_re.search),
            validate.any(None, validate.all(validate.get("url"), validate.url())),
        )
        # Second choice: the source of the "video-player" element.
        url_from_video = validate.all(
            validate.xml_xpath_string(".//video[@id='video-player']/source/@src"),
            validate.any(None, validate.url()),
        )
        url = validate.Schema(
            validate.any(url_from_script, url_from_video)
        ).validate(root)

        if not url:
            return
        if ".m3u8" in url:
            return HLSStream.parse_variant_playlist(self.session, url)
        if ".mpd" in url:
            return DASHStream.parse_manifest(self.session, url)
예제 #29
0
    def _get_vod_stream(self):
        """Resolve the VOD stream(s) referenced by the page at ``self.url``.

        Three page elements are checked in order and the first non-empty one
        wins:

        1. the ``og:video`` meta tag, returned as a single ``vod`` HTTP stream;
        2. the ``data-video-id`` attribute of a ``div`` with a
           ``data-google-src`` attribute, resolved through the session as a
           YouTube watch URL;
        3. the ``src`` of the ``pfpPlayer`` iframe when it starts with
           ``https://www.youtube.com/``, resolved through the session.

        :return: a mapping of streams, or ``None`` when nothing matches
        """
        page = self.session.http.get(
            self.url,
            schema=validate.Schema(validate.parse_html()),
        )

        og_video = page.xpath("string(.//meta[@property='og:video'][1]/@content)")
        if og_video:
            return {"vod": HTTPStream(self.session, og_video)}

        video_id = page.xpath("string(.//div[@data-google-src]/@data-video-id)")
        if video_id:
            return self.session.streams(f"https://www.youtube.com/watch?v={video_id}")

        iframe_src = page.xpath(
            "string(.//iframe[@id='pfpPlayer'][starts-with(@src,'https://www.youtube.com/')][1]/@src)"
        )
        if iframe_src:
            return self.session.streams(iframe_src)
예제 #30
0
 def _get_streams(self):
     """Return the HLS variant streams advertised by the page at ``self.url``.

     The page is parsed once to extract both the first
     ``application/x-mpegURL`` video source and the page title; the title is
     stored on ``self.title`` even when no source is found.

     :return: the parsed variant playlist, or ``None`` without an HLS source
     """
     page_schema = validate.Schema(
         validate.parse_html(),
         validate.union((
             validate.xml_xpath_string(
                 ".//video/source[@src][@type='application/x-mpegURL'][1]/@src"
             ),
             validate.xml_xpath_string(".//head/title[1]/text()"),
         )),
     )
     hls_url, self.title = self.session.http.get(self.url, schema=page_schema)

     if hls_url:
         # The playlist is requested with the page itself as the Referer.
         return HLSStream.parse_variant_playlist(
             self.session,
             hls_url,
             headers={"Referer": self.url},
         )
     return None