Example #1
0
    def _get_streams(self):
        """Yield the HLS variant streams exposed by the embedded player.

        Fetches ``self.url``, picks the first ``<iframe>`` whose ``src``
        contains ``embed.lsm.lv``, fetches that iframe and parses every
        ``<source>`` whose URL path ends in ``.m3u8`` as an HLS variant
        playlist.

        :return: generator of ``(name, stream)`` pairs; nothing is yielded
                 when no player iframe is found.
        """
        self.session.http.headers.update({
            "Referer": self.url,
            "User-Agent": useragents.FIREFOX
        })

        iframe_url = None
        res = self.session.http.get(self.url)
        for iframe in itertags(res.text, "iframe"):
            # An <iframe> without a src attribute must not abort the scan
            # with a TypeError from the membership test.
            src = iframe.attributes.get("src") or ""
            if "embed.lsm.lv" in src:
                iframe_url = src
                break

        if not iframe_url:
            log.error("Could not find player iframe")
            return

        log.debug("Found iframe: {0}".format(iframe_url))
        res = self.session.http.get(iframe_url)
        for source in itertags(res.text, "source"):
            stream_url = source.attributes.get("src")
            if not stream_url:
                continue

            url_path = urlparse(stream_url).path
            if url_path.endswith(".m3u8"):
                for s in HLSStream.parse_variant_playlist(self.session,
                                                          stream_url).items():
                    yield s
            else:
                log.debug("Not used URL path: {0}".format(url_path))
Example #2
0
    def _get_source_streams(self):
        """Return the HLS variants of the first embedded video source.

        Scans the page for anchors whose ``class`` contains
        ``video-play__link``, loads the linked page and hands the ``src`` of
        the first ``<source>`` found to ``HLSStream.parse_variant_playlist``.
        Returns ``None`` when no such source exists.
        """
        page = self.session.http.get(self.url)

        for anchor in itertags(page.text, 'a'):
            if "video-play__link" not in anchor.attributes.get("class", ""):
                continue

            embed_url = urljoin(self.url, anchor.attributes.get("href"))
            log.debug("Loading embedded video page")
            query = {"ajax": "true", "npo_cc_skip_wall": "true"}
            embed_page = self.session.http.get(embed_url, params=query)
            for source in itertags(embed_page.text, 'source'):
                playlist_url = source.attributes.get("src")
                return HLSStream.parse_variant_playlist(self.session, playlist_url)
Example #3
0
    def _get_streams(self):
        """
        Yield the streams of a vk.com video.

        Streams come first from any ``<iframe>`` in the API response (handed
        to ``self.session.streams``) and then from its ``<source>`` tags:
        HLS playlists and direct MP4 files.

        :return: generator of ``(name, stream)`` pairs
        """
        self.session.http.headers.update({'User-Agent': useragents.IPHONE_6})

        # A 'videos' catalog URL may carry the video ID in its query string;
        # follow_vk_redirect resolves it to the URL that is matched below.
        target = self.follow_vk_redirect(self.url)

        matched = self._url_re.match(target)
        if not matched:
            log.error('URL is not compatible: {0}'.format(target))
            return

        video_id = matched.group('video_id')
        log.debug('video ID: {0}'.format(video_id))

        res = self.session.http.post(self.API_URL, params={
            'act': 'show_inline',
            'al': '1',
            'video': video_id,
        })

        for frame in itertags(res.text, 'iframe'):
            if not frame.attributes.get('src'):
                continue
            iframe_url = update_scheme(self.url, frame.attributes['src'])
            log.debug('Found iframe: {0}'.format(iframe_url))
            for item in self.session.streams(iframe_url).items():
                yield item

        for source in itertags(res.text, 'source'):
            mime = source.attributes.get('type')
            if mime == 'application/vnd.apple.mpegurl':
                video_url = source.attributes['src']
                # Remove invalid URL
                if video_url.startswith('https://vk.com/'):
                    continue
                variants = HLSStream.parse_variant_playlist(self.session,
                                                            video_url)
                if variants:
                    for item in variants.items():
                        yield item
                else:
                    yield 'live', HLSStream(self.session, video_url)
            elif mime == 'video/mp4':
                video_url = source.attributes['src']
                quality_match = self._vod_quality_re.search(video_url)
                if quality_match:
                    quality = '{0}p'.format(quality_match.group(1))
                else:
                    quality = 'vod'
                yield quality, HTTPStream(self.session, video_url)
Example #4
0
    def _get_streams(self):
        """Return the HLS variants of the live player linked from the page.

        The player URL is read from the ``data-url`` of the first anchor whose
        class is exactly ``play-live``; its fifth path segment is used as the
        ID for the API request that supplies the playlist parameters.

        :raises PluginError: when no player link is found or its path does
                             not have the expected six segments.
        """
        self.session.http.headers.update({'User-Agent': useragents.FIREFOX})

        player_url = None
        landing = self.session.http.get(self.url)
        for anchor in itertags(landing.text, 'a'):
            if anchor.attributes.get('class') != 'play-live':
                continue
            player_url = update_scheme(self.url, anchor.attributes['data-url'])
            break

        if not player_url:
            raise PluginError('Could not find iframe.')

        url_parts = urlparse(player_url)
        segments = url_parts.path.split('/')
        if len(segments) != 6:
            # Only one iframe URL layout is supported: the video ID could sit
            # at another position if the layout ever changes.
            raise PluginError('unsupported iframe URL: {0}'.format(player_url))

        api_url = self.API_URL.format(netloc=url_parts.netloc, id=segments[4])
        api_res = self.session.http.get(api_url)

        data = self.session.http.json(api_res, schema=self._api_schema)
        log.trace('{0!r}'.format(data))

        playlist_url = self.PLAYLIST_URL.format(
            app=data['streamProperties']['application'],
            name=data['playStreamName'],
            netloc=data['cdnHost'],
        )
        return HLSStream.parse_variant_playlist(self.session, playlist_url)
Example #5
0
    def _get_streams(self):
        """Resolve the DASH streams of a broadcast, waiting while it starts.

        Logs in first when the ``email`` option is set, rewrites steam.tv
        URLs to the watch-broadcast URL of the broadcaster, then polls
        ``_get_broadcast_stream`` for as long as it reports a waiting state.

        :return: the result of ``DASHStream.parse_manifest`` once the
                 broadcast is ``ready``; ``None`` when it is ``unavailable``
                 or polling ends in any other state.
        """
        streamdata = None
        if self.get_option("email"):
            if self.login(self.get_option("email"), self.get_option("password")):
                log.info("Logged in as {0}".format(self.get_option("email")))
                # Persist only the cookies whose name contains steamMachineAuth.
                self.save_cookies(lambda c: "steamMachineAuth" in c.name)

        # Handle steam.tv URLs
        if self._steamtv_url_re.match(self.url) is not None:
            # extract the steam ID from the page
            res = self.session.http.get(self.url)
            for div in itertags(res.text, 'div'):
                if div.attributes.get("id") == "webui_config":
                    # data-broadcast holds HTML-escaped JSON.
                    broadcast_data = html_unescape(div.attributes.get("data-broadcast"))
                    steamid = parse_json(broadcast_data).get("steamid")
                    # Replace self.url so the generic handling below applies.
                    self.url = self._watch_broadcast_url + steamid

        # extract the steam ID from the URL
        steamid = self._url_re.match(self.url).group(1)
        res = self.session.http.get(self.url)  # get the page to set some cookies
        sessionid = res.cookies.get('sessionid')

        # Poll until the API stops reporting one of the two waiting states.
        while streamdata is None or streamdata[u"success"] in ("waiting", "waiting_for_start"):
            streamdata = self._get_broadcast_stream(steamid,
                                                    sessionid=sessionid)

            if streamdata[u"success"] == "ready":
                return DASHStream.parse_manifest(self.session, streamdata["url"])
            elif streamdata[u"success"] == "unavailable":
                log.error("This stream is currently unavailable")
                return
            else:
                # "retry" is divided by 1000 to obtain the sleep time in seconds.
                r = streamdata[u"retry"] / 1000.0
                log.info("Waiting for stream, will retry again in {} seconds...".format(r))
                time.sleep(r)
Example #6
0
    def _get_streams(self):
        """
        Find the HLS streams for the requested station.

        The station code is taken from the URL (``scode`` group) or, failing
        that, from the ``station_code`` option (``--ustvnow-station-code``).
        The guide page supplies the known station codes; the stream URL is
        then requested with the login token.

        :return: the parsed HLS variants, or ``None`` when no station code is
                 given or the code is not listed in the guide.
        """
        token = self.login(self.get_option("username"), self.get_option("password"))
        m = self._url_re.match(self.url)
        # An empty or missing group in the URL falls back to the option.
        scode = (m.group("scode") if m else None) or self.get_option("station_code")

        res = http.get(self._guide_url)

        channels = {}
        for t in itertags(res.text, "a"):
            code = t.attributes.get('cs')
            if not code:
                continue
            # Tolerate guide entries that carry no title attribute.
            title = t.attributes.get('title') or ""
            channels[code.lower()] = title.replace("Watch ", "")

        if not scode:
            self.logger.error("Station code not provided, use --ustvnow-station-code.")
            self.logger.error("Available stations are: {0}", ", ".join(channels.keys()))
            return

        if scode in channels:
            self.logger.debug("Finding streams for: {0}", channels.get(scode))

            r = http.get(self._stream_url, params={"scode": scode,
                                                   "token": token,
                                                   "br_n": "Firefox",
                                                   "br_v": "52",
                                                   "br_d": "desktop"},
                         headers={"User-Agent": useragents.FIREFOX})

            data = http.json(r)
            return HLSStream.parse_variant_playlist(self.session, data["stream"])
        else:
            self.logger.error("Invalid station-code: {0}", scode)
Example #7
0
    def _find_video_id(self, url):
        """Return the video ID for ``url``.

        The ID is taken, in order, from the URL itself, from the page data
        (``currentVideoEndpoint``, then a ``videoRenderer`` carrying a LIVE
        NOW badge) and, for ``/embed/live_stream`` URLs, by following the
        page's canonical link.

        :param url: page URL to inspect
        :return: the video ID
        :raises PluginError: when no video ID can be found
        """
        m = _url_re.match(url)
        # A URL that does not match (possible for a canonical link reached
        # through the recursion below) simply carries no ID of its own.
        if m and m.group("video_id"):
            log.debug("Video ID from URL")
            return m.group("video_id")

        res = self.session.http.get(url)
        datam = _ytdata_re.search(res.text)
        if datam:
            data = parse_json(datam.group(1))
            # find the videoRenderer object, where there is a LIVE NOW badge
            for vid_ep in search_dict(data, 'currentVideoEndpoint'):
                video_id = vid_ep.get("watchEndpoint", {}).get("videoId")
                if video_id:
                    log.debug("Video ID from currentVideoEndpoint")
                    return video_id
            for x in search_dict(data, 'videoRenderer'):
                for bstyle in search_dict(x.get("badges", {}), "style"):
                    if bstyle == "BADGE_STYLE_TYPE_LIVE_NOW":
                        if x.get("videoId"):
                            log.debug("Video ID from videoRenderer (live)")
                            return x["videoId"]

        if "/embed/live_stream" in url:
            for link in itertags(res.text, "link"):
                if link.attributes.get("rel") == "canonical":
                    canon_link = link.attributes.get("href")
                    # A canonical tag without href cannot be followed.
                    if canon_link and canon_link != url:
                        log.debug("Re-directing to canonical URL: {0}".format(canon_link))
                        return self._find_video_id(canon_link)

        raise PluginError("Could not find a video on this page")
Example #8
0
    def _get_streams(self):
        """Return the HLS streams reachable through the ``videoFrame`` iframe.

        Chain followed: page -> iframe with id ``videoFrame`` -> span with
        class ``webcaster-player`` (its ``data-config`` URL) -> XML config ->
        each ``video_hd`` document -> first ``iphone/track`` URL.

        :return: the parsed HLS variants, a ``{'live': HLSStream}`` dict when
                 the playlist has no variants, or ``None`` when any link of
                 the chain is missing.
        """
        log.debug('Version 2018-07-12')
        log.info('This is a custom plugin. '
                 'For support visit https://github.com/back-to/plugins')
        self.session.http.headers.update({'User-Agent': useragents.FIREFOX})

        page = self.session.http.get(self.url)
        frame_page = ''
        for frame in itertags(page.text, 'iframe'):
            log.debug('Found iframe: {0}'.format(frame))
            if frame.attributes.get('id') != 'videoFrame':
                continue
            frame_page = self.session.http.get(frame.attributes['src'])
            break

        if not frame_page:
            log.debug('No iframe found.')
            return

        config_url = ''
        for span in itertags(frame_page.text, 'span'):
            if span.attributes.get('class') != 'webcaster-player':
                continue
            # Drop a leading "config=" prefix from the attribute value.
            config_url = re.sub(r'^config=(.*)', r'\1',
                                span.attributes['data-config'])
            break

        if not config_url:
            log.debug('No xml_url found.')
            return

        config_res = self.session.http.get(config_url)
        config = self.session.http.xml(config_res, ignore_ns=True)

        for hd_node in config.findall('./video_hd'):
            log.debug('Found video_hd')
            hd_res = self.session.http.get(hd_node.text)
            hd_doc = self.session.http.xml(hd_res, ignore_ns=True)
            for track in hd_doc.findall('./iphone/track'):
                log.debug('Found iphone/track')
                hls_url = track.text
                log.debug('URL={0}'.format(hls_url))
                variants = HLSStream.parse_variant_playlist(self.session,
                                                            hls_url)
                if variants:
                    return variants
                return {'live': HLSStream(self.session, hls_url)}
Example #9
0
 def _get_streams(self):
     """Yield the API streams of every ``dvideo`` player div on the page.

     Each ``<div>`` whose ``data-provider`` is ``dvideo`` contributes the
     streams returned by ``_get_streams_api`` for its ``data-id``.
     """
     page = self.session.http.get(self.url)
     for node in itertags(page.text, 'div'):
         if node.attributes.get("data-provider") != "dvideo":
             continue
         video_id = node.attributes.get("data-id")
         log.debug("Found video ID: {0}".format(video_id))
         yield from self._get_streams_api(video_id)
Example #10
0
 def test_no_end_tag(self):
     """A ``link`` tag is parsed with its attributes and ``None`` text."""
     links = list(itertags(self.test_html, "link"))
     # Compare the count explicitly: assertTrue(x, 1) would treat 1 as the
     # failure message and pass for any non-empty list.
     self.assertEqual(len(links), 1)
     self.assertEqual(links[0].tag, "link")
     self.assertIsNone(links[0].text)
     self.assertEqual(links[0].attributes, {"rel": "stylesheet",
                                            "type": "text/css",
                                            "href": "https://test.se/test.css"})
 def test_no_end_tag(self):
     """A ``link`` tag is parsed with its attributes and ``None`` text."""
     links = list(itertags(self.test_html, "link"))
     # Compare the count explicitly: assertTrue(x, 1) would treat 1 as the
     # failure message and pass for any non-empty list.
     self.assertEqual(len(links), 1)
     self.assertEqual(links[0].tag, "link")
     self.assertIsNone(links[0].text)
     self.assertEqual(links[0].attributes, {"rel": "stylesheet",
                                            "type": "text/css",
                                            "href": "https://test.se/test.css"})
 def _get_streams(self):
     """Yield the API streams of every ``dvideo`` player div on the page.

     Each ``<div>`` whose ``data-provider`` is ``dvideo`` contributes the
     streams returned by ``_get_streams_api`` for its ``data-id``.
     """
     page = self.session.http.get(self.url)
     for node in itertags(page.text, 'div'):
         if node.attributes.get("data-provider") != "dvideo":
             continue
         video_id = node.attributes.get("data-id")
         log.debug("Found video ID: {0}".format(video_id))
         yield from self._get_streams_api(video_id)
Example #13
0
    def _get_streams(self):
        """Yield the streams of the video page, trying three sources in turn.

        1. Whatever ``_parse_streams`` finds in the page itself.
        2. A single ``sd`` HTTP stream taken from the playlist regexes.
        3. Whatever ``_parse_streams`` finds in the tahoe player response.

        Nothing is yielded when the page title starts with
        ``Log into Facebook``.
        """
        self.session.set_option("ffmpeg-start-at-zero", True)
        self.session.http.headers.update({"Accept-Language": "en-US"})

        # Set as soon as the page itself produced at least one stream.
        done = False
        res = self.session.http.get(self.url)
        log.trace(f"{res.url}")
        for title in itertags(res.text, "title"):
            if title.text.startswith("Log into Facebook"):
                log.error("Video is not available, You must log in to continue.")
                return

        for s in self._parse_streams(res):
            done = True
            yield s
        if done:
            return

        # fallback on to playlist
        log.debug("Falling back to playlist regex")
        match = self._playlist_re.search(res.text)
        playlist = match and match.group(1)
        if playlist:
            match = self._plurl_re.search(playlist)
            if match:
                url = match.group(1)
                yield "sd", HTTPStream(self.session, url)
                return

        # fallback to tahoe player url
        log.debug("Falling back to tahoe player")
        video_id = self.match.group("video_id")
        url = self._TAHOE_URL.format(video_id)
        # Form fields start from class-level defaults and are overridden below
        # by whatever values the page exposes.
        data = {
            "__a": 1,
            "__pc": self._DEFAULT_PC,
            "__rev": self._DEFAULT_REV,
            "fb_dtsg": "",
        }
        match = self._pc_re.search(res.text)
        if match:
            data["__pc"] = match.group(1)
        match = self._rev_re.search(res.text)
        if match:
            data["__rev"] = match.group(1)
        match = self._dtsg_re.search(res.text)
        if match:
            data["fb_dtsg"] = match.group(1)
        # res is rebound here; every search above ran on the original page.
        res = self.session.http.post(
            url,
            headers={"Content-Type": "application/x-www-form-urlencoded"},
            data=urlencode(data).encode("ascii")
        )

        for s in self._parse_streams(res):
            yield s
Example #14
0
 def _get_res(self, url):
     """Fetch ``url``, submitting the consent form if redirected to it.

     When the response lands on ``consent.youtube.com``, the hidden
     ``<input>`` fields of that page are posted back to
     ``https://consent.youtube.com/s`` and that response is returned instead.
     """
     res = self.session.http.get(url)
     if urlparse(res.url).netloc != "consent.youtube.com":
         return res

     c_data = {
         field.attributes.get("name"): unescape(field.attributes.get("value"))
         for field in itertags(res.text, "input")
         if field.attributes.get("type") == "hidden"
     }
     log.debug(f"c_data_keys: {', '.join(c_data.keys())}")
     return self.session.http.post("https://consent.youtube.com/s", data=c_data)
Example #15
0
 def _get_streams(self):
     """Hand the first usable ``<iframe>`` of the page off to another plugin.

     :return: the streams resolved by ``self.session.streams`` for the first
              iframe that has a ``src``; ``None`` when no plugin handles that
              URL or the page has no such iframe.
     """
     page = self.session.http.get(self.url)
     for iframe in itertags(page.text, u"iframe"):
         url = iframe.attributes.get("src")
         if not url:
             # An <iframe> without a src cannot be handed off; try the next.
             continue
         self.logger.debug("Handing off of {0}".format(url))
         try:
             return self.session.streams(update_scheme(self.url, url))
         except NoPluginError:
             self.logger.error("Handing off of {0} failed".format(url))
             return None
Example #16
0
 def _get_streams(self):
     """Hand the first usable ``<iframe>`` of the page off to another plugin.

     :return: the streams resolved by ``self.session.streams`` for the first
              iframe that has a ``src``; ``None`` when no plugin handles that
              URL or the page has no such iframe.
     """
     page = self.session.http.get(self.url)
     for iframe in itertags(page.text, u"iframe"):
         url = iframe.attributes.get("src")
         if not url:
             # An <iframe> without a src cannot be handed off; try the next.
             continue
         log.debug("Handing off of {0}".format(url))
         try:
             return self.session.streams(update_scheme(self.url, url))
         except NoPluginError:
             log.error("Handing off of {0} failed".format(url))
             return None
Example #17
0
    def test_itertags_attrs_text(self):
        """``script`` tags expose both their attributes and their text."""
        script = list(itertags(self.test_html, "script"))
        # Compare the count explicitly: assertTrue(x, 2) would treat 2 as the
        # failure message and pass for any non-empty list.
        self.assertEqual(len(script), 2)
        self.assertEqual(script[0].tag, "script")
        self.assertEqual(script[0].text, "")
        self.assertEqual(script[0].attributes, {"src": "https://test.se/test.js"})

        self.assertEqual(script[1].tag, "script")
        self.assertEqual(script[1].text.strip(), """Tester.ready(function () {\nalert("Hello, world!"); });""")
        self.assertEqual(script[1].attributes, {})
Example #18
0
 def _get_streams(self):
     """Return the HLS variants of the first iframe exposing a stream URL.

     Every ``<iframe>`` of the page is fetched with the page as Referer and
     searched with ``self.src_re``; the first non-empty ``url`` group wins.
     """
     page = http.get(self.url)
     for frame in itertags(page.text, "iframe"):
         self.logger.debug("Found iframe: {0}".format(frame))
         frame_page = http.get(frame.attributes['src'], headers={"Referer": self.url})
         found = self.src_re.search(frame_page.text)
         if not found:
             continue
         surl = found.group("url")
         if not surl:
             continue
         self.logger.debug("Found stream URL: {0}".format(surl))
         return HLSStream.parse_variant_playlist(self.session, surl)
Example #19
0
 def _get_streams(self):
     """Return the HLS variants of the first iframe exposing a stream URL.

     Every ``<iframe>`` of the page is fetched with the page as Referer and
     searched with ``self.src_re``; the first non-empty ``url`` group is
     passed through ``update_scheme`` and parsed as a variant playlist.
     """
     page = self.session.http.get(self.url)
     for frame in itertags(page.text, "iframe"):
         self.logger.debug("Found iframe: {0}".format(frame))
         frame_page = self.session.http.get(frame.attributes['src'], headers={"Referer": self.url})
         found = self.src_re.search(frame_page.text)
         if not found:
             continue
         raw_url = found.group("url")
         if not raw_url:
             continue
         surl = update_scheme(self.url, raw_url)
         self.logger.debug("Found stream URL: {0}".format(surl))
         return HLSStream.parse_variant_playlist(self.session, surl)
Example #20
0
    def _get_data_from_api(self, res):
        """Query the ``youtubei/v1/player`` endpoint for the video's data.

        :param res: response of the already fetched page; its text supplies
                    the canonical link, API key and client version.
        :return: the parsed JSON response, or ``None`` when no video ID can
                 be determined.
        """
        _i_video_id = self.match.group("video_id")
        if _i_video_id is None:
            # Fall back to the video ID in the page's canonical link.
            for link in itertags(res.text, "link"):
                if link.attributes.get("rel") == "canonical":
                    try:
                        _i_video_id = self.matcher.match(
                            link.attributes.get("href")).group("video_id")
                    except AttributeError:
                        # The canonical URL did not match the plugin's matcher.
                        return
                    break
            else:
                # No canonical link on the page.
                return

        # Each search below relies on AttributeError (``None.group``) to fall
        # back to a hard-coded default when the page lacks the value.
        try:
            _i_api_key = re.search(r'"INNERTUBE_API_KEY":\s*"([^"]+)"',
                                   res.text).group(1)
        except AttributeError:
            _i_api_key = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8"

        try:
            _i_version = re.search(
                r'"INNERTUBE_CLIENT_VERSION":\s*"([\d\.]+)"',
                res.text).group(1)
        except AttributeError:
            _i_version = "1.20210616.1.0"

        res = self.session.http.post(
            "https://www.youtube.com/youtubei/v1/player",
            headers={"Content-Type": "application/json"},
            params={"key": _i_api_key},
            data=json.dumps({
                "videoId": _i_video_id,
                "contentCheckOk": True,
                "racyCheckOk": True,
                "context": {
                    "client": {
                        "clientName": "WEB",
                        "clientVersion": _i_version,
                        "platform": "DESKTOP",
                        "clientScreen": "EMBED",
                        "clientFormFactor": "UNKNOWN_FORM_FACTOR",
                        "browserName": "Chrome",
                    },
                    "user": {
                        "lockedSafetyMode": "false"
                    },
                    "request": {
                        "useSsl": "true"
                    },
                }
            }),
        )
        return parse_json(res.text)
    def _get_streams(self):
        """Return the live HLS or VOD HTTP stream advertised by the page.

        For the ``/live`` path the stream URL is the first quoted ``http...``
        string inside the first ``<script>`` whose text mentions ``hls``;
        otherwise it is the ``href`` of the first ``<a>`` that contains
        ``mp4``.

        :return: the parsed HLS variants (live page with ``parse_hls``
                 enabled), a ``{"vod": HTTPStream}`` dict otherwise, or
                 ``None`` when no stream URL is found.
        """
        headers = {'User-Agent': CHROME}

        # NOTE(review): TLS certificate verification is disabled for this
        # request — confirm that this is still required.
        res = self.session.http.get(self.url, headers=headers, verify=False)

        live = urlparse(self.url).path == '/live'
        stream = None
        if live:
            for script in itertags(res.text, 'script'):
                text = script.text or ''
                if "hls" in text:
                    found = re.search(r'''['"](http.+?)['"]''', text)
                    stream = found.group(1) if found else None
                    break
        else:
            # A page without any mp4 link yields no stream instead of raising
            # IndexError, mirroring the tolerance of the live branch.
            for anchor in itertags(res.text, 'a'):
                href = anchor.attributes.get('href', '')
                if "mp4" in href:
                    stream = href
                    break

        headers.update({"Referer": self.url})

        try:
            parse_hls = bool(strtobool(self.get_option('parse_hls')))
        except AttributeError:
            # An unset option is not a string; default to parsing.
            parse_hls = True

        if not stream:
            return None

        if parse_hls and live:
            return HLSStream.parse_variant_playlist(self.session,
                                                    stream,
                                                    headers=headers)
        return dict(
            vod=HTTPStream(self.session, stream, headers=headers))
Example #22
0
    def _get_api_info(self, page):
        """Build the token and stream API URLs from the page's player div.

        Uses the first ``<div>`` whose class is exactly ``vrtvideo``.

        :param page: response whose ``text`` holds the page HTML
        :return: dict with ``token_url`` and ``stream_url``, or ``None`` when
                 the page has no such div.
        """
        for div in itertags(page.text, 'div'):
            attrs = div.attributes
            if attrs.get("class") != "vrtvideo":
                continue

            api_base = attrs.get("data-mediaapiurl") + "/"
            videos_base = urljoin(api_base, "videos/")

            if attrs.get("data-videotype") == "live":
                resource = attrs.get("data-livestream")
            else:
                resource = "{0}%24{1}".format(attrs.get("data-publicationid"),
                                              attrs.get("data-videoid"))

            return {
                "token_url": urljoin(api_base, "tokens"),
                "stream_url": urljoin(videos_base, resource),
            }
Example #23
0
    def test_itertags_multi_attrs(self):
        """``meta`` tags are returned in order with all of their attributes."""
        metas = list(itertags(self.test_html, "meta"))
        # Compare the count explicitly: assertTrue(x, 3) would treat 3 as the
        # failure message and pass for any non-empty list.
        self.assertEqual(len(metas), 3)
        self.assertTrue(all(meta.tag == "meta" for meta in metas))

        self.assertIsNone(metas[0].text)
        self.assertIsNone(metas[1].text)
        self.assertIsNone(metas[2].text)

        self.assertEqual(metas[0].attributes, {"property": "og:type", "content": "website"})
        self.assertEqual(metas[1].attributes, {"property": "og:url", "content": "http://test.se/"})
        self.assertEqual(metas[2].attributes, {"property": "og:site_name", "content": "Test"})
Example #24
0
    def _get_api_info(self, page):
        """Build the token and stream API URLs from the page's player div.

        Uses the first ``<div>`` whose class is exactly ``vrtvideo``.

        :param page: response whose ``text`` holds the page HTML
        :return: dict with ``token_url`` and ``stream_url``, or ``None`` when
                 the page has no such div.
        """
        for div in itertags(page.text, 'div'):
            attrs = div.attributes
            if attrs.get("class") != "vrtvideo":
                continue

            api_base = attrs.get("data-mediaapiurl") + "/"
            videos_base = urljoin(api_base, "videos/")

            if attrs.get("data-videotype") == "live":
                resource = attrs.get("data-livestream")
            else:
                resource = "{0}%24{1}".format(attrs.get("data-publicationid"),
                                              attrs.get("data-videoid"))

            return {
                "token_url": urljoin(api_base, "tokens"),
                "stream_url": urljoin(videos_base, resource),
            }
Example #25
0
def omegacy(link):
    """Alternative method: read the stream URL from the player script.

    The page is requested twice, first for its cookie and then with that
    cookie. The second double-quoted string of the first ``<script>`` whose
    text starts with ``var playerInstance`` is passed to ``spoofer``.
    """
    session_cookie = client.request(link, close=False, output='cookie')
    page = client.request(link, cookie=session_cookie)

    player_scripts = [
        tag for tag in itertags(page, 'script')
        if tag.text.startswith(u'var playerInstance')
    ]
    player_js = player_scripts[0].text

    quoted = re.findall('"(.+?)"', player_js)
    return spoofer(url=quoted[1], referer=True, ref_str=link)
Example #26
0
def ant1cy(link):
    """Alternative method: resolve the stream through the token generator.

    The second single-quoted string of the page's second-to-last
    ``<script>`` is sent to the token generator endpoint; its response is
    returned with the result of ``spoofer()`` appended.
    """
    api_url = 'https://www.ant1.com.cy/ajax.aspx?m=Atcom.Sites.Ant1iwo.Modules.TokenGenerator&videoURL={0}'

    html = client.request(link)
    # The other client.request result below is used as a plain string, so
    # accept both a string and a response-like object exposing ``.text``.
    markup = getattr(html, 'text', html)

    scripts = list(itertags(markup, 'script'))
    m3u8 = re.findall("'(.+?)'", scripts[-2].text)[1]

    stream = client.request(api_url.format(m3u8))

    return stream + spoofer()
Example #27
0
    def _get_streams(self):
        """Yield the HLS streams of the player iframe after authenticating.

        Cached credentials are reused when ``self._authed`` is set; otherwise
        a username and password are required and ``self._login`` must
        succeed. The stream URL is taken from the first ``<iframe>`` of the
        page via ``self._hls_re``.

        :raises PluginError: when the page has no iframe or the iframe has no
                             HLS URL.
        """
        self.session.http.headers.update({
            'Referer':
            'http://www.abweb.com/BIS-TV-Online/bistvo-tele-universal.aspx'
        })

        username = self.get_option('username')
        password = self.get_option('password')

        if self.options.get('purge_credentials'):
            self.clear_cookies()
            self._authed = False
            log.info('All credentials were successfully removed.')

        if self._authed:
            log.info('Attempting to authenticate using cached cookies')
        elif not (username and password):
            log.error(
                'A login for ABweb is required, use --abweb-username USERNAME --abweb-password PASSWORD'
            )
            return
        elif not self._login(username, password):
            return

        log.debug('get iframe_url')
        page = self.session.http.get(self.url)
        for frame in itertags(page.text, 'iframe'):
            src = frame.attributes.get('src')
            if src.startswith('/'):
                # Site-relative path: anchor it on the site root.
                iframe_url = url_concat('https://www.abweb.com', src)
            else:
                iframe_url = update_scheme('https://', src)
            log.debug('iframe_url={0}'.format(iframe_url))
            # Only the first iframe is considered.
            break
        else:
            raise PluginError('No iframe_url found.')

        self.session.http.headers.update({'Referer': iframe_url})
        frame_page = self.session.http.get(iframe_url)
        hls_match = self._hls_re.search(frame_page.text)
        if not hls_match:
            raise PluginError('No hls_url found.')

        hls_url = update_scheme('https://', hls_match.group('url'))
        variants = HLSStream.parse_variant_playlist(self.session, hls_url)
        if not variants:
            yield 'live', HLSStream(self.session, hls_url)
            return
        yield from variants.items()
Example #28
0
    def _get_streams(self):
        """Return the HLS variants named by the page's ``player`` div.

        The page is requested twice; the ``data-stream`` of the last
        ``<div id="player">`` in the second response is used. Returns
        ``None`` when no stream URL is found.
        """
        self.session.http.headers.update({
            "User-Agent": useragents.CHROME,
            "Referer": self.url})

        self.session.http.get(self.url)
        page = self.session.http.get(self.url)

        stream_url = None
        for div in itertags(page.text, "div"):
            if div.attributes.get("id") == "player":
                stream_url = div.attributes.get("data-stream")

        if not stream_url:
            return

        log.debug("URL={0}".format(stream_url))
        return HLSStream.parse_variant_playlist(self.session, stream_url, name_fmt="{pixels}_{bitrate}")
Example #29
0
    def _get_streams(self):
        """Yield the HLS variants of every ``streamUrl`` in the player script.

        The query string of each ``<script id="playerScript">`` ``src`` is
        scanned for ``streamUrl`` parameters; the n-th one found is requested
        with ``id=n``.
        """
        page = self.session.http.get(self.url)
        for script in itertags(page.text, 'script'):
            if script.attributes.get("id") != "playerScript":
                continue

            log.debug("Found the playerScript script tag")
            query = urlparse(script.attributes.get("src")).query
            stream_urls = [
                value for name, value in parse_qsl(query)
                if name == "streamUrl"
            ]
            for index, stream_url in enumerate(stream_urls, start=1):
                variants = HLSStream.parse_variant_playlist(
                    self.session, stream_url, params=dict(id=index),
                    verify=False)
                yield from variants.items()
Example #30
0
    def _parse_streams(self, res):
        """Yield the streams found in the text of ``res``.

        The ``og:video:url`` meta tag is tried first; when it yields a
        stream, nothing else is inspected. Otherwise every ``self._src_re``
        match and then the embedded DASH manifest are tried.

        :param res: response whose ``text`` is searched
        :return: generator of ``(name, stream)`` pairs
        """
        _found_stream_url = False
        for meta in itertags(res.text, "meta"):
            if meta.attributes.get("property") == "og:video:url":
                stream_url = html_unescape(meta.attributes.get("content"))
                if ".mpd" in stream_url:
                    for s in DASHStream.parse_manifest(self.session,
                                                       stream_url).items():
                        yield s
                        # Only set once the manifest produced a stream.
                        _found_stream_url = True
                elif ".mp4" in stream_url:
                    yield "vod", HTTPStream(self.session, stream_url)
                    _found_stream_url = True
                # Only the first og:video:url tag is considered.
                break
        else:
            log.debug("No meta og:video:url")

        if _found_stream_url:
            return

        for match in self._src_re.finditer(res.text):
            stream_url = match.group("url")
            if "\\/" in stream_url:
                # if the URL is json encoded, decode it
                stream_url = parse_json("\"{}\"".format(stream_url))
            if ".mpd" in stream_url:
                for s in DASHStream.parse_manifest(self.session,
                                                   stream_url).items():
                    yield s
            elif ".mp4" in stream_url:
                # The first group of the match serves as the stream name.
                yield match.group(1), HTTPStream(self.session, stream_url)
            else:
                log.debug("Non-dash/mp4 stream: {0}".format(stream_url))

        match = self._dash_manifest_re.search(res.text)
        if match:
            # facebook replaces "<" characters with the substring "\\x3C"
            manifest = match.group("manifest").replace("\\/", "/")
            # Undo the URL quoting and the backslash escapes; the decoding
            # call differs between Python 3 and Python 2.
            if is_py3:
                manifest = bytes(unquote_plus(manifest),
                                 "utf-8").decode("unicode_escape")
            else:
                manifest = unquote_plus(manifest).decode("string_escape")
            # Ignore unsupported manifests until DASH SegmentBase support is implemented
            if "SegmentBase" in manifest:
                log.error("Skipped DASH manifest with SegmentBase streams")
            else:
                for s in DASHStream.parse_manifest(self.session,
                                                   manifest).items():
                    yield s
Example #31
0
    def _get_streams(self):
        """Yield the HLS variants of every ``streamUrl`` in the player script.

        The query string of each ``<script id="playerScript">`` ``src`` is
        scanned for ``streamUrl`` parameters; the n-th one found is requested
        with ``id=n``.
        """
        self.session.http.headers.update({"User-Agent": useragents.FIREFOX})
        page = self.session.http.get(self.url)
        for script in itertags(page.text, 'script'):
            if script.attributes.get("id") != "playerScript":
                continue

            log.debug("Found the playerScript script tag")
            query = urlparse(script.attributes.get("src")).query
            stream_urls = [
                value for name, value in parse_qsl(query)
                if name == "streamUrl"
            ]
            for index, stream_url in enumerate(stream_urls, start=1):
                variants = HLSStream.parse_variant_playlist(
                    self.session, stream_url, params=dict(id=index),
                    verify=False)
                yield from variants.items()
Example #32
0
def _get_eplus_data(session, eplus_url):
    """Collect the title and channel URL from an eplus event/video page.

    ``eplus_url`` should be in the form
    https://live.eplus.jp/ex/player?ib=<key>

    The returned dict may contain ``title`` (unescaped, stripped text of the
    first ``<title>`` tag) and ``channel_url`` (the quoted content of the
    page's ``listChannels`` JavaScript array with ``\\/`` turned into ``/``).
    A key is left out when the page does not provide the matching data.
    """
    page = session.http.get(eplus_url).text
    data = {}

    first_title = next(iter(itertags(page, "title")), None)
    if first_title is not None:
        data["title"] = html.unescape(first_title.text.strip())

    channels = re.search(r"""var listChannels = \["(?P<channel_url>.*)"\]""", page)
    if channels:
        # the URL is embedded in JavaScript with escaped slashes
        data["channel_url"] = channels.group("channel_url").replace(r"\/", "/")
    return data
Example #33
0
    def _get_streams(self):
        """Yield the streams of a vk.com video.

        After following a possible redirect, the ``show_inline`` action is
        posted to ``self.API_URL`` for the video id taken from the matched
        URL. The response is then searched twice:

        * ``<iframe>`` tags with a ``src`` are resolved through the session,
        * ``<source>`` tags are turned into HLS streams (type
          ``application/vnd.apple.mpegurl``) or HTTP streams (type
          ``video/mp4``).

        :return: generator of ``(name, stream)`` pairs
        """
        self.session.http.headers.update({'User-Agent': useragents.IPHONE_6})

        self.follow_vk_redirect()

        video_id = self.match.group('video_id')
        log.debug('video ID: {0}'.format(video_id))

        res = self.session.http.post(
            self.API_URL,
            params=dict(act='show_inline', al='1', video=video_id),
        )

        # embedded players: let the session resolve the iframe URL
        for iframe in itertags(res.text, 'iframe'):
            src = iframe.attributes.get('src')
            if not src:
                continue
            iframe_url = update_scheme(self.url, src)
            log.debug('Found iframe: {0}'.format(iframe_url))
            yield from self.session.streams(iframe_url).items()

        # backslashes are removed from the response before <source> parsing
        for source in itertags(res.text.replace('\\', ''), 'source'):
            mime = source.attributes.get('type')
            if mime == 'application/vnd.apple.mpegurl':
                hls_url = html_unescape(source.attributes['src'])
                variants = HLSStream.parse_variant_playlist(self.session, hls_url)
                if variants:
                    yield from variants.items()
                else:
                    # no variants found: expose the URL itself as one stream
                    yield 'live', HLSStream(self.session, hls_url)
            elif mime == 'video/mp4':
                mp4_url = source.attributes['src']
                found = self._vod_quality_re.search(mp4_url)
                name = '{0}p'.format(found.group(1)) if found else 'vod'
                yield name, HTTPStream(self.session, mp4_url)
    def _get_streams(self):
        """Yield ``(name, stream)`` pairs for a live page or an on-demand page.

        The page is fetched with a Chrome User-Agent and its ``<source>``
        tags are collected. Their protocol-relative ``src`` values are
        prefixed with ``https:``.

        * Live pages (``page/live`` in the URL) use the first ``<source>``
          tag. Depending on the ``parse_hls`` option the URL is either
          parsed as an HLS variant playlist or exposed as a single HTTP
          stream named ``live``.
        * All other pages yield one HTTP stream per ``<source>`` tag, named
          after the tag's ``type`` attribute.

        :return: generator of ``(name, stream)`` pairs; empty when a live
                 page has no ``<source>`` tag
        """
        headers = {'User-Agent': CHROME}

        res = self.session.http.get(self.url, headers=headers)
        sources = list(itertags(res.text, 'source'))

        live = 'page/live' in self.url
        if live:
            if not sources:
                # nothing playable on the page: report "no streams" instead
                # of failing with an IndexError
                return
            stream = 'https:' + sources[0].attributes['src']
        else:
            # the last <source> tag is left out, exactly as before
            # NOTE(review): the reason is not visible here - confirm
            stream = [(i.attributes['type'], 'https:' + i.attributes['src'])
                      for i in sources[:-1]]

        headers.update({"Referer": self.url})

        try:
            parse_hls = bool(strtobool(self.get_option('parse_hls')))
        except AttributeError:
            # raised when the option value is not a string (e.g. unset)
            parse_hls = True

        if not live:
            for q, s in stream:
                yield q, HTTPStream(self.session, s, headers=headers)
            return

        # A generator must yield (name, stream) pairs; the former code
        # yielded whole dicts here, which the caller cannot unpack.
        if parse_hls:
            variants = HLSStream.parse_variant_playlist(self.session,
                                                        stream,
                                                        headers=headers)
            for name, hls_stream in variants.items():
                yield name, hls_stream
        else:
            yield 'live', HTTPStream(self.session, stream, headers=headers)
Example #35
0
    def _get_streams(self):
        """Yield the HLS streams of the embedded ``embed.lsm.lv`` player.

        The page is searched for the first ``<iframe>`` whose ``src``
        contains ``embed.lsm.lv``. That iframe is then fetched and every
        ``<source>`` tag whose URL path ends in ``.m3u8`` is parsed as an
        HLS variant playlist.

        :return: generator of ``(name, stream)`` pairs; empty when no player
                 iframe is found
        """
        self.session.http.headers.update({"Referer": self.url})

        iframe_url = None
        res = self.session.http.get(self.url)
        for iframe in itertags(res.text, "iframe"):
            # iframes without a src attribute give None; fall back to "" so
            # the membership test cannot raise a TypeError
            src = iframe.attributes.get("src") or ""
            if "embed.lsm.lv" in src:
                iframe_url = src
                break

        if not iframe_url:
            log.error("Could not find player iframe")
            return

        log.debug("Found iframe: {0}".format(iframe_url))
        res = self.session.http.get(iframe_url)
        for source in itertags(res.text, "source"):
            stream_url = source.attributes.get("src")
            if not stream_url:
                continue
            url_path = urlparse(stream_url).path
            if url_path.endswith(".m3u8"):
                yield from HLSStream.parse_variant_playlist(self.session, stream_url).items()
            else:
                log.debug("Not used URL path: {0}".format(url_path))
Example #36
0
    def login(self, username, password):
        """Sign in and return the token found in the login response.

        The sign-in page is fetched first to read the ``csrf_ustvnow`` form
        field, which is then posted together with the credentials.

        :param username: value sent as ``signin_email``
        :param password: value sent as ``signin_password``
        :return: first group of ``self._token_re`` in the response body, or
                 a falsy value when the pattern does not match
        """
        r = http.get(self._signin_url)
        csrf = None

        for field in itertags(r.text, "input"):
            # .get() instead of indexing: an <input> without a name or value
            # attribute must not abort the login with a KeyError
            if field.attributes.get("name") == "csrf_ustvnow":
                csrf = field.attributes.get("value")

        log.debug("CSRF: {0}", csrf)

        r = http.post(self._login_url, data={'csrf_ustvnow': csrf,
                                             'signin_email': username,
                                             'signin_password': password,
                                             'signin_remember': '1'})
        m = self._token_re.search(r.text)
        return m and m.group(1)
Example #37
0
    def _get_streams(self):
        """Yield HLS variant streams taken from the ``playerScript`` tag.

        The page is fetched with a Firefox User-Agent. The ``src`` query
        string of each ``<script id="playerScript">`` tag is parsed, and
        every ``streamUrl`` parameter in it is loaded as an HLS variant
        playlist.

        :return: generator of ``(name, stream)`` pairs
        """
        self.session.http.headers.update({"User-Agent": useragents.FIREFOX})
        response = self.session.http.get(self.url)
        for script_tag in itertags(response.text, 'script'):
            if script_tag.attributes.get("id") != "playerScript":
                continue

            log.debug("Found the playerScript script tag")
            src_parts = urlparse(script_tag.attributes.get("src"))
            playlists = [value for name, value in parse_qsl(src_parts.query)
                         if name == "streamUrl"]

            # the n-th streamUrl is requested with id=n, counting from 1
            for number, playlist_url in enumerate(playlists, 1):
                variants = HLSStream.parse_variant_playlist(
                    self.session, playlist_url, params=dict(id=number),
                    verify=False)
                for entry in variants.items():
                    yield entry
Example #38
0
    def login(self, username, password):
        """Sign in and return the token found in the login response.

        The sign-in page is fetched first to read the ``csrf_ustvnow`` form
        field, which is then posted together with the credentials.

        :param username: value sent as ``signin_email``
        :param password: value sent as ``signin_password``
        :return: first group of ``self._token_re`` in the response body, or
                 a falsy value when the pattern does not match
        """
        r = http.get(self._signin_url)
        csrf = None

        for field in itertags(r.text, "input"):
            # .get() instead of indexing: an <input> without a name or value
            # attribute must not abort the login with a KeyError
            if field.attributes.get("name") == "csrf_ustvnow":
                csrf = field.attributes.get("value")

        self.logger.debug("CSRF: {0}", csrf)

        r = http.post(self._login_url, data={'csrf_ustvnow': csrf,
                                             'signin_email': username,
                                             'signin_password': password,
                                             'signin_remember': '1'})
        m = self._token_re.search(r.text)
        return m and m.group(1)
Example #39
0
    def _get_streams(self):
        """Return the streams for a live-stream, starx or regular page.

        The playable URL is looked up depending on the page URL:

        * ``live-stream``: first ``http...m3u8`` URL inside the first
          ``<script>`` whose text mentions ``m3u8``,
        * ``starx``: ``self._player_url`` formatted with the id passed to
          ``kalturaPlayer('...')``,
        * otherwise: first ``http...m3u8`` URL anywhere in the page.

        :return: dict of streams; the parsed HLS variants when the
                 ``parse_hls`` option is enabled or unset, else a single
                 HTTP stream named ``stream``
        :raises PluginError: when no playable URL is found
        """
        headers = {'User-Agent': CHROME}

        res = self.session.http.get(self.url, headers=headers)

        m3u8_pattern = r"(?P<url>http.+?\.m3u8)"

        if 'live-stream' in self.url:
            # Scripts without text are skipped, and a page without a
            # matching script ends in the PluginError below rather than in
            # an IndexError/TypeError.
            script = next(
                (tag.text for tag in itertags(res.text, 'script')
                 if tag.text and 'm3u8' in tag.text),
                None)
            found = re.search(m3u8_pattern, script) if script else None
            stream = found.group('url') if found else None
        elif 'starx' in self.url:
            found = re.search(r"kalturaPlayer\('(?P<id>\w+)'", res.text)
            stream = self._player_url.format(found.group('id')) if found else None
        else:
            found = re.search(m3u8_pattern, res.text)
            stream = found.group('url') if found else None

        if not stream:
            raise PluginError('Did not find the playable url')

        headers.update({"Referer": self.url})

        try:
            parse_hls = bool(strtobool(self.get_option('parse_hls')))
        except AttributeError:
            # raised when the option value is not a string (e.g. unset)
            parse_hls = True

        if parse_hls:
            return HLSStream.parse_variant_playlist(self.session,
                                                    stream,
                                                    headers=headers)
        else:
            return dict(
                stream=HTTPStream(self.session, stream, headers=headers))
Example #40
0
    def _fetch_fb_api_key(self):
        """Return the Firebase API key read from the site's main JS bundle.

        The page at ``self._BASE_URL`` is scanned for the first ``<script>``
        whose ``src`` starts with ``/static/js/main`` and contains ``.js``.
        That script is downloaded and searched for the
        ``REACT_APP_FB_API_KEY`` assignment.

        :return: the key, or ``None`` when the script or the key is missing
        """
        index_page = self.session.http.get(self._BASE_URL).text

        script_sources = (tag.attributes.get("src", "")
                          for tag in itertags(index_page, "script"))
        bundle_path = next(
            (path for path in script_sources
             if re.match(r"/static/js/main.*\.js", path)),
            None,
        )
        if bundle_path is None:
            return None

        bundle = self.session.http.get(f"{self._BASE_URL}{bundle_path}").text
        found = re.search(r'REACT_APP_FB_API_KEY:\s*"(?P<key>[a-zA-Z0-9\-]+)"',
                          bundle)
        return found.group("key") if found else None
Example #41
0
    def _find_video_id(self, url):
        """Return the video id for ``url``.

        The id is taken, in this order, from:

        1. the ``video_id`` group of ``_url_re``,
        2. a ``currentVideoEndpoint`` object in the page's embedded JSON,
        3. a ``videoRenderer`` object that has a view count or a
           "live now" badge,
        4. for ``/embed/live_stream`` URLs, the page's canonical link, which
           is resolved recursively.

        :param url: page URL to inspect
        :return: the video id
        :raises PluginError: when no video id can be found
        """
        m = _url_re.match(url)
        # a URL that does not match the pattern at all (possible for a
        # canonical link) is resolved through the page content instead of
        # failing with an AttributeError
        if m and m.group("video_id"):
            log.debug("Video ID from URL")
            return m.group("video_id")

        res = self.session.http.get(url)
        datam = _ytdata_re.search(res.text)
        if datam:
            data = parse_json(datam.group(1))
            # find the videoRenderer object, where there is a LIVE NOW badge
            for vid_ep in search_dict(data, 'currentVideoEndpoint'):
                video_id = vid_ep.get("watchEndpoint", {}).get("videoId")
                if video_id:
                    log.debug("Video ID from currentVideoEndpoint")
                    return video_id
            for x in search_dict(data, 'videoRenderer'):
                if x.get("viewCountText", {}).get("runs"):
                    if x.get("videoId"):
                        log.debug("Video ID from videoRenderer (live)")
                        return x["videoId"]
                for bstyle in search_dict(x.get("badges", {}), "style"):
                    if bstyle == "BADGE_STYLE_TYPE_LIVE_NOW":
                        if x.get("videoId"):
                            log.debug("Video ID from videoRenderer (live)")
                            return x["videoId"]

        if "/embed/live_stream" in url:
            for link in itertags(res.text, "link"):
                if link.attributes.get("rel") == "canonical":
                    canon_link = link.attributes.get("href")
                    # a canonical <link> without href cannot be followed
                    if canon_link and canon_link != url:
                        if canon_link.endswith("v=live_stream"):
                            log.debug("The video is not available")
                            break
                        else:
                            log.debug(
                                "Re-directing to canonical URL: {0}".format(
                                    canon_link))
                            return self._find_video_id(canon_link)

        raise PluginError("Could not find a video on this page")
Example #42
0
    def _get_streams(self):
        """
        Finds the streams of a ustvnow station.

        Logs in with the ``username``/``password`` options, loads the guide
        to learn the available station codes and requests the stream of the
        station given by the URL or by the ``station_code`` option.

        :return: the parsed HLS variant playlist, or ``None`` when no or an
                 unknown station code is given
        """
        token = self.login(self.get_option("username"),
                           self.get_option("password"))
        m = self._url_re.match(self.url)
        # the URL wins; the option is the fallback
        scode = (m.group("scode") if m else None) or self.get_option("station_code")

        res = self.session.http.get(self._guide_url, params=dict(token=token))

        # station code (lower-cased) -> station title without "Watch "
        channels = OrderedDict()
        for t in itertags(res.text, "a"):
            code = t.attributes.get('cs')
            if code:
                # a link without a title must not break the channel listing
                title = t.attributes.get('title') or ""
                channels[code.lower()] = title.replace("Watch ", "").strip()

        if not scode:
            log.error("Station code not provided, use --ustvnow-station-code.")
            log.info("Available stations are: \n{0} ".format('\n'.join(
                '    {0} ({1})'.format(c, n) for c, n in channels.items())))
            return

        if scode in channels:
            log.debug("Finding streams for: {0}", channels.get(scode))

            r = self.session.http.get(
                self._stream_url,
                params={
                    "scode": scode,
                    "token": token,
                    "br_n": "Firefox",
                    "br_v": "52",
                    "br_d": "desktop"
                },
                headers={"User-Agent": useragents.FIREFOX})

            data = self.session.http.json(r)
            return HLSStream.parse_variant_playlist(self.session,
                                                    data["stream"])
        else:
            log.error("Invalid station-code: {0}", scode)
Example #43
0
    def _get_streams(self):
        """Return the HLS streams of an ``ott.streann.com`` player.

        When the plugin URL is not on ``ott.streann.com`` itself, the page
        is searched for an iframe pointing there; the hosting page's domain
        is remembered and ``self.url`` is replaced by the iframe URL.
        Otherwise the domain is taken from the ``url`` option. The query
        string of the player URL is base64-decoded and decrypted with the
        passphrase, and the resulting config is used to request a token and
        build the HLS URL.

        :return: the parsed HLS variant playlist, or ``None`` when the
                 iframe, the source domain, the passphrase or the token is
                 missing
        """
        p = urlparse(self.url)
        if "ott.streann.com" != p.netloc:
            self._domain = p.netloc
            res = self.session.http.get(self.url)
            for iframe in itertags(res.text, "iframe"):
                src = iframe.attributes.get("src")
                if not src:
                    # skip iframes without a src attribute instead of
                    # passing None to html_unescape()
                    continue
                iframe_url = html_unescape(src)
                if "ott.streann.com" in iframe_url:
                    self.url = iframe_url
                    break
            else:
                log.error("Could not find 'ott.streann.com' iframe")
                return

        if not self._domain and self.get_option("url"):
            self._domain = urlparse(self.get_option("url")).netloc

        if self._domain is None:
            log.error("Missing source URL use --streann-url")
            return

        self.session.http.headers.update({"Referer": self.url})
        # Get the query string
        encrypted_data = urlparse(self.url).query
        data = base64.b64decode(encrypted_data)
        # and decrypt it
        passphrase = self.passphrase()
        if passphrase:
            log.debug("Found passphrase")
            params = decrypt_openssl(data, passphrase)
            config = parse_qsd(params.decode("utf8"))
            log.trace(f"config: {config!r}")
            token = self.get_token(**config)
            if not token:
                return
            hls_url = self.stream_url.format(time=self.time,
                                             deviceId=self.device_id,
                                             token=token,
                                             **config)
            log.debug("URL={0}".format(hls_url))
            return HLSStream.parse_variant_playlist(
                self.session, hls_url, acceptable_status=(200, 403, 404, 500))
Example #44
0
    def _get_streams(self):
        """Return the DASH streams of a Steam broadcast.

        Logs in first when the ``email`` option is set. steam.tv URLs are
        translated into a watch-broadcast URL using the steam id found in
        the page's ``webui_config`` element. The broadcast endpoint is then
        polled, sleeping for the advertised retry interval, for as long as
        it answers ``waiting`` or ``waiting_for_start``.

        :return: the parsed DASH manifest once the broadcast is ``ready``;
                 ``None`` when it is ``unavailable`` or answers with any
                 other state
        """
        email = self.get_option("email")
        if email and self.login(email, self.get_option("password")):
            log.info("Logged in as {0}".format(email))
            self.save_cookies(lambda c: "steamMachineAuth" in c.name)

        # Handle steam.tv URLs
        if self._steamtv_url_re.match(self.url) is not None:
            # extract the steam ID from the page
            page = self.session.http.get(self.url)
            for div in itertags(page.text, 'div'):
                if div.attributes.get("id") != "webui_config":
                    continue
                broadcast = parse_json(
                    html_unescape(div.attributes.get("data-broadcast")))
                self.url = self._watch_broadcast_url + broadcast.get("steamid")

        # extract the steam ID from the URL
        steamid = self._url_re.match(self.url).group(1)
        # get the page to set some cookies
        res = self.session.http.get(self.url)
        sessionid = res.cookies.get('sessionid')

        while True:
            streamdata = self._get_broadcast_stream(steamid,
                                                    sessionid=sessionid)
            status = streamdata[u"success"]

            if status == "ready":
                return DASHStream.parse_manifest(self.session,
                                                 streamdata["url"])
            if status == "unavailable":
                log.error("This stream is currently unavailable")
                return

            # the retry value is divided by 1000 to get seconds
            delay = streamdata[u"retry"] / 1000.0
            log.info(
                "Waiting for stream, will retry again in {} seconds...".
                format(delay))
            time.sleep(delay)

            # only the two waiting states are polled again
            if status not in ("waiting", "waiting_for_start"):
                return
Example #45
0
 def login_csrf(self):
     """Return the CSRF token embedded in the login page.

     The page at ``self.login_url`` is scanned for the first ``<input>``
     whose ``name`` equals ``self.CSRF_NAME``.

     :return: that input's ``value`` attribute, or ``None`` when there is
              no such input
     """
     page = self.session.http.get(self.login_url)
     matching_values = (
         field.attributes.get("value")
         for field in itertags(page.text, "input")
         if field.attributes.get("name") == self.CSRF_NAME
     )
     return next(matching_values, None)
Example #46
0
 def _get_isvp_url(self):
     """Return the ``src`` of the first iframe accepted by ``self.url_re``.

     An iframe is accepted when its ``src`` matches ``self.url_re`` and the
     first group of the match is set. Iframes without a ``src`` and
     iframes that do not match are skipped, so an unrelated iframe earlier
     in the page no longer hides the player iframe.

     :return: the iframe URL, or ``None`` when no iframe qualifies
     """
     res = self.session.http.get(self.url)
     for iframe in itertags(res.text, 'iframe'):
         src = iframe.attributes.get('src')
         if not src:
             # re.match() raises a TypeError on None
             continue
         m = self.url_re.match(src)
         if m and m.group(1) is not None:
             return src
     return None
 def test_itertags_single_text(self):
     """A single ``<title>`` tag is found with its text and no attributes."""
     title = list(itertags(self.test_html, "title"))
     # assertTrue(len(title), 1) only checked truthiness, with 1 as the
     # failure message; assertEqual actually pins the number of tags
     self.assertEqual(len(title), 1)
     self.assertEqual(title[0].tag, "title")
     self.assertEqual(title[0].text, "Title")
     self.assertEqual(title[0].attributes, {})
 def test_tag_inner_tag(self):
     """The text of a ``<p>`` tag keeps the markup of the tag nested in it."""
     links = list(itertags(self.test_html, "p"))
     # assertTrue(len(links), 1) only checked truthiness, with 1 as the
     # failure message; assertEqual actually pins the number of tags
     self.assertEqual(len(links), 1)
     self.assertEqual(links[0].tag, "p")
     self.assertEqual(links[0].text.strip(), '<a \nhref="http://test.se/foo">bar</a>')
     self.assertEqual(links[0].attributes, {})
Example #49
0
 def video_info(self):
     """Return the attributes of the page's ``<div id="video">`` element.

     :return: the attributes of the first matching div, or ``None`` when
              the page has no such element
     """
     response = self.session.http.get(self.url)
     video_divs = (
         tag.attributes
         for tag in itertags(response.text, 'div')
         if tag.attributes.get("id") == "video"
     )
     return next(video_divs, None)
 def test_multi_line_a(self):
     """An ``<a>`` tag whose attributes span several lines is parsed."""
     anchor = list(itertags(self.test_html, "a"))
     # assertTrue(len(anchor), 1) only checked truthiness, with 1 as the
     # failure message; assertEqual actually pins the number of tags
     self.assertEqual(len(anchor), 1)
     self.assertEqual(anchor[0].tag, "a")
     self.assertEqual(anchor[0].text, "bar")
     self.assertEqual(anchor[0].attributes, {"href": "http://test.se/foo"})
Example #51
0
 def _get_iframe_url(self, url):
     """Return the first non-empty iframe ``src`` found on the page at ``url``.

     :param url: address of the page to fetch
     :return: the ``src`` value, or ``None`` when no iframe has one
     """
     page = self.session.http.get(url)
     sources = (tag.attributes.get("src")
                for tag in itertags(page.text, 'iframe'))
     return next((candidate for candidate in sources if candidate), None)
Example #52
0
 def _resolve_stream(self):
     """Parse the HLS playlist named by the page's first ``<video>`` tag.

     The playlist URL is read from the tag's ``data-stream`` attribute and
     the parsed variant playlist is returned right away; later ``<video>``
     tags are never looked at.
     """
     page = self.session.http.get(self.url)
     for video_tag in itertags(page.text, 'video'):
         data_stream = video_tag.attributes.get("data-stream")
         log.debug("Stream data: {0}".format(data_stream))
         variants = HLSStream.parse_variant_playlist(self.session, data_stream)
         return variants