def _get_streams(self):
    """Yield HLS variant streams found behind the ``embed.lsm.lv`` iframe.

    Fetches ``self.url``, picks the first iframe whose ``src`` contains
    ``embed.lsm.lv``, loads it and yields ``(name, stream)`` pairs for every
    ``<source>`` whose URL path ends in ``.m3u8``. Logs an error and yields
    nothing when no such iframe exists.
    """
    self.session.http.headers.update({
        "Referer": self.url,
        "User-Agent": useragents.FIREFOX
    })

    iframe_url = None
    res = self.session.http.get(self.url)
    for iframe in itertags(res.text, "iframe"):
        # An iframe without a src attribute returns None here; fall back to
        # "" so the substring test cannot raise TypeError.
        src = iframe.attributes.get("src") or ""
        if "embed.lsm.lv" in src:
            iframe_url = src
            break

    if not iframe_url:
        log.error("Could not find player iframe")
        return

    log.debug("Found iframe: {0}".format(iframe_url))
    res = self.session.http.get(iframe_url)
    for source in itertags(res.text, "source"):
        stream_url = source.attributes.get("src")
        if not stream_url:
            continue
        url_path = urlparse(stream_url).path
        if url_path.endswith(".m3u8"):
            for s in HLSStream.parse_variant_playlist(self.session, stream_url).items():
                yield s
        else:
            log.debug("Not used URL path: {0}".format(url_path))
def _get_source_streams(self):
    """Return HLS variant streams for the first embedded video source.

    Walks the ``<a>`` tags of ``self.url`` whose class contains
    ``video-play__link``, loads the linked page and returns the parsed
    variant playlist of the first ``<source>`` tag found there. Returns
    ``None`` implicitly when nothing matches.
    """
    page = self.session.http.get(self.url)
    for anchor in itertags(page.text, 'a'):
        if "video-play__link" not in anchor.attributes.get("class", ""):
            continue

        video_page_url = urljoin(self.url, anchor.attributes.get("href"))
        log.debug("Loading embedded video page")
        query = dict(ajax="true", npo_cc_skip_wall="true")
        video_page = self.session.http.get(video_page_url, params=query)

        for source in itertags(video_page.text, 'source'):
            # Only the first <source> element is ever used.
            playlist_url = source.attributes.get("src")
            return HLSStream.parse_variant_playlist(self.session, playlist_url)
def _get_streams(self):
    """
    Yield the streams for a vk.com video.

    Posts the video ID to ``self.API_URL`` and yields, in order: the streams
    of every iframe found in the response, then HLS and MP4 streams built
    from the ``<source>`` tags.
    """
    self.session.http.headers.update({'User-Agent': useragents.IPHONE_6})

    # A 'videos' catalog URL may carry the video ID in its query string;
    # follow_vk_redirect resolves that to the URL matched below.
    resolved_url = self.follow_vk_redirect(self.url)
    url_match = self._url_re.match(resolved_url)
    if not url_match:
        log.error('URL is not compatible: {0}'.format(resolved_url))
        return

    video_id = url_match.group('video_id')
    log.debug('video ID: {0}'.format(video_id))

    res = self.session.http.post(
        self.API_URL,
        params={'act': 'show_inline', 'al': '1', 'video': video_id},
    )

    for iframe in itertags(res.text, 'iframe'):
        if not iframe.attributes.get('src'):
            continue
        iframe_url = update_scheme(self.url, iframe.attributes['src'])
        log.debug('Found iframe: {0}'.format(iframe_url))
        for item in self.session.streams(iframe_url).items():
            yield item

    for source in itertags(res.text, 'source'):
        mime = source.attributes.get('type')
        if mime == 'application/vnd.apple.mpegurl':
            video_url = source.attributes['src']
            # Remove invalid URL
            if video_url.startswith('https://vk.com/'):
                continue
            variants = HLSStream.parse_variant_playlist(self.session, video_url)
            if variants:
                for item in variants.items():
                    yield item
            else:
                yield 'live', HLSStream(self.session, video_url)
        elif mime == 'video/mp4':
            video_url = source.attributes['src']
            quality_match = self._vod_quality_re.search(video_url)
            if quality_match:
                quality = '{0}p'.format(quality_match.group(1))
            else:
                quality = 'vod'
            yield quality, HTTPStream(self.session, video_url)
def _get_streams(self):
    """Return HLS variant streams for the player behind the ``play-live`` link.

    Raises:
        PluginError: when no ``<a class="play-live">`` is found, or when the
            iframe URL path does not split into exactly six segments.
    """
    self.session.http.headers.update({'User-Agent': useragents.FIREFOX})

    page = self.session.http.get(self.url)
    iframe_url = None
    for anchor in itertags(page.text, 'a'):
        if anchor.attributes.get('class') != 'play-live':
            continue
        iframe_url = update_scheme(self.url, anchor.attributes['data-url'])
        break

    if not iframe_url:
        raise PluginError('Could not find iframe.')

    parsed = urlparse(iframe_url)
    segments = parsed.path.split('/')
    if len(segments) != 6:
        # Only one known iframe URL layout is supported; with any other
        # layout the video id may sit at a different position.
        raise PluginError('unsupported iframe URL: {0}'.format(iframe_url))

    api_url = self.API_URL.format(netloc=parsed.netloc, id=segments[4])
    api_res = self.session.http.get(api_url)
    data = self.session.http.json(api_res, schema=self._api_schema)
    log.trace('{0!r}'.format(data))

    playlist_url = self.PLAYLIST_URL.format(
        app=data['streamProperties']['application'],
        name=data['playStreamName'],
        netloc=data['cdnHost'],
    )
    return HLSStream.parse_variant_playlist(self.session, playlist_url)
def _get_streams(self):
    """Poll the broadcast endpoint and return DASH streams once ready.

    Optionally logs in first (when the ``email`` option is set), rewrites
    steam.tv URLs to the watch-broadcast URL, then polls
    ``_get_broadcast_stream`` until the status is ``ready`` (returns the
    parsed DASH manifest), ``unavailable`` (logs and returns ``None``) or
    anything other than a waiting status.
    """
    email = self.get_option("email")
    if email:
        if self.login(email, self.get_option("password")):
            log.info("Logged in as {0}".format(self.get_option("email")))
            self.save_cookies(lambda c: "steamMachineAuth" in c.name)

    # Handle steam.tv URLs
    if self._steamtv_url_re.match(self.url) is not None:
        # extract the steam ID from the page
        page = self.session.http.get(self.url)
        for div in itertags(page.text, 'div'):
            if div.attributes.get("id") != "webui_config":
                continue
            broadcast_data = html_unescape(div.attributes.get("data-broadcast"))
            steamid = parse_json(broadcast_data).get("steamid")
            self.url = self._watch_broadcast_url + steamid

    # extract the steam ID from the URL
    steamid = self._url_re.match(self.url).group(1)
    res = self.session.http.get(self.url)  # get the page to set some cookies
    sessionid = res.cookies.get('sessionid')

    while True:
        streamdata = self._get_broadcast_stream(steamid, sessionid=sessionid)
        status = streamdata[u"success"]
        if status == "ready":
            return DASHStream.parse_manifest(self.session, streamdata["url"])
        if status == "unavailable":
            log.error("This stream is currently unavailable")
            return

        delay = streamdata[u"retry"] / 1000.0
        log.info("Waiting for stream, will retry again in {} seconds...".format(delay))
        time.sleep(delay)

        # Keep polling only while the broadcast reports a waiting state.
        if streamdata[u"success"] not in ("waiting", "waiting_for_start"):
            return
def _get_streams(self):
    """
    Finds the streams for a USTVNow station.

    The station code comes from the URL (``scode`` group) or, failing that,
    from the ``station_code`` option. The guide page is scraped for
    ``<a cs="...">`` tags to build the list of known stations. Returns the
    HLS variant streams for the station, or ``None`` after logging an error
    when the code is missing or unknown.
    """
    token = self.login(self.get_option("username"), self.get_option("password"))
    m = self._url_re.match(self.url)
    scode = (m.group("scode") if m else None) or self.get_option("station_code")

    res = http.get(self._guide_url)

    channels = {}
    for t in itertags(res.text, "a"):
        code = t.attributes.get('cs')
        if code:
            # A tag without a title attribute would otherwise crash on
            # None.replace(); fall back to an empty name.
            title = t.attributes.get('title') or ""
            channels[code.lower()] = title.replace("Watch ", "")

    if not scode:
        self.logger.error("Station code not provided, use --ustvnow-station-code.")
        self.logger.error("Available stations are: {0}", ", ".join(channels))
        return

    if scode in channels:
        self.logger.debug("Finding streams for: {0}", channels.get(scode))

        r = http.get(self._stream_url,
                     params={"scode": scode,
                             "token": token,
                             "br_n": "Firefox",
                             "br_v": "52",
                             "br_d": "desktop"},
                     headers={"User-Agent": useragents.FIREFOX})

        data = http.json(r)
        return HLSStream.parse_variant_playlist(self.session, data["stream"])
    else:
        self.logger.error("Invalid station-code: {0}", scode)
def _find_video_id(self, url):
    """Return the video ID for *url*.

    Tries, in order: the ``video_id`` group of the URL itself, the
    ``currentVideoEndpoint`` entries of the embedded page data, any
    ``videoRenderer`` carrying a live-now badge, and finally (for
    ``/embed/live_stream`` URLs) the page's canonical link, which is
    resolved recursively.

    Raises:
        PluginError: when no video ID can be found.
    """
    m = _url_re.match(url)
    # The canonical-link recursion below may pass a URL the pattern does not
    # match, so match() can return None here.
    if m and m.group("video_id"):
        log.debug("Video ID from URL")
        return m.group("video_id")

    res = self.session.http.get(url)
    datam = _ytdata_re.search(res.text)
    if datam:
        data = parse_json(datam.group(1))
        # find the videoRenderer object, where there is a LIVE NOW badge
        for vid_ep in search_dict(data, 'currentVideoEndpoint'):
            video_id = vid_ep.get("watchEndpoint", {}).get("videoId")
            if video_id:
                log.debug("Video ID from currentVideoEndpoint")
                return video_id
        for x in search_dict(data, 'videoRenderer'):
            for bstyle in search_dict(x.get("badges", {}), "style"):
                if bstyle == "BADGE_STYLE_TYPE_LIVE_NOW":
                    if x.get("videoId"):
                        log.debug("Video ID from videoRenderer (live)")
                        return x["videoId"]

    if "/embed/live_stream" in url:
        for link in itertags(res.text, "link"):
            if link.attributes.get("rel") == "canonical":
                canon_link = link.attributes.get("href")
                # Skip a canonical tag with no href instead of recursing
                # with None.
                if canon_link and canon_link != url:
                    log.debug("Re-directing to canonical URL: {0}".format(canon_link))
                    return self._find_video_id(canon_link)

    raise PluginError("Could not find a video on this page")
def _get_streams(self):
    """Resolve the HLS stream behind the ``videoFrame`` iframe.

    Follows page -> iframe ``videoFrame`` -> ``span.webcaster-player``
    config URL -> ``video_hd`` XML -> ``iphone/track`` and returns the
    variant streams of the first track found, or a single ``live`` stream
    when the playlist has no variants. Returns ``None`` when the iframe or
    the config URL is missing.
    """
    log.debug('Version 2018-07-12')
    log.info('This is a custom plugin. '
             'For support visit https://github.com/back-to/plugins')
    self.session.http.headers.update({'User-Agent': useragents.FIREFOX})

    page = self.session.http.get(self.url)
    iframe_res = ''
    for iframe in itertags(page.text, 'iframe'):
        log.debug('Found iframe: {0}'.format(iframe))
        if iframe.attributes.get('id') != 'videoFrame':
            continue
        iframe_res = self.session.http.get(iframe.attributes['src'])
        break

    if not iframe_res:
        log.debug('No iframe found.')
        return

    xml_url = ''
    for span in itertags(iframe_res.text, 'span'):
        if span.attributes.get('class') != 'webcaster-player':
            continue
        config = span.attributes['data-config']
        # Drop a leading "config=" prefix, if any.
        xml_url = re.sub(r'^config=(.*)', r'\1', config)
        break

    if not xml_url:
        log.debug('No xml_url found.')
        return

    config_res = self.session.http.get(xml_url)
    config_root = self.session.http.xml(config_res, ignore_ns=True)
    for video_hd in config_root.findall('./video_hd'):
        log.debug('Found video_hd')
        video_res = self.session.http.get(video_hd.text)
        video_root = self.session.http.xml(video_res, ignore_ns=True)
        for track in video_root.findall('./iphone/track'):
            log.debug('Found iphone/track')
            hls_url = track.text
            log.debug('URL={0}'.format(hls_url))
            variants = HLSStream.parse_variant_playlist(self.session, hls_url)
            if variants:
                return variants
            return {'live': HLSStream(self.session, hls_url)}
def _get_streams(self):
    """Yield the API streams of every ``dvideo`` player div on the page."""
    page = self.session.http.get(self.url)
    for div in itertags(page.text, 'div'):
        if div.attributes.get("data-provider") != "dvideo":
            continue
        video_id = div.attributes.get("data-id")
        log.debug("Found video ID: {0}".format(video_id))
        for stream in self._get_streams_api(video_id):
            yield stream
def test_no_end_tag(self):
    """A ``<link>`` tag is returned with its attributes and ``None`` text."""
    links = list(itertags(self.test_html, "link"))
    # assertTrue(len(links), 1) treats 1 as the failure message and passes
    # for any non-empty list; assertEqual actually checks the count.
    self.assertEqual(len(links), 1)
    self.assertEqual(links[0].tag, "link")
    self.assertEqual(links[0].text, None)
    self.assertEqual(links[0].attributes, {"rel": "stylesheet",
                                           "type": "text/css",
                                           "href": "https://test.se/test.css"})
def _get_streams(self):
    """Yield streams for the page, falling back through three strategies.

    1. Streams from ``_parse_streams`` on the page itself.
    2. A single ``sd`` HTTP stream taken from the playlist regexes.
    3. Streams from ``_parse_streams`` on the tahoe player response.

    Yields nothing (after logging an error) when the page title shows the
    login wall.
    """
    self.session.set_option("ffmpeg-start-at-zero", True)
    self.session.http.headers.update({"Accept-Language": "en-US"})

    res = self.session.http.get(self.url)
    log.trace(f"{res.url}")

    for title in itertags(res.text, "title"):
        if title.text.startswith("Log into Facebook"):
            log.error("Video is not available, You must log in to continue.")
            return

    found_any = False
    for stream in self._parse_streams(res):
        found_any = True
        yield stream
    if found_any:
        return

    # fallback on to playlist
    log.debug("Falling back to playlist regex")
    playlist_match = self._playlist_re.search(res.text)
    playlist = playlist_match and playlist_match.group(1)
    if playlist:
        url_match = self._plurl_re.search(playlist)
        if url_match:
            yield "sd", HTTPStream(self.session, url_match.group(1))
            return

    # fallback to tahoe player url
    log.debug("Falling back to tahoe player")
    video_id = self.match.group("video_id")
    tahoe_url = self._TAHOE_URL.format(video_id)
    form = {
        "__a": 1,
        "__pc": self._DEFAULT_PC,
        "__rev": self._DEFAULT_REV,
        "fb_dtsg": "",
    }
    # Override the defaults with any values embedded in the page.
    overrides = (
        ("__pc", self._pc_re),
        ("__rev", self._rev_re),
        ("fb_dtsg", self._dtsg_re),
    )
    for field, pattern in overrides:
        field_match = pattern.search(res.text)
        if field_match:
            form[field] = field_match.group(1)

    tahoe_res = self.session.http.post(
        tahoe_url,
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        data=urlencode(form).encode("ascii")
    )

    for stream in self._parse_streams(tahoe_res):
        yield stream
def _get_res(self, url):
    """GET *url*, submitting the consent form when redirected to it.

    When the response URL's host is ``consent.youtube.com``, every hidden
    ``<input>`` of that page is posted back to
    ``https://consent.youtube.com/s`` and that response is returned instead.
    """
    res = self.session.http.get(url)
    if urlparse(res.url).netloc != "consent.youtube.com":
        return res

    hidden_inputs = (
        tag for tag in itertags(res.text, "input")
        if tag.attributes.get("type") == "hidden"
    )
    c_data = {
        tag.attributes.get("name"): unescape(tag.attributes.get("value"))
        for tag in hidden_inputs
    }
    log.debug(f"c_data_keys: {', '.join(c_data.keys())}")
    return self.session.http.post("https://consent.youtube.com/s", data=c_data)
def _get_streams(self):
    """Hand the first iframe with a ``src`` off to the session's plugins.

    Returns whatever ``self.session.streams`` returns for that iframe URL,
    or ``None`` when no plugin handles it or the page has no usable iframe.
    """
    page = self.session.http.get(self.url)
    for iframe in itertags(page.text, u"iframe"):
        # Skip iframes without a src attribute instead of raising KeyError.
        url = iframe.attributes.get("src")
        if not url:
            continue
        self.logger.debug("Handing off of {0}".format(url))
        try:
            return self.session.streams(update_scheme(self.url, url))
        except NoPluginError:
            self.logger.error("Handing off of {0} failed".format(url))
            return None
def _get_streams(self):
    """Hand the first iframe with a ``src`` off to the session's plugins.

    Returns whatever ``self.session.streams`` returns for that iframe URL,
    or ``None`` when no plugin handles it or the page has no usable iframe.
    """
    page = self.session.http.get(self.url)
    for iframe in itertags(page.text, u"iframe"):
        # Skip iframes without a src attribute instead of raising KeyError.
        url = iframe.attributes.get("src")
        if not url:
            continue
        log.debug("Handing off of {0}".format(url))
        try:
            return self.session.streams(update_scheme(self.url, url))
        except NoPluginError:
            log.error("Handing off of {0} failed".format(url))
            return None
def test_itertags_attrs_text(self):
    """``<script>`` tags expose both their attributes and their inner text."""
    script = list(itertags(self.test_html, "script"))
    # assertTrue(len(script), 2) never compared the count (2 was only the
    # failure message); assertEqual does.
    self.assertEqual(len(script), 2)
    self.assertEqual(script[0].tag, "script")
    self.assertEqual(script[0].text, "")
    self.assertEqual(script[0].attributes, {"src": "https://test.se/test.js"})

    self.assertEqual(script[1].tag, "script")
    self.assertEqual(script[1].text.strip(), """Tester.ready(function () {\nalert("Hello, world!"); });""")
    self.assertEqual(script[1].attributes, {})
def _get_streams(self):
    """Return HLS variant streams from the first iframe exposing a stream URL.

    Each iframe on the page is fetched (with the page as Referer) and
    searched with ``self.src_re``; the first ``url`` group found is parsed
    as an HLS variant playlist. Returns ``None`` when no iframe matches.
    """
    res = http.get(self.url)
    for iframe in itertags(res.text, "iframe"):
        self.logger.debug("Found iframe: {0}".format(iframe))
        # Iframes without a src attribute cannot be fetched; skip them
        # instead of raising KeyError.
        iframe_url = iframe.attributes.get('src')
        if not iframe_url:
            continue
        iframe_res = http.get(iframe_url, headers={"Referer": self.url})

        m = self.src_re.search(iframe_res.text)
        surl = m and m.group("url")
        if surl:
            self.logger.debug("Found stream URL: {0}".format(surl))
            return HLSStream.parse_variant_playlist(self.session, surl)
def _get_streams(self):
    """Return HLS variant streams from the first iframe exposing a stream URL.

    Each iframe on the page is fetched (with the page as Referer) and
    searched with ``self.src_re``; the first ``url`` group found is given
    the page's scheme if needed and parsed as an HLS variant playlist.
    Returns ``None`` when no iframe matches.
    """
    res = self.session.http.get(self.url)
    for iframe in itertags(res.text, "iframe"):
        self.logger.debug("Found iframe: {0}".format(iframe))
        # Iframes without a src attribute cannot be fetched; skip them
        # instead of raising KeyError.
        iframe_url = iframe.attributes.get('src')
        if not iframe_url:
            continue
        iframe_res = self.session.http.get(iframe_url, headers={"Referer": self.url})

        m = self.src_re.search(iframe_res.text)
        surl = m and m.group("url")
        if surl:
            surl = update_scheme(self.url, surl)
            self.logger.debug("Found stream URL: {0}".format(surl))
            return HLSStream.parse_variant_playlist(self.session, surl)
def _get_data_from_api(self, res):
    """Query the ``youtubei/v1/player`` endpoint and return the parsed JSON.

    The video ID comes from ``self.match`` or, when that group is ``None``,
    from the page's canonical ``<link>``. The API key and client version are
    scraped from *res* with hard-coded fallbacks. Returns ``None`` when no
    video ID can be determined.
    """
    video_id = self.match.group("video_id")
    if video_id is None:
        for link in itertags(res.text, "link"):
            if link.attributes.get("rel") != "canonical":
                continue
            canonical = self.matcher.match(link.attributes.get("href"))
            if canonical is None:
                return
            video_id = canonical.group("video_id")
            break
        else:
            return

    key_match = re.search(r'"INNERTUBE_API_KEY":\s*"([^"]+)"', res.text)
    if key_match:
        api_key = key_match.group(1)
    else:
        api_key = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8"

    version_match = re.search(
        r'"INNERTUBE_CLIENT_VERSION":\s*"([\d\.]+)"', res.text)
    if version_match:
        client_version = version_match.group(1)
    else:
        client_version = "1.20210616.1.0"

    client = {
        "clientName": "WEB",
        "clientVersion": client_version,
        "platform": "DESKTOP",
        "clientScreen": "EMBED",
        "clientFormFactor": "UNKNOWN_FORM_FACTOR",
        "browserName": "Chrome",
    }
    payload = {
        "videoId": video_id,
        "contentCheckOk": True,
        "racyCheckOk": True,
        "context": {
            "client": client,
            "user": {"lockedSafetyMode": "false"},
            "request": {"useSsl": "true"},
        }
    }

    api_res = self.session.http.post(
        "https://www.youtube.com/youtubei/v1/player",
        headers={"Content-Type": "application/json"},
        params={"key": api_key},
        data=json.dumps(payload),
    )
    return parse_json(api_res.text)
def _get_streams(self):
    """Return the live HLS streams or the VOD MP4 stream for ``self.url``.

    On the ``/live`` path the stream URL is the first quoted ``http...``
    string in the first ``<script>`` whose text contains ``hls``; elsewhere
    it is the ``href`` of the first ``<a>`` whose href contains ``mp4``.
    Returns ``None`` when no stream URL is found.
    """
    headers = {'User-Agent': CHROME}

    res = self.session.http.get(self.url, headers=headers, verify=False)

    stream = None
    if urlparse(self.url).path == '/live':
        live = True
        for script in itertags(res.text, 'script'):
            if "hls" in (script.text or ''):
                found = re.search(r'''['"](http.+?)['"]''', script.text)
                stream = found.group(1) if found else None
                # Only the first matching script is inspected.
                break
    else:
        live = False
        # A page without any mp4 link used to raise IndexError; it now
        # simply produces no stream.
        for anchor in itertags(res.text, 'a'):
            if "mp4" in anchor.attributes.get('href', ''):
                stream = anchor.attributes.get('href')
                break

    headers.update({"Referer": self.url})

    try:
        parse_hls = bool(strtobool(self.get_option('parse_hls')))
    except AttributeError:
        # The option is unset, so strtobool receives a non-string value.
        parse_hls = True

    if stream:
        if parse_hls and live:
            return HLSStream.parse_variant_playlist(self.session, stream, headers=headers)
        else:
            return dict(vod=HTTPStream(self.session, stream, headers=headers))
def _get_api_info(self, page):
    """Return the token and stream API URLs from the first ``vrtvideo`` div.

    The result is a dict with ``token_url`` and ``stream_url``; the stream
    resource is the livestream id for live videos, otherwise
    ``<publicationid>%24<videoid>``. Returns ``None`` when the page has no
    such div.
    """
    for div in itertags(page.text, 'div'):
        attrs = div.attributes
        if attrs.get("class") != "vrtvideo":
            continue

        api_base = attrs.get("data-mediaapiurl") + "/"
        info = {"token_url": urljoin(api_base, "tokens")}

        if attrs.get("data-videotype") == "live":
            resource = attrs.get("data-livestream")
        else:
            resource = "{0}%24{1}".format(attrs.get("data-publicationid"),
                                          attrs.get("data-videoid"))
        info["stream_url"] = urljoin(urljoin(api_base, "videos/"), resource)
        return info
def test_itertags_multi_attrs(self):
    """Every ``<meta>`` tag is returned with all of its attributes."""
    metas = list(itertags(self.test_html, "meta"))
    # assertTrue(len(metas), 3) only checked for a non-empty list (3 was the
    # failure message); assertEqual checks the count.
    self.assertEqual(len(metas), 3)
    self.assertTrue(all(meta.tag == "meta" for meta in metas))

    self.assertEqual(metas[0].text, None)
    self.assertEqual(metas[1].text, None)
    self.assertEqual(metas[2].text, None)

    self.assertEqual(metas[0].attributes, {"property": "og:type", "content": "website"})
    self.assertEqual(metas[1].attributes, {"property": "og:url", "content": "http://test.se/"})
    self.assertEqual(metas[2].attributes, {"property": "og:site_name", "content": "Test"})
def omegacy(link):
    """Alternative method.

    Requests *link* once for its cookie and once more with that cookie,
    takes the first ``<script>`` whose text starts with
    ``var playerInstance`` and passes the second double-quoted string in it
    to ``spoofer`` with *link* as the referer string.
    """
    cookie = client.request(link, close=False, output='cookie')
    html = client.request(link, cookie=cookie)

    player_scripts = [
        tag for tag in itertags(html, 'script')
        if tag.text.startswith(u'var playerInstance')
    ]
    quoted_values = re.findall('"(.+?)"', player_scripts[0].text)

    return spoofer(url=quoted_values[1], referer=True, ref_str=link)
def ant1cy(link):
    """Alternative method.

    Takes the second single-quoted string from the second-to-last
    ``<script>`` of the page at *link*, passes it to the token-generator
    endpoint and returns that response with ``spoofer()`` appended.
    """
    api_url = 'https://www.ant1.com.cy/ajax.aspx?m=Atcom.Sites.Ant1iwo.Modules.TokenGenerator&videoURL={0}'

    html = client.request(link)
    # The second client.request() result below is concatenated with a
    # string, so the result is presumably plain text, which has no ``.text``
    # attribute. Accept both a string and a response-like object.
    markup = getattr(html, 'text', html)

    scripts = list(itertags(markup, 'script'))
    m3u8 = re.findall("'(.+?)'", scripts[-2].text)[1]

    stream = client.request(api_url.format(m3u8))

    return stream + spoofer()
def _get_streams(self):
    """Yield HLS streams from the first iframe of an ABweb page.

    Requires either cached authentication or username/password options;
    yields nothing when neither is available or the login fails.

    Raises:
        PluginError: when the page has no iframe or the iframe page has no
            HLS URL.
    """
    self.session.http.headers.update({
        'Referer': 'http://www.abweb.com/BIS-TV-Online/bistvo-tele-universal.aspx'
    })

    username = self.get_option('username')
    password = self.get_option('password')

    if self.options.get('purge_credentials'):
        self.clear_cookies()
        self._authed = False
        log.info('All credentials were successfully removed.')

    if self._authed:
        log.info('Attempting to authenticate using cached cookies')
    elif not (username and password):
        log.error(
            'A login for ABweb is required, use --abweb-username USERNAME --abweb-password PASSWORD'
        )
        return
    elif not self._login(username, password):
        return

    log.debug('get iframe_url')
    res = self.session.http.get(self.url)

    # Only the first iframe on the page is used.
    first_iframe = next(iter(itertags(res.text, 'iframe')), None)
    if first_iframe is None:
        raise PluginError('No iframe_url found.')

    iframe_url = first_iframe.attributes.get('src')
    if iframe_url.startswith('/'):
        iframe_url = url_concat('https://www.abweb.com', iframe_url)
    else:
        iframe_url = update_scheme('https://', iframe_url)
    log.debug('iframe_url={0}'.format(iframe_url))

    self.session.http.headers.update({'Referer': iframe_url})
    res = self.session.http.get(iframe_url)
    hls_match = self._hls_re.search(res.text)
    if not hls_match:
        raise PluginError('No hls_url found.')

    hls_url = update_scheme('https://', hls_match.group('url'))
    variants = HLSStream.parse_variant_playlist(self.session, hls_url)
    if not variants:
        yield 'live', HLSStream(self.session, hls_url)
        return
    for item in variants.items():
        yield item
def _get_streams(self):
    """Return HLS variant streams from the ``#player`` div's ``data-stream``.

    The page is requested twice; only the second response is parsed. When
    several ``div#player`` elements exist the last one wins. Returns
    ``None`` when no stream URL is found.
    """
    self.session.http.headers.update({
        "User-Agent": useragents.CHROME,
        "Referer": self.url})

    # First request: the response is discarded.
    # NOTE(review): presumably made for its cookies -- confirm.
    self.session.http.get(self.url)

    page = self.session.http.get(self.url)
    stream_url = None
    for div in itertags(page.text, "div"):
        if div.attributes.get("id") != "player":
            continue
        stream_url = div.attributes.get("data-stream")

    if not stream_url:
        return

    log.debug("URL={0}".format(stream_url))
    return HLSStream.parse_variant_playlist(self.session,
                                            stream_url,
                                            name_fmt="{pixels}_{bitrate}")
def _get_streams(self):
    """Yield HLS streams for each ``streamUrl`` in the playerScript ``src``.

    Every ``<script id="playerScript">`` has its ``src`` query string
    parsed; each ``streamUrl`` value is loaded as a variant playlist with a
    1-based ``id`` request parameter and certificate verification disabled.
    """
    page = self.session.http.get(self.url)
    for script in itertags(page.text, 'script'):
        if script.attributes.get("id") != "playerScript":
            continue

        log.debug("Found the playerScript script tag")
        query = urlparse(script.attributes.get("src")).query
        stream_urls = [value for key, value in parse_qsl(query) if key == "streamUrl"]

        for index, stream_url in enumerate(stream_urls, 1):
            variants = HLSStream.parse_variant_playlist(
                self.session, stream_url, params=dict(id=index), verify=False)
            yield from variants.items()
def _parse_streams(self, res):
    """Yield ``(name, stream)`` pairs found in the response *res*.

    The first ``og:video:url`` meta tag is tried first; if it produced at
    least one stream, nothing else is examined. Otherwise every match of
    ``self._src_re`` is yielded as DASH or MP4, followed by the streams of
    an inline DASH manifest (unless it uses ``SegmentBase``).
    """
    found_in_meta = False

    for meta in itertags(res.text, "meta"):
        if meta.attributes.get("property") != "og:video:url":
            continue
        meta_url = html_unescape(meta.attributes.get("content"))
        if ".mpd" in meta_url:
            for item in DASHStream.parse_manifest(self.session, meta_url).items():
                yield item
                found_in_meta = True
        elif ".mp4" in meta_url:
            yield "vod", HTTPStream(self.session, meta_url)
            found_in_meta = True
        # Only the first og:video:url meta tag is considered.
        break
    else:
        log.debug("No meta og:video:url")

    if found_in_meta:
        return

    for src_match in self._src_re.finditer(res.text):
        src_url = src_match.group("url")
        if "\\/" in src_url:
            # if the URL is json encoded, decode it
            src_url = parse_json("\"{}\"".format(src_url))

        if ".mpd" in src_url:
            for item in DASHStream.parse_manifest(self.session, src_url).items():
                yield item
        elif ".mp4" in src_url:
            yield src_match.group(1), HTTPStream(self.session, src_url)
        else:
            log.debug("Non-dash/mp4 stream: {0}".format(src_url))

    manifest_match = self._dash_manifest_re.search(res.text)
    if not manifest_match:
        return

    # facebook replaces "<" characters with the substring "\\x3C"
    manifest = manifest_match.group("manifest").replace("\\/", "/")
    if is_py3:
        manifest = bytes(unquote_plus(manifest), "utf-8").decode("unicode_escape")
    else:
        manifest = unquote_plus(manifest).decode("string_escape")

    # Ignore unsupported manifests until DASH SegmentBase support is implemented
    if "SegmentBase" in manifest:
        log.error("Skipped DASH manifest with SegmentBase streams")
        return

    for item in DASHStream.parse_manifest(self.session, manifest).items():
        yield item
def _get_streams(self):
    """Yield HLS streams for each ``streamUrl`` in the playerScript ``src``.

    Every ``<script id="playerScript">`` has its ``src`` query string
    parsed; each ``streamUrl`` value is loaded as a variant playlist with a
    1-based ``id`` request parameter and certificate verification disabled.
    """
    self.session.http.headers.update({"User-Agent": useragents.FIREFOX})
    page = self.session.http.get(self.url)

    for script in itertags(page.text, 'script'):
        if script.attributes.get("id") != "playerScript":
            continue

        log.debug("Found the playerScript script tag")
        query = urlparse(script.attributes.get("src")).query
        stream_urls = [value for key, value in parse_qsl(query) if key == "streamUrl"]

        for index, stream_url in enumerate(stream_urls, 1):
            variants = HLSStream.parse_variant_playlist(self.session,
                                                        stream_url,
                                                        params=dict(id=index),
                                                        verify=False)
            for item in variants.items():
                yield item
def _get_eplus_data(session, eplus_url):
    """Return video data for an eplus event/video page.

    URL should be in the form https://live.eplus.jp/ex/player?ib=<key>

    The returned dict holds ``title`` (unescaped text of the first
    ``<title>``) and ``channel_url`` (from the ``listChannels`` script
    variable); each key is present only when found on the page.
    """
    body = session.http.get(eplus_url).text
    data = {}

    first_title = next(iter(itertags(body, "title")), None)
    if first_title is not None:
        data["title"] = html.unescape(first_title.text.strip())

    channel_match = re.search(r"""var listChannels = \["(?P<channel_url>.*)"\]""", body)
    if channel_match:
        # The URL is embedded with escaped slashes.
        escaped_url = channel_match.group("channel_url")
        data["channel_url"] = escaped_url.replace(r"\/", "/")

    return data
def _get_streams(self):
    """Yield the streams for a vk.com video.

    Posts the video ID to ``self.API_URL`` and yields, in order: the streams
    of every iframe in the response, then HLS and MP4 streams built from
    the ``<source>`` tags (parsed after stripping backslashes).
    """
    self.session.http.headers.update({'User-Agent': useragents.IPHONE_6})
    self.follow_vk_redirect()

    video_id = self.match.group('video_id')
    log.debug('video ID: {0}'.format(video_id))

    res = self.session.http.post(
        self.API_URL,
        params={'act': 'show_inline', 'al': '1', 'video': video_id},
    )

    for iframe in itertags(res.text, 'iframe'):
        if not iframe.attributes.get('src'):
            continue
        iframe_url = update_scheme(self.url, iframe.attributes['src'])
        log.debug('Found iframe: {0}'.format(iframe_url))
        yield from self.session.streams(iframe_url).items()

    unescaped_body = res.text.replace('\\', '')
    for source in itertags(unescaped_body, 'source'):
        mime = source.attributes.get('type')
        if mime == 'application/vnd.apple.mpegurl':
            video_url = html_unescape(source.attributes['src'])
            variants = HLSStream.parse_variant_playlist(
                self.session, video_url)
            if variants:
                yield from variants.items()
            else:
                yield 'live', HLSStream(self.session, video_url)
        elif mime == 'video/mp4':
            video_url = source.attributes['src']
            quality_match = self._vod_quality_re.search(video_url)
            if quality_match:
                quality = '{0}p'.format(quality_match.group(1))
            else:
                quality = 'vod'
            yield quality, HTTPStream(self.session, video_url)
def _get_streams(self):
    """Yield ``(name, stream)`` pairs built from the page's ``<source>`` tags.

    For ``page/live`` URLs the first source is the live stream, parsed as an
    HLS variant playlist unless the ``parse_hls`` option disables it. For
    other URLs every source except the last becomes an HTTP stream named
    after its ``type`` attribute. Source URLs are prefixed with ``https:``.
    """
    headers = {'User-Agent': CHROME}

    res = self.session.http.get(self.url, headers=headers)
    sources = list(itertags(res.text, 'source'))
    live = 'page/live' in self.url

    headers.update({"Referer": self.url})

    try:
        parse_hls = bool(strtobool(self.get_option('parse_hls')))
    except AttributeError:
        # The option is unset, so strtobool receives a non-string value.
        parse_hls = True

    if live:
        if not sources:
            # No <source> tag on the page: nothing to yield (was IndexError).
            return
        stream = 'https:' + sources[0].attributes['src']
        if parse_hls:
            # This function is a generator, so it must yield
            # (name, stream) pairs rather than a whole dict.
            variants = HLSStream.parse_variant_playlist(self.session, stream, headers=headers)
            for item in variants.items():
                yield item
        else:
            yield 'live', HTTPStream(self.session, stream, headers=headers)
    else:
        for source in sources[:-1]:
            quality = source.attributes['type']
            url = 'https:' + source.attributes['src']
            yield quality, HTTPStream(self.session, url, headers=headers)
def _get_streams(self):
    """Yield HLS variant streams found behind the ``embed.lsm.lv`` iframe.

    Fetches ``self.url``, picks the first iframe whose ``src`` contains
    ``embed.lsm.lv``, loads it and yields ``(name, stream)`` pairs for every
    ``<source>`` whose URL path ends in ``.m3u8``. Logs an error and yields
    nothing when no such iframe exists.
    """
    self.session.http.headers.update({"Referer": self.url})

    iframe_url = None
    res = self.session.http.get(self.url)
    for iframe in itertags(res.text, "iframe"):
        # An iframe without a src attribute returns None here; fall back to
        # "" so the substring test cannot raise TypeError.
        src = iframe.attributes.get("src") or ""
        if "embed.lsm.lv" in src:
            iframe_url = src
            break

    if not iframe_url:
        log.error("Could not find player iframe")
        return

    log.debug("Found iframe: {0}".format(iframe_url))
    res = self.session.http.get(iframe_url)
    for source in itertags(res.text, "source"):
        stream_url = source.attributes.get("src")
        if not stream_url:
            continue
        url_path = urlparse(stream_url).path
        if url_path.endswith(".m3u8"):
            yield from HLSStream.parse_variant_playlist(self.session, stream_url).items()
        else:
            log.debug("Not used URL path: {0}".format(url_path))
def login(self, username, password):
    """Sign in and return the token found in the login response.

    The sign-in page is scraped for the ``csrf_ustvnow`` input, whose value
    is posted together with the credentials. Returns the first group of
    ``self._token_re`` in the response, or a falsy value when it does not
    match.
    """
    r = http.get(self._signin_url)
    csrf = None

    for tag in itertags(r.text, "input"):
        # Inputs without a name attribute (e.g. buttons) are skipped instead
        # of raising KeyError.
        if tag.attributes.get('name') == "csrf_ustvnow":
            csrf = tag.attributes.get('value')

    log.debug("CSRF: {0}", csrf)

    r = http.post(self._login_url, data={'csrf_ustvnow': csrf,
                                         'signin_email': username,
                                         'signin_password': password,
                                         'signin_remember': '1'})
    m = self._token_re.search(r.text)
    return m and m.group(1)
def _get_streams(self):
    """Yield HLS streams for each ``streamUrl`` in the playerScript ``src``.

    Every ``<script id="playerScript">`` has its ``src`` query string
    parsed; each ``streamUrl`` value is loaded as a variant playlist with a
    1-based ``id`` request parameter and certificate verification disabled.
    """
    self.session.http.headers.update({"User-Agent": useragents.FIREFOX})
    page = self.session.http.get(self.url)

    player_scripts = (
        tag for tag in itertags(page.text, 'script')
        if tag.attributes.get("id") == "playerScript"
    )
    for script in player_scripts:
        log.debug("Found the playerScript script tag")
        src_parts = urlparse(script.attributes.get("src"))

        counter = 0
        for name, value in parse_qsl(src_parts.query):
            if name != "streamUrl":
                continue
            counter += 1
            variants = HLSStream.parse_variant_playlist(
                self.session, value, params=dict(id=counter), verify=False)
            for item in variants.items():
                yield item
def login(self, username, password):
    """Sign in and return the token found in the login response.

    The sign-in page is scraped for the ``csrf_ustvnow`` input, whose value
    is posted together with the credentials. Returns the first group of
    ``self._token_re`` in the response, or a falsy value when it does not
    match.
    """
    r = http.get(self._signin_url)
    csrf = None

    for tag in itertags(r.text, "input"):
        # Inputs without a name attribute (e.g. buttons) are skipped instead
        # of raising KeyError.
        if tag.attributes.get('name') == "csrf_ustvnow":
            csrf = tag.attributes.get('value')

    self.logger.debug("CSRF: {0}", csrf)

    r = http.post(self._login_url, data={'csrf_ustvnow': csrf,
                                         'signin_email': username,
                                         'signin_password': password,
                                         'signin_remember': '1'})
    m = self._token_re.search(r.text)
    return m and m.group(1)
def _get_streams(self):
    """Return the streams for a live-stream, ``starx`` or plain page.

    * ``live-stream`` URLs: first ``.m3u8`` URL in the first script whose
      text mentions ``m3u8``.
    * ``starx`` URLs: player URL built from the ``kalturaPlayer('<id>'``
      call on the page.
    * anything else: first ``.m3u8`` URL in the page.

    Raises:
        PluginError: when no playable URL is found.
    """
    headers = {'User-Agent': CHROME}
    m3u8_pattern = r"(?P<url>http.+?\.m3u8)"

    res = self.session.http.get(self.url, headers=headers)

    if 'live-stream' in self.url:
        # A page without any m3u8 script used to raise IndexError and skip
        # the PluginError below; an empty string now leads to it.
        script_text = next(
            (tag.text for tag in itertags(res.text, 'script')
             if tag.text and 'm3u8' in tag.text),
            '',
        )
        found = re.search(m3u8_pattern, script_text)
        stream = found.group('url') if found else None
    elif 'starx' in self.url:
        found = re.search(r"kalturaPlayer\('(?P<id>\w+)'", res.text)
        stream = self._player_url.format(found.group('id')) if found else None
    else:
        found = re.search(m3u8_pattern, res.text)
        stream = found.group('url') if found else None

    # Every branch above leaves a plain URL string (or None) in ``stream``.
    if not stream:
        raise PluginError('Did not find the playable url')

    headers.update({"Referer": self.url})

    try:
        parse_hls = bool(strtobool(self.get_option('parse_hls')))
    except AttributeError:
        # The option is unset, so strtobool receives a non-string value.
        parse_hls = True

    if parse_hls:
        return HLSStream.parse_variant_playlist(self.session, stream, headers=headers)
    else:
        return dict(stream=HTTPStream(self.session, stream, headers=headers))
def _fetch_fb_api_key(self):
    """Return the ``REACT_APP_FB_API_KEY`` value from the site's main bundle.

    Finds the first ``<script>`` whose ``src`` matches
    ``/static/js/main*.js`` on the base page, downloads it and extracts the
    key. Returns ``None`` when the script or the key is missing.
    """
    # get firebase API key
    index_html = self.session.http.get(self._BASE_URL).text

    main_js_path = None
    for script in itertags(index_html, "script"):
        candidate = script.attributes.get("src", "")
        if re.match(r"/static/js/main.*\.js", candidate):
            main_js_path = candidate
            break
    if main_js_path is None:
        return None

    bundle = self.session.http.get(f"{self._BASE_URL}{main_js_path}").text
    key_match = re.search(r'REACT_APP_FB_API_KEY:\s*"(?P<key>[a-zA-Z0-9\-]+)"', bundle)
    if not key_match:
        return None
    return key_match.group("key")
def _find_video_id(self, url):
    """Return the video ID for *url*.

    Tries, in order: the ``video_id`` group of the URL itself, the
    ``currentVideoEndpoint`` entries of the embedded page data, any
    ``videoRenderer`` with a view-count ``runs`` entry or a live-now badge,
    and finally (for ``/embed/live_stream`` URLs) the page's canonical
    link, which is resolved recursively.

    Raises:
        PluginError: when no video ID can be found.
    """
    m = _url_re.match(url)
    # The canonical-link recursion below may pass a URL the pattern does not
    # match, so match() can return None here.
    if m and m.group("video_id"):
        log.debug("Video ID from URL")
        return m.group("video_id")

    res = self.session.http.get(url)
    datam = _ytdata_re.search(res.text)
    if datam:
        data = parse_json(datam.group(1))
        # find the videoRenderer object, where there is a LIVE NOW badge
        for vid_ep in search_dict(data, 'currentVideoEndpoint'):
            video_id = vid_ep.get("watchEndpoint", {}).get("videoId")
            if video_id:
                log.debug("Video ID from currentVideoEndpoint")
                return video_id
        for x in search_dict(data, 'videoRenderer'):
            if x.get("viewCountText", {}).get("runs"):
                if x.get("videoId"):
                    log.debug("Video ID from videoRenderer (live)")
                    return x["videoId"]
            for bstyle in search_dict(x.get("badges", {}), "style"):
                if bstyle == "BADGE_STYLE_TYPE_LIVE_NOW":
                    if x.get("videoId"):
                        log.debug("Video ID from videoRenderer (live)")
                        return x["videoId"]

    if "/embed/live_stream" in url:
        for link in itertags(res.text, "link"):
            if link.attributes.get("rel") == "canonical":
                canon_link = link.attributes.get("href")
                # Skip a canonical tag with no href instead of calling
                # .endswith() on None.
                if canon_link and canon_link != url:
                    if canon_link.endswith("v=live_stream"):
                        log.debug("The video is not available")
                        break
                    else:
                        log.debug("Re-directing to canonical URL: {0}".format(canon_link))
                        return self._find_video_id(canon_link)

    raise PluginError("Could not find a video on this page")
def _get_streams(self):
    """
    Finds the streams for a USTVNow station.

    The station code comes from the URL (``scode`` group) or, failing that,
    from the ``station_code`` option. The guide page is scraped for
    ``<a cs="...">`` tags to build the ordered list of known stations.
    Returns the HLS variant streams for the station, or ``None`` after
    logging when the code is missing or unknown.
    """
    token = self.login(self.get_option("username"), self.get_option("password"))
    m = self._url_re.match(self.url)
    scode = (m.group("scode") if m else None) or self.get_option("station_code")

    res = self.session.http.get(self._guide_url, params=dict(token=token))

    channels = OrderedDict()
    for t in itertags(res.text, "a"):
        code = t.attributes.get('cs')
        if code:
            # A tag without a title attribute would otherwise crash on
            # None.replace(); fall back to an empty name.
            title = t.attributes.get('title') or ""
            channels[code.lower()] = title.replace("Watch ", "").strip()

    if not scode:
        log.error("Station code not provided, use --ustvnow-station-code.")
        log.info("Available stations are: \n{0} ".format(
            '\n'.join(' {0} ({1})'.format(c, n) for c, n in channels.items())))
        return

    if scode in channels:
        log.debug("Finding streams for: {0}", channels.get(scode))

        r = self.session.http.get(
            self._stream_url,
            params={
                "scode": scode,
                "token": token,
                "br_n": "Firefox",
                "br_v": "52",
                "br_d": "desktop"
            },
            headers={"User-Agent": useragents.FIREFOX})

        data = self.session.http.json(r)
        return HLSStream.parse_variant_playlist(self.session, data["stream"])
    else:
        log.error("Invalid station-code: {0}", scode)
def _get_streams(self):
    """Return HLS variant streams for an ``ott.streann.com`` player.

    When ``self.url`` is on another host, that host becomes the source
    domain and ``self.url`` is replaced by the first iframe pointing at
    ``ott.streann.com``. The player's query string is base64-decoded and
    decrypted with the passphrase to obtain the config used for the token
    and the stream URL. Returns ``None`` when the iframe, domain,
    passphrase or token is missing.
    """
    page_url = urlparse(self.url)
    if page_url.netloc != "ott.streann.com":
        self._domain = page_url.netloc
        page = self.session.http.get(self.url)
        for iframe in itertags(page.text, "iframe"):
            candidate = html_unescape(iframe.attributes.get("src"))
            if "ott.streann.com" in candidate:
                self.url = candidate
                break
        else:
            log.error("Could not find 'ott.streann.com' iframe")
            return

    if not self._domain and self.get_option("url"):
        self._domain = urlparse(self.get_option("url")).netloc

    if self._domain is None:
        log.error("Missing source URL use --streann-url")
        return

    self.session.http.headers.update({"Referer": self.url})

    # The query string is the encrypted payload.
    encrypted_query = urlparse(self.url).query
    ciphertext = base64.b64decode(encrypted_query)

    passphrase = self.passphrase()
    if not passphrase:
        return

    log.debug("Found passphrase")
    plaintext = decrypt_openssl(ciphertext, passphrase)
    config = parse_qsd(plaintext.decode("utf8"))
    log.trace(f"config: {config!r}")

    token = self.get_token(**config)
    if not token:
        return

    hls_url = self.stream_url.format(time=self.time,
                                     deviceId=self.device_id,
                                     token=token,
                                     **config)
    log.debug("URL={0}".format(hls_url))
    return HLSStream.parse_variant_playlist(
        self.session, hls_url, acceptable_status=(200, 403, 404, 500))
def _get_streams(self):
    """Poll the broadcast endpoint and return DASH streams once ready.

    Optionally logs in first (when the ``email`` option is set), rewrites
    steam.tv URLs to the watch-broadcast URL, then polls
    ``_get_broadcast_stream`` until the status is ``ready`` (returns the
    parsed DASH manifest), ``unavailable`` (logs and returns ``None``) or
    anything other than a waiting status.
    """
    waiting_states = ("waiting", "waiting_for_start")

    if self.get_option("email"):
        logged_in = self.login(self.get_option("email"),
                               self.get_option("password"))
        if logged_in:
            log.info("Logged in as {0}".format(self.get_option("email")))
            self.save_cookies(lambda c: "steamMachineAuth" in c.name)

    # Handle steam.tv URLs
    if self._steamtv_url_re.match(self.url) is not None:
        # extract the steam ID from the page
        page = self.session.http.get(self.url)
        config_divs = (
            div for div in itertags(page.text, 'div')
            if div.attributes.get("id") == "webui_config"
        )
        for div in config_divs:
            broadcast_data = html_unescape(div.attributes.get("data-broadcast"))
            steamid = parse_json(broadcast_data).get("steamid")
            self.url = self._watch_broadcast_url + steamid

    # extract the steam ID from the URL
    steamid = self._url_re.match(self.url).group(1)
    # get the page to set some cookies
    res = self.session.http.get(self.url)
    sessionid = res.cookies.get('sessionid')

    streamdata = None
    while streamdata is None or streamdata[u"success"] in waiting_states:
        streamdata = self._get_broadcast_stream(steamid, sessionid=sessionid)
        state = streamdata[u"success"]

        if state == "ready":
            return DASHStream.parse_manifest(self.session, streamdata["url"])
        if state == "unavailable":
            log.error("This stream is currently unavailable")
            return

        retry_seconds = streamdata[u"retry"] / 1000.0
        log.info("Waiting for stream, will retry again in {} seconds...".format(retry_seconds))
        time.sleep(retry_seconds)
def login_csrf(self):
    """Return the value of the CSRF input on the login page.

    Looks for the first ``<input>`` whose ``name`` equals
    ``self.CSRF_NAME``; returns ``None`` when there is none.
    """
    login_page = self.session.http.get(self.login_url)
    for field in itertags(login_page.text, "input"):
        if field.attributes.get("name") != self.CSRF_NAME:
            continue
        return field.attributes.get("value")
def _get_isvp_url(self):
    """Return the ``src`` of the first iframe matching ``self.url_re``.

    An iframe qualifies when its ``src`` matches the pattern and the first
    group is not ``None``. Returns ``None`` when no iframe qualifies.
    """
    res = self.session.http.get(self.url)
    for iframe in itertags(res.text, 'iframe'):
        src = iframe.attributes.get('src')
        if not src:
            # match(None) would raise TypeError.
            continue
        m = self.url_re.match(src)
        # Keep scanning instead of giving up after the first iframe on the
        # page, which need not be the player.
        if m and m.group(1) is not None:
            return src
    return None
def test_itertags_single_text(self):
    """The single ``<title>`` tag is returned with its text and no attributes."""
    title = list(itertags(self.test_html, "title"))
    # assertTrue(len(title), 1) only checked for a non-empty list (1 was the
    # failure message); assertEqual checks the count.
    self.assertEqual(len(title), 1)
    self.assertEqual(title[0].tag, "title")
    self.assertEqual(title[0].text, "Title")
    self.assertEqual(title[0].attributes, {})
def test_tag_inner_tag(self):
    """A ``<p>`` tag's text includes the raw markup of its nested tag."""
    links = list(itertags(self.test_html, "p"))
    # assertTrue(len(links), 1) only checked for a non-empty list (1 was the
    # failure message); assertEqual checks the count.
    self.assertEqual(len(links), 1)
    self.assertEqual(links[0].tag, "p")
    self.assertEqual(links[0].text.strip(), '<a \nhref="http://test.se/foo">bar</a>')
    self.assertEqual(links[0].attributes, {})
def video_info(self):
    """Return the attributes of the first ``<div id="video">`` on the page.

    Returns ``None`` when the page has no such div.
    """
    response = self.session.http.get(self.url)
    for element in itertags(response.text, 'div'):
        if element.attributes.get("id") != "video":
            continue
        return element.attributes
def test_multi_line_a(self):
    """An ``<a>`` tag is returned with its text and ``href`` attribute."""
    anchor = list(itertags(self.test_html, "a"))
    # assertTrue(len(anchor), 1) only checked for a non-empty list (1 was
    # the failure message); assertEqual checks the count.
    self.assertEqual(len(anchor), 1)
    self.assertEqual(anchor[0].tag, "a")
    self.assertEqual(anchor[0].text, "bar")
    self.assertEqual(anchor[0].attributes, {"href": "http://test.se/foo"})
def _get_iframe_url(self, url):
    """Return the first non-empty iframe ``src`` on the page at *url*.

    Returns ``None`` when no iframe carries a ``src``.
    """
    page = self.session.http.get(url)
    for frame in itertags(page.text, 'iframe'):
        frame_src = frame.attributes.get("src")
        if not frame_src:
            continue
        return frame_src
def _resolve_stream(self):
    """Return HLS variant streams for the first ``<video>`` tag's stream URL.

    The URL is read from the tag's ``data-stream`` attribute. Returns
    ``None`` when the page has no ``<video>`` tag.
    """
    page = self.session.http.get(self.url)

    # Only the first <video> element is ever used.
    first_video = next(iter(itertags(page.text, 'video')), None)
    if first_video is None:
        return None

    stream_url = first_video.attributes.get("data-stream")
    log.debug("Stream data: {0}".format(stream_url))
    return HLSStream.parse_variant_playlist(self.session, stream_url)