def get(self, options):
    """Yield playable streams (HDS/HLS/MP4) for a VGTV video page.

    The video id is taken from the page's data-videoid attribute, falling
    back to a /video/<id>/ pattern in the URL fragment; exits the process
    if neither is found.
    """
    match = re.search(r'data-videoid="([^"]+)"', self.get_urldata())
    if not match:
        parse = urlparse(self.url)
        match = re.search(r'video/(\d+)/', parse.fragment)
        if not match:
            log.error("Can't find video id")
            sys.exit(2)
    videoid = match.group(1)
    data = get_http_data("http://svp.vg.no/svp/api/v1/vgtv/assets/%s?appName=vgtv-website" % videoid)
    jsondata = json.loads(data)
    if options.output_auto:
        # Derive the output filename from the asset title, keeping any
        # directory component the user already supplied.
        directory = os.path.dirname(options.output)
        title = "%s" % jsondata["title"]
        title = filenamify(title)
        if len(directory):
            options.output = "%s/%s" % (directory, title)
        else:
            options.output = title
    if "hds" in jsondata["streamUrls"]:
        parse = urlparse(jsondata["streamUrls"]["hds"])
        manifest = "%s://%s%s?%s&hdcore=3.3.0" % (parse.scheme, parse.netloc, parse.path, parse.query)
        streams = hdsparse(copy.copy(options), manifest)
        # hdsparse may return None (e.g. DRM), hence the guard.
        if streams:
            for n in list(streams.keys()):
                yield streams[n]
    if "hls" in jsondata["streamUrls"]:
        streams = hlsparse(jsondata["streamUrls"]["hls"])
        for n in list(streams.keys()):
            yield HLS(copy.copy(options), streams[n], n)
    if "mp4" in jsondata["streamUrls"]:
        yield HTTP(copy.copy(options), jsondata["streamUrls"]["mp4"])
def _isswe(self, url): if re.search(".se$", urlparse(url).netloc): return "sasong" elif re.search(".dk$", urlparse(url).netloc): return "saeson" else: return "sesong"
def get(self, options):
    """Yield subtitle and stream objects for an SVT/SVT Play video page.

    Resolves the page to its JSON description (?output=json&format=json),
    then emits HLS, HDS, RTMP or plain-HTTP streams depending on each
    video reference.
    """
    if re.findall("svt.se", self.url):
        match = re.search(r"data-json-href=\"(.*)\"", self.get_urldata())
        if match:
            # Fixed: decode the HTML entity (was a no-op replace("&", "&")).
            filename = match.group(1).replace("&amp;", "&").replace("&format=json", "")
            url = "http://www.svt.se%s" % filename
        else:
            log.error("Can't find video file")
            sys.exit(2)
    else:
        url = self.url
    pos = url.find("?")
    if pos < 0:
        dataurl = "%s?&output=json&format=json" % url
    else:
        dataurl = "%s&output=json&format=json" % url
    data = json.loads(get_http_data(dataurl))
    if "live" in data["video"]:
        options.live = data["video"]["live"]
    else:
        options.live = False
    if data["video"]["subtitleReferences"]:
        # Fixed: 'subtitle' was referenced before assignment when the
        # lookup below raised KeyError.
        subtitle = None
        try:
            subtitle = data["video"]["subtitleReferences"][0]["url"]
        except KeyError:
            pass
        if subtitle and len(subtitle) > 0:
            yield subtitle_wsrt(subtitle)
    for i in data["video"]["videoReferences"]:
        parse = urlparse(i["url"])
        if parse.path.find("m3u8") > 0:
            streams = hlsparse(i["url"])
            for n in list(streams.keys()):
                yield HLS(copy.copy(options), streams[n], n)
        elif parse.path.find("f4m") > 0:
            # Geo-locked /se/secure/ manifests cannot be downloaded.
            match = re.search(r"\/se\/secure\/", i["url"])
            if not match:
                parse = urlparse(i["url"])
                manifest = "%s://%s%s?%s&hdcore=3.3.0" % (parse.scheme, parse.netloc, parse.path, parse.query)
                streams = hdsparse(copy.copy(options), manifest)
                for n in list(streams.keys()):
                    yield streams[n]
        elif parse.scheme == "rtmp":
            # RTMP needs the player SWF for verification (-W flag).
            embedurl = "%s?type=embed" % url
            data = get_http_data(embedurl)
            match = re.search(r"value=\"(/(public)?(statiskt)?/swf(/video)?/svtplayer-[0-9\.a-f]+swf)\"", data)
            swf = "http://www.svtplay.se%s" % match.group(1)
            options.other = "-W %s" % swf
            yield RTMP(copy.copy(options), i["url"], i["bitrate"])
        else:
            yield HTTP(copy.copy(options), i["url"], "0")
def find_all_episodes(self, options):
    """Return episode URLs for a DR show.

    DR hides its listing endpoints behind base64-encoded paths on
    "show more" buttons; decode one, request the full listing
    (offset=0&limit=1000) and scrape program links. Falls back to
    scraping the current page when no button is found.
    """
    episodes = []
    matches = re.findall(
        r'<button class="show-more" data-url="([^"]+)" data-partial="([^"]+)"',
        self.get_urldata())
    for encpath, enccomp in matches:
        # New-style tokens are "<b64path>_<b64params>".
        newstyle = '_' in encpath
        if newstyle:
            encbasepath = encpath.split('_')[0]
            # '===' pads to a valid base64 length regardless of input.
            path = base64.b64decode(encbasepath + '===').decode('latin1') if is_py3 else \
                base64.b64decode(encbasepath + '===')
        else:
            path = base64.b64decode(encpath + '===').decode('latin1') if is_py3 else \
                base64.b64decode(encpath + '===')
        if '/view/' in path:
            continue
        params = 'offset=0&limit=1000'
        if newstyle:
            encparams = base64.b64encode(params.encode('latin1')).decode('latin1').rstrip('=') if is_py3 else \
                base64.b64encode(params).rstrip('=')
            encpath = '%s_%s' % (encbasepath, encparams)
        else:
            path = '%s?%s' % (urlparse(path).path, params)
            encpath = base64.b64encode(path.encode('latin1')).decode('latin1').rstrip('=') if is_py3 else \
                base64.b64encode(path).rstrip('=')
        url = urljoin('https://www.dr.dk/tv/partial/', '%s/%s' % (enccomp, encpath))
        data = self.http.request('get', url).content.decode('latin1') if is_py3 else \
            self.http.request('get', url).content
        matches = re.findall(r'"program-link" href="([^"]+)">', data)
        episodes = [urljoin('https://www.dr.dk/', url) for url in matches]
        # Only the first usable button is needed.
        break
    if not episodes:
        # Fallback: program links on the current page sharing this show's path prefix.
        prefix = '/'.join(
            urlparse(self.url).path.rstrip('/').split('/')[:-1])
        matches = re.findall(r'"program-link" href="([^"]+)">', self.get_urldata())
        episodes = [
            urljoin('https://www.dr.dk/', url) for url in matches
            if url.startswith(prefix)
        ]
    # all_last == -1 (the default) means "all episodes, oldest first".
    if options.all_last != -1:
        episodes = episodes[:options.all_last]
    else:
        episodes.reverse()
    return episodes
def get(self, options):
    """Yield subtitle, HLS and HDS streams for an NRK video page.

    If no manifest is embedded directly, the video id is resolved through
    NRK's media-element API; exits the process on unrecoverable pages.
    """
    data = self.get_urldata()
    match = re.search('data-subtitlesurl = "(/.*)"', data)
    if match:
        parse = urlparse(self.url)
        subtitle = "%s://%s%s" % (parse.scheme, parse.netloc, match.group(1))
        yield subtitle_tt(subtitle)
    match = re.search(r'data-media="(.*manifest.f4m)"', data)
    if match:
        manifest_url = match.group(1)
    else:
        # No inline manifest: look up the media element via the API.
        match = re.search(r'data-video-id="(\d+)"', data)
        if match is None:
            log.error("Can't find video id.")
            sys.exit(2)
        vid = match.group(1)
        match = re.search(r"PS_VIDEO_API_URL : '([^']*)',", data)
        if match is None:
            log.error("Can't find server address with media info")
            sys.exit(2)
        dataurl = "%smediaelement/%s" % (match.group(1), vid)
        data = json.loads(get_http_data(dataurl))
        manifest_url = data["mediaUrl"]
        options.live = data["isLive"]
    # Akamai convention: the /z/...manifest.f4m HDS URL has an HLS twin
    # at /i/...master.m3u8.
    hlsurl = manifest_url.replace("/z/", "/i/").replace("manifest.f4m", "master.m3u8")
    streams = hlsparse(hlsurl)
    for n in list(streams.keys()):
        yield HLS(copy.copy(options), streams[n], n)
    manifest_url = "%s?hdcore=2.8.0&g=hejsan" % manifest_url
    streams = hdsparse(copy.copy(options), manifest_url)
    for n in list(streams.keys()):
        yield streams[n]
def find_all_episodes(self, options):
    """Return a sorted list of episode (and optionally clip) URLs for the show.

    Reads the embedded ContentPageProgramStore JSON blob; if the page URL
    names a specific season (sasong-N) only that season is included.
    """
    episodes = []
    match = re.search('"ContentPageProgramStore":({.*}),"ApplicationStore',
                      self.get_urldata())
    if match:
        janson = json.loads(match.group(1))
        season = re.search("sasong-(\d+)", urlparse(self.url).path)
        if season:
            season = season.group(1)
        seasons = []
        for i in janson["format"]["seasons"]:
            if season:
                # Restrict to the season named in the URL.
                if int(season) == i["seasonNumber"]:
                    seasons.append(i["seasonNumber"])
            else:
                seasons.append(i["seasonNumber"])
        for i in seasons:
            if "program" in janson["format"]["videos"][str(i)]:
                for n in janson["format"]["videos"][str(i)]["program"]:
                    episodes = self._videos_to_list(n["sharingUrl"], n["id"], episodes)
            if self.options.include_clips:
                if "clip" in janson["format"]["videos"][str(i)]:
                    for n in janson["format"]["videos"][str(i)]["clip"]:
                        episodes = self._videos_to_list(n["sharingUrl"], n["id"], episodes)
    # all_last > 0 limits to the N most recent episodes.
    if options.all_last > 0:
        return sorted(episodes[-options.all_last:])
    return sorted(episodes)
def find_all_episodes(self, options):
    """Collect watchable episode URLs for the show via the tv4play web API.

    Only assets still within their free-availability window are kept;
    options.all_last (when positive) caps the number of results.
    """
    parsed = urlparse(self.url)
    show = parsed.path[parsed.path.find("/", 1) + 1:]
    if not re.search("%", show):
        show = quote_plus(show)
    error, data = get_http_data(
        "http://webapi.tv4play.se/play/video_assets?type=episode&is_live=false&platform=web&node_nids=%s&per_page=99999" % show)
    if error:
        log.error("Can't get api page")
        return
    jsondata = json.loads(data)
    episodes = []
    counter = 1
    for asset in jsondata["results"]:
        try:
            days_left = int(asset["availability"]["availability_group_free"])
        except (ValueError, TypeError):
            # Missing/odd availability info: treat as effectively unlimited.
            days_left = 999
        if days_left <= 0:
            continue
        episodes.append(
            "http://www.tv4play.se/program/%s?video_id=%s" % (show, asset["id"]))
        if counter == options.all_last:
            break
        counter += 1
    return episodes
def get(self):
    """Yield RTMP streams for an HBO video page.

    The content name is carried in the URL fragment; two XML documents
    (content description, then global video data) are fetched to list
    the available sizes.
    """
    parse = urlparse(self.url)
    try:
        other = parse.fragment
    except KeyError:
        log.error("Something wrong with that url")
        return
    if self.exclude():
        yield ServiceError("Excluding video")
        return
    match = re.search("^/(.*).html", other)
    if not match:
        log.error("Cant find video file")
        return
    url = "http://www.hbo.com/data/content/{0}.xml".format(match.group(1))
    data = self.http.request("get", url).content
    xml = ET.XML(data)
    videoid = xml.find("content")[1].find("videoId").text
    url = "http://render.cdn.hbo.com/data/content/global/videos/data/{0}.xml".format(videoid)
    data = self.http.request("get", url).content
    xml = ET.XML(data)
    ss = xml.find("videos")
    # getiterator() only exists on very old Python 2 ElementTree.
    if is_py2_old:
        sa = list(ss.getiterator("size"))
    else:
        sa = list(ss.iter("size"))
    for i in sa:
        videourl = i.find("tv14").find("path").text
        # Split the URL at the "app:playpath" token: prefix is the RTMP
        # server, suffix becomes the -y playpath argument.
        match = re.search("/([a-z0-9]+:[a-z0-9]+)/", videourl)
        self.options.other = "-y {0}".format(videourl[videourl.index(match.group(1)):])
        yield RTMP(copy.copy(self.options), videourl[:videourl.index(match.group(1))], i.attrib["width"])
def get(self, options):
    """Yield RTMP streams for an HBO video page (older options-passing API).

    Same flow as the newer variant: content name from the URL fragment,
    two XML lookups, one RTMP stream per <size> element; exits the
    process on unrecoverable pages.
    """
    parse = urlparse(self.url)
    try:
        other = parse.fragment
    except KeyError:
        log.error("Something wrong with that url")
        sys.exit(2)
    match = re.search("^/(.*).html", other)
    if not match:
        log.error("Cant find video file")
        sys.exit(2)
    url = "http://www.hbo.com/data/content/%s.xml" % match.group(1)
    data = get_http_data(url)
    xml = ET.XML(data)
    videoid = xml.find("content")[1].find("videoId").text
    url = "http://render.cdn.hbo.com/data/content/global/videos/data/%s.xml" % videoid
    data = get_http_data(url)
    xml = ET.XML(data)
    ss = xml.find("videos")
    # getiterator() only exists on very old Python 2 ElementTree.
    if is_py2_old:
        sa = list(ss.getiterator("size"))
    else:
        sa = list(ss.iter("size"))
    for i in sa:
        videourl = i.find("tv14").find("path").text
        # Split at the "app:playpath" token for rtmpdump's -y flag.
        match = re.search("/([a-z0-9]+:[a-z0-9]+)/", videourl)
        options.other = "-y %s" % videourl[videourl.index(match.group(1)):]
        yield RTMP(copy.copy(options), videourl[:videourl.index(match.group(1))], i.attrib["width"])
def findvid(url, data):
    """Extract the numeric tv4 video id from *url* or the page *data*.

    Returns the id as a string, or None when no id can be located.
    """
    parsed = urlparse(url)
    if "tv4play.se" in url:
        # tv4play always carries the id in the query string.
        try:
            return parse_qs(parsed.query)["video_id"][0]
        except KeyError:
            return None
    found = re.search(r"\"vid\":\"(\d+)\",", data)
    if found:
        return found.group(1)
    found = re.search(r"-(\d+)$", url)
    if found:
        return found.group(1)
    found = re.search(r"meta content='([^']+)' property='og:video'", data)
    if found:
        inner = re.search(r"vid=(\d+)&", found.group(1))
        if inner:
            return inner.group(1)
        log.error("Can't find video id for %s", url)
        return
    return None
def find_all_episodes(self, options):
    """Page through the dplay seasons API and return playable episode URLs.

    Logs in first when credentials are supplied so premium entitlements
    are taken into account by _playable().
    """
    data = self.get_urldata()
    parse = urlparse(self.url)
    domain = re.search(r"(dplay\.\w\w)", parse.netloc).group(1)
    match = re.search('data-show-id="([^"]+)"', data)
    if not match:
        log.error("Cant find show id")
        return None
    premium = None
    if options.username and options.password:
        premium = self._login(options)
    url = "http://www.%s/api/v2/ajax/shows/%s/seasons/?items=9999999&sort=episode_number_desc&page=" % (domain, match.group(1))
    episodes = []
    page = 0
    data = self.http.request("get", "%s%s" % (url, page)).text
    dataj = json.loads(data)
    for i in dataj["data"]:
        what = self._playable(dataj, premium)
        if what == 0:
            episodes.append(i["url"])
    # Remaining pages; total_pages came from the first response.
    pages = dataj["total_pages"]
    for n in range(1, pages):
        data = self.http.request("get", "%s%s" % (url, n)).text
        dataj = json.loads(data)
        for i in dataj["data"]:
            what = self._playable(dataj, premium)
            if what == 0:
                episodes.append(i["url"])
    if len(episodes) == 0:
        log.error("Cant find any playable files")
    if options.all_last > 0:
        return episodes[:options.all_last]
    return episodes
def hdsparse(options, data, manifest):
    """Parse an already-fetched f4m manifest into a dict of bitrate -> HDS stream.

    Returns None (bare return) for DRM-protected manifests — callers must
    guard for that.
    """
    streams = {}
    bootstrap = {}
    xml = ET.XML(data)
    # getiterator() only exists on very old Python 2 ElementTree.
    if is_py2_old:
        bootstrapIter = xml.getiterator("{http://ns.adobe.com/f4m/1.0}bootstrapInfo")
        mediaIter = xml.getiterator("{http://ns.adobe.com/f4m/1.0}media")
    else:
        bootstrapIter = xml.iter("{http://ns.adobe.com/f4m/1.0}bootstrapInfo")
        mediaIter = xml.iter("{http://ns.adobe.com/f4m/1.0}media")
    if xml.find("{http://ns.adobe.com/f4m/1.0}drmAdditionalHeader") is not None:
        log.error("HDS DRM protected content.")
        return
    for i in bootstrapIter:
        # Unnamed bootstrapInfo elements are stored under the key "0".
        if "id" in i.attrib:
            bootstrap[i.attrib["id"]] = i.text
        else:
            bootstrap["0"] = i.text
    parse = urlparse(manifest)
    querystring = parse.query
    for i in mediaIter:
        # A single bootstrap serves all media entries; otherwise match by id.
        if len(bootstrap) == 1:
            bootstrapid = bootstrap["0"]
        else:
            bootstrapid = bootstrap[i.attrib["bootstrapInfoId"]]
        streams[int(i.attrib["bitrate"])] = HDS(options, i.attrib["url"], i.attrib["bitrate"], manifest=manifest, bootstrap=bootstrapid, metadata=i.find("{http://ns.adobe.com/f4m/1.0}metadata").text, querystring=querystring)
    return streams
def _grab_episodes(self, options, seasons):
    """Collect episode (and optionally clip) URLs for every season in *seasons*.

    Builds one localized season URL per entry (via _isswe) and scrapes
    each page's content-store JSON.
    """
    episodes = []
    baseurl = self.url
    # If the URL already names a season, strip that segment to get the show base.
    match = re.search("(sasong|sesong)-\d+", urlparse(self.url).path)
    if match:
        baseurl = self.url[:self.url.rfind("/")]
    for i in seasons:
        url = "{0}/{1}-{2}".format(baseurl, self._isswe(self.url), i)
        res = self.http.get(url)
        if res:
            match = self._conentpage(res.text)
            if match:
                janson = json.loads(match.group(1))
                if "program" in janson["format"]["videos"][str(i)]:
                    for n in janson["format"]["videos"][str(i)]["program"]:
                        episodes = self._videos_to_list(
                            n["sharingUrl"], n["id"], episodes)
                if options.include_clips:
                    if "clip" in janson["format"]["videos"][str(i)]:
                        for n in janson["format"]["videos"][str(i)]["clip"]:
                            episodes = self._videos_to_list(
                                n["sharingUrl"], n["id"], episodes)
    return episodes
def get(self):
    """Yield subtitle and HLS streams from a page embedding an /embed/ player iframe."""
    if self.exclude():
        yield ServiceError("Excluding video")
        return
    match = re.search('iframe src="(/embed/[^"]+)"', self.get_urldata())
    if not match:
        yield ServiceError("Cant find video")
        return
    parse = urlparse(self.url)
    # Resolve the relative iframe path against the page's scheme/host.
    url = "{0}://{1}{2}".format(parse.scheme, parse.netloc, match.group(1))
    data = self.http.get(url)
    match = re.search('src="([^"]+vtt)"', data.text)
    if match:
        yield subtitle(copy.copy(self.options), "wrst", match.group(1))
    match = re.search('source src="([^"]+)" type="application/x-mpegURL"', data.text)
    if not match:
        yield ServiceError("Cant find video file")
        return
    streams = hlsparse(self.options, self.http.request("get", match.group(1)), match.group(1))
    for n in list(streams.keys()):
        yield streams[n]
def get_mediaid(self):
    """Locate the screen9/picsearch media id via a cascade of page patterns.

    Returns a regex match object for most patterns; as a last resort the
    URL fragment string is returned instead — callers must accept both.
    """
    match = re.search(r"mediaId = '([^']+)';", self.get_urldata())
    if not match:
        match = re.search(r'media-id="([^"]+)"', self.get_urldata())
    if not match:
        match = re.search(r'screen9-mid="([^"]+)"', self.get_urldata())
    if not match:
        match = re.search(r'data-id="([^"]+)"', self.get_urldata())
    if not match:
        # The id may only exist inside the picsearch player javascript.
        match = re.search(
            's.src="(https://csp-ssl.picsearch.com[^"]+|http://csp.picsearch.com/rest[^"]+)',
            self.get_urldata())
        if match:
            data = self.http.request("get", match.group(1))
            match = re.search(r'mediaid": "([^"]+)"', data.text)
    if not match:
        # Or inside an embedded screen9 iframe (protocol-relative URL).
        match = re.search('iframe src="(//csp.screen9.com[^"]+)"', self.get_urldata())
        if match:
            url = "http:%s" % match.group(1)
            data = self.http.request("get", url)
            match = re.search(r"mediaid: '([^']+)'", data.text)
    if not match:
        # Last resort: the id was passed as the URL fragment.
        urlp = urlparse(self.url)
        match = urlp.fragment
    return match
def hdsparse(options, res, manifest):
    """Parse an HDS (f4m) manifest response into a dict of bitrate -> HDS stream.

    On HTTP errors or DRM-protected manifests a dict with a single
    ServiceError at key 0 is returned instead of streams.
    """
    streams = {}
    bootstrap = {}
    # Also bail out on 404 (like the newer variant does): feeding an HTML
    # error page to ET.XML would raise a ParseError.
    if res.status_code == 403 or res.status_code == 404:
        streams[0] = ServiceError("Can't read HDS playlist. permission denied")
        return streams
    xml = ET.XML(res.text)
    # getiterator() only exists on very old Python 2 ElementTree.
    if is_py2_old:
        bootstrapIter = xml.getiterator("{http://ns.adobe.com/f4m/1.0}bootstrapInfo")
        mediaIter = xml.getiterator("{http://ns.adobe.com/f4m/1.0}media")
    else:
        bootstrapIter = xml.iter("{http://ns.adobe.com/f4m/1.0}bootstrapInfo")
        mediaIter = xml.iter("{http://ns.adobe.com/f4m/1.0}media")
    if xml.find("{http://ns.adobe.com/f4m/1.0}drmAdditionalHeader") is not None:
        streams[0] = ServiceError("HDS DRM protected content.")
        return streams
    for i in bootstrapIter:
        # Unnamed bootstrapInfo elements are stored under the key "0".
        if "id" in i.attrib:
            bootstrap[i.attrib["id"]] = i.text
        else:
            bootstrap["0"] = i.text
    parse = urlparse(manifest)
    querystring = parse.query
    manifest = "%s://%s%s" % (parse.scheme, parse.netloc, parse.path)
    for i in mediaIter:
        # A single bootstrap serves all media entries; otherwise match by id.
        if len(bootstrap) == 1:
            bootstrapid = bootstrap["0"]
        else:
            bootstrapid = bootstrap[i.attrib["bootstrapInfoId"]]
        streams[int(i.attrib["bitrate"])] = HDS(copy.copy(options), i.attrib["url"], i.attrib["bitrate"], manifest=manifest, bootstrap=bootstrapid, metadata=i.find("{http://ns.adobe.com/f4m/1.0}metadata").text, querystring=querystring, cookies=res.cookies)
    return streams
def get_mediaid(self):
    """Locate the screen9/picsearch media id via a cascade of page patterns.

    Same as the older variant but additionally checks data-videoid.
    Returns a regex match object for most patterns; as a last resort the
    URL fragment string is returned instead — callers must accept both.
    """
    match = re.search(r"mediaId = '([^']+)';", self.get_urldata())
    if not match:
        match = re.search(r'media-id="([^"]+)"', self.get_urldata())
    if not match:
        match = re.search(r'screen9-mid="([^"]+)"', self.get_urldata())
    if not match:
        match = re.search(r'data-id="([^"]+)"', self.get_urldata())
    if not match:
        match = re.search(r'data-videoid="([^"]+)"', self.get_urldata())
    if not match:
        # The id may only exist inside the picsearch player javascript.
        match = re.search('s.src="(https://csp-ssl.picsearch.com[^"]+|http://csp.picsearch.com/rest[^"]+)', self.get_urldata())
        if match:
            data = self.http.request("get", match.group(1))
            match = re.search(r'mediaid": "([^"]+)"', data.text)
    if not match:
        # Or inside an embedded screen9 iframe (protocol-relative URL).
        match = re.search('iframe src="(//csp.screen9.com[^"]+)"', self.get_urldata())
        if match:
            url = "http:%s" % match.group(1)
            data = self.http.request("get", url)
            match = re.search(r"mediaid: '([^']+)'", data.text)
    if not match:
        # Last resort: the id was passed as the URL fragment.
        urlp = urlparse(self.url)
        match = urlp.fragment
    return match
def find_all_episodes(self, options):
    """Return a sorted list of episode (and optionally clip) URLs for the show.

    Reads the embedded ContentPageProgramStore JSON blob; if the page URL
    names a specific season (sasong-N) only that season is included.
    """
    episodes = []
    match = re.search('"ContentPageProgramStore":({.*}),"ApplicationStore',
                      self.get_urldata())
    if match:
        janson = json.loads(match.group(1))
        season = re.search("sasong-(\d+)", urlparse(self.url).path)
        if season:
            season = season.group(1)
        seasons = []
        for i in janson["format"]["seasons"]:
            if season:
                # Restrict to the season named in the URL.
                if int(season) == i["seasonNumber"]:
                    seasons.append(i["seasonNumber"])
            else:
                seasons.append(i["seasonNumber"])
        for i in seasons:
            if "program" in janson["format"]["videos"][str(i)]:
                for n in janson["format"]["videos"][str(i)]["program"]:
                    episodes = self._videos_to_list(
                        n["sharingUrl"], n["id"], episodes)
            if self.options.include_clips:
                if "clip" in janson["format"]["videos"][str(i)]:
                    for n in janson["format"]["videos"][str(i)]["clip"]:
                        episodes = self._videos_to_list(
                            n["sharingUrl"], n["id"], episodes)
    # all_last > 0 limits to the N most recent episodes.
    if options.all_last > 0:
        return sorted(episodes[-options.all_last:])
    return sorted(episodes)
def get(self, options):
    """Yield HLS and MP4 streams for the playlist entry matching the URL's id.

    The page embeds a JSONdata playlist; the entry whose brightcoveId
    equals the last path segment is the requested video.
    """
    error, data = self.get_urldata()
    if error:
        log.error("Can't download webpage")
        return
    if self.exclude(options):
        return
    parse = urlparse(self.url)
    vidoid = parse.path[parse.path.rfind("/")+1:]
    match = re.search(r'JSONdata = ({.*});', data)
    if not match:
        log.error("Cant find json data")
        return
    janson = json.loads(match.group(1))
    playlist = janson["playlist"]
    for i in playlist:
        # NOTE(review): vidoid is a str here while the newer variant of this
        # method compares against int(vidoid) — confirm the JSON id type.
        if i["brightcoveId"] == vidoid:
            if i["HLSURL"]:
                streams = hlsparse(i["HLSURL"])
                for n in list(streams.keys()):
                    yield HLS(copy.copy(options), streams[n], n)
            for n in i["renditions"]:
                if n["container"] == "MP4":
                    yield HTTP(copy.copy(options), n["URL"], int(n["rate"])/1000)
def get(self):
    """Yield HLS streams for an Aftonbladet TV page.

    The player config JSON is embedded in a data attribute; for
    tv.aftonbladet.se the asset is resolved through the SVP API first.
    """
    data = self.get_urldata()
    if self.exclude():
        yield ServiceError("Excluding video")
        return
    apiurl = None
    match = re.search('data-player-config="([^"]+)"', data)
    if not match:
        match = re.search('data-svpPlayer-video="([^"]+)"', data)
        if not match:
            yield ServiceError("Can't find video info")
            return
    data = json.loads(decode_html_entities(match.group(1)))
    # Fixed typo: was "tv.aftonbadet.se" (missing 'l'), so this branch
    # could never match the real hostname.
    if urlparse(self.url).netloc == "tv.aftonbladet.se":
        videoId = data["playerOptions"]["id"]
        apiurl = data["playerOptions"]["api"]
        vendor = data["playerOptions"]["vendor"]
        self.options.live = data["live"]
        if not self.options.live:
            # VOD: swap the player config for the full asset description.
            dataurl = "{0}{1}/assets/{2}?appName=svp-player".format(
                apiurl, vendor, videoId)
            data = self.http.request("get", dataurl).text
            data = json.loads(data)
    streams = hlsparse(self.options, self.http.request("get", data["streamUrls"]["hls"]), data["streamUrls"]["hls"])
    if streams:
        for n in list(streams.keys()):
            yield streams[n]
def get(self):
    """Yield HLS and MP4 streams for the playlist entry matching the URL's id.

    The page embeds a JSONdata playlist; the entry whose brightcoveId
    equals the (numeric) last path segment is the requested video.
    """
    data = self.get_urldata()
    if self.exclude():
        yield ServiceError("Excluding video")
        return
    parse = urlparse(self.url)
    vidoid = parse.path[parse.path.rfind("/") + 1:]
    match = re.search(r'JSONdata = ({.*});', data)
    if not match:
        yield ServiceError("Cant find json data")
        return
    janson = json.loads(match.group(1))
    playlist = janson["playlist"]
    for i in playlist:
        if i["brightcoveId"] == int(vidoid):
            if i["HLSURL"]:
                streams = hlsparse(self.options, self.http.request("get", i["HLSURL"]), i["HLSURL"])
                for n in list(streams.keys()):
                    yield streams[n]
            for n in i["renditions"]:
                if n["container"] == "MP4":
                    yield HTTP(copy.copy(self.options), n["URL"], int(n["rate"]) / 1000)
def find_all_episodes(self, options):
    """Return episode (and optionally clip) URLs for the show.

    Reads the embedded ContentPageProgramStore JSON blob; if the page URL
    names a specific season (sasong-N) only that season is included.
    """
    videos = []
    match = re.search('"ContentPageProgramStore":({.*}),"ApplicationStore',
                      self.get_urldata())
    if match:
        janson = json.loads(match.group(1))
        season = re.search("sasong-(\d+)", urlparse(self.url).path)
        if season:
            season = season.group(1)
        seasons = []
        for i in janson["format"]["seasons"]:
            if season:
                # Restrict to the season named in the URL.
                if int(season) == i["seasonNumber"]:
                    seasons.append(i["seasonNumber"])
            else:
                seasons.append(i["seasonNumber"])
        for i in seasons:
            if "program" in janson["format"]["videos"][str(i)]:
                for n in janson["format"]["videos"][str(i)]["program"]:
                    videos.append(n["sharingUrl"])
            if self.options.include_clips:
                if "clip" in janson["format"]["videos"][str(i)]:
                    for n in janson["format"]["videos"][str(i)]["clip"]:
                        videos.append(n["sharingUrl"])
    # Manual truncation to the first all_last entries.
    # NOTE(review): with the apparent default all_last == -1 this loop keeps
    # everything; an all_last of 0 would return an empty list — confirm intent.
    n = 0
    episodes = []
    for i in videos:
        if n == options.all_last:
            break
        episodes.append(i)
        n += 1
    return episodes
def findvid(url, data):
    """Locate the numeric tv4 video id in *url* or page *data*; None if absent."""
    if "tv4play.se" in url:
        # tv4play carries the id in the query string.
        qs = parse_qs(urlparse(url).query)
        if "video_id" not in qs:
            return None
        return qs["video_id"][0]
    # Plain tv4 pages: try the page JSON, then a trailing -<digits> in the URL.
    for pattern, subject in ((r"\"vid\":\"(\d+)\",", data), (r"-(\d+)$", url)):
        found = re.search(pattern, subject)
        if found:
            return found.group(1)
    # Last resort: the og:video meta tag.
    found = re.search(r"meta content='([^']+)' property='og:video'", data)
    if found:
        inner = re.search(r"vid=(\d+)&", found.group(1))
        if inner:
            return inner.group(1)
        log.error("Can't find video id for %s", url)
        return
    return None
def hdsparse(options, res, manifest):
    """Parse an HDS (f4m) manifest response into a dict of bitrate -> HDS stream.

    On HTTP errors or DRM-protected manifests a dict with a single
    ServiceError at key 0 is returned instead of streams.
    """
    streams = {}
    bootstrap = {}
    if res.status_code == 403 or res.status_code == 404:
        streams[0] = ServiceError("Can't read HDS playlist.")
        return streams
    xml = ET.XML(res.text)
    # getiterator() only exists on very old Python 2 ElementTree.
    if is_py2_old:
        bootstrapIter = xml.getiterator("{http://ns.adobe.com/f4m/1.0}bootstrapInfo")
        mediaIter = xml.getiterator("{http://ns.adobe.com/f4m/1.0}media")
    else:
        bootstrapIter = xml.iter("{http://ns.adobe.com/f4m/1.0}bootstrapInfo")
        mediaIter = xml.iter("{http://ns.adobe.com/f4m/1.0}media")
    if xml.find("{http://ns.adobe.com/f4m/1.0}drmAdditionalHeader") is not None:
        streams[0] = ServiceError("HDS DRM protected content.")
        return streams
    for i in bootstrapIter:
        # Unnamed bootstrapInfo elements are stored under the key "0".
        if "id" in i.attrib:
            bootstrap[i.attrib["id"]] = i.text
        else:
            bootstrap["0"] = i.text
    parse = urlparse(manifest)
    querystring = parse.query
    manifest = "%s://%s%s" % (parse.scheme, parse.netloc, parse.path)
    for i in mediaIter:
        # Fixed: manifests with one unnamed bootstrapInfo have media entries
        # without a bootstrapInfoId attribute; indexing unconditionally raised
        # KeyError there. Fall back to the single entry (as the other variant
        # of this parser already does).
        if len(bootstrap) == 1:
            bootstrapid = bootstrap["0"]
        else:
            bootstrapid = bootstrap[i.attrib["bootstrapInfoId"]]
        streams[int(i.attrib["bitrate"])] = HDS(copy.copy(options), i.attrib["url"], i.attrib["bitrate"], manifest=manifest, bootstrap=bootstrapid, metadata=i.find("{http://ns.adobe.com/f4m/1.0}metadata").text, querystring=querystring, cookies=res.cookies)
    return streams
def find_all_episodes(self, options):
    """Return episode URLs for a DR show (str.format variant).

    DR hides its listing endpoints behind base64-encoded paths on
    "show more" buttons; decode one, request the full listing
    (offset=0&limit=1000) and scrape program links. Falls back to
    scraping the current page when no button is found.
    """
    episodes = []
    matches = re.findall(r'<button class="show-more" data-url="([^"]+)" data-partial="([^"]+)"',
                         self.get_urldata())
    for encpath, enccomp in matches:
        # New-style tokens are "<b64path>_<b64params>".
        newstyle = '_' in encpath
        if newstyle:
            encbasepath = encpath.split('_')[0]
            # '===' pads to a valid base64 length regardless of input.
            path = base64.b64decode(encbasepath + '===').decode('latin1') if is_py3 else base64.b64decode(encbasepath + '===')
        else:
            path = base64.b64decode(encpath + '===').decode('latin1') if is_py3 else base64.b64decode(encpath + '===')
        if '/view/' in path:
            continue
        params = 'offset=0&limit=1000'
        if newstyle:
            encparams = base64.b64encode(params.encode('latin1')).decode('latin1').rstrip('=') if is_py3 else \
                base64.b64encode(params).rstrip('=')
            encpath = '{0}_{1}'.format(encbasepath, encparams)
        else:
            path = '{0}?{1}'.format(urlparse(path).path, params)
            encpath = base64.b64encode(path.encode('latin1')).decode('latin1').rstrip('=') if is_py3 else \
                base64.b64encode(path).rstrip('=')
        url = urljoin('https://www.dr.dk/tv/partial/', '{0}/{1}'.format(enccomp, encpath))
        data = self.http.request('get', url).content.decode('latin1') if is_py3 else \
            self.http.request('get', url).content
        matches = re.findall(r'"program-link" href="([^"]+)">', data)
        episodes = [urljoin('https://www.dr.dk/', url) for url in matches]
        # Only the first usable button is needed.
        break
    if not episodes:
        # Fallback: program links on the current page sharing this show's path prefix.
        prefix = '/'.join(urlparse(self.url).path.rstrip('/').split('/')[:-1])
        matches = re.findall(r'"program-link" href="([^"]+)">', self.get_urldata())
        episodes = [urljoin('https://www.dr.dk/', url) for url in matches if url.startswith(prefix)]
    # all_last == -1 (the default) means "all episodes, oldest first".
    if options.all_last != -1:
        episodes = episodes[:options.all_last]
    else:
        episodes.reverse()
    return episodes
def _get_show_info(self):
    """Fetch and decode the show's full episode listing from the tv4play web API."""
    path = urlparse(self.url).path
    show = path[path.find("/", 1) + 1:]
    # Already percent-encoded slugs are used as-is.
    if not re.search("%", show):
        show = quote_plus(show)
    url = "http://webapi.tv4play.se/play/video_assets?type=episode&is_live=false&platform=web&node_nids=%s&per_page=99999" % show
    return json.loads(self.http.request("get", url).text)
def videos_to_list(self, lvideos, videos):
    """Append each video's URL path to *videos*, skipping duplicates and excluded output names."""
    for video in lvideos:
        path = urlparse(video["contentUrl"]).path
        if path in videos:
            continue
        filename = self.outputfilename(video, self.options.output)
        if not self.exclude2(filename):
            videos.append(path)
    return videos
def _country2lang(self): parse = urlparse(self.url) domain = re.search(r"dplay\.(\w\w)", parse.netloc).group(1) country = {"se": "sv", "no": "no", "dk": "da"} if domain and domain in country: return country[domain] else: return "sv"
def find_all_episodes(self, options):
    """Return sorted episode URLs, preferring an RSS feed when one exists,
    otherwise walking the embedded __svtplay state blob.
    """
    # NOTE(review): parses self._url here but reads self.url below —
    # confirm both attributes exist and agree on the service object.
    parse = urlparse(self._url)
    if len(parse.path) > 7 and parse.path[-7:] == "rss.xml":
        match = self.url
    else:
        match = re.search(
            r'<link rel="alternate" type="application/rss\+xml" [^>]*href="([^"]+)"',
            self.get_urldata())
        if match:
            match = match.group(1)
    if match is None:
        # No RSS feed: scrape the javascript state blob instead.
        videos = []
        tab = None
        match = re.search("__svtplay'] = ({.*});", self.get_urldata())
        if re.search("sista-chansen", parse.path):
            videos = self._last_chance(videos, 1)
        elif not match:
            log.error("Couldn't retrieve episode list")
            return
        else:
            dataj = json.loads(match.group(1))
            if re.search("/genre", parse.path):
                videos = self._genre(dataj)
            else:
                # An explicit ?tab=... query restricts to that tab.
                if parse.query:
                    match = re.search("tab=(.+)", parse.query)
                    if match:
                        tab = match.group(1)
                items = dataj["videoTitlePage"]["relatedVideosTabs"]
                for i in items:
                    if tab:
                        if i["slug"] == tab:
                            videos = self.videos_to_list(
                                i["videos"], videos)
                    else:
                        # Default tabs: seasons and "latest".
                        if "sasong" in i["slug"] or "senast" in i["slug"]:
                            videos = self.videos_to_list(
                                i["videos"], videos)
                    if self.options.include_clips:
                        if i["slug"] == "klipp":
                            videos = self.videos_to_list(
                                i["videos"], videos)
        episodes = [urljoin("http://www.svtplay.se", x) for x in videos]
    else:
        # RSS feed found: item links are the episode URLs.
        data = self.http.request("get", match).content
        xml = ET.XML(data)
        episodes = [x.text for x in xml.findall(".//item/link")]
    if options.all_last > 0:
        return sorted(episodes[-options.all_last:])
    return sorted(episodes)
def videos_to_list(self, lvideos, videos):
    """Append the path for one video entry to *videos* unless already present or excluded."""
    candidate = urlparse(self.url + "/" + str(lvideos["id"])).path
    if candidate not in videos:
        filename = self.outputfilename(lvideos, self.options.output)
        if not self.exclude2(filename):
            videos.append(candidate)
    return videos
def get(self):
    """Yield HLS streams for a solidtango-hosted video.

    Follows up to two embed indirections (herokuapp resource service ->
    solidtango player -> title URL) before probing the player page, and
    falls back to the site's play API XML when no HLS source is inlined.
    """
    data = self.get_urldata()
    if self.exclude():
        yield ServiceError("Excluding video")
        return
    match = re.search('src="(http://mm-resource-service.herokuapp.com[^"]*)"', data)
    if match:
        data = self.http.request("get", match.group(1)).text
        match = re.search('src="(https://[^"]+solidtango[^"]+)" ', data)
        if match:
            data = self.http.request("get", match.group(1)).text
    match = re.search(r'<title>(http[^<]+)</title>', data)
    if match:
        data = self.http.request("get", match.group(1)).text
    # Two spellings of the live flag exist in the wild.
    match = re.search('is_livestream: true', data)
    if match:
        self.options.live = True
    match = re.search('isLivestream: true', data)
    if match:
        self.options.live = True
    match = re.search('html5_source: "([^"]+)"', data)
    match2 = re.search('hlsURI: "([^"]+)"', data)
    if match:
        streams = hlsparse(self.options, self.http.request("get", match.group(1)), match.group(1))
        for n in list(streams.keys()):
            yield streams[n]
    elif match2:
        streams = hlsparse(self.options, self.http.request("get", match2.group(1)), match2.group(1))
        for n in list(streams.keys()):
            yield streams[n]
    else:
        # Fallback: the play API describes the manifest in XML.
        parse = urlparse(self.url)
        url2 = "https://{0}/api/v1/play/{1}.xml".format(
            parse.netloc, parse.path[parse.path.rfind("/") + 1:])
        data = self.http.request("get", url2)
        if data.status_code != 200:
            yield ServiceError(
                "Can't find video info. if there is a video on the page. its a bug."
            )
            return
        xmldoc = data.text
        # Python 2 ElementTree wants bytes, not unicode.
        if is_py2 and isinstance(xmldoc, unicode):
            xmldoc = xmldoc.encode("utf8")
        xml = ET.XML(xmldoc)
        elements = xml.findall(".//manifest")
        streams = hlsparse(self.options, self.http.request("get", elements[0].text), elements[0].text)
        for n in list(streams.keys()):
            yield streams[n]
def get(self):
    """Yield streams and subtitles for an SVT Play video page.

    Reads the embedded __svtplay state, optionally re-resolves an
    alternative accessService version, then fetches the videoplayer API
    description and delegates to _get_video.
    """
    parse = urlparse(self.url)
    if parse.netloc == "www.svtplay.se" or parse.netloc == "svtplay.se":
        # Only direct video/clip URLs are supported.
        if parse.path[:6] != "/video" and parse.path[:6] != "/klipp":
            yield ServiceError("This mode is not supported anymore. Need the url with the video.")
            return
    query = parse_qs(parse.query)
    self.access = None
    if "accessService" in query:
        self.access = query["accessService"]
    match = re.search("__svtplay'] = ({.*});", self.get_urldata())
    if not match:
        yield ServiceError("Can't find video info.")
        return
    janson = json.loads(match.group(1))["videoPage"]
    if "programTitle" not in janson["video"]:
        yield ServiceError("Can't find any video on that page.")
        return
    if self.access:
        # Swap in the requested access-service version of the video.
        for i in janson["video"]["versions"]:
            if i["accessService"] == self.access:
                url = urljoin("http://www.svtplay.se", i["contentUrl"])
                res = self.http.get(url)
                match = re.search("__svtplay'] = ({.*});", res.text)
                if not match:
                    yield ServiceError("Can't find video info.")
                    return
                janson = json.loads(match.group(1))["videoPage"]
    if "live" in janson["video"]:
        self.options.live = janson["video"]["live"]
    if self.options.output_auto:
        self.options.service = "svtplay"
        self.options.output = self.outputfilename(janson["video"], self.options.output)
    if self.exclude():
        yield ServiceError("Excluding video.")
        return
    if "programVersionId" in janson["video"]:
        vid = janson["video"]["programVersionId"]
    else:
        vid = janson["video"]["id"]
    res = self.http.get("http://api.svt.se/videoplayer-api/video/{0}".format(vid))
    try:
        janson = res.json()
    except json.decoder.JSONDecodeError:
        yield ServiceError("Can't decode api request: {0}".format(res.request.url))
        return
    videos = self._get_video(janson)
    for i in videos:
        yield i
def get(self):
    """Yield streams and subtitles for an SVT Play video page.

    Reads the embedded __svtplay state, optionally re-resolves an
    alternative accessService version, then fetches the videoplayer API
    description and delegates to _get_video.
    """
    parse = urlparse(self.url)
    if parse.netloc == "www.svtplay.se" or parse.netloc == "svtplay.se":
        # Only direct video/clip URLs are supported.
        if parse.path[:6] != "/video" and parse.path[:6] != "/klipp":
            yield ServiceError(
                "This mode is not supported anymore. need the url with the video"
            )
            return
    query = parse_qs(parse.query)
    self.access = None
    if "accessService" in query:
        self.access = query["accessService"]
    match = re.search("__svtplay'] = ({.*});", self.get_urldata())
    if not match:
        yield ServiceError("Cant find video info.")
        return
    janson = json.loads(match.group(1))["videoPage"]
    if "programTitle" not in janson["video"]:
        yield ServiceError("Can't find any video on that page")
        return
    if self.access:
        # Swap in the requested access-service version of the video.
        for i in janson["video"]["versions"]:
            if i["accessService"] == self.access:
                url = urljoin("http://www.svtplay.se", i["contentUrl"])
                res = self.http.get(url)
                match = re.search("__svtplay'] = ({.*});", res.text)
                if not match:
                    yield ServiceError("Cant find video info.")
                    return
                janson = json.loads(match.group(1))["videoPage"]
    if "live" in janson["video"]:
        self.options.live = janson["video"]["live"]
    if self.options.output_auto:
        self.options.service = "svtplay"
        self.options.output = self.outputfilename(janson["video"], self.options.output)
    if self.exclude():
        yield ServiceError("Excluding video")
        return
    if "programVersionId" in janson["video"]:
        vid = janson["video"]["programVersionId"]
    else:
        vid = janson["video"]["id"]
    res = self.http.get(
        "http://api.svt.se/videoplayer-api/video/{0}".format(vid))
    # Fixed: guard the API decode like the sibling variant of this method —
    # a non-JSON (error) response previously crashed with a raw
    # JSONDecodeError instead of reporting a ServiceError.
    try:
        janson = res.json()
    except json.decoder.JSONDecodeError:
        yield ServiceError("Can't decode api request: {0}".format(res.request.url))
        return
    videos = self._get_video(janson)
    for i in videos:
        yield i
def get(self, options):
    """Yield streams for a Disney video.

    video.disney.se embeds plain mp4 flavors in a "Grill.burger" JSON
    blob; other hosts go through the Kaltura embed player, which requires
    assembling a playManifest HLS URL from ids scraped off the page.
    """
    parse = urlparse(self.url)
    if parse.hostname == "video.disney.se":
        match = re.search(r"Grill.burger=({.*}):", self.get_urldata())
        if not match:
            log.error("Can't find video info")
            return
        jsondata = json.loads(match.group(1))
        for n in jsondata["stack"]:
            if len(n["data"]) > 0:
                for x in n["data"]:
                    if "flavors" in x:
                        for i in x["flavors"]:
                            if i["format"] == "mp4":
                                yield HTTP(copy.copy(options), i["url"], i["bitrate"])
    else:
        # Kaltura player: collect the four ids the embed frame needs.
        match = re.search(r"uniqueId : '([^']+)'", self.get_urldata())
        if not match:
            log.error("Can't find video info")
            return
        uniq = match.group(1)
        match = re.search("entryId : '([^']+)'", self.get_urldata())
        entryid = match.group(1)
        match = re.search("partnerId : '([^']+)'", self.get_urldata())
        partnerid = match.group(1)
        match = re.search("uiConfId : '([^']+)'", self.get_urldata())
        uiconfid = match.group(1)
        url = "http://cdnapi.kaltura.com/html5/html5lib/v1.9.7.6/mwEmbedFrame.php?&wid=%s&uiconf_id=%s&entry_id=%s&playerId=%s&forceMobileHTML5=true&urid=1.9.7.6&callback=mwi" % \
            (partnerid, uiconfid, entryid, uniq)
        data = get_http_data(url)
        # The embed frame wraps its payload in a mwi(...) JSONP callback.
        match = re.search(r"mwi\(({.*})\);", data)
        jsondata = json.loads(match.group(1))
        data = jsondata["content"]
        match = re.search(r"window.kalturaIframePackageData = ({.*});", data)
        jsondata = json.loads(match.group(1))
        # "enviornmentConfig" [sic] is Kaltura's own key spelling.
        ks = jsondata["enviornmentConfig"]["ks"]
        if options.output_auto:
            name = jsondata["entryResult"]["meta"]["name"]
            directory = os.path.dirname(options.output)
            options.service = "disney"
            title = "%s-%s" % (name, options.service)
            title = filenamify(title)
            if len(directory):
                options.output = "%s/%s" % (directory, title)
            else:
                options.output = title
        url = "http://cdnapi.kaltura.com/p/%s/sp/%s00/playManifest/entryId/%s/format/applehttp/protocol/http/a.m3u8?ks=%s&referrer=aHR0cDovL3d3dy5kaXNuZXkuc2U=&" % (
            partnerid[1:], partnerid[1:], entryid, ks)
        redirect = check_redirect(url)
        streams = hlsparse(redirect)
        for n in list(streams.keys()):
            yield HLS(copy.copy(options), streams[n], n)
def _get_showname(self, url): parse = urlparse(self.url) if parse.path.count("/") > 2: match = re.search("^/([^/]+)/", parse.path) show = match.group(1) else: show = parse.path[parse.path.find("/", 1)+1:] if not re.search("%", show): show = quote_plus(show) return show
def get_http_data(url, header=None, post=None, useragent=FIREFOX_UA,
                  referer=None, cookiejar=None):
    """ Get the page to parse it for streams

    Returns a (error, data) tuple: error is None on success and True on
    an HTTP/socket failure; data is the (possibly utf-8 decoded) body.
    """
    if not cookiejar:
        cookiejar = CookieJar()
    # Adobe HDS manifests need the hdcore parameter appended.
    if url.find("manifest.f4m") > 0:
        parse = urlparse(url)
        url = "%s://%s%s?%s&hdcore=3.3.0" % (parse.scheme, parse.netloc, parse.path, parse.query)
    log.debug("HTTP getting %r", url)
    starttime = time.time()
    error = None
    if post:
        if is_py3:
            # urllib on py3 requires bytes for POST bodies.
            post = bytes(post, encoding="utf-8")
        request = Request(url, data=post)
    else:
        request = Request(url)
    # Only send headers that actually have a value.
    standard_header = {'Referer': referer, 'User-Agent': useragent}
    for key, value in [head for head in standard_header.items() if head[1]]:
        request.add_header(key, value)
    if header:
        for key, value in [head for head in header.items() if head[1]]:
            request.add_header(key, value)
    opener = build_opener(HTTPCookieProcessor(cookiejar))
    try:
        response = opener.open(request)
    except HTTPError as e:
        # Return the error body so callers can inspect the server message.
        error = True
        data = e.read()
        return error, data
    if is_py3:
        data = response.read()
        try:
            data = data.decode("utf-8")
        except UnicodeDecodeError:
            # Binary payload: hand back raw bytes.
            pass
    else:
        try:
            data = response.read()
        except socket.error as e:
            # NOTE(review): returns without response.close() on this path.
            return True, "Lost the connection to the server"
    response.close()
    spent_time = time.time() - starttime
    # Clamp the divisor so an instantaneous response can't divide by zero.
    bps = 8 * len(data) / max(spent_time, 0.001)
    log.debug("HTTP got %d bytes from %r in %.2fs (= %dbps)", len(data), url, spent_time, bps)
    return error, data
def _get_video(self, janson):
    """Yield subtitle and stream objects for one video-API response.

    Dispatches each videoReference by its "format" (hls/hds/dash) and
    also follows the optional "alt" query-parameter URL for each one.
    """
    if "live" in janson:
        self.options.live = janson["live"]
    if "subtitleReferences" in janson:
        for i in janson["subtitleReferences"]:
            if i["format"] == "websrt" and "url" in i:
                # NOTE(review): "wrst" looks like a typo of "wsrt", but it
                # is a runtime tag consumed elsewhere — left as-is.
                yield subtitle(copy.copy(self.options), "wrst", i["url"])
    if "videoReferences" in janson:
        if len(janson["videoReferences"]) == 0:
            yield ServiceError("Media doesn't have any associated videos (yet?)")
            return
        for i in janson["videoReferences"]:
            parse = urlparse(i["url"])
            query = parse_qs(parse.query)
            if i["format"] == "hls":
                streams = hlsparse(self.options, self.http.request("get", i["url"]), i["url"])
                if streams:
                    for n in list(streams.keys()):
                        yield streams[n]
                # "alt" points at an alternate playlist location.
                if "alt" in query and len(query["alt"]) > 0:
                    alt = self.http.get(query["alt"][0])
                    if alt:
                        streams = hlsparse(self.options, self.http.request("get", alt.request.url), alt.request.url)
                        if streams:
                            for n in list(streams.keys()):
                                yield streams[n]
            if i["format"] == "hds":
                # DRM-protected (/se/secure/) HDS streams are skipped.
                match = re.search(r"\/se\/secure\/", i["url"])
                if not match:
                    streams = hdsparse(self.options, self.http.request("get", i["url"], params={"hdcore": "3.7.0"}), i["url"])
                    if streams:
                        for n in list(streams.keys()):
                            yield streams[n]
                    if "alt" in query and len(query["alt"]) > 0:
                        alt = self.http.get(query["alt"][0])
                        if alt:
                            streams = hdsparse(self.options, self.http.request("get", alt.request.url, params={"hdcore": "3.7.0"}), alt.request.url)
                            if streams:
                                for n in list(streams.keys()):
                                    yield streams[n]
            if i["format"] == "dash264" or i["format"] == "dashhbbtv":
                streams = dashparse(self.options, self.http.request("get", i["url"]), i["url"])
                if streams:
                    for n in list(streams.keys()):
                        yield streams[n]
                if "alt" in query and len(query["alt"]) > 0:
                    alt = self.http.get(query["alt"][0])
                    if alt:
                        streams = dashparse(self.options, self.http.request("get", alt.request.url), alt.request.url)
                        if streams:
                            for n in list(streams.keys()):
                                yield streams[n]
def videos_to_list(self, lvideos, videos):
    """Append the unique, non-excluded content paths from *lvideos*
    (and their access-service versions) to *videos* and return it."""
    for entry in lvideos:
        main_path = urlparse(entry["contentUrl"]).path
        if main_path not in videos:
            name = self.outputfilename(entry, self.options.output)
            if not self.exclude2(name):
                videos.append(main_path)
        # Alternate versions (audio description / sign interpretation)
        # are filtered by a suffix-only name; output is None here.
        for version in entry.get("versions", []):
            version_path = urlparse(version["contentUrl"]).path
            suffix = ""
            if "accessService" in version:
                if version["accessService"] == "audioDescription":
                    suffix += "-syntolkat"
                if version["accessService"] == "signInterpretation":
                    suffix += "-teckentolkat"
            if not self.exclude2(suffix) and version_path not in videos:
                videos.append(version_path)
    return videos
def get(self, options):
    """Yield playable streams for an MTG/Viasat (playapi.mtgx.tv) video.

    Resolves the video id, fetches metadata (live flag, SAMI subtitles)
    and the stream listing, then yields HDS, RTMP and HLS streams as
    applicable.  Exits the process when no video id can be found or the
    RTMP path is malformed; returns early on API error messages.
    """
    vid = self._get_video_id()
    if vid is None:
        log.error("Cant find video file")
        sys.exit(2)

    url = "http://playapi.mtgx.tv/v3/videos/%s" % vid
    options.other = ""
    data = get_http_data(url)
    dataj = json.loads(data)
    if "msg" in dataj:
        # API-level error; the message text comes from the service.
        log.error("%s" % dataj["msg"])
        return

    if dataj["type"] == "live":
        options.live = True

    if dataj["sami_path"]:
        yield subtitle_sami(dataj["sami_path"])

    streams = get_http_data("http://playapi.mtgx.tv/v3/videos/stream/%s" % vid)
    streamj = json.loads(streams)

    if "msg" in streamj:
        log.error(
            "Can't play this because the video is either not found or geoblocked."
        )
        return

    if streamj["streams"]["medium"]:
        filename = streamj["streams"]["medium"]
        if filename[len(filename) - 3:] == "f4m":
            manifest = "%s?hdcore=2.8.0&g=hejsan" % filename
            streams = hdsparse(copy.copy(options), manifest)
            if streams:
                for n in list(streams.keys()):
                    yield streams[n]
        else:
            parse = urlparse(filename)
            match = re.search("^(/[^/]+)/(.*)", parse.path)
            if not match:
                log.error("Somthing wrong with rtmpparse")
                sys.exit(2)
            # BUGFIX: urlparse().port is None when the URL carries no
            # explicit port, which previously produced the malformed URL
            # "rtmp://host:None/app".  Fall back to RTMP's default port.
            port = parse.port if parse.port else 1935
            filename = "%s://%s:%s%s" % (parse.scheme, parse.hostname, port, match.group(1))
            path = "-y %s" % match.group(2)
            options.other = "-W http://flvplayer.viastream.viasat.tv/flvplayer/play/swf/player.swf %s" % path
            yield RTMP(copy.copy(options), filename, 800)

    if streamj["streams"]["hls"]:
        streams = hlsparse(streamj["streams"]["hls"])
        for n in list(streams.keys()):
            yield HLS(copy.copy(options), streams[n], n)
def get(self, options):
    """Yield streams for a Disney page (duplicate scraping flow).

    The Swedish video host embeds mp4 flavors directly in a JSON blob;
    any other host is resolved through the Kaltura embed player and an
    HLS manifest.
    """
    parse = urlparse(self.url)
    if parse.hostname == "video.disney.se":
        match = re.search(r"Grill.burger=({.*}):", self.get_urldata())
        if not match:
            log.error("Can't find video info")
            return
        jsondata = json.loads(match.group(1))
        for n in jsondata["stack"]:
            if len(n["data"]) > 0:
                for x in n["data"]:
                    if "flavors" in x:
                        for i in x["flavors"]:
                            if i["format"] == "mp4":
                                yield HTTP(copy.copy(options), i["url"], i["bitrate"])
    else:
        match = re.search(r"uniqueId : '([^']+)'", self.get_urldata())
        if not match:
            log.error("Can't find video info")
            return
        uniq = match.group(1)
        # NOTE(review): these follow-up searches assume the ids are always
        # present once uniqueId matched; a miss raises AttributeError.
        match = re.search("entryId : '([^']+)'", self.get_urldata())
        entryid = match.group(1)
        match = re.search("partnerId : '([^']+)'", self.get_urldata())
        partnerid = match.group(1)
        match = re.search("uiConfId : '([^']+)'", self.get_urldata())
        uiconfid = match.group(1)

        # The embed frame's mwi(...) callback carries the iframe package
        # data, which contains the Kaltura session key (ks).
        url = "http://cdnapi.kaltura.com/html5/html5lib/v1.9.7.6/mwEmbedFrame.php?&wid=%s&uiconf_id=%s&entry_id=%s&playerId=%s&forceMobileHTML5=true&urid=1.9.7.6&callback=mwi" % \
            (partnerid, uiconfid, entryid, uniq)
        data = get_http_data(url)
        match = re.search(r"mwi\(({.*})\);", data)
        jsondata = json.loads(match.group(1))
        data = jsondata["content"]
        match = re.search(r"window.kalturaIframePackageData = ({.*});", data)
        jsondata = json.loads(match.group(1))
        # "enviornmentConfig" [sic] is the actual key in Kaltura's payload.
        ks = jsondata["enviornmentConfig"]["ks"]

        if options.output_auto:
            name = jsondata["entryResult"]["meta"]["name"]
            directory = os.path.dirname(options.output)
            options.service = "disney"
            title = "%s-%s" % (name, options.service)
            title = filenamify(title)
            if len(directory):
                options.output = "%s/%s" % (directory, title)
            else:
                options.output = title

        # partnerid[1:] strips the leading character for the manifest URL;
        # referrer is a base64-encoded site URL required by the CDN.
        url = "http://cdnapi.kaltura.com/p/%s/sp/%s00/playManifest/entryId/%s/format/applehttp/protocol/http/a.m3u8?ks=%s&referrer=aHR0cDovL3d3dy5kaXNuZXkuc2U=&" % (partnerid[1:], partnerid[1:], entryid, ks)
        redirect = check_redirect(url)
        streams = hlsparse(redirect)
        for n in list(streams.keys()):
            yield HLS(copy.copy(options), streams[n], n)
def _get_showname(self, url): parse = urlparse(self.url) if parse.path.count("/") > 2: match = re.search("^/([^/]+)/", parse.path) show = match.group(1) else: show = parse.path[parse.path.find("/", 1) + 1:] if not re.search("%", show): if is_py2 and isinstance(show, unicode): show = show.encode("utf-8") show = quote_plus(show) return show
def handles(cls, url):
    """Return True when *url* matches a supported domain (bare or with a
    www. prefix) and its path starts with the supported path prefix."""
    parts = urlparse(url)
    if not parts.path.startswith(cls.supported_path):
        return False
    # Accept each listed domain both as-is and with a www. subdomain.
    accepted = list(cls.supported_domains)
    accepted += ["www." + domain for domain in cls.supported_domains]
    return parts.netloc in accepted
def _get_showname(self, url): parse = urlparse(self.url) if parse.path.count("/") > 2: match = re.search("^/([^/]+)/", parse.path) show = match.group(1) else: show = parse.path[parse.path.find("/", 1)+1:] if not re.search("%", show): if is_py2 and isinstance(show, unicode): show = show.encode("utf-8") show = quote_plus(show) return show
def find_all_episodes(self, options):
    """Return a sorted list of episode URLs for the current show.

    Prefers an RSS feed (direct rss.xml URL or a <link rel="alternate">
    on the page); otherwise falls back to the embedded __svtplay JSON.
    options.all_last > 0 limits the result to the N most recent items.
    """
    parse = urlparse(self._url)
    # Direct link to an RSS feed?
    if len(parse.path) > 7 and parse.path[-7:] == "rss.xml":
        match = self.url
    else:
        match = re.search(r'<link rel="alternate" type="application/rss\+xml" [^>]*href="([^"]+)"', self.get_urldata())
        if match:
            match = match.group(1)
    if match is None:
        # No RSS feed: scrape the page's embedded JSON instead.
        videos = []
        tab = None
        match = re.search("__svtplay'] = ({.*});", self.get_urldata())
        if re.search("sista-chansen", parse.path):
            videos = self._last_chance(videos, 1)
        elif not match:
            log.error("Couldn't retrieve episode list")
            return
        else:
            dataj = json.loads(match.group(1))
            if re.search("/genre", parse.path):
                videos = self._genre(dataj)
            else:
                # An explicit ?tab=... restricts collection to that tab.
                if parse.query:
                    match = re.search("tab=(.+)", parse.query)
                    if match:
                        tab = match.group(1)
                items = dataj["videoTitlePage"]["relatedVideosTabs"]
                for i in items:
                    if tab:
                        if i["slug"] == tab:
                            videos = self.videos_to_list(i["videos"], videos)
                    else:
                        # Default: season ("sasong") and latest ("senast") tabs.
                        if "sasong" in i["slug"] or "senast" in i["slug"]:
                            videos = self.videos_to_list(i["videos"], videos)
                    if self.options.include_clips:
                        if i["slug"] == "klipp":
                            videos = self.videos_to_list(i["videos"], videos)
        episodes = [urljoin("http://www.svtplay.se", x) for x in videos]
    else:
        # RSS path: every <item><link> is an episode URL.
        data = self.http.request("get", match).content
        xml = ET.XML(data)
        episodes = [x.text for x in xml.findall(".//item/link")]
    if options.all_last > 0:
        return sorted(episodes[-options.all_last:])
    return sorted(episodes)
def _get_video_id(self):
    """
    Extract video id. It will try to avoid making an HTTP request
    if it can find the ID in the URL, but otherwise it will try
    to scrape it from the HTML document. Returns None in case it's
    unable to extract the ID at all.
    """
    html_data = self.get_urldata()
    match = re.search(r'data-video-id="([0-9]+)"', html_data)
    if match:
        return match.group(1)
    # NOTE(review): the pattern below lacks a closing quote [sic]; it
    # still matches the digits, so behavior is unchanged in practice.
    match = re.search(r'data-videoid="([0-9]+)', html_data)
    if match:
        return match.group(1)
    clips = False
    # Embedded route params: season / episode-or-video-id information.
    match = re.search('params":({.*}),"query', self.get_urldata())
    if match:
        jansson = json.loads(match.group(1))
        season = jansson["seasonNumberOrVideoId"]
        if "videoIdOrEpisodeNumber" in jansson:
            videp = jansson["videoIdOrEpisodeNumber"]
            match = re.search('(episode|avsnitt)-(\d+)', videp)
            if match:
                episodenr = match.group(2)
            else:
                # Not an episode slug: treat the value as a clip id.
                episodenr = videp
                clips = True
        else:
            episodenr = season
        if clips:
            return episodenr
        else:
            # Map the episode number to an id via the program store JSON.
            match = re.search('"ContentPageProgramStore":({.*}),"ApplicationStore', self.get_urldata())
            if match:
                janson = json.loads(match.group(1))
                for i in janson["format"]["videos"].keys():
                    for n in janson["format"]["videos"][i]["program"]:
                        # NOTE(review): str(...) is always truthy for a
                        # number; int(episodenr) can raise if episodenr is
                        # non-numeric — confirm intended behavior.
                        if str(n["episodeNumber"]) and int(episodenr) == n["episodeNumber"]:
                            return n["id"]
                        elif n["id"] == episodenr:
                            return episodenr
    # Fallbacks: a numeric id in the URL path, then an embedded iframe.
    parse = urlparse(self.url)
    match = re.search(r'/\w+/(\d+)', parse.path)
    if match:
        return match.group(1)
    match = re.search(r'iframe src="http://play.juicyplay.se[^\"]+id=(\d+)', html_data)
    if match:
        return match.group(1)
    return None
def _login(self, options):
    """Authenticate against Dplay's secure login API.

    Fetches the login page to obtain cookies and the realm code, posts
    the credentials, and stores the session cookies on *options*.
    Returns True on HTTP 200, False otherwise.
    """
    host = urlparse(self.url).netloc
    domain = re.search(r"(dplay\.\w\w)", host).group(1)
    response = self.http.request("get", "https://secure.%s/login/" % domain, cookies={})
    options.cookies = response.cookies
    realm = re.search('realm_code" value="([^"]+)"', response.text)
    payload = {
        "username": options.username,
        "password": options.password,
        "remember_me": "true",
        "realm_code": realm.group(1),
    }
    response = self.http.request(
        "post",
        "https://secure.%s/secure/api/v1/user/auth/login" % domain,
        data=payload,
        cookies=options.cookies,
    )
    if response.status_code != 200:
        return False
    options.cookies = response.cookies
    return True
def find_all_episodes(self, options):
    """Return a sorted list of episode URLs (older scraping flow).

    Uses an RSS feed when available, otherwise the page's embedded
    _svtplay JSON. options.all_last caps how many episodes are kept.
    """
    parse = urlparse(self._url)
    # Direct rss.xml URL short-circuits the page scrape.
    if len(parse.path) > 7 and parse.path[-7:] == "rss.xml":
        match = self.url
    else:
        match = re.search(
            r'<link rel="alternate" type="application/rss\+xml" [^>]*href="([^"]+)"',
            self.get_urldata())
        if match:
            match = match.group(1)
    if match is None:
        videos = []
        match = re.search('_svtplay"] = ({.*});', self.get_urldata())
        if match:
            dataj = json.loads(match.group(1))
        else:
            log.error("Couldn't retrieve episode list")
            return
        if re.search("sista-chansen", parse.path):
            videos = self._last_chance(videos, 1)
        elif re.search("/genre", parse.path):
            videos = self._genre(dataj)
        else:
            # Collect from the season ("sasong") and latest ("senast") tabs.
            items = dataj["context"]["dispatcher"]["stores"][
                "VideoTitlePageStore"]["data"]["relatedVideoTabs"]
            for i in items:
                if "sasong" in i["slug"]:
                    for n in i["videos"]:
                        if n["url"] not in videos:
                            videos.append(n["url"])
                if "senast" in i["slug"]:
                    for n in i["videos"]:
                        if n["url"] not in videos:
                            videos.append(n["url"])
        episodes = [urljoin("http://www.svtplay.se", x) for x in videos]
    else:
        # RSS path: each <item><link> holds an episode URL.
        data = self.http.request("get", match).content
        xml = ET.XML(data)
        episodes = [x.text for x in xml.findall(".//item/link")]
    # Keep at most options.all_last episodes (0 keeps everything, since
    # the counter starts at 1 and never equals 0).
    episodes_new = []
    n = 1
    for i in episodes:
        episodes_new.append(i)
        if n == options.all_last:
            break
        n += 1
    return sorted(episodes_new)
def _genre(self, jansson): videos = [] parse = urlparse(self._url) dataj = jansson["clusterPage"] tab = re.search("tab=(.+)", parse.query) if tab: tab = tab.group(1) for i in dataj["tabs"]: if i["slug"] == tab: videos = self.videos_to_list(i["content"], videos) else: videos = self.videos_to_list(dataj["clips"], videos) return videos