def adaptionset(element, url, baseurl=None, offset_sec=None, duration_sec=None):
    """Parse a DASH AdaptationSet into a mapping of bitrate (kbit/s) to
    {"segments": bool, "files": [url, ...]}.

    element: list of AdaptationSet elements (only element[0] is used).
    url: manifest url; relative urls are resolved against its directory.
    baseurl: optional BaseURL to resolve against first.
    offset_sec/duration_sec: passed straight through to templateelemt().
    """
    ns = "{urn:mpeg:dash:schema:mpd:2011}"
    streams = {}
    dirname = os.path.dirname(url) + "/"
    if baseurl:
        dirname = urljoin(dirname, baseurl)
    # A SegmentTemplate at AdaptationSet level applies to every Representation.
    template = element[0].find(ns + "SegmentTemplate")
    for rep in element[0].findall(".//" + ns + "Representation"):
        bitrate = int(rep.attrib["bandwidth"]) / 1000
        idnumber = rep.attrib["id"]
        filename = dirname
        files = []
        segments = False
        base = rep.find(ns + "BaseURL")
        if base is not None:
            filename = urljoin(filename, base.text)
        if rep.find(ns + "SegmentBase") is not None:
            # Single-file representation: the BaseURL itself is the media.
            segments = True
            files.append(filename)
        rep_template = rep.find(ns + "SegmentTemplate")
        if template is not None:
            segments = True
            files = templateelemt(template, filename, idnumber, offset_sec, duration_sec)
        elif rep_template is not None:
            segments = True
            files = templateelemt(rep_template, filename, idnumber, offset_sec, duration_sec)
        if files:
            streams[bitrate] = {"segments": segments, "files": files}
    return streams
def parsesegments(content, url):
    """Build a {bitrate: [segment url, ...]} mapping from DASH AdaptationSets.

    content: list of AdaptationSet elements (only content[0] is used).
    url: manifest url; segment urls are resolved against its directory.
    The init segment is always the first entry of each list.
    """
    ns = "{urn:mpeg:dash:schema:mpd:2011}"
    media = content[0].find(ns + "SegmentTemplate")
    if media is not None:
        scheme = media.attrib["media"]
    vinit = content[0].find(ns + "SegmentTemplate")
    if vinit is not None:
        init = vinit.attrib["initialization"]
    nrofvideos = content[0].findall(".//" + ns + "S[@r]")
    selemtns = content[0].findall(".//" + ns + "S")
    total = 0
    if nrofvideos:
        # @r is a repeat count: r repeats + the listed S entries + the implicit first.
        total = int(nrofvideos[0].attrib["r"]) + len(selemtns) + 1
        # BUGFIX: this was `time = False`, which crashed with "bool is not
        # iterable" in the $Time$ branch below whenever a timeline used @r
        # together with a $Time$ media template. An empty list keeps that
        # path safe (no $Time$ segments can be derived from @r alone here).
        time = []
    else:
        # Cumulative timestamps start at 0; each S contributes its duration.
        time = [0]
        for s in selemtns:
            time.append(int(s.attrib["d"]))
    elements = content[0].findall(".//" + ns + "Representation")
    files = {}
    for rep in elements:
        rid = rep.attrib["id"]  # renamed from `id` (shadowed the builtin)
        segments = []
        bitrate = int(rep.attrib["bandwidth"])
        if vinit is None:
            init = rep.find(ns + "SegmentTemplate").attrib["initialization"]
        vidinit = init.replace("$RepresentationID$", rid)
        if media is None:
            scheme = rep.find(ns + "SegmentTemplate").attrib["media"]
        templates = content[0].findall(".//" + ns + "SegmentTemplate")
        if "startNumber" in templates[0].attrib:
            start = int(templates[0].attrib["startNumber"])
        else:
            start = 1
        dirname = os.path.dirname(url) + "/"
        segments.append(urljoin(dirname, vidinit))
        name = scheme.replace("$RepresentationID$", rid)
        if "$Number" in name:
            match = re.search(r"\$Number(\%\d+)d\$", name)
            if match:
                # printf-style width specifier, e.g. $Number%05d$
                vname = name.replace("$Number", "").replace("$", "")
                for n in range(start, start + total):
                    segments.append(urljoin(dirname, vname % n))
            else:
                # not a format string
                for n in range(start, start + total):
                    segments.append(urljoin(dirname, name.replace("$Number$", str(n))))
        if "$Time$" in name:
            match = re.search(r"\$Time\$", name)
            if match:
                number = 0
                for n in time:
                    number += int(n)
                    segments.append(urljoin(dirname, name.replace("$Time$", str(number))))
        files[bitrate] = segments
    return files
def find_all_episodes(self, options):
    """Return a sorted list of episode urls, capped at options.all_last.

    Prefers the page's RSS <link>; falls back to scraping play anchors.
    """
    rss = re.search(r'<link rel="alternate" type="application/rss\+xml" [^>]*href="([^"]+)"', self.get_urldata()[1])
    if rss is None:
        links = re.findall(r'a class="play[^"]+"\s+href="(/video[^"]+)"', self.get_urldata()[1])
        if not links:
            log.error("Couldn't retrieve episode list")
            return
        episodes = [urljoin("http://www.svtplay.se", link) for link in links]
    else:
        error, data = get_http_data(rss.group(1))
        if error:
            log.error("Cant get rss page")
            return
        xml = ET.XML(data)
        episodes = [node.text for node in xml.findall(".//item/link")]
    capped = []
    for idx, ep in enumerate(episodes, 1):
        capped.append(ep)
        if idx == options.all_last:
            break
    return sorted(capped)
def get(self):
    """Yield an HTTP audio stream (or ServiceError) for a sverigesradio page."""
    data = self.get_urldata()
    if self.exclude():
        yield ServiceError("Excluding video")
        return
    news = re.search(r'data-audio-type="publication" data-audio-id="(\d+)">', data)  # Nyheter
    if news:
        dataurl = "https://sverigesradio.se/sida/playerajax/getaudiourl?id={}&type={}&quality=high&format=iis".format(news.group(1), "publication")
        playerinfo = json.loads(self.http.request("get", dataurl).text)
        yield HTTP(copy.copy(self.options), playerinfo["audioUrl"], 128)
        return
    direct = re.search(r'href="(/topsy/ljudfil/\d+-mp3)"', data)  # Ladda ner
    if direct:
        yield HTTP(copy.copy(self.options), urljoin("https://sverigesradio.se", direct.group(1)), 128)
        return
    nomusic = re.search(r'data-audio-type="secondary" data-audio-id="(\d+)"', data)  # Ladda ner utan musik
    withmusic = re.search(r'data-audio-type="episode" data-audio-id="(\d+)"', data)  # Ladda ner med musik
    if nomusic:
        aid = nomusic.group(1)
        audiotype = "secondary"
    elif withmusic:
        aid = withmusic.group(1)
        audiotype = "episode"
    else:
        yield ServiceError("Can't find audio info")
        return
    dataurl = "https://sverigesradio.se/sida/playerajax/getaudiourl?id={}&type={}&quality=high&format=iis".format(aid, audiotype)
    playerinfo = json.loads(self.http.request("get", dataurl).text)
    yield HTTP(copy.copy(self.options), playerinfo["audioUrl"], 128)
def find_all_episodes(self, options):
    """Return a sorted, capped list of episode urls for a svtplay title.

    Uses the RSS feed when the page advertises one, otherwise the embedded
    _svtplay store json ("sasong" tabs only).
    """
    rss = re.search(r'<link rel="alternate" type="application/rss\+xml" [^>]*href="([^"]+)"', self.get_urldata())
    if rss is None:
        videos = []
        jsmatch = re.search('_svtplay"] = ({.*});', self.get_urldata())
        if jsmatch:
            dataj = json.loads(jsmatch.group(1))
            items = dataj["context"]["dispatcher"]["stores"]["VideoTitlePageStore"]["data"]["relatedVideoTabs"]
        else:
            log.error("Couldn't retrieve episode list")
            return
        for tab in items:
            if "sasong" in tab["slug"]:
                for video in tab["videos"]:
                    videos.append(video["url"])
        episodes = [urljoin("http://www.svtplay.se", path) for path in videos]
    else:
        xml = ET.XML(self.http.request("get", rss.group(1)).content)
        episodes = [node.text for node in xml.findall(".//item/link")]
    capped = []
    for idx, ep in enumerate(episodes, 1):
        capped.append(ep)
        if idx == options.all_last:
            break
    return sorted(capped)
def find_all_episodes(self, options):
    # Find every episode url for a dr.dk program page.
    #
    # The page embeds "show-more" buttons whose data-url/data-partial
    # attributes are base64-encoded paths into dr.dk's partial-HTML API; we
    # decode one, widen its paging window to offset=0&limit=1000, re-encode
    # it and fetch the full listing. Returns absolute urls, capped (all_last)
    # or reversed.
    episodes = []
    matches = re.findall(r'<button class="show-more" data-url="([^"]+)" data-partial="([^"]+)"', self.get_urldata())
    for encpath, enccomp in matches:
        # Newer pages append an encoded query string after "_".
        newstyle = '_' in encpath
        if newstyle:
            encbasepath = encpath.split('_')[0]
            # The '===' padding makes b64decode tolerant of stripped "=".
            path = base64.b64decode(encbasepath + '===').decode('latin1') if is_py3 else base64.b64decode(encbasepath + '===')
        else:
            path = base64.b64decode(encpath + '===').decode('latin1') if is_py3 else base64.b64decode(encpath + '===')
        if '/view/' in path:
            continue
        params = 'offset=0&limit=1000'
        if newstyle:
            encparams = base64.b64encode(params.encode('latin1')).decode('latin1').rstrip('=') if is_py3 else \
                base64.b64encode(params).rstrip('=')
            encpath = '{0}_{1}'.format(encbasepath, encparams)
        else:
            path = '{0}?{1}'.format(urlparse(path).path, params)
            encpath = base64.b64encode(path.encode('latin1')).decode('latin1').rstrip('=') if is_py3 else \
                base64.b64encode(path).rstrip('=')
        url = urljoin('https://www.dr.dk/tv/partial/', '{0}/{1}'.format(enccomp, encpath))
        data = self.http.request('get', url).content.decode('latin1') if is_py3 else \
            self.http.request('get', url).content
        matches = re.findall(r'"program-link" href="([^"]+)">', data)
        episodes = [urljoin('https://www.dr.dk/', url) for url in matches]
        # The first usable button already covers the whole listing.
        break
    if not episodes:
        # Fallback: scrape sibling program links straight off the page,
        # keeping only those under the same path prefix as self.url.
        prefix = '/'.join(urlparse(self.url).path.rstrip('/').split('/')[:-1])
        matches = re.findall(r'"program-link" href="([^"]+)">', self.get_urldata())
        episodes = [urljoin('https://www.dr.dk/', url) for url in matches if url.startswith(prefix)]
    if options.all_last != -1:
        episodes = episodes[:options.all_last]
    else:
        episodes.reverse()
    return episodes
def templateelemt(element, filename, idnumber):
    """Expand a DASH SegmentTemplate element into a list of segment urls.

    element: the SegmentTemplate Element.
    filename: base url the init/media templates are resolved against.
    idnumber: the Representation id substituted for $RepresentationID$.
    The init segment is always the first entry of the returned list.
    """
    ns = "{urn:mpeg:dash:schema:mpd:2011}"
    files = []
    init = element.attrib["initialization"]
    media = element.attrib["media"]
    if "startNumber" in element.attrib:
        start = int(element.attrib["startNumber"])
    else:
        start = 0
    timeline = element.find(ns + "SegmentTimeline")
    rvalue = timeline.findall(".//" + ns + "S[@r]")
    selements = timeline.findall(".//" + ns + "S")
    selements.pop()  # drop the last timeline entry (often still in flight for live)
    # BUGFIX: `total` was only assigned when an S[@r] element existed, so
    # the range() loops below raised NameError on timelines without @r.
    # Defaulting to 0 makes those loops no-ops instead of crashing.
    total = 0
    if rvalue:
        # @r is a repeat count: r repeats + remaining S entries + implicit first.
        total = int(rvalue[0].attrib["r"]) + len(selements) + 1
    name = media.replace("$RepresentationID$", idnumber)
    files.append(urljoin(filename, init.replace("$RepresentationID$", idnumber)))
    if "$Time$" in media:
        # Cumulative timestamps: 0, d1, d1+d2, ...
        time = [0]
        for n in selements:
            time.append(int(n.attrib["d"]))
        match = re.search(r"\$Time\$", name)
        if match:
            number = 0
            if len(selements) < 3:
                # Too few explicit entries: synthesize times from the repeat duration.
                for n in range(start, start + total):
                    new = name.replace("$Time$", str(n * int(rvalue[0].attrib["d"])))
                    files.append(urljoin(filename, new))
            else:
                for n in time:
                    number += int(n)
                    new = name.replace("$Time$", str(number))
                    files.append(urljoin(filename, new))
    if "$Number" in name:
        if re.search(r"\$Number(\%\d+)d\$", name):
            # printf-style width specifier, e.g. $Number%05d$
            vname = name.replace("$Number", "").replace("$", "")
            for n in range(start, start + total):
                files.append(urljoin(filename, vname % n))
        else:
            for n in range(start, start + total):
                files.append(urljoin(filename, name.replace("$Number$", str(n))))
    return files
def get(self):
    # Resolve a svtplay.se video page into streams via the videoplayer API.
    # Flow: validate the url shape, read the embedded __svtplay json,
    # optionally swap to an accessService version, then hit the api for
    # the actual stream descriptions and delegate to _get_video().
    parse = urlparse(self.url)
    if parse.netloc == "www.svtplay.se" or parse.netloc == "svtplay.se":
        if parse.path[:6] != "/video" and parse.path[:6] != "/klipp":
            yield ServiceError("This mode is not supported anymore. Need the url with the video.")
            return
    query = parse_qs(parse.query)
    self.access = None
    if "accessService" in query:
        self.access = query["accessService"]
    match = re.search("__svtplay'] = ({.*});", self.get_urldata())
    if not match:
        yield ServiceError("Can't find video info.")
        return
    janson = json.loads(match.group(1))["videoPage"]
    if "programTitle" not in janson["video"]:
        yield ServiceError("Can't find any video on that page.")
        return
    if self.access:
        # Re-fetch the page for the requested access service (e.g. sign
        # language / audio description) and use its embedded json instead.
        for i in janson["video"]["versions"]:
            if i["accessService"] == self.access:
                url = urljoin("http://www.svtplay.se", i["contentUrl"])
                res = self.http.get(url)
                match = re.search("__svtplay'] = ({.*});", res.text)
                if not match:
                    yield ServiceError("Can't find video info.")
                    return
                janson = json.loads(match.group(1))["videoPage"]
    if "live" in janson["video"]:
        self.options.live = janson["video"]["live"]
    if self.options.output_auto:
        self.options.service = "svtplay"
        self.options.output = self.outputfilename(janson["video"], self.options.output)
    if self.exclude():
        yield ServiceError("Excluding video.")
        return
    # Prefer the program version id when present; fall back to the raw id.
    if "programVersionId" in janson["video"]:
        vid = janson["video"]["programVersionId"]
    else:
        vid = janson["video"]["id"]
    res = self.http.get("http://api.svt.se/videoplayer-api/video/{0}".format(vid))
    try:
        janson = res.json()
    except json.decoder.JSONDecodeError:
        yield ServiceError("Can't decode api request: {0}".format(res.request.url))
        return
    videos = self._get_video(janson)
    for i in videos:
        yield i
def scrape_episodes(self, options):
    """Collect episode urls from "puff tv video" anchors on the page.

    Returns absolute urls, limited to the last options.all_last entries
    when that option is set (-1 means no limit).
    """
    res = []
    for relurl in re.findall(r'<a class="puff tv video"\s+title="[^"]*"\s+href="([^"]*)"', self.get_urldata()):
        # BUGFIX: the original did relurl.replace("&", "&"), a no-op. The
        # hrefs come from HTML source, so unescape the &amp; entity so the
        # joined urls carry real query separators.
        res.append(urljoin(self.url, relurl.replace("&amp;", "&")))
    if options.all_last != -1:
        res = res[-options.all_last:]
    return res
def dashparse(options, res, url):
    """Parse a DASH manifest response into {bitrate: DASH stream}.

    Returns None when res is falsy, and {0: ServiceError} on HTTP or
    manifest-parse errors. Handles both on-demand and live profiles.
    """
    streams = {}
    if not res:
        return None
    if res.status_code >= 400:
        streams[0] = ServiceError("Can't read DASH playlist. {0}".format(res.status_code))
        return streams
    xml = ET.XML(res.text)
    if "isoff-on-demand" in xml.attrib["profiles"]:
        try:
            baseurl = urljoin(url, xml.find("{urn:mpeg:dash:schema:mpd:2011}BaseURL").text)
        except AttributeError:
            streams[0] = ServiceError("Can't parse DASH playlist")
            # BUGFIX: this was a bare `return` (i.e. None); callers expect
            # the dict carrying the ServiceError, exactly as in the
            # status-code branch above.
            return streams
        videofiles = xml.findall(".//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@contentType='video']/{urn:mpeg:dash:schema:mpd:2011}Representation")
        audiofiles = xml.findall(".//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@contentType='audio']/{urn:mpeg:dash:schema:mpd:2011}Representation")
        # One combined stream per (audio, video) pair; bitrate is their sum.
        for i in audiofiles:
            audiourl = urljoin(baseurl, i.find("{urn:mpeg:dash:schema:mpd:2011}BaseURL").text)
            audiobitrate = float(i.attrib["bandwidth"]) / 1000
            for n in videofiles:
                bitrate = float(n.attrib["bandwidth"]) / 1000 + audiobitrate
                videourl = urljoin(baseurl, n.find("{urn:mpeg:dash:schema:mpd:2011}BaseURL").text)
                options.other = "mp4"
                streams[int(bitrate)] = DASH(copy.copy(options), videourl, bitrate, cookies=res.cookies, audio=audiourl)
    if "isoff-live" in xml.attrib["profiles"]:
        video = xml.findall(".//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@contentType='video']")
        if len(video) == 0:
            video = xml.findall(".//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@mimeType='video/mp4']")
        audio = xml.findall(".//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@contentType='audio']")
        if len(audio) == 0:
            audio = xml.findall(".//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@mimeType='audio/mp4']")
        videofiles = parsesegments(video, url)
        audiofiles = parsesegments(audio, url)
        # Pair every video rendition with the first audio rendition.
        for i in videofiles.keys():
            bitrate = (int(i) + int(list(audiofiles.keys())[0])) / 1000
            options.other = "mp4"
            streams[int(bitrate)] = DASH(copy.copy(options), url, bitrate, cookies=res.cookies,
                                         audio=audiofiles[list(audiofiles.keys())[0]], files=videofiles[i])
    return streams
def find_all_episodes(self, options):
    """Return sorted episode urls for a svtplay page, preferring the RSS feed."""
    parse = urlparse(self._url)
    if len(parse.path) > 7 and parse.path[-7:] == "rss.xml":
        rssurl = self.url
    else:
        hit = re.search(r'<link rel="alternate" type="application/rss\+xml" [^>]*href="([^"]+)"', self.get_urldata())
        rssurl = hit.group(1) if hit else None
    if rssurl is None:
        videos = []
        tab = None
        jsmatch = re.search("__svtplay'] = ({.*});", self.get_urldata())
        if re.search("sista-chansen", parse.path):
            videos = self._last_chance(videos, 1)
        elif not jsmatch:
            log.error("Couldn't retrieve episode list")
            return
        else:
            dataj = json.loads(jsmatch.group(1))
            if re.search("/genre", parse.path):
                videos = self._genre(dataj)
            else:
                if parse.query:
                    tabmatch = re.search("tab=(.+)", parse.query)
                    if tabmatch:
                        tab = tabmatch.group(1)
                for item in dataj["videoTitlePage"]["relatedVideosTabs"]:
                    if tab:
                        if item["slug"] == tab:
                            videos = self.videos_to_list(item["videos"], videos)
                    else:
                        if "sasong" in item["slug"] or "senast" in item["slug"]:
                            videos = self.videos_to_list(item["videos"], videos)
                    if self.options.include_clips:
                        if item["slug"] == "klipp":
                            videos = self.videos_to_list(item["videos"], videos)
        episodes = [urljoin("http://www.svtplay.se", path) for path in videos]
    else:
        xml = ET.XML(self.http.request("get", rssurl).content)
        episodes = [node.text for node in xml.findall(".//item/link")]
    if options.all_last > 0:
        return sorted(episodes[-options.all_last:])
    return sorted(episodes)
def dashparse(options, res, url):
    """Parse an on-demand DASH manifest into {bitrate: DASH stream}.

    Returns {0: ServiceError} on HTTP or manifest-parse errors.
    """
    streams = {}
    # Broadened from (403 or 404): any HTTP error status means there is no
    # playlist body worth parsing (matches the newer dashparse variant).
    if res.status_code >= 400:
        streams[0] = ServiceError("Can't read DASH playlist. {0}".format(res.status_code))
        return streams
    xml = ET.XML(res.text)
    try:
        baseurl = urljoin(url, xml.find("{urn:mpeg:dash:schema:mpd:2011}BaseURL").text)
    except AttributeError:
        # BUGFIX: a manifest without a BaseURL element crashed on `.text`
        # of None; report it as a parse error instead.
        streams[0] = ServiceError("Can't parse DASH playlist")
        return streams
    videofiles = xml.findall(".//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@contentType='video']/{urn:mpeg:dash:schema:mpd:2011}Representation")
    audiofiles = xml.findall(".//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@contentType='audio']/{urn:mpeg:dash:schema:mpd:2011}Representation")
    # One combined stream per (audio, video) pair; bitrate is their sum.
    for i in audiofiles:
        audiourl = urljoin(baseurl, i.find("{urn:mpeg:dash:schema:mpd:2011}BaseURL").text)
        audiobitrate = float(i.attrib["bandwidth"]) / 1000
        for n in videofiles:
            bitrate = float(n.attrib["bandwidth"]) / 1000 + audiobitrate
            videourl = urljoin(baseurl, n.find("{urn:mpeg:dash:schema:mpd:2011}BaseURL").text)
            options.other = "mp4"
            streams[int(bitrate)] = DASH(copy.copy(options), videourl, bitrate, cookies=res.cookies, audio=audiourl)
    return streams
def _get_full_url(url, srcurl): if url[:4] == 'http': return url if url[0] == '/': baseurl = re.search(r'^(http[s]{0,1}://[^/]+)/', srcurl) return "{0}{1}".format(baseurl.group(1), url) # remove everything after last / in the path of the URL baseurl = re.sub(r'^([^\?]+)/[^/]*(\?.*)?$', r'\1/', srcurl) returl = urljoin(baseurl, url) return returl
def get(self):
    """Yield subtitles and HLS streams for an urPlayer-based page."""
    data = self.get_urldata()
    found = re.search(r"urPlayer.init\((.*)\);", data)
    if not found:
        yield ServiceError("Can't find json info")
        return
    if self.exclude():
        yield ServiceError("Excluding video")
        return
    jsondata = json.loads(found.group(1))
    if len(jsondata["subtitles"]) > 0:
        for sub in jsondata["subtitles"]:
            if "label" not in sub:
                continue
            absurl = urljoin(self.url, sub["file"].split(",")[0])
            subtype = "wrst" if absurl.endswith("vtt") else "tt"
            if self.options.get_all_subtitles:
                yield subtitle(copy.copy(self.options), subtype, absurl, "-" + filenamify(sub["label"]))
            else:
                yield subtitle(copy.copy(self.options), subtype, absurl)
    if "streamer" in jsondata["streaming_config"]:
        basedomain = jsondata["streaming_config"]["streamer"]["redirect"]
    else:
        # Ask the loadbalancer which streaming host to use.
        lburl = jsondata["streaming_config"]["loadbalancer"]
        if lburl[:1] == "/":
            lburl = "https:{}".format(lburl)
        basedomain = json.loads(self.http.request("get", lburl).text)["redirect"]
    http = "https://{0}/{1}".format(basedomain, jsondata["file_http"])
    hls = "{0}{1}".format(http, jsondata["streaming_config"]["http_streaming"]["hls_file"])
    hd = len(jsondata["file_http_hd"]) > 0
    if hd:
        http_hd = "https://{0}/{1}".format(basedomain, jsondata["file_http_hd"])
        hls_hd = "{0}{1}".format(http_hd, jsondata["streaming_config"]["http_streaming"]["hls_file"])
    for key in list(hlsparse(self.options, self.http.request("get", hls), hls).keys()):
        pass
    streams = hlsparse(self.options, self.http.request("get", hls), hls)
    for key in list(streams.keys()):
        yield streams[key]
    if hd:
        streams = hlsparse(self.options, self.http.request("get", hls_hd), hls_hd)
        for key in list(streams.keys()):
            yield streams[key]
def find_all_episodes(self, options):
    """Return sorted, capped episode urls for a svtplay title (RSS preferred)."""
    parse = urlparse(self._url)
    if len(parse.path) > 7 and parse.path[-7:] == "rss.xml":
        rssurl = self.url
    else:
        hit = re.search(r'<link rel="alternate" type="application/rss\+xml" [^>]*href="([^"]+)"', self.get_urldata())
        rssurl = hit.group(1) if hit else None
    if rssurl is None:
        videos = []
        jsmatch = re.search("__svtplay'] = ({.*});", self.get_urldata())
        if re.search("sista-chansen", parse.path):
            videos = self._last_chance(videos, 1)
        elif not jsmatch:
            log.error("Couldn't retrieve episode list")
            return
        else:
            dataj = json.loads(jsmatch.group(1))
            if re.search("/genre", parse.path):
                videos = self._genre(dataj)
            else:
                # NOTE(review): "realatedVideosTabs" looks misspelled but it
                # must match the site's json key exactly - confirm against a
                # live page before "fixing" the spelling.
                for tabinfo in dataj["videoTitlePage"]["realatedVideosTabs"]:
                    if "sasong" in tabinfo["slug"]:
                        for video in tabinfo["videos"]:
                            if video["url"] not in videos:
                                videos.append(video["url"])
                    if "senast" in tabinfo["slug"]:
                        for video in tabinfo["videos"]:
                            if video["url"] not in videos:
                                videos.append(video["url"])
        episodes = [urljoin("http://www.svtplay.se", rel) for rel in videos]
    else:
        xml = ET.XML(self.http.request("get", rssurl).content)
        episodes = [node.text for node in xml.findall(".//item/link")]
    capped = []
    for idx, ep in enumerate(episodes, 1):
        capped.append(ep)
        if idx == options.all_last:
            break
    return sorted(capped)
def find_all_episodes(self, options):
    """Collect unique episode urls for urskola.se or urplay.se, capped at all_last."""
    parse = urlparse(self.url)
    episodes = []
    if parse.netloc == "urskola.se":
        data = self.get_urldata()
        more = re.search('data-limit="[^"]+" href="([^"]+)"', data)
        if more:
            # Follow the "show all" link so the listing is complete.
            data = self.http.get(urljoin("https://urskola.se", more.group(1))).text
        for tag in re.findall('<a class="puff program tv video" title="[^"]+" href="([^"]+)"', data):
            absolute = urljoin("https://urskola.se/", tag)
            if absolute not in episodes:
                episodes.append(absolute)
    else:
        keymatch = re.search(r"/program/\d+-(\w+)-", parse.path)
        if not keymatch:
            log.error("Can't find any videos")
            return None
        keyword = keymatch.group(1)
        for link in re.findall('card-link" href="([^"]+)"', self.get_urldata()):
            hit = re.search(r"/program/\d+-(\w+)-", link)
            if hit and hit.group(1) == keyword:
                episodes.append(urljoin("https://urplay.se/", link))
    unique = []
    seen = 0
    for ep in episodes:
        if seen == options.all_last:
            break
        if ep not in unique:
            unique.append(ep)
        seen += 1
    return unique
def get(self):
    # Yield HLS streams for a media page that embeds initialMedia json.
    # Two paths: direct "playbacks" urls when present, otherwise resolve
    # the stream through the media-framework API.
    if self.exclude():
        yield ServiceError("Excluding video")
        return
    match = re.search("var initialMedia\s+= ({[^;]+);", self.get_urldata())
    if not match:
        yield ServiceError("Cant find any media on that page")
        return
    janson = json.loads(match.group(1))
    vid = janson["content_id"]
    if not janson["metaData"]:
        yield ServiceError("Can't find video on that page")
        return
    if "playbacks" in janson["metaData"]:
        # Direct playback urls are present; pick the CDN ("CLOUD") entries.
        for i in janson["metaData"]["playbacks"]:
            if "CLOUD" in i["name"]:
                streams = hlsparse(self.options, self.http.request("get", i["url"]), i["url"])
                if streams:
                    for n in list(streams.keys()):
                        yield streams[n]
    else:
        # Fall back to the media-framework API to resolve the stream url.
        match = re.search("var mediaConfig\s+= ({[^;]+);", self.get_urldata())
        if not match:
            yield ServiceError("Cant find any media on that page")
            return
        janson = json.loads(match.group(1))
        try:
            apiurl = janson["vpm"]["mediaFramework"][
                "mediaFrameworkDomain"]
        except KeyError:
            yield ServiceError("Can't find api url")
            return
        filename = "{0}?contentId={1}&playbackScenario=HTTP_CLOUD_WIRED_WEB&format=json&platform=WEB_MEDIAPLAYER&_=1487455224334".format(
            janson["vpm"]["mediaFramework"]["mediaFrameworkEndPoint"], vid)
        url = urljoin(apiurl, filename)
        res = self.http.get(url)
        janson = res.json()
        for i in janson["user_verified_event"][0]["user_verified_content"][
                0]["user_verified_media_item"]:
            streams = hlsparse(self.options, self.http.request("get", i["url"]), i["url"])
            if streams:
                for n in list(streams.keys()):
                    yield streams[n]
def find_all_episodes(self, options):
    """Log in, scrape card links off the current page, return sorted urls."""
    episodes = []
    token, message = self._login()
    if not token:
        log.error(message)
        return
    page = self.http.get(self.url).text
    for href in re.findall('<a class="card__link" href="([^"]+)"', page):
        absolute = urljoin("https://www.cmore.{}/".format(self._gettld()), href)
        if absolute not in episodes:
            episodes.append(absolute)
    if options.all_last > 0:
        return sorted(episodes[-options.all_last:])
    return sorted(episodes)
def get(self):
    """Yield an HTTP audio stream (or ServiceError) for a sverigesradio page."""
    page = self.get_urldata()
    if self.exclude():
        yield ServiceError("Excluding video")
        return
    pub = re.search(r'data-audio-type="publication" data-audio-id="(\d+)">', page)  # Nyheter
    if pub:
        dataurl = "https://sverigesradio.se/sida/playerajax/getaudiourl?id={0}&type={1}&quality=high&format=iis".format(pub.group(1), "publication")
        playerinfo = json.loads(self.http.request("get", dataurl).text)
        yield HTTP(copy.copy(self.options), playerinfo["audioUrl"], 128)
        return
    direct = re.search(r'href="(/topsy/ljudfil/\d+-mp3)"', page)  # Ladda ner
    if direct:
        yield HTTP(copy.copy(self.options), urljoin("https://sverigesradio.se", direct.group(1)), 128)
        return
    nomusic = re.search(r'data-audio-type="secondary" data-audio-id="(\d+)"', page)  # Ladda ner utan musik
    withmusic = re.search(r'data-audio-type="episode" data-audio-id="(\d+)"', page)  # Ladda ner med musik
    if nomusic:
        aid = nomusic.group(1)
        audiotype = "secondary"
    elif withmusic:
        aid = withmusic.group(1)
        audiotype = "episode"
    else:
        yield ServiceError("Can't find audio info")
        return
    dataurl = "https://sverigesradio.se/sida/playerajax/getaudiourl?id={0}&type={1}&quality=high&format=iis".format(aid, audiotype)
    playerinfo = json.loads(self.http.request("get", dataurl).text)
    yield HTTP(copy.copy(self.options), playerinfo["audioUrl"], 128)
def find_all_episodes(self, options):
    """Return episode urls for a barnkanalen show, oldest first, capped at all_last."""
    collected = []
    found = re.search("__barnplay'] = ({.*});", self.get_urldata())
    if not found:
        log.error("Couldn't retrieve episode list.")
        return
    store = json.loads(found.group(1))["context"]["dispatcher"]["stores"]["EpisodesStore"]
    show_id = list(store["data"].keys())[0]
    for episode in store["data"][show_id]["episodes"]:
        collected = self.videos_to_list(episode, collected)
    collected.reverse()
    episodes = [urljoin("http://www.svt.se", rel) for rel in collected]
    if options.all_last > 0:
        return episodes[-options.all_last:]
    return episodes
def get(self):
    """Yield HLS streams: direct "playbacks" urls, else via the media framework."""
    if self.exclude():
        yield ServiceError("Excluding video")
        return
    found = re.search(r"var initialMedia\s+= ({[^;]+);", self.get_urldata())
    if not found:
        yield ServiceError("Cant find any media on that page")
        return
    media = json.loads(found.group(1))
    vid = media["content_id"]
    if not media["metaData"]:
        yield ServiceError("Can't find video on that page")
        return
    if "playbacks" in media["metaData"]:
        for playback in media["metaData"]["playbacks"]:
            if "CLOUD" in playback["name"]:
                streams = hlsparse(self.options, self.http.request("get", playback["url"]), playback["url"])
                if streams:
                    for key in list(streams.keys()):
                        yield streams[key]
    else:
        found = re.search(r"var mediaConfig\s+= ({[^;]+);", self.get_urldata())
        if not found:
            yield ServiceError("Cant find any media on that page")
            return
        config = json.loads(found.group(1))
        try:
            apiurl = config["vpm"]["mediaFramework"]["mediaFrameworkDomain"]
        except KeyError:
            yield ServiceError("Can't find api url")
            return
        filename = "{0}?contentId={1}&playbackScenario=HTTP_CLOUD_WIRED_WEB&format=json&platform=WEB_MEDIAPLAYER&_=1487455224334".format(config["vpm"]["mediaFramework"]["mediaFrameworkEndPoint"], vid)
        res = self.http.get(urljoin(apiurl, filename))
        verified = res.json()
        for item in verified["user_verified_event"][0]["user_verified_content"][0]["user_verified_media_item"]:
            streams = hlsparse(self.options, self.http.request("get", item["url"]), item["url"])
            if streams:
                for key in list(streams.keys()):
                    yield streams[key]
def find_all_episodes(self, options):
    """Return episode urls from the embedded __svtplay json, capped at all_last.

    Handles genre pages, "sista chansen" pages and a ?tab= filter.
    """
    parse = urlparse(self._url)
    videos = []
    tab = None
    match = re.search("__svtplay'] = ({.*});", self.get_urldata())
    if re.search("sista-chansen", parse.path):
        videos = self._last_chance(videos, 1)
    elif not match:
        log.error("Couldn't retrieve episode list.")
        return
    else:
        dataj = json.loads(match.group(1))
        if re.search("/genre", parse.path):
            videos = self._genre(dataj)
        else:
            if parse.query:
                # BUGFIX: re.search("tab=(.+)") swallowed everything after
                # "tab=", including any following &params. Parse the query
                # string properly, as the newer variant of this method does.
                query = parse_qs(parse.query)
                if "tab" in query:
                    tab = query["tab"][0]
            items = dataj["relatedVideoContent"]["relatedVideosAccordion"]
            for i in items:
                if tab:
                    if i["slug"] == tab:
                        videos = self.videos_to_list(i["videos"], videos)
                else:
                    if "klipp" not in i["slug"] and "kommande" not in i["slug"]:
                        videos = self.videos_to_list(i["videos"], videos)
                if self.options.include_clips:
                    if i["slug"] == "klipp":
                        videos = self.videos_to_list(i["videos"], videos)
    episodes = [urljoin("http://www.svtplay.se", x) for x in videos]
    if options.all_last > 0:
        return episodes[-options.all_last:]
    return episodes
def find_all_episodes(self, options):
    """Return episode urls from the embedded __svtplay json, capped at all_last."""
    parse = urlparse(self._url)
    collected = []
    tab = None
    jsmatch = re.search("__svtplay'] = ({.*});", self.get_urldata())
    if re.search("sista-chansen", parse.path):
        collected = self._last_chance(collected, 1)
    elif not jsmatch:
        log.error("Couldn't retrieve episode list.")
        return
    else:
        dataj = json.loads(jsmatch.group(1))
        if re.search("/genre", parse.path):
            collected = self._genre(dataj)
        else:
            if parse.query:
                query = parse_qs(parse.query)
                if "tab" in query:
                    tab = query["tab"][0]
            if dataj["relatedVideoContent"]:
                for accordion in dataj["relatedVideoContent"]["relatedVideosAccordion"]:
                    slug = accordion["slug"]
                    if tab:
                        if slug == tab:
                            collected = self.videos_to_list(accordion["videos"], collected)
                    elif "klipp" not in slug and "kommande" not in slug:
                        collected = self.videos_to_list(accordion["videos"], collected)
                    if self.options.include_clips and slug == "klipp":
                        collected = self.videos_to_list(accordion["videos"], collected)
    episodes = [urljoin("http://www.svtplay.se", rel) for rel in collected]
    if options.all_last > 0:
        return episodes[-options.all_last:]
    return episodes
def find_all_episodes(self, options):
    """Return up to all_last unique episode urls sharing this program's keyword."""
    parse = urlparse(self.url)
    keymatch = re.search(r"/program/\d+-(\w+)-", parse.path)
    if not keymatch:
        log.error("Can't find any videos")
        return None
    keyword = keymatch.group(1)
    episodes = []
    for link in re.findall('card-link" href="([^"]+)"', self.get_urldata()):
        hit = re.search(r"/program/\d+-(\w+)-", link)
        if hit and hit.group(1) == keyword:
            episodes.append(urljoin("http://urplay.se/", link))
    unique = []
    seen = 0
    for ep in episodes:
        if seen == options.all_last:
            break
        if ep not in unique:
            unique.append(ep)
        seen += 1
    return unique
def find_all_episodes(self, options):
    """Return a sorted, capped list of svtplay episode urls (RSS preferred)."""
    rss = re.search(
        r'<link rel="alternate" type="application/rss\+xml" [^>]*href="([^"]+)"',
        self.get_urldata())
    if rss is None:
        links = re.findall(r'a class="play[^"]+"\s+href="(/video[^"]+)"', self.get_urldata())
        if not links:
            log.error("Couldn't retrieve episode list")
            return
        episodes = [urljoin("http://www.svtplay.se", link) for link in links]
    else:
        xml = ET.XML(self.http.request("get", rss.group(1)).content)
        episodes = [node.text for node in xml.findall(".//item/link")]
    capped = []
    for idx, ep in enumerate(episodes, 1):
        capped.append(ep)
        if idx == options.all_last:
            break
    return sorted(capped)
def get(self):
    # Log in to sportlib (OAuth password grant against core.oz.com), look up
    # the requested video, set the playlist cookie and yield HLS streams.
    data = self.http.get("https://www.sportlib.se/sportlib/login").text
    match = re.search('src="(/app[^"]+)">', data)
    if not match:
        yield ServiceError("Can't find url for login info")
        return
    url = urljoin("https://www.sportlib.se", match.group(1))
    data = self.http.get(url).text
    # The OAuth client id/secret are embedded in the app bundle source.
    match = re.search('CLIENT_SECRET:"([^"]+)"', data)
    if not match:
        yield ServiceError("Cant fint login info")
        return
    cs = match.group(1)
    match = re.search('CLIENT_ID:"([^"]+)"', data)
    if not match:
        yield ServiceError("Cant fint login info")
        return
    cid = match.group(1)
    res = self.http.get("https://core.oz.com/channels?slug=sportlib&org=www.sportlib.se")
    janson = res.json()
    sid = janson["data"][0]["id"]  # channel id for subsequent api calls
    data = {"client_id": cid, "client_secret": cs, "grant_type": "password",
            "username": self.options.username, "password": self.options.password}
    res = self.http.post("https://core.oz.com/oauth2/token?channelId={}".format(sid), data=data)
    if res.status_code > 200:
        yield ServiceError("Wrong username / password?")
        return
    janson = res.json()
    token_type = janson["token_type"].title()
    access_token = janson["access_token"]
    parse = urlparse(self.url)
    match = re.search("video/([-a-fA-F0-9]+)", parse.path)
    if not match:
        yield ServiceError("Cant find video id")
        return
    vid = match.group(1)
    headers = {"content-type": "application/json",
               "authorization": "{} {}".format(token_type, access_token)}
    url = "https://core.oz.com/channels/{}/videos/{}?include=collection,streamUrl".format(sid, vid)
    res = self.http.get(url, headers=headers)
    janson = res.json()
    cookiename = janson["data"]["streamUrl"]["cookieName"]
    token = janson["data"]["streamUrl"]["token"]
    hlsplaylist = janson["data"]["streamUrl"]["cdnUrl"]
    if self.options.output_auto:
        # Keep any directory the user chose; only the filename is derived.
        directory = os.path.dirname(self.options.output)
        title = filenamify(janson["data"]["title"])
        if len(directory):
            self.options.output = os.path.join(directory, title)
        else:
            self.options.output = title
    # get cookie
    postjson = {"name": cookiename, "value": token}
    res = self.http.post("https://playlist.oz.com/cookie", json=postjson)
    cookies = res.cookies
    streams = hlsparse(self.options, self.http.request("get", hlsplaylist), hlsplaylist, keycookie=cookies)
    if streams:
        for n in list(streams.keys()):
            yield streams[n]
def get(self):
    """Yield streams for a svtplay video page via the videoplayer API."""
    parse = urlparse(self.url)
    if parse.netloc in ("www.svtplay.se", "svtplay.se"):
        if parse.path[:6] != "/video" and parse.path[:6] != "/klipp":
            yield ServiceError(
                "This mode is not supported anymore. Need the url with the video."
            )
            return
    query = parse_qs(parse.query)
    self.access = query.get("accessService")  # None when absent
    found = re.search("__svtplay'] = ({.*});", self.get_urldata())
    if not found:
        yield ServiceError("Can't find video info.")
        return
    janson = json.loads(found.group(1))["videoPage"]
    if "programTitle" not in janson["video"]:
        yield ServiceError("Can't find any video on that page.")
        return
    if self.access:
        # Re-fetch the page for the requested access service version.
        for version in janson["video"]["versions"]:
            if version["accessService"] == self.access:
                pageurl = urljoin("http://www.svtplay.se", version["contentUrl"])
                res = self.http.get(pageurl)
                found = re.search("__svtplay'] = ({.*});", res.text)
                if not found:
                    yield ServiceError("Can't find video info.")
                    return
                janson = json.loads(found.group(1))["videoPage"]
    if "live" in janson["video"]:
        self.options.live = janson["video"]["live"]
    if self.options.output_auto:
        self.options.service = "svtplay"
        self.options.output = self.outputfilename(janson["video"], self.options.output)
    if self.exclude():
        yield ServiceError("Excluding video.")
        return
    video = janson["video"]
    vid = video["programVersionId"] if "programVersionId" in video else video["id"]
    res = self.http.get(
        "http://api.svt.se/videoplayer-api/video/{0}".format(vid))
    try:
        janson = res.json()
    except json.decoder.JSONDecodeError:
        yield ServiceError("Can't decode api request: {0}".format(
            res.request.url))
        return
    for stream in self._get_video(janson):
        yield stream
def find_all_episodes(self, options):
    """Return a sorted list of episode URLs for the current show.

    Tries the page's RSS feed first; falls back to the embedded
    __svtplay JSON (accordion of related videos) when the feed is
    unusable or when clips/tab filtering is requested.
    """
    parse = urlparse(self._url)
    # Either the user gave the rss.xml URL directly, or we discover it
    # from the page's <link rel="alternate"> tag.
    if len(parse.path) > 7 and parse.path[-7:] == "rss.xml":
        rss_url = self.url
    else:
        rss_url = re.search(
            r'<link rel="alternate" type="application/rss\+xml" [^>]*href="([^"]+)"',
            self.get_urldata())
        if rss_url:
            rss_url = rss_url.group(1)
    valid_rss = False
    tab = None
    if parse.query:
        match = re.search("tab=(.+)", parse.query)
        if match:
            tab = match.group(1)
    #Clips and tab can not be used with RSS-feed
    if rss_url and not self.options.include_clips and not tab:
        rss_data = self.http.request("get", rss_url).content
        try:
            xml = ET.XML(rss_data)
            episodes = [x.text for x in xml.findall(".//item/link")]
            #TODO add better checks for valid RSS-feed here
            valid_rss = True
        except ET.ParseError:
            log.info(
                "Error parsing RSS-feed at %s, make sure it is a valid RSS-feed, will use other method to find episodes." % rss_url)
    else:
        #if either tab or include_clips is set remove rss.xml from url if set manually.
        if len(parse.path) > 7 and parse.path[-7:] == "rss.xml":
            self._url = self.url.replace("rss.xml", "")
    if not valid_rss:
        videos = []
        tab = None
        match = re.search("__svtplay'] = ({.*});", self.get_urldata())
        if re.search("sista-chansen", parse.path):
            # "Last chance" pages have their own paginated listing.
            videos = self._last_chance(videos, 1)
        elif not match:
            log.error("Couldn't retrieve episode list.")
            return
        else:
            dataj = json.loads(match.group(1))
            if re.search("/genre", parse.path):
                videos = self._genre(dataj)
            else:
                if parse.query:
                    match = re.search("tab=(.+)", parse.query)
                    if match:
                        tab = match.group(1)
                items = dataj["relatedVideoContent"][
                    "relatedVideosAccordion"]
                for i in items:
                    if tab:
                        # Only the explicitly requested tab.
                        if i["slug"] == tab:
                            videos = self.videos_to_list(
                                i["videos"], videos)
                    else:
                        # Default: everything except clips and upcoming,
                        # plus clips when --include-clips is set.
                        if "klipp" not in i[
                                "slug"] and "kommande" not in i["slug"]:
                            videos = self.videos_to_list(
                                i["videos"], videos)
                        if self.options.include_clips:
                            if i["slug"] == "klipp":
                                videos = self.videos_to_list(
                                    i["videos"], videos)
        episodes = [urljoin("http://www.svtplay.se", x) for x in videos]
    if options.all_last > 0:
        # NOTE(review): this slices the list in feed order and then sorts
        # the slice — a sibling version sorts first, then slices; confirm
        # which ordering is intended.
        return sorted(episodes[-options.all_last:])
    return sorted(episodes)
def get(self):
    """Resolve streams for an svtplay.se video page (videoTitlePage layout).

    Generator: yields stream objects (HLS/HDS/DASH), subtitle objects,
    or ServiceError on failure. Each video reference may carry an
    "alt" query parameter pointing at an alternate manifest, which is
    also parsed when present.
    """
    parse = urlparse(self.url)
    if parse.netloc == "www.svtplay.se" or parse.netloc == "svtplay.se":
        # Only direct /video or /klipp pages are supported.
        if parse.path[:6] != "/video" and parse.path[:6] != "/klipp":
            yield ServiceError(
                "This mode is not supported anymore. need the url with the video"
            )
            return
    query = parse_qs(parse.query)
    self.access = None
    # ?accessService=... selects an alternate version (e.g. audio description).
    if "accessService" in query:
        self.access = query["accessService"]
    # Video metadata is embedded as JSON in the page's __svtplay variable.
    match = re.search("__svtplay'] = ({.*});", self.get_urldata())
    if not match:
        yield ServiceError("Cant find video info.")
        return
    janson = json.loads(match.group(1))["videoTitlePage"]
    if "programTitle" not in janson["video"]:
        yield ServiceError("Can't find any video on that page")
        return
    if self.access:
        # Re-fetch the page for the requested access-service version.
        for i in janson["video"]["versions"]:
            if i["accessService"] == self.access:
                url = urljoin("http://www.svtplay.se", i["contentUrl"])
                res = self.http.get(url)
                match = re.search("__svtplay'] = ({.*});", res.text)
                if not match:
                    yield ServiceError("Cant find video info.")
                    return
                janson = json.loads(match.group(1))["videoTitlePage"]
    if "live" in janson["video"]:
        self.options.live = janson["video"]["live"]
    if self.options.output_auto:
        self.options.service = "svtplay"
        self.options.output = self.outputfilename(janson["video"], self.options.output)
    if self.exclude():
        yield ServiceError("Excluding video")
        return
    # Prefer the explicit program version id when present.
    if "programVersionId" in janson["video"]:
        vid = janson["video"]["programVersionId"]
    else:
        vid = janson["video"]["id"]
    res = self.http.get(
        "http://api.svt.se/videoplayer-api/video/{0}".format(vid))
    janson = res.json()
    if "live" in janson:
        self.options.live = janson["live"]
    if "subtitleReferences" in janson:
        for i in janson["subtitleReferences"]:
            if i["format"] == "websrt" and "url" in i:
                yield subtitle(copy.copy(self.options), "wrst", i["url"])
    if "videoReferences" in janson:
        if len(janson["videoReferences"]) == 0:
            yield ServiceError(
                "Media doesn't have any associated videos (yet?)")
            return
        for i in janson["videoReferences"]:
            parse = urlparse(i["url"])
            query = parse_qs(parse.query)
            if i["format"] == "hls":
                streams = hlsparse(self.options,
                                   self.http.request("get", i["url"]),
                                   i["url"])
                if streams:
                    for n in list(streams.keys()):
                        yield streams[n]
                if "alt" in query and len(query["alt"]) > 0:
                    # Alternate manifest URL supplied via the "alt" param.
                    alt = self.http.get(query["alt"][0])
                    if alt:
                        streams = hlsparse(
                            self.options,
                            self.http.request("get", alt.request.url),
                            alt.request.url)
                        if streams:
                            for n in list(streams.keys()):
                                yield streams[n]
            if i["format"] == "hds":
                # DRM-protected (/se/secure/) HDS streams are skipped.
                match = re.search(r"\/se\/secure\/", i["url"])
                if not match:
                    streams = hdsparse(
                        self.options,
                        self.http.request("get", i["url"],
                                          params={"hdcore": "3.7.0"}),
                        i["url"])
                    if streams:
                        for n in list(streams.keys()):
                            yield streams[n]
                    if "alt" in query and len(query["alt"]) > 0:
                        alt = self.http.get(query["alt"][0])
                        if alt:
                            streams = hdsparse(
                                self.options,
                                self.http.request(
                                    "get", alt.request.url,
                                    params={"hdcore": "3.7.0"}),
                                alt.request.url)
                            if streams:
                                for n in list(streams.keys()):
                                    yield streams[n]
            if i["format"] == "dash264" or i["format"] == "dashhbbtv":
                streams = dashparse(self.options,
                                    self.http.request("get", i["url"]),
                                    i["url"])
                if streams:
                    for n in list(streams.keys()):
                        yield streams[n]
                if "alt" in query and len(query["alt"]) > 0:
                    alt = self.http.get(query["alt"][0])
                    if alt:
                        streams = dashparse(
                            self.options,
                            self.http.request("get", alt.request.url),
                            alt.request.url)
                        if streams:
                            for n in list(streams.keys()):
                                yield streams[n]
def parsesegments(content, url):
    """Expand a DASH live AdaptationSet into per-bitrate segment URL lists.

    content -- list whose first entry is an AdaptationSet Element
               (xml.etree); only content[0] is inspected.
    url     -- manifest URL; segment URLs are resolved relative to its
               directory.

    Returns a dict mapping bandwidth (int, bits/s) to a list of absolute
    URLs: the init segment first, then the media segments expanded from
    the $Number$/$Time$ template.
    """
    adaptionset = content[0]
    template = adaptionset.find("{urn:mpeg:dash:schema:mpd:2011}SegmentTemplate")
    # Template attributes shared by all representations; when the
    # AdaptationSet has no template, each Representation carries its own.
    scheme = None
    init = None
    if template is not None:
        scheme = template.attrib["media"]
        init = template.attrib["initialization"]
    # Segment count: either one S element with an r (repeat) attribute,
    # or an explicit timeline of S elements with individual durations.
    selements = adaptionset.findall(".//{urn:mpeg:dash:schema:mpd:2011}S")
    repeated = adaptionset.findall(".//{urn:mpeg:dash:schema:mpd:2011}S[@r]")
    total = 0
    if repeated:
        total = int(repeated[0].attrib["r"]) + len(selements) + 1
        time = False
    else:
        time = [0]
        for s in selements:
            time.append(int(s.attrib["d"]))
    # startNumber is read from the first template found anywhere in the
    # set; loop-invariant, so computed once.
    # assumes the first SegmentTemplate's startNumber applies to every
    # representation -- TODO confirm against real manifests.
    all_templates = adaptionset.findall(".//{urn:mpeg:dash:schema:mpd:2011}SegmentTemplate")
    start = 1
    if all_templates and "startNumber" in all_templates[0].attrib:
        start = int(all_templates[0].attrib["startNumber"])
    dirname = os.path.dirname(url) + "/"
    files = {}
    for rep in adaptionset.findall(".//{urn:mpeg:dash:schema:mpd:2011}Representation"):
        rep_id = rep.attrib["id"]
        bitrate = int(rep.attrib["bandwidth"])
        rep_init = init
        rep_scheme = scheme
        if rep_init is None:
            rep_init = rep.find("{urn:mpeg:dash:schema:mpd:2011}SegmentTemplate").attrib["initialization"]
        if rep_scheme is None:
            rep_scheme = rep.find("{urn:mpeg:dash:schema:mpd:2011}SegmentTemplate").attrib["media"]
        # Init segment first, then the expanded media segments.
        segments = [urljoin(dirname, rep_init.replace("$RepresentationID$", rep_id))]
        name = rep_scheme.replace("$RepresentationID$", rep_id)
        if "$Number" in name:
            if re.search(r"\$Number(\%\d+)d\$", name):
                # printf-style template, e.g. $Number%05d$.
                fmt = name.replace("$Number", "").replace("$", "")
                for n in range(start, start + total):
                    segments.append(urljoin(dirname, fmt % n))
            else:
                for n in range(start, start + total):
                    segments.append(urljoin(dirname, name.replace("$Number$", str(n))))
        # Guard on `time`: it is False when the repeat-count form was
        # used, in which case a $Time$ template cannot be expanded
        # (the original would have raised TypeError here).
        if "$Time$" in name and time:
            number = 0
            for d in time:
                number += int(d)
                segments.append(urljoin(dirname, name.replace("$Time$", str(number))))
        files[bitrate] = segments
    return files
def dashparse(options, res, url):
    """Parse a DASH MPD response into a dict of bitrate -> DASH stream.

    options -- options object (copied into each stream; .other is set to "mp4")
    res     -- HTTP response for the manifest, or a falsy value
    url     -- manifest URL, used to resolve relative segment/base URLs

    Returns None when res is falsy; otherwise a dict keyed by total
    bitrate (kbit/s, int). On error the dict contains a single
    ServiceError under key 0.
    """
    streams = {}
    if not res:
        return None
    if res.status_code >= 400:
        streams[0] = ServiceError("Can't read DASH playlist. {0}".format(
            res.status_code))
        return streams
    xml = ET.XML(res.text)
    # On-demand profile: each Representation has its own BaseURL with a
    # single complete file.
    if "isoff-on-demand" in xml.attrib["profiles"]:
        try:
            baseurl = urljoin(
                url, xml.find("{urn:mpeg:dash:schema:mpd:2011}BaseURL").text)
        except AttributeError:
            streams[0] = ServiceError("Can't parse DASH playlist")
            # Fixed: previously a bare `return` (returning None) while the
            # status-code error path above returns the streams dict.
            return streams
        videofiles = xml.findall(
            ".//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@contentType='video']/{urn:mpeg:dash:schema:mpd:2011}Representation"
        )
        audiofiles = xml.findall(
            ".//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@contentType='audio']/{urn:mpeg:dash:schema:mpd:2011}Representation"
        )
        for i in audiofiles:
            audiourl = urljoin(
                baseurl, i.find("{urn:mpeg:dash:schema:mpd:2011}BaseURL").text)
            audiobitrate = float(i.attrib["bandwidth"]) / 1000
            for n in videofiles:
                # Key streams by combined audio+video bitrate in kbit/s.
                bitrate = float(n.attrib["bandwidth"]) / 1000 + audiobitrate
                videourl = urljoin(
                    baseurl, n.find("{urn:mpeg:dash:schema:mpd:2011}BaseURL").text)
                options.other = "mp4"
                streams[int(bitrate)] = DASH(copy.copy(options), videourl, bitrate,
                                             cookies=res.cookies, audio=audiourl)
    # Live profile: segments are expanded from SegmentTemplates.
    if "isoff-live" in xml.attrib["profiles"]:
        # Some manifests tag sets with contentType, others only mimeType.
        video = xml.findall(
            ".//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@contentType='video']"
        )
        if len(video) == 0:
            video = xml.findall(
                ".//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@mimeType='video/mp4']"
            )
        audio = xml.findall(
            ".//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@contentType='audio']"
        )
        if len(audio) == 0:
            audio = xml.findall(
                ".//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@mimeType='audio/mp4']"
            )
        videofiles = parsesegments(video, url)
        audiofiles = parsesegments(audio, url)
        # Only the first audio variant is used (hoisted out of the loop).
        audiokey = list(audiofiles.keys())[0]
        for i in videofiles.keys():
            bitrate = (int(i) + int(audiokey)) / 1000
            options.other = "mp4"
            streams[int(bitrate)] = DASH(copy.copy(options), url, bitrate,
                                         cookies=res.cookies,
                                         audio=audiofiles[audiokey],
                                         files=videofiles[i])
    return streams
def get(self):
    """Resolve streams for a sportlib.se video.

    Generator: yields stream objects from hlsparse(), or ServiceError
    on any failure (missing login info, bad credentials, no video id).
    """
    # The OAuth2 client credentials are embedded in the site's JS app
    # bundle; find the bundle URL on the login page first.
    data = self.http.get("https://www.sportlib.se/sportlib/login").text
    match = re.search('src="(/app[^"]+)">', data)
    if not match:
        yield ServiceError("Can't find url for login info")
        return
    url = urljoin("https://www.sportlib.se", match.group(1))
    data = self.http.get(url).text
    match = re.search('CLIENT_SECRET:"([^"]+)"', data)
    if not match:
        yield ServiceError("Cant fint login info")
        return
    cs = match.group(1)
    match = re.search('CLIENT_ID:"([^"]+)"', data)
    if not match:
        yield ServiceError("Cant fint login info")
        return
    cid = match.group(1)
    # Channel id for the "sportlib" slug on the oz.com backend.
    res = self.http.get(
        "https://core.oz.com/channels?slug=sportlib&org=www.sportlib.se")
    janson = res.json()
    sid = janson["data"][0]["id"]
    # Password-grant OAuth2 token request with the scraped credentials.
    data = {
        "client_id": cid,
        "client_secret": cs,
        "grant_type": "password",
        "username": self.options.username,
        "password": self.options.password
    }
    res = self.http.post(
        "https://core.oz.com/oauth2/token?channelId={}".format(sid),
        data=data)
    if res.status_code > 200:
        yield ServiceError("Wrong username / password?")
        return
    janson = res.json()
    token_type = janson["token_type"].title()
    access_token = janson["access_token"]
    # Video id is a hex/dash id in the page URL path.
    parse = urlparse(self.url)
    match = re.search("video/([-a-fA-F0-9]+)", parse.path)
    if not match:
        yield ServiceError("Cant find video id")
        return
    vid = match.group(1)
    headers = {
        "content-type": "application/json",
        "authorization": "{} {}".format(token_type, access_token)
    }
    url = "https://core.oz.com/channels/{}/videos/{}?include=collection,streamUrl".format(
        sid, vid)
    res = self.http.get(url, headers=headers)
    janson = res.json()
    cookiename = janson["data"]["streamUrl"]["cookieName"]
    token = janson["data"]["streamUrl"]["token"]
    hlsplaylist = janson["data"]["streamUrl"]["cdnUrl"]
    if self.options.output_auto:
        # Derive the output filename from the video title, preserving
        # any directory the user already set in --output.
        directory = os.path.dirname(self.options.output)
        title = filenamify(janson["data"]["title"])
        if len(directory):
            self.options.output = os.path.join(directory, title)
        else:
            self.options.output = title

    # get cookie
    # The CDN requires a playlist cookie minted from the stream token;
    # it is forwarded to the HLS fetcher via keycookie.
    postjson = {"name": cookiename, "value": token}
    res = self.http.post("https://playlist.oz.com/cookie", json=postjson)
    cookies = res.cookies
    streams = hlsparse(self.options,
                       self.http.request("get", hlsplaylist),
                       hlsplaylist,
                       keycookie=cookies)
    if streams:
        for n in list(streams.keys()):
            yield streams[n]
def find_all_episodes(self, options):
    """Return a sorted list of episode URLs for the current show.

    Tries the page's RSS feed first; falls back to the embedded
    __svtplay JSON (accordion of related videos) when the feed is
    unusable or when clips/tab filtering is requested.
    """
    parse = urlparse(self._url)
    # Either the user gave the rss.xml URL directly, or we discover it
    # from the page's <link rel="alternate"> tag.
    if len(parse.path) > 7 and parse.path[-7:] == "rss.xml":
        rss_url = self.url
    else:
        rss_url = re.search(r'<link rel="alternate" type="application/rss\+xml" [^>]*href="([^"]+)"', self.get_urldata())
        if rss_url:
            rss_url = rss_url.group(1)
    valid_rss = False
    tab = None
    if parse.query:
        match = re.search("tab=(.+)", parse.query)
        if match:
            tab = match.group(1)
    #Clips and tab can not be used with RSS-feed
    if rss_url and not self.options.include_clips and not tab:
        rss_data = self.http.request("get", rss_url).content
        try:
            xml = ET.XML(rss_data)
            episodes = [x.text for x in xml.findall(".//item/link")]
            #TODO add better checks for valid RSS-feed here
            valid_rss = True
        except ET.ParseError:
            log.info("Error parsing RSS-feed at {0}, make sure it is a valid RSS-feed, will use other method to find episodes.".format(rss_url))
    else:
        #if either tab or include_clips is set remove rss.xml from url if set manually.
        if len(parse.path) > 7 and parse.path[-7:] == "rss.xml":
            self._url = self.url.replace("rss.xml", "")
    if not valid_rss:
        videos = []
        tab = None
        match = re.search("__svtplay'] = ({.*});", self.get_urldata())
        if re.search("sista-chansen", parse.path):
            # "Last chance" pages have their own paginated listing.
            videos = self._last_chance(videos, 1)
        elif not match:
            log.error("Couldn't retrieve episode list.")
            return
        else:
            dataj = json.loads(match.group(1))
            if re.search("/genre", parse.path):
                videos = self._genre(dataj)
            else:
                if parse.query:
                    match = re.search("tab=(.+)", parse.query)
                    if match:
                        tab = match.group(1)
                items = dataj["relatedVideoContent"]["relatedVideosAccordion"]
                for i in items:
                    if tab:
                        # Only the explicitly requested tab.
                        if i["slug"] == tab:
                            videos = self.videos_to_list(i["videos"], videos)
                    else:
                        # Default: everything except clips and upcoming,
                        # plus clips when --include-clips is set.
                        if "klipp" not in i["slug"] and "kommande" not in i["slug"]:
                            videos = self.videos_to_list(i["videos"], videos)
                        if self.options.include_clips:
                            if i["slug"] == "klipp":
                                videos = self.videos_to_list(i["videos"], videos)
        episodes = [urljoin("http://www.svtplay.se", x) for x in videos]
    if options.all_last > 0:
        # NOTE(review): this sorts first and then takes the last N of the
        # sorted list — a sibling version slices first, then sorts;
        # confirm which ordering is intended.
        return sorted(episodes)[-options.all_last:]
    return sorted(episodes)
def templateelemt(element, filename, idnumber, offset_sec, duration_sec):
    """Expand a DASH SegmentTemplate element into a list of segment URLs.

    element      -- SegmentTemplate Element (xml.etree) with
                    "initialization" and "media" attributes
    filename     -- base URL the segment names are resolved against
    idnumber     -- representation id substituted for $RepresentationID$
    offset_sec   -- optional start offset in seconds (shifts startNumber)
    duration_sec -- optional duration in seconds (limits segment count)

    Returns a list of absolute URLs: init segment first, then the media
    segments expanded from the $Number$/$Time$ template.
    """
    files = []
    timescale = 1
    duration = 1
    total = 1
    init = element.attrib["initialization"]
    media = element.attrib["media"]
    if "startNumber" in element.attrib:
        start = int(element.attrib["startNumber"])
    else:
        start = 1
    if "timescale" in element.attrib:
        timescale = float(element.attrib["timescale"])
    if "duration" in element.attrib:
        duration = float(element.attrib["duration"])
    # duration/timescale is one segment's length in seconds.
    if offset_sec is not None and duration_sec is None:
        start += int(offset_sec / (duration / timescale))
    if duration_sec is not None:
        total = int(duration_sec / (duration / timescale))
    selements = None
    rvalue = None
    timeline = element.find("{urn:mpeg:dash:schema:mpd:2011}SegmentTimeline")
    if timeline is not None:
        rvalue = timeline.findall(".//{urn:mpeg:dash:schema:mpd:2011}S[@r]")
        selements = timeline.findall(".//{urn:mpeg:dash:schema:mpd:2011}S")
        # Drop the final S entry; only the preceding ones contribute.
        selements.pop()
        if rvalue:
            total = int(rvalue[0].attrib["r"]) + len(selements) + 1
    name = media.replace("$RepresentationID$", idnumber)
    files.append(
        urljoin(filename, init.replace("$RepresentationID$", idnumber)))
    if "$Time$" in media:
        time = [0]
        for n in selements:
            time.append(int(n.attrib["d"]))
        match = re.search(r"\$Time\$", name)
        # Fixed: guard on rvalue — with a short timeline and no S[@r]
        # element, rvalue is an empty list and rvalue[0] raised
        # IndexError (the newer sibling variant already has this guard).
        if rvalue and match and len(selements) < 3:
            # Few explicit entries: synthesize times from the repeat
            # entry's constant duration.
            for n in range(start, start + total):
                new = name.replace("$Time$", str(n * int(rvalue[0].attrib["d"])))
                files.append(urljoin(filename, new))
        else:
            # Accumulate the per-segment durations into absolute times.
            number = 0
            for n in time:
                number += n
                new = name.replace("$Time$", str(number))
                files.append(urljoin(filename, new))
    if "$Number" in name:
        if re.search(r"\$Number(\%\d+)d\$", name):
            # printf-style template, e.g. $Number%05d$.
            vname = name.replace("$Number", "").replace("$", "")
            for n in range(start, start + total):
                files.append(urljoin(filename, vname % n))
        else:
            for n in range(start, start + total):
                newname = name.replace("$Number$", str(n))
                files.append(urljoin(filename, newname))
    return files
def templateelemt(element, filename, idnumber, offset_sec, duration_sec):
    """Expand a DASH SegmentTemplate element into a list of segment URLs.

    element      -- SegmentTemplate Element (xml.etree) with
                    "initialization" and "media" attributes
    filename     -- base URL the segment names are resolved against
    idnumber     -- representation id substituted for $RepresentationID$
    offset_sec   -- optional start offset in seconds (shifts startNumber)
    duration_sec -- optional duration in seconds (limits segment count)

    Returns a list of absolute URLs: init segment first, then the media
    segments expanded from the $Number$/$Time$ template.
    """
    init = element.attrib["initialization"]
    media = element.attrib["media"]
    start = int(element.attrib.get("startNumber", 1))
    timescale = float(element.attrib.get("timescale", 1))
    duration = float(element.attrib.get("duration", 1))
    # duration/timescale is one segment's length in seconds.
    total = 1
    if offset_sec is not None and duration_sec is None:
        start += int(offset_sec / (duration / timescale))
    if duration_sec is not None:
        total = int(duration_sec / (duration / timescale))
    rvalue = None
    selements = None
    timeline = element.find("{urn:mpeg:dash:schema:mpd:2011}SegmentTimeline")
    if timeline is not None:
        rvalue = timeline.findall(".//{urn:mpeg:dash:schema:mpd:2011}S[@r]")
        selements = timeline.findall(".//{urn:mpeg:dash:schema:mpd:2011}S")
        # Drop the final S entry; only the preceding ones contribute.
        selements.pop()
        if rvalue:
            total = int(rvalue[0].attrib["r"]) + len(selements) + 1
    name = media.replace("$RepresentationID$", idnumber)
    urls = [urljoin(filename, init.replace("$RepresentationID$", idnumber))]
    if "$Time$" in media:
        durations = [0]
        durations.extend(int(s.attrib["d"]) for s in selements)
        if rvalue and re.search(r"\$Time\$", name) and len(selements) < 3:
            # Few explicit entries: synthesize times from the repeat
            # entry's constant duration.
            step = int(rvalue[0].attrib["d"])
            for seg in range(start, start + total):
                urls.append(urljoin(filename, name.replace("$Time$", str(seg * step))))
        else:
            # Accumulate the per-segment durations into absolute times.
            elapsed = 0
            for d in durations:
                elapsed += d
                urls.append(urljoin(filename, name.replace("$Time$", str(elapsed))))
    if "$Number" in name:
        if re.search(r"\$Number(\%\d+)d\$", name):
            # printf-style template, e.g. $Number%05d$.
            fmt = name.replace("$Number", "").replace("$", "")
            for seg in range(start, start + total):
                urls.append(urljoin(filename, fmt % seg))
        else:
            for seg in range(start, start + total):
                urls.append(urljoin(filename, name.replace("$Number$", str(seg))))
    return urls