Beispiel #1
0
def adaptionset(element, url, baseurl=None, offset_sec=None, duration_sec=None):
    """Parse a DASH AdaptationSet into a {bitrate: {"segments", "files"}} map.

    element is a list whose first entry is the AdaptationSet XML element;
    url is the manifest URL the file names are resolved against, optionally
    overridden by baseurl.  offset_sec/duration_sec are forwarded to the
    SegmentTemplate expansion.
    """
    ns = "{urn:mpeg:dash:schema:mpd:2011}"
    streams = {}

    dirname = os.path.dirname(url) + "/"
    if baseurl:
        dirname = urljoin(dirname, baseurl)

    # A SegmentTemplate directly under the AdaptationSet applies to all
    # representations; per-representation templates are the fallback.
    template = element[0].find(ns + "SegmentTemplate")
    representations = element[0].findall(".//" + ns + "Representation")

    for rep in representations:
        files = []
        segments = False
        filename = dirname
        bitrate = int(rep.attrib["bandwidth"]) / 1000
        idnumber = rep.attrib["id"]

        base_elem = rep.find(ns + "BaseURL")
        if base_elem is not None:
            filename = urljoin(filename, base_elem.text)

        if rep.find(ns + "SegmentBase") is not None:
            segments = True
            files.append(filename)
        if template is not None:
            segments = True
            files = templateelemt(template, filename, idnumber, offset_sec, duration_sec)
        elif rep.find(ns + "SegmentTemplate") is not None:
            segments = True
            files = templateelemt(rep.find(ns + "SegmentTemplate"), filename, idnumber, offset_sec, duration_sec)

        if files:
            streams[bitrate] = {"segments": segments, "files": files}

    return streams
Beispiel #2
0
def parsesegments(content, url):
    """Expand live-profile DASH SegmentTemplate data into segment URL lists.

    content is a list whose first entry is an AdaptationSet element; url is
    the manifest URL segments are resolved against.

    Returns {bitrate: [init_url, segment_url, ...]}.

    Fixes over the previous version: the $Time$ branch no longer iterates
    ``False`` (TypeError) when the timeline used an r-count; the builtin
    ``id`` is no longer shadowed; the loop-invariant SegmentTemplate lookup
    for startNumber is hoisted out of the representation loop; regexes use
    raw strings.
    """
    ns = "{urn:mpeg:dash:schema:mpd:2011}"
    template = content[0].find(ns + "SegmentTemplate")
    scheme = template.attrib["media"] if template is not None else None
    init = template.attrib["initialization"] if template is not None else None

    nrofvideos = content[0].findall(".//" + ns + "S[@r]")
    selements = content[0].findall(".//" + ns + "S")
    total = 0
    if nrofvideos:
        # r-count timeline: r repeats plus the explicit S entries plus one.
        total = int(nrofvideos[0].attrib["r"]) + len(selements) + 1
        times = None
    else:
        # Explicit timeline: list of segment durations, starting at 0.
        times = [0]
        for s in selements:
            times.append(int(s.attrib["d"]))

    # startNumber is the same for every representation; look it up once.
    any_template = content[0].findall(".//" + ns + "SegmentTemplate")[0]
    start = int(any_template.attrib.get("startNumber", 1))

    dirname = os.path.dirname(url) + "/"
    files = {}
    for rep in content[0].findall(".//" + ns + "Representation"):
        rep_id = rep.attrib["id"]
        bitrate = int(rep.attrib["bandwidth"])

        # Fall back to the per-representation SegmentTemplate when the
        # AdaptationSet-level one is missing.
        rep_init = init
        if rep_init is None:
            rep_init = rep.find(ns + "SegmentTemplate").attrib["initialization"]
        rep_scheme = scheme
        if rep_scheme is None:
            rep_scheme = rep.find(ns + "SegmentTemplate").attrib["media"]

        segments = [urljoin(dirname, rep_init.replace("$RepresentationID$", rep_id))]
        name = rep_scheme.replace("$RepresentationID$", rep_id)
        if "$Number" in name:
            match = re.search(r"\$Number(\%\d+)d\$", name)
            if match:
                # printf-style $Number%05d$ template.
                vname = name.replace("$Number", "").replace("$", "")
                for n in range(start, start + total):
                    segments.append(urljoin(dirname, vname % n))
            else:
                for n in range(start, start + total):
                    segments.append(urljoin(dirname, name.replace("$Number$", str(n))))
        if "$Time$" in name and times is not None:
            # Cumulative start times from the S/@d durations.
            number = 0
            for d in times:
                number += int(d)
                segments.append(urljoin(dirname, name.replace("$Time$", str(number))))
        files[bitrate] = segments
    return files
Beispiel #3
0
    def find_all_episodes(self, options):
        """Return a sorted list of episode URLs, preferring the RSS feed."""
        match = re.search(r'<link rel="alternate" type="application/rss\+xml" [^>]*href="([^"]+)"',
                          self.get_urldata()[1])
        if match is None:
            # No RSS link: scrape the play links from the page instead.
            links = re.findall(r'a class="play[^"]+"\s+href="(/video[^"]+)"', self.get_urldata()[1])
            if not links:
                log.error("Couldn't retrieve episode list")
                return
            episodes = [urljoin("http://www.svtplay.se", link) for link in links]
        else:
            error, data = get_http_data(match.group(1))
            if error:
                log.error("Cant get rss page")
                return
            episodes = [item.text for item in ET.XML(data).findall(".//item/link")]

        # Keep at most options.all_last episodes (non-positive means all).
        if options.all_last > 0:
            episodes = episodes[:options.all_last]
        return sorted(episodes)
Beispiel #4
0
    def get(self):
        """Yield HTTP audio streams for a sverigesradio.se page.

        Tries, in order: news publication audio, a direct download link,
        then the "without music"/"with music" download variants.  Yields
        a ServiceError when nothing can be found or the video is excluded.

        Fixes: the builtin ``type`` is no longer shadowed, the regexes are
        raw strings, and the three identical ajax-fetch-and-yield paths
        share one helper.
        """
        data = self.get_urldata()

        if self.exclude():
            yield ServiceError("Excluding video")
            return

        def _audio_stream(audio_id, audio_type):
            # Resolve the real audio URL through the player ajax endpoint.
            dataurl = "https://sverigesradio.se/sida/playerajax/getaudiourl?id={}&type={}&quality=high&format=iis".format(audio_id, audio_type)
            playerinfo = json.loads(self.http.request("get", dataurl).text)
            return HTTP(copy.copy(self.options), playerinfo["audioUrl"], 128)

        match = re.search(r'data-audio-type="publication" data-audio-id="(\d+)">', data)  # Nyheter
        if match:
            yield _audio_stream(match.group(1), "publication")
            return

        match = re.search(r'href="(/topsy/ljudfil/\d+-mp3)"', data)  # Ladda ner
        if match:
            yield HTTP(copy.copy(self.options), urljoin("https://sverigesradio.se", match.group(1)), 128)
            return

        match = re.search(r'data-audio-type="secondary" data-audio-id="(\d+)"', data)  # Ladda ner utan musik
        match2 = re.search(r'data-audio-type="episode" data-audio-id="(\d+)"', data)  # Ladda ner med musik
        if match:
            yield _audio_stream(match.group(1), "secondary")
        elif match2:
            yield _audio_stream(match2.group(1), "episode")
        else:
            yield ServiceError("Can't find audio info")
Beispiel #5
0
    def find_all_episodes(self, options):
        """Return a sorted list of episode URLs, from RSS or embedded JSON."""
        match = re.search(r'<link rel="alternate" type="application/rss\+xml" [^>]*href="([^"]+)"',
                          self.get_urldata())
        if match is None:
            # No RSS feed: read the season tabs out of the page's JSON blob.
            match = re.search('_svtplay"] = ({.*});', self.get_urldata())
            if not match:
                log.error("Couldn't retrieve episode list")
                return
            dataj = json.loads(match.group(1))
            tabs = dataj["context"]["dispatcher"]["stores"]["VideoTitlePageStore"]["data"]["relatedVideoTabs"]
            videos = [video["url"]
                      for tab in tabs if "sasong" in tab["slug"]
                      for video in tab["videos"]]
            episodes = [urljoin("http://www.svtplay.se", v) for v in videos]
        else:
            data = self.http.request("get", match.group(1)).content
            episodes = [item.text for item in ET.XML(data).findall(".//item/link")]

        # Keep at most options.all_last episodes (non-positive means all).
        if options.all_last > 0:
            episodes = episodes[:options.all_last]
        return sorted(episodes)
Beispiel #6
0
    def find_all_episodes(self, options):
        """Find all episode URLs for a dr.dk programme.

        Decodes the base64-encoded "show-more" partial-view link on the page,
        requests the full listing (offset=0&limit=1000) and scrapes the
        program links from it; falls back to scraping the current page.
        options.all_last limits the result; otherwise episodes are returned
        oldest-first.
        """
        episodes = []
        matches = re.findall(r'<button class="show-more" data-url="([^"]+)" data-partial="([^"]+)"',
                             self.get_urldata())
        for encpath, enccomp in matches:
            # New-style links are "<base64 path>_<base64 params>"; old-style
            # links are a single base64-encoded path.
            newstyle = '_' in encpath
            if newstyle:
                encbasepath = encpath.split('_')[0]
                # Appending '===' makes the stripped base64 decodable
                # regardless of how much padding was removed.
                path = base64.b64decode(encbasepath + '===').decode('latin1') if is_py3 else base64.b64decode(encbasepath + '===')
            else:
                path = base64.b64decode(encpath + '===').decode('latin1') if is_py3 else base64.b64decode(encpath + '===')

            if '/view/' in path:
                # "/view/" partials are not episode listings; skip them.
                continue

            # Ask the partial view for (practically) all episodes at once.
            params = 'offset=0&limit=1000'
            if newstyle:
                encparams = base64.b64encode(params.encode('latin1')).decode('latin1').rstrip('=') if is_py3 else \
                    base64.b64encode(params).rstrip('=')
                encpath = '{0}_{1}'.format(encbasepath, encparams)
            else:
                path = '{0}?{1}'.format(urlparse(path).path, params)
                encpath = base64.b64encode(path.encode('latin1')).decode('latin1').rstrip('=') if is_py3 else \
                    base64.b64encode(path).rstrip('=')

            url = urljoin('https://www.dr.dk/tv/partial/',
                          '{0}/{1}'.format(enccomp, encpath))
            data = self.http.request('get', url).content.decode('latin1') if is_py3 else \
                self.http.request('get', url).content

            matches = re.findall(r'"program-link" href="([^"]+)">', data)
            episodes = [urljoin('https://www.dr.dk/', url) for url in matches]
            # Only the first usable "show-more" link is needed.
            break

        if not episodes:
            # Fallback: scrape program links from the current page, keeping
            # only those under the same path prefix as self.url.
            prefix = '/'.join(urlparse(self.url).path.rstrip('/').split('/')[:-1])
            matches = re.findall(r'"program-link" href="([^"]+)">', self.get_urldata())
            episodes = [urljoin('https://www.dr.dk/', url)
                        for url in matches
                        if url.startswith(prefix)]

        if options.all_last != -1:
            episodes = episodes[:options.all_last]
        else:
            # No limit requested: return oldest episode first.
            episodes.reverse()

        return episodes
Beispiel #7
0
def templateelemt(element, filename, idnumber):
    """Expand a DASH SegmentTemplate element into a list of segment URLs.

    Parameters:
        element  -- SegmentTemplate XML element (with a SegmentTimeline child)
        filename -- base URL the segment names are joined against
        idnumber -- representation id substituted for $RepresentationID$

    Returns the initialization segment URL followed by the media segment
    URLs expanded from the $Time$/$Number$ template.

    Fix: ``total`` was left undefined when no S element carried an ``r``
    attribute, so the $Number$ expansion raised NameError; it now defaults
    to 0 (no numbered segments).  Regexes are raw strings.
    """
    files = []

    init = element.attrib["initialization"]
    media = element.attrib["media"]
    # startNumber defaults to 0 when absent (this parser's convention).
    start = int(element.attrib.get("startNumber", 0))

    timeline = element.find("{urn:mpeg:dash:schema:mpd:2011}SegmentTimeline")
    rvalue = timeline.findall(".//{urn:mpeg:dash:schema:mpd:2011}S[@r]")
    selements = timeline.findall(".//{urn:mpeg:dash:schema:mpd:2011}S")
    selements.pop()  # the final S entry is not counted as a full segment
    total = 0
    if rvalue:
        # r repeats plus the remaining explicit S entries plus one.
        total = int(rvalue[0].attrib["r"]) + len(selements) + 1

    name = media.replace("$RepresentationID$", idnumber)
    files.append(urljoin(filename, init.replace("$RepresentationID$", idnumber)))

    if "$Time$" in media:
        # Cumulative segment start times from the S/@d durations.
        time = []
        time.append(0)
        for n in selements:
            time.append(int(n.attrib["d"]))
        match = re.search(r"\$Time\$", name)
        if match:
            number = 0
            if len(selements) < 3:
                # Short timeline: synthesize times from the repeat duration.
                for n in range(start, start + total):
                    new = name.replace("$Time$", str(n * int(rvalue[0].attrib["d"])))
                    files.append(urljoin(filename, new))
            else:
                for n in time:
                    number += int(n)
                    new = name.replace("$Time$", str(number))
                    files.append(urljoin(filename, new))
    if "$Number" in name:
        if re.search(r"\$Number(\%\d+)d\$", name):
            # printf-style $Number%05d$ template.
            vname = name.replace("$Number", "").replace("$", "")
            for n in range(start, start + total):
                files.append(urljoin(filename, vname % n))
        else:
            for n in range(start, start + total):
                newname = name.replace("$Number$", str(n))
                files.append(urljoin(filename, newname))
    return files
Beispiel #8
0
    def get(self):
        """Yield streams for an svtplay.se video page.

        Reads the embedded ``__svtplay`` JSON blob, optionally switches to an
        alternate access-service version, resolves the video id and queries
        the videoplayer API.  Yields ServiceError objects on failure,
        otherwise the streams produced by ``self._get_video``.
        """
        parse = urlparse(self.url)
        if parse.netloc == "www.svtplay.se" or parse.netloc == "svtplay.se":
            # Only direct /video/ or /klipp/ pages carry the embedded JSON.
            if parse.path[:6] != "/video" and parse.path[:6] != "/klipp":
                yield ServiceError("This mode is not supported anymore. Need the url with the video.")
                return

        # ?accessService=... selects e.g. sign-language/audio-described versions.
        query = parse_qs(parse.query)
        self.access = None
        if "accessService" in query:
            self.access = query["accessService"]

        match = re.search("__svtplay'] = ({.*});", self.get_urldata())
        if not match:
            yield ServiceError("Can't find video info.")
            return
        janson = json.loads(match.group(1))["videoPage"]

        if "programTitle" not in janson["video"]:
            yield ServiceError("Can't find any video on that page.")
            return

        if self.access:
            # Re-fetch the page for the requested access-service version and
            # replace the JSON blob with that version's data.
            for i in janson["video"]["versions"]:
                if i["accessService"] == self.access:
                    url = urljoin("http://www.svtplay.se", i["contentUrl"])
                    res = self.http.get(url)
                    match = re.search("__svtplay'] = ({.*});", res.text)
                    if not match:
                        yield ServiceError("Can't find video info.")
                        return
                    janson = json.loads(match.group(1))["videoPage"]

        if "live" in janson["video"]:
            self.options.live = janson["video"]["live"]

        if self.options.output_auto:
            # Derive the output filename from the video metadata.
            self.options.service = "svtplay"
            self.options.output = self.outputfilename(janson["video"], self.options.output)

        if self.exclude():
            yield ServiceError("Excluding video.")
            return

        # Prefer programVersionId (a specific broadcast) over the generic id.
        if "programVersionId" in janson["video"]:
            vid = janson["video"]["programVersionId"]
        else:
            vid = janson["video"]["id"]
        res = self.http.get("http://api.svt.se/videoplayer-api/video/{0}".format(vid))
        try:
            janson = res.json()
        except json.decoder.JSONDecodeError:
            yield ServiceError("Can't decode api request: {0}".format(res.request.url))
            return
        videos = self._get_video(janson)
        for i in videos:
            yield i
Beispiel #9
0
    def scrape_episodes(self, options):
        """Scrape episode URLs from the "puff tv video" cards on the page."""
        pattern = r'<a class="puff tv video"\s+title="[^"]*"\s+href="([^"]*)"'
        res = [urljoin(self.url, link.replace("&amp;", "&"))
               for link in re.findall(pattern, self.get_urldata())]

        # Keep only the last N episodes when a limit was requested.
        if options.all_last != -1:
            res = res[-options.all_last :]

        return res
Beispiel #10
0
def dashparse(options, res, url):
    """Parse a DASH manifest into {bitrate: DASH stream}.

    Handles both on-demand profiles (BaseURL per representation) and live
    profiles (SegmentTemplate expansion via ``parsesegments``).  Returns
    None when there is no response, otherwise a dict that contains a
    ServiceError at key 0 on error.

    Fix: the on-demand parse-error branch previously did a bare ``return``
    (None) while every other error path returns the streams dict with a
    ServiceError -- callers checking the dict never saw that error.
    """
    streams = {}

    if not res:
        return None

    if res.status_code >= 400:
        streams[0] = ServiceError("Can't read DASH playlist. {0}".format(res.status_code))
        return streams

    xml = ET.XML(res.text)
    ns = "{urn:mpeg:dash:schema:mpd:2011}"

    if "isoff-on-demand" in xml.attrib["profiles"]:
        try:
            baseurl = urljoin(url, xml.find(ns + "BaseURL").text)
        except AttributeError:
            streams[0] = ServiceError("Can't parse DASH playlist")
            return streams
        videofiles = xml.findall(".//" + ns + "AdaptationSet[@contentType='video']/" + ns + "Representation")
        audiofiles = xml.findall(".//" + ns + "AdaptationSet[@contentType='audio']/" + ns + "Representation")
        for audio in audiofiles:
            audiourl = urljoin(baseurl, audio.find(ns + "BaseURL").text)
            audiobitrate = float(audio.attrib["bandwidth"]) / 1000
            for video in videofiles:
                # Streams are keyed by the combined audio+video bitrate (kbit/s).
                bitrate = float(video.attrib["bandwidth"]) / 1000 + audiobitrate
                videourl = urljoin(baseurl, video.find(ns + "BaseURL").text)
                options.other = "mp4"
                streams[int(bitrate)] = DASH(copy.copy(options), videourl, bitrate, cookies=res.cookies, audio=audiourl)

    if "isoff-live" in xml.attrib["profiles"]:
        # Fall back to mimeType matching when contentType is absent.
        video = xml.findall(".//" + ns + "AdaptationSet[@contentType='video']")
        if len(video) == 0:
            video = xml.findall(".//" + ns + "AdaptationSet[@mimeType='video/mp4']")
        audio = xml.findall(".//" + ns + "AdaptationSet[@contentType='audio']")
        if len(audio) == 0:
            audio = xml.findall(".//" + ns + "AdaptationSet[@mimeType='audio/mp4']")
        videofiles = parsesegments(video, url)
        audiofiles = parsesegments(audio, url)
        if videofiles:
            # The first audio representation is paired with every video one.
            audiokey = list(audiofiles.keys())[0]
            for vbitrate in videofiles:
                bitrate = (int(vbitrate) + int(audiokey)) / 1000
                options.other = "mp4"
                streams[int(bitrate)] = DASH(copy.copy(options), url, bitrate, cookies=res.cookies, audio=audiofiles[audiokey], files=videofiles[vbitrate])

    return streams
Beispiel #11
0
    def find_all_episodes(self, options):
        """Return a sorted list of all episode URLs for an svtplay title.

        Prefers the RSS feed (an explicit .../rss.xml URL or the page's
        alternate link); otherwise walks the embedded ``__svtplay`` JSON,
        honouring a ``tab=`` query parameter, "sista-chansen" pages, genre
        pages and the include_clips option.  options.all_last limits the
        result to the last N episodes.
        """
        parse = urlparse(self._url)

        # An explicit rss.xml URL is used as the feed directly; otherwise
        # look for the alternate-RSS link in the page markup.
        if len(parse.path) > 7 and parse.path[-7:] == "rss.xml":
            match = self.url
        else:
            match = re.search(r'<link rel="alternate" type="application/rss\+xml" [^>]*href="([^"]+)"',
                              self.get_urldata())
            if match:
                match = match.group(1)

        if match is None:
            # No RSS feed: fall back to the embedded JSON data.
            videos = []
            tab = None
            match = re.search("__svtplay'] = ({.*});", self.get_urldata())
            if re.search("sista-chansen", parse.path):
                videos = self._last_chance(videos, 1)
            elif not match:
                log.error("Couldn't retrieve episode list")
                return
            else:
                dataj = json.loads(match.group(1))
                if re.search("/genre", parse.path):
                    videos = self._genre(dataj)
                else:
                    # A tab=... query selects one specific related-videos tab.
                    if parse.query:
                        match = re.search("tab=(.+)", parse.query)
                        if match:
                            tab = match.group(1)

                    items = dataj["videoTitlePage"]["relatedVideosTabs"]
                    for i in items:
                        if tab:
                            if i["slug"] == tab:
                                videos = self.videos_to_list(i["videos"], videos)

                        else:
                            # Default: season and latest-episodes tabs.
                            if "sasong" in i["slug"] or "senast" in i["slug"]:
                                videos = self.videos_to_list(i["videos"], videos)

                        if self.options.include_clips:
                            if i["slug"] == "klipp":
                                videos = self.videos_to_list(i["videos"], videos)

            episodes = [urljoin("http://www.svtplay.se", x) for x in videos]
        else:
            # RSS path: every <item><link> is an episode URL.
            data = self.http.request("get", match).content
            xml = ET.XML(data)
            episodes = [x.text for x in xml.findall(".//item/link")]

        if options.all_last > 0:
            return sorted(episodes[-options.all_last:])
        return sorted(episodes)
Beispiel #12
0
def dashparse(options, res, url):
    """Parse an on-demand DASH manifest into {bitrate: DASH stream}.

    Combines every audio Representation with every video Representation,
    keyed by their combined bitrate in kbit/s.  Returns a dict containing
    a ServiceError at key 0 when the playlist cannot be read or parsed.

    Fixes: any HTTP error status is now treated as unreadable (previously
    only 403/404 were caught, so other error pages reached the XML parser),
    and a missing BaseURL yields a ServiceError instead of raising
    AttributeError.
    """
    streams = {}

    if res.status_code >= 400:
        streams[0] = ServiceError("Can't read DASH playlist. {0}".format(res.status_code))
        return streams

    xml = ET.XML(res.text)
    try:
        baseurl = urljoin(url, xml.find("{urn:mpeg:dash:schema:mpd:2011}BaseURL").text)
    except AttributeError:
        # No BaseURL element: segment URLs cannot be resolved.
        streams[0] = ServiceError("Can't parse DASH playlist")
        return streams

    videofiles = xml.findall(".//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@contentType='video']/{urn:mpeg:dash:schema:mpd:2011}Representation")
    audiofiles = xml.findall(".//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@contentType='audio']/{urn:mpeg:dash:schema:mpd:2011}Representation")
    for i in audiofiles:
        audiourl = urljoin(baseurl, i.find("{urn:mpeg:dash:schema:mpd:2011}BaseURL").text)
        audiobitrate = float(i.attrib["bandwidth"]) / 1000
        for n in videofiles:
            # Streams are keyed by the combined audio+video bitrate (kbit/s).
            bitrate = float(n.attrib["bandwidth"]) / 1000 + audiobitrate
            videourl = urljoin(baseurl, n.find("{urn:mpeg:dash:schema:mpd:2011}BaseURL").text)
            options.other = "mp4"
            streams[int(bitrate)] = DASH(copy.copy(options), videourl, bitrate, cookies=res.cookies, audio=audiourl)

    return streams
Beispiel #13
0
def _get_full_url(url, srcurl):
    if url[:4] == 'http':
        return url
    if url[0] == '/':
        baseurl = re.search(r'^(http[s]{0,1}://[^/]+)/', srcurl)
        return "{0}{1}".format(baseurl.group(1), url)

    # remove everything after last / in the path of the URL
    baseurl = re.sub(r'^([^\?]+)/[^/]*(\?.*)?$', r'\1/', srcurl)
    returl = urljoin(baseurl, url)

    return returl
Beispiel #14
0
    def get(self):
        """Yield subtitles and HLS streams for an UR player page.

        Parses the JSON passed to urPlayer.init() on the page, yields a
        subtitle object per labelled track, resolves the streaming base
        domain (directly or via the loadbalancer) and yields the streams
        from the SD and, when present, HD HLS playlists.
        """
        data = self.get_urldata()
        match = re.search(r"urPlayer.init\((.*)\);", data)
        if not match:
            yield ServiceError("Can't find json info")
            return

        if self.exclude():
            yield ServiceError("Excluding video")
            return

        data = match.group(1)
        jsondata = json.loads(data)
        if len(jsondata["subtitles"]) > 0:
            for sub in jsondata["subtitles"]:
                if "label" in sub:
                    # Only the first file of a comma-separated list is used.
                    absurl = urljoin(self.url, sub["file"].split(",")[0])
                    if absurl.endswith("vtt"):
                        subtype = "wrst"
                    else:
                        subtype = "tt"
                    if self.options.get_all_subtitles:
                        yield subtitle(copy.copy(self.options), subtype, absurl, "-" + filenamify(sub["label"]))
                    else:
                        yield subtitle(copy.copy(self.options), subtype, absurl)

        if "streamer" in jsondata["streaming_config"]:
            basedomain = jsondata["streaming_config"]["streamer"]["redirect"]
        else:
            # No direct streamer: ask the loadbalancer for the redirect domain.
            url = jsondata["streaming_config"]["loadbalancer"]
            if url[:1] == "/":
                url = "https:{}".format(url)
            lbjson = self.http.request("get", url).text
            lbjson = json.loads(lbjson)
            basedomain = lbjson["redirect"]
        http = "https://{0}/{1}".format(basedomain, jsondata["file_http"])
        hd = None
        if len(jsondata["file_http_hd"]) > 0:
            # An HD file is available; build its HLS playlist URL too.
            http_hd = "https://{0}/{1}".format(basedomain, jsondata["file_http_hd"])
            hls_hd = "{0}{1}".format(http_hd, jsondata["streaming_config"]["http_streaming"]["hls_file"])
            hd = True
        hls = "{0}{1}".format(http, jsondata["streaming_config"]["http_streaming"]["hls_file"])
        streams = hlsparse(self.options, self.http.request("get", hls), hls)
        for n in list(streams.keys()):
            yield streams[n]
        if hd:
            streams = hlsparse(self.options, self.http.request("get", hls_hd), hls_hd)
            for n in list(streams.keys()):
                yield streams[n]
Beispiel #15
0
    def find_all_episodes(self, options):
        """Return a sorted list of episode URLs for an svtplay title.

        Prefers the RSS feed (explicit rss.xml URL or the page's alternate
        link); otherwise walks the embedded ``__svtplay`` JSON (last-chance,
        genre or related-videos tabs).  options.all_last caps the number of
        episodes returned.
        """
        parse = urlparse(self._url)

        # An explicit .../rss.xml URL is used as the feed directly; otherwise
        # look for the alternate-RSS link in the page markup.
        if len(parse.path) > 7 and parse.path[-7:] == "rss.xml":
            match = self.url
        else:
            match = re.search(r'<link rel="alternate" type="application/rss\+xml" [^>]*href="([^"]+)"',
                          self.get_urldata())
            if match:
                match = match.group(1)

        if match is None:
            # No RSS feed available: fall back to the embedded JSON data.
            videos = []
            match = re.search("__svtplay'] = ({.*});", self.get_urldata())
            if re.search("sista-chansen", parse.path):
                videos = self._last_chance(videos, 1)
            elif not match:
                log.error("Couldn't retrieve episode list")
                return
            else:
                dataj = json.loads(match.group(1))
                if re.search("/genre", parse.path):
                    videos = self._genre(dataj)
                else:
                    # NOTE: "realatedVideosTabs" is the site's own (misspelled)
                    # JSON key -- do not "fix" it.
                    items = dataj["videoTitlePage"]["realatedVideosTabs"]
                    for i in items:
                        # Season tabs hold the episodes...
                        if "sasong" in i["slug"]:
                            for n in i["videos"]:
                                if n["url"] not in videos:
                                    videos.append(n["url"])
                        # ...and "latest" tabs may add newer ones (deduplicated).
                        if "senast" in i["slug"]:
                            for n in i["videos"]:
                                if n["url"] not in videos:
                                    videos.append(n["url"])

            episodes = [urljoin("http://www.svtplay.se", x) for x in videos]
        else:
            # RSS path: every <item><link> is an episode URL.
            data = self.http.request("get", match).content
            xml = ET.XML(data)

            episodes = [x.text for x in xml.findall(".//item/link")]
        # Keep at most options.all_last episodes; a non-positive limit is
        # never reached by n, so everything is kept.
        episodes_new = []
        n = 1
        for i in episodes:
            episodes_new.append(i)
            if n == options.all_last:
                break
            n += 1
        return sorted(episodes_new)
Beispiel #16
0
    def get(self):
        """Yield subtitle tracks and HLS streams for an UR player page."""
        pagedata = self.get_urldata()
        match = re.search(r"urPlayer.init\((.*)\);", pagedata)
        if not match:
            yield ServiceError("Can't find json info")
            return

        if self.exclude():
            yield ServiceError("Excluding video")
            return

        player = json.loads(match.group(1))

        # One subtitle per labelled track; only the first file in a
        # comma-separated list is used.
        for sub in player["subtitles"]:
            if "label" not in sub:
                continue
            absurl = urljoin(self.url, sub["file"].split(",")[0])
            subtype = "wrst" if absurl.endswith("vtt") else "tt"
            if self.options.get_all_subtitles:
                yield subtitle(copy.copy(self.options), subtype, absurl, "-" + filenamify(sub["label"]))
            else:
                yield subtitle(copy.copy(self.options), subtype, absurl)

        streaming = player["streaming_config"]
        if "streamer" in streaming:
            basedomain = streaming["streamer"]["redirect"]
        else:
            # Resolve the streaming domain through the loadbalancer endpoint.
            lburl = streaming["loadbalancer"]
            if lburl[:1] == "/":
                lburl = "https:{}".format(lburl)
            basedomain = json.loads(self.http.request("get", lburl).text)["redirect"]

        hlsfile = streaming["http_streaming"]["hls_file"]
        hls = "{0}{1}".format("https://{0}/{1}".format(basedomain, player["file_http"]), hlsfile)
        hls_hd = None
        if len(player["file_http_hd"]) > 0:
            # An HD variant exists; build its playlist URL too.
            hls_hd = "{0}{1}".format("https://{0}/{1}".format(basedomain, player["file_http_hd"]), hlsfile)

        streams = hlsparse(self.options, self.http.request("get", hls), hls)
        for key in list(streams.keys()):
            yield streams[key]
        if hls_hd:
            streams = hlsparse(self.options, self.http.request("get", hls_hd), hls_hd)
            for key in list(streams.keys()):
                yield streams[key]
Beispiel #17
0
    def find_all_episodes(self, options):
        """Return a sorted list of episode URLs for an svtplay title.

        Prefers the RSS feed; otherwise walks the embedded ``__svtplay``
        JSON (last-chance, genre or season/latest tabs).  options.all_last
        caps the number of episodes returned.
        """
        parse = urlparse(self._url)

        rssurl = None
        if len(parse.path) > 7 and parse.path[-7:] == "rss.xml":
            rssurl = self.url
        else:
            found = re.search(r'<link rel="alternate" type="application/rss\+xml" [^>]*href="([^"]+)"',
                              self.get_urldata())
            if found:
                rssurl = found.group(1)

        if rssurl is None:
            # No RSS feed available: fall back to the embedded JSON data.
            videos = []
            jsmatch = re.search("__svtplay'] = ({.*});", self.get_urldata())
            if re.search("sista-chansen", parse.path):
                videos = self._last_chance(videos, 1)
            elif not jsmatch:
                log.error("Couldn't retrieve episode list")
                return
            else:
                dataj = json.loads(jsmatch.group(1))
                if re.search("/genre", parse.path):
                    videos = self._genre(dataj)
                else:
                    # "realatedVideosTabs" is the site's own (misspelled) key.
                    for tab in dataj["videoTitlePage"]["realatedVideosTabs"]:
                        # Season and latest tabs both contribute; duplicates
                        # are skipped, so a tab matching both adds each URL once.
                        if "sasong" in tab["slug"] or "senast" in tab["slug"]:
                            for video in tab["videos"]:
                                if video["url"] not in videos:
                                    videos.append(video["url"])

            episodes = [urljoin("http://www.svtplay.se", v) for v in videos]
        else:
            # RSS path: every <item><link> is an episode URL.
            data = self.http.request("get", rssurl).content
            episodes = [item.text for item in ET.XML(data).findall(".//item/link")]

        # Keep at most options.all_last episodes (non-positive means all).
        if options.all_last > 0:
            episodes = episodes[:options.all_last]
        return sorted(episodes)
Beispiel #18
0
    def find_all_episodes(self, options):
        """Return episode URLs for an urskola.se or urplay.se program page."""
        parse = urlparse(self.url)
        episodes = []

        if parse.netloc == "urskola.se":
            data = self.get_urldata()
            # Follow the "show all" link when the page is paginated.
            more = re.search('data-limit="[^"]+" href="([^"]+)"', data)
            if more:
                data = self.http.get(urljoin("https://urskola.se", more.group(1))).text
            for tag in re.findall('<a class="puff program tv video" title="[^"]+" href="([^"]+)"', data):
                url = urljoin("https://urskola.se/", tag)
                if url not in episodes:
                    episodes.append(url)
        else:
            # Match other cards that belong to the same program keyword.
            keymatch = re.search(r"/program/\d+-(\w+)-", parse.path)
            if not keymatch:
                log.error("Can't find any videos")
                return None
            keyword = keymatch.group(1)
            for link in re.findall('card-link" href="([^"]+)"', self.get_urldata()):
                linkmatch = re.search(r"/program/\d+-(\w+)-", link)
                if linkmatch and linkmatch.group(1) == keyword:
                    episodes.append(urljoin("https://urplay.se/", link))

        # Trim to the requested number of episodes, dropping duplicates.
        episodes_new = []
        for count, url in enumerate(episodes):
            if count == options.all_last:
                break
            if url not in episodes_new:
                episodes_new.append(url)
        return episodes_new
Beispiel #19
0
 def get(self):
     """Yield HLS streams for a media page that embeds ``initialMedia``.

     Uses the CLOUD playback URLs from the metadata when present;
     otherwise resolves the stream through the mediaFramework API
     described by the page's ``mediaConfig`` JSON.
     """
     if self.exclude():
         yield ServiceError("Excluding video")
         return
     match = re.search("var initialMedia\s+= ({[^;]+);", self.get_urldata())
     if not match:
         yield ServiceError("Cant find any media on that page")
         return
     janson = json.loads(match.group(1))
     vid = janson["content_id"]
     if not janson["metaData"]:
         yield ServiceError("Can't find video on that page")
         return
     if "playbacks" in janson["metaData"]:
         # Direct playback URLs exist; use the cloud-hosted ones.
         for i in janson["metaData"]["playbacks"]:
             if "CLOUD" in i["name"]:
                 streams = hlsparse(self.options,
                                    self.http.request("get", i["url"]),
                                    i["url"])
                 if streams:
                     for n in list(streams.keys()):
                         yield streams[n]
     else:
         # No direct playbacks: go through the mediaFramework API.
         match = re.search("var mediaConfig\s+= ({[^;]+);",
                           self.get_urldata())
         if not match:
             yield ServiceError("Cant find any media on that page")
             return
         janson = json.loads(match.group(1))
         try:
             apiurl = janson["vpm"]["mediaFramework"][
                 "mediaFrameworkDomain"]
         except KeyError:
             yield ServiceError("Can't find api url")
             return
         filename = "{0}?contentId={1}&playbackScenario=HTTP_CLOUD_WIRED_WEB&format=json&platform=WEB_MEDIAPLAYER&_=1487455224334".format(
             janson["vpm"]["mediaFramework"]["mediaFrameworkEndPoint"], vid)
         url = urljoin(apiurl, filename)
         res = self.http.get(url)
         janson = res.json()
         for i in janson["user_verified_event"][0]["user_verified_content"][
                 0]["user_verified_media_item"]:
             streams = hlsparse(self.options,
                                self.http.request("get", i["url"]),
                                i["url"])
             if streams:
                 for n in list(streams.keys()):
                     yield streams[n]
Beispiel #20
0
    def find_all_episodes(self, options):
        """Return sorted episode URLs linked from the current C More page.

        Logs in first; on login failure the error message is logged and
        None is returned.  A positive ``options.all_last`` keeps only the
        last N entries before sorting.
        """
        token, message = self._login()
        if not token:
            log.error(message)
            return

        page = self.http.get(self.url).text
        base = "https://www.cmore.{}/".format(self._gettld())

        episodes = []
        for href in re.findall('<a class="card__link" href="([^"]+)"', page):
            absolute = urljoin(base, href)
            if absolute not in episodes:
                episodes.append(absolute)

        if options.all_last > 0:
            episodes = episodes[-options.all_last:]
        return sorted(episodes)
Beispiel #21
0
    def get(self):
        """Yield a 128 kbit/s HTTP audio stream for a Sveriges Radio page.

        Looks for, in order: a news publication id, a direct mp3 download
        link, and an episode id with or without music.  Yields ServiceError
        when the video is excluded or no audio info can be found.
        """
        data = self.get_urldata()

        if self.exclude():
            yield ServiceError("Excluding video")
            return

        # News ("Nyheter") publication embedded on the page.
        # Raw-string patterns avoid invalid-escape warnings for \d.
        match = re.search(
            r'data-audio-type="publication" data-audio-id="(\d+)">', data)
        if match:
            aid = match.group(1)
            audio_type = "publication"
        else:
            # Direct download link ("Ladda ner").
            match = re.search(r'href="(/topsy/ljudfil/\d+-mp3)"', data)
            if match:
                yield HTTP(copy.copy(self.options),
                           urljoin("https://sverigesradio.se", match.group(1)),
                           128)
                return

            # Download without music ("Ladda ner utan musik").
            match = re.search(
                r'data-audio-type="secondary" data-audio-id="(\d+)"', data)
            # Download with music ("Ladda ner med musik").
            match2 = re.search(
                r'data-audio-type="episode" data-audio-id="(\d+)"', data)
            if match:
                aid = match.group(1)
                audio_type = "secondary"  # renamed from builtin-shadowing "type"
            elif match2:
                aid = match2.group(1)
                audio_type = "episode"
            else:
                yield ServiceError("Can't find audio info")
                return

        # Resolve the actual audio url through the player ajax endpoint
        # (shared tail; the publication path previously duplicated this).
        dataurl = "https://sverigesradio.se/sida/playerajax/getaudiourl?id={0}&type={1}&quality=high&format=iis".format(
            aid, audio_type)
        data = self.http.request("get", dataurl).text
        playerinfo = json.loads(data)
        yield HTTP(copy.copy(self.options), playerinfo["audioUrl"], 128)
Beispiel #22
0
    def find_all_episodes(self, options):
        """Return absolute episode URLs for a barnplay show, oldest first."""
        match = re.search("__barnplay'] = ({.*});", self.get_urldata())
        if not match:
            log.error("Couldn't retrieve episode list.")
            return

        store = json.loads(match.group(1))
        store = store["context"]["dispatcher"]["stores"]["EpisodesStore"]
        # The store's "data" dict is keyed by a single show id.
        show_id = list(store["data"])[0]

        videos = []
        for episode in store["data"][show_id]["episodes"]:
            videos = self.videos_to_list(episode, videos)
        videos.reverse()

        episodes = [urljoin("http://www.svt.se", path) for path in videos]
        if options.all_last > 0:
            episodes = episodes[-options.all_last:]
        return episodes
Beispiel #23
0
 def get(self):
     """Yield playable streams (or ServiceError) for the current page.

     Same flow as the wrapped variant above: use the ``initialMedia``
     blob's cloud playbacks when present, otherwise resolve streams via
     the ``mediaConfig`` media-framework API.
     """
     if self.exclude():
         yield ServiceError("Excluding video")
         return
     match = re.search("var initialMedia\s+= ({[^;]+);", self.get_urldata())
     if not match:
         yield ServiceError("Cant find any media on that page")
         return
     janson = json.loads(match.group(1))
     vid = janson["content_id"]
     if not janson["metaData"]:
         yield ServiceError("Can't find video on that page")
         return
     if "playbacks" in janson["metaData"]:
         # Direct path: cloud-hosted HLS playbacks.
         for i in janson["metaData"]["playbacks"]:
             if "CLOUD" in i["name"]:
                 streams = hlsparse(self.options, self.http.request("get", i["url"]), i["url"])
                 if streams:
                     for n in list(streams.keys()):
                         yield streams[n]
     else:
         # Fallback: media-framework API lookup.
         match = re.search("var mediaConfig\s+= ({[^;]+);", self.get_urldata())
         if not match:
             yield ServiceError("Cant find any media on that page")
             return
         janson = json.loads(match.group(1))
         try:
             apiurl = janson["vpm"]["mediaFramework"]["mediaFrameworkDomain"]
         except KeyError:
             yield ServiceError("Can't find api url")
             return
         # The "_" parameter is a fixed cache-busting timestamp.
         filename = "{0}?contentId={1}&playbackScenario=HTTP_CLOUD_WIRED_WEB&format=json&platform=WEB_MEDIAPLAYER&_=1487455224334".format(janson["vpm"]["mediaFramework"]["mediaFrameworkEndPoint"], vid)
         url = urljoin(apiurl, filename)
         res = self.http.get(url)
         janson = res.json()
         for i in janson["user_verified_event"][0]["user_verified_content"][0]["user_verified_media_item"]:
             streams = hlsparse(self.options, self.http.request("get", i["url"]), i["url"])
             if streams:
                 for n in list(streams.keys()):
                     yield streams[n]
Beispiel #24
0
    def find_all_episodes(self, options):
        """Return absolute svtplay episode URLs for the current show.

        Handles "sista-chansen" and genre listing pages specially;
        otherwise reads the embedded __svtplay JSON blob.  A ?tab= query
        parameter restricts the listing to that accordion tab.
        """
        parse = urlparse(self._url)

        videos = []
        tab = None
        match = re.search("__svtplay'] = ({.*});", self.get_urldata())
        if re.search("sista-chansen", parse.path):
            videos = self._last_chance(videos, 1)
        elif not match:
            log.error("Couldn't retrieve episode list.")
            return
        else:
            dataj = json.loads(match.group(1))
            if re.search("/genre", parse.path):
                videos = self._genre(dataj)
            else:
                if parse.query:
                    # parse_qs handles extra query parameters correctly,
                    # unlike the old greedy "tab=(.+)" regex which captured
                    # everything after "tab=" (consistent with the sibling
                    # implementation below).
                    query = parse_qs(parse.query)
                    if "tab" in query:
                        tab = query["tab"][0]

                # Guard against a missing/None relatedVideoContent blob.
                if dataj["relatedVideoContent"]:
                    items = dataj["relatedVideoContent"]["relatedVideosAccordion"]
                    for i in items:
                        if tab:
                            if i["slug"] == tab:
                                videos = self.videos_to_list(i["videos"], videos)
                        else:
                            if "klipp" not in i["slug"] and "kommande" not in i["slug"]:
                                videos = self.videos_to_list(i["videos"], videos)
                        if self.options.include_clips:
                            if i["slug"] == "klipp":
                                videos = self.videos_to_list(i["videos"], videos)

        episodes = [urljoin("http://www.svtplay.se", x) for x in videos]

        if options.all_last > 0:
            return episodes[-options.all_last:]
        return episodes
Beispiel #25
0
    def find_all_episodes(self, options):
        """Return absolute svtplay episode URLs, optionally filtered by tab."""
        parse = urlparse(self._url)
        blob = re.search("__svtplay'] = ({.*});", self.get_urldata())

        videos = []
        if re.search("sista-chansen", parse.path):
            videos = self._last_chance(videos, 1)
        elif not blob:
            log.error("Couldn't retrieve episode list.")
            return
        elif re.search("/genre", parse.path):
            videos = self._genre(json.loads(blob.group(1)))
        else:
            dataj = json.loads(blob.group(1))
            tab = None
            if parse.query:
                tab = parse_qs(parse.query).get("tab", [None])[0]

            if dataj["relatedVideoContent"]:
                for item in dataj["relatedVideoContent"]["relatedVideosAccordion"]:
                    slug = item["slug"]
                    if tab:
                        if slug == tab:
                            videos = self.videos_to_list(item["videos"], videos)
                    elif "klipp" not in slug and "kommande" not in slug:
                        videos = self.videos_to_list(item["videos"], videos)
                    # Clips are added on top when explicitly requested.
                    if self.options.include_clips and slug == "klipp":
                        videos = self.videos_to_list(item["videos"], videos)

        episodes = [urljoin("http://www.svtplay.se", v) for v in videos]
        if options.all_last > 0:
            episodes = episodes[-options.all_last:]
        return episodes
Beispiel #26
0
    def find_all_episodes(self, options):
        """Return up to options.all_last unique episode URLs for this program.

        The program keyword is taken from self.url; page links whose
        keyword matches are collected.  Returns None when the current url
        does not look like a program page.
        """
        parse = urlparse(self.url)
        # Raw-string patterns avoid invalid-escape warnings for \d and \w.
        match = re.search(r"/program/\d+-(\w+)-", parse.path)
        if not match:
            log.error("Can't find any videos")
            return None
        keyword = match.group(1)

        episodes = []
        all_links = re.findall('card-link" href="([^"]+)"', self.get_urldata())
        for link in all_links:
            match = re.search(r"/program/\d+-(\w+)-", link)
            if match and match.group(1) == keyword:
                episodes.append(urljoin("http://urplay.se/", link))

        # Deduplicate while honoring the all_last limit.  Only unique
        # entries count towards the limit; previously every processed link
        # (including duplicates) consumed the quota.
        episodes_new = []
        for url in episodes:
            if len(episodes_new) == options.all_last:
                break
            if url not in episodes_new:
                episodes_new.append(url)
        return episodes_new
Beispiel #27
0
    def find_all_episodes(self, options):
        """Return a sorted list of episode URLs, preferring the RSS feed."""
        rss = re.search(
            r'<link rel="alternate" type="application/rss\+xml" [^>]*href="([^"]+)"',
            self.get_urldata())
        if rss is None:
            # No RSS feed advertised; scrape play links off the page instead.
            links = re.findall(r'a class="play[^"]+"\s+href="(/video[^"]+)"',
                               self.get_urldata())
            if not links:
                log.error("Couldn't retrieve episode list")
                return
            episodes = [urljoin("http://www.svtplay.se", link) for link in links]
        else:
            feed = self.http.request("get", rss.group(1)).content
            episodes = [item.text for item in ET.XML(feed).findall(".//item/link")]

        # A positive all_last keeps only the first N entries.
        if options.all_last >= 1:
            episodes = episodes[:options.all_last]
        return sorted(episodes)
Beispiel #28
0
    def get(self):
        """Yield HLS streams (or ServiceError) for a sportlib.se video.

        Flow: scrape OAuth client credentials from the login page's app
        bundle, obtain an access token with the user's credentials, look
        up the video's stream url via the core.oz.com API, set a playlist
        cookie and finally parse the HLS playlist.
        """
        data = self.http.get("https://www.sportlib.se/sportlib/login").text
        match = re.search('src="(/app[^"]+)">', data)
        if not match:
            yield ServiceError("Can't find url for login info")
            return

        # The app bundle embeds the OAuth client secret and client id.
        url = urljoin("https://www.sportlib.se", match.group(1))
        data = self.http.get(url).text
        match = re.search('CLIENT_SECRET:"([^"]+)"', data)
        if not match:
            yield ServiceError("Cant fint login info")
            return
        cs = match.group(1)
        match = re.search('CLIENT_ID:"([^"]+)"', data)
        if not match:
            yield ServiceError("Cant fint login info")
            return
        cid = match.group(1)
        # Channel id for the "sportlib" slug.
        res = self.http.get("https://core.oz.com/channels?slug=sportlib&org=www.sportlib.se")
        janson = res.json()
        sid = janson["data"][0]["id"]

        # Password-grant OAuth token request.
        data = {"client_id": cid, "client_secret": cs, "grant_type": "password",
                "username": self.options.username, "password": self.options.password}
        res = self.http.post("https://core.oz.com/oauth2/token?channelId={}".format(sid), data=data)
        if res.status_code > 200:
            yield ServiceError("Wrong username / password?")
            return
        janson = res.json()
        token_type = janson["token_type"].title()
        access_token = janson["access_token"]

        # Video id is a hex/uuid-like token in the url path.
        parse = urlparse(self.url)
        match = re.search("video/([-a-fA-F0-9]+)", parse.path)
        if not match:
            yield ServiceError("Cant find video id")
            return
        vid = match.group(1)

        headers = {"content-type": "application/json", "authorization": "{} {}".format(token_type, access_token)}
        url = "https://core.oz.com/channels/{}/videos/{}?include=collection,streamUrl".format(sid, vid)
        res = self.http.get(url, headers=headers)
        janson = res.json()
        cookiename = janson["data"]["streamUrl"]["cookieName"]
        token = janson["data"]["streamUrl"]["token"]
        hlsplaylist = janson["data"]["streamUrl"]["cdnUrl"]

        if self.options.output_auto:
            # Derive the output filename from the video title, keeping any
            # directory component the user supplied.
            directory = os.path.dirname(self.options.output)
            title = filenamify(janson["data"]["title"])
            if len(directory):
                self.options.output = os.path.join(directory, title)
            else:
                self.options.output = title

        # get cookie
        postjson = {"name": cookiename, "value": token}
        res = self.http.post("https://playlist.oz.com/cookie", json=postjson)
        cookies = res.cookies
        streams = hlsparse(self.options, self.http.request("get", hlsplaylist), hlsplaylist, keycookie=cookies)
        if streams:
            for n in list(streams.keys()):
                yield streams[n]
Beispiel #29
0
    def get(self):
        """Yield streams (or ServiceError) for a svtplay video page.

        Reads the embedded __svtplay JSON ("videoPage"), optionally
        switches to the version matching an ?accessService= query
        parameter, then fetches the stream list from the videoplayer API.
        """
        parse = urlparse(self.url)
        if parse.netloc == "www.svtplay.se" or parse.netloc == "svtplay.se":
            if parse.path[:6] != "/video" and parse.path[:6] != "/klipp":
                yield ServiceError(
                    "This mode is not supported anymore. Need the url with the video."
                )
                return

        query = parse_qs(parse.query)
        self.access = None
        if "accessService" in query:
            # NOTE(review): parse_qs yields a list of values; the equality
            # test against i["accessService"] below may never match unless
            # that field is also a list — consider query["accessService"][0].
            self.access = query["accessService"]

        match = re.search("__svtplay'] = ({.*});", self.get_urldata())
        if not match:
            yield ServiceError("Can't find video info.")
            return
        janson = json.loads(match.group(1))["videoPage"]

        if "programTitle" not in janson["video"]:
            yield ServiceError("Can't find any video on that page.")
            return

        if self.access:
            # Re-fetch the page of the matching alternate version and use
            # its videoPage blob instead.
            for i in janson["video"]["versions"]:
                if i["accessService"] == self.access:
                    url = urljoin("http://www.svtplay.se", i["contentUrl"])
                    res = self.http.get(url)
                    match = re.search("__svtplay'] = ({.*});", res.text)
                    if not match:
                        yield ServiceError("Can't find video info.")
                        return
                    janson = json.loads(match.group(1))["videoPage"]

        if "live" in janson["video"]:
            self.options.live = janson["video"]["live"]

        if self.options.output_auto:
            self.options.service = "svtplay"
            self.options.output = self.outputfilename(janson["video"],
                                                      self.options.output)

        if self.exclude():
            yield ServiceError("Excluding video.")
            return

        # Prefer the program version id when available.
        if "programVersionId" in janson["video"]:
            vid = janson["video"]["programVersionId"]
        else:
            vid = janson["video"]["id"]
        res = self.http.get(
            "http://api.svt.se/videoplayer-api/video/{0}".format(vid))
        try:
            janson = res.json()
        except json.decoder.JSONDecodeError:
            yield ServiceError("Can't decode api request: {0}".format(
                res.request.url))
            return
        videos = self._get_video(janson)
        for i in videos:
            yield i
Beispiel #30
0
    def find_all_episodes(self, options):
        """Return a sorted list of episode URLs for the show.

        Prefers the show's RSS feed — given directly as a .../rss.xml url
        or advertised on the page.  Falls back to scraping the embedded
        __svtplay JSON when the feed is unusable, or when clips or a
        specific tab are requested (the feed cannot express those).
        """
        parse = urlparse(self._url)

        # A url ending in rss.xml is itself the feed.
        if len(parse.path) > 7 and parse.path[-7:] == "rss.xml":
            rss_url = self.url
        else:
            rss_url = re.search(
                r'<link rel="alternate" type="application/rss\+xml" [^>]*href="([^"]+)"',
                self.get_urldata())
            if rss_url:
                rss_url = rss_url.group(1)

        valid_rss = False
        tab = None
        if parse.query:
            match = re.search("tab=(.+)", parse.query)
            if match:
                tab = match.group(1)

        #Clips and tab can not be used with RSS-feed
        if rss_url and not self.options.include_clips and not tab:
            rss_data = self.http.request("get", rss_url).content

            try:
                xml = ET.XML(rss_data)
                episodes = [x.text for x in xml.findall(".//item/link")]
                #TODO add better checks for valid RSS-feed here
                valid_rss = True
            except ET.ParseError:
                log.info(
                    "Error parsing RSS-feed at %s, make sure it is a valid RSS-feed, will use other method to find episodes."
                    % rss_url)
        else:
            #if either tab or include_clips is set remove rss.xml from url if set manually.
            if len(parse.path) > 7 and parse.path[-7:] == "rss.xml":
                self._url = self.url.replace("rss.xml", "")

        if not valid_rss:
            # Fallback: scrape the __svtplay JSON blob, mirroring the
            # plain find_all_episodes flow.
            videos = []
            tab = None
            match = re.search("__svtplay'] = ({.*});", self.get_urldata())
            if re.search("sista-chansen", parse.path):
                videos = self._last_chance(videos, 1)
            elif not match:
                log.error("Couldn't retrieve episode list.")
                return
            else:
                dataj = json.loads(match.group(1))
                if re.search("/genre", parse.path):
                    videos = self._genre(dataj)
                else:
                    if parse.query:
                        match = re.search("tab=(.+)", parse.query)
                        if match:
                            tab = match.group(1)

                    items = dataj["relatedVideoContent"][
                        "relatedVideosAccordion"]
                    for i in items:
                        if tab:
                            # Only the requested accordion tab.
                            if i["slug"] == tab:
                                videos = self.videos_to_list(
                                    i["videos"], videos)
                        else:
                            # Skip clips and upcoming ("kommande") tabs.
                            if "klipp" not in i[
                                    "slug"] and "kommande" not in i["slug"]:
                                videos = self.videos_to_list(
                                    i["videos"], videos)
                        if self.options.include_clips:
                            if i["slug"] == "klipp":
                                videos = self.videos_to_list(
                                    i["videos"], videos)

            episodes = [urljoin("http://www.svtplay.se", x) for x in videos]

        if options.all_last > 0:
            return sorted(episodes[-options.all_last:])
        return sorted(episodes)
Beispiel #31
0
    def get(self):
        """Yield streams (or ServiceError) for a svtplay video page.

        Reads the embedded __svtplay JSON ("videoTitlePage"), optionally
        switches to an ?accessService= version, then asks the videoplayer
        API for subtitle and video references and dispatches on format
        (hls / hds / dash264 / dashhbbtv), including any "alt" variants.
        """
        parse = urlparse(self.url)
        if parse.netloc == "www.svtplay.se" or parse.netloc == "svtplay.se":
            if parse.path[:6] != "/video" and parse.path[:6] != "/klipp":
                yield ServiceError(
                    "This mode is not supported anymore. need the url with the video"
                )
                return

        query = parse_qs(parse.query)
        self.access = None
        if "accessService" in query:
            # NOTE(review): parse_qs yields a list of values; the equality
            # test against i["accessService"] below may never match unless
            # that field is also a list — consider query["accessService"][0].
            self.access = query["accessService"]

        match = re.search("__svtplay'] = ({.*});", self.get_urldata())
        if not match:
            yield ServiceError("Cant find video info.")
            return
        janson = json.loads(match.group(1))["videoTitlePage"]

        if "programTitle" not in janson["video"]:
            yield ServiceError("Can't find any video on that page")
            return

        if self.access:
            # Re-fetch the page of the matching alternate version and use
            # its videoTitlePage blob instead.
            for i in janson["video"]["versions"]:
                if i["accessService"] == self.access:
                    url = urljoin("http://www.svtplay.se", i["contentUrl"])
                    res = self.http.get(url)
                    match = re.search("__svtplay'] = ({.*});", res.text)
                    if not match:
                        yield ServiceError("Cant find video info.")
                        return
                    janson = json.loads(match.group(1))["videoTitlePage"]

        if "live" in janson["video"]:
            self.options.live = janson["video"]["live"]

        if self.options.output_auto:
            self.options.service = "svtplay"
            self.options.output = self.outputfilename(janson["video"],
                                                      self.options.output)

        if self.exclude():
            yield ServiceError("Excluding video")
            return

        # Prefer the program version id when available.
        if "programVersionId" in janson["video"]:
            vid = janson["video"]["programVersionId"]
        else:
            vid = janson["video"]["id"]
        res = self.http.get(
            "http://api.svt.se/videoplayer-api/video/{0}".format(vid))
        janson = res.json()
        if "live" in janson:
            self.options.live = janson["live"]
        if "subtitleReferences" in janson:
            for i in janson["subtitleReferences"]:
                if i["format"] == "websrt" and "url" in i:
                    yield subtitle(copy.copy(self.options), "wrst", i["url"])

        if "videoReferences" in janson:
            if len(janson["videoReferences"]) == 0:
                yield ServiceError(
                    "Media doesn't have any associated videos (yet?)")
                return

            for i in janson["videoReferences"]:
                parse = urlparse(i["url"])
                query = parse_qs(parse.query)
                if i["format"] == "hls":
                    streams = hlsparse(self.options,
                                       self.http.request("get", i["url"]),
                                       i["url"])
                    if streams:
                        for n in list(streams.keys()):
                            yield streams[n]
                    # An "alt" query parameter points at an alternate
                    # playlist; follow the redirect and parse that too.
                    if "alt" in query and len(query["alt"]) > 0:
                        alt = self.http.get(query["alt"][0])
                        if alt:
                            streams = hlsparse(
                                self.options,
                                self.http.request("get", alt.request.url),
                                alt.request.url)
                            if streams:
                                for n in list(streams.keys()):
                                    yield streams[n]
                if i["format"] == "hds":
                    # DRM-protected ("/se/secure/") HDS urls are skipped.
                    match = re.search(r"\/se\/secure\/", i["url"])
                    if not match:
                        streams = hdsparse(
                            self.options,
                            self.http.request("get",
                                              i["url"],
                                              params={"hdcore": "3.7.0"}),
                            i["url"])
                        if streams:
                            for n in list(streams.keys()):
                                yield streams[n]
                        if "alt" in query and len(query["alt"]) > 0:
                            alt = self.http.get(query["alt"][0])
                            if alt:
                                streams = hdsparse(
                                    self.options,
                                    self.http.request(
                                        "get",
                                        alt.request.url,
                                        params={"hdcore": "3.7.0"}),
                                    alt.request.url)
                                if streams:
                                    for n in list(streams.keys()):
                                        yield streams[n]
                if i["format"] == "dash264" or i["format"] == "dashhbbtv":
                    streams = dashparse(self.options,
                                        self.http.request("get", i["url"]),
                                        i["url"])
                    if streams:
                        for n in list(streams.keys()):
                            yield streams[n]

                    if "alt" in query and len(query["alt"]) > 0:
                        alt = self.http.get(query["alt"][0])
                        if alt:
                            streams = dashparse(
                                self.options,
                                self.http.request("get", alt.request.url),
                                alt.request.url)
                            if streams:
                                for n in list(streams.keys()):
                                    yield streams[n]
Beispiel #32
0
def parsesegments(content, url):
    """Expand a DASH SegmentTemplate into per-bitrate segment URL lists.

    content: sequence of ElementTree elements whose first entry holds the
        SegmentTemplate / SegmentTimeline / Representation nodes.
    url: manifest URL; segment names are resolved relative to its directory.

    Returns a dict {bandwidth: [init_url, segment_url, ...]} where the
    media segments are expanded from either a $Number$ or a $Time$
    template.
    """
    ns = "{urn:mpeg:dash:schema:mpd:2011}"
    adaptionset = content[0]

    # A SegmentTemplate at adaptation-set level supplies shared "media"
    # and "initialization" patterns; representations override them below
    # when it is absent.  (Previously the same node was looked up twice.)
    template = adaptionset.find(ns + "SegmentTemplate")
    scheme = init = None
    if template is not None:
        scheme = template.attrib["media"]
        init = template.attrib["initialization"]

    # Number of segments: either an explicit repeat count on the first
    # <S r="..."> element, or a timeline of <S d="..."> durations.
    s_elements = adaptionset.findall(".//" + ns + "S")
    repeated = adaptionset.findall(".//" + ns + "S[@r]")
    total = 0
    if repeated:
        total = int(repeated[0].attrib["r"]) + len(s_elements) + 1
        timeline = False
    else:
        timeline = [0]
        for s in s_elements:
            timeline.append(int(s.attrib["d"]))

    # startNumber of the first media segment defaults to 1 per the DASH
    # spec; hoisted out of the per-representation loop (loop-invariant).
    first_template = adaptionset.findall(".//" + ns + "SegmentTemplate")[0]
    start = int(first_template.attrib.get("startNumber", 1))

    dirname = os.path.dirname(url) + "/"
    files = {}
    for rep in adaptionset.findall(".//" + ns + "Representation"):
        rep_id = rep.attrib["id"]  # renamed from builtin-shadowing "id"
        bitrate = int(rep.attrib["bandwidth"])
        segments = []

        if template is not None:
            rep_init = init
            rep_scheme = scheme
        else:
            rep_template = rep.find(ns + "SegmentTemplate")
            rep_init = rep_template.attrib["initialization"]
            rep_scheme = rep_template.attrib["media"]

        segments.append(
            urljoin(dirname, rep_init.replace("$RepresentationID$", rep_id)))
        name = rep_scheme.replace("$RepresentationID$", rep_id)

        if "$Number" in name:
            # Raw-string pattern avoids invalid-escape warnings.
            fmtmatch = re.search(r"\$Number(\%\d+)d\$", name)
            if fmtmatch:
                # printf-style template, e.g. $Number%05d$.
                pattern = name.replace("$Number", "").replace("$", "")
                for n in range(start, start + total):
                    segments.append(urljoin(dirname, pattern % n))
            else:
                # plain $Number$ substitution
                for n in range(start, start + total):
                    segments.append(
                        urljoin(dirname, name.replace("$Number$", str(n))))
        if "$Time$" in name:
            # Cumulative presentation time substituted per segment.
            elapsed = 0
            for duration in timeline:
                elapsed += int(duration)
                segments.append(
                    urljoin(dirname, name.replace("$Time$", str(elapsed))))
        files[bitrate] = segments
    return files
Beispiel #33
0
def dashparse(options, res, url):
    """Parse a DASH MPD manifest into a dict mapping bitrate -> DASH stream.

    options: download options; a copy is stored in every returned stream.
    res: HTTP response holding the manifest, or None/falsy on failure.
    url: manifest URL, used to resolve relative BaseURL references.

    Returns None when res is falsy; otherwise a dict that may carry a
    ServiceError under key 0 when the playlist cannot be read or parsed.
    """
    streams = {}

    if not res:
        return None

    if res.status_code >= 400:
        streams[0] = ServiceError("Can't read DASH playlist. {0}".format(
            res.status_code))
        return streams
    xml = ET.XML(res.text)
    # BUGFIX: a manifest without a "profiles" attribute used to raise
    # KeyError; treat it as matching neither profile branch instead.
    profiles = xml.attrib.get("profiles", "")

    if "isoff-on-demand" in profiles:
        try:
            baseurl = urljoin(
                url,
                xml.find("{urn:mpeg:dash:schema:mpd:2011}BaseURL").text)
        except AttributeError:
            streams[0] = ServiceError("Can't parse DASH playlist")
            # BUGFIX: was a bare "return" (None), silently dropping the
            # error; return the dict like the error path above does.
            return streams
        videofiles = xml.findall(
            ".//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@contentType='video']/{urn:mpeg:dash:schema:mpd:2011}Representation"
        )
        audiofiles = xml.findall(
            ".//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@contentType='audio']/{urn:mpeg:dash:schema:mpd:2011}Representation"
        )
        # Every audio/video pairing becomes one stream, keyed by the
        # combined bitrate in kbit/s.
        for audiorep in audiofiles:
            audiourl = urljoin(
                baseurl,
                audiorep.find("{urn:mpeg:dash:schema:mpd:2011}BaseURL").text)
            audiobitrate = float(audiorep.attrib["bandwidth"]) / 1000
            for videorep in videofiles:
                bitrate = float(videorep.attrib["bandwidth"]) / 1000 + audiobitrate
                videourl = urljoin(
                    baseurl,
                    videorep.find("{urn:mpeg:dash:schema:mpd:2011}BaseURL").text)
                options.other = "mp4"
                streams[int(bitrate)] = DASH(copy.copy(options),
                                             videourl,
                                             bitrate,
                                             cookies=res.cookies,
                                             audio=audiourl)

    if "isoff-live" in profiles:
        video = xml.findall(
            ".//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@contentType='video']"
        )
        if len(video) == 0:
            # Some manifests tag adaptation sets by mimeType instead of
            # contentType.
            video = xml.findall(
                ".//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@mimeType='video/mp4']"
            )
        audio = xml.findall(
            ".//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@contentType='audio']"
        )
        if len(audio) == 0:
            audio = xml.findall(
                ".//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@mimeType='audio/mp4']"
            )
        videofiles = parsesegments(video, url)
        audiofiles = parsesegments(audio, url)
        # BUGFIX: guard against manifests with no parsable audio or video
        # segments; previously list(audiofiles.keys())[0] raised IndexError.
        if videofiles and audiofiles:
            audiokey = list(audiofiles.keys())[0]
            for videokey in videofiles.keys():
                bitrate = (int(videokey) + int(audiokey)) / 1000
                options.other = "mp4"
                streams[int(bitrate)] = DASH(copy.copy(options),
                                             url,
                                             bitrate,
                                             cookies=res.cookies,
                                             audio=audiofiles[audiokey],
                                             files=videofiles[videokey])

    return streams
Beispiel #34
0
    def get(self):
        """Resolve streams for a sportlib.se video.

        Scrapes OAuth client credentials from the login page bundle,
        exchanges the configured username/password for an access token at
        core.oz.com, fetches the video's stream URL and its CDN auth
        cookie, then yields the HLS streams (or a ServiceError on any
        failure along the way).
        """
        data = self.http.get("https://www.sportlib.se/sportlib/login").text
        match = re.search('src="(/app[^"]+)">', data)
        if not match:
            yield ServiceError("Can't find url for login info")
            return

        # The referenced app bundle embeds the OAuth client id/secret.
        url = urljoin("https://www.sportlib.se", match.group(1))
        data = self.http.get(url).text
        match = re.search('CLIENT_SECRET:"([^"]+)"', data)
        if not match:
            # BUGFIX: typo in user-facing message ("Cant fint login info").
            yield ServiceError("Can't find login info")
            return
        cs = match.group(1)
        match = re.search('CLIENT_ID:"([^"]+)"', data)
        if not match:
            yield ServiceError("Can't find login info")
            return
        cid = match.group(1)
        # Look up the channel id for the sportlib slug.
        res = self.http.get(
            "https://core.oz.com/channels?slug=sportlib&org=www.sportlib.se")
        janson = res.json()
        sid = janson["data"][0]["id"]

        # OAuth2 resource-owner password grant.
        data = {
            "client_id": cid,
            "client_secret": cs,
            "grant_type": "password",
            "username": self.options.username,
            "password": self.options.password
        }
        res = self.http.post(
            "https://core.oz.com/oauth2/token?channelId={}".format(sid),
            data=data)
        if res.status_code > 200:
            yield ServiceError("Wrong username / password?")
            return
        janson = res.json()
        token_type = janson["token_type"].title()
        access_token = janson["access_token"]

        # The video id is embedded in the page URL path.
        parse = urlparse(self.url)
        match = re.search("video/([-a-fA-F0-9]+)", parse.path)
        if not match:
            # BUGFIX: typo in user-facing message ("Cant find video id").
            yield ServiceError("Can't find video id")
            return
        vid = match.group(1)

        headers = {
            "content-type": "application/json",
            "authorization": "{} {}".format(token_type, access_token)
        }
        url = "https://core.oz.com/channels/{}/videos/{}?include=collection,streamUrl".format(
            sid, vid)
        res = self.http.get(url, headers=headers)
        janson = res.json()
        cookiename = janson["data"]["streamUrl"]["cookieName"]
        token = janson["data"]["streamUrl"]["token"]
        hlsplaylist = janson["data"]["streamUrl"]["cdnUrl"]

        if self.options.output_auto:
            # Derive the output filename from the video title.
            directory = os.path.dirname(self.options.output)
            title = filenamify(janson["data"]["title"])
            if len(directory):
                self.options.output = os.path.join(directory, title)
            else:
                self.options.output = title

        # The CDN requires a session cookie obtained from the playlist host.
        postjson = {"name": cookiename, "value": token}
        res = self.http.post("https://playlist.oz.com/cookie", json=postjson)
        cookies = res.cookies
        streams = hlsparse(self.options,
                           self.http.request("get", hlsplaylist),
                           hlsplaylist,
                           keycookie=cookies)
        if streams:
            for n in list(streams.keys()):
                yield streams[n]
Beispiel #35
0
    def find_all_episodes(self, options):
        """Return a sorted list of all episode URLs for this show.

        Prefers the show's RSS feed when one is available; falls back to
        the embedded "__svtplay" JSON blob on the page when the feed is
        missing, invalid, or incompatible with the requested tab/clips
        options. When options.all_last > 0 only the last N episodes are
        returned.
        """
        # NOTE(review): self._url here but self.url below -- presumably the
        # same value; confirm against the base-class property.
        parse = urlparse(self._url)

        # URL already points directly at the RSS feed?
        if len(parse.path) > 7 and parse.path[-7:] == "rss.xml":
            rss_url = self.url
        else:
            # Otherwise look for an advertised RSS alternate link in the page.
            rss_url = re.search(r'<link rel="alternate" type="application/rss\+xml" [^>]*href="([^"]+)"', self.get_urldata())
            if rss_url:
                rss_url = rss_url.group(1)

        valid_rss = False
        tab = None
        # A "tab" query parameter selects a specific accordion section.
        if parse.query:
            match = re.search("tab=(.+)", parse.query)
            if match:
                tab = match.group(1)

        # Clips and tab can not be used with RSS-feed
        if rss_url and not self.options.include_clips and not tab:
            rss_data = self.http.request("get", rss_url).content

            try:
                xml = ET.XML(rss_data)
                episodes = [x.text for x in xml.findall(".//item/link")]
                # TODO: add better checks for valid RSS-feed here
                valid_rss = True
            except ET.ParseError:
                log.info("Error parsing RSS-feed at {0}, make sure it is a valid RSS-feed, will use other method to find episodes.".format(rss_url))
        else:
            # if either tab or include_clips is set remove rss.xml from url if set manually.
            if len(parse.path) > 7 and parse.path[-7:] == "rss.xml":
                self._url = self.url.replace("rss.xml","")

        if not valid_rss:
            videos = []
            tab = None
            # Grab the embedded page state (JSON) to enumerate episodes.
            match = re.search("__svtplay'] = ({.*});", self.get_urldata())
            if re.search("sista-chansen", parse.path):
                # "sista chansen" (last chance) pages use a separate lister.
                videos = self._last_chance(videos, 1)
            elif not match:
                # NOTE(review): returns None here while the success paths
                # return a list -- callers must tolerate None.
                log.error("Couldn't retrieve episode list.")
                return
            else:
                dataj = json.loads(match.group(1))
                if re.search("/genre", parse.path):
                    videos = self._genre(dataj)
                else:
                    if parse.query:
                        match = re.search("tab=(.+)", parse.query)
                        if match:
                            tab = match.group(1)

                    items = dataj["relatedVideoContent"]["relatedVideosAccordion"]
                    for i in items:
                        if tab:
                            # Only the explicitly requested tab.
                            if i["slug"] == tab:
                                videos = self.videos_to_list(i["videos"], videos)
                        else:
                            # Skip clips and upcoming ("kommande") sections.
                            if "klipp" not in i["slug"] and "kommande" not in i["slug"]:
                                videos = self.videos_to_list(i["videos"], videos)
                        if self.options.include_clips:
                            if i["slug"] == "klipp":
                                videos = self.videos_to_list(i["videos"], videos)

            episodes = [urljoin("http://www.svtplay.se", x) for x in videos]

        if options.all_last > 0:
            return sorted(episodes)[-options.all_last:]
        return sorted(episodes)
Beispiel #36
0
def templateelemt(element, filename, idnumber, offset_sec, duration_sec):
    """Expand a DASH SegmentTemplate element into a list of segment URLs.

    element: the SegmentTemplate XML element (must carry "initialization"
        and "media" attributes).
    filename: base URL relative segment names are joined against.
    idnumber: value substituted for $RepresentationID$.
    offset_sec / duration_sec: optional live-window offset and length in
        seconds, converted to segment counts via duration/timescale.

    Returns the initialization-segment URL followed by the media-segment
    URLs expanded from the $Time$ or $Number$ template variable.
    """
    files = []
    timescale = 1
    duration = 1
    total = 1

    init = element.attrib["initialization"]
    media = element.attrib["media"]
    start = int(element.attrib.get("startNumber", 1))

    if "timescale" in element.attrib:
        timescale = float(element.attrib["timescale"])

    if "duration" in element.attrib:
        duration = float(element.attrib["duration"])

    # Convert seconds into whole-segment counts.
    if offset_sec is not None and duration_sec is None:
        start += int(offset_sec / (duration / timescale))

    if duration_sec is not None:
        total = int(duration_sec / (duration / timescale))

    selements = None
    rvalue = None
    timeline = element.find("{urn:mpeg:dash:schema:mpd:2011}SegmentTimeline")
    if timeline is not None:
        rvalue = timeline.findall(".//{urn:mpeg:dash:schema:mpd:2011}S[@r]")
        selements = timeline.findall(".//{urn:mpeg:dash:schema:mpd:2011}S")
        # Drop the (possibly partial) last S entry.
        if selements:
            selements.pop()

        if rvalue:
            total = int(rvalue[0].attrib["r"]) + len(selements) + 1

    name = media.replace("$RepresentationID$", idnumber)
    files.append(
        urljoin(filename, init.replace("$RepresentationID$", idnumber)))

    if "$Time$" in media:
        # Running sum of segment durations gives each segment's start time.
        time = [0]
        for s in (selements or []):
            time.append(int(s.attrib["d"]))
        # BUGFIX: only use the r-repeat stride when an S element with an
        # "r" attribute actually exists; previously rvalue[0] raised an
        # exception when the timeline had no @r element (or no timeline).
        if rvalue and len(selements) < 3:
            for n in range(start, start + total):
                new = name.replace("$Time$",
                                   str(n * int(rvalue[0].attrib["d"])))
                files.append(urljoin(filename, new))
        else:
            number = 0
            for n in time:
                number += n
                new = name.replace("$Time$", str(number))
                files.append(urljoin(filename, new))
    if "$Number" in name:
        if re.search(r"\$Number(\%\d+)d\$", name):
            # printf-style number formatting, e.g. $Number%05d$.
            vname = name.replace("$Number", "").replace("$", "")
            for n in range(start, start + total):
                files.append(urljoin(filename, vname % n))
        else:
            for n in range(start, start + total):
                newname = name.replace("$Number$", str(n))
                files.append(urljoin(filename, newname))
    return files
Beispiel #37
0
def templateelemt(element, filename, idnumber, offset_sec, duration_sec):
    """Build the list of media-segment URLs described by a SegmentTemplate.

    The initialization segment comes first, followed by segments expanded
    from either the $Time$ or the $Number$ template variable, resolved
    against *filename* and with $RepresentationID$ replaced by *idnumber*.
    """
    attrs = element.attrib
    init_name = attrs["initialization"]
    media = attrs["media"]

    start = int(attrs["startNumber"]) if "startNumber" in attrs else 1
    timescale = float(attrs["timescale"]) if "timescale" in attrs else 1
    duration = float(attrs["duration"]) if "duration" in attrs else 1

    total = 1
    if duration_sec is None:
        # A live offset shifts the first segment number forward.
        if offset_sec is not None:
            start += int(offset_sec / (duration / timescale))
    else:
        total = int(duration_sec / (duration / timescale))

    s_nodes = None
    repeats = None
    timeline = element.find("{urn:mpeg:dash:schema:mpd:2011}SegmentTimeline")
    if timeline is not None:
        repeats = timeline.findall(".//{urn:mpeg:dash:schema:mpd:2011}S[@r]")
        s_nodes = timeline.findall(".//{urn:mpeg:dash:schema:mpd:2011}S")
        s_nodes.pop()

        if repeats:
            total = int(repeats[0].attrib["r"]) + len(s_nodes) + 1

    name = media.replace("$RepresentationID$", idnumber)
    files = [urljoin(filename, init_name.replace("$RepresentationID$", idnumber))]

    if "$Time$" in media:
        # Running sum of S-element durations yields each segment start time.
        durations = [0]
        for node in s_nodes:
            durations.append(int(node.attrib["d"]))
        has_time = re.search(r"\$Time\$", name)
        if repeats and has_time and len(s_nodes) < 3:
            # Short timelines with an @r repeat: constant stride per segment.
            stride = int(repeats[0].attrib["d"])
            for idx in range(start, start + total):
                files.append(urljoin(filename, name.replace("$Time$", str(idx * stride))))
        else:
            running = 0
            for d in durations:
                running += d
                files.append(urljoin(filename, name.replace("$Time$", str(running))))
    if "$Number" in name:
        if re.search(r"\$Number(\%\d+)d\$", name):
            # printf-style numbering, e.g. $Number%05d$.
            fmt = name.replace("$Number", "").replace("$", "")
            for idx in range(start, start + total):
                files.append(urljoin(filename, fmt % idx))
        else:
            for idx in range(start, start + total):
                files.append(urljoin(filename, name.replace("$Number$", str(idx))))
    return files