Example #1
0
def _extract_dash(dashurl):
    """ Download dash url and extract some data. """
    # pylint: disable = R0914
    dbg("Fetching dash page")
    dashdata = fetch_decode(dashurl)
    dbg("DASH list fetched")
    ns = "{urn:mpeg:DASH:schema:MPD:2011}"
    ytns = "{http://youtube.com/yt/2012/10/10}"
    root = ElementTree.fromstring(dashdata)

    def _entry(rep):
        """ Build one stream dict from a <Representation> element. """
        base = rep.find("%sBaseURL" % ns)
        return dict(bitrate=rep.get("bandwidth"),
                    dash=True,
                    itag=uni(rep.get("id")),
                    width=uni(rep.get("width")),
                    height=uni(rep.get("height")),
                    url=base.text,
                    size=base.get("%scontentLength" % ytns))

    return [_entry(rep) for rep in root.findall(".//%sRepresentation" % ns)]
Example #2
0
def fetch_cached(url, callback, encoding=None, dbg_ref="", file_prefix=""):
    """ Fetch url - from tmpdir if already retrieved.

    url         -- remote resource to fetch
    callback    -- optional progress callable; called with a status string
    encoding    -- text encoding for the fetch and the cache file
                   (defaults to utf8; previously this argument was ignored)
    dbg_ref     -- label used in debug / callback messages
    file_prefix -- prefix for the cache filename (also passed to prune_files)
    """
    enc = encoding or "utf8"
    tmpdir = os.path.join(tempfile.gettempdir(), "pafy")

    if not os.path.exists(tmpdir):
        os.makedirs(tmpdir)

    # cache key is the md5 of the url, so identical urls share one file
    url_md5 = hashlib.md5(url.encode("utf8")).hexdigest()
    cached_filename = os.path.join(tmpdir, file_prefix + url_md5)

    if os.path.exists(cached_filename):
        dbg("fetched %s from cache", dbg_ref)

        # open with an explicit encoding; the platform default may differ
        # from the encoding the data was written with below
        with open(cached_filename, encoding=enc) as f:
            retval = f.read()

        return retval

    else:
        data = fetch_decode(url, enc)  # unicode
        dbg("Fetched %s", dbg_ref)
        if callback:
            callback("Fetched %s" % dbg_ref)

        with open(cached_filename, "w", encoding=enc) as f:
            f.write(data)

        # prune files after write
        prune_files(tmpdir, file_prefix)
        return data
Example #3
0
def get_video_info(video_id, callback, newurl=None):
    """ Return info for video_id.  Returns dict. """
    # TODO: see if there is a way to avoid retrieving the embed page
    #       just for this, or to use it for more. This was copied from
    #       youtube-dl.
    embed_webpage = fetch_decode(g.urls['embed'])
    sts = re.search(r'sts"\s*:\s*(\d+)', embed_webpage).group(1)

    # an explicit newurl (e.g. the age-verification variant) wins over
    # the default video-info url
    if newurl:
        url = newurl
    else:
        url = g.urls['vidinfo'] % (video_id, video_id, sts)

    raw = fetch_decode(url)  # bytes
    info = parseqs(raw)  # unicode dict
    dbg("Fetched video info%s", " (age ver)" if newurl else "")

    if info['status'][0] == "fail":
        reason = info['reason'][0] or "Bad video argument"
        raise IOError("Youtube says: %s [%s]" % (reason, video_id))

    return info
Example #4
0
def get_playlist(playlist_url,
                 basic=False,
                 gdata=False,
                 size=False,
                 callback=None):
    """ Return a dict containing Pafy objects from a YouTube Playlist.

    The returned Pafy objects are initialised using the arguments to
    get_playlist() in the manner documented for pafy.new()

    Returns None when no playlist id can be extracted from playlist_url.
    Videos that fail to initialise are skipped (the failure is reported
    through callback, if given).
    """

    playlist_id = extract_playlist_id(playlist_url)

    if not playlist_id:
        return None

    url = g.urls["playlist"] % playlist_id

    allinfo = fetch_decode(url)  # unicode
    allinfo = json.loads(allinfo)

    # playlist specific metadata
    playlist = dict(playlist_id=playlist_id,
                    likes=allinfo.get('likes'),
                    title=allinfo.get('title'),
                    author=allinfo.get('author'),
                    dislikes=allinfo.get('dislikes'),
                    description=allinfo.get('description'),
                    items=[])

    # playlist items specific metadata
    for v in allinfo['video']:

        vid_data = dict(added=v.get('added'),
                        is_cc=v.get('is_cc'),
                        is_hd=v.get('is_hd'),
                        likes=v.get('likes'),
                        title=v.get('title'),
                        views=v.get('views'),
                        rating=v.get('rating'),
                        author=v.get('author'),
                        user_id=v.get('user_id'),
                        privacy=v.get('privacy'),
                        start=v.get('start', 0.0),
                        dislikes=v.get('dislikes'),
                        duration=v.get('duration'),
                        comments=v.get('comments'),
                        keywords=v.get('keywords'),
                        thumbnail=v.get('thumbnail'),
                        cc_license=v.get('cc_license'),
                        category_id=v.get('category_id'),
                        description=v.get('description'),
                        encrypted_id=v.get('encrypted_id'),
                        time_created=v.get('time_created'),
                        time_updated=v.get('time_updated'),
                        length_seconds=v.get('length_seconds'),
                        end=v.get('end', v.get('length_seconds')))

        try:
            pafy_obj = new(vid_data['encrypted_id'],
                           basic=basic,
                           gdata=gdata,
                           size=size,
                           callback=callback)

        except IOError as e:
            # Exception.message was removed in Python 3; str(e) works on
            # both Python 2 and 3.
            if callback:
                callback("%s: %s" % (v['title'], str(e)))
            continue

        pafy_obj.populate_from_playlist(vid_data)
        playlist['items'].append(dict(pafy=pafy_obj, playlist_meta=vid_data))
        if callback:
            callback("Added video: %s" % v['title'])

    return playlist
Example #5
0
    def _fetch_basic(self):
        """ Fetch basic data and streams.

        Populates title, author, rating, length, view count, thumbnails,
        the stream maps (self.sm / self.asm) and, when available, the
        DASH stream list.  No-op if basic data was already fetched.
        """
        if self._have_basic:
            return

        allinfo = get_video_info(self.videoid, self.callback)

        if self.callback:
            self.callback("Fetched video info")

        def _get_lst(key, default="unknown", dic=allinfo):
            """ Dict get function, returns first index. """
            retval = dic.get(key, default)
            return retval[0] if retval != default else default

        self._title = _get_lst('title')
        self._dashurl = _get_lst('dashmpd')
        self._author = _get_lst('author')
        self._rating = float(_get_lst('avg_rating', 0.0))
        self._length = int(_get_lst('length_seconds', 0))
        # Bug fix: the default 0 belongs inside _get_lst, not as int()'s
        # second argument -- int(x, 0) treats x as a base-prefixed string
        # literal and raises ValueError when view_count is absent
        # (int("unknown", 0)).
        self._viewcount = int(_get_lst('view_count', 0))
        self._thumb = unquote_plus(_get_lst('thumbnail_url', ""))
        self._formats = [x.split("/") for x in _get_lst('fmt_list').split(",")]
        self._keywords = _get_lst('keywords', "").split(',')
        self._bigthumb = _get_lst('iurlsd', "")
        self._bigthumbhd = _get_lst('iurlsdmaxres', "")
        self.ciphertag = _get_lst("use_cipher_signature") == "True"
        self.sm = _extract_smap(g.UEFSM, allinfo, True)
        self.asm = _extract_smap(g.AF, allinfo, True)
        dbg("extracted stream maps")

        # the stream map itself is the authority on whether signatures
        # are ciphered; correct the flag if the info page disagrees
        sm_ciphertag = "s" in self.sm[0]

        if self.ciphertag != sm_ciphertag:
            dbg("ciphertag mismatch")
            self.ciphertag = not self.ciphertag

        watch_url = g.urls['watchv'] % self.videoid
        if self.callback:
            self.callback("Fetching watch page")
        watchinfo = fetch_decode(watch_url)  # unicode
        dbg("Fetched watch page")
        if self.callback:
            self.callback("Fetched watch page")
        # age-restricted videos show a gate on the watch page
        self.age_ver = re.search(r'player-age-gate-content">',
                                 watchinfo) is not None

        if self.ciphertag:
            dbg("Encrypted signature detected.")

            if not self.age_ver:
                # decrypt the dash signature with the extracted js function
                smaps, js_url, mainfunc = get_js_sm(watchinfo, self.callback)
                funcmap[js_url] = mainfunc
                self.sm, self.asm = smaps
                self.js_url = js_url
                dashsig = re.search(r"/s/([\w\.]+)", self._dashurl).group(1)
                dbg("decrypting dash sig")
                goodsig = _decodesig(dashsig, js_url, self.callback)
                self._dashurl = re.sub(r"/s/[\w\.]+",
                                       "/signature/%s" % goodsig,
                                       self._dashurl)

            else:
                # age-gated videos use a fixed character-shuffle scheme
                s = re.search(r"/s/([\w\.]+)", self._dashurl).group(1)
                s = s[2:63] + s[82] + s[64:82] + s[63]
                self._dashurl = re.sub(r"/s/[\w\.]+", "/signature/%s" % s,
                                       self._dashurl)

        if self._dashurl != 'unknown':
            self.dash = _extract_dash(self._dashurl)
        self._have_basic = 1
        self._process_streams()
        # cached basic data is valid until this timestamp
        self.expiry = time.time() + g.lifespan