Example #1
0
def extract_videos(video_id):
    """Collect the playable stream URLs of a YouTube video.

    The ``get_video_info`` endpoint is queried first. If it reports an
    ``hlsvp`` playlist, that single entry is returned. When running under
    XBMC/Kodi with a build version above 16 and the ``inputstream.adaptive``
    add-on available, an unciphered ``dashmpd`` manifest is added as an
    extra entry. The watch page is then downloaded and each entry of
    ``url_encoded_fmt_stream_map`` whose itag is in the local format table
    is converted into a direct URL, computing the signature with the
    player JavaScript when the entry carries a ciphered ``s`` value.

    :param video_id: YouTube video identifier.
    :return: list of ``[label, url]`` entries (the DASH entry has three
        additional fields); empty when nothing could be extracted.
    """
    fmt_value = {
        5: "240p h263 flv",
        6: "240p h263 flv",
        18: "360p h264 mp4",
        22: "720p h264 mp4",
        26: "???",
        33: "???",
        34: "360p h264 flv",
        35: "480p h264 flv",
        36: "3gpp",
        37: "1080p h264 mp4",
        38: "4K h264 mp4",
        43: "360p vp8 webm",
        44: "480p vp8 webm",
        45: "720p vp8 webm",
        46: "1080p vp8 webm",
        59: "480p h264 mp4",
        78: "480p h264 mp4",
        82: "360p h264 3D",
        83: "480p h264 3D",
        84: "720p h264 3D",
        85: "1080p h264 3D",
        100: "360p vp8 3D",
        101: "480p vp8 3D",
        102: "720p vp8 3D",
    }

    url = 'http://www.youtube.com/get_video_info?video_id=%s&eurl=https://youtube.googleapis.com/v/%s&ssl_stream=1' % \
          (video_id, video_id)
    data = httptools.downloadpage(url).data

    video_urls = []
    params = dict(urlparse.parse_qsl(data))
    if params.get('hlsvp'):
        # An HLS playlist was reported: it is the only entry returned.
        video_urls.append(["(LIVE .m3u8) [youtube]", params['hlsvp']])
        return video_urls

    if config.is_xbmc():
        import xbmc
        xbmc_version = int(
            xbmc.getInfoLabel("System.BuildVersion").split(".", 1)[0])
        # The DASH manifest is only offered when it does not need deciphering.
        if xbmc_version > 16 \
                and xbmc.getCondVisibility('System.HasAddon(inputstream.adaptive)') \
                and params.get('dashmpd') \
                and params.get('use_cipher_signature', '') != 'True':
            video_urls.append(
                ['mpd  HD [youtube]', params['dashmpd'], 0, '', True])

    # Decipher routine extracted from the player JS; resolved lazily, once.
    js_signature = None
    youtube_page_data = httptools.downloadpage(
        "http://www.youtube.com/watch?v=%s" % video_id).data
    params = extract_flashvars(youtube_page_data)
    stream_map = params.get('url_encoded_fmt_stream_map')
    if not stream_map:
        return video_urls

    for url_desc in stream_map.split(","):
        url_desc_map = dict(urlparse.parse_qsl(url_desc))
        if not url_desc_map.get("url") and not url_desc_map.get("stream"):
            continue

        try:
            key = int(url_desc_map["itag"])
            if not fmt_value.get(key):
                continue

            # The guard above guarantees that "stream" is set whenever "url"
            # is missing, so the last two branches can rely on it.
            if url_desc_map.get("url"):
                url = urllib.unquote(url_desc_map["url"])
            elif url_desc_map.get("conn"):
                url = urllib.unquote(url_desc_map["conn"])
                if not url.endswith("/"):
                    url += "/"
                url += urllib.unquote(url_desc_map["stream"])
            else:
                url = urllib.unquote(url_desc_map["stream"])

            if url_desc_map.get("sig"):
                url += "&signature=" + url_desc_map["sig"]
            elif url_desc_map.get("s"):
                sig = url_desc_map["s"]
                if not js_signature:
                    urljs = scrapertools.find_single_match(
                        youtube_page_data, r'"assets":.*?"js":\s*"([^"]+)"')
                    urljs = urljs.replace("\\", "")
                    if urljs:
                        if not re.search(r'https?://', urljs):
                            urljs = urlparse.urljoin(
                                "https://www.youtube.com", urljs)
                        data_js = httptools.downloadpage(urljs).data
                        from jsinterpreter import JSInterpreter
                        funcname = scrapertools.find_single_match(
                            data_js, r'\.sig\|\|([A-z0-9$]+)\(')
                        if not funcname:
                            funcname = scrapertools.find_single_match(
                                data_js,
                                r'''["']signature["']\s*,\s*([A-z0-9$]+)\(''')
                        jsi = JSInterpreter(data_js)
                        js_signature = jsi.extract_function(funcname)

                if not js_signature:
                    # Without the decipher routine the URL cannot be signed.
                    logger.info(
                        "Signature function not found, skipping itag %s" % key)
                    continue

                url += "&signature=" + js_signature([sig])

            # Commas are percent-encoded in the final URL.
            url = url.replace(",", "%2C")
            video_urls.append(["(" + fmt_value[key] + ") [youtube]", url])
        except Exception:
            # A broken entry must not prevent the remaining ones from being
            # returned; the failure is logged and the entry skipped.
            import traceback
            logger.info(traceback.format_exc())

    return video_urls
Example #2
0
def extract_videos(video_id):
    """Collect the playable stream URLs of a YouTube video.

    The ``get_video_info`` endpoint is queried first. If it reports an
    ``hlsvp`` playlist, that single entry is returned. When running under
    XBMC/Kodi with a platform version of at least 17 and the
    ``inputstream.adaptive`` add-on available, an unciphered ``dashmpd``
    manifest is added as an extra entry. The watch page is then downloaded
    and each entry of ``url_encoded_fmt_stream_map`` whose itag is in the
    local format table is converted into a direct URL, computing the
    signature with the player JavaScript when the entry carries a ciphered
    ``s`` value.

    :param video_id: YouTube video identifier.
    :return: list of ``[label, url]`` entries (the DASH entry has three
        additional fields); empty when nothing could be extracted.
    """
    fmt_value = {
        5: "240p h263 flv",
        6: "240p h263 flv",
        18: "360p h264 mp4",
        22: "720p h264 mp4",
        26: "???",
        33: "???",
        34: "360p h264 flv",
        35: "480p h264 flv",
        36: "3gpp",
        37: "1080p h264 mp4",
        38: "4K h264 mp4",
        43: "360p vp8 webm",
        44: "480p vp8 webm",
        45: "720p vp8 webm",
        46: "1080p vp8 webm",
        59: "480p h264 mp4",
        78: "480p h264 mp4",
        82: "360p h264 3D",
        83: "480p h264 3D",
        84: "720p h264 3D",
        85: "1080p h264 3D",
        100: "360p vp8 3D",
        101: "480p vp8 3D",
        102: "720p vp8 3D",
    }

    url = 'http://www.youtube.com/get_video_info?video_id=%s&eurl=https://youtube.googleapis.com/v/%s&ssl_stream=1' % \
          (video_id, video_id)
    data = httptools.downloadpage(url).data

    video_urls = []
    params = dict(urlparse.parse_qsl(data))
    if params.get('hlsvp'):
        # An HLS playlist was reported: it is the only entry returned.
        video_urls.append(["(LIVE .m3u8) [youtube]", params['hlsvp']])
        return video_urls

    if config.is_xbmc():
        import xbmc
        xbmc_version = config.get_platform(True)['num_version']
        # The DASH manifest is only offered when it does not need deciphering.
        if xbmc_version >= 17 \
                and xbmc.getCondVisibility('System.HasAddon(inputstream.adaptive)') \
                and params.get('dashmpd') \
                and params.get('use_cipher_signature', '') != 'True':
            video_urls.append(['mpd  HD [youtube]', params['dashmpd'], 0, '', True])

    # Decipher routine extracted from the player JS; resolved lazily, once.
    js_signature = None
    youtube_page_data = httptools.downloadpage("http://www.youtube.com/watch?v=%s" % video_id).data
    params = extract_flashvars(youtube_page_data)
    stream_map = params.get('url_encoded_fmt_stream_map')
    if not stream_map:
        return video_urls

    for url_desc in stream_map.split(","):
        url_desc_map = dict(urlparse.parse_qsl(url_desc))
        if not url_desc_map.get("url") and not url_desc_map.get("stream"):
            continue

        try:
            key = int(url_desc_map["itag"])
            if not fmt_value.get(key):
                continue

            # The guard above guarantees that "stream" is set whenever "url"
            # is missing, so the last two branches can rely on it.
            if url_desc_map.get("url"):
                url = urllib.unquote(url_desc_map["url"])
            elif url_desc_map.get("conn"):
                url = urllib.unquote(url_desc_map["conn"])
                if not url.endswith("/"):
                    url += "/"
                url += urllib.unquote(url_desc_map["stream"])
            else:
                url = urllib.unquote(url_desc_map["stream"])

            if url_desc_map.get("sig"):
                url += "&signature=" + url_desc_map["sig"]
            elif url_desc_map.get("s"):
                sig = url_desc_map["s"]
                if not js_signature:
                    urljs = scrapertools.find_single_match(youtube_page_data, r'"assets":.*?"js":\s*"([^"]+)"')
                    urljs = urljs.replace("\\", "")
                    if urljs:
                        if not re.search(r'https?://', urljs):
                            urljs = urlparse.urljoin("https://www.youtube.com", urljs)
                        data_js = httptools.downloadpage(urljs).data
                        from jsinterpreter import JSInterpreter
                        funcname = scrapertools.find_single_match(data_js, r'\.sig\|\|([A-z0-9$]+)\(')
                        if not funcname:
                            funcname = scrapertools.find_single_match(
                                data_js, r'''["']signature["']\s*,\s*([A-z0-9$]+)\(''')
                        jsi = JSInterpreter(data_js)
                        js_signature = jsi.extract_function(funcname)

                if not js_signature:
                    # Without the decipher routine the URL cannot be signed.
                    logger.info("Signature function not found, skipping itag %s" % key)
                    continue

                url += "&signature=" + js_signature([sig])

            # Commas are percent-encoded in the final URL.
            url = url.replace(",", "%2C")
            video_urls.append(["(" + fmt_value[key] + ") [youtube]", url])
        except Exception:
            # A broken entry must not prevent the remaining ones from being
            # returned; the failure is logged and the entry skipped.
            import traceback
            logger.info(traceback.format_exc())

    return video_urls
Example #3
0
def scrapeWebPageForVideoLinks(data):
    """Build the list of direct stream URLs found in a YouTube page source.

    The flash variables are extracted from ``data`` and every entry of
    ``url_encoded_fmt_stream_map`` whose itag appears in the local format
    table is converted into a URL. Entries carrying a ciphered ``s`` value
    are signed with the decipher function taken from the player JavaScript
    referenced by the page.

    :param data: page source to scan (presumably the watch page HTML --
        TODO confirm against the caller).
    :return: list of ``[label, url]`` pairs; an empty list when the page
        has no ``url_encoded_fmt_stream_map``.
    """
    logger.info("")

    # NOTE(review): some labels (e.g. 38, 43, 46) look inconsistent with the
    # usual itag descriptions -- confirm before relying on them.
    fmt_value = {
        5: "240p h263 flv",
        18: "360p h264 mp4",
        22: "720p h264 mp4",
        26: "???",
        33: "???",
        34: "360p h264 flv",
        35: "480p h264 flv",
        37: "1080p h264 mp4",
        36: "3gpp",
        38: "720p vp8 webm",
        43: "360p h264 flv",
        44: "480p vp8 webm",
        45: "720p vp8 webm",
        46: "520p vp8 webm",
        59: "480 for rtmpe",
        78: "400 for rtmpe",
        82: "360p h264 stereo",
        83: "240p h264 stereo",
        84: "720p h264 stereo",
        85: "520p h264 stereo",
        100: "360p vp8 webm stereo",
        101: "480p vp8 webm stereo",
        102: "720p vp8 webm stereo",
        120: "hd720",
        121: "hd1080",
    }

    video_urls = []

    flashvars = extractFlashVars(data)
    if u"url_encoded_fmt_stream_map" not in flashvars:
        # Always hand back a list so callers get one return type.
        return video_urls

    if u"ttsurl" in flashvars:
        logger.info("ttsurl=" + flashvars[u"ttsurl"])

    # Decipher routine extracted from the player JS; resolved lazily, once.
    js_signature = None
    for url_desc in flashvars[u"url_encoded_fmt_stream_map"].split(u","):
        url_desc_map = cgi.parse_qs(url_desc)
        logger.info(u"url_map: " + repr(url_desc_map))
        if u"url" not in url_desc_map and u"stream" not in url_desc_map:
            continue

        try:
            key = int(url_desc_map[u"itag"][0])
            if key not in fmt_value:
                # No label for this itag: skip it before doing any work.
                logger.info("Unknown itag %s, skipping" % key)
                continue

            # The guard above guarantees that "stream" is present whenever
            # "url" is missing, so the last two branches can rely on it.
            if u"url" in url_desc_map:
                url = urllib.unquote(url_desc_map[u"url"][0])
            elif u"conn" in url_desc_map:
                url = urllib.unquote(url_desc_map[u"conn"][0])
                if url.rfind("/") < len(url) - 1:
                    url = url + "/"
                url = url + urllib.unquote(url_desc_map[u"stream"][0])
            else:
                url = urllib.unquote(url_desc_map[u"stream"][0])

            if u"sig" in url_desc_map:
                url = url + u"&signature=" + url_desc_map[u"sig"][0]
            elif u"s" in url_desc_map:
                sig = url_desc_map[u"s"][0]
                if not js_signature:
                    urljs = scrapertools.find_single_match(data, r'"assets":.*?"js":\s*"([^"]+)"')
                    urljs = urljs.replace("\\", "")
                    if urljs:
                        # Only add what is missing: root-relative paths get
                        # the site prefix, anything else without a scheme
                        # gets "http:" as before.
                        if urljs.startswith("/") and not urljs.startswith("//"):
                            urljs = "http://www.youtube.com" + urljs
                        elif not urljs.startswith(("http://", "https://")):
                            urljs = "http:" + urljs
                        data_js = scrapertools.downloadpage(urljs)
                        from jsinterpreter import JSInterpreter
                        funcname = scrapertools.find_single_match(data_js, r'\.sig\|\|([A-z0-9$]+)\(')

                        jsi = JSInterpreter(data_js)
                        js_signature = jsi.extract_function(funcname)

                if not js_signature:
                    # Without the decipher routine the URL cannot be signed.
                    logger.info("Signature function not found, skipping itag %s" % key)
                    continue

                url += u"&signature=" + js_signature([sig])

            # Commas are percent-encoded so the built-in method does not fail.
            url = url.replace(",", "%2C")
            video_urls.append(["(" + fmt_value[key] + ") [youtube]", url])
        except Exception:
            # A broken entry must not prevent the remaining ones from being
            # returned; the failure is logged and the entry skipped.
            import traceback
            logger.info(traceback.format_exc())

    return video_urls
Example #4
0
def scrapeWebPageForVideoLinks(data):
    """Build the list of direct stream URLs found in a YouTube page source.

    The flash variables are extracted from ``data``. If they contain an
    ``hlsvp`` playlist, that single entry is returned. Otherwise every
    entry of ``url_encoded_fmt_stream_map`` whose itag appears in the local
    format table is converted into a URL; entries carrying a ciphered ``s``
    value are signed with the decipher function taken from the player
    JavaScript referenced by the page.

    :param data: page source to scan (presumably the watch page HTML --
        TODO confirm against the caller).
    :return: list of ``[label, url]`` pairs; an empty list when the page
        has no ``url_encoded_fmt_stream_map``.
    """
    logger.info("")

    fmt_value = {
        5: "240p h263 flv",
        6: "240p h263 flv",
        18: "360p h264 mp4",
        22: "720p h264 mp4",
        26: "???",
        33: "???",
        34: "360p h264 flv",
        35: "480p h264 flv",
        36: "3gpp",
        37: "1080p h264 mp4",
        38: "4K h264 mp4",
        43: "360p vp8 webm",
        44: "480p vp8 webm",
        45: "720p vp8 webm",
        46: "1080p vp8 webm",
        59: "480p h264 mp4",
        78: "480p h264 mp4",
        82: "360p h264 3D",
        83: "480p h264 3D",
        84: "720p h264 3D",
        85: "1080p h264 3D",
        100: "360p vp8 3D",
        101: "480p vp8 3D",
        102: "720p vp8 3D",
    }

    video_urls = []

    flashvars = extractFlashVars(data)
    if u"url_encoded_fmt_stream_map" not in flashvars:
        # Always hand back a list so callers get one return type.
        return video_urls

    if u"ttsurl" in flashvars:
        logger.info("ttsurl=" + flashvars[u"ttsurl"])

    if 'hlsvp' in flashvars:
        # An HLS playlist was reported: it is the only entry returned.
        video_urls.append(["(LIVE .m3u8) [youtube]", flashvars[u"hlsvp"]])
        return video_urls

    # Decipher routine extracted from the player JS; resolved lazily, once.
    js_signature = None
    for url_desc in flashvars[u"url_encoded_fmt_stream_map"].split(u","):
        url_desc_map = cgi.parse_qs(url_desc)
        logger.info(u"url_map: " + repr(url_desc_map))
        if u"url" not in url_desc_map and u"stream" not in url_desc_map:
            continue

        try:
            key = int(url_desc_map[u"itag"][0])
            if not fmt_value.get(key):
                continue

            # The guard above guarantees that "stream" is present whenever
            # "url" is missing, so the last two branches can rely on it.
            if u"url" in url_desc_map:
                url = urllib.unquote(url_desc_map[u"url"][0])
            elif u"conn" in url_desc_map:
                url = urllib.unquote(url_desc_map[u"conn"][0])
                if url.rfind("/") < len(url) - 1:
                    url = url + "/"
                url = url + urllib.unquote(url_desc_map[u"stream"][0])
            else:
                url = urllib.unquote(url_desc_map[u"stream"][0])

            if u"sig" in url_desc_map:
                url = url + u"&signature=" + url_desc_map[u"sig"][0]
            elif u"s" in url_desc_map:
                sig = url_desc_map[u"s"][0]
                if not js_signature:
                    urljs = scrapertools.find_single_match(data, r'"assets":.*?"js":\s*"([^"]+)"')
                    urljs = urljs.replace("\\", "")
                    if urljs:
                        # Only add what is missing: root-relative paths get
                        # the site prefix, anything else without a scheme
                        # gets "http:" as before.
                        if urljs.startswith("/") and not urljs.startswith("//"):
                            urljs = "http://www.youtube.com" + urljs
                        elif not urljs.startswith(("http://", "https://")):
                            urljs = "http:" + urljs
                        data_js = scrapertools.downloadpage(urljs)
                        from jsinterpreter import JSInterpreter
                        funcname = scrapertools.find_single_match(data_js, r'\.sig\|\|([A-z0-9$]+)\(')

                        jsi = JSInterpreter(data_js)
                        js_signature = jsi.extract_function(funcname)

                if not js_signature:
                    # Without the decipher routine the URL cannot be signed.
                    logger.info("Signature function not found, skipping itag %s" % key)
                    continue

                url += u"&signature=" + js_signature([sig])

            # Commas are percent-encoded so the built-in method does not fail.
            url = url.replace(",", "%2C")
            video_urls.append(["(" + fmt_value[key] + ") [youtube]", url])
        except Exception:
            # A broken entry must not prevent the remaining ones from being
            # returned; the failure is logged and the entry skipped.
            import traceback
            logger.info(traceback.format_exc())

    return video_urls
Example #5
0
def scrapeWebPageForVideoLinks(data):
    """Build the list of direct stream URLs found in a YouTube page source.

    The flash variables are extracted from ``data``. If they contain an
    ``hlsvp`` playlist, that single entry is returned. Otherwise the
    entries of ``adaptive_fmts`` (falling back to
    ``url_encoded_fmt_stream_map``) are converted into URLs, skipping the
    itags listed in ``exclude_itags`` and those without a label in the
    format table. Entries carrying a ciphered ``s`` value are signed with
    the decipher function taken from the player JavaScript referenced by
    the page.

    :param data: page source to scan (presumably the watch page HTML --
        TODO confirm against the caller).
    :return: list of ``[label, url]`` pairs; an empty list when the page
        has no ``url_encoded_fmt_stream_map``.
    """
    logger.info("")

    fmt_value = {
        5: "240p h263 flv",
        6: "240p h263 flv",
        18: "360p h264 mp4",
        22: "720p h264 mp4",
        26: "???",
        33: "???",
        34: "360p h264 flv",
        35: "480p h264 flv",
        36: "3gpp",
        37: "1080p h264 mp4",
        38: "4K h264 mp4",
        43: "360p vp8 webm",
        44: "480p vp8 webm",
        45: "720p vp8 webm",
        46: "1080p vp8 webm",
        59: "480p h264 mp4",
        78: "480p h264 mp4",
        82: "360p h264 3D",
        83: "480p h264 3D",
        84: "720p h264 3D",
        85: "1080p h264 3D",
        100: "360p vp8 3D",
        101: "480p vp8 3D",
        102: "720p vp8 3D",
        133: "240p h264 mp4",
        134: "360p h264 mp4",
        135: "480p h264 mp4",
        136: "720p h264 mp4",
        137: "1080p h264 mp4",
        264: "1440p h264 mp4",
        266: "4K h264 mp4",
        298: "720p mp4 60fps",
        299: "1080p mp4 60fps",
        218: "480p vp9 webm",
        219: "480 vp9 webm",
        242: "240p vp9 webm",
        243: "360p vp9 webm",
        244: "480p vp9 webm",
        245: "480p vp9 webm",
        246: "480p vp9 webm",
        247: "720p vp9 webm",
        248: "1080p vp9 webm",
        271: "1440p vp9 webm",
        272: "4K webm 60fps",
        302: "720p webm 60fps",
        303: "1080p webm 60fps",
        308: "1440p webm 60fps",
        313: "4K vp9 webm",
        315: "4K webm 60fps",
    }
    # Itags that are dropped without being reported.
    exclude_itags = [17, 139, 140, 141, 160, 171, 172, 249, 250, 251, 256, 258, 278]

    video_urls = []

    flashvars = extractFlashVars(data)
    if u"url_encoded_fmt_stream_map" not in flashvars:
        # Always hand back a list so callers get one return type.
        return video_urls

    if u"ttsurl" in flashvars:
        logger.info("ttsurl=" + flashvars[u"ttsurl"])

    if 'hlsvp' in flashvars:
        # An HLS playlist was reported: it is the only entry returned.
        video_urls.append(["(LIVE .m3u8) [youtube]", flashvars[u"hlsvp"]])
        return video_urls

    # Decipher routine extracted from the player JS; resolved lazily, once.
    js_signature = None
    try:
        data_flashvars = flashvars[u"adaptive_fmts"].split(u",")
    except (KeyError, AttributeError):
        # adaptive_fmts missing or not a string: use the regular stream map.
        data_flashvars = flashvars[u"url_encoded_fmt_stream_map"].split(u",")

    for url_desc in data_flashvars:
        url_desc_map = cgi.parse_qs(url_desc)
        logger.info(u"url_map: " + repr(url_desc_map))
        if u"url" not in url_desc_map and u"stream" not in url_desc_map:
            continue

        try:
            key = int(url_desc_map[u"itag"][0])
            if key in exclude_itags:
                continue
            if key not in fmt_value:
                # No label for this itag: skip it before doing any work.
                logger.info("Unknown itag %s, skipping" % key)
                continue

            # The guard above guarantees that "stream" is present whenever
            # "url" is missing, so the last two branches can rely on it.
            if u"url" in url_desc_map:
                url = urllib.unquote(url_desc_map[u"url"][0])
            elif u"conn" in url_desc_map:
                url = urllib.unquote(url_desc_map[u"conn"][0])
                if url.rfind("/") < len(url) - 1:
                    url = url + "/"
                url = url + urllib.unquote(url_desc_map[u"stream"][0])
            else:
                url = urllib.unquote(url_desc_map[u"stream"][0])

            if u"sig" in url_desc_map:
                url = url + u"&signature=" + url_desc_map[u"sig"][0]
            elif u"s" in url_desc_map:
                sig = url_desc_map[u"s"][0]
                if not js_signature:
                    urljs = scrapertools.find_single_match(data, r'"assets":.*?"js":\s*"([^"]+)"')
                    urljs = urljs.replace("\\", "")
                    if urljs:
                        # Only add what is missing: root-relative paths get
                        # the site prefix, anything else without a scheme
                        # gets "http:" as before.
                        if urljs.startswith("/") and not urljs.startswith("//"):
                            urljs = "http://www.youtube.com" + urljs
                        elif not urljs.startswith(("http://", "https://")):
                            urljs = "http:" + urljs
                        data_js = scrapertools.downloadpage(urljs)
                        from jsinterpreter import JSInterpreter
                        funcname = scrapertools.find_single_match(data_js, r'\.sig\|\|([A-z0-9$]+)\(')

                        jsi = JSInterpreter(data_js)
                        js_signature = jsi.extract_function(funcname)

                if not js_signature:
                    # Without the decipher routine the URL cannot be signed.
                    logger.info("Signature function not found, skipping itag %s" % key)
                    continue

                url += u"&signature=" + js_signature([sig])

            # Commas are percent-encoded so the built-in method does not fail.
            url = url.replace(",", "%2C")
            video_urls.append(["(" + fmt_value[key] + ") [youtube]", url])
        except Exception:
            # A broken entry must not prevent the remaining ones from being
            # returned; the failure is logged and the entry skipped.
            import traceback
            logger.info(traceback.format_exc())

    return video_urls
Example #6
0
def get_video_url(item):
    """Return the playable ``Video`` entries for the YouTube URL in ``item``.

    The 11-character video id is taken from ``item.url``. The
    ``get_video_info`` endpoint is queried first: an ``hlsvp`` playlist is
    returned on its own as a ``Live`` entry, and an unciphered ``dashmpd``
    manifest is added as an ``MPD`` entry. The watch page is then
    downloaded and each entry of ``url_encoded_fmt_stream_map`` whose itag
    is known to the module-level ``fmt_value`` table is converted into a
    direct URL, computing the signature with the player JavaScript when the
    entry carries a ciphered ``s`` value.

    :param item: object exposing the YouTube address as ``item.url``.
    :return: list of ``Video`` objects; empty when nothing could be
        extracted.
    """
    logger.trace()
    itemlist = []

    video_id = scrapertools.find_single_match(item.url, 'v=([A-z0-9_-]{11})')
    if not video_id:
        # No id in the URL: the requests below would be pointless.
        return itemlist

    url = 'http://www.youtube.com/get_video_info?video_id=%s&eurl=https://youtube.googleapis.com/v/%s&ssl_stream=1' % (
        video_id, video_id)
    data = httptools.downloadpage(url).data
    params = dict(urlparse.parse_qsl(data))

    if params.get('hlsvp'):
        # An HLS playlist was reported: it is the only entry returned.
        itemlist.append(Video(type='Live', url=params['hlsvp']))
        return itemlist

    # The DASH manifest is only offered when it does not need deciphering.
    if params.get('dashmpd') and params.get('use_cipher_signature', '') != 'True':
        itemlist.append(Video(type='MPD', url=params['dashmpd'], mpd=True))

    # Decipher routine extracted from the player JS; resolved lazily, once.
    js_signature = None
    youtube_page_data = httptools.downloadpage(
        "http://www.youtube.com/watch?v=%s" % video_id).data
    params = extract_flashvars(youtube_page_data)
    stream_map = params.get('url_encoded_fmt_stream_map')
    if not stream_map:
        return itemlist

    for url_desc in stream_map.split(","):
        url_desc_map = dict(urlparse.parse_qsl(url_desc))
        if not url_desc_map.get("url") and not url_desc_map.get("stream"):
            continue

        try:
            key = int(url_desc_map["itag"])
            if not fmt_value.get(key):
                continue

            # The guard above guarantees that "stream" is set whenever "url"
            # is missing, so the last two branches can rely on it.
            if url_desc_map.get("url"):
                url = urllib.unquote(url_desc_map["url"])
            elif url_desc_map.get("conn"):
                url = urllib.unquote(url_desc_map["conn"])
                if not url.endswith("/"):
                    url += "/"
                url += urllib.unquote(url_desc_map["stream"])
            else:
                url = urllib.unquote(url_desc_map["stream"])

            if url_desc_map.get("sig"):
                url += "&signature=" + url_desc_map["sig"]
            elif url_desc_map.get("s"):
                sig = url_desc_map["s"]
                if not js_signature:
                    urljs = scrapertools.find_single_match(
                        youtube_page_data, r'"assets":.*?"js":\s*"([^"]+)"')
                    urljs = urljs.replace("\\", "")
                    if urljs:
                        if not re.search(r'https?://', urljs):
                            urljs = urlparse.urljoin(
                                "https://www.youtube.com", urljs)
                        data_js = httptools.downloadpage(urljs).data
                        from jsinterpreter import JSInterpreter
                        funcname = scrapertools.find_single_match(
                            data_js, r'\.sig\|\|([A-z0-9$]+)\(')
                        if not funcname:
                            funcname = scrapertools.find_single_match(
                                data_js,
                                r'''["']signature["']\s*,\s*([A-z0-9$]+)\(''')
                        jsi = JSInterpreter(data_js)
                        js_signature = jsi.extract_function(funcname)

                if not js_signature:
                    # Report the real cause instead of failing later with
                    # an opaque "object is not callable" error.
                    raise ValueError(
                        "signature function not found in the player JS")

                url += "&signature=" + js_signature([sig])

            # Commas are percent-encoded in the final URL.
            url = url.replace(",", "%2C")
            itemlist.append(
                Video(type=fmt_value[key][1],
                      res=fmt_value[key][0],
                      url=url))
        except Exception:
            # A broken entry must not prevent the remaining ones from being
            # returned; the failure is reported and the entry skipped.
            logger.error()

    return itemlist