def extract_videos(video_id):
    """Collect the playable stream URLs available for a YouTube video.

    The ``get_video_info`` endpoint is queried first. A live stream
    (``hlsvp``) is returned immediately. On Kodi with a major version above
    16 and the ``inputstream.adaptive`` add-on, a DASH manifest is added when
    the response does not flag a ciphered signature. The watch page is then
    downloaded and every entry of its ``url_encoded_fmt_stream_map`` whose
    itag is listed in ``fmt_value`` is turned into a URL. Entries that only
    carry an encrypted signature (``s``) are deciphered with the function
    extracted from the player JavaScript.

    :param video_id: video identifier interpolated into the request URLs.
    :return: list of entries. Live and regular streams are ``[label, url]``;
        the DASH manifest entry is ``[label, url, 0, '', True]``.
    """
    fmt_value = {
        5: "240p h263 flv",
        6: "240p h263 flv",
        18: "360p h264 mp4",
        22: "720p h264 mp4",
        26: "???",
        33: "???",
        34: "360p h264 flv",
        35: "480p h264 flv",
        36: "3gpp",
        37: "1080p h264 mp4",
        38: "4K h264 mp4",
        43: "360p vp8 webm",
        44: "480p vp8 webm",
        45: "720p vp8 webm",
        46: "1080p vp8 webm",
        59: "480p h264 mp4",
        78: "480p h264 mp4",
        82: "360p h264 3D",
        83: "480p h264 3D",
        84: "720p h264 3D",
        85: "1080p h264 3D",
        100: "360p vp8 3D",
        101: "480p vp8 3D",
        102: "720p vp8 3D"
    }

    url = 'http://www.youtube.com/get_video_info?video_id=%s&eurl=https://youtube.googleapis.com/v/%s&ssl_stream=1' % \
          (video_id, video_id)
    data = httptools.downloadpage(url).data

    video_urls = []
    params = dict(urlparse.parse_qsl(data))

    # A live broadcast exposes a single HLS playlist; nothing else is needed.
    if params.get('hlsvp'):
        video_urls.append(["(LIVE .m3u8) [youtube]", params['hlsvp']])
        return video_urls

    if config.is_xbmc():
        import xbmc
        # Major version is the part of the build string before the first dot.
        xbmc_version = int(xbmc.getInfoLabel("System.BuildVersion").split(".", 1)[0])
        if xbmc_version > 16 and xbmc.getCondVisibility('System.HasAddon(inputstream.adaptive)') \
                and params.get('dashmpd'):
            if params.get('use_cipher_signature', '') != 'True':
                video_urls.append(['mpd HD [youtube]', params['dashmpd'], 0, '', True])

    youtube_page_data = httptools.downloadpage("http://www.youtube.com/watch?v=%s" % video_id).data
    params = extract_flashvars(youtube_page_data)
    if not params.get('url_encoded_fmt_stream_map'):
        return video_urls

    js_signature = None      # decipher callable, resolved lazily from the player JS
    js_lookup_done = False   # the player JS is the same for every stream: look it up once

    for url_desc in params["url_encoded_fmt_stream_map"].split(","):
        url_desc_map = dict(urlparse.parse_qsl(url_desc))
        if not url_desc_map.get("url") and not url_desc_map.get("stream"):
            continue

        try:
            key = int(url_desc_map["itag"])
            if not fmt_value.get(key):
                continue

            if url_desc_map.get("url"):
                url = urllib.unquote(url_desc_map["url"])
            elif url_desc_map.get("conn"):
                # No "url", so the guard above guarantees "stream" is present.
                url = urllib.unquote(url_desc_map["conn"])
                if url.rfind("/") < len(url) - 1:
                    url += "/"
                url += urllib.unquote(url_desc_map["stream"])
            else:
                url = urllib.unquote(url_desc_map["stream"])

            if url_desc_map.get("sig"):
                url += "&signature=" + url_desc_map["sig"]
            elif url_desc_map.get("s"):
                sig = url_desc_map["s"]
                if not js_lookup_done:
                    # Flag is set before the attempt so that a failing lookup
                    # is not repeated (and re-downloaded) for every stream.
                    js_lookup_done = True
                    urljs = scrapertools.find_single_match(youtube_page_data, r'"assets":.*?"js":\s*"([^"]+)"')
                    urljs = urljs.replace("\\", "")
                    if urljs:
                        if not re.search(r'https?://', urljs):
                            urljs = urlparse.urljoin("https://www.youtube.com", urljs)
                        data_js = httptools.downloadpage(urljs).data
                        from jsinterpreter import JSInterpreter
                        # [A-Za-z0-9_$] instead of [A-z0-9$]: the A-z range
                        # also matches the punctuation between 'Z' and 'a'.
                        funcname = scrapertools.find_single_match(data_js, r'\.sig\|\|([A-Za-z0-9_$]+)\(')
                        if not funcname:
                            funcname = scrapertools.find_single_match(
                                data_js, r'''["']signature["']\s*,\s*([A-Za-z0-9_$]+)\(''')
                        if funcname:
                            js_signature = JSInterpreter(data_js).extract_function(funcname)

                if not js_signature:
                    # Without the decipher function the URL would be rejected,
                    # so the stream is skipped instead of calling a non-callable.
                    logger.info("Signature function not available, skipping itag %s" % key)
                    continue
                url += "&signature=" + js_signature([sig])

            url = url.replace(",", "%2C")
            video_urls.append(["(" + fmt_value[key] + ") [youtube]", url])
        except Exception:
            # Best effort: a broken entry must not prevent the others from being listed.
            import traceback
            logger.info(traceback.format_exc())

    return video_urls
def extract_videos(video_id):
    """Collect the playable stream URLs available for a YouTube video.

    The ``get_video_info`` endpoint is queried first. A live stream
    (``hlsvp``) is returned immediately. On Kodi with a platform version of
    17 or above and the ``inputstream.adaptive`` add-on, a DASH manifest is
    added when the response does not flag a ciphered signature. The watch
    page is then downloaded and every entry of its
    ``url_encoded_fmt_stream_map`` whose itag is listed in ``fmt_value`` is
    turned into a URL. Entries that only carry an encrypted signature (``s``)
    are deciphered with the function extracted from the player JavaScript.

    :param video_id: video identifier interpolated into the request URLs.
    :return: list of entries. Live and regular streams are ``[label, url]``;
        the DASH manifest entry is ``[label, url, 0, '', True]``.
    """
    fmt_value = {
        5: "240p h263 flv",
        6: "240p h263 flv",
        18: "360p h264 mp4",
        22: "720p h264 mp4",
        26: "???",
        33: "???",
        34: "360p h264 flv",
        35: "480p h264 flv",
        36: "3gpp",
        37: "1080p h264 mp4",
        38: "4K h264 mp4",
        43: "360p vp8 webm",
        44: "480p vp8 webm",
        45: "720p vp8 webm",
        46: "1080p vp8 webm",
        59: "480p h264 mp4",
        78: "480p h264 mp4",
        82: "360p h264 3D",
        83: "480p h264 3D",
        84: "720p h264 3D",
        85: "1080p h264 3D",
        100: "360p vp8 3D",
        101: "480p vp8 3D",
        102: "720p vp8 3D"
    }

    url = 'http://www.youtube.com/get_video_info?video_id=%s&eurl=https://youtube.googleapis.com/v/%s&ssl_stream=1' % \
          (video_id, video_id)
    data = httptools.downloadpage(url).data

    video_urls = []
    params = dict(urlparse.parse_qsl(data))

    # A live broadcast exposes a single HLS playlist; nothing else is needed.
    if params.get('hlsvp'):
        video_urls.append(["(LIVE .m3u8) [youtube]", params['hlsvp']])
        return video_urls

    if config.is_xbmc():
        import xbmc
        xbmc_version = config.get_platform(True)['num_version']
        if xbmc_version >= 17 and xbmc.getCondVisibility('System.HasAddon(inputstream.adaptive)') \
                and params.get('dashmpd'):
            if params.get('use_cipher_signature', '') != 'True':
                video_urls.append(['mpd HD [youtube]', params['dashmpd'], 0, '', True])

    youtube_page_data = httptools.downloadpage("http://www.youtube.com/watch?v=%s" % video_id).data
    params = extract_flashvars(youtube_page_data)
    if not params.get('url_encoded_fmt_stream_map'):
        return video_urls

    js_signature = None      # decipher callable, resolved lazily from the player JS
    js_lookup_done = False   # the player JS is the same for every stream: look it up once

    for url_desc in params["url_encoded_fmt_stream_map"].split(","):
        url_desc_map = dict(urlparse.parse_qsl(url_desc))
        if not url_desc_map.get("url") and not url_desc_map.get("stream"):
            continue

        try:
            key = int(url_desc_map["itag"])
            if not fmt_value.get(key):
                continue

            if url_desc_map.get("url"):
                url = urllib.unquote(url_desc_map["url"])
            elif url_desc_map.get("conn"):
                # No "url", so the guard above guarantees "stream" is present.
                url = urllib.unquote(url_desc_map["conn"])
                if url.rfind("/") < len(url) - 1:
                    url += "/"
                url += urllib.unquote(url_desc_map["stream"])
            else:
                url = urllib.unquote(url_desc_map["stream"])

            if url_desc_map.get("sig"):
                url += "&signature=" + url_desc_map["sig"]
            elif url_desc_map.get("s"):
                sig = url_desc_map["s"]
                if not js_lookup_done:
                    # Flag is set before the attempt so that a failing lookup
                    # is not repeated (and re-downloaded) for every stream.
                    js_lookup_done = True
                    urljs = scrapertools.find_single_match(youtube_page_data, r'"assets":.*?"js":\s*"([^"]+)"')
                    urljs = urljs.replace("\\", "")
                    if urljs:
                        if not re.search(r'https?://', urljs):
                            urljs = urlparse.urljoin("https://www.youtube.com", urljs)
                        data_js = httptools.downloadpage(urljs).data
                        from jsinterpreter import JSInterpreter
                        # [A-Za-z0-9_$] instead of [A-z0-9$]: the A-z range
                        # also matches the punctuation between 'Z' and 'a'.
                        funcname = scrapertools.find_single_match(data_js, r'\.sig\|\|([A-Za-z0-9_$]+)\(')
                        if not funcname:
                            funcname = scrapertools.find_single_match(
                                data_js, r'''["']signature["']\s*,\s*([A-Za-z0-9_$]+)\(''')
                        if funcname:
                            js_signature = JSInterpreter(data_js).extract_function(funcname)

                if not js_signature:
                    # Without the decipher function the URL would be rejected,
                    # so the stream is skipped instead of calling a non-callable.
                    logger.info("Signature function not available, skipping itag %s" % key)
                    continue
                url += "&signature=" + js_signature([sig])

            url = url.replace(",", "%2C")
            video_urls.append(["(" + fmt_value[key] + ") [youtube]", url])
        except Exception:
            # Best effort: a broken entry must not prevent the others from being listed.
            import traceback
            logger.info(traceback.format_exc())

    return video_urls
def scrapeWebPageForVideoLinks(data):
    """Extract the stream URLs advertised in a YouTube watch page.

    The flash variables are obtained with ``extractFlashVars`` and each entry
    of ``url_encoded_fmt_stream_map`` with a known itag is turned into a URL.
    Entries that only carry an encrypted signature (``s``) are deciphered
    with the function extracted from the player JavaScript referenced by the
    page.

    :param data: watch page contents, passed to ``extractFlashVars`` and
        searched for the player JavaScript URL.
    :return: list of ``[label, url]`` entries; an empty list when the page
        has no ``url_encoded_fmt_stream_map`` (this used to be an empty dict,
        unlike every other return path).
    """
    logger.info("")

    # NOTE(review): some labels (e.g. itags 38, 43, 46) disagree with the
    # commonly documented YouTube itag table - confirm before relying on them.
    fmt_value = {
        5: "240p h263 flv",
        18: "360p h264 mp4",
        22: "720p h264 mp4",
        26: "???",
        33: "???",
        34: "360p h264 flv",
        35: "480p h264 flv",
        37: "1080p h264 mp4",
        36: "3gpp",
        38: "720p vp8 webm",
        43: "360p h264 flv",
        44: "480p vp8 webm",
        45: "720p vp8 webm",
        46: "520p vp8 webm",
        59: "480 for rtmpe",
        78: "400 for rtmpe",
        82: "360p h264 stereo",
        83: "240p h264 stereo",
        84: "720p h264 stereo",
        85: "520p h264 stereo",
        100: "360p vp8 webm stereo",
        101: "480p vp8 webm stereo",
        102: "720p vp8 webm stereo",
        120: "hd720",
        121: "hd1080"
    }

    video_urls = []

    flashvars = extractFlashVars(data)
    if u"url_encoded_fmt_stream_map" not in flashvars:
        return video_urls

    if u"ttsurl" in flashvars:
        logger.info("ttsurl=" + flashvars[u"ttsurl"])

    js_signature = None      # decipher callable, resolved lazily from the player JS
    js_lookup_done = False   # the player JS is the same for every stream: look it up once

    for url_desc in flashvars[u"url_encoded_fmt_stream_map"].split(u","):
        # cgi.parse_qs maps every key to a list of values, hence the [0] below.
        url_desc_map = cgi.parse_qs(url_desc)
        logger.info(u"url_map: " + repr(url_desc_map))
        if not (u"url" in url_desc_map or u"stream" in url_desc_map):
            continue

        try:
            key = int(url_desc_map[u"itag"][0])
            if key not in fmt_value:
                # Unknown itag: there is no label for it, so skip it quietly
                # instead of failing later with a KeyError traceback.
                continue

            if u"url" in url_desc_map:
                url = urllib.unquote(url_desc_map[u"url"][0])
            elif u"conn" in url_desc_map:
                # No "url", so the guard above guarantees "stream" is present.
                url = urllib.unquote(url_desc_map[u"conn"][0])
                if url.rfind("/") < len(url) - 1:
                    url = url + "/"
                url = url + urllib.unquote(url_desc_map[u"stream"][0])
            else:
                url = urllib.unquote(url_desc_map[u"stream"][0])

            if u"sig" in url_desc_map:
                url = url + u"&signature=" + url_desc_map[u"sig"][0]
            elif u"s" in url_desc_map:
                sig = url_desc_map[u"s"][0]
                if not js_lookup_done:
                    # Flag is set before the attempt so that a failing lookup
                    # is not repeated (and re-downloaded) for every stream.
                    js_lookup_done = True
                    urljs = scrapertools.find_single_match(data, r'"assets":.*?"js":\s*"([^"]+)"')
                    urljs = urljs.replace("\\", "")
                    if urljs:
                        # Only complete the URL when it lacks a scheme;
                        # blindly prefixing "http:" broke absolute and
                        # root-relative player URLs.
                        if not urljs.startswith(("http://", "https://")):
                            if urljs.startswith("//"):
                                urljs = "http:" + urljs
                            else:
                                urljs = "https://www.youtube.com/" + urljs.lstrip("/")
                        data_js = scrapertools.downloadpage(urljs)
                        from jsinterpreter import JSInterpreter
                        # [A-Za-z0-9_$] instead of [A-z0-9$]: the A-z range
                        # also matches the punctuation between 'Z' and 'a'.
                        funcname = scrapertools.find_single_match(data_js, r'\.sig\|\|([A-Za-z0-9_$]+)\(')
                        if not funcname:
                            funcname = scrapertools.find_single_match(
                                data_js, r'''["']signature["']\s*,\s*([A-Za-z0-9_$]+)\(''')
                        if funcname:
                            js_signature = JSInterpreter(data_js).extract_function(funcname)

                if not js_signature:
                    # Without the decipher function the URL would be rejected,
                    # so the stream is skipped instead of calling a non-callable.
                    logger.info("Signature function not available, skipping itag %s" % key)
                    continue
                url += u"&signature=" + js_signature([sig])

            # Commas are percent-encoded so the built-in player method does not fail.
            url = url.replace(",", "%2C")
            video_urls.append(["(" + fmt_value[key] + ") [youtube]", url])
        except Exception:
            # Best effort: a broken entry must not prevent the others from being listed.
            import traceback
            logger.info(traceback.format_exc())

    return video_urls
def scrapeWebPageForVideoLinks(data):
    """Extract the stream URLs advertised in a YouTube watch page.

    The flash variables are obtained with ``extractFlashVars``. A live stream
    (``hlsvp``) is returned on its own. Otherwise each entry of
    ``url_encoded_fmt_stream_map`` whose itag is listed in ``fmt_value`` is
    turned into a URL. Entries that only carry an encrypted signature (``s``)
    are deciphered with the function extracted from the player JavaScript
    referenced by the page.

    :param data: watch page contents, passed to ``extractFlashVars`` and
        searched for the player JavaScript URL.
    :return: list of ``[label, url]`` entries; an empty list when the page
        has no ``url_encoded_fmt_stream_map`` (this used to be an empty dict,
        unlike every other return path).
    """
    logger.info("")

    fmt_value = {
        5: "240p h263 flv",
        6: "240p h263 flv",
        18: "360p h264 mp4",
        22: "720p h264 mp4",
        26: "???",
        33: "???",
        34: "360p h264 flv",
        35: "480p h264 flv",
        36: "3gpp",
        37: "1080p h264 mp4",
        38: "4K h264 mp4",
        43: "360p vp8 webm",
        44: "480p vp8 webm",
        45: "720p vp8 webm",
        46: "1080p vp8 webm",
        59: "480p h264 mp4",
        78: "480p h264 mp4",
        82: "360p h264 3D",
        83: "480p h264 3D",
        84: "720p h264 3D",
        85: "1080p h264 3D",
        100: "360p vp8 3D",
        101: "480p vp8 3D",
        102: "720p vp8 3D"
    }

    video_urls = []

    flashvars = extractFlashVars(data)
    if u"url_encoded_fmt_stream_map" not in flashvars:
        return video_urls

    if u"ttsurl" in flashvars:
        logger.info("ttsurl=" + flashvars[u"ttsurl"])

    # A live broadcast exposes a single HLS playlist; nothing else is needed.
    if 'hlsvp' in flashvars:
        url = flashvars[u"hlsvp"]
        video_urls.append(["(LIVE .m3u8) [youtube]", url])
        return video_urls

    js_signature = None      # decipher callable, resolved lazily from the player JS
    js_lookup_done = False   # the player JS is the same for every stream: look it up once

    data_flashvars = flashvars[u"url_encoded_fmt_stream_map"].split(u",")
    for url_desc in data_flashvars:
        # cgi.parse_qs maps every key to a list of values, hence the [0] below.
        url_desc_map = cgi.parse_qs(url_desc)
        logger.info(u"url_map: " + repr(url_desc_map))
        if not (u"url" in url_desc_map or u"stream" in url_desc_map):
            continue

        try:
            key = int(url_desc_map[u"itag"][0])
            if not fmt_value.get(key):
                continue

            if u"url" in url_desc_map:
                url = urllib.unquote(url_desc_map[u"url"][0])
            elif u"conn" in url_desc_map:
                # No "url", so the guard above guarantees "stream" is present.
                url = urllib.unquote(url_desc_map[u"conn"][0])
                if url.rfind("/") < len(url) - 1:
                    url = url + "/"
                url = url + urllib.unquote(url_desc_map[u"stream"][0])
            else:
                url = urllib.unquote(url_desc_map[u"stream"][0])

            if u"sig" in url_desc_map:
                url = url + u"&signature=" + url_desc_map[u"sig"][0]
            elif u"s" in url_desc_map:
                sig = url_desc_map[u"s"][0]
                if not js_lookup_done:
                    # Flag is set before the attempt so that a failing lookup
                    # is not repeated (and re-downloaded) for every stream.
                    js_lookup_done = True
                    urljs = scrapertools.find_single_match(data, r'"assets":.*?"js":\s*"([^"]+)"')
                    urljs = urljs.replace("\\", "")
                    if urljs:
                        # Only complete the URL when it lacks a scheme;
                        # blindly prefixing "http:" broke absolute and
                        # root-relative player URLs.
                        if not urljs.startswith(("http://", "https://")):
                            if urljs.startswith("//"):
                                urljs = "http:" + urljs
                            else:
                                urljs = "https://www.youtube.com/" + urljs.lstrip("/")
                        data_js = scrapertools.downloadpage(urljs)
                        from jsinterpreter import JSInterpreter
                        # [A-Za-z0-9_$] instead of [A-z0-9$]: the A-z range
                        # also matches the punctuation between 'Z' and 'a'.
                        funcname = scrapertools.find_single_match(data_js, r'\.sig\|\|([A-Za-z0-9_$]+)\(')
                        if not funcname:
                            funcname = scrapertools.find_single_match(
                                data_js, r'''["']signature["']\s*,\s*([A-Za-z0-9_$]+)\(''')
                        if funcname:
                            js_signature = JSInterpreter(data_js).extract_function(funcname)

                if not js_signature:
                    # Without the decipher function the URL would be rejected,
                    # so the stream is skipped instead of calling a non-callable.
                    logger.info("Signature function not available, skipping itag %s" % key)
                    continue
                url += u"&signature=" + js_signature([sig])

            # Commas are percent-encoded so the built-in player method does not fail.
            url = url.replace(",", "%2C")
            video_urls.append(["(" + fmt_value[key] + ") [youtube]", url])
        except Exception:
            # Best effort: a broken entry must not prevent the others from being listed.
            import traceback
            logger.info(traceback.format_exc())

    return video_urls
def scrapeWebPageForVideoLinks(data):
    """Extract the stream URLs advertised in a YouTube watch page.

    The flash variables are obtained with ``extractFlashVars``. A live stream
    (``hlsvp``) is returned on its own. Otherwise the entries of
    ``adaptive_fmts`` are used, falling back to
    ``url_encoded_fmt_stream_map`` when that key cannot be read. Entries
    whose itag is in ``exclude_itags`` or has no label in ``fmt_value`` are
    skipped. Entries that only carry an encrypted signature (``s``) are
    deciphered with the function extracted from the player JavaScript
    referenced by the page.

    :param data: watch page contents, passed to ``extractFlashVars`` and
        searched for the player JavaScript URL.
    :return: list of ``[label, url]`` entries; an empty list when the page
        has no ``url_encoded_fmt_stream_map`` (this used to be an empty dict,
        unlike every other return path).
    """
    logger.info("")

    fmt_value = {
        5: "240p h263 flv",
        6: "240p h263 flv",
        18: "360p h264 mp4",
        22: "720p h264 mp4",
        26: "???",
        33: "???",
        34: "360p h264 flv",
        35: "480p h264 flv",
        36: "3gpp",
        37: "1080p h264 mp4",
        38: "4K h264 mp4",
        43: "360p vp8 webm",
        44: "480p vp8 webm",
        45: "720p vp8 webm",
        46: "1080p vp8 webm",
        59: "480p h264 mp4",
        78: "480p h264 mp4",
        82: "360p h264 3D",
        83: "480p h264 3D",
        84: "720p h264 3D",
        85: "1080p h264 3D",
        100: "360p vp8 3D",
        101: "480p vp8 3D",
        102: "720p vp8 3D",
        133: "240p h264 mp4",
        134: "360p h264 mp4",
        135: "480p h264 mp4",
        136: "720p h264 mp4",
        137: "1080p h264 mp4",
        264: "1440p h264 mp4",
        266: "4K h264 mp4",
        298: "720p mp4 60fps",
        299: "1080p mp4 60fps",
        218: "480p vp9 webm",
        219: "480 vp9 webm",
        242: "240p vp9 webm",
        243: "360p vp9 webm",
        244: "480p vp9 webm",
        245: "480p vp9 webm",
        246: "480p vp9 webm",
        247: "720p vp9 webm",
        248: "1080p vp9 webm",
        271: "1440p vp9 webm",
        272: "4K webm 60fps",
        302: "720p webm 60fps",
        303: "1080p webm 60fps",
        308: "1440p webm 60fps",
        313: "4K vp9 webm",
        315: "4K webm 60fps"
    }
    exclude_itags = [17, 139, 140, 141, 160, 171, 172, 249, 250, 251, 256, 258, 278]

    video_urls = []

    flashvars = extractFlashVars(data)
    if u"url_encoded_fmt_stream_map" not in flashvars:
        return video_urls

    if u"ttsurl" in flashvars:
        logger.info("ttsurl=" + flashvars[u"ttsurl"])

    # A live broadcast exposes a single HLS playlist; nothing else is needed.
    if 'hlsvp' in flashvars:
        url = flashvars[u"hlsvp"]
        video_urls.append(["(LIVE .m3u8) [youtube]", url])
        return video_urls

    try:
        data_flashvars = flashvars[u"adaptive_fmts"].split(u",")
    except (KeyError, AttributeError):
        # Key missing or value not a string: use the classic stream map.
        data_flashvars = flashvars[u"url_encoded_fmt_stream_map"].split(u",")

    js_signature = None      # decipher callable, resolved lazily from the player JS
    js_lookup_done = False   # the player JS is the same for every stream: look it up once

    for url_desc in data_flashvars:
        # cgi.parse_qs maps every key to a list of values, hence the [0] below.
        url_desc_map = cgi.parse_qs(url_desc)
        logger.info(u"url_map: " + repr(url_desc_map))
        if not (u"url" in url_desc_map or u"stream" in url_desc_map):
            continue

        try:
            key = int(url_desc_map[u"itag"][0])
            if key in exclude_itags:
                continue
            if key not in fmt_value:
                # Unknown itag: there is no label for it, so skip it quietly
                # instead of failing later with a KeyError traceback.
                continue

            if u"url" in url_desc_map:
                url = urllib.unquote(url_desc_map[u"url"][0])
            elif u"conn" in url_desc_map:
                # No "url", so the guard above guarantees "stream" is present.
                url = urllib.unquote(url_desc_map[u"conn"][0])
                if url.rfind("/") < len(url) - 1:
                    url = url + "/"
                url = url + urllib.unquote(url_desc_map[u"stream"][0])
            else:
                url = urllib.unquote(url_desc_map[u"stream"][0])

            if u"sig" in url_desc_map:
                url = url + u"&signature=" + url_desc_map[u"sig"][0]
            elif u"s" in url_desc_map:
                sig = url_desc_map[u"s"][0]
                if not js_lookup_done:
                    # Flag is set before the attempt so that a failing lookup
                    # is not repeated (and re-downloaded) for every stream.
                    js_lookup_done = True
                    urljs = scrapertools.find_single_match(data, r'"assets":.*?"js":\s*"([^"]+)"')
                    urljs = urljs.replace("\\", "")
                    if urljs:
                        # Only complete the URL when it lacks a scheme;
                        # blindly prefixing "http:" broke absolute and
                        # root-relative player URLs.
                        if not urljs.startswith(("http://", "https://")):
                            if urljs.startswith("//"):
                                urljs = "http:" + urljs
                            else:
                                urljs = "https://www.youtube.com/" + urljs.lstrip("/")
                        data_js = scrapertools.downloadpage(urljs)
                        from jsinterpreter import JSInterpreter
                        # [A-Za-z0-9_$] instead of [A-z0-9$]: the A-z range
                        # also matches the punctuation between 'Z' and 'a'.
                        funcname = scrapertools.find_single_match(data_js, r'\.sig\|\|([A-Za-z0-9_$]+)\(')
                        if not funcname:
                            funcname = scrapertools.find_single_match(
                                data_js, r'''["']signature["']\s*,\s*([A-Za-z0-9_$]+)\(''')
                        if funcname:
                            js_signature = JSInterpreter(data_js).extract_function(funcname)

                if not js_signature:
                    # Without the decipher function the URL would be rejected,
                    # so the stream is skipped instead of calling a non-callable.
                    logger.info("Signature function not available, skipping itag %s" % key)
                    continue
                url += u"&signature=" + js_signature([sig])

            # Commas are percent-encoded so the built-in player method does not fail.
            url = url.replace(",", "%2C")
            video_urls.append(["(" + fmt_value[key] + ") [youtube]", url])
        except Exception:
            # Best effort: a broken entry must not prevent the others from being listed.
            import traceback
            logger.info(traceback.format_exc())

    return video_urls
def get_video_url(item):
    """Build the list of ``Video`` entries playable for a YouTube item.

    The 11-character video id is taken from the ``v=`` parameter of
    ``item.url``. The ``get_video_info`` endpoint is queried first: a live
    stream (``hlsvp``) is returned on its own, and a DASH manifest is added
    when the response does not flag a ciphered signature. The watch page is
    then downloaded and every entry of its ``url_encoded_fmt_stream_map``
    with a known itag is turned into a URL. Entries that only carry an
    encrypted signature (``s``) are deciphered with the function extracted
    from the player JavaScript.

    ``fmt_value`` is not defined in this function and is looked up in the
    enclosing scope. Its values are indexed as ``[0]`` (passed as ``res``)
    and ``[1]`` (passed as ``type``).

    :param item: object whose ``url`` attribute holds the watch URL.
    :return: list of ``Video`` objects; empty when no video id is found.
    """
    logger.trace()
    itemlist = []

    # [A-Za-z0-9_-] instead of [A-z0-9_-]: the A-z range also matches the
    # punctuation between 'Z' and 'a', which never appears in a video id.
    video_id = scrapertools.find_single_match(item.url, r'v=([A-Za-z0-9_-]{11})')
    if not video_id:
        # Nothing to query: avoid two pointless requests with an empty id.
        return itemlist

    url = 'http://www.youtube.com/get_video_info?video_id=%s&eurl=https://youtube.googleapis.com/v/%s&ssl_stream=1' % (
        video_id, video_id)
    data = httptools.downloadpage(url).data
    params = dict(urlparse.parse_qsl(data))

    # A live broadcast exposes a single HLS playlist; nothing else is needed.
    if params.get('hlsvp'):
        itemlist.append(Video(type='Live', url=params['hlsvp']))
        return itemlist

    if params.get('dashmpd') and params.get('use_cipher_signature', '') != 'True':
        itemlist.append(Video(type='MPD', url=params['dashmpd'], mpd=True))

    youtube_page_data = httptools.downloadpage("http://www.youtube.com/watch?v=%s" % video_id).data
    params = extract_flashvars(youtube_page_data)
    if not params.get('url_encoded_fmt_stream_map'):
        return itemlist

    js_signature = None      # decipher callable, resolved lazily from the player JS
    js_lookup_done = False   # the player JS is the same for every stream: look it up once

    for url_desc in params["url_encoded_fmt_stream_map"].split(","):
        url_desc_map = dict(urlparse.parse_qsl(url_desc))
        if not url_desc_map.get("url") and not url_desc_map.get("stream"):
            continue

        try:
            key = int(url_desc_map["itag"])
            if not fmt_value.get(key):
                continue

            if url_desc_map.get("url"):
                url = urllib.unquote(url_desc_map["url"])
            elif url_desc_map.get("conn"):
                # No "url", so the guard above guarantees "stream" is present.
                url = urllib.unquote(url_desc_map["conn"])
                if url.rfind("/") < len(url) - 1:
                    url += "/"
                url += urllib.unquote(url_desc_map["stream"])
            else:
                url = urllib.unquote(url_desc_map["stream"])

            if url_desc_map.get("sig"):
                url += "&signature=" + url_desc_map["sig"]
            elif url_desc_map.get("s"):
                sig = url_desc_map["s"]
                if not js_lookup_done:
                    # Flag is set before the attempt so that a failing lookup
                    # is not repeated (and re-downloaded) for every stream.
                    js_lookup_done = True
                    urljs = scrapertools.find_single_match(youtube_page_data, r'"assets":.*?"js":\s*"([^"]+)"')
                    urljs = urljs.replace("\\", "")
                    if urljs:
                        if not re.search(r'https?://', urljs):
                            urljs = urlparse.urljoin("https://www.youtube.com", urljs)
                        data_js = httptools.downloadpage(urljs).data
                        from jsinterpreter import JSInterpreter
                        funcname = scrapertools.find_single_match(data_js, r'\.sig\|\|([A-Za-z0-9_$]+)\(')
                        if not funcname:
                            funcname = scrapertools.find_single_match(
                                data_js, r'''["']signature["']\s*,\s*([A-Za-z0-9_$]+)\(''')
                        if funcname:
                            js_signature = JSInterpreter(data_js).extract_function(funcname)

                if not js_signature:
                    # Raised (and logged by the handler below) with a clear
                    # message instead of failing on a non-callable value.
                    raise ValueError("Signature function not available for itag %s" % key)
                url += "&signature=" + js_signature([sig])

            url = url.replace(",", "%2C")
            itemlist.append(Video(type=fmt_value[key][1], res=fmt_value[key][0], url=url))
        except Exception:
            # Best effort: a broken entry must not prevent the others from being listed.
            logger.error()

    return itemlist