def _parse_sig_js(jscode):
    """Build a signature-decryption callable from player JavaScript source.

    The name of the entry function is obtained by handing the
    ``.sig||NAME(`` pattern to ``_search_regex`` (described there as the
    'Initial JS player signature function name'); that function is then
    pulled out of *jscode* with ``JSInterpreter.extract_function``.

    Returns a one-argument callable that forwards its argument to the
    extracted function wrapped in a single-element list.
    """
    sig_func_name = _search_regex(
        r'\.sig\|\|([a-zA-Z0-9$]+)\(',
        jscode,
        'Initial JS player signature function name')
    extracted = JSInterpreter(jscode).extract_function(sig_func_name)

    def decrypt(s):
        # The extracted function takes its arguments as a list.
        return extracted([s])

    return decrypt
def _parse_sig_js(self, jscode):
    """Return a callable that runs the player's signature function.

    ``self._search_regex`` is given two candidate patterns and asked for the
    named group ``sig``; the resulting name is extracted from *jscode* with
    ``JSInterpreter``. The returned callable takes one value and passes it to
    the extracted function as a single-element list.
    """
    name_patterns = (
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    entry_name = self._search_regex(name_patterns, jscode, group='sig')
    interpreter = JSInterpreter(jscode)
    run_entry = interpreter.extract_function(entry_name)
    return lambda s: run_entry([s])
def _get_mainfunc_from_js(js):
    """Extract the main signature-decryption function from player javascript.

    The function name is the first capture group of the ``.sig||NAME(``
    pattern searched in *js*; the result of
    ``JSInterpreter(js).extract_function(name)`` is returned unchanged.

    Raises ``AttributeError`` when the pattern is not found, because
    ``re.search`` then returns ``None`` and ``.group`` is called on it.
    """
    dbg("Scanning js for main function.")
    main_name = re.search(r'\.sig\|\|([a-zA-Z0-9$]+)\(', js).group(1)
    dbg("Found main function: %s", main_name)
    return JSInterpreter(js).extract_function(main_name)
def _parse_sig_js(self, jscode):
    """Locate the signature function in *jscode* and wrap it as a callable.

    Five candidate patterns, each exposing the function name as the named
    group ``sig``, are passed to ``self._search_regex``. The named function
    is extracted with ``JSInterpreter`` and wrapped so that callers pass a
    single value, which is forwarded as a one-element argument list.
    """
    candidates = (
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    sig_name = self._search_regex(candidates, jscode, group='sig')
    sig_function = JSInterpreter(jscode).extract_function(sig_name)

    def apply_signature(s):
        return sig_function([s])

    return apply_signature
def _parse_sig_js(self, jscode):
    """Return ``f(s)`` that calls the signature function found in *jscode*.

    The function name comes from ``self._search_regex`` (group ``sig``) over
    the pattern tuple below; the function itself is extracted by
    ``JSInterpreter``. ``f(s)`` invokes it with the argument list ``[s]``.
    """
    patterns_for_name = (
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    located_name = self._search_regex(
        patterns_for_name, jscode, group='sig')
    js_interpreter = JSInterpreter(jscode)
    target = js_interpreter.extract_function(located_name)
    return lambda s: target([s])
def retrieveSignatureDecryptorFunction(ytplayerConfigJson):
    """Return the signature-decryption function for the JS player asset.

    A function already stored in the module-level
    ``DECRYPTOR_FUNCTION_CACHE`` is returned as-is. Otherwise the player
    path is read from ``ytplayerConfigJson['assets']['js']``, the script is
    downloaded from ``https://www.youtube.com`` with ``requests``, the name
    of the decryption function is located with a list of regexes, and the
    function is extracted with ``JSInterpreter`` and stored in the cache.

    Progress is reported with ``print``. ``exit(1)`` is called when no regex
    matches, or when a ``KeyError`` is raised anywhere inside the ``try``
    (reported as a missing ``assets.js`` entry). The ``'assets'`` lookup
    itself happens before the ``try``, so a missing ``'assets'`` key
    propagates as ``KeyError``.
    """
    global DECRYPTOR_FUNCTION_CACHE
    if DECRYPTOR_FUNCTION_CACHE is not None:
        return DECRYPTOR_FUNCTION_CACHE

    assets = ytplayerConfigJson['assets']
    try:
        print("Video seems to use signature protection...\n"
              "Attempting extraction of decryptor from video player asset...\n\n")
        # This key may not exist if video uses swf player
        player_path = assets['js']
        print("JSPlayer identified as the video player asset. "
              "Downloading JSPlayer javascript file...\n\n")
        player_source = requests.get(
            "https://www.youtube.com" + player_path).text
        print("Asset downloaded. Extracting decryption function...\n\n")

        # Patterns taken from youtube-dl's youtube.py; each one exposes the
        # decryption function's name as the named group 'sig'. The first
        # pattern that matches wins.
        name_patterns = (
            r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
            r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
            r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?(?P<sig>[a-zA-Z0-9$]+)\(',
            r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
            r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        )
        # Lazy generators: searching stops at the first successful match.
        attempts = (re.search(pattern, player_source)
                    for pattern in name_patterns)
        function_name = next(
            (hit.groupdict()['sig'] for hit in attempts if hit is not None),
            None)
        if function_name is None:
            print("Cannot find signature-decryption function in JSPlayer asset!")
            exit(1)

        # extract_function returns an invokable decryption function
        decryptor = JSInterpreter(player_source).extract_function(
            function_name)
        print("Signature-decryption function found. "
              "Deciphering encrypted signature...\n\n")
        DECRYPTOR_FUNCTION_CACHE = decryptor
        return decryptor
    except KeyError:
        print("ytplayer.config.assets.js doesn't exist.\nProbably swf player?")
        exit(1)
def _parse_sig_js(self, jscode):
    """Return a callable that runs the signature function found in *jscode*.

    The function name is obtained from ``self._search_regex`` by requesting
    the named group ``sig`` over the candidate patterns below. The function
    is then extracted with ``JSInterpreter`` and wrapped so the caller
    passes one value, which is forwarded as the argument list ``[s]``.

    The original tuple listed its final pattern twice in a row. A repeated
    pattern cannot match where the identical preceding one failed, so the
    duplicate has been dropped.
    NOTE(review): this assumes ``_search_regex`` tries the patterns in
    order, as the "Obsolete patterns" grouping implies -- confirm.
    """
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(sig_name_patterns, jscode, group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes its arguments as a list.
    return lambda s: initial_function([s])
def _parse_sig_js(self, jscode):
    """Wrap the player's signature function as a one-argument callable.

    The function name is whatever ``self._search_regex`` returns for the
    ``.sig||NAME(`` pattern applied to *jscode*. That function is extracted
    with ``JSInterpreter`` and invoked with the argument list ``[s]``.
    """
    sig_pattern = r'\.sig\|\|([a-zA-Z0-9$]+)\('
    found_name = self._search_regex(sig_pattern, jscode)
    extracted = JSInterpreter(jscode).extract_function(found_name)

    def call_with_signature(s):
        return extracted([s])

    return call_with_signature
def getCipher(session, player):
    """Download the player script and extract its ``'FK'`` function.

    The script location is ``youtubeUrl`` joined with
    ``player['assets']['js']`` (looked up with ``.get``). It is fetched with
    ``session.get`` and its ``.text`` is handed to ``JSInterpreter``.

    Returns the result of ``extract_function('FK')``.
    NOTE(review): the function name ``'FK'`` is hard-coded -- presumably
    tied to one specific player build; confirm it is still valid.
    """
    script_url = youtubeUrl + player.get("assets", {}).get("js")
    response = session.get(script_url)
    interpreter = JSInterpreter(response.text)
    return interpreter.extract_function('FK')
def scrapeWebPageForVideoLinks(data):
    """Build a list of playable stream URLs from a scraped web page.

    *data* is logged, then passed to ``extractFlashVars``. Each
    comma-separated entry of its ``url_encoded_fmt_stream_map`` value is
    parsed as a query string and turned into a URL. A signature is appended
    either directly from ``sig`` or, for an ``s`` value, by running it
    through a function extracted from the page's JS player asset.

    Returns a list of ``[label, url]`` pairs, where ``label`` is
    ``"(<format description>) [youtube]"``.

    NOTE(review): when ``url_encoded_fmt_stream_map`` is absent, the early
    return yields ``links`` (an empty dict) rather than a list -- confirm
    that callers cope with both types.
    NOTE(review): ``has_key``, ``cgi.parse_qs`` and ``urllib.unquote`` are
    Python 2 APIs; this block appears to target Python 2.
    """
    logger.info(data)
    links = {}
    # Maps the integer "itag" of a stream entry to the description used in
    # the label of the returned pair.
    fmt_value = {
        5: "240p h263 flv",
        18: "360p h264 mp4",
        22: "720p h264 mp4",
        26: "???",
        33: "???",
        34: "360p h264 flv",
        35: "480p h264 flv",
        37: "1080p h264 mp4",
        36: "3gpp",
        38: "720p vp8 webm",
        43: "360p h264 flv",
        44: "480p vp8 webm",
        45: "720p vp8 webm",
        46: "520p vp8 webm",
        59: "480 for rtmpe",
        78: "400 for rtmpe",
        82: "360p h264 stereo",
        83: "240p h264 stereo",
        84: "720p h264 stereo",
        85: "520p h264 stereo",
        100: "360p vp8 webm stereo",
        101: "480p vp8 webm stereo",
        102: "720p vp8 webm stereo",
        120: "hd720",
        121: "hd1080"
    }
    video_urls = []
    flashvars = extractFlashVars(data)
    if not flashvars.has_key(u"url_encoded_fmt_stream_map"):
        return links
    if flashvars.has_key(u"ttsurl"):
        logger.info("ttsurl=" + flashvars[u"ttsurl"])
    # Holds the extracted JS signature function once it has been built, so
    # the player script is downloaded at most once per call.
    js_signature = ""
    for url_desc in flashvars[u"url_encoded_fmt_stream_map"].split(u","):
        url_desc_map = cgi.parse_qs(url_desc)
        logger.info(u"url_map: " + repr(url_desc_map))
        # Entries carrying neither a "url" nor a "stream" field are skipped.
        if not (url_desc_map.has_key(u"url")
                or url_desc_map.has_key(u"stream")):
            continue
        try:
            key = int(url_desc_map[u"itag"][0])
            url = u""
            if url_desc_map.has_key(u"url"):
                url = urllib.unquote(url_desc_map[u"url"][0])
            elif url_desc_map.has_key(u"conn") and url_desc_map.has_key(
                    u"stream"):
                # Join "conn" and "stream", inserting a "/" unless "conn"
                # already ends with one.
                url = urllib.unquote(url_desc_map[u"conn"][0])
                if url.rfind("/") < len(url) - 1:
                    url = url + "/"
                url = url + urllib.unquote(url_desc_map[u"stream"][0])
            elif url_desc_map.has_key(
                    u"stream") and not url_desc_map.has_key(u"conn"):
                url = urllib.unquote(url_desc_map[u"stream"][0])
            if url_desc_map.has_key(u"sig"):
                # "sig" is appended to the URL as-is.
                url = url + u"&signature=" + url_desc_map[u"sig"][0]
            elif url_desc_map.has_key(u"s"):
                # "s" is first passed through the JS signature function.
                sig = url_desc_map[u"s"][0]
                if not js_signature:
                    urljs = scrapertools.find_single_match(
                        data, '"assets":.*?"js":\s*"([^"]+)"')
                    # Drop the backslashes from the matched URL text.
                    urljs = urljs.replace("\\", "")
                    if urljs:
                        data_js = scrapertools.downloadpage("http:" + urljs)
                        from jsinterp import JSInterpreter
                        # NOTE(review): "[A-z]" also matches the ASCII
                        # punctuation between "Z" and "a" -- probably meant
                        # "[A-Za-z]"; confirm.
                        funcname = scrapertools.find_single_match(
                            data_js, '\.sig\|\|([A-z0-9$]+)\(')
                        jsi = JSInterpreter(data_js)
                        js_signature = jsi.extract_function(funcname)
                # NOTE(review): if no player URL was found, js_signature is
                # still "" here and this call raises; the bare except below
                # logs it and the entry is dropped.
                signature = js_signature([sig])
                url += u"&signature=" + signature
            # Commas are encoded so the built-in method does not fail
            url = url.replace(",", "%2C")
            # An itag missing from fmt_value raises KeyError here, which the
            # bare except below logs, so that entry is skipped.
            video_urls.append(["(" + fmt_value[key] + ") [youtube]", url])
        except:
            # Any failure while handling one entry is logged with its
            # traceback and the loop moves on to the next entry.
            import traceback
            logger.info(traceback.format_exc())
    return video_urls