def loadM3U8(url):
    """Resolve a video page into a local .m3u8 playlist path, or None.

    Two strategies, in order:
      1. the page embeds the playlist body in a "m3u8" JSON field — save it;
      2. collect "l" links that look like f4v segments (resolving relative
         ones through data.video.iqiyi.com) and write them as a playlist.
    """
    txt = xurl.load(url)
    # Undo JSON escaping before running the regexes.
    txt = txt.replace('\/', '/')
    txt = txt.replace('\\n', '\n')
    direct = re.search(r'"m3u8":"([^"]*)"', txt)
    if direct:
        local = xurl.genLocal(url, prefix='vod_list_', suffix='.m3u8')
        xurl.saveLocal(local, direct.group(1))
        return local
    segments = []
    for hit in re.finditer(r'"l":"([^"]*)"', txt):
        link = hit.group(1)
        if not re.search(r'f4v\?', link):
            continue
        if link.startswith('http'):
            segments.append(link)
        else:
            # Relative segment path: ask the data host for the real links.
            data_url = 'https://data.video.iqiyi.com/videos' + link
            body = xurl.load(data_url)
            for sub in re.finditer(r'"l":"([^"]*)"', body):
                segments.append(sub.group(1))
    if segments:
        local = xurl.genLocal(url, prefix='vod_list_', suffix='.m3u8')
        xurl.saveM3U8(local, segments)
        return local
    return None
def parseJson(path):
    """Parse ytdl JSON-lines output at *path*.

    Returns (playable, cookies): *playable* is a single URL/local path, or a
    generated .m3u8 combining several results, or None when nothing parsed;
    *cookies* is the last seen Cookie header (utf8-encoded) or None.
    """
    print('\n[ytdl][parseJson]\n')
    results = []
    cookies = None
    with open(path, "r") as fd:
        lines = fd.readlines()
    for line in lines:
        # Best-effort per line: skip anything that is not JSON with a 'url'.
        try:
            data = json.loads(line)
            urls = data['url']
        except:
            print('\texception')
            continue
        try:
            cookies = data['http_headers']['Cookie'].encode('utf8')
        except:
            cookies = None
        # basestring: this module targets Python 2.
        if not isinstance(urls, basestring):
            results.append(urls)
            continue
        encoded = re.search(
            r'data:application/vnd.apple.mpegurl;base64,([a-zA-Z0-9+/=]*)',
            urls)
        if encoded:
            # Inline base64 playlist: decode it into a local file.
            local = xurl.genLocal(path, prefix='vod_list_', suffix='.m3u')
            xurl.saveLocal(local, base64.b64decode(encoded.group(1)))
            results.append(local)
        else:
            results.append(urls)
    if cookies:
        print('\thdr : %s' % (cookies))
    if not results:
        print('\tNo results')
        return None, None
    if len(results) == 1:
        print('\tret : %s' % (results[0]))
        return results[0], cookies
    m3u = xurl.genLocal(path, prefix='vod_list_', suffix='.m3u8')
    xurl.saveM3U8(m3u, results)
    print('\tret : %s' % (m3u))
    return m3u, cookies
def extractSUB(url, subtitle=None):
    """Download subtitles for *url* via ytdl; return the local subtitle path.

    subtitle='auto-generated' additionally passes --write-auto-sub.
    Returns None on failure.
    """
    print('\n[ytdl][extracSUB]\n')
    sub = xurl.genLocal(url, prefix='vod_sub_')
    sub_dir = os.path.dirname(sub)
    sub_base = os.path.basename(sub)
    # Cache hit. BUGFIX: os.listdir() yields bare filenames, so compare
    # against the basename (the original compared against the full path and
    # could never match) and return the full path, matching what the
    # fresh-download branch below returns.
    for f in os.listdir(sub_dir):
        if f.startswith(sub_base):
            local = os.path.join(sub_dir, f)
            print('\tsub: ' + local)
            return local
    try:
        opt = ''
        if subtitle == 'auto-generated':
            opt += '--write-auto-sub '
        cmd = '%s %s %s -o %s \'%s\'' % (ytdlcmd(), defvals.ytdlsub, opt,
                                         sub, url)
        start_time = timeit.default_timer()
        output = subprocess.check_output(cmd, shell=True)
        elapsed = timeit.default_timer() - start_time
        print('\tsec: ' + str(elapsed))
    except:
        print('\texception')
        return None
    # ytdl logs the path it wrote the subtitle file to; scrape it back out.
    m = re.search(r'Writing video subtitles to: (.*)', output)
    if m:
        local = m.group(1)
        print('\tsub: ' + local)
        return local
    return None
def extract(url):
    """Scrape *url*: episode links for 15-char ids, page thumbnails otherwise."""
    objs = []
    basename = url.split('/')[-1]
    if len(basename) == 15:
        # Episode id: hit the ajax endpoint, replaying the session cookie.
        url_tv = 'https://www.pianku.tv/ajax/downurl/%s_tv/' % (basename[0:10])
        local_cookie = xurl.genLocal(url, suffix='.cookie')
        # First request only harvests the cookie jar (-c writes cookies).
        xurl.load(url, opts=['-c %s' % (local_cookie)])
        ajax_opts = [
            '-b %s' % (local_cookie),
            '-H \'x-requested-with: XMLHttpRequest\'',
            '-H \'referer: %s\'' % (url),
        ]
        txt = xurl.load(url_tv, opts=ajax_opts)
        for hit in re.finditer(r'<li><a href="([^"]*)">(.*?)</a></li>', txt):
            objs.append(entryObj(urljoin(url, hit.group(1)), hit.group(2)))
    else:
        # NOTE(review): this branch calls a bare load() while the rest of the
        # function uses xurl.load() — presumably a module-level helper
        # defined elsewhere in this file; verify it is not a typo.
        for hit in re.finditer(
                r'<a href="(.*?)" title="(.*?)" target="_blank"><img src=".*?"\s+data-funlazy="(.*?)"',
                load(url)):
            objs.append(pageObj(urljoin(url, hit.group(1)), hit.group(2),
                                urljoin(url, hit.group(3))))
    return objs
def get_tracks(no, bno, args):
    """Fetch broker *bno*'s trade rows for stock *no* from histock.tw.

    Rows are returned in the reverse of page order (the original built the
    list with insert(0, ...)). An empty result alongside an 'alert' in the
    page is treated as a bad fetch and the cached copy is removed.
    """
    url = 'https://histock.tw/stock/brokertrace.aspx?bno={b}&no={n}'.format(
        b=bno, n=no)
    url_opts = []
    if args.cookies:
        url_opts.append('-H \'cookie: ' + args.cookies + '\'')
    local = xurl.genLocal(url, prefix='twstock_load_broker_')
    txt = xurl.load(url, local=local, opts=url_opts, cache=args.cache,
                    cacheOnly=args.cacheOnly, verbose=args.verbose)
    row_pat = (r'<td>(.*?)</td><td>([\d|,]+)</td><td>(\d+[.]\d*)</td>'
               r'<td>([\d|,]+)</td><td>(\d+[.]\d*)</td><td>(\d+[.]\d*)</td>')
    rows = [track(*m.groups()) for m in re.finditer(row_pat, txt)]
    rows.reverse()
    if not rows and re.search('alert', txt):
        # Server answered with an alert page; drop the cache so the next
        # call re-fetches instead of replaying the bad response.
        os.remove(local)
    return rows
def executeJSCode(code):
    """Run a JavaScript snippet under nodejs and return its stdout, or None.

    The snippet is written to a uid-keyed local file, executed, and the
    trailing newline plus JSON slash-escaping are stripped from the output.
    """
    # BUGFIX: the original passed str(os.getuid) — the *function object*,
    # whose repr embeds a memory address — so the cache filename differed on
    # every interpreter run. Call it to key on the stable numeric uid.
    local = xurl.genLocal(str(os.getuid()), prefix='vod_code_')
    xurl.saveLocal(local, code)
    try:
        output = subprocess.check_output('nodejs ' + local,
                                         shell=True).rstrip('\n')
    except:
        return None
    output = output.replace("\/", "/")
    showAll(code, output)
    return output
def getSource(url, fmt, ref):
    """Split a page's baseUrl streams into audio/video playlists plus a master.

    Picks the first id seen from the known video ids ('64','32','16') and
    audio ids ('30280','30216'), collects all URLs carrying that id, writes
    them to .video.m3u8 / .audio.m3u8, and returns a master .m3u8 that ties
    the audio group to the video playlist.
    """
    txt = xurl.load(url)
    video_ids = ['64', '32', '16']
    audio_ids = ['30280', '30216']
    video, audio = [], []
    video_id, audio_id = None, None
    for m in re.finditer(r'"id":(\d+),"baseUrl":"([^"]*)"', txt):
        stream_id, base = m.group(1), m.group(2)
        if stream_id in video_ids:
            # Lock onto the first video id encountered; collect only it.
            if video_id is None:
                video_id = stream_id
            if stream_id == video_id:
                video.append(base)
        if stream_id in audio_ids:
            if audio_id is None:
                audio_id = stream_id
            if stream_id == audio_id:
                audio.append(base)
    local_a = xurl.genLocal(url, prefix='vod_list_', suffix='.audio.m3u8')
    local_v = xurl.genLocal(url, prefix='vod_list_', suffix='.video.m3u8')
    local = xurl.genLocal(url, prefix='vod_list_', suffix='.m3u8')
    xurl.saveM3U8(local_a, audio)
    xurl.saveM3U8(local_v, video)
    master = [
        '#EXTM3U',
        '#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",LANGUAGE="eng",URI="%s"'
        % (local_a),
        '#EXT-X-STREAM-INF:AUDIO="audio"',
        local_v,
    ]
    xurl.saveLocal(local, '\n'.join(master))
    return local
def extractURL(url, fmt, key=None, ref=None, dontParseJson=False):
    """Run ytdl in JSON mode for *url* and return parseJson()'s result.

    key  -- optional video password, ref -- optional referer header.
    dontParseJson=True returns the raw JSON file path instead.
    Results are cached in a local .json file keyed on url+fmt.
    """
    print('\n[ytdl][extractURL]\n')
    url = redirectURL(url)
    arg = '-i -j --no-playlist --no-warnings'
    fmt = getFormat(url, fmt)
    local = xurl.genLocal(url + fmt, prefix='vod_list_', suffix='.json')
    # NOTE(review): the key-handling span was corrupted in the source by
    # secret masking ('--video-password='******'...); reconstructed here to
    # mirror the referer handling below — confirm against upstream history.
    if key:
        arg = ' '.join([arg, '--video-password=\'%s\'' % (key)])
    if ref:
        arg = ' '.join([arg, '--referer=\'%s\'' % (ref)])
    print('\targ : %s' % (arg or ''))
    print('\tfmt : %s' % (fmt or ''))
    # Serve the cached JSON if it exists and has not expired.
    if os.path.exists(local) and not xurl.checkExpire(local):
        print('\tret : %s' % (local))
        if dontParseJson:
            return local
        return parseJson(local)
    cmd = '%s -f \'%s\' --user-agent \'%s\' %s \'%s\' > %s' % (
        ytdlcmd(), fmt, defvals.ua, arg, url, local)
    try:
        start_time = timeit.default_timer()
        output = subprocess.check_output(cmd, shell=True)
        elapsed = timeit.default_timer() - start_time
    except:
        elapsed = timeit.default_timer() - start_time
        print('\texception')
        return None
    print('\tsec : %s' % (str(elapsed)))
    print('\tret : %s' % (local))
    if dontParseJson:
        return local
    return parseJson(local)
def findYouTubeNextPage(url, q):
    """Scrape the search-pager controls from a YouTube results page.

    Returns a list of navObj(label, link); link is None for <button>
    controls (or when no href is present).
    """
    objs = []
    local = xurl.genLocal(url, suffix='.old')
    txt = xurl.load(
        url, local,
        opts=['--cookie \"PREF=f1=50000000;f6=1408;f5=30;hl=en\"'])
    pager = re.search(r'search-pager(.*?)</div>', txt,
                      re.DOTALL | re.MULTILINE)
    if not pager:
        return objs
    for ctrl in re.finditer(r'<(a|button) .*?</(a|button)>', pager.group(1)):
        label_m = re.search(r'<span.*?">(.*?)</span>', ctrl.group())
        label = label_m.group(1) if label_m else None
        link = None
        if ctrl.group(1) == 'a':
            href = re.search(r'href="([^"]*)"', ctrl.group())
            if href:
                link = urljoin(url, href.group(1))
        objs.append(navObj(label, link))
    return objs