def print_version():
    """Log the program version together with its one-line tagline."""
    banner = 'version {}, a tiny downloader that scrapes the web.'
    log.i(banner.format(__version__))
def prepare(self, **kwargs):
    """Resolve the video ids, the title and the m3u8 streams of a video.

    When only ``self.url`` is known, the ``(tvid, videoid)`` pair is
    extracted from the URL or from the page content and stored in
    ``self.vid``; the title is then read from the ``og:title`` meta tag of
    the page referenced by the mixin endpoint.  Afterwards ``getVMS`` is
    queried and every entry of ``info['data']['vidl']`` is registered in
    ``self.streams``.

    Raises:
        AssertionError: if neither ``self.url`` nor ``self.vid`` is set, or
            if the VMS response code is not ``'A00000'``.
    """
    assert self.url or self.vid

    def first_match(candidates):
        # Mirrors ``match1(...) or match1(...) or ...``: stop at the first
        # truthy result, otherwise hand back whatever the last try returned.
        found = None
        for text, pattern in candidates:
            found = match1(text, pattern)
            if found:
                break
        return found

    if self.url and not self.vid:
        page = get_content(self.url)
        tvid = first_match((
            (self.url, r'#curid=(.+)_'),
            (self.url, r'tvid=([^&]+)'),
            (page, r'data-player-tvid="([^"]+)"'),
            (page, r'tv(?:i|I)d=(.+?)\&'),
            (page, r'param\[\'tvid\'\]\s*=\s*"(.+?)"'),
        ))
        videoid = first_match((
            (self.url, r'#curid=.+_(.*)$'),
            (self.url, r'vid=([^&]+)'),
            (page, r'data-player-videoid="([^"]+)"'),
            (page, r'vid=(.+?)\&'),
            (page, r'param\[\'vid\'\]\s*=\s*"(.+?)"'),
        ))
        self.vid = (tvid, videoid)

        mixin_url = 'http://mixer.video.iqiyi.com/jp/mixin/videos/{}'.format(
            tvid)
        mixin_text = get_content(mixin_url)
        # The payload is a JS assignment; drop the prefix to get plain JSON.
        prefix_length = len('var tvInfoJs=')
        mixin_json = json.loads(mixin_text[prefix_length:])
        real_page = get_content(mixin_json['url'])
        title_tag = get_parser(real_page).find('meta', property='og:title')
        self.title = title_tag['content']

    tvid, videoid = self.vid
    info = getVMS(tvid, videoid)
    assert info['code'] == 'A00000', "can't play this video"

    for stream in info['data']['vidl']:
        try:
            stream_id = self.vd_2_id[stream['vd']]
            # Stream ids present in ``self.stream_types`` are skipped.
            if stream_id in self.stream_types:
                continue
            profile = self.id_2_profile[stream_id]
            self.streams[stream_id] = {
                'video_profile': profile,
                'container': 'm3u8',
                'src': [stream['m3u']],
                'size': 0,
                'm3u8_url': stream['m3u'],
            }
        except Exception:
            # Best effort: report the entry that could not be mapped and
            # carry on with the remaining ones.
            log.i('vd: {} is not handled'.format(stream['vd']))
            log.i('info is {}'.format(stream))
def fetch_cna():
    """Return a URL-quoted ``cna`` cookie value.

    Lookup order:

    1. a ``cna`` cookie for the ``.youku.com`` domain among the imported
       ``cookies``;
    2. a ``cna`` cookie set by ``http://log.mmstat.com/eg.js``;
    3. a hard-coded fallback value (a warning is logged in this case).

    Returns:
        str: the cookie value, percent-quoted unless it already contains
        a ``%`` character.
    """
    def quote_cna(val):
        # A '%' is taken to mean the value is already percent-encoded.
        if '%' in val:
            return val
        return urllib.parse.quote(val)

    if cookies:
        for cookie in cookies:
            if cookie.name == 'cna' and cookie.domain == '.youku.com':
                log.i('Found cna in imported cookies. Use it')
                return quote_cna(cookie.value)

    url = 'http://log.mmstat.com/eg.js'
    # Only the headers are needed; close the connection as soon as they
    # have been read instead of leaking the response object.
    with urllib.request.urlopen(url) as response:
        headers = response.getheaders()

    for header_name, header_value in headers:
        if header_name.lower() != 'set-cookie':
            continue
        # Split on the first '=' only: cookie values may themselves contain
        # '=' (e.g. base64 padding), and a pair without '=' must not raise.
        name, _, value = header_value.split(';')[0].partition('=')
        if name == 'cna':
            return quote_cna(value)

    log.w('It seems that the client failed to fetch a cna cookie. '
          'Please load your own cookie if possible')
    return quote_cna('DOG4EdW4qzsCAbZyXbU+t7Jt')
def _build_arg_parser():
    """Create the ``lulu`` command-line parser with all its option groups."""
    parser = argparse.ArgumentParser(
        prog='lulu',
        usage='lulu [OPTION]... URL...',
        description='A tiny downloader that scrapes the web',
        add_help=False,
    )
    parser.add_argument('-V', '--version', action='store_true',
                        help='Print version and exit')
    parser.add_argument('-h', '--help', action='store_true',
                        help='Print this help message and exit')

    dry_run_grp = parser.add_argument_group('Dry-run options',
                                            '(no actual downloading)')
    dry_run_grp = dry_run_grp.add_mutually_exclusive_group()
    dry_run_grp.add_argument('-i', '--info', action='store_true',
                             help='Print extracted information')
    dry_run_grp.add_argument('-u', '--url', action='store_true',
                             help='Print extracted information with URLs')
    dry_run_grp.add_argument('--json', action='store_true',
                             help='Print extracted URLs in JSON format')

    download_grp = parser.add_argument_group('Download options')
    download_grp.add_argument('-n', '--no-merge', action='store_true',
                              default=False,
                              help='Do not merge video parts')
    download_grp.add_argument(
        '--no-caption', action='store_true',
        help='Do not download captions (subtitles, lyrics, danmaku, ...)')
    download_grp.add_argument('-f', '--force', action='store_true',
                              default=False,
                              help='Force overwriting existing files')
    download_grp.add_argument('-F', '--format', metavar='STREAM_ID',
                              help='Set video format to STREAM_ID')
    download_grp.add_argument('-O', '--output-filename', metavar='FILE',
                              help='Set output filename')
    download_grp.add_argument('-o', '--output-dir', metavar='DIR',
                              default='.', help='Set output directory')
    download_grp.add_argument('-p', '--player', metavar='PLAYER',
                              help='Stream extracted URL to a PLAYER')
    download_grp.add_argument('-c', '--cookies', metavar='COOKIES_FILE',
                              help='Load cookies.txt or cookies.sqlite')
    download_grp.add_argument('-t', '--timeout', metavar='SECONDS',
                              type=int, default=600,
                              help='Set socket timeout')
    download_grp.add_argument('-d', '--debug', action='store_true',
                              help='Show traceback and other debug info')
    download_grp.add_argument('-I', '--input-file', metavar='FILE',
                              type=argparse.FileType('r'),
                              help='Read non-playlist URLs from FILE')
    download_grp.add_argument('-P', '--password',
                              help='Set video visit password to PASSWORD')
    download_grp.add_argument('-l', '--playlist', action='store_true',
                              help='Prefer to download a playlist')
    download_grp.add_argument(
        '-T', '--thread', type=int, default=0,
        help=('Use multithreading to download (only works for multiple-parts '
              'video)'))

    proxy_grp = parser.add_argument_group('Proxy options')
    proxy_grp = proxy_grp.add_mutually_exclusive_group()
    proxy_grp.add_argument('-x', '--http-proxy', metavar='HOST:PORT',
                           help='Use an HTTP proxy for downloading')
    proxy_grp.add_argument('-y', '--extractor-proxy', metavar='HOST:PORT',
                           help='Use an HTTP proxy for extracting only')
    proxy_grp.add_argument('--no-proxy', action='store_true',
                           help='Never use a proxy')
    proxy_grp.add_argument('-s', '--socks-proxy', metavar='HOST:PORT',
                           help='Use an SOCKS5 proxy for downloading')

    # Hidden aliases for --format, plus the positional URL list.
    download_grp.add_argument('--stream', help=argparse.SUPPRESS)
    download_grp.add_argument('--itag', help=argparse.SUPPRESS)
    parser.add_argument('URL', nargs='*', help=argparse.SUPPRESS)
    return parser


def script_main(download, download_playlist, **kwargs):
    """Command-line entry point: parse ``sys.argv`` and start downloading.

    Args:
        download: callable forwarded to ``download_main`` for single URLs.
        download_playlist: callable forwarded to ``download_main`` for
            playlists.
        **kwargs: accepted for interface compatibility; not used here.

    Side effects:
        Configures the root logger and the default socket timeout, and
        updates the module-level ``force``, ``dry_run``, ``json_output``,
        ``player``, ``extractor_proxy`` and ``output_filename`` settings
        according to the parsed options.

    The process exits through ``sys.exit`` after printing help or the
    version, when no URL is given, and on errors (status 1, or 2 when
    ``--playlist`` is combined with ``--input-file``).  With ``--debug``
    exceptions are re-raised instead.
    """
    logging.basicConfig(format='[%(levelname)s] %(message)s')

    def print_version():
        version = __version__
        log.i('version {}, a tiny downloader that scrapes the web.'.format(
            version))

    parser = _build_arg_parser()
    args = parser.parse_args()

    if args.help:
        print_version()
        parser.print_help()
        sys.exit()
    if args.version:
        print_version()
        sys.exit()

    if args.debug:
        # Set level of root logger to DEBUG
        logging.getLogger().setLevel(logging.DEBUG)

    global force
    global dry_run
    global json_output
    global player
    global extractor_proxy
    global output_filename

    output_filename = args.output_filename
    extractor_proxy = args.extractor_proxy

    info_only = args.info
    if args.force:
        # -f/--force used to be parsed but never applied.
        force = True
    if args.url:
        dry_run = True
    if args.json:
        json_output = True
        # to fix extractors not use VideoExtractor
        dry_run = True
        info_only = False

    if args.cookies:
        load_cookies(args.cookies)

    caption = True
    stream_id = args.format or args.stream or args.itag
    if args.no_caption:
        caption = False
    if args.player:
        player = args.player
        caption = False

    if args.no_proxy:
        unset_proxy()
    else:
        if args.http_proxy:
            set_proxy(parse_host(args.http_proxy))
        if args.socks_proxy:
            set_socks_proxy(args.socks_proxy)

    URLs = []
    if args.input_file:
        logging.debug('you are trying to load urls from %s', args.input_file)
        if args.playlist:
            log.e("reading playlist from a file is unsupported "
                  "and won't make your life easier")
            sys.exit(2)
        # The context manager closes the file even if reading fails.
        with args.input_file as url_file:
            for line in url_file.read().splitlines():
                candidate = line.strip()
                # Blank lines are not URLs; skip them instead of failing
                # later on an empty download target.
                if candidate:
                    URLs.append(candidate)
    URLs.extend(args.URL)

    if not URLs:
        parser.print_help()
        sys.exit()

    socket.setdefaulttimeout(args.timeout)

    try:
        extra = {}
        if extractor_proxy:
            extra['extractor_proxy'] = extractor_proxy
        if stream_id:
            extra['stream_id'] = stream_id
        download_main(
            download, download_playlist, URLs, args.playlist,
            output_dir=args.output_dir, merge=not args.no_merge,
            info_only=info_only, json_output=json_output, caption=caption,
            password=args.password, thread=args.thread, **extra)
    except KeyboardInterrupt:
        if args.debug:
            raise
        sys.exit(1)
    except UnicodeEncodeError:
        if args.debug:
            raise
        log.e('[error] oops, the current environment does not seem to support '
              'Unicode.')
        log.e('please set it to a UTF-8-aware locale first,')
        log.e(
            'so as to save the video (with some Unicode characters) correctly.'
        )
        log.e('you can do it like this:')
        log.e(' (Windows) % chcp 65001 ')
        log.e(' (Linux) $ LC_CTYPE=en_US.UTF-8')
        sys.exit(1)
    except Exception:
        if args.debug:
            # Give the full context, then let the traceback surface.
            print_version()
            log.i(args)
            raise
        log.e('[error] oops, something went wrong.')
        log.e(
            'don\'t panic, c\'est la vie. please try the following steps:')
        log.e(' (1) Rule out any network problem.')
        log.e(' (2) Make sure lulu is up-to-date.')
        log.e(' (3) Check if the issue is already known, on')
        log.e(' https://github.com/iawia002/Lulu/issues')
        log.e(' (4) Run the command with \'--debug\' option,')
        log.e(' and report this issue with the full output.')
        sys.exit(1)