def login(username, password):
    csrf_token = _get_csrf_token(request('get', url=constant.LOGIN_URL).text)
    if os.getenv('DEBUG'):
        logger.info('Getting CSRF token ...')
        logger.info('CSRF token is {}'.format(csrf_token))

    login_dict = {
        'csrfmiddlewaretoken': csrf_token,
        'username_or_email': username,
        'password': password,
    }
    resp = request('post', url=constant.LOGIN_URL, data=login_dict)

    if 'You\'re loading pages way too quickly.' in resp.text:
        csrf_token = _get_csrf_token(resp.text)
        resp = request('post', url=resp.url, data={'csrfmiddlewaretoken': csrf_token, 'next': '/'})

    if 'Invalid username/email or password' in resp.text:
        logger.error('Login failed, please check your username and password')
        exit(1)

    if 'You\'re loading pages way too quickly.' in resp.text:
        logger.error('The anti-bot challenge keeps appearing; please submit an issue'
                     ' at https://github.com/RicterZ/nhentai/issues')
        exit(2)
def generate_pdf(output_dir='.', doujinshi_obj=None, rm_origin_dir=False):
    """Write images to a PDF file using img2pdf."""
    try:
        import img2pdf

        if doujinshi_obj is not None:
            doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
            pdf_filename = os.path.join(
                os.path.join(doujinshi_dir, '..'),
                '{}.pdf'.format(doujinshi_obj.filename))
        else:
            pdf_filename = './doujinshi.pdf'
            doujinshi_dir = '.'

        file_list = os.listdir(doujinshi_dir)
        file_list.sort()

        logger.info('Writing PDF file to path: {}'.format(pdf_filename))
        with open(pdf_filename, 'wb') as pdf_f:
            full_path_list = [os.path.join(doujinshi_dir, image) for image in file_list]
            pdf_f.write(img2pdf.convert(full_path_list))

        if rm_origin_dir:
            shutil.rmtree(doujinshi_dir, ignore_errors=True)

        logger.log(15, 'PDF file has been written to \'{0}\''.format(pdf_filename))
    except ImportError:
        logger.error('Please install the img2pdf package using pip.')
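# Illustrative usage of generate_pdf above (a sketch: `doujinshi` stands for
# any object carrying the `filename` attribute the function expects):
#
#     generate_pdf(output_dir='./downloads', doujinshi_obj=doujinshi, rm_origin_dir=False)
#
# Called with no doujinshi_obj, it bundles the images in the current
# directory into ./doujinshi.pdf instead.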
def tag_parser(tag_name, max_page=1):
    result = []
    tag_name = tag_name.lower()
    tag_name = tag_name.replace(' ', '-')

    for p in range(1, max_page + 1):
        logger.debug('Fetching page {0} for doujinshi with tag \'{1}\''.format(p, tag_name))

        response = request('get', url='%s/%s?page=%d' % (constant.TAG_URL, tag_name, p)).content
        html = BeautifulSoup(response, 'html.parser')
        doujinshi_items = html.find_all('div', attrs={'class': 'gallery'})
        if not doujinshi_items:
            logger.error('Cannot find doujinshi id of tag \'{0}\''.format(tag_name))
            return

        for i in doujinshi_items:
            doujinshi_id = i.a.attrs['href'].strip('/g')  # href looks like '/g/<id>/'; strip the '/g' wrapper
            doujinshi_title = i.a.text.strip()
            doujinshi_title = doujinshi_title if len(doujinshi_title) < 85 else doujinshi_title[:82] + '...'
            result.append({'title': doujinshi_title, 'id': doujinshi_id})

    if not result:
        logger.warning('No results for tag \'{}\''.format(tag_name))

    return result
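# Illustrative usage of tag_parser above (tag names are lower-cased and
# spaces become hyphens before the request; the tag here is a placeholder):
#
#     results = tag_parser('story arc', max_page=2)
#     # -> [{'title': '<title>', 'id': '<id>'}, ...], or None if a page has no galleries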
def _download(self, url, folder='', filename='', retried=False):
    logger.info('Start downloading: {0} ...'.format(url))
    filename = filename if filename else os.path.basename(urlparse(url).path)
    base_filename, extension = os.path.splitext(filename)
    try:
        # zero-pad the page number so files sort correctly, e.g. '1.jpg' -> '001.jpg'
        with open(os.path.join(folder, base_filename.zfill(3) + extension), 'wb') as f:
            response = request('get', url, stream=True, timeout=self.timeout)
            length = response.headers.get('content-length')
            if length is None:
                f.write(response.content)
            else:
                for chunk in response.iter_content(2048):
                    f.write(chunk)
    except requests.HTTPError as e:
        if not retried:
            logger.error('Error: {0}, retrying'.format(str(e)))
            return self._download(url=url, folder=folder, filename=filename, retried=True)
        else:
            return None
    except Exception as e:
        logger.critical(str(e))
        return None

    return url
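# Worked example of the filename handling in _download above, using only the
# standard library (the URL is hypothetical but follows the '/galleries/'
# pattern parsed elsewhere in this module):
from urllib.parse import urlparse as _urlparse
import os.path as _osp

_name = _osp.basename(_urlparse('https://example.net/galleries/12345/7.jpg').path)
_base, _ext = _osp.splitext(_name)   # ('7', '.jpg')
print(_base.zfill(3) + _ext)         # '007.jpg' -- zero-padding keeps pages in lexicographic order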
def check_cookie():
    response = request('get', constant.BASE_URL).text
    username = re.findall(r'"/users/\d+/(.*?)"', response)
    if not username:
        logger.error('Cannot get your username, please check your cookie or use `nhentai --cookie` to set your cookie')
    else:
        logger.info('Logged in successfully! Your username: {}'.format(username[0]))
def login(username, password):
    logger.warning('This feature is deprecated, please use --cookie to set your cookie.')
    csrf_token = _get_csrf_token(request('get', url=constant.LOGIN_URL).text)
    if os.getenv('DEBUG'):
        logger.info('Getting CSRF token ...')
        logger.info('CSRF token is {}'.format(csrf_token))

    login_dict = {
        'csrfmiddlewaretoken': csrf_token,
        'username_or_email': username,
        'password': password,
    }
    resp = request('post', url=constant.LOGIN_URL, data=login_dict)

    if 'You\'re loading pages way too quickly.' in resp.text or 'Really, slow down' in resp.text:
        csrf_token = _get_csrf_token(resp.text)
        resp = request('post', url=resp.url, data={'csrfmiddlewaretoken': csrf_token, 'next': '/'})

    if 'Invalid username/email or password' in resp.text:
        logger.error('Login failed, please check your username and password')
        exit(1)

    if 'You\'re loading pages way too quickly.' in resp.text or 'Really, slow down' in resp.text:
        logger.error('Use `nhentai --cookie \'YOUR_COOKIE_HERE\'` to save your cookie.')
        exit(2)
def load_config():
    if not os.path.exists(constant.NHENTAI_CONFIG_FILE):
        return

    try:
        with open(constant.NHENTAI_CONFIG_FILE, 'r') as f:
            constant.CONFIG = json.load(f)
    except json.JSONDecodeError:
        logger.error('Failed to load config file.')
        write_config()
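# A sketch of the JSON this loader expects, reconstructed from the CONFIG
# keys read elsewhere in this module (the real path behind
# constant.NHENTAI_CONFIG_FILE is not shown here):
#
#     {
#         "proxy": {"http": "", "https": ""},
#         "cookie": "",
#         "language": "",
#         "template": "default"
#     }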
def check_cookie():
    response = request('get', constant.BASE_URL)
    if response.status_code == 503 and 'cf-browser-verification' in response.text:
        logger.error('Blocked by Cloudflare captcha, please set your cookie and user agent')
        exit(-1)

    username = re.findall(r'"/users/\d+/(.*?)"', response.text)
    if not username:
        logger.warning('Cannot get your username, please check your cookie or use `nhentai --cookie` to set your cookie')
    else:
        logger.info('Logged in successfully! Your username: {}'.format(username[0]))
def login_parser():
    html = BeautifulSoup(request('get', constant.FAV_URL).content, 'html.parser')
    count = html.find('span', attrs={'class': 'count'})
    if not count:
        logger.error("Can't get your number of favorited doujinshi. Did the login fail?")
        return

    count = int(count.text.strip('(').strip(')').replace(',', ''))
    if count == 0:
        logger.warning('No favorites found')
        return []

    pages = int(count / 25)
    if pages:
        pages += 1 if count % (25 * pages) else 0
    else:
        pages = 1

    logger.info('You have %d favorites in %d pages.' % (count, pages))

    if os.getenv('DEBUG'):
        pages = 1

    ret = []
    doujinshi_id = re.compile(r'data-id="([\d]+)"')

    def _callback(request, result):
        ret.append(result)

    thread_pool = threadpool.ThreadPool(5)

    for page in range(1, pages + 1):
        try:
            logger.info('Getting doujinshi ids of page %d' % page)
            resp = request('get', constant.FAV_URL + '?page=%d' % page).text
            ids = doujinshi_id.findall(resp)
            requests_ = threadpool.makeRequests(doujinshi_parser, ids, _callback)
            [thread_pool.putRequest(req) for req in requests_]
            thread_pool.wait()
        except Exception as e:
            logger.error('Error: %s, continue', str(e))

    return ret
def tag_parser(tag_name, sorting='date', max_page=1, index=0):
    result = []
    tag_name = tag_name.lower()
    if ',' in tag_name:
        tag_name = [i.strip().replace(' ', '-') for i in tag_name.split(',')]
    else:
        tag_name = tag_name.strip().replace(' ', '-')

    if sorting == 'date':
        sorting = ''

    # str covers both cases on Python 3; `unicode` only exists on Python 2
    if sys.version_info >= (3, 0, 0):
        unicode_ = str
    else:
        unicode_ = unicode

    for p in range(1, max_page + 1):
        if isinstance(tag_name, (str, unicode_)):
            logger.debug('Fetching page {0} for doujinshi with tag \'{1}\''.format(p, tag_name))
            response = request('get', url='%s/%s/%s?page=%d' % (constant.TAG_URL[index], tag_name, sorting, p)).content
            result += _get_title_and_id(response)
        else:
            for i in tag_name:
                logger.debug('Fetching page {0} for doujinshi with tag \'{1}\''.format(p, i))
                response = request('get', url='%s/%s/%s?page=%d' % (constant.TAG_URL[index], i, sorting, p)).content
                result += _get_title_and_id(response)

        if not result:
            logger.error('Cannot find doujinshi id of tag \'{0}\''.format(tag_name))
            return

    if not result:
        logger.warning('No results for tag \'{}\''.format(tag_name))

    return result
def favorites_parser(page=None):
    result = []
    html = BeautifulSoup(request('get', constant.FAV_URL).content, 'html.parser')
    count = html.find('span', attrs={'class': 'count'})
    if not count:
        logger.error("Can't get your number of favorited doujinshi. Did the login fail?")
        return []

    count = int(count.text.strip('(').strip(')').replace(',', ''))
    if count == 0:
        logger.warning('No favorites found')
        return []

    pages = int(count / 25)

    if page:
        page_range_list = page
    else:
        if pages:
            pages += 1 if count % (25 * pages) else 0
        else:
            pages = 1

        logger.info('You have %d favorites in %d pages.' % (count, pages))

        if os.getenv('DEBUG'):
            pages = 1

        page_range_list = range(1, pages + 1)

    for page in page_range_list:
        try:
            logger.info('Getting doujinshi ids of page %d' % page)
            resp = request('get', constant.FAV_URL + '?page=%d' % page).content
            result.extend(_get_title_and_id(resp))
        except Exception as e:
            logger.error('Error: %s, continue', str(e))

    return result
def tag_guessing(tag_name):
    tag_name = tag_name.lower()
    tag_name = tag_name.replace(' ', '-')
    logger.info('Trying to get tag_id of tag \'{0}\''.format(tag_name))

    response = request('get', url='%s/%s' % (constant.TAG_URL, tag_name)).content
    html = BeautifulSoup(response, 'html.parser')
    first_item = html.find('div', attrs={'class': 'gallery'})
    if not first_item:
        logger.error('Cannot find doujinshi id of tag \'{0}\''.format(tag_name))
        return

    doujinshi_id = re.findall(r'(\d+)', first_item.a.attrs['href'])
    if not doujinshi_id:
        logger.error('Cannot find doujinshi id of tag \'{0}\''.format(tag_name))
        return

    ret = doujinshi_parser(doujinshi_id[0])
    if 'tag' in ret and tag_name in ret['tag']:
        tag_id = ret['tag'][tag_name]
        logger.info('Tag id of tag \'{0}\' is {1}'.format(tag_name, tag_id))
    else:
        logger.error('Cannot find doujinshi id of tag \'{0}\''.format(tag_name))
        return

    return tag_id
def tag_parser(tag_name, max_page=1):
    result = []
    tag_name = tag_name.lower()
    tag_name = tag_name.replace(' ', '-')

    for p in range(1, max_page + 1):
        logger.debug('Fetching page {0} for doujinshi with tag \'{1}\''.format(p, tag_name))

        response = request('get', url='%s/%s?page=%d' % (constant.TAG_URL, tag_name, p)).content
        result += _get_title_and_id(response)  # accumulate across pages instead of overwriting
        if not result:
            logger.error('Cannot find doujinshi id of tag \'{0}\''.format(tag_name))
            return

    if not result:
        logger.warning('No results for tag \'{}\''.format(tag_name))

    return result
def login_parser(username, password):
    s = requests.Session()
    s.proxies = constant.PROXY
    s.verify = False
    s.headers.update({'Referer': constant.LOGIN_URL})

    s.get(constant.LOGIN_URL)
    content = s.get(constant.LOGIN_URL).content
    html = BeautifulSoup(content, 'html.parser')
    csrf_token_elem = html.find('input', attrs={'name': 'csrfmiddlewaretoken'})

    if not csrf_token_elem:
        raise Exception('Cannot find csrf token to login')
    csrf_token = csrf_token_elem.attrs['value']

    login_dict = {
        'csrfmiddlewaretoken': csrf_token,
        'username_or_email': username,
        'password': password,
    }
    resp = s.post(constant.LOGIN_URL, data=login_dict)
    if 'Invalid username (or email) or password' in resp.text:
        logger.error('Login failed, please check your username and password')
        exit(1)

    html = BeautifulSoup(s.get(constant.FAV_URL).content, 'html.parser')
    count = html.find('span', attrs={'class': 'count'})
    if not count:
        logger.error('Cannot get count of your favorites, maybe login failed.')
        return

    count = int(count.text.strip('(').strip(')'))
    pages = count // 25  # integer division; `count / 25` is a float on Python 3
    if pages:
        pages += 1 if count % (25 * pages) else 0
    else:
        pages = 1

    logger.info('You have %d favorites in %d pages.' % (count, pages))

    if os.getenv('DEBUG'):
        pages = 1

    ret = []
    doujinshi_id = re.compile(r'data-id="([\d]+)"')

    def _callback(request, result):
        ret.append(result)

    thread_pool = threadpool.ThreadPool(5)

    for page in range(1, pages + 1):
        try:
            logger.info('Getting doujinshi id of page %d' % page)
            resp = s.get(constant.FAV_URL + '?page=%d' % page).text  # .text, not .content: the pattern is str
            ids = doujinshi_id.findall(resp)
            requests_ = threadpool.makeRequests(doujinshi_parser, ids, _callback)
            [thread_pool.putRequest(req) for req in requests_]
            thread_pool.wait()
        except Exception as e:
            logger.error('Error: %s, continue', str(e))

    return ret
def page_range_parser(page_range, max_page_num):
    pages = set()
    ranges = str.split(page_range, ',')
    for range_str in ranges:
        idx = range_str.find('-')
        if idx == -1:
            try:
                page = int(range_str)
                if page <= max_page_num:
                    pages.add(page)
            except ValueError:
                logger.error('page range({0}) is not valid'.format(page_range))
        else:
            try:
                left = int(range_str[:idx])
                right = int(range_str[idx + 1:])
                if right > max_page_num:
                    right = max_page_num
                for page in range(left, right + 1):
                    pages.add(page)
            except ValueError:
                logger.error('page range({0}) is not valid'.format(page_range))

    return list(pages)
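# Worked example of the range syntax page_range_parser accepts (matching the
# '--page-range' help text, e.g. 1,2-5,14):
#
#     page_range_parser('1,2-5,14', max_page_num=10)
#     # -> a list containing {1, 2, 3, 4, 5}: the 2-5 range is expanded,
#     # duplicates collapse into the set, and 14 is dropped because it
#     # exceeds max_page_num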
def cmd_parser():
    parser = OptionParser()
    parser.add_option('--download', dest='is_download', action='store_true',
                      help='download doujinshi or not')
    parser.add_option('--id', type='int', dest='id', action='store',
                      help='doujinshi id of nhentai')
    parser.add_option('--ids', type='str', dest='ids', action='store',
                      help='doujinshi id set, e.g. 1,2,3')
    parser.add_option('--search', type='string', dest='keyword', action='store',
                      help='keyword to search for')
    parser.add_option('--page', type='int', dest='page', action='store', default=1,
                      help='page number of search results')
    parser.add_option('--path', type='string', dest='saved_path', action='store', default='',
                      help='path in which to save the doujinshi')
    parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5,
                      help='thread count for downloading doujinshi')
    parser.add_option('--timeout', type='int', dest='timeout', action='store', default=30,
                      help='timeout for downloading doujinshi')
    parser.add_option('--proxy', type='string', dest='proxy', action='store', default='',
                      help='use a proxy, example: http://127.0.0.1:1080')
    args, _ = parser.parse_args()

    if args.ids:
        _ = [id_.strip() for id_ in args.ids.split(',')]
        args.ids = set(map(int, filter(lambda id_: id_.isdigit(), _)))

    if args.is_download and not args.id and not args.ids and not args.keyword:
        logger.critical('Doujinshi id/ids is required for downloading')
        parser.print_help()
        exit(0)

    if args.id:
        args.ids = (args.id, ) if not args.ids else args.ids

    if not args.keyword and not args.ids:
        parser.print_help()
        exit(0)

    if args.threads <= 0:
        args.threads = 1
    elif args.threads > 10:
        logger.critical('Maximum number of used threads is 10')
        exit(0)

    if args.proxy:
        from urllib.parse import urlparse  # Python 3; the original `import urlparse` was Python 2 only
        proxy_url = urlparse(args.proxy)
        if proxy_url.scheme not in ('http', 'https'):
            logger.error('Invalid protocol \'{0}\' of proxy, ignored'.format(proxy_url.scheme))
        else:
            constant.PROXY = {proxy_url.scheme: args.proxy}

    return args
def main():
    banner()

    if sys.version_info < (3, 0, 0):
        logger.error('nhentai now only supports Python 3.x')
        exit(1)

    options = cmd_parser()
    logger.info('Using mirror: {0}'.format(BASE_URL))

    # CONFIG['proxy'] will be changed after cmd_parser()
    if constant.CONFIG['proxy']['http']:
        logger.info('Using proxy: {0}'.format(constant.CONFIG['proxy']['http']))

    if not constant.CONFIG['template']:
        constant.CONFIG['template'] = 'default'

    logger.info('Using viewer template "{}"'.format(constant.CONFIG['template']))

    # check your cookie
    check_cookie()

    doujinshis = []
    doujinshi_ids = []
    doujinshi_list = []

    page_list = paging(options.page)

    if options.favorites:
        if not options.is_download:
            logger.warning('You did not specify the --download option')
        doujinshis = favorites_parser(page=page_list)
    elif options.keyword:
        if constant.CONFIG['language']:
            logger.info('Using default language: {0}'.format(constant.CONFIG['language']))
            options.keyword += ' language:{}'.format(constant.CONFIG['language'])
        doujinshis = search_parser(options.keyword, sorting=options.sorting,
                                   page=page_list, is_page_all=options.page_all)
    elif not doujinshi_ids:
        doujinshi_ids = options.id

    print_doujinshi(doujinshis)
    if options.is_download and doujinshis:
        doujinshi_ids = [i['id'] for i in doujinshis]

        if options.is_save_download_history:
            with DB() as db:
                data = map(int, db.get_all())

            doujinshi_ids = list(set(map(int, doujinshi_ids)) - set(data))

    if doujinshi_ids:
        for i, id_ in enumerate(doujinshi_ids):
            if options.delay:
                time.sleep(options.delay)

            doujinshi_info = doujinshi_parser(id_)

            if doujinshi_info:
                doujinshi_list.append(Doujinshi(name_format=options.name_format, **doujinshi_info))

            if (i + 1) % 10 == 0:
                logger.info('Progress: %d / %d' % (i + 1, len(doujinshi_ids)))

    if not options.is_show:
        downloader = Downloader(path=options.output_dir, size=options.threads,
                                timeout=options.timeout, delay=options.delay)

        for doujinshi in doujinshi_list:
            if not options.dryrun:
                doujinshi.downloader = downloader
                doujinshi.download()

            if options.generate_metadata:
                table = doujinshi.table
                generate_metadata_file(options.output_dir, table, doujinshi)

            if options.is_save_download_history:
                with DB() as db:
                    db.add_one(doujinshi.id)

            if not options.is_nohtml and not options.is_cbz and not options.is_pdf:
                generate_html(options.output_dir, doujinshi, template=constant.CONFIG['template'])
            elif options.is_cbz:
                generate_cbz(options.output_dir, doujinshi, options.rm_origin_dir)
            elif options.is_pdf:
                generate_pdf(options.output_dir, doujinshi, options.rm_origin_dir)

        if options.main_viewer:
            generate_main_html(options.output_dir)

        if not platform.system() == 'Windows':
            logger.log(15, '🍻 All done.')
        else:
            logger.log(15, 'All done.')
    else:
        [doujinshi.show() for doujinshi in doujinshi_list]
def cmd_parser():
    parser = OptionParser('\n nhentai --search [keyword] --download'
                          '\n NHENTAI=http://h.loli.club nhentai --id [ID ...]'
                          '\n nhentai --file [filename]'
                          '\n\nEnvironment Variable:\n'
                          ' NHENTAI nhentai mirror url')
    # operation options
    parser.add_option('--download', '-D', dest='is_download', action='store_true',
                      help='download doujinshi (for search results)')
    parser.add_option('--show', '-S', dest='is_show', action='store_true',
                      help='just show the doujinshi information')

    # doujinshi options
    parser.add_option('--id', type='string', dest='id', action='store',
                      help='doujinshi ids set, e.g. 1,2,3')
    parser.add_option('--search', '-s', type='string', dest='keyword', action='store',
                      help='search doujinshi by keyword')
    parser.add_option('--tag', type='string', dest='tag', action='store',
                      help='download doujinshi by tag')
    parser.add_option('--artist', type='string', dest='artist', action='store',
                      help='download doujinshi by artist')
    parser.add_option('--character', type='string', dest='character', action='store',
                      help='download doujinshi by character')
    parser.add_option('--parody', type='string', dest='parody', action='store',
                      help='download doujinshi by parody')
    parser.add_option('--group', type='string', dest='group', action='store',
                      help='download doujinshi by group')
    parser.add_option('--language', type='string', dest='language', action='store',
                      help='download doujinshi by language')
    parser.add_option('--favorites', '-F', action='store_true', dest='favorites',
                      help='list or download your favorites')

    # page options
    parser.add_option('--page', type='int', dest='page', action='store', default=1,
                      help='page number of search results')
    parser.add_option('--max-page', type='int', dest='max_page', action='store', default=1,
                      help='the max page when recursively downloading tagged doujinshi')
    parser.add_option('--page-range', type='string', dest='page_range', action='store',
                      help='page range of favorites, e.g. 1,2-5,14')
    parser.add_option('--sorting', dest='sorting', action='store', default='date',
                      help='sorting of doujinshi (date / popular)',
                      choices=['date', 'popular'])

    # download options
    parser.add_option('--output', '-o', type='string', dest='output_dir', action='store', default='',
                      help='output dir')
    parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5,
                      help='thread count for downloading doujinshi')
    parser.add_option('--timeout', '-T', type='int', dest='timeout', action='store', default=30,
                      help='timeout for downloading doujinshi')
    parser.add_option('--delay', '-d', type='int', dest='delay', action='store', default=0,
                      help='slow down between downloading every doujinshi')
    parser.add_option('--proxy', '-p', type='string', dest='proxy', action='store', default='',
                      help='store a proxy, for example: -p \'http://127.0.0.1:1080\'')
    parser.add_option('--file', '-f', type='string', dest='file', action='store',
                      help='read gallery IDs from file')
    parser.add_option('--format', type='string', dest='name_format', action='store',
                      help='format the saved folder name', default='[%i][%a][%t]')

    # generate options
    parser.add_option('--html', dest='html_viewer', action='store_true',
                      help='generate an HTML viewer in the current directory')
    parser.add_option('--no-html', dest='is_nohtml', action='store_true',
                      help='don\'t generate HTML after downloading')
    parser.add_option('--gen-main', dest='main_viewer', action='store_true',
                      help='generate a main viewer containing all the doujinshi in the folder')
    parser.add_option('--cbz', '-C', dest='is_cbz', action='store_true',
                      help='generate Comic Book CBZ file')
    parser.add_option('--comic-info', dest='write_comic_info', action='store_true',
                      help='when generating a Comic Book CBZ file, also write ComicInfo.xml')
    parser.add_option('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False,
                      help='remove the downloaded doujinshi dir after generating the CBZ file')

    # nhentai options
    parser.add_option('--cookie', type='str', dest='cookie', action='store',
                      help='set cookie of nhentai to bypass Google recaptcha')
    parser.add_option('--save-download-history', dest='is_save_download_history',
                      action='store_true', default=False,
                      help='record downloaded doujinshi; they will be skipped if you re-download them')
    parser.add_option('--clean-download-history', action='store_true', default=False,
                      dest='clean_download_history', help='clean download history')

    try:
        sys.argv = [unicode(i.decode(sys.stdin.encoding)) for i in sys.argv]
        print()
    except (NameError, TypeError):
        pass
    except UnicodeDecodeError:
        exit(0)

    args, _ = parser.parse_args(sys.argv[1:])

    if args.html_viewer:
        generate_html()
        exit(0)

    if args.main_viewer and not args.id and not args.keyword and \
            not args.tag and not args.artist and not args.character and \
            not args.parody and not args.group and not args.language and not args.favorites:
        generate_main_html()
        exit(0)

    if args.clean_download_history:
        with DB() as db:
            db.clean_all()

        logger.info('Download history cleaned.')
        exit(0)

    if os.path.exists(constant.NHENTAI_COOKIE):
        with open(constant.NHENTAI_COOKIE, 'r') as f:
            constant.COOKIE = f.read()

    if args.cookie:
        try:
            if not os.path.exists(constant.NHENTAI_HOME):
                os.mkdir(constant.NHENTAI_HOME)

            with open(constant.NHENTAI_COOKIE, 'w') as f:
                f.write(args.cookie)
        except Exception as e:
            logger.error('Cannot create NHENTAI_HOME: {}'.format(str(e)))
            exit(1)

        logger.info('Cookie saved.')
        exit(0)

    if os.path.exists(constant.NHENTAI_PROXY):
        with open(constant.NHENTAI_PROXY, 'r') as f:
            link = f.read()
            constant.PROXY = {'http': link, 'https': link}

    if args.proxy:
        try:
            if not os.path.exists(constant.NHENTAI_HOME):
                os.mkdir(constant.NHENTAI_HOME)

            proxy_url = urlparse(args.proxy)
            if proxy_url.scheme not in ('http', 'https'):
                logger.error('Invalid protocol \'{0}\' of proxy, ignored'.format(proxy_url.scheme))
            else:
                with open(constant.NHENTAI_PROXY, 'w') as f:
                    f.write(args.proxy)
        except Exception as e:
            logger.error('Cannot create NHENTAI_HOME: {}'.format(str(e)))
            exit(1)

        logger.info('Proxy \'{0}\' saved.'.format(args.proxy))
        exit(0)

    if args.favorites:
        if not constant.COOKIE:
            logger.warning('Cookie has not been set, please use `nhentai --cookie \'COOKIE\'` to set it.')
            exit(1)

    if args.id:
        _ = [i.strip() for i in args.id.split(',')]
        args.id = set(int(i) for i in _ if i.isdigit())

    if args.file:
        with open(args.file, 'r') as f:
            _ = [i.strip() for i in f.readlines()]
            args.id = set(int(i) for i in _ if i.isdigit())

    if (args.is_download or args.is_show) and not args.id and not args.keyword and \
            not args.tag and not args.artist and not args.character and \
            not args.parody and not args.group and not args.language and not args.favorites:
        logger.critical('Doujinshi id(s) are required for downloading')
        parser.print_help()
        exit(1)

    if not args.keyword and not args.id and not args.tag and not args.artist and \
            not args.character and not args.parody and not args.group and not args.language and not args.favorites:
        parser.print_help()
        exit(1)

    if args.threads <= 0:
        args.threads = 1
    elif args.threads > 15:
        logger.critical('Maximum number of used threads is 15')
        exit(1)

    return args
def signal_handler(signal, frame):
    logger.error('Ctrl-C signal received. Quit.')
    exit(1)
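# A minimal sketch of installing the handler above; where the real program
# registers it is not shown in this file, but the standard-library call is:
import signal

signal.signal(signal.SIGINT, signal_handler)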
def cmd_parser():
    load_config()

    parser = OptionParser('\n nhentai --search [keyword] --download'
                          '\n NHENTAI=http://h.loli.club nhentai --id [ID ...]'
                          '\n nhentai --file [filename]'
                          '\n\nEnvironment Variable:\n'
                          ' NHENTAI nhentai mirror url')
    # operation options
    parser.add_option('--download', '-D', dest='is_download', action='store_true',
                      help='download doujinshi (for search results)')
    parser.add_option('--show', '-S', dest='is_show', action='store_true',
                      help='just show the doujinshi information')

    # doujinshi options
    parser.add_option('--id', type='string', dest='id', action='store',
                      help='doujinshi ids set, e.g. 1,2,3')
    parser.add_option('--search', '-s', type='string', dest='keyword', action='store',
                      help='search doujinshi by keyword')
    parser.add_option('--favorites', '-F', action='store_true', dest='favorites',
                      help='list or download your favorites')

    # page options
    parser.add_option('--page-all', dest='page_all', action='store_true', default=False,
                      help='all search results')
    parser.add_option('--page', '--page-range', type='string', dest='page', action='store', default='',
                      help='page number of search results, e.g. 1,2-5,14')
    parser.add_option('--sorting', dest='sorting', action='store', default='recent',
                      help='sorting of doujinshi (recent / popular / popular-[today|week])',
                      choices=['recent', 'popular', 'popular-today', 'popular-week'])

    # download options
    parser.add_option('--output', '-o', type='string', dest='output_dir', action='store', default='./',
                      help='output dir')
    parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5,
                      help='thread count for downloading doujinshi')
    parser.add_option('--timeout', '-T', type='int', dest='timeout', action='store', default=30,
                      help='timeout for downloading doujinshi')
    parser.add_option('--delay', '-d', type='int', dest='delay', action='store', default=0,
                      help='slow down between downloading every doujinshi')
    parser.add_option('--proxy', type='string', dest='proxy', action='store', default='',
                      help='store a proxy, for example: -p \'http://127.0.0.1:1080\'')
    parser.add_option('--file', '-f', type='string', dest='file', action='store',
                      help='read gallery IDs from file')
    parser.add_option('--format', type='string', dest='name_format', action='store',
                      help='format the saved folder name', default='[%i][%a][%t]')

    # generate options
    parser.add_option('--html', dest='html_viewer', action='store_true',
                      help='generate an HTML viewer in the current directory')
    parser.add_option('--no-html', dest='is_nohtml', action='store_true',
                      help='don\'t generate HTML after downloading')
    parser.add_option('--gen-main', dest='main_viewer', action='store_true',
                      help='generate a main viewer containing all the doujinshi in the folder')
    parser.add_option('--cbz', '-C', dest='is_cbz', action='store_true',
                      help='generate Comic Book CBZ file')
    parser.add_option('--pdf', '-P', dest='is_pdf', action='store_true',
                      help='generate PDF file')
    parser.add_option('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False,
                      help='remove the downloaded doujinshi dir after generating the CBZ or PDF file')

    # nhentai options
    parser.add_option('--cookie', type='str', dest='cookie', action='store',
                      help='set cookie of nhentai to bypass Google recaptcha')
    parser.add_option('--language', type='str', dest='language', action='store',
                      help='set default language to parse doujinshis')
    parser.add_option('--clean-language', dest='clean_language', action='store_true', default=False,
                      help='set DEFAULT as language to parse doujinshis')
    parser.add_option('--save-download-history', dest='is_save_download_history',
                      action='store_true', default=False,
                      help='record downloaded doujinshi; they will be skipped if you re-download them')
    parser.add_option('--clean-download-history', action='store_true', default=False,
                      dest='clean_download_history', help='clean download history')

    try:
        sys.argv = [unicode(i.decode(sys.stdin.encoding)) for i in sys.argv]
        print()
    except (NameError, TypeError):
        pass
    except UnicodeDecodeError:
        exit(0)

    args, _ = parser.parse_args(sys.argv[1:])

    if args.html_viewer:
        generate_html()
        exit(0)

    if args.main_viewer and not args.id and not args.keyword and not args.favorites:
        generate_main_html()
        exit(0)

    if args.clean_download_history:
        with DB() as db:
            db.clean_all()

        logger.info('Download history cleaned.')
        exit(0)

    # --- set config ---
    if args.cookie is not None:
        constant.CONFIG['cookie'] = args.cookie
        logger.info('Cookie saved.')
        write_config()
        exit(0)

    if args.language is not None:
        constant.CONFIG['language'] = args.language
        logger.info('Default language now set to \'{0}\''.format(args.language))
        write_config()
        exit(0)
        # TODO: search without language

    if args.proxy:
        proxy_url = urlparse(args.proxy)
        if not args.proxy == '' and proxy_url.scheme not in ('http', 'https'):
            logger.error('Invalid protocol \'{0}\' of proxy, ignored'.format(proxy_url.scheme))
            exit(0)
        else:
            constant.CONFIG['proxy'] = {
                'http': args.proxy,
                'https': args.proxy,
            }
            logger.info('Proxy now set to \'{0}\'.'.format(args.proxy))
            write_config()
            exit(0)
    # --- end set config ---

    if args.favorites:
        if not constant.CONFIG['cookie']:
            logger.warning('Cookie has not been set, please use `nhentai --cookie \'COOKIE\'` to set it.')
            exit(1)

    if args.id:
        _ = [i.strip() for i in args.id.split(',')]
        args.id = set(int(i) for i in _ if i.isdigit())

    if args.file:
        with open(args.file, 'r') as f:
            _ = [i.strip() for i in f.readlines()]
            args.id = set(int(i) for i in _ if i.isdigit())

    if (args.is_download or args.is_show) and not args.id and not args.keyword and not args.favorites:
        logger.critical('Doujinshi id(s) are required for downloading')
        parser.print_help()
        exit(1)

    if not args.keyword and not args.id and not args.favorites:
        parser.print_help()
        exit(1)

    if args.threads <= 0:
        args.threads = 1
    elif args.threads > 15:
        logger.critical('Maximum number of used threads is 15')
        exit(1)

    return args
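# Illustrative invocations of the parser defined above (flags as declared;
# the keyword and IDs are placeholders):
#
#     nhentai --search 'full color' --download --page-all
#     nhentai --id 1,2,3 --download --pdf --rm-origin-dir
#     nhentai --favorites --download --page 1,2-5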
def cmd_parser():
    parser = OptionParser('\n nhentai --search [keyword] --download'
                          '\n NHENTAI=http://h.loli.club nhentai --id [ID ...]'
                          '\n\nEnvironment Variable:\n'
                          ' NHENTAI nhentai mirror url')
    parser.add_option('--download', dest='is_download', action='store_true',
                      help='download doujinshi (for search results)')
    parser.add_option('--show-info', dest='is_show', action='store_true',
                      help='just show the doujinshi information')
    parser.add_option('--id', type='string', dest='id', action='store',
                      help='doujinshi ids set, e.g. 1,2,3')
    parser.add_option('--search', type='string', dest='keyword', action='store',
                      help='search doujinshi by keyword')
    parser.add_option('--page', type='int', dest='page', action='store', default=1,
                      help='page number of search results')
    parser.add_option('--tag', type='string', dest='tag', action='store',
                      help='download doujinshi by tag')
    parser.add_option('--max-page', type='int', dest='max_page', action='store', default=1,
                      help='the max page when recursively downloading tagged doujinshi')
    parser.add_option('--output', type='string', dest='output_dir', action='store', default='',
                      help='output dir')
    parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5,
                      help='thread count for downloading doujinshi')
    parser.add_option('--timeout', type='int', dest='timeout', action='store', default=30,
                      help='timeout for downloading doujinshi')
    parser.add_option('--proxy', type='string', dest='proxy', action='store', default='',
                      help='use a proxy, for example: http://127.0.0.1:1080')
    parser.add_option('--html', dest='html_viewer', action='store_true',
                      help='generate an HTML viewer in the current directory')
    parser.add_option('--login', '-l', type='str', dest='login', action='store',
                      help='username:password pair of nhentai account')
    parser.add_option('--nohtml', dest='is_nohtml', action='store_true',
                      help='don\'t generate HTML')
    parser.add_option('--cbz', dest='is_cbz', action='store_true',
                      help='generate Comic Book CBZ file')
    parser.add_option('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False,
                      help='remove the downloaded doujinshi dir after generating the CBZ file')

    try:
        sys.argv = list(map(lambda x: unicode(x.decode(sys.stdin.encoding)), sys.argv))
    except (NameError, TypeError):
        pass
    except UnicodeDecodeError:
        exit(0)

    args, _ = parser.parse_args(sys.argv[1:])

    if args.html_viewer:
        generate_html()
        exit(0)

    if args.login:
        try:
            _, _ = args.login.split(':', 1)
        except ValueError:
            logger.error('Invalid `username:password` pair.')
            exit(1)

        if not args.is_download:
            logger.warning('YOU DO NOT SPECIFY `--download` OPTION !!!')

    if args.id:
        _ = map(lambda id_: id_.strip(), args.id.split(','))
        args.id = set(map(int, filter(lambda id_: id_.isdigit(), _)))

    if (args.is_download or args.is_show) and not args.id and not args.keyword and \
            not args.login and not args.tag:
        logger.critical('Doujinshi id(s) are required for downloading')
        parser.print_help()
        exit(1)

    if not args.keyword and not args.id and not args.login and not args.tag:
        parser.print_help()
        exit(1)

    if args.threads <= 0:
        args.threads = 1
    elif args.threads > 15:
        logger.critical('Maximum number of used threads is 15')
        exit(1)

    if args.proxy:
        proxy_url = urlparse(args.proxy)
        if proxy_url.scheme not in ('http', 'https'):
            logger.error('Invalid protocol \'{0}\' of proxy, ignored'.format(proxy_url.scheme))
        else:
            constant.PROXY = {'http': args.proxy, 'https': args.proxy}

    return args
def signal_handler(signal, frame):
    logger.error('Ctrl-C signal received. Stopping...')
    exit(1)
def doujinshi_parser(id_):
    if not isinstance(id_, (int,)) and (isinstance(id_, (str,)) and not id_.isdigit()):
        raise Exception('Doujinshi id({0}) is not valid'.format(id_))

    id_ = int(id_)
    logger.log(15, 'Fetching doujinshi information of id {0}'.format(id_))
    doujinshi = dict()
    doujinshi['id'] = id_
    url = '{0}/{1}/'.format(constant.DETAIL_URL, id_)

    try:
        response = request('get', url)
        if response.status_code in (200,):
            response = response.content
        elif response.status_code in (404,):
            logger.error('Doujinshi with id {0} cannot be found'.format(id_))
            return []
        else:
            logger.debug('Slow down and retry ({}) ...'.format(id_))
            time.sleep(1)
            return doujinshi_parser(str(id_))

    except Exception as e:
        logger.warning('Error: {}, ignored'.format(str(e)))
        return None

    html = BeautifulSoup(response, 'html.parser')
    doujinshi_info = html.find('div', attrs={'id': 'info'})

    title = doujinshi_info.find('h1').text
    pretty_name = doujinshi_info.find('h1').find('span', attrs={'class': 'pretty'}).text
    subtitle = doujinshi_info.find('h2')

    doujinshi['name'] = title
    doujinshi['pretty_name'] = pretty_name
    doujinshi['subtitle'] = subtitle.text if subtitle else ''

    doujinshi_cover = html.find('div', attrs={'id': 'cover'})
    img_id = re.search(r'/galleries/([0-9]+)/cover.(jpg|png|gif)$',
                       doujinshi_cover.a.img.attrs['data-src'])

    ext = []
    for i in html.find_all('div', attrs={'class': 'thumb-container'}):
        _, ext_name = os.path.basename(i.img.attrs['data-src']).rsplit('.', 1)
        ext.append(ext_name)

    if not img_id:
        logger.critical('Failed to get image id')
        exit(1)

    doujinshi['img_id'] = img_id.group(1)
    doujinshi['ext'] = ext

    for _ in doujinshi_info.find_all('div', class_='tag-container field-name'):
        if re.search('Pages:', _.text):
            pages = _.find('span', class_='name').string
            doujinshi['pages'] = int(pages)

    # gather the doujinshi's metadata fields
    information_fields = doujinshi_info.find_all('div', attrs={'class': 'field-name'})
    needed_fields = ['Characters', 'Artists', 'Languages', 'Tags', 'Parodies', 'Groups', 'Categories']
    for field in information_fields:
        field_name = field.contents[0].strip().strip(':')
        if field_name in needed_fields:
            data = [sub_field.find('span', attrs={'class': 'name'}).contents[0].strip()
                    for sub_field in field.find_all('a', attrs={'class': 'tag'})]
            doujinshi[field_name.lower()] = ', '.join(data)

    time_field = doujinshi_info.find('time')
    if time_field.has_attr('datetime'):
        doujinshi['date'] = time_field['datetime']

    return doujinshi
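# Shape of the dict doujinshi_parser returns, reconstructed from the
# assignments above (all values here are hypothetical):
#
#     {
#         'id': 123, 'name': '...', 'pretty_name': '...', 'subtitle': '',
#         'img_id': '456', 'ext': ['jpg', 'jpg', 'png'], 'pages': 24,
#         'date': '2019-01-01T00:00:00+00:00',
#         # plus lowercase, comma-joined metadata fields such as
#         'artists': 'a, b', 'languages': 'japanese, translated', 'tags': '...',
#     }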
def cmd_parser():
    parser = OptionParser('\n nhentai --search [keyword] --download'
                          '\n NHENTAI=http://h.loli.club nhentai --id [ID ...]'
                          '\n\nEnvironment Variable:\n'
                          ' NHENTAI nhentai mirror url')
    parser.add_option('--download', dest='is_download', action='store_true',
                      help='download doujinshi (for search result)')
    parser.add_option('--show-info', dest='is_show', action='store_true',
                      help='just show the doujinshi information')
    parser.add_option('--id', type='string', dest='id', action='store',
                      help='doujinshi ids set, e.g. 1,2,3')
    parser.add_option('--search', type='string', dest='keyword', action='store',
                      help='search doujinshi by keyword')
    parser.add_option('--page', type='int', dest='page', action='store', default=1,
                      help='page number of search result')
    parser.add_option('--tags', type='string', dest='tags', action='store',
                      help='download doujinshi by tags')
    parser.add_option('--output', type='string', dest='output_dir', action='store', default='',
                      help='output dir')
    parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5,
                      help='thread count of download doujinshi')
    parser.add_option('--timeout', type='int', dest='timeout', action='store', default=30,
                      help='timeout of download doujinshi')
    parser.add_option('--proxy', type='string', dest='proxy', action='store', default='',
                      help='use proxy, example: http://127.0.0.1:1080')
    parser.add_option('--html', dest='html_viewer', action='store_true',
                      help='generate a html viewer at current directory')
    parser.add_option('--login', '-l', type='str', dest='login', action='store',
                      help='username:password pair of nhentai account')

    try:
        sys.argv = list(map(lambda x: unicode(x.decode(sys.stdin.encoding)), sys.argv))
    except (NameError, TypeError):
        pass
    except UnicodeDecodeError:
        exit(0)

    args, _ = parser.parse_args(sys.argv[1:])

    if args.html_viewer:
        generate_html()
        exit(0)

    if args.login:
        try:
            _, _ = args.login.split(':', 1)
        except ValueError:
            logger.error('Invalid `username:password` pair.')
            exit(1)

        if not args.is_download:
            logger.warning('YOU DO NOT SPECIFY `--download` OPTION !!!')

    if args.tags:
        logger.warning('`--tags` is under construction')
        exit(1)

    if args.id:
        _ = map(lambda id_: id_.strip(), args.id.split(','))
        args.id = set(map(int, filter(lambda id_: id_.isdigit(), _)))

    if (args.is_download or args.is_show) and not args.id and not args.keyword and not args.login:
        logger.critical('Doujinshi id(s) are required for downloading')
        parser.print_help()
        exit(1)

    if not args.keyword and not args.id and not args.login:
        parser.print_help()
        exit(1)

    if args.threads <= 0:
        args.threads = 1
    elif args.threads > 15:
        logger.critical('Maximum number of used threads is 15')
        exit(1)

    if args.proxy:
        proxy_url = urlparse(args.proxy)
        if proxy_url.scheme not in ('http', 'https'):
            logger.error('Invalid protocol \'{0}\' of proxy, ignored'.format(proxy_url.scheme))
        else:
            constant.PROXY = {'http': args.proxy, 'https': args.proxy}

    return args
def cmd_parser():
    parser = OptionParser('\n nhentai --search [keyword] --download'
                          '\n NHENTAI=http://h.loli.club nhentai --id [ID ...]'
                          '\n\nEnvironment Variable:\n'
                          ' NHENTAI nhentai mirror url')
    parser.add_option('--download', dest='is_download', action='store_true',
                      help='download doujinshi (for search result)')
    parser.add_option('--show-info', dest='is_show', action='store_true',
                      help='just show the doujinshi information')
    parser.add_option('--id', type='string', dest='id', action='store',
                      help='doujinshi ids set, e.g. 1,2,3')
    parser.add_option('--search', type='string', dest='keyword', action='store',
                      help='search doujinshi by keyword')
    parser.add_option('--page', type='int', dest='page', action='store', default=1,
                      help='page number of search result')
    parser.add_option('--tags', type='string', dest='tags', action='store',
                      help='download doujinshi by tags')
    parser.add_option('--output', type='string', dest='output_dir', action='store', default='',
                      help='output dir')
    parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5,
                      help='thread count of download doujinshi')
    parser.add_option('--timeout', type='int', dest='timeout', action='store', default=30,
                      help='timeout of download doujinshi')
    parser.add_option('--proxy', type='string', dest='proxy', action='store', default='',
                      help='use proxy, example: http://127.0.0.1:1080')

    try:
        sys.argv = list(map(lambda x: unicode(x.decode(sys.stdin.encoding)), sys.argv))
    except (NameError, TypeError):
        pass
    except UnicodeDecodeError:
        exit(0)

    args, _ = parser.parse_args(sys.argv[1:])

    if args.tags:
        logger.warning('`--tags` is under construction')
        exit(0)

    if args.id:
        _ = map(lambda id_: id_.strip(), args.id.split(','))
        args.id = set(map(int, filter(lambda id_: id_.isdigit(), _)))

    if (args.is_download or args.is_show) and not args.id and not args.keyword:
        logger.critical('Doujinshi id(s) are required for downloading')
        parser.print_help()
        exit(0)

    if not args.keyword and not args.id:
        parser.print_help()
        exit(0)

    if args.threads <= 0:
        args.threads = 1
    elif args.threads > 15:
        logger.critical('Maximum number of used threads is 15')
        exit(0)

    if args.proxy:
        proxy_url = urlparse(args.proxy)
        if proxy_url.scheme not in ('http', 'https'):
            logger.error('Invalid protocol \'{0}\' of proxy, ignored'.format(proxy_url.scheme))
        else:
            constant.PROXY = {proxy_url.scheme: args.proxy}

    return args