def search_parser(keyword, page):
    logger.debug('Searching doujinshis of keyword {0}'.format(keyword))
    result = []
    try:
        response = request('get', url=constant.SEARCH_URL, params={'q': keyword, 'page': page}).content
    except requests.ConnectionError as e:
        logger.critical(e)
        logger.warn('If you are in China, please configure the proxy to fu*k GFW.')
        raise SystemExit

    html = BeautifulSoup(response, 'html.parser')
    doujinshi_search_result = html.find_all('div', attrs={'class': 'gallery'})
    for doujinshi in doujinshi_search_result:
        doujinshi_container = doujinshi.find('div', attrs={'class': 'caption'})
        title = doujinshi_container.text.strip()
        title = title if len(title) < 85 else title[:82] + '...'
        id_ = re.search(r'/g/(\d+)/', doujinshi.a['href']).group(1)
        result.append({'id': id_, 'title': title})

    if not result:
        logger.warn('No results found for keyword {}'.format(keyword))

    return result
def _download(self, url, folder='', filename='', retried=0):
    logger.info('Start downloading: {0} ...'.format(url))
    filename = filename if filename else os.path.basename(urlparse(url).path)
    base_filename, extension = os.path.splitext(filename)
    try:
        with open(os.path.join(folder, base_filename.zfill(3) + extension), "wb") as f:
            response = request('get', url, stream=True, timeout=self.timeout)
            if response.status_code != 200:
                raise NhentaiImageNotExistException

            length = response.headers.get('content-length')
            if length is None:
                f.write(response.content)
            else:
                for chunk in response.iter_content(2048):
                    f.write(chunk)
    except (requests.HTTPError, requests.Timeout) as e:
        if retried < 3:
            logger.warning('Warning: {0}, retrying({1}) ...'.format(str(e), retried))
            return 0, self._download(url=url, folder=folder, filename=filename, retried=retried + 1)
        else:
            return 0, None
    except NhentaiImageNotExistException as e:
        os.remove(os.path.join(folder, base_filename.zfill(3) + extension))
        return -1, url
    except Exception as e:
        logger.critical(str(e))
        return 0, None

    return 1, url
def download(self):
    logger.info('Starting to download doujinshi: %s' % self.name)
    if self.downloader:
        download_queue = []
        if len(self.ext) != self.pages:
            logger.warning('Page count and ext count do not equal')

        for i in range(1, min(self.pages, len(self.ext)) + 1):
            download_queue.append('%s/%d/%d.%s' % (IMAGE_URL, int(self.img_id), i, self.ext[i - 1]))

        self.downloader.download(download_queue, self.filename)

        with open(os.path.join(self.path, self.filename, 'ComicInfo.xml'), "w") as f:
            f.write(self.comicinfoXML)

        '''
        for i in range(len(self.ext)):
            download_queue.append('%s/%d/%d.%s' % (IMAGE_URL, int(self.img_id), i+1, EXT_MAP[self.ext[i]]))
        '''
    else:
        logger.critical('Downloader has not been loaded')
def download(self, queue, folder='', regenerate_cbz=False):
    if not isinstance(folder, text):
        folder = str(folder)

    if self.path:
        folder = os.path.join(self.path, folder)

    if os.path.exists(folder + '.cbz'):
        if not regenerate_cbz:
            logger.warning('CBZ file \'{}.cbz\' exists, ignored download request'.format(folder))
            return

    if not os.path.exists(folder):
        logger.warning('Path \'{0}\' does not exist, creating.'.format(folder))
        try:
            os.makedirs(folder)
        except EnvironmentError as e:
            logger.critical('{0}'.format(str(e)))
    else:
        logger.warning('Path \'{0}\' already exists.'.format(folder))

    queue = [(self, url, folder, constant.CONFIG['proxy']) for url in queue]

    pool = multiprocessing.Pool(self.size, init_worker)
    [pool.apply_async(download_wrapper, args=item) for item in queue]

    pool.close()
    pool.join()
def doujinshi_parser(id_):
    if not isinstance(id_, (int,)) and (isinstance(id_, (str,)) and not id_.isdigit()):
        raise Exception('Doujinshi id({0}) is not valid'.format(id_))

    id_ = int(id_)
    logger.log(15, 'Fetching doujinshi information of id {0}'.format(id_))
    doujinshi = dict()
    doujinshi['id'] = id_
    url = '{0}/{1}'.format(constant.DETAIL_URL, id_)

    try:
        response = request('get', url).json()
    except Exception as e:
        logger.critical(str(e))
        exit(1)

    # use the English title directly; wrapping the encoded bytes in str() would
    # leave a stray quote and escape sequences in the name on Python 3
    doujinshi['name'] = response['title']['english']
    doujinshi['subtitle'] = response['title']['japanese']
    doujinshi['img_id'] = response['media_id']
    doujinshi['ext'] = ''.join(map(lambda s: s['t'], response['images']['pages']))
    doujinshi['pages'] = len(response['images']['pages'])

    # gain information of the doujinshi
    needed_fields = ['character', 'artist', 'language']
    for tag in response['tags']:
        tag_type = tag['type']
        if tag_type in needed_fields:
            if tag_type not in doujinshi:
                doujinshi[tag_type] = tag['name']
            else:
                doujinshi[tag_type] += tag['name']

    return doujinshi
def download(self, queue, folder=''):
    if not isinstance(folder, text):
        folder = str(folder)

    if self.path:
        folder = os.path.join(self.path, folder)

    if not os.path.exists(folder):
        logger.warning('Path \'{0}\' does not exist, creating.'.format(folder))
        try:
            os.makedirs(folder)
        except EnvironmentError as e:
            logger.critical('{0}'.format(str(e)))
    else:
        logger.warning('Path \'{0}\' already exists.'.format(folder))

    queue = [(self, url, folder) for url in queue]

    pool = multiprocessing.Pool(self.size, init_worker)
    [pool.apply_async(download_wrapper, args=item) for item in queue]

    pool.close()
    pool.join()
def _download(self, url, folder='', filename='', retried=False):
    logger.info('Start downloading: {0} ...'.format(url))
    filename = filename if filename else os.path.basename(urlparse(url).path)
    base_filename, extension = os.path.splitext(filename)
    try:
        with open(os.path.join(folder, base_filename.zfill(3) + extension), "wb") as f:
            response = request('get', url, stream=True, timeout=self.timeout)
            length = response.headers.get('content-length')
            if length is None:
                f.write(response.content)
            else:
                for chunk in response.iter_content(2048):
                    f.write(chunk)
    except requests.HTTPError as e:
        if not retried:
            logger.error('Error: {0}, retrying'.format(str(e)))
            return self._download(url=url, folder=folder, filename=filename, retried=True)
        else:
            return None
    except Exception as e:
        logger.critical(str(e))
        return None

    return url
def _download(self, url, folder='', filename='', retried=0):
    logger.info('Starting to download {0} ...'.format(url))
    filename = filename if filename else os.path.basename(urlparse(url).path)
    base_filename, extension = os.path.splitext(filename)
    try:
        if os.path.exists(os.path.join(folder, base_filename.zfill(3) + extension)):
            logger.warning('File: {0} exists, ignoring'.format(
                os.path.join(folder, base_filename.zfill(3) + extension)))
            return 1, url

        with open(os.path.join(folder, base_filename.zfill(3) + extension), "wb") as f:
            i = 0
            while i < 10:
                try:
                    response = request('get', url, stream=True, timeout=self.timeout)
                except Exception as e:
                    i += 1
                    if not i < 10:
                        logger.critical(str(e))
                        return 0, None
                    continue
                break

            if response.status_code != 200:
                raise NhentaiImageNotExistException

            length = response.headers.get('content-length')
            if length is None:
                f.write(response.content)
            else:
                for chunk in response.iter_content(2048):
                    f.write(chunk)
    except (requests.HTTPError, requests.Timeout) as e:
        if retried < 3:
            logger.warning('Warning: {0}, retrying({1}) ...'.format(str(e), retried))
            return 0, self._download(url=url, folder=folder, filename=filename, retried=retried + 1)
        else:
            return 0, None
    except NhentaiImageNotExistException as e:
        os.remove(os.path.join(folder, base_filename.zfill(3) + extension))
        return -1, url
    except Exception as e:
        logger.critical(str(e))
        return 0, None

    return 1, url
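# Hedged usage sketch (not part of the project): the tuple-returning _download()
# variants above yield a (status, data) pair -- 1 on success, -1 when the image
# does not exist (404), 0 on a fatal error. A caller could dispatch on that pair
# as shown below; the standalone handle_result() helper is hypothetical.
def handle_result(status, data):
    if status == 1:
        print('downloaded:', data)
    elif status == -1:
        print('image not found (404):', data)
    else:
        print('download failed')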
def __api_suspended_search_parser(keyword, page):
    logger.debug('Searching doujinshis using keywords {0}'.format(keyword))
    result = []
    i = 0
    while i < 5:
        try:
            response = request('get', url=constant.SEARCH_URL, params={'query': keyword, 'page': page}).json()
        except Exception as e:
            i += 1
            if not i < 5:
                logger.critical(str(e))
                logger.warn('If you are in China, please configure the proxy to fu*k GFW.')
                exit(1)
            continue
        break

    if 'result' not in response:
        raise Exception('No result in response')

    for row in response['result']:
        title = row['title']['english']
        title = title[:85] + '..' if len(title) > 85 else title
        result.append({'id': row['id'], 'title': title})

    if not result:
        logger.warn('No results for keywords {}'.format(keyword))

    return result
def download(self, queue, folder=''):
    if not isinstance(folder, text):
        folder = str(folder)

    if self.path:
        folder = os.path.join(self.path, folder)

    if not os.path.exists(folder):
        logger.warn('Path \'{0}\' does not exist, creating.'.format(folder))
        try:
            os.makedirs(folder)
        except EnvironmentError as e:
            logger.critical('{0}'.format(str(e)))
            exit(1)
    else:
        logger.warn('Path \'{0}\' already exists.'.format(folder))

    queue = [([url], {'folder': folder}) for url in queue]

    self.thread_pool = threadpool.ThreadPool(self.thread_count)
    requests_ = threadpool.makeRequests(self._download, queue, self._download_callback)
    [self.thread_pool.putRequest(req) for req in requests_]

    self.thread_pool.wait()
def doujinshi_parser(id_):
    if not isinstance(id_, (int,)) and (isinstance(id_, (str,)) and not id_.isdigit()):
        raise Exception('Doujinshi id({0}) is not valid'.format(id_))

    id_ = int(id_)
    logger.log(15, 'Fetching doujinshi information of id {0}'.format(id_))
    doujinshi = dict()
    doujinshi['id'] = id_
    url = '{0}/{1}/'.format(constant.DETAIL_URL, id_)

    try:
        response = request('get', url).content
    except Exception as e:
        logger.critical(str(e))
        raise SystemExit

    html = BeautifulSoup(response, 'html.parser')
    doujinshi_info = html.find('div', attrs={'id': 'info'})

    title = doujinshi_info.find('h1').text
    subtitle = doujinshi_info.find('h2')

    doujinshi['name'] = title
    doujinshi['subtitle'] = subtitle.text if subtitle else ''

    doujinshi_cover = html.find('div', attrs={'id': 'cover'})
    img_id = re.search(r'/galleries/(\d+)/cover\.(jpg|png)$', doujinshi_cover.a.img.attrs['data-src'])

    ext = []
    for i in html.find_all('div', attrs={'class': 'thumb-container'}):
        _, ext_name = os.path.basename(i.img.attrs['data-src']).rsplit('.', 1)
        ext.append(ext_name)

    if not img_id:
        logger.critical('Failed to get the image id of the doujinshi')
        exit(1)

    doujinshi['img_id'] = img_id.group(1)
    doujinshi['ext'] = ext

    pages = 0
    for _ in doujinshi_info.find_all('div', class_=''):
        pages = re.search(r'(\d+) pages', _.text)
        if pages:
            pages = pages.group(1)
            break
    doujinshi['pages'] = int(pages)

    # gain information of the doujinshi
    information_fields = doujinshi_info.find_all('div', attrs={'class': 'field-name'})
    needed_fields = ['Characters', 'Artists', 'Language', 'Tags']
    for field in information_fields:
        field_name = field.contents[0].strip().strip(':')
        if field_name in needed_fields:
            data = [sub_field.contents[0].strip() for sub_field in field.find_all('a', attrs={'class': 'tag'})]
            doujinshi[field_name.lower()] = ', '.join(data)

    return doujinshi
def download(self):
    logger.info('Start downloading doujinshi: %s' % self.name)
    if self.downloader:
        download_queue = []
        for i in range(1, self.pages + 1):
            download_queue.append('%s/%d/%d.%s' % (IMAGE_URL, int(self.img_id), i, self.ext))
        self.downloader.download(download_queue, self.id)
    else:
        logger.critical('Downloader has not been loaded')
def _download_callback(self, request, result):
    result, data = result
    if result == 0:
        logger.critical('fatal errors occurred, quit.')
        exit(1)
    elif result == -1:
        logger.warning('url {} returned status code 404'.format(data))
    else:
        logger.log(15, '{0} downloaded successfully'.format(data))
def download(self):
    logger.info('Start downloading doujinshi: %s' % self.name)
    if self.downloader:
        download_queue = []
        for i in range(1, self.pages + 1):
            download_queue.append('%s/%d/%d.%s' % (IMAGE_URL, int(self.img_id), i, self.ext))
        self.downloader.download(download_queue, format_filename('%s-%s' % (self.id, self.name[:200])))
    else:
        logger.critical('Downloader has not been loaded')
def search_parser(keyword, sorting, page, is_page_all=False):
    # keyword = '+'.join([i.strip().replace(' ', '-').lower() for i in keyword.split(',')])
    result = []
    response = None
    if not page:
        page = [1]

    if is_page_all:
        url = request('get', url=constant.SEARCH_URL, params={'query': keyword}).url
        init_response = request('get', url.replace('%2B', '+')).json()
        page = range(1, init_response['num_pages'] + 1)

    total = '/{0}'.format(page[-1]) if is_page_all else ''
    not_exists_persist = False
    for p in page:
        i = 0

        logger.info('Searching doujinshis using keywords "{0}" on page {1}{2}'.format(keyword, p, total))
        while i < 3:
            try:
                url = request('get', url=constant.SEARCH_URL,
                              params={'query': keyword, 'page': p, 'sort': sorting}).url
                response = request('get', url.replace('%2B', '+')).json()
            except Exception as e:
                logger.critical(str(e))
                response = None
            break

        if response is None or 'result' not in response:
            logger.warning('No result in response in page {}'.format(p))
            if not_exists_persist is True:
                break
            continue

        for row in response['result']:
            title = row['title']['english']
            title = title[:85] + '..' if len(title) > 85 else title
            result.append({'id': row['id'], 'title': title})

        not_exists_persist = False

    if not result:
        logger.warning('No results for keywords {}'.format(keyword))

    return result
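# Hedged usage sketch (not part of the project): consuming the list of
# {'id': ..., 'title': ...} dicts returned by search_parser() above. The keyword,
# sorting value and page list used here are illustrative assumptions only.
if __name__ == '__main__':
    for item in search_parser('full color', sorting='recent', page=[1], is_page_all=False):
        print('{0}\t{1}'.format(item['id'], item['title']))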
def download(self):
    logger.info('Starting to download doujinshi: %s' % self.name)
    if self.downloader:
        download_queue = []
        if len(self.ext) != self.pages:
            logger.warning('Page count and ext count do not equal')

        for i in range(1, min(self.pages, len(self.ext)) + 1):
            download_queue.append('%s/%d/%d.%s' % (IMAGE_URL, int(self.img_id), i, self.ext[i - 1]))

        self.downloader.download(download_queue, self.filename)
    else:
        logger.critical('Downloader has not been loaded')
def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
    image_html = ''

    if doujinshi_obj is not None:
        doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
    else:
        doujinshi_dir = '.'

    if not os.path.exists(doujinshi_dir):
        logger.warning('Path \'{0}\' does not exist, creating.'.format(doujinshi_dir))
        try:
            os.makedirs(doujinshi_dir)
        except EnvironmentError as e:
            logger.critical('{0}'.format(str(e)))

    file_list = os.listdir(doujinshi_dir)
    file_list.sort()

    for image in file_list:
        if not os.path.splitext(image)[1] in ('.jpg', '.png'):
            continue

        image_html += '<img src="{0}" class="image-item"/>\n'.format(image)

    html = readfile('viewer/{}/index.html'.format(template))
    css = readfile('viewer/{}/styles.css'.format(template))
    js = readfile('viewer/{}/scripts.js'.format(template))

    if doujinshi_obj is not None:
        serialize_json(doujinshi_obj, doujinshi_dir)
        name = doujinshi_obj.name
        if sys.version_info < (3, 0):
            name = doujinshi_obj.name.encode('utf-8')
    else:
        name = {'title': 'nHentai HTML Viewer'}

    data = html.format(TITLE=name, IMAGES=image_html, SCRIPTS=js, STYLES=css)
    try:
        if sys.version_info < (3, 0):
            with open(os.path.join(doujinshi_dir, 'index.html'), 'w') as f:
                f.write(data)
        else:
            with open(os.path.join(doujinshi_dir, 'index.html'), 'wb') as f:
                f.write(data.encode('utf-8'))

        logger.log(15, 'HTML Viewer has been written to \'{0}\''.format(
            os.path.join(doujinshi_dir, 'index.html')))
    except Exception as e:
        logger.warning('Writing HTML Viewer failed ({})'.format(str(e)))
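# Hedged usage sketch (not part of the project): regenerating the HTML viewer for
# an already-downloaded folder by passing doujinshi_obj=None, so the .jpg/.png
# files in the current directory are used. The arguments are illustrative.
if __name__ == '__main__':
    generate_html(output_dir='.', doujinshi_obj=None, template='default')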
def tag_parser(tag_id, max_page=1):
    logger.info('Searching for doujinshi with tag id {0}'.format(tag_id))
    result = []
    i = 0
    while i < 5:
        try:
            response = request('get', url=constant.TAG_API_URL,
                               params={'sort': 'popular', 'tag_id': tag_id}).json()
        except Exception as e:
            i += 1
            if not i < 5:
                logger.critical(str(e))
                exit(1)
            continue
        break

    page = max_page if max_page <= response['num_pages'] else int(response['num_pages'])

    for i in range(1, page + 1):
        logger.info('Getting page {} ...'.format(i))

        if page != 1:
            i = 0
            while i < 5:
                try:
                    response = request('get', url=constant.TAG_API_URL,
                                       params={'sort': 'popular', 'tag_id': tag_id}).json()
                except Exception as e:
                    i += 1
                    if not i < 5:
                        logger.critical(str(e))
                        exit(1)
                    continue
                break

        for row in response['result']:
            title = row['title']['english']
            title = title[:85] + '..' if len(title) > 85 else title
            result.append({'id': row['id'], 'title': title})

    if not result:
        logger.warn('No results for tag id {}'.format(tag_id))

    return result
def __api_suspended_doujinshi_parser(id_):
    if not isinstance(id_, (int,)) and (isinstance(id_, (str,)) and not id_.isdigit()):
        raise Exception('Doujinshi id({0}) is not valid'.format(id_))

    id_ = int(id_)
    logger.log(15, 'Fetching information of doujinshi id {0}'.format(id_))
    doujinshi = dict()
    doujinshi['id'] = id_
    url = '{0}/{1}'.format(constant.DETAIL_URL, id_)
    i = 0
    while i < 5:
        try:
            response = request('get', url).json()
        except Exception as e:
            i += 1
            if not i < 5:
                logger.critical(str(e))
                exit(1)
            continue
        break

    doujinshi['name'] = response['title']['english']
    doujinshi['subtitle'] = response['title']['japanese']
    doujinshi['img_id'] = response['media_id']
    doujinshi['ext'] = ''.join([i['t'] for i in response['images']['pages']])
    doujinshi['pages'] = len(response['images']['pages'])

    # gain information of the doujinshi
    needed_fields = ['character', 'artist', 'language', 'tag', 'parody', 'group', 'category']
    for tag in response['tags']:
        tag_type = tag['type']
        if tag_type in needed_fields:
            if tag_type == 'tag':
                if tag_type not in doujinshi:
                    doujinshi[tag_type] = {}

                tag['name'] = tag['name'].replace(' ', '-')
                tag['name'] = tag['name'].lower()
                doujinshi[tag_type][tag['name']] = tag['id']
            elif tag_type not in doujinshi:
                doujinshi[tag_type] = tag['name']
            else:
                doujinshi[tag_type] += ', ' + tag['name']

    return doujinshi
def __api_suspended_doujinshi_parser(id_):
    if not isinstance(id_, (int,)) and (isinstance(id_, (str,)) and not id_.isdigit()):
        raise Exception('Doujinshi id({0}) is not valid'.format(id_))

    id_ = int(id_)
    logger.log(15, 'Fetching information of doujinshi id {0}'.format(id_))
    doujinshi = dict()
    doujinshi['id'] = id_
    url = '{0}/{1}'.format(constant.DETAIL_URL, id_)
    i = 0
    while i < 5:
        try:
            response = request('get', url).json()
        except Exception as e:
            i += 1
            if not i < 5:
                logger.critical(str(e))
                exit(1)
            continue
        break

    doujinshi['name'] = response['title']['english']
    doujinshi['subtitle'] = response['title']['japanese']
    doujinshi['img_id'] = response['media_id']
    doujinshi['ext'] = ''.join(map(lambda s: s['t'], response['images']['pages']))
    doujinshi['pages'] = len(response['images']['pages'])

    # gain information of the doujinshi
    needed_fields = ['character', 'artist', 'language', 'tag']
    for tag in response['tags']:
        tag_type = tag['type']
        if tag_type in needed_fields:
            if tag_type == 'tag':
                if tag_type not in doujinshi:
                    doujinshi[tag_type] = {}

                tag['name'] = tag['name'].replace(' ', '-')
                tag['name'] = tag['name'].lower()
                doujinshi[tag_type][tag['name']] = tag['id']
            elif tag_type not in doujinshi:
                doujinshi[tag_type] = tag['name']
            else:
                doujinshi[tag_type] += ', ' + tag['name']

    return doujinshi
def search_parser(keyword, page):
    logger.debug('Searching doujinshis of keyword {0}'.format(keyword))
    try:
        response = request('get', url=constant.SEARCH_URL, params={'q': keyword, 'page': page}).content
    except requests.ConnectionError as e:
        logger.critical(e)
        logger.warn('If you are in China, please configure the proxy to fu*k GFW.')
        raise SystemExit

    result = _get_title_and_id(response)
    if not result:
        logger.warn('No results found for keyword {}'.format(keyword))

    return result
def download(self):
    logger.info('Starting to download doujinshi: %s' % self.name)
    if self.downloader:
        download_queue = []
        if len(self.ext) != self.pages:
            logger.warning('Page count and ext count do not equal')

        for i in range(1, min(self.pages, len(self.ext)) + 1):
            download_queue.append('%s/%d/%d.%s' % (IMAGE_URL, int(self.img_id), i, self.ext[i - 1]))

        self.downloader.download(download_queue, self.filename)

        '''
        for i in range(len(self.ext)):
            download_queue.append('%s/%d/%d.%s' % (IMAGE_URL, int(self.img_id), i+1, EXT_MAP[self.ext[i]]))
        '''
    else:
        logger.critical('Downloader has not been loaded')
def tag_guessing(tag_name):
    tag_name = tag_name.lower()
    tag_name = tag_name.replace(' ', '-')
    logger.info('Trying to get tag_id of tag \'{0}\''.format(tag_name))
    i = 0
    while i < 5:
        try:
            response = request('get', url='%s/%s' % (constant.TAG_URL, tag_name)).content
        except Exception as e:
            i += 1
            if not i < 5:
                logger.critical(str(e))
                exit(1)
            continue
        break

    html = BeautifulSoup(response, 'html.parser')
    first_item = html.find('div', attrs={'class': 'gallery'})
    if not first_item:
        logger.error('Cannot find doujinshi id of tag \'{0}\''.format(tag_name))
        return

    doujinshi_id = re.findall(r'(\d+)', first_item.a.attrs['href'])
    if not doujinshi_id:
        logger.error('Cannot find doujinshi id of tag \'{0}\''.format(tag_name))
        return

    ret = doujinshi_parser(doujinshi_id[0])
    if 'tag' in ret and tag_name in ret['tag']:
        tag_id = ret['tag'][tag_name]
        logger.info('Tag id of tag \'{0}\' is {1}'.format(tag_name, tag_id))
    else:
        logger.error('Cannot find doujinshi id of tag \'{0}\''.format(tag_name))
        return

    return tag_id
def search_parser(keyword, page):
    logger.debug('Searching doujinshis of keyword {0}'.format(keyword))
    result = []
    try:
        response = request('get', url=constant.SEARCH_URL, params={'query': keyword, 'page': page}).json()
        if 'result' not in response:
            raise Exception('No result in response')
    except requests.ConnectionError as e:
        logger.critical(e)
        logger.warn('If you are in China, please configure the proxy to fu*k GFW.')
        exit(1)

    for row in response['result']:
        title = row['title']['english']
        title = title[:85] + '..' if len(title) > 85 else title
        result.append({'id': row['id'], 'title': title})

    if not result:
        logger.warn('No results found for keyword {}'.format(keyword))

    return result
def search_parser(keyword, page):
    logger.debug('Searching doujinshis of keyword {0}'.format(keyword))
    result = []
    try:
        response = request('get', url=constant.SEARCH_URL, params={'q': keyword, 'page': page}).content
    except requests.ConnectionError as e:
        logger.critical(e)
        logger.warn('If you are in China, please configure the proxy to fu*k GFW.')
        exit(1)

    html = BeautifulSoup(response, 'html.parser')
    doujinshi_search_result = html.find_all('div', attrs={'class': 'gallery'})
    for doujinshi in doujinshi_search_result:
        doujinshi_container = doujinshi.find('div', attrs={'class': 'caption'})
        title = doujinshi_container.text.strip()
        title = (title[:85] + '..') if len(title) > 85 else title
        id_ = re.search(r'/g/(\d+)/', doujinshi.a['href']).group(1)
        result.append({'id': id_, 'title': title})

    if not result:
        logger.warn('No results found for keyword {}'.format(keyword))

    return result
def search_parser(keyword, sorting, page):
    logger.debug('Searching doujinshis using keywords {0}'.format(keyword))
    keyword = '+'.join([i.strip().replace(' ', '-').lower() for i in keyword.split(',')])
    result = []
    i = 0
    while i < 5:
        try:
            url = request('get', url=constant.SEARCH_URL,
                          params={'query': keyword, 'page': page, 'sort': sorting}).url
            response = request('get', url.replace('%2B', '+')).json()
        except Exception as e:
            i += 1
            if not i < 5:
                logger.critical(str(e))
                logger.warn('If you are in China, please configure the proxy to fu*k GFW.')
                exit(1)
            continue
        break

    if 'result' not in response:
        raise Exception('No result in response')

    for row in response['result']:
        title = row['title']['english']
        title = title[:85] + '..' if len(title) > 85 else title
        result.append({'id': row['id'], 'title': title})

    if not result:
        logger.warn('No results for keywords {}'.format(keyword))

    return result
def download(self, queue, folder=''):
    if not isinstance(folder, text):
        folder = str(folder)

    if self.path:
        folder = os.path.join(self.path, folder)

    if not os.path.exists(folder):
        logger.warn('Path \'{0}\' does not exist, creating.'.format(folder))
        try:
            os.makedirs(folder)
        except EnvironmentError as e:
            logger.critical('{0}'.format(str(e)))
            exit(1)
    else:
        logger.warn('Path \'{0}\' already exists.'.format(folder))

    queue = [([url], {'folder': folder}) for url in queue]

    self.thread_pool = threadpool.ThreadPool(self.thread_count)
    requests_ = threadpool.makeRequests(self._download, queue, self._download_callback)
    [self.thread_pool.putRequest(req) for req in requests_]

    self.thread_pool.wait()
def doujinshi_parser(id_):
    if not isinstance(id_, (int,)) and (isinstance(id_, (str,)) and not id_.isdigit()):
        raise Exception('Doujinshi id({0}) is not valid'.format(id_))

    id_ = int(id_)
    logger.log(15, 'Fetching doujinshi information of id {0}'.format(id_))
    doujinshi = dict()
    doujinshi['id'] = id_
    url = '{0}/{1}/'.format(constant.DETAIL_URL, id_)

    try:
        response = request('get', url)
        if response.status_code in (200,):
            response = response.content
        elif response.status_code in (404,):
            logger.error("Doujinshi with id {0} cannot be found".format(id_))
            return []
        else:
            logger.debug('Slow down and retry ({}) ...'.format(id_))
            time.sleep(1)
            return doujinshi_parser(str(id_))
    except Exception as e:
        logger.warning('Error: {}, ignored'.format(str(e)))
        return None

    html = BeautifulSoup(response, 'html.parser')
    doujinshi_info = html.find('div', attrs={'id': 'info'})

    title = doujinshi_info.find('h1').text
    pretty_name = doujinshi_info.find('h1').find('span', attrs={'class': 'pretty'}).text
    subtitle = doujinshi_info.find('h2')

    doujinshi['name'] = title
    doujinshi['pretty_name'] = pretty_name
    doujinshi['subtitle'] = subtitle.text if subtitle else ''

    doujinshi_cover = html.find('div', attrs={'id': 'cover'})
    img_id = re.search(r'/galleries/([0-9]+)/cover\.(jpg|png|gif)$', doujinshi_cover.a.img.attrs['data-src'])

    ext = []
    for i in html.find_all('div', attrs={'class': 'thumb-container'}):
        _, ext_name = os.path.basename(i.img.attrs['data-src']).rsplit('.', 1)
        ext.append(ext_name)

    if not img_id:
        logger.critical('Failed to get the image id of the doujinshi')
        exit(1)

    doujinshi['img_id'] = img_id.group(1)
    doujinshi['ext'] = ext

    for _ in doujinshi_info.find_all('div', class_='tag-container field-name'):
        if re.search('Pages:', _.text):
            pages = _.find('span', class_='name').string
    doujinshi['pages'] = int(pages)

    # gain information of the doujinshi
    information_fields = doujinshi_info.find_all('div', attrs={'class': 'field-name'})
    needed_fields = ['Characters', 'Artists', 'Languages', 'Tags', 'Parodies', 'Groups', 'Categories']
    for field in information_fields:
        field_name = field.contents[0].strip().strip(':')
        if field_name in needed_fields:
            data = [sub_field.find('span', attrs={'class': 'name'}).contents[0].strip()
                    for sub_field in field.find_all('a', attrs={'class': 'tag'})]
            doujinshi[field_name.lower()] = ', '.join(data)

    time_field = doujinshi_info.find('time')
    if time_field.has_attr('datetime'):
        doujinshi['date'] = time_field['datetime']

    return doujinshi
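# Hedged usage sketch (not part of the project): inspecting a few fields of the
# dict returned by doujinshi_parser() above. The id used here is a placeholder,
# and the function may also return [] or None on failure, so check the type.
if __name__ == '__main__':
    info = doujinshi_parser(1)
    if isinstance(info, dict):
        print(info.get('name'), info.get('pages'), info.get('tags'))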
def cmd_parser():
    parser = OptionParser()
    parser.add_option('--download', dest='is_download', action='store_true',
                      help='download doujinshi or not')
    parser.add_option('--id', type='int', dest='id', action='store',
                      help='doujinshi id of nhentai')
    parser.add_option('--ids', type='str', dest='ids', action='store',
                      help='doujinshi id set, e.g. 1,2,3')
    parser.add_option('--search', type='string', dest='keyword', action='store',
                      help='keyword searched')
    parser.add_option('--page', type='int', dest='page', action='store', default=1,
                      help='page number of search result')
    parser.add_option('--path', type='string', dest='saved_path', action='store', default='',
                      help='path which save the doujinshi')
    parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5,
                      help='thread count of download doujinshi')
    parser.add_option('--timeout', type='int', dest='timeout', action='store', default=30,
                      help='timeout of download doujinshi')
    parser.add_option('--proxy', type='string', dest='proxy', action='store', default='',
                      help='use proxy, example: http://127.0.0.1:1080')
    args, _ = parser.parse_args()

    if args.ids:
        _ = map(lambda id: id.strip(), args.ids.split(','))
        args.ids = set(map(int, filter(lambda id: id.isdigit(), _)))

    if args.is_download and not args.id and not args.ids and not args.keyword:
        logger.critical('Doujinshi id/ids is required for downloading')
        parser.print_help()
        exit(0)

    if args.id:
        args.ids = (args.id,) if not args.ids else args.ids

    if not args.keyword and not args.ids:
        parser.print_help()
        exit(0)

    if args.threads <= 0:
        args.threads = 1
    elif args.threads > 10:
        logger.critical('Maximum number of used threads is 10')
        exit(0)

    if args.proxy:
        import urlparse
        proxy_url = urlparse.urlparse(args.proxy)
        if proxy_url.scheme not in ('http', 'https'):
            logger.error('Invalid protocol \'{0}\' of proxy, ignored'.format(proxy_url.scheme))
        else:
            constant.PROXY = {proxy_url.scheme: args.proxy}

    return args
def _download_callback(self, request, result):
    if not result:
        logger.critical('Too many errors occurred, quit.')
        exit(1)
    logger.log(15, '{0} downloaded successfully'.format(result))
def cmd_parser():
    load_config()

    parser = OptionParser('\n nhentai --search [keyword] --download'
                          '\n NHENTAI=http://h.loli.club nhentai --id [ID ...]'
                          '\n nhentai --file [filename]'
                          '\n\nEnvironment Variable:\n'
                          ' NHENTAI nhentai mirror url')
    # operation options
    parser.add_option('--download', '-D', dest='is_download', action='store_true',
                      help='download doujinshi (for search results)')
    parser.add_option('--show', '-S', dest='is_show', action='store_true',
                      help='just show the doujinshi information')

    # doujinshi options
    parser.add_option('--id', type='string', dest='id', action='store',
                      help='doujinshi ids set, e.g. 1,2,3')
    parser.add_option('--search', '-s', type='string', dest='keyword', action='store',
                      help='search doujinshi by keyword')
    parser.add_option('--favorites', '-F', action='store_true', dest='favorites',
                      help='list or download your favorites.')

    # page options
    parser.add_option('--page-all', dest='page_all', action='store_true', default=False,
                      help='all search results')
    parser.add_option('--page', '--page-range', type='string', dest='page', action='store', default='',
                      help='page number of search results. e.g. 1,2-5,14')
    parser.add_option('--sorting', dest='sorting', action='store', default='recent',
                      help='sorting of doujinshi (recent / popular / popular-[today|week])',
                      choices=['recent', 'popular', 'popular-today', 'popular-week'])

    # download options
    parser.add_option('--output', '-o', type='string', dest='output_dir', action='store', default='./',
                      help='output dir')
    parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5,
                      help='thread count for downloading doujinshi')
    parser.add_option('--timeout', '-T', type='int', dest='timeout', action='store', default=30,
                      help='timeout for downloading doujinshi')
    parser.add_option('--delay', '-d', type='int', dest='delay', action='store', default=0,
                      help='slow down between downloading every doujinshi')
    parser.add_option('--proxy', type='string', dest='proxy', action='store', default='',
                      help='store a proxy, for example: -p \'http://127.0.0.1:1080\'')
    parser.add_option('--file', '-f', type='string', dest='file', action='store',
                      help='read gallery IDs from file.')
    parser.add_option('--format', type='string', dest='name_format', action='store',
                      help='format the saved folder name', default='[%i][%a][%t]')

    # generate options
    parser.add_option('--html', dest='html_viewer', action='store_true',
                      help='generate a html viewer at current directory')
    parser.add_option('--no-html', dest='is_nohtml', action='store_true',
                      help='don\'t generate HTML after downloading')
    parser.add_option('--gen-main', dest='main_viewer', action='store_true',
                      help='generate a main viewer contain all the doujin in the folder')
    parser.add_option('--cbz', '-C', dest='is_cbz', action='store_true',
                      help='generate Comic Book CBZ File')
    parser.add_option('--pdf', '-P', dest='is_pdf', action='store_true',
                      help='generate PDF file')
    parser.add_option('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False,
                      help='remove downloaded doujinshi dir when generated CBZ or PDF file.')

    # nhentai options
    parser.add_option('--cookie', type='str', dest='cookie', action='store',
                      help='set cookie of nhentai to bypass Google recaptcha')
    parser.add_option('--language', type='str', dest='language', action='store',
                      help='set default language to parse doujinshis')
    parser.add_option('--clean-language', dest='clean_language', action='store_true', default=False,
                      help='set DEFAULT as language to parse doujinshis')
    parser.add_option('--save-download-history', dest='is_save_download_history', action='store_true',
                      default=False,
                      help='save downloaded doujinshis, which will be skipped if you re-download them')
    parser.add_option('--clean-download-history', action='store_true', default=False,
                      dest='clean_download_history',
                      help='clean download history')

    try:
        sys.argv = [unicode(i.decode(sys.stdin.encoding)) for i in sys.argv]
        print()
    except (NameError, TypeError):
        pass
    except UnicodeDecodeError:
        exit(0)

    args, _ = parser.parse_args(sys.argv[1:])

    if args.html_viewer:
        generate_html()
        exit(0)

    if args.main_viewer and not args.id and not args.keyword and not args.favorites:
        generate_main_html()
        exit(0)

    if args.clean_download_history:
        with DB() as db:
            db.clean_all()

        logger.info('Download history cleaned.')
        exit(0)

    # --- set config ---
    if args.cookie is not None:
        constant.CONFIG['cookie'] = args.cookie
        logger.info('Cookie saved.')
        write_config()
        exit(0)

    if args.language is not None:
        constant.CONFIG['language'] = args.language
        logger.info('Default language now set to \'{0}\''.format(args.language))
        write_config()
        exit(0)
        # TODO: search without language

    if args.proxy:
        proxy_url = urlparse(args.proxy)
        if not args.proxy == '' and proxy_url.scheme not in ('http', 'https'):
            logger.error('Invalid protocol \'{0}\' of proxy, ignored'.format(proxy_url.scheme))
            exit(0)
        else:
            constant.CONFIG['proxy'] = {
                'http': args.proxy,
                'https': args.proxy,
            }
            logger.info('Proxy now set to \'{0}\'.'.format(args.proxy))
            write_config()
            exit(0)
    # --- end set config ---

    if args.favorites:
        if not constant.CONFIG['cookie']:
            logger.warning('Cookie has not been set, please use `nhentai --cookie \'COOKIE\'` to set it.')
            exit(1)

    if args.id:
        _ = [i.strip() for i in args.id.split(',')]
        args.id = set(int(i) for i in _ if i.isdigit())

    if args.file:
        with open(args.file, 'r') as f:
            _ = [i.strip() for i in f.readlines()]
            args.id = set(int(i) for i in _ if i.isdigit())

    if (args.is_download or args.is_show) and not args.id and not args.keyword and not args.favorites:
        logger.critical('Doujinshi id(s) are required for downloading')
        parser.print_help()
        exit(1)

    if not args.keyword and not args.id and not args.favorites:
        parser.print_help()
        exit(1)

    if args.threads <= 0:
        args.threads = 1
    elif args.threads > 15:
        logger.critical('Maximum number of used threads is 15')
        exit(1)

    return args
def cmd_parser():
    parser = OptionParser('\n nhentai --search [keyword] --download'
                          '\n NHENTAI=http://h.loli.club nhentai --id [ID ...]'
                          '\n\nEnvironment Variable:\n'
                          ' NHENTAI nhentai mirror url')
    parser.add_option('--download', dest='is_download', action='store_true',
                      help='download doujinshi (for search results)')
    parser.add_option('--show-info', dest='is_show', action='store_true',
                      help='just show the doujinshi information')
    parser.add_option('--id', type='string', dest='id', action='store',
                      help='doujinshi ids set, e.g. 1,2,3')
    parser.add_option('--search', type='string', dest='keyword', action='store',
                      help='search doujinshi by keyword')
    parser.add_option('--page', type='int', dest='page', action='store', default=1,
                      help='page number of search results')
    parser.add_option('--tag', type='string', dest='tag', action='store',
                      help='download doujinshi by tag')
    parser.add_option('--max-page', type='int', dest='max_page', action='store', default=1,
                      help='The max page when recursive download tagged doujinshi')
    parser.add_option('--output', type='string', dest='output_dir', action='store', default='',
                      help='output dir')
    parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5,
                      help='thread count for downloading doujinshi')
    parser.add_option('--timeout', type='int', dest='timeout', action='store', default=30,
                      help='timeout for downloading doujinshi')
    parser.add_option('--proxy', type='string', dest='proxy', action='store', default='',
                      help='uses a proxy, for example: http://127.0.0.1:1080')
    parser.add_option('--html', dest='html_viewer', action='store_true',
                      help='generate a html viewer at current directory')
    parser.add_option('--login', '-l', type='str', dest='login', action='store',
                      help='username:password pair of nhentai account')
    parser.add_option('--nohtml', dest='is_nohtml', action='store_true',
                      help='Don\'t generate HTML')
    parser.add_option('--cbz', dest='is_cbz', action='store_true',
                      help='Generate Comic Book CBZ File')
    parser.add_option('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False,
                      help='Remove downloaded doujinshi dir when generated CBZ file.')

    try:
        sys.argv = list(map(lambda x: unicode(x.decode(sys.stdin.encoding)), sys.argv))
    except (NameError, TypeError):
        pass
    except UnicodeDecodeError:
        exit(0)

    args, _ = parser.parse_args(sys.argv[1:])

    if args.html_viewer:
        generate_html()
        exit(0)

    if args.login:
        try:
            _, _ = args.login.split(':', 1)
        except ValueError:
            logger.error('Invalid `username:password` pair.')
            exit(1)

        if not args.is_download:
            logger.warning('YOU DO NOT SPECIFY `--download` OPTION !!!')

    if args.id:
        _ = map(lambda id: id.strip(), args.id.split(','))
        args.id = set(map(int, filter(lambda id_: id_.isdigit(), _)))

    if (args.is_download or args.is_show) and not args.id and not args.keyword and \
            not args.login and not args.tag:
        logger.critical('Doujinshi id(s) are required for downloading')
        parser.print_help()
        exit(1)

    if not args.keyword and not args.id and not args.login and not args.tag:
        parser.print_help()
        exit(1)

    if args.threads <= 0:
        args.threads = 1
    elif args.threads > 15:
        logger.critical('Maximum number of used threads is 15')
        exit(1)

    if args.proxy:
        proxy_url = urlparse(args.proxy)
        if proxy_url.scheme not in ('http', 'https'):
            logger.error('Invalid protocol \'{0}\' of proxy, ignored'.format(proxy_url.scheme))
        else:
            constant.PROXY = {'http': args.proxy, 'https': args.proxy}

    return args
def cmd_parser():
    parser = OptionParser('\n nhentai --search [keyword] --download'
                          '\n NHENTAI=http://h.loli.club nhentai --id [ID ...]'
                          '\n nhentai --file [filename]'
                          '\n\nEnvironment Variable:\n'
                          ' NHENTAI nhentai mirror url')
    # operation options
    parser.add_option('--download', '-D', dest='is_download', action='store_true',
                      help='download doujinshi (for search results)')
    parser.add_option('--show', '-S', dest='is_show', action='store_true',
                      help='just show the doujinshi information')

    # doujinshi options
    parser.add_option('--id', type='string', dest='id', action='store',
                      help='doujinshi ids set, e.g. 1,2,3')
    parser.add_option('--search', '-s', type='string', dest='keyword', action='store',
                      help='search doujinshi by keyword')
    parser.add_option('--tag', type='string', dest='tag', action='store',
                      help='download doujinshi by tag')
    parser.add_option('--artist', type='string', dest='artist', action='store',
                      help='download doujinshi by artist')
    parser.add_option('--character', type='string', dest='character', action='store',
                      help='download doujinshi by character')
    parser.add_option('--parody', type='string', dest='parody', action='store',
                      help='download doujinshi by parody')
    parser.add_option('--group', type='string', dest='group', action='store',
                      help='download doujinshi by group')
    parser.add_option('--language', type='string', dest='language', action='store',
                      help='download doujinshi by language')
    parser.add_option('--favorites', '-F', action='store_true', dest='favorites',
                      help='list or download your favorites.')

    # page options
    parser.add_option('--page', type='int', dest='page', action='store', default=1,
                      help='page number of search results')
    parser.add_option('--max-page', type='int', dest='max_page', action='store', default=1,
                      help='The max page when recursive download tagged doujinshi')
    parser.add_option('--page-range', type='string', dest='page_range', action='store',
                      help='page range of favorites. e.g. 1,2-5,14')
    parser.add_option('--sorting', dest='sorting', action='store', default='date',
                      help='sorting of doujinshi (date / popular)', choices=['date', 'popular'])

    # download options
    parser.add_option('--output', '-o', type='string', dest='output_dir', action='store', default='',
                      help='output dir')
    parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5,
                      help='thread count for downloading doujinshi')
    parser.add_option('--timeout', '-T', type='int', dest='timeout', action='store', default=30,
                      help='timeout for downloading doujinshi')
    parser.add_option('--delay', '-d', type='int', dest='delay', action='store', default=0,
                      help='slow down between downloading every doujinshi')
    parser.add_option('--proxy', '-p', type='string', dest='proxy', action='store', default='',
                      help='store a proxy, for example: -p \'http://127.0.0.1:1080\'')
    parser.add_option('--file', '-f', type='string', dest='file', action='store',
                      help='read gallery IDs from file.')
    parser.add_option('--format', type='string', dest='name_format', action='store',
                      help='format the saved folder name', default='[%i][%a][%t]')

    # generate options
    parser.add_option('--html', dest='html_viewer', action='store_true',
                      help='generate a html viewer at current directory')
    parser.add_option('--no-html', dest='is_nohtml', action='store_true',
                      help='don\'t generate HTML after downloading')
    parser.add_option('--gen-main', dest='main_viewer', action='store_true',
                      help='generate a main viewer contain all the doujin in the folder')
    parser.add_option('--cbz', '-C', dest='is_cbz', action='store_true',
                      help='generate Comic Book CBZ File')
    parser.add_option('--comic-info', dest='write_comic_info', action='store_true',
                      help='when generating Comic Book CBZ File, also write ComicInfo.xml')
    parser.add_option('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False,
                      help='remove downloaded doujinshi dir when generated CBZ file.')

    # nhentai options
    parser.add_option('--cookie', type='str', dest='cookie', action='store',
                      help='set cookie of nhentai to bypass Google recaptcha')
    parser.add_option('--save-download-history', dest='is_save_download_history', action='store_true',
                      default=False,
                      help='save downloaded doujinshis, which will be skipped if you re-download them')
    parser.add_option('--clean-download-history', action='store_true', default=False,
                      dest='clean_download_history',
                      help='clean download history')

    try:
        sys.argv = [unicode(i.decode(sys.stdin.encoding)) for i in sys.argv]
        print()
    except (NameError, TypeError):
        pass
    except UnicodeDecodeError:
        exit(0)

    args, _ = parser.parse_args(sys.argv[1:])

    if args.html_viewer:
        generate_html()
        exit(0)

    if args.main_viewer and not args.id and not args.keyword and \
            not args.tag and not args.artist and not args.character and \
            not args.parody and not args.group and not args.language and not args.favorites:
        generate_main_html()
        exit(0)

    if args.clean_download_history:
        with DB() as db:
            db.clean_all()

        logger.info('Download history cleaned.')
        exit(0)

    if os.path.exists(constant.NHENTAI_COOKIE):
        with open(constant.NHENTAI_COOKIE, 'r') as f:
            constant.COOKIE = f.read()

    if args.cookie:
        try:
            if not os.path.exists(constant.NHENTAI_HOME):
                os.mkdir(constant.NHENTAI_HOME)

            with open(constant.NHENTAI_COOKIE, 'w') as f:
                f.write(args.cookie)
        except Exception as e:
            logger.error('Cannot create NHENTAI_HOME: {}'.format(str(e)))
            exit(1)

        logger.info('Cookie saved.')
        exit(0)

    if os.path.exists(constant.NHENTAI_PROXY):
        with open(constant.NHENTAI_PROXY, 'r') as f:
            link = f.read()
            constant.PROXY = {'http': link, 'https': link}

    if args.proxy:
        try:
            if not os.path.exists(constant.NHENTAI_HOME):
                os.mkdir(constant.NHENTAI_HOME)

            proxy_url = urlparse(args.proxy)
            if proxy_url.scheme not in ('http', 'https'):
                logger.error('Invalid protocol \'{0}\' of proxy, ignored'.format(proxy_url.scheme))
            else:
                with open(constant.NHENTAI_PROXY, 'w') as f:
                    f.write(args.proxy)
        except Exception as e:
            logger.error('Cannot create NHENTAI_HOME: {}'.format(str(e)))
            exit(1)

        logger.info('Proxy \'{0}\' saved.'.format(args.proxy))
        exit(0)

    if args.favorites:
        if not constant.COOKIE:
            logger.warning('Cookie has not been set, please use `nhentai --cookie \'COOKIE\'` to set it.')
            exit(1)

    if args.id:
        _ = [i.strip() for i in args.id.split(',')]
        args.id = set(int(i) for i in _ if i.isdigit())

    if args.file:
        with open(args.file, 'r') as f:
            _ = [i.strip() for i in f.readlines()]
            args.id = set(int(i) for i in _ if i.isdigit())

    if (args.is_download or args.is_show) and not args.id and not args.keyword and \
            not args.tag and not args.artist and not args.character and \
            not args.parody and not args.group and not args.language and not args.favorites:
        logger.critical('Doujinshi id(s) are required for downloading')
        parser.print_help()
        exit(1)

    if not args.keyword and not args.id and not args.tag and not args.artist and \
            not args.character and not args.parody and not args.group and not args.language and not args.favorites:
        parser.print_help()
        exit(1)

    if args.threads <= 0:
        args.threads = 1
    elif args.threads > 15:
        logger.critical('Maximum number of used threads is 15')
        exit(1)

    return args
def cmd_parser():
    parser = OptionParser('\n nhentai --search [keyword] --download'
                          '\n NHENTAI=http://h.loli.club nhentai --id [ID ...]'
                          '\n\nEnvironment Variable:\n'
                          ' NHENTAI nhentai mirror url')
    parser.add_option('--download', dest='is_download', action='store_true',
                      help='download doujinshi (for search result)')
    parser.add_option('--show-info', dest='is_show', action='store_true',
                      help='just show the doujinshi information')
    parser.add_option('--id', type='string', dest='id', action='store',
                      help='doujinshi ids set, e.g. 1,2,3')
    parser.add_option('--search', type='string', dest='keyword', action='store',
                      help='search doujinshi by keyword')
    parser.add_option('--page', type='int', dest='page', action='store', default=1,
                      help='page number of search result')
    parser.add_option('--tags', type='string', dest='tags', action='store',
                      help='download doujinshi by tags')
    parser.add_option('--output', type='string', dest='output_dir', action='store', default='',
                      help='output dir')
    parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5,
                      help='thread count of download doujinshi')
    parser.add_option('--timeout', type='int', dest='timeout', action='store', default=30,
                      help='timeout of download doujinshi')
    parser.add_option('--proxy', type='string', dest='proxy', action='store', default='',
                      help='use proxy, example: http://127.0.0.1:1080')
    parser.add_option('--html', dest='html_viewer', action='store_true',
                      help='generate a html viewer at current directory')
    parser.add_option('--login', '-l', type='str', dest='login', action='store',
                      help='username:password pair of nhentai account')

    try:
        sys.argv = list(map(lambda x: unicode(x.decode(sys.stdin.encoding)), sys.argv))
    except (NameError, TypeError):
        pass
    except UnicodeDecodeError:
        exit(0)

    args, _ = parser.parse_args(sys.argv[1:])

    if args.html_viewer:
        generate_html()
        exit(0)

    if args.login:
        try:
            _, _ = args.login.split(':', 1)
        except ValueError:
            logger.error('Invalid `username:password` pair.')
            exit(1)

        if not args.is_download:
            logger.warning('YOU DO NOT SPECIFY `--download` OPTION !!!')

    if args.tags:
        logger.warning('`--tags` is under construction')
        exit(1)

    if args.id:
        _ = map(lambda id: id.strip(), args.id.split(','))
        args.id = set(map(int, filter(lambda id_: id_.isdigit(), _)))

    if (args.is_download or args.is_show) and not args.id and not args.keyword and not args.login:
        logger.critical('Doujinshi id(s) are required for downloading')
        parser.print_help()
        exit(1)

    if not args.keyword and not args.id and not args.login:
        parser.print_help()
        exit(1)

    if args.threads <= 0:
        args.threads = 1
    elif args.threads > 15:
        logger.critical('Maximum number of used threads is 15')
        exit(1)

    if args.proxy:
        proxy_url = urlparse(args.proxy)
        if proxy_url.scheme not in ('http', 'https'):
            logger.error('Invalid protocol \'{0}\' of proxy, ignored'.format(proxy_url.scheme))
        else:
            constant.PROXY = {'http': args.proxy, 'https': args.proxy}

    return args
def cmd_parser():
    parser = OptionParser('\n nhentai --search [keyword] --download'
                          '\n NHENTAI=http://h.loli.club nhentai --id [ID ...]'
                          '\n\nEnvironment Variable:\n'
                          ' NHENTAI nhentai mirror url')
    parser.add_option('--download', dest='is_download', action='store_true',
                      help='download doujinshi (for search result)')
    parser.add_option('--show-info', dest='is_show', action='store_true',
                      help='just show the doujinshi information')
    parser.add_option('--id', type='string', dest='id', action='store',
                      help='doujinshi ids set, e.g. 1,2,3')
    parser.add_option('--search', type='string', dest='keyword', action='store',
                      help='search doujinshi by keyword')
    parser.add_option('--page', type='int', dest='page', action='store', default=1,
                      help='page number of search result')
    parser.add_option('--tags', type='string', dest='tags', action='store',
                      help='download doujinshi by tags')
    parser.add_option('--output', type='string', dest='output_dir', action='store', default='',
                      help='output dir')
    parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5,
                      help='thread count of download doujinshi')
    parser.add_option('--timeout', type='int', dest='timeout', action='store', default=30,
                      help='timeout of download doujinshi')
    parser.add_option('--proxy', type='string', dest='proxy', action='store', default='',
                      help='use proxy, example: http://127.0.0.1:1080')

    try:
        sys.argv = list(map(lambda x: unicode(x.decode(sys.stdin.encoding)), sys.argv))
    except (NameError, TypeError):
        pass
    except UnicodeDecodeError:
        exit(0)

    args, _ = parser.parse_args(sys.argv[1:])

    if args.tags:
        logger.warning('`--tags` is under construction')
        exit(0)

    if args.id:
        _ = map(lambda id: id.strip(), args.id.split(','))
        args.id = set(map(int, filter(lambda id: id.isdigit(), _)))

    if (args.is_download or args.is_show) and not args.id and not args.keyword:
        logger.critical('Doujinshi id(s) are required for downloading')
        parser.print_help()
        exit(0)

    if not args.keyword and not args.id:
        parser.print_help()
        exit(0)

    if args.threads <= 0:
        args.threads = 1
    elif args.threads > 15:
        logger.critical('Maximum number of used threads is 15')
        exit(0)

    if args.proxy:
        proxy_url = urlparse(args.proxy)
        if proxy_url.scheme not in ('http', 'https'):
            logger.error('Invalid protocol \'{0}\' of proxy, ignored'.format(proxy_url.scheme))
        else:
            constant.PROXY = {proxy_url.scheme: args.proxy}

    return args