def handleDebugMediumPage(self, response, imageId):
    """Optionally dump the medium-page response to disk and/or echo it.

    Does nothing unless config.enableDump is set. With dumpMediumPage set
    the raw response is written to an html file named after the image id;
    with debugHttp set the raw reply is printed to the console.
    """
    cfg = self._config
    # Dumping is globally gated; bail out early when disabled.
    if not cfg.enableDump:
        return
    if cfg.dumpMediumPage:
        dump_filename = "Medium Page for Image Id {0}.html".format(imageId)
        PixivHelper.dump_html(dump_filename, response)
        PixivHelper.print_and_log('info', 'Dumping html to: {0}'.format(dump_filename))
    if cfg.debugHttp:
        PixivHelper.safePrint(u"reply: {0}".format(response))
def handleDebugTagSearchPage(self, response, url):
    """Optionally dump a tag-search response to disk and/or echo it.

    No-op unless config.enableDump is set. dumpTagSearchPage writes the
    response to an html file named after the search url; debugHttp prints
    the reply (converted to unicode) to the console.
    """
    cfg = self._config
    # All debug dumping is gated on the master enableDump flag.
    if not cfg.enableDump:
        return
    if cfg.dumpTagSearchPage:
        dump_filename = "TagSearch Page for {0}.html".format(url)
        PixivHelper.dump_html(dump_filename, response)
        PixivHelper.print_and_log(
            'info', 'Dumping html to: {0}'.format(dump_filename))
    if cfg.debugHttp:
        PixivHelper.safePrint(u"reply: {0}".format(
            PixivHelper.toUnicode(response)))
def process_new_illust_from_bookmark(caller, config, page_num=1, end_page_num=0, bookmark_count=-1):
    """Download new illustrations from the followed-artists feed.

    Pages through the AJAX followed-new-illusts endpoint (mode "all" or
    "r18" depending on config.r18mode) starting at page_num, processing
    each image id via PixivImageHandler.process_image. Stops when
    end_page_num is exceeded, the feed reports its last page, or
    process_image signals PIXIVUTIL_SKIP_OLDER.

    Fix: the original kept a never-assigned `parsed_page` variable and a
    dead dump branch in the error handler (leftovers from the old
    HTML-scraping version of this function, see the sibling definition in
    this file); both are removed here. KeyboardInterrupt is re-raised so
    the caller can abort cleanly.
    """
    br: PixivBrowser = caller.__br__
    try:
        print("Processing New Illust from bookmark")
        i = page_num
        image_count = 1
        flag = True
        while flag:
            print(f"Page #{i}")
            mode = "all"
            if config.r18mode:
                mode = "r18"
            pb = br.getFollowedNewIllusts(mode, current_page=i)
            for image_id in pb.imageList:
                print(f"Image #{image_count}")
                result = PixivImageHandler.process_image(caller,
                                                         config,
                                                         artist=None,
                                                         image_id=int(image_id),
                                                         bookmark_count=bookmark_count)
                image_count = image_count + 1
                # Feed is newest-first: once an older image is hit, stop entirely.
                if result == PixivConstant.PIXIVUTIL_SKIP_OLDER:
                    flag = False
                    break
                PixivHelper.wait(result, config)
            i = i + 1
            if (end_page_num != 0 and i > end_page_num) or pb.isLastPage:
                print("Limit or last page reached.")
                flag = False
        print("Done.")
    except KeyboardInterrupt:
        raise
    except BaseException:
        PixivHelper.print_and_log(
            'error',
            'Error at process_new_illust_from_bookmark(): {0}'.format(sys.exc_info()))
        raise
def login(self, username, password):
    """Log in to pixiv with username/password via the accounts API.

    Fetches the login form to extract the hidden ``post_key`` token, then
    POSTs the credentials to the login API and delegates result parsing to
    ``processLoginResult``. On any failure the parsed login page (or the
    string ``"None"`` if it was never fetched) is dumped to
    ``login_error.html`` and the exception is re-raised.
    """
    parsed = None
    try:
        PixivHelper.print_and_log('info', 'Logging in...')
        url = "https://accounts.pixiv.net/login"
        # get the post key
        res = self.open_with_retry(url)
        parsed = BeautifulSoup(res, features="html5lib")
        post_key = parsed.find('input', attrs={'name': 'post_key'})
        # js_init_config = self._getInitConfig(parsed)
        res.close()
        data = {}
        data['pixiv_id'] = username
        data['password'] = password
        # data['captcha'] = ''
        # data['g_recaptcha_response'] = ''
        data['return_to'] = 'https://www.pixiv.net'
        data['lang'] = 'en'
        # NOTE(review): if the login page layout changed, `post_key` can be
        # None here and the subscript raises TypeError — handled by the
        # broad except below (dump + re-raise).
        data['post_key'] = post_key['value']
        data['source'] = "accounts"
        data['ref'] = ''
        request = mechanize.Request(
            "https://accounts.pixiv.net/api/login?lang=en", data, method='POST')
        response = self.open_with_retry(request)
        result = self.processLoginResult(response, username, password)
        response.close()
        return result
    except BaseException:
        traceback.print_exc()
        PixivHelper.print_and_log(
            'error', 'Error at login(): {0}'.format(sys.exc_info()))
        PixivHelper.dump_html("login_error.html", str(parsed))
        raise
    finally:
        # Free the parsed soup tree whether login succeeded or failed.
        if parsed is not None:
            parsed.decompose()
            del parsed
def getSearchTagPage(self, tags, current_page, wild_card=True, title_caption=False, start_date=None, end_date=None, member_id=None, oldest_first=False, start_page=1, include_bookmark_data=False):
    """Fetch one page of tag-search results as a (PixivTags, raw_page) pair.

    With ``member_id`` set, the tags are searched within that member's works
    via ``getMemberPage``; otherwise a search URL is generated and fetched.
    When ``include_bookmark_data`` is set, each result item additionally gets
    its bookmark/response counts from the per-illust AJAX endpoint (cached).

    Returns:
        tuple: (PixivTags result, raw response page used to build it).
    """
    response_page = None
    result = None
    url = ''
    if member_id is not None:
        # from member id search by tags
        (artist, response_page) = self.getMemberPage(
            member_id, current_page, False, tags)
        # convert to PixivTags
        result = PixivTags()
        result.parseMemberTags(artist, member_id, tags)
    else:
        # search by tags
        url = PixivHelper.generate_search_tag_url(tags, current_page, title_caption, wild_card, oldest_first, start_date, end_date, member_id, self._config.r18mode)
        PixivHelper.print_and_log('info', 'Looping... for {0}'.format(url))
        response_page = self.getPixivPage(url, returnParsed=False)
        self.handleDebugTagSearchPage(response_page, url)
        result = None
        # NOTE(review): this inner member_id check sits inside the
        # `member_id is None` branch and therefore never runs — it looks
        # like a leftover from an earlier refactor; verify before removing.
        if member_id is not None:
            result = PixivTags()
            parse_search_page = BeautifulSoup(response_page, features="html5lib")
            result.parseMemberTags(parse_search_page, member_id, tags)
            parse_search_page.decompose()
            del parse_search_page
        else:
            try:
                result = PixivTags()
                result.parseTags(response_page, tags, current_page)
                # parse additional information
                if include_bookmark_data:
                    idx = 0
                    print("Retrieving bookmark information...", end=' ')
                    for image in result.itemList:
                        idx = idx + 1
                        print("\r", end=' ')
                        print("Retrieving bookmark information... [{0}] of [{1}]".format(idx, len(result.itemList)), end=' ')
                        img_url = "https://www.pixiv.net/ajax/illust/{0}".format(image.imageId)
                        # check the per-illust cache before hitting the network
                        response_page = self._get_from_cache(img_url)
                        if response_page is None:
                            try:
                                res = self.open_with_retry(img_url)
                                response_page = res.read()
                                res.close()
                            except urllib.error.HTTPError as ex:
                                # a 404 body is still valid JSON with an error payload
                                if ex.code == 404:
                                    response_page = ex.read()
                            self._put_to_cache(img_url, response_page)
                        image_info_js = json.loads(response_page)
                        image.bookmarkCount = int(
                            image_info_js["body"]["bookmarkCount"])
                        image.imageResponse = int(
                            image_info_js["body"]["responseCount"])
                    print("")
            except BaseException:
                PixivHelper.dump_html("Dump for SearchTags " + tags + ".html", response_page)
                raise
    return (result, response_page)
def process_tags(caller, tags, page=1, end_page=0, wild_card=True, title_caption=False, start_date=None, end_date=None, use_tags_as_dir=False, member_id=None, bookmark_count=None, oldest_first=False, type_mode=None, notifier=None, job_option=None):
    """Search pixiv by tags and download every matching image.

    Pages through the tag-search results starting at ``page`` until
    ``end_page`` (0 = unlimited), the last page, or a duplicated result set
    is reached. Items below ``bookmark_count`` are skipped. With
    ``config.enableInfiniteLoop`` set and newest-first ordering, hitting
    page 1000 restarts from page 1 with the end date moved back to the
    last seen work date. ``job_option`` overrides config for filename
    formats/root directory when given.
    """
    # caller function/method
    # TODO: ideally to be removed or passed as argument
    config = caller.__config__
    config.loadConfig(path=caller.configfile)
    if notifier is None:
        notifier = PixivHelper.dummy_notifier
    # override the config source if job_option is give for filename formats
    format_src = config
    if job_option is not None:
        format_src = job_option
    search_page = None
    _last_search_result = None
    i = page
    updated_limit_count = 0
    try:
        search_tags = PixivHelper.decode_tags(tags)
        if use_tags_as_dir:
            PixivHelper.print_and_log(None, "Save to each directory using query tags.")
            format_src.rootDirectory += os.sep + PixivHelper.sanitize_filename(search_tags)
        tags = PixivHelper.encode_tags(tags)
        images = 1
        last_image_id = -1
        skipped_count = 0
        use_bookmark_data = False
        if bookmark_count is not None and bookmark_count > 0:
            use_bookmark_data = True
        # 60 posts per result page; offsets translate page limits to item counts
        offset = 60
        start_offset = (page - 1) * offset
        stop_offset = end_page * offset
        PixivHelper.print_and_log('info', f'Searching for: ({search_tags}) {tags}')
        flag = True
        while flag:
            # NOTE(review): this call passes bookmark_count/type_mode/r18mode
            # which the getSearchTagPage definition earlier in this file does
            # not accept — the two blocks appear to come from different
            # versions; confirm against the live browser class.
            (t, search_page) = PixivBrowserFactory.getBrowser().getSearchTagPage(tags,
                                                                                 i,
                                                                                 wild_card,
                                                                                 title_caption,
                                                                                 start_date,
                                                                                 end_date,
                                                                                 member_id,
                                                                                 oldest_first,
                                                                                 page,
                                                                                 use_bookmark_data,
                                                                                 bookmark_count,
                                                                                 type_mode,
                                                                                 r18mode=format_src.r18mode)
            if len(t.itemList) == 0:
                PixivHelper.print_and_log(None, 'No more images')
                flag = False
            elif _last_search_result is not None:
                # stop when the server keeps returning the same result set
                set1 = set((x.imageId) for x in _last_search_result.itemList)
                difference = [x for x in t.itemList if (x.imageId) not in set1]
                if len(difference) == 0:
                    PixivHelper.print_and_log(None, 'Getting duplicated result set, no more new images.')
                    flag = False
            if flag:
                for item in t.itemList:
                    last_image_id = item.imageId
                    PixivHelper.print_and_log(None, f'Image #{images}')
                    PixivHelper.print_and_log(None, f'Image Id: {item.imageId}')
                    if bookmark_count is not None and bookmark_count > item.bookmarkCount:
                        PixivHelper.print_and_log(None, f'Bookmark Count: {item.bookmarkCount}')
                        PixivHelper.print_and_log('info', f'Skipping imageId= {item.imageId} because less than bookmark count limit ({bookmark_count} > {item.bookmarkCount}).')
                        skipped_count = skipped_count + 1
                        continue
                    result = 0
                    # retry loop: repeats only on BadStatusLine
                    while True:
                        try:
                            if t.availableImages > 0:
                                # PixivHelper.print_and_log(None, "Total Images: " + str(t.availableImages))
                                total_image = t.availableImages
                                if(stop_offset > 0 and stop_offset < total_image):
                                    total_image = stop_offset
                                total_image = total_image - start_offset
                                # PixivHelper.print_and_log(None, "Total Images Offset: " + str(total_image))
                            else:
                                total_image = ((i - 1) * 20) + len(t.itemList)
                            title_prefix = "Tags:{0} Page:{1} Image {2}+{3} of {4}".format(tags, i, images, skipped_count, total_image)
                            if member_id is not None:
                                title_prefix = "MemberId: {0} Tags:{1} Page:{2} Image {3}+{4} of {5}".format(member_id, tags, i, images, skipped_count, total_image)
                            result = PixivConstant.PIXIVUTIL_OK
                            if not caller.DEBUG_SKIP_PROCESS_IMAGE:
                                result = PixivImageHandler.process_image(caller,
                                                                         config,
                                                                         None,
                                                                         item.imageId,
                                                                         user_dir=format_src.rootDirectory,
                                                                         search_tags=search_tags,
                                                                         title_prefix=title_prefix,
                                                                         bookmark_count=item.bookmarkCount,
                                                                         image_response_count=item.imageResponse,
                                                                         notifier=notifier,
                                                                         job_option=job_option)
                            PixivHelper.wait(result, config)
                            break
                        except KeyboardInterrupt:
                            result = PixivConstant.PIXIVUTIL_KEYBOARD_INTERRUPT
                            break
                        except http.client.BadStatusLine:
                            PixivHelper.print_and_log(None, "Stuff happened, trying again after 2 second...")
                            time.sleep(2)
                    images = images + 1
                    if result in (PixivConstant.PIXIVUTIL_SKIP_DUPLICATE,
                                  PixivConstant.PIXIVUTIL_SKIP_LOCAL_LARGER,
                                  PixivConstant.PIXIVUTIL_SKIP_DUPLICATE_NO_WAIT):
                        # too many already-downloaded hits in a row → stop this tag
                        updated_limit_count = updated_limit_count + 1
                        if config.checkUpdatedLimit != 0 and updated_limit_count >= config.checkUpdatedLimit:
                            PixivHelper.print_and_log(None, f"Skipping tags: {tags}")
                            PixivBrowserFactory.getBrowser().clear_history()
                            return
                        gc.collect()
                        continue
                    elif result == PixivConstant.PIXIVUTIL_KEYBOARD_INTERRUPT:
                        choice = input("Keyboard Interrupt detected, continue to next image (Y/N)").rstrip("\r")
                        if choice.upper() == 'N':
                            PixivHelper.print_and_log("info", f"Tags: {tags}, processing aborted.")
                            flag = False
                            break
                        else:
                            continue
                PixivBrowserFactory.getBrowser().clear_history()
                i = i + 1
                _last_search_result = t
                if end_page != 0 and end_page < i:
                    PixivHelper.print_and_log('info', f"End Page reached: {end_page}")
                    flag = False
                if t.isLastPage:
                    PixivHelper.print_and_log('info', f"Last page: {i - 1}")
                    flag = False
                # pixiv caps search results at 1000 pages; optionally loop
                # back to page 1 with a tightened end date to go deeper
                if config.enableInfiniteLoop and i == 1001 and not oldest_first:
                    if last_image_id > 0:
                        # get the last date
                        PixivHelper.print_and_log('info', f"Hit page 1000, trying to get workdate for last image id: {last_image_id}.")
                        # referer = 'https://www.pixiv.net/en/artworks/{0}'.format(last_image_id)
                        result = PixivBrowserFactory.getBrowser().getImagePage(last_image_id)
                        _last_date = result[0].worksDateDateTime
                        # _start_date = image.worksDateDateTime + datetime.timedelta(365)
                        # hit the last page
                        i = 1
                        end_date = _last_date.strftime("%Y-%m-%d")
                        PixivHelper.print_and_log('info', f"Hit page 1000, looping back to page 1 with ecd: {end_date}.")
                        flag = True
                        last_image_id = -1
                    else:
                        PixivHelper.print_and_log('info', "No more image in the list.")
                        flag = False
        PixivHelper.print_and_log(None, 'done')
        if search_page is not None:
            del search_page
    except KeyboardInterrupt:
        raise
    except BaseException:
        PixivHelper.print_and_log('error', f'Error at process_tags() at page {i}: {sys.exc_info()}')
        try:
            if search_page is not None:
                dump_filename = f'Error page for search tags {tags} at page {i}.html'
                PixivHelper.dump_html(dump_filename, search_page)
                PixivHelper.print_and_log('error', f"Dumping html to: {dump_filename}")
        except BaseException:
            PixivHelper.print_and_log('error', f'Cannot dump page for search tags: {search_tags}')
        raise
def process_from_group(caller, config, group_id, limit=0, process_external=True):
    """Download all images posted in a pixiv group.

    Pages through the group's JSON image feed using ``max_id`` pagination.
    Regular pixiv posts are routed through PixivImageHandler.process_image;
    external images (hosted off-pixiv) are downloaded directly when
    ``process_external`` is set. ``limit`` (0 = unlimited) caps the total
    image count across both kinds. On error the last JSON response is
    dumped and the exception re-raised.
    """
    br = caller.__br__
    json_response = None
    try:
        print("Download by Group Id")
        if limit != 0:
            print("Limit: {0}".format(limit))
        if process_external:
            print("Include External Image: {0}".format(process_external))
        max_id = 0
        image_count = 0
        flag = True
        while flag:
            url = "https://www.pixiv.net/group/images.php?format=json&max_id={0}&id={1}".format(
                max_id, group_id)
            PixivHelper.print_and_log('info', "Getting images from: {0}".format(url))
            response = br.open(url)
            json_response = response.read()
            response.close()
            group_data = PixivGroup(json_response)
            # maxId drives the next page request
            max_id = group_data.maxId
            if group_data.imageList is not None and len(
                    group_data.imageList) > 0:
                for image in group_data.imageList:
                    if image_count > limit and limit != 0:
                        flag = False
                        break
                    print("Image #{0}".format(image_count))
                    print("ImageId: {0}".format(image))
                    result = PixivImageHandler.process_image(caller,
                                                             config,
                                                             image_id=image)
                    image_count = image_count + 1
                    PixivHelper.wait(result, config)
            if process_external and group_data.externalImageList is not None and len(
                    group_data.externalImageList) > 0:
                for image_data in group_data.externalImageList:
                    if image_count > limit and limit != 0:
                        flag = False
                        break
                    print("Image #{0}".format(image_count))
                    print("Member Id : {0}".format(
                        image_data.artist.artistId))
                    PixivHelper.safePrint("Member Name : " + image_data.artist.artistName)
                    print("Member Token : {0}".format(
                        image_data.artist.artistToken))
                    print("Image Url : {0}".format(image_data.imageUrls[0]))
                    filename = PixivHelper.make_filename(
                        config.filenameFormat,
                        imageInfo=image_data,
                        tagsSeparator=config.tagsSeparator,
                        tagsLimit=config.tagsLimit,
                        fileUrl=image_data.imageUrls[0],
                        useTranslatedTag=config.useTranslatedTag,
                        tagTranslationLocale=config.tagTranslationLocale)
                    filename = PixivHelper.sanitize_filename(
                        filename, config.rootDirectory)
                    PixivHelper.safePrint("Filename : " + filename)
                    (result, filename) = PixivDownloadHandler.download_image(
                        caller,
                        image_data.imageUrls[0],
                        filename,
                        url,
                        config.overwrite,
                        config.retry,
                        backup_old_file=config.backupOldFile)
                    PixivHelper.get_logger().debug("Download %s result: %s", filename, result)
                    # preserve the original post date as the file's mtime
                    if config.setLastModified and filename is not None and os.path.isfile(
                            filename):
                        ts = time.mktime(
                            image_data.worksDateDateTime.timetuple())
                        os.utime(filename, (ts, ts))
                    image_count = image_count + 1
            # an empty page of both lists means the feed is exhausted
            if (group_data.imageList is None or len(group_data.imageList) == 0) and \
               (group_data.externalImageList is None or len(group_data.externalImageList) == 0):
                flag = False
            print("")
    except BaseException:
        PixivHelper.print_and_log(
            'error', 'Error at process_from_group(): {0}'.format(sys.exc_info()))
        if json_response is not None:
            filename = f"Dump for Download by Group {group_id}.json"
            PixivHelper.dump_html(filename, json_response)
        raise
def process_new_illust_from_bookmark(caller, config, page_num=1, end_page_num=0):
    """Download new illustrations from followed artists (HTML-scraping variant).

    Pages through bookmark_new_illust.php (or the r18 variant when
    config.r18mode is set), parses each page with BeautifulSoup into a
    PixivNewIllustBookmark and processes every image id found. Stops at
    end_page_num, page 5000, the last page, or when an already-seen/older
    image is reported.

    NOTE(review): this file contains a second, AJAX-based definition with
    the same name; if both live in one module, the definition appearing
    later wins at import time — confirm which one is intended.
    """
    br = caller.__br__
    parsed_page = None
    try:
        print("Processing New Illust from bookmark")
        i = page_num
        image_count = 1
        flag = True
        while flag:
            print("Page #" + str(i))
            url = 'https://www.pixiv.net/bookmark_new_illust.php?p=' + str(i)
            if config.r18mode:
                url = 'https://www.pixiv.net/bookmark_new_illust_r18.php?p=' + str(
                    i)
            PixivHelper.print_and_log('info', "Source URL: " + url)
            page = br.open(url)
            parsed_page = BeautifulSoup(page.read().decode("utf-8"), features="html5lib")
            pb = PixivNewIllustBookmark(parsed_page)
            if not pb.haveImages:
                print("No images!")
                break
            for image_id in pb.imageList:
                print("Image #" + str(image_count))
                result = PixivImageHandler.process_image(
                    caller, config, artist=None, image_id=int(image_id))
                image_count = image_count + 1
                # feed is newest-first: stop once older images are reached
                if result == PixivConstant.PIXIVUTIL_SKIP_OLDER:
                    flag = False
                    break
                PixivHelper.wait(result, config)
            i = i + 1
            # release the page and soup tree before fetching the next page
            page.close()
            parsed_page.decompose()
            del parsed_page
            # Non premium is only limited to 100 page
            # Premium user might be limited to 5000, refer to issue #112
            if (end_page_num != 0 and i > end_page_num) or i > 5000 or pb.isLastPage:
                print("Limit or last page reached.")
                flag = False
        print("Done.")
    except KeyboardInterrupt:
        raise
    except BaseException:
        PixivHelper.print_and_log(
            'error',
            'Error at process_new_illust_from_bookmark(): {0}'.format(
                sys.exc_info()))
        if parsed_page is not None:
            filename = "Dump for New Illust from bookmark.html"
            PixivHelper.dump_html(filename, parsed_page)
        raise
def process_image(caller, config, artist=None, image_id=None, user_dir='', bookmark=False, search_tags='', title_prefix="", bookmark_count=-1, image_response_count=-1, notifier=None, job_option=None):
    """Download a single pixiv post (all its pages) and record it in the DB.

    Skips work already recorded in the database (unless alwaysCheckFileSize
    or overwrite is set), fetches the medium page, applies date and
    blacklist filters, downloads every page url, optionally writes info/JSON
    sidecar files and ugoira data, then updates the database on success.

    Returns a PixivConstant PIXIVUTIL_* result code (0 on full success).
    Fix: the per-file log line printed the literal text "(unknown)" instead
    of the computed filename; it now interpolates ``filename``.
    """
    # caller function/method
    # TODO: ideally to be removed or passed as argument
    db = caller.__dbManager__
    if notifier is None:
        notifier = PixivHelper.dummy_notifier
    # override the config source if job_option is give for filename formats
    format_src = config
    if job_option is not None:
        format_src = job_option
    parse_medium_page = None
    image = None
    result = None
    referer = f'https://www.pixiv.net/artworks/{image_id}'
    filename = f'no-filename-{image_id}.tmp'
    try:
        msg = Fore.YELLOW + Style.NORMAL + f'Processing Image Id: {image_id}' + Style.RESET_ALL
        PixivHelper.print_and_log(None, msg)
        notifier(type="IMAGE", message=msg)
        # check if already downloaded. images won't be downloaded twice - needed in process_image to catch any download
        r = db.selectImageByImageId(image_id, cols='save_name')
        exists = False
        in_db = False
        if r is not None:
            exists = db.cleanupFileExists(r[0])
            in_db = True
        # skip if already recorded in db and alwaysCheckFileSize is disabled and overwrite is disabled.
        if in_db and not config.alwaysCheckFileSize and not config.overwrite:
            PixivHelper.print_and_log(None, f'Already downloaded in DB: {image_id}')
            gc.collect()
            return PixivConstant.PIXIVUTIL_SKIP_DUPLICATE_NO_WAIT
        # get the medium page
        try:
            (image, parse_medium_page) = PixivBrowserFactory.getBrowser().getImagePage(image_id=image_id,
                                                                                       parent=artist,
                                                                                       from_bookmark=bookmark,
                                                                                       bookmark_count=bookmark_count)
            if len(title_prefix) > 0:
                caller.set_console_title(f"{title_prefix} ImageId: {image.imageId}")
            else:
                caller.set_console_title(f"MemberId: {image.artist.artistId} ImageId: {image.imageId}")
        except PixivException as ex:
            caller.ERROR_CODE = ex.errorCode
            caller.__errorList.append(dict(type="Image", id=str(image_id), message=ex.message, exception=ex))
            if ex.errorCode == PixivException.UNKNOWN_IMAGE_ERROR:
                PixivHelper.print_and_log('error', ex.message)
            elif ex.errorCode == PixivException.SERVER_ERROR:
                PixivHelper.print_and_log('error', f'Giving up image_id (medium): {image_id}')
            elif ex.errorCode > 2000:
                PixivHelper.print_and_log('error', f'Image Error for {image_id}: {ex.message}')
            if parse_medium_page is not None:
                dump_filename = f'Error medium page for image {image_id}.html'
                PixivHelper.dump_html(dump_filename, parse_medium_page)
                PixivHelper.print_and_log('error', f'Dumping html to: {dump_filename}')
            else:
                PixivHelper.print_and_log('error', f'Image ID ({image_id}): {ex}')
            PixivHelper.print_and_log('error', f'Stack Trace: {sys.exc_info()}')
            return PixivConstant.PIXIVUTIL_NOT_OK
        except Exception as ex:
            PixivHelper.print_and_log('error', f'Image ID ({image_id}): {ex}')
            if parse_medium_page is not None:
                dump_filename = f'Error medium page for image {image_id}.html'
                PixivHelper.dump_html(dump_filename, parse_medium_page)
                PixivHelper.print_and_log('error', f'Dumping html to: {dump_filename}')
            PixivHelper.print_and_log('error', f'Stack Trace: {sys.exc_info()}')
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_exception(exc_type, exc_value, exc_traceback)
            return PixivConstant.PIXIVUTIL_NOT_OK
        download_image_flag = True
        # date validation and blacklist tag validation
        if config.dateDiff > 0:
            # fromordinal(1) is the sentinel "no date" value
            if image.worksDateDateTime != datetime.datetime.fromordinal(1).replace(tzinfo=datetime_z.utc):
                if image.worksDateDateTime < (datetime.datetime.today() - datetime.timedelta(config.dateDiff)).replace(tzinfo=datetime_z.utc):
                    PixivHelper.print_and_log('info', f'Skipping image_id: {image_id} because contains older than: {config.dateDiff} day(s).')
                    download_image_flag = False
                    result = PixivConstant.PIXIVUTIL_SKIP_OLDER
        if config.useBlacklistMembers and download_image_flag:
            if str(image.originalArtist.artistId) in caller.__blacklistMembers:
                PixivHelper.print_and_log('info', f'Skipping image_id: {image_id} because contains blacklisted member id: {image.originalArtist.artistId}')
                download_image_flag = False
                result = PixivConstant.PIXIVUTIL_SKIP_BLACKLIST
        if config.useBlacklistTags and download_image_flag:
            for item in caller.__blacklistTags:
                if item in image.imageTags:
                    PixivHelper.print_and_log('info', f'Skipping image_id: {image_id} because contains blacklisted tags: {item}')
                    download_image_flag = False
                    result = PixivConstant.PIXIVUTIL_SKIP_BLACKLIST
                    break
        if config.useBlacklistTitles and download_image_flag:
            for item in caller.__blacklistTitles:
                if item in image.imageTitle:
                    PixivHelper.print_and_log('info', f'Skipping image_id: {image_id} because contains blacklisted Title: {item}')
                    download_image_flag = False
                    result = PixivConstant.PIXIVUTIL_SKIP_BLACKLIST
                    break
        if download_image_flag and not caller.DEBUG_SKIP_DOWNLOAD_IMAGE:
            if artist is None:
                PixivHelper.print_and_log(None, f'Member Name : {image.artist.artistName}')
                PixivHelper.print_and_log(None, f'Member Avatar: {image.artist.artistAvatar}')
                PixivHelper.print_and_log(None, f'Member Token : {image.artist.artistToken}')
                PixivHelper.print_and_log(None, f'Member Background : {image.artist.artistBackground}')
            PixivHelper.print_and_log(None, f"Title: {image.imageTitle}")
            tags_str = ', '.join(image.imageTags)
            PixivHelper.print_and_log(None, f"Tags : {tags_str}")
            PixivHelper.print_and_log(None, f"Date : {image.worksDateDateTime}")
            PixivHelper.print_and_log(None, f"Mode : {image.imageMode}")
            # get bookmark count only when a filename token actually needs it
            if ("%bookmark_count%" in format_src.filenameFormat or "%image_response_count%" in format_src.filenameFormat) and image.bookmark_count == -1:
                PixivHelper.print_and_log(None, "Parsing bookmark page", end=' ')
                bookmark_url = f'https://www.pixiv.net/bookmark_detail.php?illust_id={image_id}'
                parse_bookmark_page = PixivBrowserFactory.getBrowser().getPixivPage(bookmark_url)
                image.ParseBookmarkDetails(parse_bookmark_page)
                parse_bookmark_page.decompose()
                del parse_bookmark_page
                PixivHelper.print_and_log(None, f"Bookmark Count : {image.bookmark_count}")
                caller.__br__.back()
            if config.useSuppressTags:
                for item in caller.__suppressTags:
                    if item in image.imageTags:
                        image.imageTags.remove(item)
            # get manga page
            if image.imageMode == 'manga':
                PixivHelper.print_and_log(None, f"Page Count : {image.imageCount}")
            if user_dir == '':  # Yavos: use config-options
                target_dir = format_src.rootDirectory
            else:  # Yavos: use filename from list
                target_dir = user_dir
            result = PixivConstant.PIXIVUTIL_OK
            manga_files = list()
            page = 0
            # Issue #639
            source_urls = image.imageUrls
            if config.downloadResized:
                source_urls = image.imageResizedUrls
            for img in source_urls:
                PixivHelper.print_and_log(None, f'Image URL : {img}')
                url = os.path.basename(img)
                split_url = url.split('.')
                if split_url[0].startswith(str(image_id)):
                    filename_format = format_src.filenameFormat
                    if image.imageMode == 'manga':
                        filename_format = format_src.filenameMangaFormat
                    filename = PixivHelper.make_filename(filename_format,
                                                         image,
                                                         tagsSeparator=config.tagsSeparator,
                                                         tagsLimit=config.tagsLimit,
                                                         fileUrl=url,
                                                         bookmark=bookmark,
                                                         searchTags=search_tags,
                                                         useTranslatedTag=config.useTranslatedTag,
                                                         tagTranslationLocale=config.tagTranslationLocale)
                    filename = PixivHelper.sanitize_filename(filename, target_dir)
                    if image.imageMode == 'manga' and config.createMangaDir:
                        manga_page = caller.__re_manga_page.findall(filename)
                        if len(manga_page) > 0:
                            splitted_filename = filename.split(manga_page[0][0], 1)
                            splitted_manga_page = manga_page[0][0].split("_p", 1)
                            # filename = splitted_filename[0] + splitted_manga_page[0] + os.sep + "_p" + splitted_manga_page[1] + splitted_filename[1]
                            filename = f"{splitted_filename[0]}{splitted_manga_page[0]}{os.sep}_p{splitted_manga_page[1]}{splitted_filename[1]}"
                    # FIX: was the literal "(unknown)"; log the actual target filename
                    PixivHelper.print_and_log('info', f'Filename : {filename}')
                    result = PixivConstant.PIXIVUTIL_NOT_OK
                    try:
                        (result, filename) = PixivDownloadHandler.download_image(caller,
                                                                                 img,
                                                                                 filename,
                                                                                 referer,
                                                                                 config.overwrite,
                                                                                 config.retry,
                                                                                 config.backupOldFile,
                                                                                 image,
                                                                                 page,
                                                                                 notifier)
                        if result == PixivConstant.PIXIVUTIL_NOT_OK:
                            PixivHelper.print_and_log('error', f'Image url not found/failed to download: {image.imageId}')
                        elif result == PixivConstant.PIXIVUTIL_ABORTED:
                            raise KeyboardInterrupt()
                        manga_files.append((image_id, page, filename))
                        page = page + 1
                    except urllib.error.URLError:
                        PixivHelper.print_and_log('error', f'Error when download_image(), giving up url: {img}')
                    PixivHelper.print_and_log(None, '')
            if config.writeImageInfo or config.writeImageJSON:
                filename_info_format = format_src.filenameInfoFormat or format_src.filenameFormat
                # Issue #575
                if image.imageMode == 'manga':
                    filename_info_format = format_src.filenameMangaInfoFormat or format_src.filenameMangaFormat or filename_info_format
                info_filename = PixivHelper.make_filename(filename_info_format,
                                                          image,
                                                          tagsSeparator=config.tagsSeparator,
                                                          tagsLimit=config.tagsLimit,
                                                          fileUrl=url,
                                                          appendExtension=False,
                                                          bookmark=bookmark,
                                                          searchTags=search_tags,
                                                          useTranslatedTag=config.useTranslatedTag,
                                                          tagTranslationLocale=config.tagTranslationLocale)
                info_filename = PixivHelper.sanitize_filename(info_filename, target_dir)
                # trim _pXXX
                info_filename = re.sub(r'_p?\d+$', '', info_filename)
                if config.writeImageInfo:
                    image.WriteInfo(info_filename + ".txt")
                if config.writeImageJSON:
                    image.WriteJSON(info_filename + ".json")
            if image.imageMode == 'ugoira_view':
                if config.writeUgoiraInfo:
                    image.WriteUgoiraData(filename + ".js")
                # Handle #451
                if config.createUgoira and (result in (PixivConstant.PIXIVUTIL_OK, PixivConstant.PIXIVUTIL_SKIP_DUPLICATE)):
                    PixivDownloadHandler.handle_ugoira(image, filename, config, notifier)
            if config.writeUrlInDescription:
                PixivHelper.write_url_in_description(image, config.urlBlacklistRegex, config.urlDumpFilename)
        if in_db and not exists:
            result = PixivConstant.PIXIVUTIL_CHECK_DOWNLOAD  # There was something in the database which had not been downloaded
        # Only save to db if all images is downloaded completely
        if result in (PixivConstant.PIXIVUTIL_OK,
                      PixivConstant.PIXIVUTIL_SKIP_DUPLICATE,
                      PixivConstant.PIXIVUTIL_SKIP_LOCAL_LARGER):
            try:
                db.insertImage(image.artist.artistId, image.imageId, image.imageMode)
            except BaseException:
                PixivHelper.print_and_log('error', f'Failed to insert image id:{image.imageId} to DB')
            db.updateImage(image.imageId, image.imageTitle, filename, image.imageMode)
            if len(manga_files) > 0:
                db.insertMangaImages(manga_files)
            # map back to PIXIVUTIL_OK (because of ugoira file check)
            result = 0
        if image is not None:
            del image
        if parse_medium_page is not None:
            del parse_medium_page
        gc.collect()
        PixivHelper.print_and_log(None, '\n')
        return result
    except Exception as ex:
        if isinstance(ex, KeyboardInterrupt):
            raise
        caller.ERROR_CODE = getattr(ex, 'errorCode', -1)
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_exception(exc_type, exc_value, exc_traceback)
        PixivHelper.print_and_log('error', f'Error at process_image(): {image_id}')
        PixivHelper.print_and_log('error', f'Exception: {sys.exc_info()}')
        if parse_medium_page is not None:
            dump_filename = f'Error medium page for image {image_id}.html'
            PixivHelper.dump_html(dump_filename, parse_medium_page)
            PixivHelper.print_and_log('error', f'Dumping html to: {dump_filename}')
        raise
def process_member(caller, config, member_id, user_dir='', page=1, end_page=0, bookmark=False, tags=None, title_prefix="", notifier=None, job_option=None):
    """Download all posts of one pixiv member, page by page.

    Fetches the member's post list (optionally filtered by ``tags`` or from
    bookmarks), downloads avatar/background once if configured, then runs
    every image through PixivImageHandler.process_image with per-image
    retry. Stops on page limits (``end_page``, command-line ``np``, or
    config.numberOfPage), the member's last page, older images, or when the
    checkUpdatedLimit duplicate threshold is reached. The member's last
    download date / last downloaded image are recorded in the database.
    """
    # caller function/method
    # TODO: ideally to be removed or passed as argument
    db = caller.__dbManager__
    config.loadConfig(path=caller.configfile)
    np = caller.np
    np_is_valid = caller.np_is_valid
    if notifier is None:
        notifier = PixivHelper.dummy_notifier
    # override the config source if job_option is give for filename formats
    format_src = config
    if job_option is not None:
        format_src = job_option
    list_page = None
    msg = Fore.YELLOW + Style.BRIGHT + f'Processing Member Id: {member_id}' + Style.RESET_ALL
    PixivHelper.print_and_log('info', msg)
    notifier(type="MEMBER", message=msg)
    if page != 1:
        PixivHelper.print_and_log('info', 'Start Page: ' + str(page))
    if end_page != 0:
        PixivHelper.print_and_log('info', 'End Page: ' + str(end_page))
        if config.numberOfPage != 0:
            PixivHelper.print_and_log(
                'info', 'Number of page setting will be ignored')
    elif np != 0:
        PixivHelper.print_and_log('info', 'End Page from command line: ' + str(np))
    elif config.numberOfPage != 0:
        PixivHelper.print_and_log(
            'info', 'End Page from config: ' + str(config.numberOfPage))
    # calculate the offset for display properties
    offset = 48  # new offset for AJAX call
    offset_start = (page - 1) * offset
    offset_stop = end_page * offset
    try:
        no_of_images = 1
        is_avatar_downloaded = False
        flag = True
        updated_limit_count = 0
        image_id = -1
        while flag:
            PixivHelper.print_and_log(None, 'Page ', page)
            caller.set_console_title(
                f"{title_prefix}MemberId: {member_id} Page: {page}")
            # Try to get the member page
            while True:
                try:
                    (artist, list_page
                     ) = PixivBrowserFactory.getBrowser().getMemberPage(
                         member_id, page, bookmark, tags, r18mode=format_src.r18mode)
                    break
                except PixivException as ex:
                    caller.ERROR_CODE = ex.errorCode
                    PixivHelper.print_and_log(
                        'info', f'Member ID ({member_id}): {ex}')
                    if ex.errorCode == PixivException.NO_IMAGES:
                        pass
                    else:
                        if list_page is None:
                            list_page = ex.htmlPage
                        if list_page is not None:
                            PixivHelper.dump_html(
                                f"Dump for {member_id} Error Code {ex.errorCode}.html", list_page)
                        if ex.errorCode == PixivException.USER_ID_NOT_EXISTS or ex.errorCode == PixivException.USER_ID_SUSPENDED:
                            # mark the member as gone instead of deleting the row
                            db.setIsDeletedFlagForMemberId(int(member_id))
                            PixivHelper.print_and_log(
                                'info',
                                f'Set IsDeleted for MemberId: {member_id} not exist.'
                            )
                            # db.deleteMemberByMemberId(member_id)
                            # PixivHelper.printAndLog('info', 'Deleting MemberId: ' + str(member_id) + ' not exist.')
                        if ex.errorCode == PixivException.OTHER_MEMBER_ERROR:
                            PixivHelper.print_and_log(None, ex.message)
                            caller.__errorList.append(
                                dict(type="Member", id=str(member_id), message=ex.message, exception=ex))
                    # NOTE(review): this return is reached for every
                    # PixivException, including NO_IMAGES — confirm intended.
                    return
                except AttributeError:
                    # Possible layout changes, try to dump the file below
                    raise
                except BaseException:
                    exc_type, exc_value, exc_traceback = sys.exc_info()
                    traceback.print_exception(exc_type, exc_value, exc_traceback)
                    PixivHelper.print_and_log(
                        'error', f'Error at processing Artist Info: {sys.exc_info()}')
            PixivHelper.print_and_log(None, f'Member Name : {artist.artistName}')
            PixivHelper.print_and_log(None, f'Member Avatar: {artist.artistAvatar}')
            PixivHelper.print_and_log(None, f'Member Token : {artist.artistToken}')
            PixivHelper.print_and_log(
                None, f'Member Background : {artist.artistBackground}')
            print_offset_stop = offset_stop if offset_stop < artist.totalImages and offset_stop != 0 else artist.totalImages
            PixivHelper.print_and_log(
                None,
                f'Processing images from {offset_start + 1} to {print_offset_stop} of {artist.totalImages}'
            )
            # avatar/background are downloaded at most once per member
            if not is_avatar_downloaded and config.downloadAvatar:
                if user_dir == '':
                    target_dir = format_src.rootDirectory
                else:
                    target_dir = user_dir
                avatar_filename = PixivHelper.create_avatar_filename(
                    artist, target_dir, format_src)
                if not caller.DEBUG_SKIP_PROCESS_IMAGE:
                    if artist.artistAvatar.find('no_profile') == -1:
                        PixivDownloadHandler.download_image(
                            caller,
                            artist.artistAvatar,
                            avatar_filename,
                            "https://www.pixiv.net/",
                            config.overwrite,
                            config.retry,
                            config.backupOldFile,
                            notifier=notifier)
                    # Issue #508
                    if artist.artistBackground is not None and artist.artistBackground.startswith(
                            "http"):
                        bg_name = PixivHelper.create_bg_filename_from_avatar_filename(
                            avatar_filename)
                        PixivDownloadHandler.download_image(
                            caller,
                            artist.artistBackground,
                            bg_name,
                            "https://www.pixiv.net/",
                            config.overwrite,
                            config.retry,
                            config.backupOldFile,
                            notifier=notifier)
                    is_avatar_downloaded = True
            if config.autoAddMember:
                db.insertNewMember(int(member_id))
            db.updateMemberName(member_id, artist.artistName)
            if not artist.haveImages:
                PixivHelper.print_and_log('info', f"No image found for: {member_id}")
                db.updateLastDownloadDate(member_id)
                flag = False
                continue
            result = PixivConstant.PIXIVUTIL_NOT_OK
            for image_id in artist.imageList:
                PixivHelper.print_and_log(None, f'#{no_of_images}')
                retry_count = 0
                # inner retry loop around a single image
                while True:
                    try:
                        if artist.totalImages > 0:
                            # PixivHelper.safePrint("Total Images = " + str(artist.totalImages))
                            total_image_page_count = artist.totalImages
                            if (offset_stop > 0 and offset_stop < total_image_page_count):
                                total_image_page_count = offset_stop
                            total_image_page_count = total_image_page_count - offset_start
                            # PixivHelper.safePrint("Total Images Offset = " + str(total_image_page_count))
                        else:
                            total_image_page_count = (
                                (page - 1) * 20) + len(artist.imageList)
                        title_prefix_img = f"{title_prefix}MemberId: {member_id} Page: {page} Post {no_of_images}+{updated_limit_count} of {total_image_page_count}"
                        if not caller.DEBUG_SKIP_PROCESS_IMAGE:
                            result = PixivImageHandler.process_image(
                                caller,
                                config,
                                artist,
                                image_id,
                                user_dir,
                                bookmark,
                                title_prefix=title_prefix_img,
                                notifier=notifier,
                                job_option=job_option)
                        break
                    except KeyboardInterrupt:
                        result = PixivConstant.PIXIVUTIL_KEYBOARD_INTERRUPT
                        break
                    except BaseException:
                        if retry_count > config.retry:
                            PixivHelper.print_and_log(
                                'error', f"Giving up image_id: {image_id}")
                            return
                        retry_count = retry_count + 1
                        PixivHelper.print_and_log(
                            None, f"Stuff happened, trying again after 2 second ({retry_count})"
                        )
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        traceback.print_exception(exc_type, exc_value, exc_traceback)
                        PixivHelper.print_and_log(
                            "error", f"Error at process_member(): {sys.exc_info()} Member Id: {member_id}"
                        )
                        time.sleep(2)
                if result in (PixivConstant.PIXIVUTIL_SKIP_DUPLICATE,
                              PixivConstant.PIXIVUTIL_SKIP_LOCAL_LARGER,
                              PixivConstant.PIXIVUTIL_SKIP_DUPLICATE_NO_WAIT):
                    # too many already-downloaded posts in a row → skip member
                    updated_limit_count = updated_limit_count + 1
                    if config.checkUpdatedLimit != 0 and updated_limit_count >= config.checkUpdatedLimit:
                        PixivHelper.safePrint(f"Skipping member: {member_id}")
                        db.updateLastDownloadDate(member_id)
                        PixivBrowserFactory.getBrowser(
                            config=config).clear_history()
                        return
                    gc.collect()
                    continue
                if result == PixivConstant.PIXIVUTIL_KEYBOARD_INTERRUPT:
                    choice = input(
                        "Keyboard Interrupt detected, continue to next image (Y/N)"
                    ).rstrip("\r")
                    if choice.upper() == 'N':
                        PixivHelper.print_and_log(
                            "info", f"Member: {member_id}, processing aborted")
                        flag = False
                        break
                    else:
                        continue
                # return code from process image
                if result == PixivConstant.PIXIVUTIL_SKIP_OLDER:
                    PixivHelper.print_and_log(
                        "info", "Reached older images, skippin to next member.")
                    db.updateLastDownloadDate(member_id)
                    flag = False
                    break
                no_of_images = no_of_images + 1
                PixivHelper.wait(result, config)
            if artist.isLastPage:
                db.updateLastDownloadDate(member_id)
                PixivHelper.print_and_log(None, "Last Page")
                flag = False
            page = page + 1
            # page limit checking
            if end_page > 0 and page > end_page:
                PixivHelper.print_and_log(
                    None, f"Page limit reached (from endPage limit ={end_page})")
                db.updateLastDownloadDate(member_id)
                flag = False
            else:
                if np_is_valid:  # Yavos: overwriting config-data
                    if page > np and np > 0:
                        PixivHelper.print_and_log(
                            None, f"Page limit reached (from command line ={np})")
                        flag = False
                elif page > config.numberOfPage and config.numberOfPage > 0:
                    PixivHelper.print_and_log(
                        None, f"Page limit reached (from config ={config.numberOfPage})"
                    )
                    flag = False
            del artist
            del list_page
        PixivBrowserFactory.getBrowser(config=config).clear_history()
        gc.collect()
        log_message = ""
        if int(image_id) > 0:
            db.updateLastDownloadedImage(member_id, image_id)
            log_message = f'last image_id: {image_id}'
        else:
            log_message = 'no images were found.'
        PixivHelper.print_and_log(
            "info", f"Member_id: {member_id} completed: {log_message}")
    except KeyboardInterrupt:
        raise
    except Exception:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_exception(exc_type, exc_value, exc_traceback)
        PixivHelper.print_and_log(
            'error', f'Error at process_member(): {sys.exc_info()}')
        try:
            if list_page is not None:
                dump_filename = f'Error page for member {member_id} at page {page}.html'
                PixivHelper.dump_html(dump_filename, list_page)
                PixivHelper.print_and_log('error', f"Dumping html to: {dump_filename}")
        except BaseException:
            PixivHelper.print_and_log(
                'error', f'Cannot dump page for member_id: {member_id}')
        raise
def process_tags(caller, config, tags, page=1, end_page=0, wild_card=True, title_caption=False, start_date=None, end_date=None, use_tags_as_dir=False, member_id=None, bookmark_count=None, sort_order='date_d', type_mode=None, notifier=None):
    """Search pixiv by tags and download every matching image.

    Pages through the tag-search results starting at `page`, calling
    PixivImageHandler.process_image for each hit. Stops on the end page,
    the last result page, a duplicated result set, or a user abort.

    Parameters (all passed through to the browser/search API):
        caller          -- main application object (DEBUG_SKIP_PROCESS_IMAGE is read here).
        config          -- PixivConfig instance.
        tags            -- raw search query; decoded for display, encoded for the request.
        page / end_page -- first and last result page (end_page == 0 means unlimited).
        wild_card       -- partial tag match.
        title_caption   -- also search in title/caption.
        start_date / end_date -- optional date bounds (end_date is rewritten when
                                  the infinite-loop workaround kicks in).
        use_tags_as_dir -- save into a per-query subdirectory of rootDirectory.
        member_id       -- restrict the search to one artist.
        bookmark_count  -- skip images below this bookmark count (None disables).
        sort_order      -- pixiv sort mode, default newest first.
        type_mode       -- optional work-type filter.
        notifier        -- progress callback; defaults to PixivHelper.dummy_notifier.

    Raises: re-raises anything unexpected after dumping the failing search page.
    """
    if notifier is None:
        notifier = PixivHelper.dummy_notifier
    search_page = None
    _last_search_result = None  # previous page's result, used for dup/empty-page detection
    i = page
    updated_limit_count = 0  # consecutive skip-results toward config.checkUpdatedLimit
    empty_page_retry = 0  # Issue #1090: retries when the server returns an empty page too early
    try:
        search_tags = PixivHelper.decode_tags(tags)
        root_dir = config.rootDirectory
        if use_tags_as_dir:
            PixivHelper.print_and_log(None, "Save to each directory using query tags.")
            root_dir = config.rootDirectory + os.sep + PixivHelper.sanitize_filename(search_tags)
        tags = PixivHelper.encode_tags(tags)

        images = 1
        last_image_id = -1
        skipped_count = 0

        use_bookmark_data = False
        if bookmark_count is not None and bookmark_count > 0:
            use_bookmark_data = True

        # translate page limits into absolute post offsets for progress display
        offset = PixivTags.POSTS_PER_PAGE
        start_offset = (page - 1) * offset
        stop_offset = end_page * offset

        PixivHelper.print_and_log('info', f'Searching for: ({search_tags}) {tags} with partial match = {wild_card} and title/caption = {title_caption}')
        flag = True
        while flag:
            (t, search_page) = PixivBrowserFactory.getBrowser().getSearchTagPage(tags, i,
                                                                                 wild_card=wild_card,
                                                                                 title_caption=title_caption,
                                                                                 start_date=start_date,
                                                                                 end_date=end_date,
                                                                                 member_id=member_id,
                                                                                 sort_order=sort_order,
                                                                                 start_page=page,
                                                                                 use_bookmark_data=use_bookmark_data,
                                                                                 bookmark_count=bookmark_count,
                                                                                 type_mode=type_mode,
                                                                                 r18mode=config.r18mode)
            PixivHelper.print_and_log("info", f'Found {len(t.itemList)} images for page {i}.')
            if len(t.itemList) == 0:
                # Issue #1090
                # check if the available images matching with current page * PixivTags.POSTS_PER_PAGE
                # and wait for {timeout} seconds and retry the page up to {config.retry} times.
                if _last_search_result is not None and _last_search_result.availableImages > (PixivTags.POSTS_PER_PAGE * i) and empty_page_retry < config.retry:
                    PixivHelper.print_and_log("warn", f'Server did not return images, expected to have more (Total Post = {_last_search_result.availableImages}, current max posts = {PixivTags.POSTS_PER_PAGE * i}).')
                    # wait at least 2 minutes before retry
                    delay = config.timeout
                    if delay < 120:
                        delay = 120
                    PixivHelper.print_and_log(None, f"Waiting for {delay} seconds before retrying.")
                    PixivHelper.print_delay(delay)
                    empty_page_retry = empty_page_retry + 1
                    # append a timestamp to the user-agent to dodge a cached empty response
                    PixivBrowserFactory.getBrowser().addheaders = [('User-agent', f'{config.useragent}{int(time.time())}')]
                    continue
                else:
                    PixivHelper.print_and_log("warn", 'No more images.')
                    flag = False
            elif _last_search_result is not None:
                # stop if this page contains no image id we haven't already seen
                set1 = set((x.imageId) for x in _last_search_result.itemList)
                difference = [x for x in t.itemList if (x.imageId) not in set1]
                if len(difference) == 0:
                    PixivHelper.print_and_log("warn", 'Getting duplicated result set, no more new images.')
                    flag = False

            if flag:
                # Issue #1090 reset retry flag on successful load
                empty_page_retry = 0
                for item in t.itemList:
                    last_image_id = item.imageId
                    PixivHelper.print_and_log(None, f'Image #{images}')
                    PixivHelper.print_and_log(None, f'Image Id: {item.imageId}')
                    if bookmark_count is not None and bookmark_count > item.bookmarkCount:
                        PixivHelper.print_and_log(None, f'Bookmark Count: {item.bookmarkCount}')
                        PixivHelper.print_and_log('info', f'Skipping imageId= {item.imageId} because less than bookmark count limit ({bookmark_count} > {item.bookmarkCount}).')
                        skipped_count = skipped_count + 1
                        continue
                    result = 0
                    # retry loop: repeat only on BadStatusLine, break on success or interrupt
                    while True:
                        try:
                            if t.availableImages > 0:
                                # PixivHelper.print_and_log(None, "Total Images: " + str(t.availableImages))
                                total_image = t.availableImages
                                if(stop_offset > 0 and stop_offset < total_image):
                                    total_image = stop_offset
                                total_image = total_image - start_offset
                                # PixivHelper.print_and_log(None, "Total Images Offset: " + str(total_image))
                            else:
                                # availableImages unknown: estimate from pages processed so far
                                total_image = ((i - 1) * 20) + len(t.itemList)
                            title_prefix = "Tags:{0} Page:{1} Image {2}+{3} of {4}".format(tags, i, images, skipped_count, total_image)
                            if member_id is not None:
                                title_prefix = "MemberId: {0} Tags:{1} Page:{2} Image {3}+{4} of {5}".format(member_id, tags, i, images, skipped_count, total_image)
                            result = PixivConstant.PIXIVUTIL_OK
                            if not caller.DEBUG_SKIP_PROCESS_IMAGE:
                                result = PixivImageHandler.process_image(caller, config, None, item.imageId,
                                                                         user_dir=root_dir,
                                                                         search_tags=search_tags,
                                                                         title_prefix=title_prefix,
                                                                         bookmark_count=item.bookmarkCount,
                                                                         image_response_count=item.imageResponse,
                                                                         notifier=notifier)
                                # NOTE(review): wait placed inside the debug-skip guard,
                                # matching the flattened source order — confirm indentation.
                                PixivHelper.wait(result, config)
                            break
                        except KeyboardInterrupt:
                            result = PixivConstant.PIXIVUTIL_KEYBOARD_INTERRUPT
                            break
                        except http.client.BadStatusLine:
                            PixivHelper.print_and_log(None, "Stuff happened, trying again after 2 second...")
                            PixivHelper.print_delay(2)

                    images = images + 1

                    if result in (PixivConstant.PIXIVUTIL_SKIP_DUPLICATE,
                                  PixivConstant.PIXIVUTIL_SKIP_LOCAL_LARGER,
                                  PixivConstant.PIXIVUTIL_SKIP_DUPLICATE_NO_WAIT):
                        # already-downloaded image; bail out of the whole search once
                        # checkUpdatedLimit consecutive skips are seen
                        updated_limit_count = updated_limit_count + 1
                        if config.checkUpdatedLimit != 0 and updated_limit_count >= config.checkUpdatedLimit:
                            PixivHelper.print_and_log(None, f"Skipping tags: {tags}")
                            PixivBrowserFactory.getBrowser().clear_history()
                            return
                        gc.collect()
                        continue
                    elif result == PixivConstant.PIXIVUTIL_KEYBOARD_INTERRUPT:
                        choice = input("Keyboard Interrupt detected, continue to next image (Y/N)").rstrip("\r")
                        if choice.upper() == 'N':
                            PixivHelper.print_and_log("info", f"Tags: {tags}, processing aborted.")
                            flag = False
                            break
                        else:
                            continue

            PixivBrowserFactory.getBrowser().clear_history()

            i = i + 1
            _last_search_result = t

            if end_page != 0 and end_page < i:
                PixivHelper.print_and_log('info', f"End Page reached: {end_page}")
                flag = False
            if t.isLastPage:
                PixivHelper.print_and_log('info', f"Last page: {i - 1}")
                flag = False
            # pixiv caps search results at 1000 pages; with infinite loop enabled,
            # restart from page 1 using the last image's work date as the new end date
            if config.enableInfiniteLoop and i == 1001 and sort_order != 'date':
                if last_image_id > 0:
                    # get the last date
                    PixivHelper.print_and_log('info', f"Hit page 1000, trying to get workdate for last image id: {last_image_id}.")
                    # referer = 'https://www.pixiv.net/en/artworks/{0}'.format(last_image_id)
                    result = PixivBrowserFactory.getBrowser().getImagePage(last_image_id)
                    _last_date = result[0].worksDateDateTime
                    # _start_date = image.worksDateDateTime + datetime.timedelta(365)
                    # hit the last page
                    i = 1
                    end_date = _last_date.strftime("%Y-%m-%d")
                    PixivHelper.print_and_log('info', f"Hit page 1000, looping back to page 1 with ecd: {end_date}.")
                    flag = True
                    last_image_id = -1
                else:
                    PixivHelper.print_and_log('info', "No more image in the list.")
                    flag = False

        PixivHelper.print_and_log(None, 'done')
        if search_page is not None:
            del search_page
    except KeyboardInterrupt:
        raise
    except BaseException:
        PixivHelper.print_and_log('error', f'Error at process_tags() at page {i}: {sys.exc_info()}')
        try:
            # best-effort dump of the failing page for debugging
            if search_page is not None:
                dump_filename = f'Error page for search tags {tags} at page {i}.html'
                PixivHelper.dump_html(dump_filename, search_page)
                PixivHelper.print_and_log('error', f"Dumping html to: {dump_filename}")
        except BaseException:
            PixivHelper.print_and_log('error', f'Cannot dump page for search tags: {search_tags}')
        raise
def process_new_illust_from_bookmark(caller, config, page_num=1, end_page_num=0, bookmark_count=-1):
    """Download new illustrations from followed artists ("New Illust from Bookmark").

    Walks the followed-new-illusts feed page by page starting at `page_num`,
    handing each image id to PixivImageHandler.process_image. Stops when
    `end_page_num` is exceeded (0 = no limit), the feed reports its last page,
    or an image is older than the stored last-download date
    (PIXIVUTIL_SKIP_OLDER result).

    Parameters:
        caller         -- main application object; must expose __br__ (PixivBrowser).
        config         -- PixivConfig instance (r18mode is read here).
        page_num       -- first feed page to process (1-based).
        end_page_num   -- last page to process; 0 means unlimited.
        bookmark_count -- bookmark-count filter forwarded to process_image;
                          -1 disables it.

    Raises: re-raises anything unexpected after logging it.
    """
    # NOTE: the old HTML-scraping implementation (bookmark_new_illust.php +
    # BeautifulSoup) was replaced by the getFollowedNewIllusts API call; the
    # leftover commented-out scraping code and the always-None parsed_page
    # dump branch have been removed as dead code.
    br: PixivBrowser = caller.__br__
    try:
        print("Processing New Illust from bookmark")
        i = page_num
        image_count = 1
        flag = True
        while flag:
            print(f"Page #{i}")
            # choose the r18 feed when configured, otherwise the all-ages feed
            mode = "r18" if config.r18mode else "all"
            pb = br.getFollowedNewIllusts(mode, current_page=i)

            for image_id in pb.imageList:
                print(f"Image #{image_count}")
                result = PixivImageHandler.process_image(caller,
                                                         config,
                                                         artist=None,
                                                         image_id=int(image_id),
                                                         bookmark_count=bookmark_count)
                image_count = image_count + 1

                if result == PixivConstant.PIXIVUTIL_SKIP_OLDER:
                    # reached images older than the last download; stop entirely
                    flag = False
                    break

                PixivHelper.wait(result, config)
            i = i + 1

            if (end_page_num != 0 and i > end_page_num) or pb.isLastPage:
                print("Limit or last page reached.")
                flag = False

        print("Done.")
    except KeyboardInterrupt:
        raise
    except BaseException:
        PixivHelper.print_and_log(
            'error',
            'Error at process_new_illust_from_bookmark(): {0}'.format(
                sys.exc_info()))
        raise